## Import Packages

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as sp
import pywt

In [None]:
# Access Google Drive: mount it at /content/drive so that paths under
# /content/drive/... (used when saving the feature file) are available
from google.colab import drive
drive.mount('/content/drive')

.

.

.



## Declare the size of feature dataset

In [None]:
# Number of recordings for each robotic spot-welding condition (Normal, Abnormal)
NoOfData = 180

# Number of sensor signals per recording: Acceleration, Voltage, Current
NoOfSensor = 3

# Number of feature types per signal: Max, Min, Mean, RMS, Variance, Skewness,
# Kurtosis, Crest factor, Shape factor, Impulse factor
NoOfFeature = 10

# Show the three sizes as the cell output
(NoOfData, NoOfSensor, NoOfFeature)

## Load Raw Dataset (360 files)

In [None]:
# Download every raw recording and bind it to a numbered notebook-level name:
# Normal_1 ... Normal_<NoOfData> and Abnormal_1 ... Abnormal_<NoOfData>.
for i in range(NoOfData):

    temp_path1 = 'https://github.com/Eunseob/purdue_me597/blob/main/ml_tutorial/Dataset/Normal_%d?raw=true'%(i+1)   # File path of temporary normal data
    temp_path2 = 'https://github.com/Eunseob/purdue_me597/blob/main/ml_tutorial/Dataset/Abnormal_%d?raw=true'%(i+1) # File path of temporary abnormal data

    # Assign through globals() rather than exec(): the same variable names are
    # created for the later cells, but no statement is assembled from a string.
    # header=None: the files are read without a header row.
    globals()['Normal_%d' % (i+1)]   = pd.read_csv(temp_path1, sep=',', header=None)
    globals()['Abnormal_%d' % (i+1)] = pd.read_csv(temp_path2, sep=',', header=None)

## Time Domain Feature Extraction
- 10 features * 3 sensors = 30 features

In [None]:
# Root-mean-square helper used by the feature extraction cells
def rms(x):
    """Return the root mean square of x, i.e. sqrt(mean(x**2))."""
    mean_of_squares = np.mean(x**2)
    return np.sqrt(mean_of_squares)

In [None]:
# Zero-filled containers for the time-domain features of each condition.
# Rows: one per (sensor, feature type) pair; columns: one per recording.
time_feature_shape = (NoOfSensor*NoOfFeature, NoOfData)

TimeFeature_Normal = np.zeros(time_feature_shape)
TimeFeature_Abnormal = np.zeros(time_feature_shape)

# Report both shapes
print(TimeFeature_Normal.shape)
print(TimeFeature_Abnormal.shape)

# Show the (still all-zero) normal array as the cell output
TimeFeature_Normal

In [None]:
# Fill TimeFeature_Normal / TimeFeature_Abnormal: column i holds the features of
# recording i+1, and sensor j occupies rows NoOfFeature*j ... NoOfFeature*j+9.
for i in range(NoOfData):

    # Declare temporary data: look the recordings up by their numbered names
    # (plain globals() lookup instead of exec(), so no code is built from strings)
    temp_data1 = globals()["Normal_%d" % (i+1)]
    temp_data2 = globals()["Abnormal_%d" % (i+1)]

    # Time domain feature extraction
    for j in range(NoOfSensor):

        # First row of the block of features belonging to sensor j
        row = NoOfFeature*j

        # The ten features are identical for both conditions; only the source
        # table and the destination array differ.
        for features, data in ((TimeFeature_Normal, temp_data1),
                               (TimeFeature_Abnormal, temp_data2)):

            # Column 0 is skipped; sensor j is read from column j+1
            signal = data.iloc[:, j+1]

            # Quantities shared by several features, computed once per signal
            peak = np.max(signal)
            signal_rms = rms(signal)
            mean_abs = np.mean(np.abs(signal))

            features[row+0, i] = peak                  # Max
            features[row+1, i] = np.min(signal)        # Min
            features[row+2, i] = np.mean(signal)       # Mean
            features[row+3, i] = signal_rms            # RMS
            features[row+4, i] = np.var(signal)        # Variance
            features[row+5, i] = sp.skew(signal)       # Skewness
            features[row+6, i] = sp.kurtosis(signal)   # Kurtosis
            features[row+7, i] = peak/signal_rms       # Crest factor   (max / rms)
            features[row+8, i] = signal_rms/mean_abs   # Shape factor   (rms / mean|x|)
            features[row+9, i] = peak/mean_abs         # Impulse factor (max / mean|x|)

print(TimeFeature_Normal.shape)
print(TimeFeature_Abnormal.shape)

TimeFeature_Normal

### Combine Normal and Abnormal feature arrays

* axis=0: join along the rows (the arrays are stacked one below the other)
* axis=1: join along the columns (the arrays are placed side by side)

In [None]:
# Append the abnormal recordings as extra columns after the normal ones;
# the feature rows stay aligned.
TimeFeature = np.concatenate((TimeFeature_Normal, TimeFeature_Abnormal), axis=1)

# Show the combined shape as the cell output
TimeFeature.shape

.

.

.



## Frequency Domain Feature Extraction
- 10 features * 8 wavelet levels * 3 sensors = 240 features

In [None]:
# Settings for the wavelet decomposition used in frequency-domain extraction

# Number of decomposition levels
Level = 8

# Mother wavelet: Haar
MotherWavelet = pywt.Wavelet('haar')

In [None]:
# Zero-filled containers for the frequency-domain features of each condition.
# Rows: one per (sensor, wavelet level, feature type); columns: one per recording.
freq_feature_shape = (NoOfSensor*NoOfFeature*Level, NoOfData)

FreqFeature_Normal = np.zeros(shape=freq_feature_shape)
FreqFeature_Abnormal = np.zeros(shape=freq_feature_shape)

# Report both shapes
print(FreqFeature_Normal.shape)
print(FreqFeature_Abnormal.shape)

# Show the (still all-zero) normal array as the cell output
FreqFeature_Normal

In [None]:
# Fill FreqFeature_Normal / FreqFeature_Abnormal: column i holds the wavelet-based
# features of recording i+1.
#
# Row layout (sensor-major, then level, then feature type):
#     row = NoOfFeature*Level*j + NoOfFeature*k + f
# with sensor j in 0..NoOfSensor-1, level index k in 0..Level-1 and feature f in 0..9.
# This visits each of the NoOfSensor*NoOfFeature*Level rows exactly once.
# NOTE(review): the original cell left the row index blank; this layout follows its
# tip ("use NoOfFeature, Level, j, and k") and mirrors the time-domain layout.
# Confirm it matches what downstream code expects.
for i in range(NoOfData):

    # Declare temporary data (only sensor signals: every column after the first).
    # Plain globals() lookup instead of exec(), so no code is built from strings.
    temp_data1 = globals()["Normal_%d" % (i+1)].iloc[:, 1:]
    temp_data2 = globals()["Abnormal_%d" % (i+1)].iloc[:, 1:]

    # Wavelet decomposition along axis 0, all sensor columns at once
    Coef1 = pywt.wavedec(temp_data1, MotherWavelet, level=Level, axis=0)
    Coef2 = pywt.wavedec(temp_data2, MotherWavelet, level=Level, axis=0)

    # Frequency domain feature extraction
    for j in range(NoOfSensor):

        for k in range(Level):
            # Index Level-k runs from the last list entry down to index 1, so
            # entry 0 of the coefficient list is never used. Per the PyWavelets
            # documentation the list is [cA_n, cD_n, ..., cD_1], i.e. only the
            # detail coefficients are used, starting with cD_1.
            coef1 = Coef1[Level-k]
            coef2 = Coef2[Level-k]

            # First row of the feature block for (sensor j, level index k)
            row = NoOfFeature*Level*j + NoOfFeature*k

            # The ten features are identical for both conditions; only the
            # coefficients and the destination array differ.
            for features, coef in ((FreqFeature_Normal, coef1),
                                   (FreqFeature_Abnormal, coef2)):

                # Coefficients of sensor j at this level
                band = coef[:, j]

                # Quantities shared by several features, computed once
                peak = np.max(band)
                band_rms = rms(band)
                mean_abs = np.mean(np.abs(band))

                features[row+0, i] = peak                # Max
                features[row+1, i] = np.min(band)        # Min
                features[row+2, i] = np.mean(band)       # Mean
                features[row+3, i] = band_rms            # RMS
                features[row+4, i] = np.var(band)        # Variance
                features[row+5, i] = sp.skew(band)       # Skewness
                features[row+6, i] = sp.kurtosis(band)   # Kurtosis
                features[row+7, i] = peak/band_rms       # Crest factor   (max / rms)
                features[row+8, i] = band_rms/mean_abs   # Shape factor   (rms / mean|x|)
                features[row+9, i] = peak/mean_abs       # Impulse factor (max / mean|x|)

print(FreqFeature_Normal.shape)
print(FreqFeature_Abnormal.shape)

FreqFeature_Normal

### Combine Normal and Abnormal feature arrays

* axis=0: join along the rows (the arrays are stacked one below the other)
* axis=1: join along the columns (the arrays are placed side by side)

In [None]:
# Append the abnormal recordings as extra columns after the normal ones;
# the feature rows stay aligned.
FreqFeature = np.concatenate((FreqFeature_Normal, FreqFeature_Abnormal), axis=1)

# Show the combined shape as the cell output
FreqFeature.shape

.

.

.



## Final Feature Dataset 
- (30 Time domain features + 240 Frequency domain features = 270 features)

In [None]:
# Stack the frequency-domain rows below the time-domain rows; both arrays have
# one column per recording, so the columns line up.
Features = np.concatenate((TimeFeature, FreqFeature), axis=0)

# Report the final shape, then show the array as the cell output
print(Features.shape)
Features

### Convert Array into Data frame format

* Easy to save as data file (csv)

In [None]:
# Wrap the feature array in a DataFrame (default integer row/column labels)
Features_df = pd.DataFrame(data=Features)

# Show the table as the cell output
Features_df

### Save Final Feature Data in Drive (.csv)

In [None]:
# Destination file under the mounted Drive folder
path = '/content/drive/MyDrive/Colab Notebooks/SavedFiles/FeatureData.csv'

# Write the values only: comma-separated, without a header row or index column
Features_df.to_csv(
    path,
    sep=',',
    header=None,
    index=None,
)