In [84]:
import pandas as pd
import numpy as np
from scipy.fft import fft, rfft
from scipy.fft import fftfreq, rfftfreq
import os

### Data Preparation: Data Augmentation and Feature Extraction with the Fast Fourier Transform

In [85]:
# --- Configuration -----------------------------------------------------------
N_FEATURES = 31            # number of leading FFT bins kept as features per row
FILE_IDS = range(9, 31)    # recordings are stored as 9.csv ... 30.csv
SIGNAL_COLUMN = 'Amplitude - Voltage_0'
SHUFFLE_SEED = 42          # fixes the row order so downstream splits are repeatable

# Class label ('form') is the index into this list: 0 = Sine, 1 = Square, 2 = Triangle.
waveforms = ['Sine/', 'Square/', 'Triangle/']

# (path prefix, position sub-folders). The unhealthy prefix deliberately has no
# trailing slash because its sub-folders are named "DAQ_Unhealthy_1/", "..._2/".
DATASETS = [
    ("./DAQ_Healthy/", ['Position-I/', 'Position-II/']),
    ("./DAQ_Unhealthy", ['_1/', '_2/']),
]


def spectrum_features(signal, n_bins=N_FEATURES):
    """Return the magnitudes of the first ``n_bins`` FFT bins of ``signal``.

    Magnitudes are scaled by ``2 / len(signal)``, exactly as in the original
    per-dataset loops.
    """
    samples = np.array(signal)
    return np.abs(fft(samples)[0:n_bins]) / (len(samples) / 2)


def recording_feature_rows(csv_path):
    """Read one recording and return its two feature rows.

    The first row is the spectrum of the raw signal; the second is the spectrum
    of a 2-sample centred moving average of the same signal, which acts as the
    augmented copy of the recording.
    """
    raw = pd.read_csv(csv_path)[SIGNAL_COLUMN]
    smoothed = raw.rolling(window=2, center=True, min_periods=1).mean()
    return [spectrum_features(raw), spectrum_features(smoothed)]


# Column-oriented accumulator: one list per FFT bin plus the class label.
df = {j: [] for j in range(N_FEATURES)}
df['form'] = []

# Healthy and unhealthy recordings go through the same pipeline; only the
# folder layout differs. Health status itself is not stored as a column.
for path, positions in DATASETS:
    for position in positions:
        for waveform in waveforms:
            curpath = path + position + waveform
            form_label = waveforms.index(waveform)
            for i in FILE_IDS:
                for features in recording_feature_rows(curpath + str(i) + '.csv'):
                    for j in range(N_FEATURES):
                        df[j].append(features[j])
                    df['form'].append(form_label)

# NOTE(review): every recording contributes two near-identical rows (raw and
# smoothed). After shuffling, a plain row-wise train/test split can put the two
# variants of one recording on opposite sides, which may inflate test scores.
# Consider keeping a recording id and splitting by group.
data = pd.DataFrame(df)
data = data.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [86]:
# Preview the first rows of the shuffled feature table: columns 0-30 are the
# FFT-magnitude features and 'form' is the waveform class label.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,form
0,0.000104,0.000338,0.000201,0.000255,0.000358,0.000552,0.001153,0.004737,0.000807,0.000422,...,0.000204,8.2e-05,0.0001,9.9e-05,0.000146,0.000767,0.000189,5.4e-05,0.000132,0
1,0.029193,0.000235,0.000168,0.000127,0.000201,0.000226,0.000138,0.000156,9e-05,0.000183,...,0.00015,0.000346,0.00042,0.000314,0.000257,0.000143,0.000153,0.000464,0.004216,0
2,0.026024,0.000362,0.000185,0.000311,0.000179,0.000127,0.0105,0.000143,0.000204,0.000448,...,0.000285,0.000124,0.002114,0.000141,0.001074,0.000911,0.000485,0.000405,0.000638,2
3,0.002043,0.000668,0.000118,0.000309,0.000174,0.000396,0.000468,0.000665,0.001424,0.005584,...,0.000104,0.000276,0.000201,0.000226,0.000397,0.000236,6.5e-05,0.000592,0.000343,0
4,0.000787,0.000456,0.000437,0.000467,0.000589,0.000705,0.000258,0.000614,0.000893,0.001075,...,0.004444,0.003377,0.001016,0.001089,0.000541,0.000457,0.000516,0.000854,0.000649,1


In [87]:
# Sanity check on the row count:
# 2 datasets x 2 positions x 3 waveforms x 22 files x 2 variants (raw + smoothed) = 528.
len(data)

528

### Model Training and Evaluation Without PCA

In [88]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Features are every column except the waveform label 'form'.
X, y = data.drop(['form'], axis=1), data['form']

# Hold out 30% of the rows; random_state fixes the split for a given row order of `data`.
# NOTE(review): `data` holds two rows per recording (raw and smoothed spectra) in
# shuffled order, so this row-wise split can separate the two variants of one
# recording into train and test; the scores below may therefore be optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

# Seed the forest too: without random_state the bootstrap samples and feature
# subsets differ on every run, so the reported metrics would not be reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Accuracy takes no averaging argument; the other metrics are class-weighted averages.
print("Accuracy:" + str(accuracy_score(y_test, y_pred)))
for label, metric in (("Precision", precision_score), ("Recall", recall_score), ("F1-score", f1_score)):
    print(label + ":" + str(metric(y_test, y_pred, average='weighted')))

Accuracy:0.8616352201257862
Precision:0.8633065048159387
Recall:0.8616352201257862
F1-score:0.8618725525097899


### Principal Component Analysis (PCA)

In [89]:
from statsmodels.multivariate.pca import PCA

# Target: the waveform class label.
y = data['form']

# Project the FFT features (every column except 'form') onto 15 principal
# components and keep the per-row component scores as the new feature matrix.
# NOTE(review): the PCA is fitted on all rows, including those later held out
# for testing by the train/test split in the next cell.
fft_features = data.drop(columns=['form'])
X = PCA(fft_features, ncomp=15).factors
X.head()

Unnamed: 0,comp_00,comp_01,comp_02,comp_03,comp_04,comp_05,comp_06,comp_07,comp_08,comp_09,comp_10,comp_11,comp_12,comp_13,comp_14
0,0.043808,-0.010831,0.006891,0.032673,-0.032814,-0.003856,0.002613,0.00421,-0.01899,0.000122,0.01193,0.004025,0.021994,-0.006646,0.00871
1,0.039278,-0.023079,-0.008946,-0.113376,0.009319,-0.069714,0.002501,-0.027134,0.111981,0.018087,0.045757,0.033862,-0.009267,0.001953,0.002992
2,0.034326,-0.024399,0.009164,-0.047805,0.028029,-0.05258,0.056495,-0.021488,-0.009651,-0.004898,-0.010706,-0.005522,-0.001761,-0.026888,0.049697
3,0.037999,-0.000576,0.01521,0.011864,-0.029585,0.014579,-0.005248,0.005887,-0.006618,0.006011,0.01682,-0.002427,0.002912,0.016365,0.024154
4,0.015689,0.002091,-0.050342,0.00373,-0.015263,0.03417,0.000185,0.033304,-0.027512,-0.040315,0.019874,-0.003054,0.005685,0.011154,-0.026293


### Model Training and Evaluation With PCA

In [90]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Same 70/30 split settings as the non-PCA run, applied to the PCA factors in `X`.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

# Seed the forest: without random_state the bootstrap samples and feature subsets
# differ on every run, so the comparison against the non-PCA model would be noisy.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Accuracy takes no averaging argument; the other metrics are class-weighted averages.
print("Accuracy:" + str(accuracy_score(y_test, y_pred)))
for label, metric in (("Precision", precision_score), ("Recall", recall_score), ("F1-score", f1_score)):
    print(label + ":" + str(metric(y_test, y_pred, average='weighted')))

Accuracy:0.8867924528301887
Precision:0.8872397330923496
Recall:0.8867924528301887
F1-score:0.886808427125266
