In [75]:
import pandas as pd
import numpy as np
from scipy.fft import fft, rfft
from scipy.fft import fftfreq, rfftfreq
import os

### Data Preparation and Feature Extraction using Fast Fourier Transform

In [76]:
path = "./DAQ_Healthy/"
positions = ['Position-I/', 'Position-II/']
waveforms = ['Sine/', 'Square/', 'Triangle/']

df = {}
for i in range(0,31):
    df[i]=[]
df['freq']=[]

for position in positions:
    for waveform in waveforms:
            curpath = path + position + waveform
            for i in range(9,31):
                data = pd.read_csv(curpath+str(i)+'.csv')

                seq1 = data['Amplitude - Voltage_0']
                seq2 = np.array(seq1.rolling(
                    window=2,
                    center=True,
                    min_periods=1
                ).mean())
                seq1 = np.array(seq1)

                fourier1 = fft(seq1)
                fourier1 = np.abs(fourier1[0:31])/(len(seq1)/2)
                fourier2 = fft(seq2)
                fourier2 = np.abs(fourier2[0:31])/(len(seq2)/2)
                for j in range(0,31):
                    df[j].append(fourier1[j])
                    df[j].append(fourier2[j])

                df['freq'].append(i)
                df['freq'].append(i)

path = "./DAQ_Unhealthy"
positions = ['_1/', '_2/']
for position in positions:
    for waveform in waveforms:
            curpath = path + position + waveform
            for i in range(9,31):
                data = pd.read_csv(curpath+str(i)+'.csv')

                seq1 = data['Amplitude - Voltage_0']
                seq2 = np.array(seq1.rolling(
                    window=2,
                    center=True,
                    min_periods=1
                ).mean())
                seq1 = np.array(seq1)

                fourier1 = fft(seq1)
                fourier1 = np.abs(fourier1[0:31])/(len(seq1)/2)
                fourier2 = fft(seq2)
                fourier2 = np.abs(fourier2[0:31])/(len(seq2)/2)
                for j in range(0,31):
                    df[j].append(fourier1[j])
                    df[j].append(fourier2[j])

                df['freq'].append(i)
                df['freq'].append(i)

data = pd.DataFrame(df)
data = data.sample(frac=1).reset_index(drop=True)

In [77]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,freq
0,0.023637,0.001286,0.000962,0.001632,0.001466,0.002316,0.007885,0.004916,0.001572,0.000685,...,0.001089,0.000793,0.000404,0.00078,0.002305,0.000928,0.00041,0.000946,0.000177,16
1,0.000951,0.000643,0.000613,0.000565,0.000682,0.000985,0.001329,0.002189,0.007674,0.005079,...,0.000324,0.000229,0.000671,0.003938,0.001164,0.000674,0.000741,0.000435,0.000602,21
2,0.022988,0.001256,0.001426,0.003061,0.010201,0.007104,0.002859,0.002194,0.002228,0.0055,...,0.001143,0.000852,0.001811,0.001265,0.000354,0.001322,0.000355,0.000511,0.000409,11
3,3.7e-05,0.000592,0.000499,0.000529,0.000667,0.001095,0.000637,0.000664,0.000972,0.001373,...,0.004831,0.001346,0.000792,0.000788,0.000768,0.000573,0.000366,0.00041,0.000503,27
4,0.001501,0.00208,0.002381,0.002726,0.003551,0.006199,0.021658,0.014767,0.006102,0.00406,...,0.001421,0.000935,0.001583,0.003595,0.003366,0.002962,0.0006,0.002159,0.001873,16


In [78]:
len(data)

528

### Model Training and Evaluation Without PCA

In [79]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

X, y = data.drop(['freq'], axis=1), data['freq']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:" + str(accuracy_score(y_test, y_pred)))
print("Precision:" + str(precision_score(y_test, y_pred, average='weighted')))
print("Recall:" + str(recall_score(y_test, y_pred, average='weighted')))
print("F1-score:" + str(f1_score(y_test, y_pred, average='weighted')))

Accuracy:0.9182389937106918
Precision:0.9423924782415349
Recall:0.9182389937106918
F1-score:0.9185976149861307


### Principal Component Analysis (PCA)

In [80]:
from statsmodels.multivariate.pca import PCA

X, y = data.drop(['freq'], axis=1), data['freq']
X = PCA(X, ncomp=15).factors
X.head()

Unnamed: 0,comp_00,comp_01,comp_02,comp_03,comp_04,comp_05,comp_06,comp_07,comp_08,comp_09,comp_10,comp_11,comp_12,comp_13,comp_14
0,0.004161,-0.05489,0.014141,-0.035765,0.01351,-0.046005,0.050603,0.016264,-0.01817,0.0294,-0.000873,-0.030843,0.022486,-0.013949,0.003768
1,0.009388,0.017651,0.039246,0.026331,0.002373,0.007666,0.006937,0.058107,0.013558,0.009578,0.042155,-0.023602,0.045887,0.041678,-0.005377
2,-0.012127,-0.078343,0.041444,-0.032473,0.038721,-0.003835,0.004971,0.001764,-0.004004,0.017081,0.002201,0.036946,-0.03624,0.006831,0.003365
3,0.017513,-0.002593,-0.040664,-0.007938,-0.022522,0.046148,-0.004594,0.045117,-0.023052,-0.022685,-0.011399,0.020093,0.035228,0.0025,0.001205
4,-0.086168,-0.040279,0.011739,0.056893,-0.027027,-0.028582,0.125479,-0.003729,0.008657,0.070831,-0.022097,-0.08674,0.114464,-0.065984,0.008193


### Model Training and Evaluation With PCA

In [81]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:" + str(accuracy_score(y_test, y_pred)))
print("Precision:" + str(precision_score(y_test, y_pred, average='weighted')))
print("Recall:" + str(recall_score(y_test, y_pred, average='weighted')))
print("F1-score:" + str(f1_score(y_test, y_pred, average='weighted')))

Accuracy:0.9559748427672956
Precision:0.9675751222921034
Recall:0.9559748427672956
F1-score:0.956112256223244
