In [183]:
import pandas as pd
import numpy as np
from scipy.fft import fft, rfft
from scipy.fft import fftfreq, rfftfreq
import os

### Data Preparation using Data Augmentation and Feature Extraction using Fast Fourier Transform

In [184]:
path = "./DAQ_Healthy/"
positions = ['Position-I/', 'Position-II/']
waveforms = ['Sine/', 'Square/', 'Triangle/']

df = {}
for i in range(0,31):
    df[i]=[]
df['freq']=[]

for position in positions:
    for waveform in waveforms:
            curpath = path + position + waveform
            for i in range(9,31):
                data = pd.read_csv(curpath+str(i)+'.csv')

                seq1 = data['Amplitude - Voltage_0']
                seq2 = np.array(seq1.rolling(
                    window=2,
                    center=True,
                    min_periods=1
                ).mean())
                seq1 = np.array(seq1)

                fourier1 = fft(seq1)
                fourier1 = np.abs(fourier1[0:31])/(len(seq1)/2)
                fourier2 = fft(seq2)
                fourier2 = np.abs(fourier2[0:31])/(len(seq2)/2)
                for j in range(0,31):
                    df[j].append(fourier1[j])
                    df[j].append(fourier2[j])

                df['freq'].append(i)
                df['freq'].append(i)

path = "./DAQ_Unhealthy"
positions = ['_1/', '_2/']
for position in positions:
    for waveform in waveforms:
            curpath = path + position + waveform
            for i in range(9,31):
                data = pd.read_csv(curpath+str(i)+'.csv')

                seq1 = data['Amplitude - Voltage_0']
                seq2 = np.array(seq1.rolling(
                    window=2,
                    center=True,
                    min_periods=1
                ).mean())
                seq1 = np.array(seq1)

                fourier1 = fft(seq1)
                fourier1 = np.abs(fourier1[0:31])/(len(seq1)/2)
                fourier2 = fft(seq2)
                fourier2 = np.abs(fourier2[0:31])/(len(seq2)/2)
                for j in range(0,31):
                    df[j].append(fourier1[j])
                    df[j].append(fourier2[j])

                df['freq'].append(i)
                df['freq'].append(i)

data = pd.DataFrame(df)
data = data.sample(frac=1).reset_index(drop=True)

In [185]:
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,freq
0,0.028287,0.001434,0.00144,0.001332,0.001503,0.001667,0.00183,0.002305,0.002806,0.002373,...,0.007968,0.004604,0.000918,0.000702,0.000625,0.001093,0.00101,0.001223,0.000413,28
1,0.000592,0.000124,0.000108,8.7e-05,0.000286,0.002946,0.000747,0.000732,0.000947,0.002366,...,0.000406,0.000295,0.000235,0.000314,0.000433,0.000197,0.001194,0.004374,0.000666,12
2,0.000567,0.000354,0.00038,0.000357,0.000335,0.000393,0.000576,0.000786,0.000978,0.001283,...,0.001495,0.006849,0.001989,0.001174,0.001093,0.000904,0.000508,0.000255,0.000141,29
3,0.025901,0.000819,0.001117,0.001303,0.002389,0.006601,0.010782,0.003374,0.002138,0.001637,...,0.007061,0.003371,0.000907,0.000429,0.00062,0.0003,0.009153,0.000133,0.000519,14
4,0.001777,0.001018,0.001205,0.001343,0.001876,0.00321,0.00674,0.030506,0.006232,0.004226,...,0.00072,0.003304,0.000996,0.002293,0.002035,0.003027,0.000865,0.000896,0.002938,17


In [186]:
len(data)

528

### Model Training and Evaluation Without PCA

In [187]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error

X, y = data.drop(['freq'], axis=1), data['freq']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

model = RandomForestRegressor()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Mean Absolute Percentage Error:" + str(mean_absolute_percentage_error(y_test, y_pred)))

Mean Absolute Percentage Error:0.04850311733681555


### Principal Component Analysis (PCA)

In [188]:
from statsmodels.multivariate.pca import PCA

X, y = data.drop(['freq'], axis=1), data['freq']
X = PCA(X, ncomp=15).factors
X.head()

Unnamed: 0,comp_00,comp_01,comp_02,comp_03,comp_04,comp_05,comp_06,comp_07,comp_08,comp_09,comp_10,comp_11,comp_12,comp_13,comp_14
0,0.029372,0.045349,0.097448,0.104687,-0.034387,-0.035644,0.012577,-0.047005,0.066502,0.022003,-0.044922,0.008049,-0.015269,0.02603,-0.01281
1,-0.014756,-0.014104,0.004178,-0.021759,0.04262,0.020209,0.036625,0.040277,0.005111,0.007262,0.048624,0.042523,0.032659,-0.061977,0.036128
2,-0.011811,-0.014004,0.056295,-0.023689,-0.008327,-0.028983,-0.027292,-0.017913,0.023189,-0.000525,-0.061473,0.016826,0.027298,0.010748,0.057385
3,0.023373,0.028808,-0.009817,0.113724,-0.036293,-0.007018,0.00217,0.023231,0.094518,0.135801,0.034892,0.08781,0.025377,0.134461,0.070683
4,0.146756,0.005949,0.119862,-0.151063,0.053098,0.242973,0.030977,0.000443,0.102182,-0.267608,-0.03421,-0.009692,0.098659,0.134539,-0.05769


### Model Training and Evaluation With PCA

In [189]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

model = RandomForestRegressor()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Mean Absolute Percentage Error:" + str(mean_absolute_percentage_error(y_test, y_pred)))

Mean Absolute Percentage Error:0.07877327299899804
