In [914]:
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from python_scripts.utils.feature_selection import *

# Feature Extraction

In [915]:
final_dataset_path = '../../dataset/final/whole/'
acc_dataset_path = final_dataset_path + 'PatchTable_Acceleration_Normalised.csv'
gyro_dataset_path = final_dataset_path + 'PatchTable_Gyroscope_Normalised.csv'

In [916]:
acc_dataset_df = pd.read_csv(acc_dataset_path).iloc[:, 1:]
gyro_dataset_df = pd.read_csv(gyro_dataset_path).iloc[:, 1:]
acc_dataset_df = acc_dataset_df.drop('ClassificationType', axis=1)
gyro_dataset_df = gyro_dataset_df.drop('ClassificationType', axis=1)

### Feature Importance

In [917]:
# Separate features and target variable
acc_features = acc_dataset_df.drop(columns=['Class'])
acc_target = acc_dataset_df['Class']
gyro_features = gyro_dataset_df.drop(columns=['Class'])
gyro_target = gyro_dataset_df['Class']

In [918]:
acc_features

Unnamed: 0,DominantFreq_VT,DominantFreq_ML,DominantFreq_AP,DominantFreq_Magnitude,CorssCorrelationSum_VTML,CorssCorrelationSum_VTAP,CorssCorrelationSum_MLAP,CorssCorrelationPeak_VTML,CorssCorrelationLag_VTML,CorssCorrelationPeak_VTAP,...,MeanFreq_Plom_Magnitude,MedianFreq_Plom_Magnitude,SumPSD_Plom_Magnitude,Moment2ndPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,KurtosisPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude,ModeSignal_Magnitude,EnergySignal_Magnitude
0,-0.565279,-0.183289,-0.349227,-0.038319,-0.339676,-0.226463,-0.190883,-0.786374,-0.226534,-0.699258,...,-0.492091,-0.492091,-1.027345,-0.905046,-0.751573,-0.07308,-0.083144,-0.737538,0.657242,-1.027326
1,-0.642362,1.896645,-0.727585,0.039852,1.518135,3.290363,2.295714,0.805993,3.136402,2.605245,...,-1.133617,-1.133617,1.839771,-0.590126,-0.608156,0.913162,0.887799,0.634839,-0.187022,1.839808
2,-0.398265,-0.231104,2.833913,0.890543,-0.617424,-0.405724,-0.812518,-0.938718,0.023633,-0.567433,...,-0.286002,-0.286002,-0.668854,-0.704978,-0.669724,1.253597,1.294697,-0.630665,-0.211224,-0.668855
3,2.742887,0.204009,-0.081909,-0.442972,1.032493,0.018871,-0.037891,0.920124,-0.629212,1.641298,...,-0.912159,-0.912159,0.351717,-0.721528,-0.688911,-1.080254,-0.860644,-0.473802,-0.646376,0.351628
4,-0.481772,2.236128,-0.25875,-0.801642,-0.7384,-0.413833,-0.826791,-0.705051,-0.177941,-0.537018,...,-0.072173,-0.072173,0.307707,0.30175,0.065519,-0.959054,-0.803333,0.190848,-1.754123,0.307731
5,-0.969967,-0.173726,-0.505505,2.881618,-0.580067,-0.290057,1.389306,-0.725068,0.075973,-0.198954,...,-0.35472,-0.35472,1.231292,0.953839,0.86684,0.059012,0.057047,0.005452,-0.508892,1.231322
6,0.738717,-1.015263,1.369833,0.021459,-0.736495,-0.428313,-0.848734,-0.815941,-0.807192,-1.005746,...,2.663853,2.663853,-0.58073,2.223116,2.412486,-1.434764,-1.573602,0.409136,-1.547979,-0.580734
7,-0.687328,-0.38411,-0.472604,-1.02696,-0.693986,-0.325986,-0.782489,-0.95109,-0.444676,-0.347422,...,1.433947,1.433947,-1.212053,-0.679406,-0.662371,0.215588,-0.004146,1.500628,1.819892,-1.212035
8,0.321181,-0.173726,-0.645333,-0.033721,-0.655124,-0.408778,-0.765699,0.035587,-0.193455,-0.261149,...,-0.049008,-0.049008,-0.496495,-0.416946,-0.481536,1.763047,1.237815,-0.681959,0.417107,-0.496477
9,0.006424,-1.273462,-0.49728,-0.594717,-0.378319,-0.353833,-0.562437,0.144497,0.054253,-0.118487,...,-0.37263,-0.37263,-1.318803,-0.955736,-0.761834,0.971064,1.606354,1.265671,1.209338,-1.318813


In [919]:
acc_target

0     0
1     1
2     0
3     1
4     0
5     1
6     0
7     1
8     0
9     1
10    0
11    1
Name: Class, dtype: int64

In [920]:
gyro_features

Unnamed: 0,DominantFreq_VT,DominantFreq_ML,DominantFreq_AP,DominantFreq_Magnitude,CorssCorrelationSum_VTML,CorssCorrelationSum_VTAP,CorssCorrelationSum_MLAP,CorssCorrelationPeak_VTML,CorssCorrelationLag_VTML,CorssCorrelationPeak_VTAP,...,MeanFreq_Plom_Magnitude,MedianFreq_Plom_Magnitude,SumPSD_Plom_Magnitude,Moment2ndPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,KurtosisPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude,ModeSignal_Magnitude,EnergySignal_Magnitude
0,0.727909,-0.996089,0.419431,-0.486654,0.040499,0.015641,-0.179193,0.753745,0.149019,0.448143,...,-0.492091,-0.492091,0.243521,-0.102638,-0.152766,0.437056,0.40755,-0.201971,1.140569,0.242431
1,0.605287,0.932871,-1.231731,1.036364,2.636125,2.650822,2.380678,2.197851,2.665381,1.800904,...,-1.133617,-1.133617,2.586322,-0.43706,-0.250719,1.170404,1.550426,0.284359,-0.176969,2.587382
2,-1.172733,1.459405,0.151675,0.852049,-0.083187,-0.151229,0.08104,-0.256691,-0.13609,-0.707054,...,-0.286002,-0.286002,-0.41706,-0.946692,-0.603891,-0.775134,-0.680256,1.849907,-0.386439,-0.417051
3,1.178162,-1.038511,0.588373,2.064644,1.539852,1.400624,1.795883,1.529156,1.263858,0.182726,...,-0.912159,-0.912159,0.691269,-0.634657,-0.322506,1.691026,1.858594,1.827208,0.063733,0.690648
4,1.524952,0.196722,-1.136104,-0.835882,-0.426827,-0.144381,-0.34668,-0.599753,-0.35252,-0.355487,...,-0.072173,-0.072173,-0.212128,-0.541907,-0.549846,-1.354867,-1.115711,-0.173902,0.772013,-0.211365
5,0.800716,-0.207536,-0.141581,-0.835882,-0.091859,-0.024814,-0.02703,-0.045921,-0.056464,-0.408242,...,-0.35472,-0.35472,0.437454,0.59898,0.221992,0.206712,0.138509,-0.522966,-0.065989,0.438229
6,-1.00796,-0.786473,1.2482,-0.409048,-0.92099,-1.075703,-1.110358,-0.91795,-1.23368,-0.722907,...,2.663853,2.663853,-1.449924,-0.556076,-0.542153,-1.237536,-1.077491,-0.08042,-2.156054,-1.449896
7,-1.046279,1.110046,-1.059602,0.561027,-0.871362,-1.004961,-1.02397,-0.89436,-1.045051,-0.991809,...,1.433947,1.433947,-1.250069,0.450521,0.045326,-0.106347,-0.464732,-0.564841,-0.943992,-1.249586
8,-0.87001,1.439442,-1.225356,-0.971692,-0.536956,-0.464523,-0.497764,-0.827303,-0.393554,-0.811131,...,-0.049008,-0.049008,-0.153544,0.361384,-0.142589,-0.668016,-0.751083,-1.648707,0.527311,-0.153453
9,-0.682245,-1.175759,0.699938,-1.631343,-0.517936,-0.605482,-0.489775,-0.807254,-0.29668,-0.97606,...,-0.37263,-0.37263,-0.431019,-0.673572,-0.545759,-0.934038,-0.961453,-1.225724,1.828185,-0.430109


In [921]:
gyro_target

0     0
1     1
2     0
3     1
4     0
5     1
6     0
7     1
8     0
9     1
10    0
11    1
Name: Class, dtype: int64

In [922]:
acc_random_forest_model = RandomForestClassifier()
gyro_random_forest_model = RandomForestClassifier()
acc_random_forest_model.fit(acc_features, acc_target)
gyro_random_forest_model.fit(gyro_features, gyro_target)

In [923]:
# Get feature importance
acc_feature_importance = acc_random_forest_model.feature_importances_
gyro_feature_importance = gyro_random_forest_model.feature_importances_

acc_best_features = []
gyro_best_features = []

for i in range(len(acc_feature_importance)):
    if acc_feature_importance[i] > 0.01:
        acc_best_features.append(acc_dataset_df.columns[i + 1])

for i in range(len(gyro_feature_importance)):
    if gyro_feature_importance[i] > 0.01:
        gyro_best_features.append(gyro_dataset_df.columns[i + 1])


In [924]:
acc_feature_importance

array([0.        , 0.        , 0.03633333, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.01      , 0.053     ,
       0.00714286, 0.00666667, 0.        , 0.        , 0.003     ,
       0.007     , 0.        , 0.        , 0.        , 0.01285714,
       0.004     , 0.        , 0.00333333, 0.00285714, 0.007     ,
       0.004     , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.00666667, 0.        , 0.00333333, 0.        ,
       0.        , 0.00285714, 0.        , 0.        , 0.006     ,
       0.        , 0.        , 0.        , 0.        , 0.00285714,
       0.        , 0.        , 0.        , 0.        , 0.01      ,
       0.        , 0.00333333, 0.017     , 0.        , 0.        ,
       0.02      , 0.        , 0.007     , 0.012     , 0.        ,
       0.        , 0.01      , 0.        , 0.005     , 0.     

In [925]:
gyro_feature_importance

array([0.        , 0.        , 0.005     , 0.00714286, 0.        ,
       0.        , 0.        , 0.        , 0.00285714, 0.        ,
       0.        , 0.        , 0.00233333, 0.        , 0.00285714,
       0.        , 0.        , 0.00293878, 0.01428571, 0.03095238,
       0.        , 0.01      , 0.        , 0.01      , 0.        ,
       0.01      , 0.015     , 0.        , 0.01114286, 0.        ,
       0.        , 0.        , 0.00285714, 0.01107937, 0.        ,
       0.00285714, 0.015     , 0.01      , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.005     ,
       0.        , 0.        , 0.013     , 0.01      , 0.005     ,
       0.00285714, 0.003     , 0.005     , 0.        , 0.        ,
       0.        , 0.00714286, 0.00285714, 0.007     , 0.        ,
       0.        , 0.01      , 0.        , 0.003     , 0.017     ,
       0.        , 0.        , 0.        , 0.        , 0.00489796,
       0.        , 0.        , 0.00285714, 0.01      , 0.     

In [926]:
filtered_acc_dataset_df = pd.concat([acc_dataset_df.iloc[:, :1], acc_dataset_df[acc_best_features].copy()], axis=1)
filtered_gyro_dataset_df = pd.concat([gyro_dataset_df.iloc[:, :1], gyro_dataset_df[gyro_best_features].copy()], axis=1)

In [927]:
filtered_acc_dataset_df

Unnamed: 0,Class,DominantFreq_AP,CorssCorrelationPeak_VTAP,AmplitudeNorm_VT,FirstDerivativeMean_SigComplete_VT,FirstDerivative_Moment3rd_SigComplete_VT,FirstDerivativeRatio_SigComplete_VT,FirstDerivativeRatio_SigComplete_ML,RMS_SigComplete_AP,RMSratio_SigComplete_AP,...,RelativePower_BelowDomFr_AP,MeanFreq_Plom_AP,SumPSD_Plom_AP,Moment3rdPSD_Plom_AP,SkewnessPSD_Plom_AP,KurtosisPSD_Plom_AP,SumSq25_SigComplete_Magnitude,RelativePower_BelowDomFr_Magnitude,CentroidSpMax_Magnitude,ZeroCrossingRate_Magnitude
0,0,-0.349227,-0.699258,-0.766472,1.309403,0.124576,0.683503,-0.993772,-0.172409,1.131825,...,0.828174,-0.492091,-0.229415,-0.428178,0.323581,0.103068,-0.893285,-0.676916,-0.092303,-0.737538
1,1,-0.727585,2.605245,-0.766401,0.569482,-0.34833,0.4957,-1.564703,0.739054,1.109513,...,2.666709,-1.133617,2.079489,1.380425,2.50406,3.029227,2.2446,0.52294,2.227281,0.634839
2,0,2.833913,-0.567433,0.326185,-1.016543,1.046247,0.496532,-1.444133,-0.756791,-0.597083,...,-1.025084,-0.286002,-0.556421,-0.440185,-1.165915,-0.818148,-0.692098,-0.862825,-0.108542,-0.630665
3,1,-0.081909,1.641298,-0.717911,-0.214571,-1.526651,-3.126181,0.626538,-0.742775,-0.614608,...,-0.582099,-0.912159,-0.345392,-0.439909,0.427483,0.192159,-0.067775,0.246958,1.621542,-0.473802
4,0,-0.25875,-0.537018,2.552451,-0.408045,0.968618,0.105236,-0.60223,-0.54728,-1.016114,...,-0.713276,-0.072173,-0.521942,-0.440061,-0.761668,-0.671529,0.420313,-0.747132,-0.473631,0.190848
5,1,-0.505505,-0.198954,1.537033,0.424048,0.498154,-0.226952,1.381082,2.742959,1.636454,...,0.502311,-0.35472,2.343842,2.875904,0.048449,-0.203142,1.374974,-0.479493,-0.277505,0.005452
6,0,1.369833,-1.005746,-0.012056,1.569556,-1.556469,0.517962,1.057694,-0.222956,-1.054464,...,-0.984734,2.663853,-0.642564,-0.440158,-1.353112,-0.86558,-0.42128,-0.893267,-1.897663,0.409136
7,1,-0.472604,-0.347422,-0.345552,-0.716073,-0.487857,0.475672,0.233358,0.875961,1.492575,...,0.120843,1.433947,-0.296291,-0.322825,0.134571,-0.126109,-1.138525,0.611405,0.088414,1.500628
8,0,-0.645333,-0.261149,-0.031722,1.441743,-0.19903,-0.21085,1.122462,-0.518627,-0.586496,...,-0.955883,-0.049008,-0.516365,-0.440144,-1.009854,-0.736234,-0.421015,-0.781816,-0.526397,-0.681959
9,1,-0.49728,-0.118487,-0.737907,-0.68568,-1.030527,-0.121419,0.975644,-0.998538,0.120242,...,0.360208,-0.37263,-0.619107,-0.439922,0.108513,-0.137449,-1.159031,2.50043,0.002039,1.265671


In [928]:
filtered_gyro_dataset_df

Unnamed: 0,Class,Amplitude_AP,AmplitudeNorm_VT,Range_AP,MomentPSD_2nd_ML,SkewnessPSD_VT,KurtosisPSD_VT,RangeNorm_VT,RMS_SigComplete_VT,FirstDerivativeRatio_SigComplete_AP,...,KurtosisPSD_Plom_ML,Skewness_SigComplete_AP,InterQuartileRange_SigComplete_AP,SumSq75_SigComplete_AP,Moment2ndPSD_Plom_AP,Moment3rdPSD_Plom_AP,Std_SigComplete_Magnitude,Percentile75_SigComplete_Magnitude,RelativePower_Below5Hz_Magnitude,SkewnessPSD_Plom_Magnitude
0,0,-1.075786,-1.19134,-0.045507,-0.632042,2.263877,2.425168,2.008101,1.088584,2.062836,...,-0.640767,2.334034,-0.637355,-0.537061,-0.898808,-0.602426,0.073883,-0.042897,0.964929,0.437056
1,1,-1.096867,-2.039258,-0.07022,-0.491967,1.238066,1.164746,-0.258356,0.017812,0.082861,...,0.531868,0.768691,-1.139008,1.548022,-1.296793,-0.718931,-0.961825,-0.68316,-0.132561,1.170404
2,0,0.257223,1.23766,0.233395,-0.335797,-0.776686,-0.747346,-0.535825,0.036476,0.768735,...,0.562778,-0.103857,1.091313,0.307977,0.847334,0.475208,-0.878979,-0.503766,-0.592845,-0.775134
3,1,1.131496,-0.777456,-0.166474,0.04475,1.104948,1.013641,0.342788,0.228093,-0.116291,...,-0.219131,-0.023684,-0.151667,1.555392,-0.586626,-0.505154,-1.665824,-0.565357,-1.111037,1.691026
4,0,2.291816,0.72303,0.081826,3.015914,-0.991978,-0.864298,-0.651313,0.446034,0.13085,...,-0.520611,0.422497,1.412802,0.083565,2.406502,3.100997,0.749743,0.527352,0.941986,-1.354867
5,1,0.277229,0.437516,0.802191,0.88149,-0.775867,-0.732179,1.938847,0.932399,-0.199851,...,-0.39522,0.003756,0.649367,0.498663,-0.018349,-0.3394,1.358672,1.657079,-0.79366,0.206712
6,0,-0.709022,0.609789,-0.361324,-0.575683,-0.923733,-0.833068,-1.197932,1.06898,-2.540933,...,-0.896015,-1.780564,0.328315,-1.593553,-0.235443,-0.334726,0.468387,0.782152,1.998529,-1.237536
7,1,-0.370603,0.721073,-1.196484,-0.565697,-0.117691,-0.24671,-0.683908,0.033737,0.037675,...,-0.709114,0.426987,-1.463913,-1.520985,-0.469892,-0.457302,0.027765,-0.604682,-0.502118,-0.106347
8,0,-0.444658,0.567927,2.7487,-0.533956,-0.915541,-0.836581,-0.708794,-0.6406,0.203596,...,-0.663508,-0.266817,-0.291651,-0.280994,0.715047,0.352152,1.048956,0.850269,0.978372,-0.668016
9,1,-0.136641,-1.201571,-0.431377,-0.211753,-0.372778,-0.524303,0.450054,-2.812863,-0.265476,...,2.610871,0.185674,-1.452584,-1.056284,-1.19453,-0.615861,-1.305696,-2.272928,-1.648869,-0.934038


### Correlation Selection

In [929]:
acc_corr = filtered_acc_dataset_df.corr()
gyro_corr = filtered_gyro_dataset_df.corr()

In [930]:
acc_corr

Unnamed: 0,Class,DominantFreq_AP,CorssCorrelationPeak_VTAP,AmplitudeNorm_VT,FirstDerivativeMean_SigComplete_VT,FirstDerivative_Moment3rd_SigComplete_VT,FirstDerivativeRatio_SigComplete_VT,FirstDerivativeRatio_SigComplete_ML,RMS_SigComplete_AP,RMSratio_SigComplete_AP,...,RelativePower_BelowDomFr_AP,MeanFreq_Plom_AP,SumPSD_Plom_AP,Moment3rdPSD_Plom_AP,SkewnessPSD_Plom_AP,KurtosisPSD_Plom_AP,SumSq25_SigComplete_Magnitude,RelativePower_BelowDomFr_Magnitude,CentroidSpMax_Magnitude,ZeroCrossingRate_Magnitude
Class,1.0,-0.443131,0.576362,-0.21702,-0.216287,-0.341579,-0.283991,0.249709,0.468909,0.509265,...,0.496417,-0.233972,0.491303,0.437737,0.530139,0.418353,0.340021,0.757199,0.587947,0.623688
DominantFreq_AP,-0.443131,1.0,-0.331762,0.102518,-0.102173,0.061241,0.125093,-0.235668,-0.299535,-0.392507,...,-0.50448,0.314709,-0.325808,-0.255529,-0.552861,-0.410713,-0.296335,-0.390194,-0.324431,-0.164761
CorssCorrelationPeak_VTAP,0.576362,-0.331762,1.0,-0.328518,0.000435,-0.296184,-0.447444,-0.231028,0.115381,0.231407,...,0.624466,-0.567279,0.525933,0.335998,0.77713,0.830686,0.603062,0.325459,0.924808,0.127491
AmplitudeNorm_VT,-0.21702,0.102518,-0.328518,1.0,0.007447,0.357926,0.08713,0.108959,0.293945,-0.101737,...,-0.315565,0.093415,0.160766,0.303864,-0.449428,-0.412754,0.254372,-0.405811,-0.34154,0.06746
FirstDerivativeMean_SigComplete_VT,-0.216287,-0.102173,0.000435,0.007447,1.0,-0.488515,0.061501,0.294909,0.19074,0.188822,...,0.106775,0.25291,0.21067,0.196814,-0.142001,0.000286,0.091829,-0.348786,-0.170013,0.074728
FirstDerivative_Moment3rd_SigComplete_VT,-0.341579,0.061241,-0.296184,0.357926,-0.488515,1.0,0.432003,-0.510292,0.093857,-0.110979,...,-0.037148,-0.327271,0.063067,0.081371,-0.001234,-0.054715,0.223144,-0.320083,-0.149033,-0.477687
FirstDerivativeRatio_SigComplete_VT,-0.283991,0.125093,-0.447444,0.08713,0.061501,0.432003,1.0,-0.366477,0.17811,0.174048,...,0.219094,0.32581,0.047967,0.018638,-0.077265,0.005167,0.016389,-0.069773,-0.421182,0.192039
FirstDerivativeRatio_SigComplete_ML,0.249709,-0.235668,-0.231028,0.108959,0.294909,-0.510292,-0.366477,1.0,0.210194,-0.009845,...,-0.366357,0.385973,-0.039968,0.145282,-0.390433,-0.465804,-0.203005,0.166538,-0.383291,0.236411
RMS_SigComplete_AP,0.468909,-0.299535,0.115381,0.293945,0.19074,0.093857,0.17811,0.210194,1.0,0.727071,...,0.443689,0.039104,0.83189,0.878555,0.272333,0.231369,0.547912,-0.059965,0.086529,0.264423
RMSratio_SigComplete_AP,0.509265,-0.392507,0.231407,-0.101737,0.188822,-0.110979,0.174048,-0.009845,0.727071,1.0,...,0.701346,-0.157835,0.668217,0.635319,0.496631,0.421644,0.174928,0.210203,0.366405,0.357882


In [931]:
gyro_corr

Unnamed: 0,Class,Amplitude_AP,AmplitudeNorm_VT,Range_AP,MomentPSD_2nd_ML,SkewnessPSD_VT,KurtosisPSD_VT,RangeNorm_VT,RMS_SigComplete_VT,FirstDerivativeRatio_SigComplete_AP,...,KurtosisPSD_Plom_ML,Skewness_SigComplete_AP,InterQuartileRange_SigComplete_AP,SumSq75_SigComplete_AP,Moment2ndPSD_Plom_AP,Moment3rdPSD_Plom_AP,Std_SigComplete_Magnitude,Percentile75_SigComplete_Magnitude,RelativePower_Below5Hz_Magnitude,SkewnessPSD_Plom_Magnitude
Class,1.0,0.127856,-0.48366,-0.212383,-0.052484,0.228564,0.172771,0.319317,-0.345227,-0.173276,...,0.493664,-0.009557,-0.346423,0.259842,-0.484134,-0.452105,-0.478409,-0.438862,-0.723573,0.350694
Amplitude_AP,0.127856,1.0,0.239094,0.10355,0.827426,-0.313462,-0.287156,-0.016808,-0.022891,-0.106123,...,0.125823,-0.167575,0.611121,0.252062,0.683365,0.713675,-0.126078,0.002477,-0.1672,-0.274716
AmplitudeNorm_VT,-0.48366,0.239094,1.0,0.059223,0.205644,-0.738127,-0.720112,-0.448565,0.253817,-0.187069,...,-0.449271,-0.480582,0.557108,-0.300641,0.69584,0.450135,0.587167,0.550206,0.252734,-0.394613
Range_AP,-0.212383,0.10355,0.059223,1.0,0.108011,-0.263715,-0.208129,0.123041,-0.022458,0.087166,...,-0.092372,0.019539,0.16064,0.121979,0.291701,0.213119,0.221051,0.315138,0.224523,-0.287579
MomentPSD_2nd_ML,-0.052484,0.827426,0.205644,0.108011,1.0,-0.351152,-0.313494,0.056568,0.142779,-0.022944,...,-0.060169,0.053847,0.538193,0.176234,0.698696,0.847563,0.220799,0.236385,0.055876,-0.335214
SkewnessPSD_VT,0.228564,-0.313462,-0.738127,-0.263715,-0.351152,1.0,0.994134,0.490928,0.170609,0.512144,...,0.021314,0.60288,-0.388738,0.346437,-0.588596,-0.478212,-0.418564,-0.303655,-0.098341,0.682481
KurtosisPSD_VT,0.172771,-0.287156,-0.720112,-0.208129,-0.313494,0.994134,1.0,0.514963,0.227109,0.519373,...,-0.014347,0.610952,-0.328494,0.33427,-0.534836,-0.429537,-0.381392,-0.252571,-0.024164,0.633321
RangeNorm_VT,0.319317,-0.016808,-0.448565,0.123041,0.056568,0.490928,0.514963,1.0,0.126882,0.461124,...,0.148959,0.554759,-0.090076,0.179997,-0.345296,-0.310078,-0.05655,0.001605,-0.315924,0.26925
RMS_SigComplete_VT,-0.345227,-0.022891,0.253817,-0.022458,0.142779,0.170609,0.227109,0.126882,1.0,0.027001,...,-0.817869,0.093758,0.368133,0.12411,0.190012,0.120786,0.417368,0.699091,0.554967,0.186893
FirstDerivativeRatio_SigComplete_AP,-0.173276,-0.106123,-0.187069,0.087166,-0.022944,0.512144,0.519373,0.461124,0.027001,1.0,...,-0.03178,0.823861,-0.153657,0.257049,-0.019476,0.04936,-0.036708,-0.120382,-0.20131,0.317932


In [932]:
acc_feature_corr_matrix = acc_corr.drop(columns=['Class'])
acc_target_corr = acc_feature_corr_matrix.iloc[[0]].copy()
acc_feature_corr_matrix = acc_feature_corr_matrix.drop(acc_feature_corr_matrix.index[0])
gyro_feature_corr_matrix = gyro_corr.drop(columns=['Class'])
gyro_target_corr = gyro_feature_corr_matrix.iloc[[0]].copy()
gyro_feature_corr_matrix = gyro_feature_corr_matrix.drop(gyro_feature_corr_matrix.index[0])

In [933]:
acc_corr_redundant_features = get_corr_redundant_features(acc_feature_corr_matrix, acc_target_corr)
gyro_corr_redundant_features = get_corr_redundant_features(gyro_feature_corr_matrix, gyro_target_corr)

In [934]:
print(len(acc_corr_redundant_features))
acc_corr_redundant_features

23


['FirstDerivativeRatio_SigComplete_ML',
 'RMS_SigComplete_AP',
 'SkewnessPSD_Plom_VT',
 'SumPSD_Plom_ML',
 'EnergySignal_ML',
 'Skewness_SigComplete_AP',
 'RMSratio_SigComplete_AP',
 'FirstDerivativeLogRatio_SigComplete_Magnitude',
 'AmplitudeNorm_VT',
 'Percentile25_SigComplete_AP',
 'Percentile75_SigComplete_AP',
 'SumSq75_SigComplete_AP',
 'Mean_SigComplete_VT',
 'InterQuartileRange_SigComplete_AP',
 'DominantFreq_AP',
 'RelativePower_BelowDomFr_AP',
 'MeanFreq_Plom_AP',
 'SumPSD_Plom_AP',
 'SkewnessPSD_Plom_AP',
 'KurtosisPSD_Plom_AP',
 'SumSq25_SigComplete_Magnitude',
 'CorssCorrelationPeak_VTAP',
 'ZeroCrossingRate_Magnitude']

In [935]:
print(len(gyro_corr_redundant_features))
gyro_corr_redundant_features

15


['MomentPSD_2nd_ML',
 'SkewnessPSD_VT',
 'KurtosisPSD_VT',
 'Max_SigComplete_VT',
 'Amplitude_AP',
 'Range_AP',
 'RMS_SigComplete_VT',
 'Skewness_SigComplete_AP',
 'InterQuartileRange_SigComplete_AP',
 'AmplitudeNorm_VT',
 'Moment3rdPSD_Plom_AP',
 'Std_SigComplete_Magnitude',
 'Percentile75_SigComplete_Magnitude',
 'KurtosisPSD_Plom_ML',
 'SumSq75_SigComplete_AP']

In [936]:
filtered_acc_dataset_df = filtered_acc_dataset_df.drop(columns=acc_corr_redundant_features)
filtered_gyro_dataset_df = filtered_gyro_dataset_df.drop(columns=gyro_corr_redundant_features)

In [937]:
filtered_acc_dataset_df

Unnamed: 0,Class,FirstDerivativeMean_SigComplete_VT,FirstDerivative_Moment3rd_SigComplete_VT,FirstDerivativeRatio_SigComplete_VT,SumSq75_SigComplete_ML,RelativePower_Below5Hz_AP,Moment3rdPSD_Plom_AP,RelativePower_BelowDomFr_Magnitude,CentroidSpMax_Magnitude
0,0,1.309403,0.124576,0.683503,-0.973519,-0.505171,-0.428178,-0.676916,-0.092303
1,1,0.569482,-0.34833,0.4957,1.003752,-0.370701,1.380425,0.52294,2.227281
2,0,-1.016543,1.046247,0.496532,-0.214692,-0.611616,-0.440185,-0.862825,-0.108542
3,1,-0.214571,-1.526651,-3.126181,0.902271,-0.00143,-0.439909,0.246958,1.621542
4,0,-0.408045,0.968618,0.105236,-0.566201,0.919677,-0.440061,-0.747132,-0.473631
5,1,0.424048,0.498154,-0.226952,1.075397,-1.297334,2.875904,-0.479493,-0.277505
6,0,1.569556,-1.556469,0.517962,-0.407565,1.397099,-0.440158,-0.893267,-1.897663
7,1,-0.716073,-0.487857,0.475672,-1.393816,-1.279988,-0.322825,0.611405,0.088414
8,0,1.441743,-0.19903,-0.21085,-0.94271,1.787401,-0.440144,-0.781816,-0.526397
9,1,-0.68568,-1.030527,-0.121419,-1.096803,-0.573928,-0.439922,2.50043,0.002039


In [938]:
filtered_gyro_dataset_df

Unnamed: 0,Class,RangeNorm_VT,FirstDerivativeRatio_SigComplete_AP,RelativePower_BelowDomFr_VT,ZeroCrossingRate_VT,Skewness_SigComplete_ML,Moment2ndPSD_Plom_AP,RelativePower_Below5Hz_Magnitude,SkewnessPSD_Plom_Magnitude
0,0,2.008101,2.062836,-1.172448,-1.892407,1.017572,-0.898808,0.964929,0.437056
1,1,-0.258356,0.082861,-0.49128,-0.351395,-0.600466,-1.296793,-0.132561,1.170404
2,0,-0.535825,0.768735,-0.59279,0.02833,-0.40217,0.847334,-0.592845,-0.775134
3,1,0.342788,-0.116291,0.381015,0.805185,-0.879928,-0.586626,-1.111037,1.691026
4,0,-0.651313,0.13085,0.192401,-0.349175,1.874338,2.406502,0.941986,-1.354867
5,1,1.938847,-0.199851,0.796013,1.12391,0.486762,-0.018349,-0.79366,0.206712
6,0,-1.197932,-2.540933,-0.602623,0.631136,-0.653874,-0.235443,1.998529,-1.237536
7,1,-0.683908,0.037675,0.816377,0.736669,-0.574513,-0.469892,-0.502118,-0.106347
8,0,-0.708794,0.203596,-1.029947,-1.698015,-0.315648,0.715047,0.978372,-0.668016
9,1,0.450054,-0.265476,0.763694,-0.846115,0.906763,-1.19453,-1.648869,-0.934038


### Saving to CSV

In [939]:
filtered_acc_dataset_df.to_csv(final_dataset_path + 'PatchTable_Acceleration_Filtered.csv', index=False)
filtered_gyro_dataset_df.to_csv(final_dataset_path + 'PatchTable_Gyroscope_Filtered.csv', index=False)