In [117]:
from python_scripts.utils.feature_selection_funcs import *
# Explicit imports come after the star import so they always win over any same-named export.
import numpy as np
from sklearn.linear_model import LogisticRegression

# Feature Selection
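In this notebook, features from the normalised datasets are selected in three successive stages: random forest feature importance, correlation-based removal of redundant features, and backward elimination (recursive feature elimination with cross-validation on a logistic regression model).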

In [118]:
# Directories holding the final windowed datasets (relative to this notebook).
final_separate_dataset_path = '../../dataset/final/windows/separate/'
final_combined_dataset_path = '../../dataset/final/windows/combined/'

# Normalised input tables: acceleration only, gyroscope only, and both combined.
acc_dataset_path = f'{final_separate_dataset_path}PatchTable_Acceleration_Normalised.csv'
gyro_dataset_path = f'{final_separate_dataset_path}PatchTable_Gyroscope_Normalised.csv'
acc_gyro_dataset_path = f'{final_combined_dataset_path}PatchTable_Acceleration_Gyroscope_Normalised.csv'

In [119]:
# Load each normalised dataset without its 'ClassificationType' column.
acc_dataset_df = pd.read_csv(acc_dataset_path).drop(columns='ClassificationType')
gyro_dataset_df = pd.read_csv(gyro_dataset_path).drop(columns='ClassificationType')
acc_gyro_dataset_df = pd.read_csv(acc_gyro_dataset_path).drop(columns='ClassificationType')

## Random Forest Feature Importance Selection

In [120]:
# Split every dataset into its feature matrix (all columns except 'Class' and 'Subject')
# and its target vector (the 'Class' column).
non_feature_columns = ['Class', 'Subject']

acc_features = acc_dataset_df.drop(columns=non_feature_columns)
acc_target = acc_dataset_df['Class']

gyro_features = gyro_dataset_df.drop(columns=non_feature_columns)
gyro_target = gyro_dataset_df['Class']

acc_gyro_features = acc_gyro_dataset_df.drop(columns=non_feature_columns)
acc_gyro_target = acc_gyro_dataset_df['Class']

### Feature Importance Metric Calculation

In [121]:
# Feature importance is extracted via a random forest classifier, which is stochastic:
# without a fixed seed every run may produce different importances (and therefore a
# different feature selection downstream).
# NOTE(review): this assumes get_feature_importance leaves the forest's random_state unset,
# in which case scikit-learn draws from NumPy's global RNG -- confirm in
# feature_selection_funcs. If the helper seeds itself, the calls below are harmless no-ops.
RANDOM_SEED = 42

# Re-seed before every call so each dataset's result is reproducible on its own and does
# not depend on the order in which the three calls are executed.
np.random.seed(RANDOM_SEED)
acc_feature_importance = get_feature_importance(acc_features, acc_target)

np.random.seed(RANDOM_SEED)
gyro_feature_importance = get_feature_importance(gyro_features, gyro_target)

np.random.seed(RANDOM_SEED)
acc_gyro_feature_importance = get_feature_importance(acc_gyro_features, acc_gyro_target)

In [122]:
# Inspect the importance scores computed for the acceleration features.
acc_feature_importance

array([0.00391695, 0.00285918, 0.00179432, 0.00285965, 0.00475007,
       0.00381845, 0.00494763, 0.00832141, 0.00441953, 0.00517078,
       0.00477392, 0.00402771, 0.00263855, 0.00393623, 0.00330862,
       0.00217219, 0.00180245, 0.00140083, 0.00156683, 0.00324089,
       0.00305825, 0.00313343, 0.00172727, 0.0021054 , 0.00112505,
       0.00166507, 0.00290441, 0.00148849, 0.00291629, 0.00560606,
       0.00342595, 0.00095426, 0.00179863, 0.00111615, 0.001515  ,
       0.00186328, 0.00075783, 0.0019218 , 0.00268037, 0.00205602,
       0.00139075, 0.00202354, 0.00165963, 0.00087842, 0.00145041,
       0.00171135, 0.00144505, 0.00195764, 0.00204949, 0.00110364,
       0.00159555, 0.00201004, 0.00179438, 0.00187298, 0.00207777,
       0.00298634, 0.00501483, 0.00431124, 0.00090524, 0.00191829,
       0.0014511 , 0.00174404, 0.00124514, 0.00137514, 0.00149712,
       0.00202374, 0.00214633, 0.00217759, 0.00183414, 0.00140308,
       0.00719288, 0.00467124, 0.00385826, 0.01216623, 0.00193

In [123]:
# Inspect the importance scores computed for the gyroscope features.
gyro_feature_importance

array([0.00177796, 0.00265934, 0.00147629, 0.00157254, 0.00490039,
       0.00959267, 0.00629735, 0.00257097, 0.00338592, 0.00557755,
       0.00418732, 0.0046374 , 0.00310275, 0.00217164, 0.00252749,
       0.00195151, 0.00255325, 0.00140054, 0.00186858, 0.00288178,
       0.00298025, 0.00226462, 0.00232195, 0.00225694, 0.00198407,
       0.00234827, 0.00201174, 0.0019692 , 0.00329222, 0.00427225,
       0.00296073, 0.00187116, 0.00171844, 0.00154933, 0.00133208,
       0.00110209, 0.00107614, 0.00187615, 0.00256379, 0.0024097 ,
       0.00213702, 0.00263605, 0.00261175, 0.00148866, 0.00140995,
       0.00226771, 0.00172152, 0.00193046, 0.00245353, 0.00174445,
       0.00224309, 0.00208412, 0.00196661, 0.00195017, 0.00216183,
       0.00287961, 0.00539437, 0.00382816, 0.00153109, 0.00214837,
       0.00118059, 0.00116734, 0.00129788, 0.00176309, 0.00238497,
       0.00204752, 0.00282304, 0.00222043, 0.00190572, 0.00171856,
       0.00441672, 0.00370423, 0.00370277, 0.00494692, 0.00231

In [124]:
# Inspect the importance scores computed for the combined acceleration + gyroscope features.
acc_gyro_feature_importance

array([7.35435388e-04, 1.30499690e-03, 1.25877358e-03, 1.18401412e-03,
       2.40665784e-03, 3.60689646e-03, 1.91455966e-03, 3.46163684e-03,
       1.68420455e-03, 3.87760906e-03, 2.36533654e-03, 2.41071668e-03,
       1.78687511e-03, 2.70250716e-03, 1.08778999e-03, 1.48059146e-03,
       9.62199025e-04, 7.56985279e-04, 5.18608974e-04, 8.97420359e-04,
       1.59698956e-03, 6.91921579e-04, 8.00043449e-04, 7.82241391e-04,
       6.61862563e-04, 9.37192490e-04, 1.13382377e-03, 1.50454595e-03,
       2.28345080e-03, 3.17083201e-03, 1.46709519e-03, 4.29469076e-04,
       3.65518499e-04, 7.60028470e-04, 7.83050223e-04, 8.27177548e-04,
       5.44812310e-04, 7.86549568e-04, 7.93997029e-04, 1.03279912e-03,
       6.29239513e-04, 6.05123745e-04, 1.29153730e-03, 4.58660207e-04,
       9.39797124e-04, 5.92241073e-04, 5.44584353e-04, 6.70678445e-04,
       6.11135138e-04, 1.08852538e-03, 4.30853723e-04, 7.72645325e-04,
       5.05358326e-04, 6.54486931e-04, 5.89201764e-04, 2.90704657e-03,
      

In [125]:
# Pick the best features of all three datasets from the importance scores computed above.
# The threshold is kept very low so that only the least significant features are removed;
# it is defined once so the three datasets are always filtered with the same value.
fi_threshold_value = 0.0005

acc_best_features = get_fi_best_features(
    acc_features, acc_feature_importance, fi_threshold=fi_threshold_value
)
gyro_best_features = get_fi_best_features(
    gyro_features, gyro_feature_importance, fi_threshold=fi_threshold_value
)
acc_gyro_best_features = get_fi_best_features(
    acc_gyro_features, acc_gyro_feature_importance, fi_threshold=fi_threshold_value
)

In [126]:
# Report how many features each dataset keeps after the importance filter.
print(f"Number of best features for acceleration data: {len(acc_best_features)}")
print(f"Number of best features for gyroscope data: {len(gyro_best_features)}")
print(f"Number of best features for acceleration and gyroscope data: {len(acc_gyro_best_features)}")

Number of best features for acceleration data: 241
Number of best features for gyroscope data: 241
Number of best features for acceleration and gyroscope data: 456


### Filtering the Dataset based on Feature Importance

In [127]:
# Keep only the 'Subject' and 'Class' columns plus the best features found via feature importance.
# The two metadata columns are selected by name rather than by position (iloc[:, :2]) so the
# result stays correct even if the column order of the input CSVs changes; they are still
# placed first, which later positional slicing of these frames relies on.
id_columns = ['Subject', 'Class']
filtered_acc_dataset_df = acc_dataset_df[[*id_columns, *acc_best_features]].copy()
filtered_gyro_dataset_df = gyro_dataset_df[[*id_columns, *gyro_best_features]].copy()
filtered_acc_gyro_dataset_df = acc_gyro_dataset_df[[*id_columns, *acc_gyro_best_features]].copy()

In [128]:
# Preview the acceleration dataset after the feature-importance filter.
filtered_acc_dataset_df

Unnamed: 0,Subject,Class,DominantFreq_VT,DominantFreq_ML,DominantFreq_AP,DominantFreq_Magnitude,CorssCorrelationSum_VTML,CorssCorrelationSum_VTAP,CorssCorrelationSum_MLAP,CorssCorrelationPeak_VTML,...,MeanPSD_Plomb_Magnitude,MedianPSD_Plom_Magnitude,SumPSD_Plom_Magnitude,Moment2ndPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,KurtosisPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude,ModeSignal_Magnitude,EnergySignal_Magnitude
0,1,0,0.617654,0.747748,0.597720,0.508189,-0.277193,-0.163081,-0.438910,-0.414596,...,-0.651852,-0.165272,-0.651852,-0.285245,-0.146060,-0.086591,-0.206281,-1.771846,0.959438,-0.652570
1,1,0,-1.392837,1.610635,1.380251,0.597923,-0.279359,-0.163483,-0.442177,-0.421079,...,-0.659200,-0.224007,-0.659200,-0.285264,-0.146061,0.952947,0.737416,-2.360037,0.966257,-0.659603
2,1,0,-1.567663,1.487365,1.268460,0.508189,-0.279558,-0.163548,-0.442086,-0.421402,...,-0.659522,-0.224137,-0.659522,-0.285264,-0.146061,1.178809,0.885754,-2.654132,0.966257,-0.659652
3,1,0,1.579194,1.610635,1.268460,0.508189,-0.279463,-0.163525,-0.441743,-0.421428,...,-0.659487,-0.224224,-0.659487,-0.285264,-0.146061,1.203253,0.904705,-2.801180,0.966257,-0.659461
4,1,0,0.442829,1.610635,0.821300,-1.465954,-0.279386,-0.163469,-0.442073,-0.421943,...,-0.657555,-0.196872,-0.657555,-0.285263,-0.146061,0.069855,0.011215,-3.095275,1.049071,-0.658125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1875,6,1,-0.169060,-1.347834,-0.073020,-0.389149,-0.257069,-0.152225,-0.385749,-0.382739,...,-0.537158,-0.209324,-0.537158,-0.282367,-0.146017,-0.295417,-0.322345,0.580916,0.676851,-0.538074
1876,6,1,-0.169060,0.377939,0.038770,-0.478883,-0.249729,-0.150379,-0.263867,-0.376539,...,-0.566952,-0.219464,-0.566952,-0.283311,-0.146033,0.136042,-0.009935,-0.007274,0.676851,-0.567786
1877,6,1,-1.392837,-0.854756,-0.967341,1.315793,-0.249123,-0.148807,-0.203855,-0.348513,...,-0.512414,-0.166815,-0.512414,-0.280506,-0.145960,-0.002798,-0.100433,0.139773,0.676851,-0.513252
1878,6,1,-0.693536,0.377939,-1.079131,1.315793,-0.217467,-0.132307,-0.129628,-0.391167,...,-0.384041,0.621261,-0.384041,-0.273183,-0.145660,-0.065654,-0.006376,-0.007274,0.224671,-0.380832


In [129]:
# Preview the gyroscope dataset after the feature-importance filter.
filtered_gyro_dataset_df

Unnamed: 0,Subject,Class,DominantFreq_VT,DominantFreq_ML,DominantFreq_AP,DominantFreq_Magnitude,CorssCorrelationSum_VTML,CorssCorrelationSum_VTAP,CorssCorrelationSum_MLAP,CorssCorrelationPeak_VTML,...,MeanPSD_Plomb_Magnitude,MedianPSD_Plom_Magnitude,SumPSD_Plom_Magnitude,Moment2ndPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,KurtosisPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude,ModeSignal_Magnitude,EnergySignal_Magnitude
0,1,0,-1.434841,-0.089043,-1.377091,-0.918973,-2.081035,-1.393505,-1.720757,-2.025414,...,-2.247603,-0.999462,-2.247603,-0.697426,-0.301041,2.052965,2.272801,-2.533602,2.844971,-2.235480
1,1,0,-1.206018,-0.195019,-0.083083,0.941694,-2.090410,-1.416237,-1.722264,-2.039656,...,-2.245875,-1.000172,-2.245875,-0.697322,-0.301040,2.517236,2.721892,-2.404725,3.116532,-2.272691
2,1,0,-0.862783,-1.254781,-0.471285,1.739122,0.375798,-0.292943,0.668777,-0.129542,...,1.964734,2.097151,1.964734,0.871367,0.250196,0.436805,0.479960,-0.084937,-0.477391,1.543270
3,1,0,-0.748371,-0.724900,1.081524,-0.121544,-0.294936,-0.729608,-0.508218,0.221763,...,1.879856,-0.170724,1.879856,1.224188,0.567989,1.082580,1.077727,-0.213815,-0.477391,1.424152
4,1,0,-0.748371,-0.724900,1.081524,-0.520258,-0.036225,-0.749321,-0.437364,0.650838,...,1.218397,0.172481,1.218397,0.230621,-0.081637,-0.059726,-0.169331,0.172817,-0.314613,0.888175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2054,6,1,-0.977195,-1.148805,-0.471285,-0.254449,-1.111842,-0.708000,-0.203888,-1.286660,...,-0.118216,0.194818,-0.118216,-0.012726,-0.120209,0.982122,0.997675,-1.244831,0.883994,-0.289412
2055,6,1,-0.633960,-1.148805,-1.506492,-0.254449,-1.511486,-0.864506,-0.758434,-1.171306,...,-0.712570,-1.001458,-0.712570,-0.363895,-0.241271,0.855779,0.838980,-0.987077,0.883994,-0.735938
2056,6,1,-0.748371,-1.148805,1.469727,-0.121544,-1.171761,-0.574156,-0.166071,-1.024269,...,0.338616,0.425363,0.338616,-0.004442,-0.175226,-0.445744,-0.629712,-1.115954,1.115099,0.036538
2057,6,1,-0.405136,-1.254781,-1.506492,-0.254449,-0.912437,0.335347,0.745596,-1.053510,...,0.905783,0.369655,0.905783,0.520854,0.044095,0.099458,-0.100948,-0.858200,1.115099,0.687802


In [130]:
# Preview the combined acceleration + gyroscope dataset after the feature-importance filter.
filtered_acc_gyro_dataset_df

Unnamed: 0,Subject,Class,Acc_DominantFreq_VT,Acc_DominantFreq_ML,Acc_DominantFreq_AP,Acc_DominantFreq_Magnitude,Acc_CorssCorrelationSum_VTML,Acc_CorssCorrelationSum_VTAP,Acc_CorssCorrelationSum_MLAP,Acc_CorssCorrelationPeak_VTML,...,Gyro_MeanPSD_Plomb_Magnitude,Gyro_MedianPSD_Plom_Magnitude,Gyro_SumPSD_Plom_Magnitude,Gyro_Moment2ndPSD_Plom_Magnitude,Gyro_Moment3rdPSD_Plom_Magnitude,Gyro_SkewnessPSD_Plom_Magnitude,Gyro_KurtosisPSD_Plom_Magnitude,Gyro_ZeroCrossingRate_Magnitude,Gyro_ModeSignal_Magnitude,Gyro_EnergySignal_Magnitude
0,1,0,-1.457520,1.572590,1.464102,0.633695,-0.269067,-0.166063,-0.430023,-0.415320,...,-2.401226,-1.015958,-2.401226,-0.762618,-0.351773,2.174065,2.413559,-2.663978,3.116538,-2.389933
1,1,0,-1.639370,1.448417,1.344344,0.540430,-0.269268,-0.166134,-0.429934,-0.415634,...,-2.399376,-1.016684,-2.399376,-0.762494,-0.351772,2.651990,2.878447,-2.531658,3.408543,-2.430147
2,1,0,-1.275670,0.455037,-0.691535,1.100017,-0.216950,-0.134558,-0.249561,-0.331519,...,2.108672,2.150362,2.108672,1.096288,0.427015,0.510375,0.557658,-0.149906,-0.455947,1.693790
3,1,0,1.179304,0.579210,-0.811292,1.100017,-0.214356,-0.129496,-0.227771,-0.302440,...,2.017798,-0.168564,2.017798,1.514355,0.875993,1.175142,1.176451,-0.282226,-0.455947,1.565059
4,1,0,0.360980,0.455037,0.985071,1.100017,-0.206889,-0.131921,-0.244277,-0.260639,...,1.063202,-0.112425,1.063202,0.241043,-0.047234,0.451578,0.475116,0.644011,-0.280915,0.743978
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1733,6,1,-0.730120,-0.165825,-0.811292,-1.231597,-0.220704,-0.152122,-0.380235,-0.270290,...,0.486848,-0.987789,0.486848,0.415986,0.088094,0.994679,1.006690,-1.473102,1.007930,0.249931
1734,6,1,-0.184570,-1.407550,-0.092747,-0.392216,-0.246470,-0.153823,-0.374753,-0.377986,...,-0.121418,0.205206,-0.121418,0.048702,-0.096294,1.071730,1.093584,-1.340782,1.007930,-0.286802
1735,6,1,-0.184570,0.330865,0.027011,-0.485480,-0.239028,-0.151816,-0.255372,-0.371949,...,0.367685,0.440941,0.367685,0.058518,-0.174021,-0.398129,-0.591044,-1.208463,1.256433,0.065454
1736,6,1,-1.457520,-0.910860,-1.050807,1.379811,-0.238414,-0.150106,-0.196592,-0.344659,...,0.974917,0.383979,0.974917,0.680956,0.135835,0.163107,-0.043682,-0.943824,1.256433,0.769280


## Correlation Coefficient Selection

### Finding Redundant Features 

In [131]:
# Find the redundant features of every importance-filtered dataset, i.e. features that
# correlate too strongly with another, more important feature. 'Subject' is removed
# beforehand so only 'Class' and the feature columns are handed to the helper.
acc_corr_redundant_features, gyro_corr_redundant_features, acc_gyro_corr_redundant_features = [
    get_corr_redundant_features(filtered_df.drop(columns='Subject'), 'Class')
    for filtered_df in (
        filtered_acc_dataset_df,
        filtered_gyro_dataset_df,
        filtered_acc_gyro_dataset_df,
    )
]

In [132]:
# Report the number of correlation-redundant acceleration features.
print(f"Number of redundant features for acceleration data: {len(acc_corr_redundant_features)}")

Number of redundant features for acceleration data: 173


In [133]:
# Report the number of correlation-redundant gyroscope features.
print(f"Number of redundant features for gyroscope data: {len(gyro_corr_redundant_features)}")

Number of redundant features for gyroscope data: 173


In [134]:
# Report the number of correlation-redundant features in the combined dataset.
print(f"Number of redundant features for acceleration and gyroscope data: {len(acc_gyro_corr_redundant_features)}")

Number of redundant features for acceleration and gyroscope data: 331


### Removing Redundant Features from Dataset based on Correlation

In [135]:
# Remove the correlation-redundant columns from every importance-filtered dataset.
# NOTE: the frames are overwritten under the same names, so this cell is not idempotent --
# running it a second time raises a KeyError for columns that are already gone. Re-run the
# feature-importance filtering cell first if this step has to be repeated.
filtered_acc_dataset_df = filtered_acc_dataset_df.drop(
    acc_corr_redundant_features, axis='columns'
)
filtered_gyro_dataset_df = filtered_gyro_dataset_df.drop(
    gyro_corr_redundant_features, axis='columns'
)
filtered_acc_gyro_dataset_df = filtered_acc_gyro_dataset_df.drop(
    acc_gyro_corr_redundant_features, axis='columns'
)

## Using Backwards Elimination - Logistic Regression

In [136]:
# Estimator handed to the recursive feature elimination step.
model = LogisticRegression()

# Subject identifiers of every row, used as the groupings for the elimination step.
acc_id_groupings = acc_dataset_df['Subject']
gyro_id_groupings = gyro_dataset_df['Subject']
acc_gyro_id_groupings = acc_gyro_dataset_df['Subject']

# Candidate features: every column that survived the previous filtering steps. The metadata
# columns are removed by name (as in the feature-importance step) instead of by position, so
# the result does not depend on 'Subject' and 'Class' being the first two columns.
# NOTE: these assignments overwrite the full feature matrices defined earlier in the notebook;
# restart and run from the top before re-executing any of the feature-importance cells.
acc_features = filtered_acc_dataset_df.drop(columns=['Subject', 'Class'])
gyro_features = filtered_gyro_dataset_df.drop(columns=['Subject', 'Class'])
acc_gyro_features = filtered_acc_gyro_dataset_df.drop(columns=['Subject', 'Class'])

In [137]:
# Select the best features of each dataset with recursive feature elimination and
# cross-validation (RFECV), evaluated on the logistic regression model with group k-folding.
# The settings shared by all three runs are collected once.
rfecv_settings = {'model': model, 'groupk_folds': 6}

acc_selected_features = get_rfecv_selected_features(
    features=acc_features,
    target=acc_target,
    id_groupings=acc_id_groupings,
    **rfecv_settings,
)
gyro_selected_features = get_rfecv_selected_features(
    features=gyro_features,
    target=gyro_target,
    id_groupings=gyro_id_groupings,
    **rfecv_settings,
)
acc_gyro_selected_features = get_rfecv_selected_features(
    features=acc_gyro_features,
    target=acc_gyro_target,
    id_groupings=acc_gyro_id_groupings,
    **rfecv_settings,
)

In [138]:
# Report how many acceleration features were selected, then list them.
print(f"Number of features selected for acceleration data: {len(acc_selected_features)}")
acc_selected_features

Number of features selected for acceleration data: 32


Index(['CorssCorrelationSum_VTAP', 'CorssCorrelationPeak_VTAP',
       'IndexHarmonicity_ML', 'FirstDerivativeMean_SigComplete_VT',
       'FirstDerivative_Moment3rd_SigComplete_VT',
       'FirstDerivativeLogRatio_SigComplete_ML',
       'FirstDerivativeMean_SigComplete_AP',
       'FirstDerivative_Skweness_SigComplete_AP',
       'FirstDerivativeRatio_SigComplete_AP',
       'FirstDerivativeMax_SigComplete_Magnitude', 'RelativePower_Below5Hz_VT',
       'RelativePower_BelowDomFr_VT', 'Skewness_SigComplete_ML',
       'Kurtosis_SigComplete_ML', 'Percentile75_SigComplete_ML',
       'MaxFreqSpectrum_Below5Hz_ML', 'RelativePower_AboveDomFr_ML',
       'Moment3rdPSD_Plom_ML', 'Max_SigComplete_AP',
       'InterQuartileRange_SigComplete_AP', 'Percentile75_SigComplete_AP',
       'ZeroCrossingRate_AP', 'Mean_SigComplete_Magnitude',
       'Skewness_SigComplete_Magnitude', 'Percentile25_SigComplete_Magnitude',
       'MaxFreqSpectrum_Below5Hz_Magnitude', 'RelativePower_5To10Hz_Magnitude',
 

In [139]:
# Report how many gyroscope features were selected, then list them.
print(f"Number of features selected for gyroscope data: {len(gyro_selected_features)}")
gyro_selected_features

Number of features selected for gyroscope data: 1


Index(['RelativePower_Below5Hz_Magnitude'], dtype='object')

In [140]:
# Report how many features of the combined dataset were selected, then list them.
print(f"Number of features selected for acceleration and gyroscope data: {len(acc_gyro_selected_features)}")
acc_gyro_selected_features

Number of features selected for acceleration and gyroscope data: 10


Index(['Acc_CorssCorrelationSum_VTAP', 'Acc_CorssCorrelationPeak_VTAP',
       'Acc_FirstDerivativeRatio_SigComplete_AP',
       'Acc_FirstDerivativeMax_SigComplete_Magnitude',
       'Acc_InterQuartileRange_SigComplete_AP',
       'Acc_RelativePower_BelowDomFr_Magnitude',
       'Acc_ZeroCrossingRate_Magnitude',
       'Gyro_FirstDerivativeLogRatio_SigComplete_ML',
       'Gyro_FirstDerivativeLogRatio_SigComplete_AP',
       'Gyro_RelativePower_Below5Hz_Magnitude'],
      dtype='object')

In [141]:
# Keep only the 'Subject' and 'Class' columns plus the features selected by backward elimination.
# The two metadata columns are selected by name rather than by position (iloc[:, :2]) so the
# saved datasets stay correct even if the column order of the input CSVs changes.
id_columns = ['Subject', 'Class']
filtered_acc_dataset_df = acc_dataset_df[[*id_columns, *acc_selected_features]].copy()
filtered_gyro_dataset_df = gyro_dataset_df[[*id_columns, *gyro_selected_features]].copy()
filtered_acc_gyro_dataset_df = acc_gyro_dataset_df[[*id_columns, *acc_gyro_selected_features]].copy()

## Final Filtered Datasets

In [142]:
# Final acceleration dataset: 'Subject', 'Class' and the features kept by backward elimination.
filtered_acc_dataset_df

Unnamed: 0,Subject,Class,CorssCorrelationSum_VTAP,CorssCorrelationPeak_VTAP,IndexHarmonicity_ML,FirstDerivativeMean_SigComplete_VT,FirstDerivative_Moment3rd_SigComplete_VT,FirstDerivativeLogRatio_SigComplete_ML,FirstDerivativeMean_SigComplete_AP,FirstDerivative_Skweness_SigComplete_AP,...,Mean_SigComplete_Magnitude,Skewness_SigComplete_Magnitude,Percentile25_SigComplete_Magnitude,MaxFreqSpectrum_Below5Hz_Magnitude,RelativePower_5To10Hz_Magnitude,RelativePower_BelowDomFr_Magnitude,MedianPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude
0,1,0,-0.163081,-0.662803,0.528986,0.475757,-0.062359,-0.136782,-0.409614,-1.334350,...,-0.035799,1.275514,1.444325,-0.116140,1.224695,-0.156583,-0.165272,-0.146060,-0.086591,-1.771846
1,1,0,-0.163483,-0.668085,-0.364654,0.090667,-0.110018,-0.233595,-0.054282,-1.066276,...,-0.281341,-0.695417,1.432455,0.408305,1.251751,-0.063859,-0.224007,-0.146061,0.952947,-2.360037
2,1,0,-0.163548,-0.668120,-0.362345,-0.033453,-0.110054,-0.112613,-0.050146,-1.160794,...,-0.404884,-0.490985,1.412446,-2.039106,1.249730,0.167928,-0.224137,-0.146061,1.178809,-2.654132
3,1,0,-0.163525,-0.668035,-0.376434,0.009309,-0.110064,-0.227149,-0.042921,-1.174912,...,-0.463610,-0.346706,1.393891,-2.039106,1.052092,0.285549,-0.224224,-0.146061,1.203253,-2.801180
4,1,0,-0.163469,-0.667327,13.995720,-0.002560,-0.109835,-0.698540,0.162228,-0.055198,...,-0.208414,2.387815,1.407141,-1.951699,-1.596697,-0.016023,-0.196872,-0.146061,0.069855,-3.095275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1875,6,1,-0.152225,-0.607692,-0.416798,-0.867979,-0.047874,0.015603,-0.046169,0.002909,...,0.185887,0.225318,0.462994,-0.465771,0.163099,-0.191222,-0.209324,-0.146017,-0.295417,0.580916
1876,6,1,-0.150379,-0.623584,-0.154285,-0.524313,-0.062139,-0.028567,0.040108,0.172652,...,0.169964,0.472963,0.627831,-0.553178,0.996511,-0.178239,-0.219464,-0.146033,0.136042,-0.007274
1877,6,1,-0.148807,-0.580757,-0.314912,0.828930,-0.047780,-0.325827,-0.340791,0.078664,...,0.280245,0.255260,0.389285,1.194972,0.894499,-0.184609,-0.166815,-0.145960,-0.002798,0.139773
1878,6,1,-0.132307,-0.612842,-0.367696,1.526718,-0.039679,-0.599088,-2.453837,-3.100024,...,1.338794,0.849419,0.387508,1.194972,0.132428,-0.083886,0.621261,-0.145660,-0.065654,-0.007274


In [143]:
# Final gyroscope dataset: 'Subject', 'Class' and the features kept by backward elimination.
filtered_gyro_dataset_df

Unnamed: 0,Subject,Class,RelativePower_Below5Hz_Magnitude
0,1,0,-2.247382
1,1,0,-2.401791
2,1,0,-0.511308
3,1,0,-0.713828
4,1,0,0.242117
...,...,...,...
2054,6,1,0.432625
2055,6,1,0.203724
2056,6,1,1.628756
2057,6,1,1.868931


In [144]:
# Final combined dataset: 'Subject', 'Class' and the features kept by backward elimination.
filtered_acc_gyro_dataset_df

Unnamed: 0,Subject,Class,Acc_CorssCorrelationSum_VTAP,Acc_CorssCorrelationPeak_VTAP,Acc_FirstDerivativeRatio_SigComplete_AP,Acc_FirstDerivativeMax_SigComplete_Magnitude,Acc_InterQuartileRange_SigComplete_AP,Acc_RelativePower_BelowDomFr_Magnitude,Acc_ZeroCrossingRate_Magnitude,Gyro_FirstDerivativeLogRatio_SigComplete_ML,Gyro_FirstDerivativeLogRatio_SigComplete_AP,Gyro_RelativePower_Below5Hz_Magnitude
0,1,0,-0.166063,-0.685750,-0.010657,-1.074034,-1.444151,0.032532,-2.520228,-1.835525,-1.157528,-2.318428
1,1,0,-0.166134,-0.685786,0.041014,-1.074034,-1.475770,0.710991,-2.838854,-1.416182,-0.633449,-2.477687
2,1,0,-0.134558,-0.610375,-0.038005,-0.383370,0.146241,1.432397,-0.927096,-0.205820,0.317132,-0.527828
3,1,0,-0.129496,-0.603319,-0.040401,-0.383370,-0.003206,-0.400597,-0.767783,-0.149416,0.074079,-0.736708
4,1,0,-0.131921,-0.573847,0.089224,-0.383370,-0.061001,-0.376284,-0.927096,-0.441999,0.070623,0.214021
...,...,...,...,...,...,...,...,...,...,...,...,...
1733,6,1,-0.152122,-0.658918,-0.013876,-0.420395,-1.135367,-0.253517,0.825349,-0.285219,0.754709,0.274395
1734,6,1,-0.153823,-0.624051,-0.008705,-0.617524,-1.067617,-0.340272,0.666035,-0.315874,0.456793,0.445751
1735,6,1,-0.151816,-0.640286,-0.020001,-0.617524,-0.803723,-0.302268,0.028783,-0.030334,1.163076,1.679451
1736,6,1,-0.150106,-0.596533,-0.020139,-0.617524,-0.759067,-0.320913,0.188096,0.432560,1.242770,1.927169


### Saving to CSV

In [145]:
# Write every final filtered dataset next to its normalised source file, without the index.
filtered_outputs = {
    final_separate_dataset_path + 'PatchTable_Acceleration_Filtered.csv': filtered_acc_dataset_df,
    final_separate_dataset_path + 'PatchTable_Gyroscope_Filtered.csv': filtered_gyro_dataset_df,
    final_combined_dataset_path + 'PatchTable_Acceleration_Gyroscope_Filtered.csv': filtered_acc_gyro_dataset_df,
}
for output_path, filtered_frame in filtered_outputs.items():
    filtered_frame.to_csv(output_path, index=False)