In [128]:
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GroupKFold

# Classification

In [129]:
final_dataset_path = '../../dataset/final/windows/'
acc_dataset_path = final_dataset_path + 'PatchTable_Acceleration_Filtered.csv'
gyro_dataset_path = final_dataset_path + 'PatchTable_Gyroscope_Filtered.csv'

In [130]:
acc_dataset_df = pd.read_csv(acc_dataset_path)
gyro_dataset_df = pd.read_csv(gyro_dataset_path)
acc_id_groupings = acc_dataset_df['Subject']
gyro_id_groupings = gyro_dataset_df['Subject']

In [131]:
acc_dataset_df

Unnamed: 0,Subject,Class,CorssCorrelationSum_VTAP,CorssCorrelationPeak_VTAP,IndexHarmonicity_ML,FirstDerivativeMean_SigComplete_VT,FirstDerivative_Moment3rd_SigComplete_VT,FirstDerivativeLogRatio_SigComplete_ML,FirstDerivativeMean_SigComplete_AP,FirstDerivative_Skweness_SigComplete_AP,...,Mean_SigComplete_Magnitude,Skewness_SigComplete_Magnitude,Percentile25_SigComplete_Magnitude,MaxFreqSpectrum_Below5Hz_Magnitude,RelativePower_5To10Hz_Magnitude,RelativePower_BelowDomFr_Magnitude,MedianPSD_Plom_Magnitude,Moment3rdPSD_Plom_Magnitude,SkewnessPSD_Plom_Magnitude,ZeroCrossingRate_Magnitude
0,1,0,-0.163081,-0.662803,0.528986,0.475757,-0.062359,-0.136782,-0.409614,-1.334350,...,-0.035799,1.275514,1.444325,-0.116140,1.224695,-0.156583,-0.165272,-0.146060,-0.086591,-1.771846
1,1,0,-0.163483,-0.668085,-0.364654,0.090667,-0.110018,-0.233595,-0.054282,-1.066276,...,-0.281341,-0.695417,1.432455,0.408305,1.251751,-0.063859,-0.224007,-0.146061,0.952947,-2.360037
2,1,0,-0.163548,-0.668120,-0.362345,-0.033453,-0.110054,-0.112613,-0.050146,-1.160794,...,-0.404884,-0.490985,1.412446,-2.039106,1.249730,0.167928,-0.224137,-0.146061,1.178809,-2.654132
3,1,0,-0.163525,-0.668035,-0.376434,0.009309,-0.110064,-0.227149,-0.042921,-1.174912,...,-0.463610,-0.346706,1.393891,-2.039106,1.052092,0.285549,-0.224224,-0.146061,1.203253,-2.801180
4,1,0,-0.163469,-0.667327,13.995720,-0.002560,-0.109835,-0.698540,0.162228,-0.055198,...,-0.208414,2.387815,1.407141,-1.951699,-1.596697,-0.016023,-0.196872,-0.146061,0.069855,-3.095275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1875,6,1,-0.152225,-0.607692,-0.416798,-0.867979,-0.047874,0.015603,-0.046169,0.002909,...,0.185887,0.225318,0.462994,-0.465771,0.163099,-0.191222,-0.209324,-0.146017,-0.295417,0.580916
1876,6,1,-0.150379,-0.623584,-0.154285,-0.524313,-0.062139,-0.028567,0.040108,0.172652,...,0.169964,0.472963,0.627831,-0.553178,0.996511,-0.178239,-0.219464,-0.146033,0.136042,-0.007274
1877,6,1,-0.148807,-0.580757,-0.314912,0.828930,-0.047780,-0.325827,-0.340791,0.078664,...,0.280245,0.255260,0.389285,1.194972,0.894499,-0.184609,-0.166815,-0.145960,-0.002798,0.139773
1878,6,1,-0.132307,-0.612842,-0.367696,1.526718,-0.039679,-0.599088,-2.453837,-3.100024,...,1.338794,0.849419,0.387508,1.194972,0.132428,-0.083886,0.621261,-0.145660,-0.065654,-0.007274


In [132]:
acc_target = acc_dataset_df.iloc[:, 1].copy().values
gyro_target = gyro_dataset_df.iloc[:, 1].copy().values
acc_features = acc_dataset_df.iloc[:, 2:].copy().values
gyro_features = gyro_dataset_df.iloc[:, 2:].copy().values

In [133]:
acc_target

array([0, 0, 0, ..., 1, 1, 1])

In [134]:
gyro_features

array([[-2.2473824 ],
       [-2.40179113],
       [-0.51130823],
       ...,
       [ 1.62875632],
       [ 1.86893121],
       [ 1.12949507]])

In [135]:
acc_features

array([[-0.16308055, -0.66280332,  0.52898562, ..., -0.14606049,
        -0.08659125, -1.77184609],
       [-0.16348274, -0.66808468, -0.36465441, ..., -0.14606052,
         0.95294719, -2.36003673],
       [-0.16354794, -0.66812018, -0.36234533, ..., -0.14606052,
         1.17880867, -2.65413205],
       ...,
       [-0.14880737, -0.58075726, -0.31491191, ..., -0.14595968,
        -0.00279838,  0.13977349],
       [-0.13230664, -0.61284218, -0.36769616, ..., -0.14566017,
        -0.06565436, -0.00727417],
       [-0.09744461, -0.46225235, -0.15936994, ..., -0.14280522,
        -0.43243434, -0.15432183]])

### Logistic Regression

In [136]:
from sklearn.model_selection import cross_val_score

# Initialize GroupKFold with the number of splits
gkf = GroupKFold(n_splits=5)

# Initialize the logistic regression model
model = LogisticRegression()

# Perform cross-validation with GroupKFold
scores = cross_val_score(model, acc_features, acc_target, cv=gkf, groups=acc_id_groupings, scoring='accuracy')

# Print the scores
print("Cross-validated accuracy scores:", scores)
print("Mean accuracy:", np.mean(scores))
print("Standard deviation of accuracy:", np.std(scores))

Cross-validated accuracy scores: [0.67623604 0.53403141 0.60431655 0.69708029 0.59247649]
Mean accuracy: 0.6208281572062537
Standard deviation of accuracy: 0.05915158005380044


In [137]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test, groups_train, groups_test = train_test_split(acc_features, acc_target, acc_id_groupings, test_size=0.2, random_state=42)

# Initialize GroupKFold with the number of splits
gkf = GroupKFold(n_splits=6)

# Initialize the logistic regression model
model = SVC()

# Perform cross-validation with GroupKFold
cv_scores = cross_val_score(model, X_train, y_train, cv=gkf, groups=groups_train, scoring='accuracy')

# Print cross-validation scores
print("Cross-validated accuracy scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())

# Train the selected model on the full training set
model.fit(X_train, y_train)

# Evaluate the final model on the testing set
final_accuracy = model.score(X_test, y_test)
print("Final evaluation accuracy:", final_accuracy)

Cross-validated accuracy scores: [0.64693878 0.5382263  0.60714286 0.58636364 0.61904762 0.51851852]
Mean accuracy: 0.5860396177128374
Final evaluation accuracy: 0.8085106382978723


### Random Forest

### Support Vector Machine 

In [138]:
# Initialize the SVM model
model = SVC(kernel='sigmoid')