In [4]:
# Core libraries
import os
import joblib

# Numerical and scientific computation
import numpy as np
import pandas as pd

# Signal processing
from scipy.signal import welch
from scipy.fft import fft
from scipy.stats import entropy, skew, kurtosis

# Machine learning
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay

# Visualization
import matplotlib.pyplot as plt

# Wavelet transform
import pywt

# Import necessary libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_score, recall_score

from sklearn.preprocessing import LabelEncoder

In [5]:
# Read the DataFrame from the CSV file
import pandas as pd

feature_df = pd.read_csv('./final_time_domain_fft_wavelet_stastical_features_1000_250_40subs.csv')

# Display the DataFrame
# print(feature_df.head())

   muscle_1_mean  muscle_1_median  muscle_1_rms  muscle_1_mav  muscle_1_wl  \
0       0.000099         0.000822      0.030228      0.021761     8.613201   
1      -0.000241         0.000995      0.025578      0.019688     7.820304   
2       0.000155         0.000422      0.028064      0.022046     8.402600   
3      -0.000076         0.000957      0.025809      0.019704     7.675642   
4      -0.000843         0.000425      0.028020      0.020596     7.988726   

   muscle_1_zcr  muscle_1_wamp  muscle_1_aac  muscle_1_logd  muscle_1_ssi  \
0           114       0.237519      0.008622       0.021322           211   
1           116       0.206147      0.007828       0.019371           222   
2           124       0.167535      0.008411       0.021664           204   
3           120       0.180043      0.007683       0.019381           211   
4           118       0.249505      0.007997       0.020218           218   

   ...  muscle_4_wavelet_level_5_rms  muscle_4_wavelet_level_5_energ

----

-----

# Leave One Subject Out

In [6]:
feature_df

Unnamed: 0,muscle_1_mean,muscle_1_median,muscle_1_rms,muscle_1_mav,muscle_1_wl,muscle_1_zcr,muscle_1_wamp,muscle_1_aac,muscle_1_logd,muscle_1_ssi,...,muscle_4_wavelet_level_5_rms,muscle_4_wavelet_level_5_energy,muscle_4_wavelet_level_5_skewness,muscle_4_wavelet_level_5_kurtosis,muscle_4_wavelet_level_5_abs_sum,muscle_4_wavelet_level_5_waveform_length,muscle_4_wavelet_level_5_zcr,activity_id,cycle_id,subject_id
0,0.000099,0.000822,0.030228,0.021761,8.613201,114,0.237519,0.008622,0.021322,211,...,0.001222,0.000749,-0.853446,7.446621,0.417394,0.581182,235,1,1,1
1,-0.000241,0.000995,0.025578,0.019688,7.820304,116,0.206147,0.007828,0.019371,222,...,0.001073,0.000578,-0.506459,5.004082,0.364582,0.492136,232,1,1,1
2,0.000155,0.000422,0.028064,0.022046,8.402600,124,0.167535,0.008411,0.021664,204,...,0.001109,0.000618,-0.708341,6.361614,0.363733,0.512816,242,1,1,1
3,-0.000076,0.000957,0.025809,0.019704,7.675642,120,0.180043,0.007683,0.019381,211,...,0.000856,0.000368,-0.752806,5.436700,0.279479,0.405016,246,1,1,1
4,-0.000843,0.000425,0.028020,0.020596,7.988726,118,0.249505,0.007997,0.020218,218,...,0.000893,0.000400,-0.378905,5.749440,0.288698,0.424437,247,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31795,-0.000151,-0.001637,0.037032,0.028464,14.613098,154,0.247905,0.014628,0.027807,250,...,0.000493,0.000122,-0.280028,1.645606,0.180078,0.266012,274,10,5,40
31796,0.000808,0.000345,0.037353,0.028798,14.194247,167,0.238372,0.014208,0.028128,251,...,0.000436,0.000095,-0.226462,0.800275,0.168110,0.249063,265,10,5,40
31797,0.000133,-0.001041,0.033897,0.024877,12.176928,148,0.268254,0.012189,0.024326,246,...,0.000504,0.000128,-0.166632,0.511613,0.196714,0.283718,266,10,5,40
31798,0.000316,0.000193,0.038018,0.028840,13.550572,148,0.288329,0.013564,0.028149,231,...,0.000489,0.000120,-0.054281,1.446314,0.184117,0.272110,260,10,5,40


In [10]:
# Scaling the Data (StandardScaler or MinMaxScaler based on classifier)
scaler = StandardScaler()  # Use MinMaxScaler() for tree-based models
scaled_features = scaler.fit_transform(feature_df.drop(columns=["activity_id", "cycle_id", "subject_id"]))
scaled_feature_df = pd.DataFrame(scaled_features, columns=feature_df.columns[:-3])
scaled_feature_df["activity_id"] = feature_df["activity_id"]
scaled_feature_df["cycle_id"] = feature_df["cycle_id"]
scaled_feature_df["subject_id"] = feature_df["subject_id"]

In [14]:
results = []

subjects = 40
for subject in range(1, subjects + 1):
    print(f"- - - - - - - - - - subject id : {subject} - - - - - - - - - -")
    train_data = scaled_feature_df[scaled_feature_df['subject_id'] != subject]
    test_data = scaled_feature_df[scaled_feature_df['subject_id'] == subject]
    X_train = train_data.drop(columns=['activity_id', 'cycle_id', 'subject_id'])
    y_train = train_data['activity_id']
    X_test = test_data.drop(columns=['activity_id', 'cycle_id', 'subject_id'])
    y_test = test_data['activity_id']

    # Encode labels
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    # Hyperparameter grid
    param_grid = {
        'bootstrap': [True],
        'max_depth': [20],
        'min_samples_leaf': [1],
        'min_samples_split': [2],
        'n_estimators': [200],
        'class_weight': ['balanced']
    }

    # Random Forest with GridSearchCV
    model = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
    grid_search.fit(X_train, y_train)

    # Predictions and metrics
    y_pred = grid_search.best_estimator_.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    # Append results
    results.append({
        'subject_id': subject,
        'best_params': grid_search.best_params_,
        'accuracy': accuracy,
        'f1_score': f1,
        'precision': precision,
        'recall': recall
    })

    # Save confusion matrix to the "./loso" folder
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix for Subject {subject}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    
    # Save plot as an image
    plt.savefig(f'./loso/confusion_matrix_subject_{subject}.png')
    plt.close()  # Close the plot to free up memory


- - - - - - - - - - subject id : 1 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 3.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 3.0min
- - - - - - - - - - subject id : 2 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 3.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.8min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 3.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_sample

  _warn_prf(average, modifier, msg_start, len(result))


- - - - - - - - - - subject id : 3 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 3.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.8min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.8min
- - - - - - - - - - subject id : 4 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
- - - - - - - - - - subject id : 5 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.8min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_spli

- - - - - - - - - - subject id : 18 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
- - - - - - - - - - subject id : 19 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
- - - - - - - - - - subject id : 20 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.1min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.9min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_s

  _warn_prf(average, modifier, msg_start, len(result))


[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
- - - - - - - - - - subject id : 32 - - - - - - - - - -
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balanced, max_depth=20, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time= 1.0min
[CV] END bootstrap=True, class_weight=balance

In [15]:
# Create DataFrame from results
results_df = pd.DataFrame(results)

# Save results to a CSV file
results_df.to_csv('./loso_classification_results.csv', index=False)

# Display the results DataFrame
print(results_df)

    subject_id                                        best_params  accuracy  \
0            1  {'bootstrap': True, 'class_weight': 'balanced'...  0.650314   
1            2  {'bootstrap': True, 'class_weight': 'balanced'...  0.647799   
2            3  {'bootstrap': True, 'class_weight': 'balanced'...  0.528302   
3            4  {'bootstrap': True, 'class_weight': 'balanced'...  0.525786   
4            5  {'bootstrap': True, 'class_weight': 'balanced'...  0.694340   
5            6  {'bootstrap': True, 'class_weight': 'balanced'...  0.641509   
6            7  {'bootstrap': True, 'class_weight': 'balanced'...  0.739623   
7            8  {'bootstrap': True, 'class_weight': 'balanced'...  0.543396   
8            9  {'bootstrap': True, 'class_weight': 'balanced'...  0.744654   
9           10  {'bootstrap': True, 'class_weight': 'balanced'...  0.430189   
10          11  {'bootstrap': True, 'class_weight': 'balanced'...  0.612579   
11          12  {'bootstrap': True, 'class_weight': 

In [21]:
results_df.drop(columns=['best_params', 'subject_id']).mean()

accuracy     0.584497
f1_score     0.564184
precision    0.642137
recall       0.584497
dtype: float64