# **Mounting Google Drive**

In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
import os
os.getcwd()

'/gdrive'

#### Move to the Dataset Dircetory in My Drive

In [4]:
os.chdir("/gdrive/MyDrive/Autism_code/Young_vs_Old/Kinetics")
!pwd

/gdrive/MyDrive/Autism_code/Young_vs_Old/Kinetics


#### Loading the Kinetics dataset

In [5]:
# importing necessary packages
import matplotlib.pyplot as plt  # for making plots / graphs
import pandas as pd              # for reading the .csv file and related operations
import numpy as np               # for working with arrays (multi-dimensional)  

# read the dataset
df = pd.read_excel("./Master_YoungOld_Kinetics_TypSpeed_reduced_vars.xlsx")

# now, the whole dataset csv dataset file is saved into `df` variable.
print("df.shape = ", df.shape)
df.head(3)

df.shape =  (72, 39)


Unnamed: 0,Participant,Side,Group,ANKLE Moment_X_MAX_ST..35.70.,ANKLE Moment_X_MAX_ST_time..35.70.,ANKLE Power_X_MAX_ST..35.70.,ANKLE Power_X_MAX_ST_time..35.70.,HIP Moment_X_MAX_ST,HIP Moment_X_MAX_ST_time,HIP Moment_X_MAX_SW,...,KNEE Power_X_MIN_SW,KNEE Power_X_MIN_SW_time,KNEE Power_X_MAX_ST..12.35.,KNEE Power_X_MAX_ST_time..12.35.,KNEE Power_X_MIN_ST..01.25.,KNEE Power_X_MIN_ST_time..01.25.,KNEE Power_X_MIN_ST..35.70.,KNEE Power_X_MIN_ST_time..35.70.,KNEE Power_X_MAX_ST..01.12.,KNEE Power_X_MAX_ST_time..01.12.
0,P001,L,Y,1.6513,50.435,4.4286,55.652,0.22744,0.86957,0.3535,...,-1.0523,93.043,0.31687,22.609,-0.8517,9.5652,-1.0244,57.391,0.24955,0.86957
1,P001,R,Y,1.5483,50.0,4.5833,56.14,0.52977,5.2632,0.39082,...,-1.596,92.982,0.37378,14.035,-0.61892,9.6491,-1.4494,57.018,0.35562,0.87719
2,P002,L,Y,1.2021,45.455,2.9617,52.525,0.50208,6.0606,0.42453,...,-1.1166,88.889,0.60614,16.162,-0.97828,8.0808,-2.1391,54.545,0.23823,1.0101


In [6]:
# print the columns of the data frame
df.columns

Index(['Participant', 'Side', 'Group', 'ANKLE Moment_X_MAX_ST..35.70.',
       'ANKLE Moment_X_MAX_ST_time..35.70.', 'ANKLE Power_X_MAX_ST..35.70.',
       'ANKLE Power_X_MAX_ST_time..35.70.', 'HIP Moment_X_MAX_ST',
       'HIP Moment_X_MAX_ST_time', 'HIP Moment_X_MAX_SW',
       'HIP Moment_X_MAX_SW_time', 'HIP Moment_X_MIN_ST..35.70.',
       'HIP Moment_X_MIN_ST_time..35.70.', 'HIP Moment_Y_MAX_ST..12.35.',
       'HIP Moment_Y_MAX_ST_time..12.35.', 'HIP Moment_Y_MAX_ST..35.70.',
       'HIP Moment_Y_MAX_ST_time..35.70.', 'HIP Power_X_MAX_ST..12.35.',
       'HIP Power_X_MAX_ST_time..12.35.', 'HIP Power_X_MAX_ST..35.70.',
       'HIP Power_X_MAX_ST_time..35.70.', 'HIP Power_X_MIN_ST..35.70.',
       'HIP Power_X_MIN_ST_time..35.70.', 'KNEE Moment_X_MIN_ST',
       'KNEE Moment_X_MIN_ST_time', 'KNEE Moment_X_MAX_ST..12.35.',
       'KNEE Moment_X_MAX_ST_time..12.35.', 'KNEE Moment_X_MAX_ST..35.70.',
       'KNEE Moment_X_MAX_ST_time..35.70.', 'KNEE Power_X_MIN_SW',
       'KNEE Power

#### loading the TS dataset

In [7]:
# read the dataset
TS = pd.read_csv("./TS_Kinematics_SSF_MSF_Planar_dataset_2022-reduced_vars.csv")
TS = TS.loc[:, "Participant" : "Stride_Length_Mean"]

TS.rename(columns={"Age group" : "Group"}, inplace=True)
TS.rename(columns={"SIDE" : "Side"}, inplace=True)
TS.drop(columns=["Processed speed"], inplace=True)

# now, the whole dataset csv dataset file is saved into `TS` variable.
print("TS.shape = ", TS.shape)
TS.head(3)

TS.shape =  (72, 11)


Unnamed: 0,Participant,Group,Side,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,P001,Y,L,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,P001,Y,R,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,P002,Y,L,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [8]:
# print the columns of the data frame
TS.columns

Index(['Participant', 'Group', 'Side', 'Cycle_Time_Mean', 'Step_Length_Mean',
       'Speed', 'Double_Limb_Support_Time_Ave', 'Single Support Time',
       'Time to TO', 'Steps_Per_Minute_Mean', 'Stride_Length_Mean'],
      dtype='object')

In [9]:
pd.merge(df, TS, on=["Participant", "Group", "Side"]).head(3)

Unnamed: 0,Participant,Side,Group,ANKLE Moment_X_MAX_ST..35.70.,ANKLE Moment_X_MAX_ST_time..35.70.,ANKLE Power_X_MAX_ST..35.70.,ANKLE Power_X_MAX_ST_time..35.70.,HIP Moment_X_MAX_ST,HIP Moment_X_MAX_ST_time,HIP Moment_X_MAX_SW,...,KNEE Power_X_MAX_ST..01.12.,KNEE Power_X_MAX_ST_time..01.12.,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,P001,L,Y,1.6513,50.435,4.4286,55.652,0.22744,0.86957,0.3535,...,0.24955,0.86957,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,P001,R,Y,1.5483,50.0,4.5833,56.14,0.52977,5.2632,0.39082,...,0.35562,0.87719,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,P002,L,Y,1.2021,45.455,2.9617,52.525,0.50208,6.0606,0.42453,...,0.23823,1.0101,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [10]:
# merging the TS and Kinetics dataframes based on ["Participant", "Group", "Side"] columns.
merged = pd.merge(df, TS, on=["Participant", "Group", "Side"])
print("merged.shape = ", merged.shape)
merged.columns

merged.shape =  (72, 47)


Index(['Participant', 'Side', 'Group', 'ANKLE Moment_X_MAX_ST..35.70.',
       'ANKLE Moment_X_MAX_ST_time..35.70.', 'ANKLE Power_X_MAX_ST..35.70.',
       'ANKLE Power_X_MAX_ST_time..35.70.', 'HIP Moment_X_MAX_ST',
       'HIP Moment_X_MAX_ST_time', 'HIP Moment_X_MAX_SW',
       'HIP Moment_X_MAX_SW_time', 'HIP Moment_X_MIN_ST..35.70.',
       'HIP Moment_X_MIN_ST_time..35.70.', 'HIP Moment_Y_MAX_ST..12.35.',
       'HIP Moment_Y_MAX_ST_time..12.35.', 'HIP Moment_Y_MAX_ST..35.70.',
       'HIP Moment_Y_MAX_ST_time..35.70.', 'HIP Power_X_MAX_ST..12.35.',
       'HIP Power_X_MAX_ST_time..12.35.', 'HIP Power_X_MAX_ST..35.70.',
       'HIP Power_X_MAX_ST_time..35.70.', 'HIP Power_X_MIN_ST..35.70.',
       'HIP Power_X_MIN_ST_time..35.70.', 'KNEE Moment_X_MIN_ST',
       'KNEE Moment_X_MIN_ST_time', 'KNEE Moment_X_MAX_ST..12.35.',
       'KNEE Moment_X_MAX_ST_time..12.35.', 'KNEE Moment_X_MAX_ST..35.70.',
       'KNEE Moment_X_MAX_ST_time..35.70.', 'KNEE Power_X_MIN_SW',
       'KNEE Power

In [11]:
# Remove the Columns: ["Participant", "Side"]- These columns were not needed.
merged = merged.drop(["Participant", "Side"], axis=1)
print("merged.shape = ", merged.shape)
merged.columns

merged.shape =  (72, 45)


Index(['Group', 'ANKLE Moment_X_MAX_ST..35.70.',
       'ANKLE Moment_X_MAX_ST_time..35.70.', 'ANKLE Power_X_MAX_ST..35.70.',
       'ANKLE Power_X_MAX_ST_time..35.70.', 'HIP Moment_X_MAX_ST',
       'HIP Moment_X_MAX_ST_time', 'HIP Moment_X_MAX_SW',
       'HIP Moment_X_MAX_SW_time', 'HIP Moment_X_MIN_ST..35.70.',
       'HIP Moment_X_MIN_ST_time..35.70.', 'HIP Moment_Y_MAX_ST..12.35.',
       'HIP Moment_Y_MAX_ST_time..12.35.', 'HIP Moment_Y_MAX_ST..35.70.',
       'HIP Moment_Y_MAX_ST_time..35.70.', 'HIP Power_X_MAX_ST..12.35.',
       'HIP Power_X_MAX_ST_time..12.35.', 'HIP Power_X_MAX_ST..35.70.',
       'HIP Power_X_MAX_ST_time..35.70.', 'HIP Power_X_MIN_ST..35.70.',
       'HIP Power_X_MIN_ST_time..35.70.', 'KNEE Moment_X_MIN_ST',
       'KNEE Moment_X_MIN_ST_time', 'KNEE Moment_X_MAX_ST..12.35.',
       'KNEE Moment_X_MAX_ST_time..12.35.', 'KNEE Moment_X_MAX_ST..35.70.',
       'KNEE Moment_X_MAX_ST_time..35.70.', 'KNEE Power_X_MIN_SW',
       'KNEE Power_X_MIN_SW_time', 'KNEE 

### Label encode target variable - `y`

In [12]:
# First, look at the target variable
print(merged.loc[:, "Group"].values.shape)
print(merged.loc[:, "Group"].values)

(72,)
['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O']


In [13]:
# Perform Data Preprocessing
# Label Encoding the class variables 
# Here, we replace the "Control" and "Autism" keywords with 0 and 1 values, respectively.
merged["Group"] = merged["Group"].replace({'O': 0, 'Y': 1})
merged.head(3)

Unnamed: 0,Group,ANKLE Moment_X_MAX_ST..35.70.,ANKLE Moment_X_MAX_ST_time..35.70.,ANKLE Power_X_MAX_ST..35.70.,ANKLE Power_X_MAX_ST_time..35.70.,HIP Moment_X_MAX_ST,HIP Moment_X_MAX_ST_time,HIP Moment_X_MAX_SW,HIP Moment_X_MAX_SW_time,HIP Moment_X_MIN_ST..35.70.,...,KNEE Power_X_MAX_ST..01.12.,KNEE Power_X_MAX_ST_time..01.12.,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,1,1.6513,50.435,4.4286,55.652,0.22744,0.86957,0.3535,94.783,-0.91131,...,0.24955,0.86957,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,1,1.5483,50.0,4.5833,56.14,0.52977,5.2632,0.39082,97.368,-0.73814,...,0.35562,0.87719,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,1,1.2021,45.455,2.9617,52.525,0.50208,6.0606,0.42453,91.919,-1.0582,...,0.23823,1.0101,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [14]:
# saving the target variables into `y` variable.
y = merged.loc[:, "Group"].values
print("y.shape = ", y.shape)
print("y = ", y)

y.shape =  (72,)
y =  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [15]:
# Perform Data Preprocessing- Data Standardization
# Defining a Standard Scaler for scaling the values in the dataset
# in the range of [-a, +a], i.e. scale values to a smaller range.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [16]:
merged.shape

(72, 45)

In [17]:
# Define the different segments from dataset to be used.
segments = {
    'TS + KINETICS': merged.loc[:,'ANKLE Moment_X_MAX_ST..35.70.':'Stride_Length_Mean'],
}


In [18]:
print(segments["TS + KINETICS"].shape)
segments["TS + KINETICS"].head()

(72, 44)


Unnamed: 0,ANKLE Moment_X_MAX_ST..35.70.,ANKLE Moment_X_MAX_ST_time..35.70.,ANKLE Power_X_MAX_ST..35.70.,ANKLE Power_X_MAX_ST_time..35.70.,HIP Moment_X_MAX_ST,HIP Moment_X_MAX_ST_time,HIP Moment_X_MAX_SW,HIP Moment_X_MAX_SW_time,HIP Moment_X_MIN_ST..35.70.,HIP Moment_X_MIN_ST_time..35.70.,...,KNEE Power_X_MAX_ST..01.12.,KNEE Power_X_MAX_ST_time..01.12.,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,1.6513,50.435,4.4286,55.652,0.22744,0.86957,0.3535,94.783,-0.91131,51.304,...,0.24955,0.86957,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,1.5483,50.0,4.5833,56.14,0.52977,5.2632,0.39082,97.368,-0.73814,54.386,...,0.35562,0.87719,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,1.2021,45.455,2.9617,52.525,0.50208,6.0606,0.42453,91.919,-1.0582,47.475,...,0.23823,1.0101,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637
3,1.3666,46.0,3.2444,53.0,0.62291,5.0,0.5459,95.0,-1.1355,48.0,...,0.31108,1.0,0.985,0.690197,1.412893,0.135,0.43,0.56,121.224518,1.404761
4,1.4126,46.903,3.2861,53.982,0.22335,4.4248,0.22118,95.575,-0.50891,53.097,...,0.28169,12.389,1.113333,0.608685,1.133143,0.19,0.455,0.65,105.263168,1.233512


In [19]:
segments["TS + KINETICS"].columns

Index(['ANKLE Moment_X_MAX_ST..35.70.', 'ANKLE Moment_X_MAX_ST_time..35.70.',
       'ANKLE Power_X_MAX_ST..35.70.', 'ANKLE Power_X_MAX_ST_time..35.70.',
       'HIP Moment_X_MAX_ST', 'HIP Moment_X_MAX_ST_time',
       'HIP Moment_X_MAX_SW', 'HIP Moment_X_MAX_SW_time',
       'HIP Moment_X_MIN_ST..35.70.', 'HIP Moment_X_MIN_ST_time..35.70.',
       'HIP Moment_Y_MAX_ST..12.35.', 'HIP Moment_Y_MAX_ST_time..12.35.',
       'HIP Moment_Y_MAX_ST..35.70.', 'HIP Moment_Y_MAX_ST_time..35.70.',
       'HIP Power_X_MAX_ST..12.35.', 'HIP Power_X_MAX_ST_time..12.35.',
       'HIP Power_X_MAX_ST..35.70.', 'HIP Power_X_MAX_ST_time..35.70.',
       'HIP Power_X_MIN_ST..35.70.', 'HIP Power_X_MIN_ST_time..35.70.',
       'KNEE Moment_X_MIN_ST', 'KNEE Moment_X_MIN_ST_time',
       'KNEE Moment_X_MAX_ST..12.35.', 'KNEE Moment_X_MAX_ST_time..12.35.',
       'KNEE Moment_X_MAX_ST..35.70.', 'KNEE Moment_X_MAX_ST_time..35.70.',
       'KNEE Power_X_MIN_SW', 'KNEE Power_X_MIN_SW_time',
       'KNEE Power_X_M

# Defining **Cross Validation** method to be used

In [20]:
# Define Leave-One-Out CV
from sklearn.model_selection import LeaveOneOut
loocv = LeaveOneOut()

# # Define Repeated Stratified k-fold CV
# from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
# rskf_cv = RepeatedStratifiedKFold(n_splits=8, n_repeats=5, random_state=36851234)

# Defning the **Classifer** to be used

In [43]:
# Define the Classifier to be used for Sequential Feature Selection (SFS)

# # Apply Linear LDA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# lda = LinearDiscriminantAnalysis(solver='svd', n_components=None)

# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# Define SVM classifier with RBF kernel
from sklearn.svm import SVC
svm = SVC(kernel='rbf', C=90, verbose=False)


# Defining the **Feature Selection** algorithm to be used

In [44]:
!pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [45]:
# Define the Sequential Feature Selection class
# https://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

# Below is the code for applying Forward Feature Selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
sfs = SFS(estimator=svm, 
            k_features=(1,15),
            forward=True, floating=False,
            verbose=2,
            scoring=('accuracy'),
            cv=loocv,
            n_jobs=-1)

# **Hyper-Parameter Optimization** for Non-Linear SVC (RBF)

In [46]:
# Define the Classifier and Parameter Grid to be used for GridSearch and final Evaluation
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import SVC
svm_classifier = SVC()

param_grid = [
              {'C': [0.01, 0.1, 0, 0.5, 1, 2, 3, 5, 8, 20, 50, 90], 
               'gamma': ['scale', 'auto', 0.01, 0.03, 0.04, 0.043, 0.045, 0.048, 0.05, 0.053, 0.055, 0.058, 0.06, 0.08, 0.0001, 0.001, 0.1, 1, 10], 
               'tol':[1e-2, 1e-3, 1e-4, 1e-5], 
               'kernel': ['rbf']}, #rbfSVM
]

:### Change the `estimator` in GridSearch to the estimator you are using.

In [47]:
# Define Grid Search class
from sklearn.model_selection import GridSearchCV
gridSearch = GridSearchCV(estimator=svm_classifier, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                          n_jobs=-1,
                          cv=loocv, # uses Leave One Out CV
                          refit=True, verbose=1)

# Main Driver Code: **Non Linear SVM (RBF)**

In [48]:
# Type the name of the Algorithm that you are using
# This will be used while Writing the Scores in .txt file
# LDA, LinearSVM, SVM (RBF), SVM (polynomial), LogisticRegression, RandomForest
algorith_you_are_using = 'SVM (RBF)' 

In [28]:
'''svm = SVC(kernel='rbf', verbose=False, C=1)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS + KINETICS
X.shape =  (72, 44)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  44 out of  44 | elapsed:    9.6s finished

[2022-09-21 13:30:32] Features: 1/44 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done  43 out of  43 | elapsed:    6.4s finished

[2022-09-21 13:30:38] Features: 2/44 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 out of  42 | elapsed:    3.7s finished

[2022-09-21 13:30:42] Features: 3/44 -- score: 0.8333333333333334[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 out of  41 | elapsed:    3.6s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  41 out of  41 | elapsed:    3.7s finished

[2022-09-21 13:30:46] 


sfs.k_score_ =  0.9166666666666666
sfs.k_feature_idx_ =  (0, 12, 22, 28, 31, 32, 37)
[After SFS] X.shape =  (72, 7)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=1, tol=0.01)

##############################################################

SVM (RBF), TS + KINETICS, 0.917, 0.972, 0.861, 0.875, 0.969, 31.000, 0.912, 0.839

##############################################################


In [35]:
'''svm = SVC(kernel='rbf', C=30, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS + KINETICS
X.shape =  (72, 44)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  44 out of  44 | elapsed:    6.8s finished

[2022-09-21 13:34:23] Features: 1/44 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done  43 out of  43 | elapsed:    8.2s finished

[2022-09-21 13:34:32] Features: 2/44 -- score: 0.7916666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done  42 out of  42 | elapsed:    8.3s finished

[2022-09-21 13:34:40] Features: 3/44 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done  41 out of  41 | elapsed:    5.6s finished

[2022-09-21 13:34:45] Featu


sfs.k_score_ =  0.9861111111111112
sfs.k_feature_idx_ =  (0, 2, 5, 8, 11, 12, 16, 17, 22, 24, 26, 27, 32, 36, 39, 43)
[After SFS] X.shape =  (72, 16)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=20, tol=0.01)

##############################################################

SVM (RBF), TS + KINETICS, 0.986, 1.000, 0.972, 0.973, 1.000, 0.972, 0.986, 0.973

##############################################################


In [42]:
'''svm = SVC(kernel='rbf', C=60, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS + KINETICS
X.shape =  (72, 44)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done  44 out of  44 | elapsed:    5.6s finished

[2022-09-21 13:38:11] Features: 1/44 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done  43 out of  43 | elapsed:    9.0s finished

[2022-09-21 13:38:20] Features: 2/44 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done  42 out of  42 | elapsed:    8.9s finished

[2022-09-21 13:38:29] Features: 3/44 -- score: 0.7916666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done  41 out o


sfs.k_score_ =  0.9444444444444444
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 13, 15, 16, 19, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 37, 38, 42)
[After SFS] X.shape =  (72, 27)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=5, gamma=0.048, tol=0.01)

##############################################################

SVM (RBF), TS + KINETICS, 0.958, 0.972, 0.944, 0.946, 0.971, 34.000, 0.958, 0.917

##############################################################


In [49]:
'''svm = SVC(kernel='rbf', verbose=False, C=90)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS + KINETICS
X.shape =  (72, 44)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done  44 out of  44 | elapsed:    8.8s finished

[2022-09-21 13:42:22] Features: 1/44 -- score: 0.7083333333333334[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done  43 out of  43 | elapsed:    9.1s finished

[2022-09-21 13:42:32] Features: 2/44 -- score: 0.7916666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done  42 out of  42 | elapsed:    8.0s finished

[2022-09-21 13:42:40] Features: 3/44 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done  41 out o


sfs.k_score_ =  0.9722222222222222
sfs.k_feature_idx_ =  (0, 2, 3, 5, 6, 8, 9, 10, 13, 14, 16, 17, 20, 25, 26, 27, 32, 36, 37, 38, 39, 42, 43)
[After SFS] X.shape =  (72, 23)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=8, gamma=0.048, tol=0.01)

##############################################################

SVM (RBF), TS + KINETICS, 0.972, 1.000, 0.944, 0.947, 1.000, 0.944, 0.971, 0.946

##############################################################


# **Verification**

In [50]:
# get the names of the feature subset selected using the Feature Selection algorithm.
sfs_feature_idx = [0, 2, 5, 8, 11, 12, 16, 17, 22, 24, 26, 27, 32, 36, 39, 43]
print("Number of Features selected: ", len(sfs_feature_idx))

segments["TS + KINETICS"].iloc[:, sfs_feature_idx].head()

Number of Features selected:  16


Unnamed: 0,ANKLE Moment_X_MAX_ST..35.70.,ANKLE Power_X_MAX_ST..35.70.,HIP Moment_X_MAX_ST_time,HIP Moment_X_MIN_ST..35.70.,HIP Moment_Y_MAX_ST_time..12.35.,HIP Moment_Y_MAX_ST..35.70.,HIP Power_X_MAX_ST..35.70.,HIP Power_X_MAX_ST_time..35.70.,KNEE Moment_X_MAX_ST..12.35.,KNEE Moment_X_MAX_ST..35.70.,KNEE Power_X_MIN_SW,KNEE Power_X_MIN_SW_time,KNEE Power_X_MIN_ST..35.70.,Cycle_Time_Mean,Double_Limb_Support_Time_Ave,Stride_Length_Mean
0,1.6513,4.4286,0.86957,-0.91131,14.783,0.8775,1.1526,56.522,0.6341,0.26139,-1.0523,93.043,-1.0244,1.135,0.26,1.410565
1,1.5483,4.5833,5.2632,-0.73814,14.912,0.96517,1.4667,56.14,0.40453,0.30982,-1.596,92.982,-1.4494,1.135,0.26,1.418065
2,1.2021,2.9617,6.0606,-1.0582,16.162,0.8324,1.5085,53.535,0.4561,0.5055,-1.1166,88.889,-2.1391,0.985,0.135,1.378637
3,1.3666,3.2444,5.0,-1.1355,15.0,0.93015,1.6039,54.0,0.36565,0.38262,-1.4729,90.0,-1.3109,0.985,0.135,1.404761
4,1.4126,3.2861,4.4248,-0.50891,14.159,0.72957,0.66525,65.487,1.1165,0.30479,-0.85151,90.265,-1.1395,1.113333,0.19,1.233512


In [51]:
temp = pd.DataFrame(segments["TS + KINETICS"].keys().to_numpy(), columns=["FeatureNames"])

features = temp.iloc[sfs_feature_idx]
features

Unnamed: 0,FeatureNames
0,ANKLE Moment_X_MAX_ST..35.70.
2,ANKLE Power_X_MAX_ST..35.70.
5,HIP Moment_X_MAX_ST_time
8,HIP Moment_X_MIN_ST..35.70.
11,HIP Moment_Y_MAX_ST_time..12.35.
12,HIP Moment_Y_MAX_ST..35.70.
16,HIP Power_X_MAX_ST..35.70.
17,HIP Power_X_MAX_ST_time..35.70.
22,KNEE Moment_X_MAX_ST..12.35.
24,KNEE Moment_X_MAX_ST..35.70.


In [52]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.iloc[:, sfs_feature_idx].values
  X = sc.fit_transform(X) # Standard Scaler
  print("X.shape = ", X.shape)

  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])

  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")

Running:  TS + KINETICS
X.shape =  (72, 16)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=20, tol=0.01)

##############################################################

SVM (RBF), TS + KINETICS, 0.986, 1.000, 0.972, 0.973, 1.000, 0.972, 0.986, 0.973

##############################################################


In [53]:
search_results.best_params_

{'C': 20, 'gamma': 'scale', 'kernel': 'rbf', 'tol': 0.01}

In [54]:
search_results.best_score_

0.9861111111111112

In [None]:
##############################################################################
//////////////////////////////////////////////////////////////////////////////
##############################################################################