# **Mounting Google Drive**

In [7]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [8]:
import os
os.getcwd()

'/gdrive'

#### Move to the Dataset Dircetory in My Drive

In [9]:
os.chdir("/gdrive/MyDrive/Autism_code/Young_vs_Old/Kinematics")
!pwd

/gdrive/MyDrive/Autism_code/Young_vs_Old/Kinematics


In [10]:
# importing necessary packages
import matplotlib.pyplot as plt  # for making plots / graphs
import pandas as pd              # for reading the .csv file and related operations
import numpy as np               # for working with arrays (multi-dimensional)  

# read the dataset
df = pd.read_csv("./TS_Kinematics_SSF_MSF_Planar_dataset_2022-reduced_vars.csv")

# now, the whole dataset csv dataset file is saved into `df` variable.
print("df.shape = ", df.shape)
df.head(3)

df.shape =  (72, 260)


Unnamed: 0,Participant,Age group,Processed speed,SIDE,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,P001,Y,N,L,1.135,0.682212,1.246092,0.26,0.44,0.7,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,P001,Y,N,R,1.135,0.73205,1.246092,0.26,0.435,0.7,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,P002,Y,N,L,0.985,0.701486,1.412893,0.135,0.42,0.56,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [11]:
# print the columns of the data frame
print(df.columns)

Index(['Participant', 'Age group', 'Processed speed', 'SIDE',
       'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=260)


In [12]:
# Remove unwanted columns- columns not needed for this analysis.
# dropping "Collected Speed", "Dimensionless Speed", 

In [13]:
# Remove the Columns: ["Participant", "Side"]- These columns were not needed.
df = df.drop(["Participant", "SIDE", "Processed speed"], axis=1)
print("df.shape = ", df.shape)
df.columns

df.shape =  (72, 257)


Index(['Age group', 'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       'Steps_Per_Minute_Mean', 'Stride_Length_Mean', 'Min Stance_Pelv_X',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=257)

### Label encode target variable - `y`

In [14]:
# First, look at the target variable
print(df.loc[:, "Age group"].values.shape)
print(df.loc[:, "Age group"].values)

(72,)
['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O']


In [15]:
# Perform Data Preprocessing
# Label Encoding the class variables 
# Here, we replace the "Control" and "Autism" keywords with 0 and 1 values, respectively.
df["Age group"] = df["Age group"].replace({'O': 0, 'Y': 1})
df.head(3)

Unnamed: 0,Age group,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean,Min Stance_Pelv_X,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,1,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565,2.361861,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,1,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065,2.095112,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,1,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637,3.227882,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [16]:
# saving the target variables into `y` variable.
y = df.loc[:, "Age group"].values
print("y.shape = ", y.shape)
print("y = ", y)

y.shape =  (72,)
y =  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]


In [17]:
# Perform Data Preprocessing- Data Standardization
# Defining a Standard Scaler for scaling the values in the dataset
# in the range of [-a, +a], i.e. scale values to a smaller range.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [18]:
df.shape

(72, 257)

In [19]:
# Define the different segments from dataset to be used.
segments = {
    'KINEMATICS': df.loc[:,'Min Stance_Pelv_X' : 'TimeMax Swing_Knee_Z'],
}


In [20]:
print(segments["KINEMATICS"].shape)
segments["KINEMATICS"].head()

(72, 96)


Unnamed: 0,Min Stance_Pelv_X,TimeMin Stance_Pelv_X,Max Stance_Pelv_X,TimeMax Stance_Pelv_X,Min Swing_Pelv_X,TimeMin Swing_Pelv_X,Max Swing_Pelv_X,TimeMax Swing_Pelv_X,Min Stance_Pelv_Y,TimeMin Stance_Pelv_Y,...,Max Swing_Knee_Y,TimeMax Swing_Knee_Y,Min Stance_Knee_Z,TimeMin Stance_Knee_Z,Max Stance_Knee_Z,TimeMax Stance_Knee_Z,Min Swing_Knee_Z,TimeMin Swing_Knee_Z,Max Swing_Knee_Z,TimeMax Swing_Knee_Z
0,2.361861,10.526325,4.812255,46.491222,3.004734,61.403507,6.042765,81.578949,-3.046228,61.403507,...,7.665916,78.947372,-41.339787,61.403507,-16.819296,13.157902,-43.43322,63.15789,-21.240538,95.614037
1,2.095112,59.9119,6.042765,31.718073,2.300779,61.674011,5.046373,96.475304,-3.728348,61.674011,...,2.255617,81.505486,-27.003588,61.674011,-12.001336,13.215867,-32.745613,63.436356,-13.656424,98.237648
2,3.227882,0.0,8.612376,56.12244,2.51663,97.959183,8.612287,57.142849,-4.68152,57.142849,...,3.904225,59.183662,-39.819038,20.408167,-27.184574,0.0,-46.804024,90.816307,-24.449614,66.326515
3,2.51663,47.474754,8.612376,6.0606,4.870071,56.565655,8.038762,100.0,-5.974075,56.565655,...,4.516067,85.858582,-38.80048,56.565655,-26.507957,41.414127,-42.876225,76.262619,-29.689373,80.303032
4,10.515388,36.764725,12.8866,0.0,9.552069,63.358917,13.517436,94.111969,-3.445211,58.823544,...,0.878909,94.509811,-6.941079,20.944746,1.711521,12.644828,-11.247166,87.298965,4.961246,71.02729


In [21]:
segments["KINEMATICS"].columns

Index(['Min Stance_Pelv_X', 'TimeMin Stance_Pelv_X', 'Max Stance_Pelv_X',
       'TimeMax Stance_Pelv_X', 'Min Swing_Pelv_X', 'TimeMin Swing_Pelv_X',
       'Max Swing_Pelv_X', 'TimeMax Swing_Pelv_X', 'Min Stance_Pelv_Y',
       'TimeMin Stance_Pelv_Y', 'Max Stance_Pelv_Y', 'TimeMax Stance_Pelv_Y',
       'Min Swing_Pelv_Y', 'TimeMin Swing_Pelv_Y', 'Max Swing_Pelv_Y',
       'TimeMax Swing_Pelv_Y', 'Min Stance_Pelv_Z', 'TimeMin Stance_Pelv_Z',
       'Max Stance_Pelv_Z', 'TimeMax Stance_Pelv_Z', 'Min Swing_Pelv_Z',
       'TimeMin Swing_Pelv_Z', 'Max Swing_Pelv_Z', 'TimeMax Swing_Pelv_Z',
       'Min Stance_Trunk_X', 'TimeMin Stance_Trunk_X', 'Max Stance_Trunk_X',
       'TimeMax Stance_Trunk_X', 'Min Swing_Trunk_X', 'TimeMin Swing_Trunk_X',
       'Max Swing_Trunk_X', 'TimeMax Swing_Trunk_X', 'Min Stance_Trunk_Y',
       'TimeMin Stance_Trunk_Y', 'Max Stance_Trunk_Y',
       'TimeMax Stance_Trunk_Y', 'Min Swing_Trunk_Y', 'TimeMin Swing_Trunk_Y',
       'Max Swing_Trunk_Y', 'TimeMax Sw

# Defining **Cross Validation** method to be used

In [22]:
# Define Leave-One-Out CV
from sklearn.model_selection import LeaveOneOut
loocv = LeaveOneOut()

# # Define Repeated Stratified k-fold CV
# from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
# rskf_cv = RepeatedStratifiedKFold(n_splits=8, n_repeats=5, random_state=36851234)

# Defning the **Classifer** to be used

In [44]:
# Define the Classifier to be used for Sequential Feature Selection (SFS)

# # Apply Linear LDA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# lda = LinearDiscriminantAnalysis(solver='svd', n_components=None)

# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# Define SVM classifier with RBF kernel
from sklearn.svm import SVC
svm = SVC(kernel='rbf', C=90, verbose=False)


# Defining the **Feature Selection** algorithm to be used

In [45]:
!pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [46]:
# Define the Sequential Feature Selection class
# https://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

# Below is the code for applying Forward Feature Selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
sfs = SFS(estimator=svm, 
            k_features=(1,15),
            forward=True, floating=False,
            verbose=2,
            scoring=('accuracy'),
            cv=loocv,
            n_jobs=-1)

# **Hyper-Parameter Optimization** for Non-Linear SVC (RBF)

In [47]:
# Define the Classifier and Parameter Grid to be used for GridSearch and final Evaluation
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import SVC
svm_classifier = SVC()

param_grid = [
              {'C': [0.01, 0.1, 0, 0.5, 1, 2, 3, 5, 8, 20, 50, 90], 
               'gamma': ['scale', 'auto', 0.01, 0.03, 0.04, 0.043, 0.045, 0.048, 0.05, 0.053, 0.055, 0.058, 0.06, 0.08, 0.0001, 0.001, 0.1, 1, 10], 
               'tol':[1e-2, 1e-3, 1e-4, 1e-5], 
               'kernel': ['rbf']}, #rbfSVM
]

:### Change the `estimator` in GridSearch to the estimator you are using.

In [48]:
# Define Grid Search class
from sklearn.model_selection import GridSearchCV
gridSearch = GridSearchCV(estimator=svm_classifier, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                          n_jobs=-1,
                          cv=loocv, # uses Leave One Out CV
                          refit=True, verbose=1)

# Main Driver Code: **Non Linear SVM (RBF)**

In [49]:
# Type the name of the Algorithm that you are using
# This will be used while Writing the Scores in .txt file
# LDA, LinearSVM, SVM (RBF), SVM (polynomial), LogisticRegression, RandomForest
algorith_you_are_using = 'SVM (RBF)' 

In [29]:
'''svm = SVC(kernel='rbf', verbose=False, C=1)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  KINEMATICS
X.shape =  (72, 96)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:   18.6s finished

[2022-09-19 13:24:23] Features: 1/96 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done  95 out of  95 | elapsed:   12.4s finished

[2022-09-19 13:24:35] Features: 2/96 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done  94 out of  94 | elapsed:   17.6s finished

[2022-09-19 13:24:53] Features: 3/96 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done  93 out o


sfs.k_score_ =  0.9166666666666666
sfs.k_feature_idx_ =  (5, 9, 11, 13, 17, 27, 31, 33, 35, 37, 53, 56, 57, 72, 80, 92, 94)
[After SFS] X.shape =  (72, 17)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=1, tol=0.01)

##############################################################

SVM (RBF), KINEMATICS, 0.917, 0.889, 0.944, 0.941, 0.895, 8.500, 0.919, 0.835

##############################################################


In [36]:
'''svm = SVC(kernel='rbf', C=30, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  KINEMATICS
X.shape =  (72, 96)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:   13.3s finished

[2022-09-19 13:34:52] Features: 1/96 -- score: 0.7638888888888888[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done  95 out of  95 | elapsed:   15.4s finished

[2022-09-19 13:35:08] Features: 2/96 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  94 out of  94 | elapsed:    9.0s finished

[2022-09-19 13:35:17] Features: 3/96 -- score: 0.8472222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  90 out o


sfs.k_score_ =  0.9583333333333334
sfs.k_feature_idx_ =  (0, 2, 4, 6, 10, 11, 13, 15, 23, 24, 26, 28, 30, 35, 37, 43, 45, 47, 48, 49, 50, 54, 55, 57, 60, 61, 64, 66, 78)
[After SFS] X.shape =  (72, 29)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=8, gamma=0.043, tol=0.01)

##############################################################

SVM (RBF), KINEMATICS, 0.958, 1.000, 0.917, 0.923, 1.000, 0.917, 0.957, 0.920

##############################################################


In [43]:
'''svm = SVC(kernel='rbf', C=60, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  KINEMATICS
X.shape =  (72, 96)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:   17.0s finished

[2022-09-19 13:46:10] Features: 1/96 -- score: 0.7638888888888888[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done  95 out of  95 | elapsed:   15.6s finished

[2022-09-19 13:46:25] Features: 2/96 -- score: 0.8055555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  70 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done  91 out of  94 | elapsed:   12.3s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  94 out of  94 | elapsed:   12.5s finished

[2022-09-19 13:46:38] Features: 3/96 -- score: 0.8472222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-


sfs.k_score_ =  0.9305555555555556
sfs.k_feature_idx_ =  (0, 2, 4, 6, 9, 11, 13, 17, 19, 24, 33, 34, 35, 37, 39, 40, 41, 43, 44, 45, 47, 48, 49, 50, 52, 53, 54, 57, 60, 62, 78, 90, 94)
[After SFS] X.shape =  (72, 33)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=5, gamma=0.04, tol=0.01)

##############################################################

SVM (RBF), KINEMATICS, 0.944, 0.944, 0.944, 0.944, 0.944, 17.000, 0.944, 0.889

##############################################################


In [50]:
'''svm = SVC(kernel='rbf', verbose=False, C=90)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  KINEMATICS
X.shape =  (72, 96)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.7s
[Parallel(n_jobs=-1)]: Done  96 out of  96 | elapsed:   21.1s finished

[2022-09-19 13:57:53] Features: 1/96 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done  95 out of  95 | elapsed:   11.8s finished

[2022-09-19 13:58:05] Features: 2/96 -- score: 0.8055555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  94 out of  94 | elapsed:   10.3s finished

[2022-09-19 13:58:15] Features: 3/96 -- score: 0.8472222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  93 out of  93 | elapse


sfs.k_score_ =  0.9305555555555556
sfs.k_feature_idx_ =  (0, 2, 4, 6, 7, 10, 11, 13, 15, 16, 17, 18, 19, 21, 23, 24, 26, 28, 30, 33, 35, 37, 40, 41, 42, 47, 53, 57, 62, 63, 65, 78, 79, 81, 84, 90)
[After SFS] X.shape =  (72, 36)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=8, tol=0.01)

##############################################################

SVM (RBF), KINEMATICS, 0.931, 0.944, 0.917, 0.919, 0.943, 16.500, 0.930, 0.861

##############################################################


# **Verification**

In [51]:
# get the names of the feature subset selected using the Feature Selection algorithm.
sfs_feature_idx = [0, 2, 4, 6, 10, 11, 13, 15, 23, 24, 26, 28, 30, 35, 37, 43, 45, 47, 48, 49, 50, 54, 55, 57, 60, 61, 64, 66, 78]
print("Number of Features selected: ", len(sfs_feature_idx))

segments["KINEMATICS"].iloc[:, sfs_feature_idx].head()

Number of Features selected:  29


Unnamed: 0,Min Stance_Pelv_X,Max Stance_Pelv_X,Min Swing_Pelv_X,Max Swing_Pelv_X,Max Stance_Pelv_Y,TimeMax Stance_Pelv_Y,TimeMin Swing_Pelv_Y,TimeMax Swing_Pelv_Y,TimeMax Swing_Pelv_Z,Min Stance_Trunk_X,...,TimeMin Stance_Hip_X,Max Stance_Hip_X,Max Swing_Hip_X,TimeMax Swing_Hip_X,TimeMin Stance_Hip_Y,Min Swing_Hip_Y,TimeMin Swing_Hip_Y,Min Stance_Hip_Z,Max Stance_Hip_Z,Max Swing_Knee_X
0,2.361861,4.812255,3.004734,6.042765,4.48039,14.035093,64.03508,86.842094,100.0,-8.504707,...,52.631588,22.124025,29.717546,82.456139,61.403507,-6.182244,67.543854,-2.979814,16.02681,62.762642
1,2.095112,6.042765,2.300779,5.046373,3.204445,14.096923,63.426228,85.465698,100.0,-9.158589,...,52.863449,22.58095,26.398714,83.26783,61.674011,-8.969019,66.515396,-13.662539,-5.225837,61.876545
2,3.227882,8.612376,2.51663,8.612287,8.623679,15.306108,63.265293,100.0,100.0,-11.825068,...,47.959187,23.687756,28.081585,84.693863,57.142849,-4.946302,62.244884,1.419459,25.018757,54.855919
3,2.51663,8.612376,4.870071,8.038762,6.077716,13.131301,65.151505,100.0,99.494949,-11.382055,...,49.494953,24.61726,29.371717,85.353546,56.565655,-6.347039,64.646454,8.142233,23.043734,58.152878
4,10.515388,12.8866,9.552069,13.517436,4.959414,11.335794,62.006264,91.406662,100.0,-13.153559,...,52.027618,32.626053,33.340851,85.518623,58.823544,-11.430178,67.854523,-17.272408,-9.048944,56.608116


In [52]:
temp = pd.DataFrame(segments["KINEMATICS"].keys().to_numpy(), columns=["FeatureNames"])

features = temp.iloc[sfs_feature_idx]
features

Unnamed: 0,FeatureNames
0,Min Stance_Pelv_X
2,Max Stance_Pelv_X
4,Min Swing_Pelv_X
6,Max Swing_Pelv_X
10,Max Stance_Pelv_Y
11,TimeMax Stance_Pelv_Y
13,TimeMin Swing_Pelv_Y
15,TimeMax Swing_Pelv_Y
23,TimeMax Swing_Pelv_Z
24,Min Stance_Trunk_X


In [53]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.iloc[:, sfs_feature_idx].values
  X = sc.fit_transform(X) # Standard Scaler
  print("X.shape = ", X.shape)

  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])

  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")

Running:  KINEMATICS
X.shape =  (72, 29)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=8, gamma=0.043, tol=0.01)

##############################################################

SVM (RBF), KINEMATICS, 0.958, 1.000, 0.917, 0.923, 1.000, 0.917, 0.957, 0.920

##############################################################


In [54]:
search_results.best_params_

{'C': 8, 'gamma': 0.043, 'kernel': 'rbf', 'tol': 0.01}

In [55]:
search_results.best_score_

0.9583333333333334

In [None]:
##############################################################################
//////////////////////////////////////////////////////////////////////////////
##############################################################################