# **Mounting Google Drive**

In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
import os
os.getcwd()

'/gdrive'

#### Move to the Dataset Dircetory in My Drive

In [3]:
os.chdir("/gdrive/MyDrive/Autism_code/Young_vs_Old/SSF_MSF")
!pwd

/gdrive/MyDrive/Autism_code/Young_vs_Old/SSF_MSF


In [4]:
# importing necessary packages
import matplotlib.pyplot as plt  # for making plots / graphs
import pandas as pd              # for reading the .csv file and related operations
import numpy as np               # for working with arrays (multi-dimensional)  

# read the dataset
df = pd.read_csv("./TS_Kinematics_SSF_MSF_Planar_dataset_2022-reduced_vars.csv")

# now, the whole dataset csv dataset file is saved into `df` variable.
print("df.shape = ", df.shape)
df.head(3)

df.shape =  (72, 260)


Unnamed: 0,Participant,Age group,Processed speed,SIDE,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,P001,Y,N,L,1.135,0.682212,1.246092,0.26,0.44,0.7,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,P001,Y,N,R,1.135,0.73205,1.246092,0.26,0.435,0.7,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,P002,Y,N,L,0.985,0.701486,1.412893,0.135,0.42,0.56,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [5]:
# print the columns of the data frame
print(df.columns)

Index(['Participant', 'Age group', 'Processed speed', 'SIDE',
       'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=260)


In [6]:
# Remove unwanted columns- columns not needed for this analysis.
# dropping "Collected Speed", "Dimensionless Speed", 

In [7]:
df_Cal_Met_cols = df.loc[:, 'Min Stance_Cal_Met_X' : 'TimeMax Swing_Cal_Met_Z'].columns.values
df_Cal_Mid_cols = df.loc[:, 'Min Stance_Cal_Mid_X' : 'TimeMax Swing_Cal_Mid_Z'].columns.values
df_Sha_Cal_cols = df.loc[:, 'Min Stance_Sha_Cal_X' : 'TimeMax Swing_Sha_Cal_Z'].columns.values

In [8]:
print("df_Cal_Met_cols.shape = ", df_Cal_Met_cols.shape)
print("df_Cal_Mid_cols.shape = ", df_Cal_Mid_cols.shape)
print("df_Sha_Cal_cols.shape = ", df_Sha_Cal_cols.shape)

df_Cal_Met_cols.shape =  (24,)
df_Cal_Mid_cols.shape =  (24,)
df_Sha_Cal_cols.shape =  (24,)


In [11]:
df_Cal_Met_Cal_Mid_Sha_Cal_cols = np.append(df_Cal_Met_cols, df_Cal_Mid_cols)
df_Cal_Met_Cal_Mid_Sha_Cal_cols = np.append(df_Cal_Met_Cal_Mid_Sha_Cal_cols, df_Sha_Cal_cols)
print("df_Cal_Met_Cal_Mid_Sha_Cal_cols.shape = ", df_Cal_Met_Cal_Mid_Sha_Cal_cols.shape)

df_Cal_Met_Cal_Mid_Sha_Cal_cols.shape =  (72,)


In [12]:
# Remove the Columns: ["Participant", "Side"]- These columns were not needed.
df = df.drop(["Participant", "SIDE", "Processed speed"], axis=1)
print("df.shape = ", df.shape)
df.columns

df.shape =  (72, 257)


Index(['Age group', 'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       'Steps_Per_Minute_Mean', 'Stride_Length_Mean', 'Min Stance_Pelv_X',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=257)

### Label encode target variable - `y`

In [13]:
# First, look at the target variable
print(df.loc[:, "Age group"].values.shape)
print(df.loc[:, "Age group"].values)

(72,)
['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O']


In [14]:
# Perform Data Preprocessing
# Label Encoding the class variables 
# Here, we replace the "Control" and "Autism" keywords with 0 and 1 values, respectively.
df["Age group"] = df["Age group"].replace({'O': 0, 'Y': 1})
df.head(3)

Unnamed: 0,Age group,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean,Min Stance_Pelv_X,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,1,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565,2.361861,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,1,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065,2.095112,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,1,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637,3.227882,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [15]:
# saving the target variables into `y` variable.
y = df.loc[:, "Age group"].values
print("y.shape = ", y.shape)
print("y = ", y)

y.shape =  (72,)
y =  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]


In [16]:
# Perform Data Preprocessing- Data Standardization
# Defining a Standard Scaler for scaling the values in the dataset
# in the range of [-a, +a], i.e. scale values to a smaller range.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [17]:
df.shape

(72, 257)

In [19]:
# Define the different segments from dataset to be used.
segments = {
    # 'MSF': df.loc[:,'Min Stance_Cal_Met_X' : 'TimeMax Swing_Sha_Cal_Z'],
    # 'SSF' : df.loc[:,'Min_Stance_Sha_Foot_X' : 'TimeMax Swing_Sha_Foot_Z']
    # 'cal_mid' : df.loc[:,'Min Stance_Cal_Mid_X':'TimeMax Swing_Cal_Mid_Z'],
    # 'Mid_met' : df.loc[:,'Min Stance_Mid_Met_X':'TimeMax Swing_Mid_Met_Z']
    # 'df_Cal_Mid_Sha_Cal' : df.loc[:, df_Cal_Mid_Sha_Cal_cols]
    'Cal_Met_Cal_Mid_Sha_Cal' : df.loc[:, df_Cal_Met_Cal_Mid_Sha_Cal_cols]
}

In [20]:
print(segments['Cal_Met_Cal_Mid_Sha_Cal'].shape)

(72, 72)


In [21]:
segments['Cal_Met_Cal_Mid_Sha_Cal'].head()

Unnamed: 0,Min Stance_Cal_Met_X,TimeMin Stance_Cal_Met_X,Max Stance_Cal_Met_X,TimeMax Stance_Cal_Met_X,Min Swing_Cal_Met_X,TimeMin Swing_Cal_Met_X,Max Swing_Cal_Met_X,TimeMax Swing_Cal_Met_X,Min Stance_Cal_Met_Y,TimeMin Stance_Cal_Met_Y,...,Max Swing_Sha_Cal_Y,TimeMax Swing_Sha_Cal_Y,Min Stance_Sha_Cal_Z,TimeMin Stance_Sha_Cal_Z,Max Stance_Sha_Cal_Z,TimeMax Stance_Sha_Cal_Z,Min Swing_Sha_Cal_Z,TimeMin Swing_Sha_Cal_Z,Max Swing_Sha_Cal_Z,TimeMax Swing_Sha_Cal_Z
0,-54.026958,61.403507,-35.202721,49.999992,-54.026958,61.403507,-44.8116,83.333328,2.690046,59.649124,...,14.945835,97.368423,3.620955,13.157902,25.790897,61.403507,9.324308,91.228058,26.228918,62.280701
1,-56.390499,61.674011,-38.140205,50.220276,-56.392658,61.674011,-46.994694,85.435318,-2.237977,59.9119,...,15.397028,97.366608,4.055041,12.33481,21.218121,61.674011,4.444386,82.832306,20.604837,62.555183
2,-57.696159,57.142849,-44.452549,44.897938,-61.410961,60.204071,-53.242382,82.653053,8.506367,57.142849,...,-0.404834,93.877533,13.272318,37.755085,26.299669,57.142849,9.961165,67.346924,26.490465,58.163254
3,-47.870407,56.565655,-29.118155,41.414127,-51.179707,59.595955,-40.516735,83.838394,7.041844,56.565655,...,3.146802,92.424248,1.63754,36.363628,16.957184,56.565655,9.732217,86.868683,17.790756,59.090904
4,-40.664665,58.823544,-25.658703,46.150841,-43.398392,61.538799,-33.849476,82.793419,3.865908,58.823544,...,16.378757,96.389618,2.316156,12.213133,14.685467,58.823544,8.036254,86.105431,16.09873,61.528854


In [22]:
print(segments["Cal_Met_Cal_Mid_Sha_Cal"].shape)
segments["Cal_Met_Cal_Mid_Sha_Cal"].head()

(72, 72)


Unnamed: 0,Min Stance_Cal_Met_X,TimeMin Stance_Cal_Met_X,Max Stance_Cal_Met_X,TimeMax Stance_Cal_Met_X,Min Swing_Cal_Met_X,TimeMin Swing_Cal_Met_X,Max Swing_Cal_Met_X,TimeMax Swing_Cal_Met_X,Min Stance_Cal_Met_Y,TimeMin Stance_Cal_Met_Y,...,Max Swing_Sha_Cal_Y,TimeMax Swing_Sha_Cal_Y,Min Stance_Sha_Cal_Z,TimeMin Stance_Sha_Cal_Z,Max Stance_Sha_Cal_Z,TimeMax Stance_Sha_Cal_Z,Min Swing_Sha_Cal_Z,TimeMin Swing_Sha_Cal_Z,Max Swing_Sha_Cal_Z,TimeMax Swing_Sha_Cal_Z
0,-54.026958,61.403507,-35.202721,49.999992,-54.026958,61.403507,-44.8116,83.333328,2.690046,59.649124,...,14.945835,97.368423,3.620955,13.157902,25.790897,61.403507,9.324308,91.228058,26.228918,62.280701
1,-56.390499,61.674011,-38.140205,50.220276,-56.392658,61.674011,-46.994694,85.435318,-2.237977,59.9119,...,15.397028,97.366608,4.055041,12.33481,21.218121,61.674011,4.444386,82.832306,20.604837,62.555183
2,-57.696159,57.142849,-44.452549,44.897938,-61.410961,60.204071,-53.242382,82.653053,8.506367,57.142849,...,-0.404834,93.877533,13.272318,37.755085,26.299669,57.142849,9.961165,67.346924,26.490465,58.163254
3,-47.870407,56.565655,-29.118155,41.414127,-51.179707,59.595955,-40.516735,83.838394,7.041844,56.565655,...,3.146802,92.424248,1.63754,36.363628,16.957184,56.565655,9.732217,86.868683,17.790756,59.090904
4,-40.664665,58.823544,-25.658703,46.150841,-43.398392,61.538799,-33.849476,82.793419,3.865908,58.823544,...,16.378757,96.389618,2.316156,12.213133,14.685467,58.823544,8.036254,86.105431,16.09873,61.528854


In [23]:
segments["Cal_Met_Cal_Mid_Sha_Cal"].columns

Index(['Min Stance_Cal_Met_X', 'TimeMin Stance_Cal_Met_X',
       'Max Stance_Cal_Met_X', 'TimeMax Stance_Cal_Met_X',
       'Min Swing_Cal_Met_X', 'TimeMin Swing_Cal_Met_X', 'Max Swing_Cal_Met_X',
       'TimeMax Swing_Cal_Met_X', 'Min Stance_Cal_Met_Y',
       'TimeMin Stance_Cal_Met_Y', 'Max Stance_Cal_Met_Y',
       'TimeMax Stance_Cal_Met_Y', 'Min Swing_Cal_Met_Y',
       'TimeMin Swing_Cal_Met_Y', 'Max Swing_Cal_Met_Y',
       'TimeMax Swing_Cal_Met_Y', 'Min Stance_Cal_Met_Z',
       'TimeMin Stance_Cal_Met_Z', 'Max Stance_Cal_Met_Z',
       'TimeMax Stance_Cal_Met_Z', 'Min Swing_Cal_Met_Z',
       'TimeMin Swing_Cal_Met_Z', 'Max Swing_Cal_Met_Z',
       'TimeMax Swing_Cal_Met_Z', 'Min Stance_Cal_Mid_X',
       'TimeMin Stance_Cal_Mid_X', 'Max Stance_Cal_Mid_X',
       'TimeMax Stance_Cal_Mid_X', 'Min Swing_Cal_Mid_X',
       'TimeMin Swing_Cal_Mid_X', 'Max Swing_Cal_Mid_X',
       'TimeMax Swing_Cal_Mid_X', 'Min Stance_Cal_Mid_Y',
       'TimeMin Stance_Cal_Mid_Y', 'Max Stance_C

# Defining **Cross Validation** method to be used

In [24]:
# Define Leave-One-Out CV
from sklearn.model_selection import LeaveOneOut
loocv = LeaveOneOut()

# # Define Repeated Stratified k-fold CV
# from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
# rskf_cv = RepeatedStratifiedKFold(n_splits=8, n_repeats=5, random_state=36851234)

# Defning the **Classifer** to be used

In [47]:
# Define the Classifier to be used for Sequential Feature Selection (SFS)

# # Apply Linear LDA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# lda = LinearDiscriminantAnalysis(solver='svd', n_components=None)

# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# Define SVM classifier with RBF kernel
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=90, verbose=False)


# Defining the **Feature Selection** algorithm to be used

In [48]:
!pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [49]:
# Define the Sequential Feature Selection class
# https://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

# Below is the code for applying Forward Feature Selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
sfs = SFS(estimator=svm, 
            k_features=(1,15),
            forward=True, floating=False,
            verbose=2,
            scoring=('accuracy'),
            cv=loocv,
            n_jobs=-1)

# **Hyper-Parameter Optimization** for Non-Linear SVC (RBF)

In [50]:
# Define the Classifier and Parameter Grid to be used for GridSearch and final Evaluation
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import SVC
svm_classifier = SVC()

param_grid = [
              {'C': [0.01, 0.1, 0, 0.5, 1, 2, 3, 5, 8, 20, 50, 90], 
               'gamma': ['scale', 'auto', 0.01, 0.03, 0.04, 0.043, 0.045, 0.048, 0.05, 0.053, 0.055, 0.058, 0.06, 0.08, 0.0001, 0.001, 0.1, 1, 10], 
               'tol':[1e-2, 1e-3, 1e-4, 1e-5], 
               'kernel': ['linear']}, #rbfSVM
]

:### Change the `estimator` in GridSearch to the estimator you are using.

In [51]:
# Define Grid Search class
from sklearn.model_selection import GridSearchCV
gridSearch = GridSearchCV(estimator=svm_classifier, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                          n_jobs=-1,
                          cv=loocv, # uses Leave One Out CV
                          refit=True, verbose=1)

# Main Driver Code: **Non Linear SVM (RBF)**

In [52]:
# Type the name of the Algorithm that you are using
# This will be used while Writing the Scores in .txt file
# LDA, LinearSVM, SVM (RBF), SVM (polynomial), LogisticRegression, RandomForest
algorith_you_are_using = 'linear' 

In [32]:
'''svm = SVC(kernel='linear', verbose=False, C=1)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Met_Cal_Mid_Sha_Cal
X.shape =  (72, 72)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    7.4s finished

[2022-09-22 18:20:35] Features: 1/72 -- score: 0.6805555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  71 out of  71 | elapsed:    7.2s finished

[2022-09-22 18:20:42] Features: 2/72 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    5.9s finished

[2022-09-22 18:20:48] Features: 3/72 -- score: 0.8055555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  66 out of  69 | elapsed:    3.6s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  69 out of  69 | elapsed:    3.7s finished

[2022-09-22 18:20:52] 


sfs.k_score_ =  0.9305555555555556
sfs.k_feature_idx_ =  (0, 2, 4, 6, 15, 25, 27, 29, 31, 32, 34, 35, 37, 50, 54, 64)
[After SFS] X.shape =  (72, 16)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits
best_classifier =  SVC(C=0.5, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Met_Cal_Mid_Sha_Cal, 0.931, 0.889, 0.972, 0.970, 0.897, 8.750, 0.933, 0.864

##############################################################


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.73611111 0.73611111 0.73611111 0.73611111

In [39]:
'''svm = SVC(kernel='linear', C=30, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Met_Cal_Mid_Sha_Cal
X.shape =  (72, 72)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    8.3s finished

[2022-09-22 18:27:32] Features: 1/72 -- score: 0.6666666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  71 out of  71 | elapsed:    9.2s finished

[2022-09-22 18:27:41] Features: 2/72 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    6.4s finished

[2022-09-22 18:27:48] Features: 3/72 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  66 out of  69 | elapsed:    6.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  69 out of  69 | elapsed:    7.1s finished

[2022-09-22 18:27:55] 


sfs.k_score_ =  0.9305555555555556
sfs.k_feature_idx_ =  (0, 1, 3, 5, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 65, 66, 67, 68, 71)
[After SFS] X.shape =  (72, 59)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits
best_classifier =  SVC(C=0.5, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Met_Cal_Mid_Sha_Cal, 0.931, 0.917, 0.944, 0.943, 0.919, 11.333, 0.932, 0.861

##############################################################


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.79166667 0.79166667 0.79166667 0.79166667

In [46]:
'''svm = SVC(kernel='linear', C=60, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Met_Cal_Mid_Sha_Cal
X.shape =  (72, 72)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    7.6s finished

[2022-09-22 18:53:22] Features: 1/72 -- score: 0.6666666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done  71 out of  71 | elapsed:   11.1s finished

[2022-09-22 18:53:33] Features: 2/72 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  67 out of  70 | elapsed:    7.0s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    7.2s finished

[2022-09-22 18:53:40] Features: 3/72 -- score: 0.7638888888888888[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  69 


sfs.k_score_ =  0.9444444444444444
sfs.k_feature_idx_ =  (0, 5, 6, 7, 12, 15, 24, 29, 31, 35, 38, 40, 45, 71)
[After SFS] X.shape =  (72, 14)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.72222222 0.72222222 0.72222222 0.72222222

best_classifier =  SVC(C=50, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Met_Cal_Mid_Sha_Cal, 0.944, 0.972, 0.917, 0.921, 0.971, 33.000, 0.943, 0.890

##############################################################


In [53]:
'''svm = SVC(kernel='linear', verbose=False, C=90)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Met_Cal_Mid_Sha_Cal
X.shape =  (72, 72)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    7.9s finished

[2022-09-22 19:11:12] Features: 1/72 -- score: 0.6666666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done  71 out of  71 | elapsed:   12.0s finished

[2022-09-22 19:11:24] Features: 2/72 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:    9.1s finished

[2022-09-22 19:11:33] Features: 3/72 -- score: 0.7638888888888888[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done  69 out of  69 | elapse


sfs.k_score_ =  0.9444444444444444
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 36, 39, 40, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 66, 67, 68, 69, 70, 71)
[After SFS] X.shape =  (72, 62)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits
best_classifier =  SVC(C=0.5, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Met_Cal_Mid_Sha_Cal, 0.944, 0.917, 0.972, 0.971, 0.921, 11.667, 0.946, 0.890

##############################################################


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.79166667 0.79166667 0.79166667 0.79166667

# **Verification**

In [54]:
# get the names of the feature subset selected using the Feature Selection algorithm.
sfs_feature_idx = [0, 5, 6, 7, 12, 15, 24, 29, 31, 35, 38, 40, 45, 71]
print("Number of Features selected: ", len(sfs_feature_idx))

segments["Cal_Met_Cal_Mid_Sha_Cal"].iloc[:, sfs_feature_idx].head()

Number of Features selected:  14


Unnamed: 0,Min Stance_Cal_Met_X,TimeMin Swing_Cal_Met_X,Max Swing_Cal_Met_X,TimeMax Swing_Cal_Met_X,Min Swing_Cal_Met_Y,TimeMax Swing_Cal_Met_Y,Min Stance_Cal_Mid_X,TimeMin Swing_Cal_Mid_X,TimeMax Swing_Cal_Mid_X,TimeMax Stance_Cal_Mid_Y,Max Swing_Cal_Mid_Y,Min Stance_Cal_Mid_Z,TimeMin Swing_Cal_Mid_Z,TimeMax Swing_Sha_Cal_Z
0,-54.026958,61.403507,-44.8116,83.333328,3.315545,100.0,16.543676,61.403507,83.333328,58.771931,15.548207,9.231457,83.333328,62.280701
1,-56.390499,61.674011,-46.994694,85.435318,-1.213495,85.739174,24.506611,61.674011,87.805359,1.762134,14.974817,14.356872,74.425705,62.555183
2,-57.696159,60.204071,-53.242382,82.653053,7.937243,100.0,15.469666,59.183662,100.0,57.142849,30.5874,-8.341793,100.0,58.163254
3,-47.870407,59.595955,-40.516735,83.838394,5.57677,97.979813,16.562412,59.595955,90.909088,55.555553,26.770086,-0.155559,88.888885,59.090904
4,-40.664665,61.538799,-33.849476,82.793419,3.530224,100.0,26.373043,61.081284,88.810745,0.0,10.832327,8.962367,94.629166,61.528854


In [55]:
temp = pd.DataFrame(segments["Cal_Met_Cal_Mid_Sha_Cal"].keys().to_numpy(), columns=["FeatureNames"])

features = temp.iloc[sfs_feature_idx]
features

Unnamed: 0,FeatureNames
0,Min Stance_Cal_Met_X
5,TimeMin Swing_Cal_Met_X
6,Max Swing_Cal_Met_X
7,TimeMax Swing_Cal_Met_X
12,Min Swing_Cal_Met_Y
15,TimeMax Swing_Cal_Met_Y
24,Min Stance_Cal_Mid_X
29,TimeMin Swing_Cal_Mid_X
31,TimeMax Swing_Cal_Mid_X
35,TimeMax Stance_Cal_Mid_Y


In [56]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.iloc[:, sfs_feature_idx].values
  X = sc.fit_transform(X) # Standard Scaler
  print("X.shape = ", X.shape)

  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])

  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")

Running:  Cal_Met_Cal_Mid_Sha_Cal
X.shape =  (72, 14)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.72222222 0.72222222 0.72222222 0.72222222

best_classifier =  SVC(C=50, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Met_Cal_Mid_Sha_Cal, 0.944, 0.972, 0.917, 0.921, 0.971, 33.000, 0.943, 0.890

##############################################################


In [57]:
search_results.best_params_

{'C': 50, 'gamma': 'scale', 'kernel': 'linear', 'tol': 0.01}

In [58]:
search_results.best_score_

0.9444444444444444

In [None]:
##############################################################################
//////////////////////////////////////////////////////////////////////////////
##############################################################################