# **Mounting Google Drive**

In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
import os
os.getcwd()

'/gdrive'

#### Move to the Dataset Dircetory in My Drive

In [3]:
os.chdir("/gdrive/MyDrive/Autism_code/Young_vs_Old/SSF_MSF")
!pwd

/gdrive/MyDrive/Autism_code/Young_vs_Old/SSF_MSF


In [4]:
# importing necessary packages
import matplotlib.pyplot as plt  # for making plots / graphs
import pandas as pd              # for reading the .csv file and related operations
import numpy as np               # for working with arrays (multi-dimensional)  

# read the dataset
df = pd.read_csv("./TS_Kinematics_SSF_MSF_Planar_dataset_2022-reduced_vars.csv")

# now, the whole dataset csv dataset file is saved into `df` variable.
print("df.shape = ", df.shape)
df.head(3)

df.shape =  (72, 260)


Unnamed: 0,Participant,Age group,Processed speed,SIDE,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,P001,Y,N,L,1.135,0.682212,1.246092,0.26,0.44,0.7,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,P001,Y,N,R,1.135,0.73205,1.246092,0.26,0.435,0.7,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,P002,Y,N,L,0.985,0.701486,1.412893,0.135,0.42,0.56,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [5]:
# print the columns of the data frame
print(df.columns)

Index(['Participant', 'Age group', 'Processed speed', 'SIDE',
       'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=260)


In [6]:
# Remove unwanted columns- columns not needed for this analysis.
# dropping "Collected Speed", "Dimensionless Speed", 

In [6]:
df_Cal_Mid_cols = df.loc[:, 'Min Stance_Cal_Mid_X' : 'TimeMax Swing_Cal_Mid_Z'].columns.values
df_Mid_met_cols = df.loc[:, 'Min Stance_Mid_Met_X' : 'TimeMax Swing_Mid_Met_Z'].columns.values

In [7]:
print("df_Cal_Mid_cols.shape = ", df_Cal_Mid_cols.shape)
print("df_Mid_met_cols.shape = ", df_Mid_met_cols.shape)

df_Cal_Mid_cols.shape =  (24,)
df_Mid_met_cols.shape =  (24,)


In [8]:
df_Cal_Mid_Mid_Met_cols = np.append(df_Cal_Mid_cols, df_Mid_met_cols)
print("df_Cal_Mid_Mid_Met_cols.shape = ", df_Cal_Mid_Mid_Met_cols.shape)

df_Cal_Mid_Mid_Met_cols.shape =  (48,)


In [9]:
# Remove the Columns: ["Participant", "Side"]- These columns were not needed.
df = df.drop(["Participant", "SIDE", "Processed speed"], axis=1)
print("df.shape = ", df.shape)
df.columns

df.shape =  (72, 257)


Index(['Age group', 'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       'Steps_Per_Minute_Mean', 'Stride_Length_Mean', 'Min Stance_Pelv_X',
       ...
       'Max Stance_S2G', 'TimeMax Stance_S2G', 'Min Stance_S2V',
       'TimeMin Stance_S2V', 'Max Stance_S2V', 'TimeMax Stance_S2V',
       'Min Stance_V2G', 'TimeMin Stance_V2G', 'Max Stance_V2G',
       'TimeMax Stance_V2G'],
      dtype='object', length=257)

### Label encode target variable - `y`

In [10]:
# First, look at the target variable
print(df.loc[:, "Age group"].values.shape)
print(df.loc[:, "Age group"].values)

(72,)
['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O']


In [11]:
# Perform Data Preprocessing
# Label Encoding the class variables 
# Here, we replace the "Control" and "Autism" keywords with 0 and 1 values, respectively.
df["Age group"] = df["Age group"].replace({'O': 0, 'Y': 1})
df.head(3)

Unnamed: 0,Age group,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean,Min Stance_Pelv_X,...,Max Stance_S2G,TimeMax Stance_S2G,Min Stance_S2V,TimeMin Stance_S2V,Max Stance_S2V,TimeMax Stance_S2V,Min Stance_V2G,TimeMin Stance_V2G,Max Stance_V2G,TimeMax Stance_V2G
0,1,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565,2.361861,...,85.003563,59.649124,-10.577213,2.631576,7.834396,38.596493,-19.257086,0,74.823166,61.403507
1,1,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065,2.095112,...,85.173409,60.176991,-6.651151,2.654886,6.187871,44.24778,-17.311865,0,75.929893,61.946899
2,1,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637,3.227882,...,91.685211,57.142849,-6.31691,0.0,9.186202,53.061218,-12.266258,0,55.614689,57.142849


In [12]:
# saving the target variables into `y` variable.
y = df.loc[:, "Age group"].values
print("y.shape = ", y.shape)
print("y = ", y)

y.shape =  (72,)
y =  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]


In [13]:
# Perform Data Preprocessing- Data Standardization
# Defining a Standard Scaler for scaling the values in the dataset
# in the range of [-a, +a], i.e. scale values to a smaller range.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [14]:
df.shape

(72, 257)

In [15]:
# Define the different segments from dataset to be used.
segments = {
    # 'MSF': df.loc[:,'Min Stance_Cal_Met_X' : 'TimeMax Swing_Sha_Cal_Z'],
    # 'SSF' : df.loc[:,'Min_Stance_Sha_Foot_X' : 'TimeMax Swing_Sha_Foot_Z']
    # 'cal_mid' : df.loc[:,'Min Stance_Cal_Mid_X':'TimeMax Swing_Cal_Mid_Z'],
    # 'Mid_met' : df.loc[:,'Min Stance_Mid_Met_X':'TimeMax Swing_Mid_Met_Z']
    'Cal_Mid_Mid_Met' : df.loc[:, df_Cal_Mid_Mid_Met_cols]
}

In [16]:
print(segments['Cal_Mid_Mid_Met'].shape)

(72, 48)


In [17]:
segments['Cal_Mid_Mid_Met'].head()

Unnamed: 0,Min Stance_Cal_Mid_X,TimeMin Stance_Cal_Mid_X,Max Stance_Cal_Mid_X,TimeMax Stance_Cal_Mid_X,Min Swing_Cal_Mid_X,TimeMin Swing_Cal_Mid_X,Max Swing_Cal_Mid_X,TimeMax Swing_Cal_Mid_X,Min Stance_Cal_Mid_Y,TimeMin Stance_Cal_Mid_Y,...,Max Swing_Mid_Met_Y,TimeMax Swing_Mid_Met_Y,Min Stance_Mid_Met_Z,TimeMin Stance_Mid_Met_Z,Max Stance_Mid_Met_Z,TimeMax Stance_Mid_Met_Z,Min Swing_Mid_Met_Z,TimeMin Swing_Mid_Met_Z,Max Swing_Mid_Met_Z,TimeMax Swing_Mid_Met_Z
0,16.543676,61.403507,31.437777,50.877182,16.543676,61.403507,20.926363,83.333328,8.796873,37.719299,...,15.805726,66.666656,-23.606091,0.0,-20.13785,60.526314,-23.394588,96.491234,-20.139137,61.403507
1,24.506611,61.674011,37.306679,51.101334,23.302162,61.674011,33.323246,87.805359,7.783422,43.171806,...,21.020073,99.108696,-23.526367,1.762134,-20.109074,40.528637,-23.622841,88.504227,-16.627419,87.339455
2,15.469666,57.142849,32.50647,42.857124,12.905558,59.183662,23.2164,100.0,19.752295,38.775494,...,6.859684,77.551018,-15.88074,57.142849,-11.222691,33.673458,-16.584131,60.204071,-14.079123,81.632645
3,16.562412,56.565655,37.728943,41.414127,14.14655,59.595955,25.851536,90.909088,18.383307,37.373726,...,8.558912,94.949493,-22.793062,56.565655,-16.171328,46.464657,-24.00285,59.595955,-19.738171,94.949486
4,26.373043,58.823544,39.538559,45.691296,24.647732,61.081284,31.471199,88.810745,4.413151,30.762579,...,18.084152,100.0,-17.196751,29.411772,-13.273298,30.748665,-18.737881,86.801666,-16.84441,79.411774


In [18]:
print(segments["Cal_Mid_Mid_Met"].shape)
segments["Cal_Mid_Mid_Met"].head()

(72, 48)


Unnamed: 0,Min Stance_Cal_Mid_X,TimeMin Stance_Cal_Mid_X,Max Stance_Cal_Mid_X,TimeMax Stance_Cal_Mid_X,Min Swing_Cal_Mid_X,TimeMin Swing_Cal_Mid_X,Max Swing_Cal_Mid_X,TimeMax Swing_Cal_Mid_X,Min Stance_Cal_Mid_Y,TimeMin Stance_Cal_Mid_Y,...,Max Swing_Mid_Met_Y,TimeMax Swing_Mid_Met_Y,Min Stance_Mid_Met_Z,TimeMin Stance_Mid_Met_Z,Max Stance_Mid_Met_Z,TimeMax Stance_Mid_Met_Z,Min Swing_Mid_Met_Z,TimeMin Swing_Mid_Met_Z,Max Swing_Mid_Met_Z,TimeMax Swing_Mid_Met_Z
0,16.543676,61.403507,31.437777,50.877182,16.543676,61.403507,20.926363,83.333328,8.796873,37.719299,...,15.805726,66.666656,-23.606091,0.0,-20.13785,60.526314,-23.394588,96.491234,-20.139137,61.403507
1,24.506611,61.674011,37.306679,51.101334,23.302162,61.674011,33.323246,87.805359,7.783422,43.171806,...,21.020073,99.108696,-23.526367,1.762134,-20.109074,40.528637,-23.622841,88.504227,-16.627419,87.339455
2,15.469666,57.142849,32.50647,42.857124,12.905558,59.183662,23.2164,100.0,19.752295,38.775494,...,6.859684,77.551018,-15.88074,57.142849,-11.222691,33.673458,-16.584131,60.204071,-14.079123,81.632645
3,16.562412,56.565655,37.728943,41.414127,14.14655,59.595955,25.851536,90.909088,18.383307,37.373726,...,8.558912,94.949493,-22.793062,56.565655,-16.171328,46.464657,-24.00285,59.595955,-19.738171,94.949486
4,26.373043,58.823544,39.538559,45.691296,24.647732,61.081284,31.471199,88.810745,4.413151,30.762579,...,18.084152,100.0,-17.196751,29.411772,-13.273298,30.748665,-18.737881,86.801666,-16.84441,79.411774


In [19]:
segments["Cal_Mid_Mid_Met"].columns

Index(['Min Stance_Cal_Mid_X', 'TimeMin Stance_Cal_Mid_X',
       'Max Stance_Cal_Mid_X', 'TimeMax Stance_Cal_Mid_X',
       'Min Swing_Cal_Mid_X', 'TimeMin Swing_Cal_Mid_X', 'Max Swing_Cal_Mid_X',
       'TimeMax Swing_Cal_Mid_X', 'Min Stance_Cal_Mid_Y',
       'TimeMin Stance_Cal_Mid_Y', 'Max Stance_Cal_Mid_Y',
       'TimeMax Stance_Cal_Mid_Y', 'Min Swing_Cal_Mid_Y',
       'TimeMin Swing_Cal_Mid_Y', 'Max Swing_Cal_Mid_Y',
       'TimeMax Swing_Cal_Mid_Y', 'Min Stance_Cal_Mid_Z',
       'TimeMin Stance_Cal_Mid_Z', 'Max Stance_Cal_Mid_Z',
       'TimeMax Stance_Cal_Mid_Z', 'Min Swing_Cal_Mid_Z',
       'TimeMin Swing_Cal_Mid_Z', 'Max Swing_Cal_Mid_Z',
       'TimeMax Swing_Cal_Mid_Z', 'Min Stance_Mid_Met_X',
       'TimeMin Stance_Mid_Met_X', 'Max Stance_Mid_Met_X',
       'TimeMax Stance_Mid_Met_X', 'Min Swing_Mid_Met_X',
       'TimeMin Swing_Mid_Met_X', 'Max Swing_Mid_Met_X',
       'TimeMax Swing_Mid_Met_X', 'Min Stance_Mid_Met_Y',
       'TimeMin Stance_Mid_Met_Y', 'Max Stance_M

# Defining **Cross Validation** method to be used

In [20]:
# Define Leave-One-Out CV
from sklearn.model_selection import LeaveOneOut
loocv = LeaveOneOut()

# # Define Repeated Stratified k-fold CV
# from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
# rskf_cv = RepeatedStratifiedKFold(n_splits=8, n_repeats=5, random_state=36851234)

# Defning the **Classifer** to be used

In [42]:
# Define the Classifier to be used for Sequential Feature Selection (SFS)

# # Apply Linear LDA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# lda = LinearDiscriminantAnalysis(solver='svd', n_components=None)

# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# Define SVM classifier with RBF kernel
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=90, verbose=False)


# Defining the **Feature Selection** algorithm to be used

In [43]:
!pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [44]:
# Define the Sequential Feature Selection class
# https://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

# Below is the code for applying Forward Feature Selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
sfs = SFS(estimator=svm, 
            k_features=(1,15),
            forward=True, floating=False,
            verbose=2,
            scoring=('accuracy'),
            cv=loocv,
            n_jobs=-1)

# **Hyper-Parameter Optimization** for Non-Linear SVC (RBF)

In [45]:
# Define the Classifier and Parameter Grid to be used for GridSearch and final Evaluation
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import SVC
svm_classifier = SVC()

param_grid = [
              {'C': [0.01, 0.1, 0, 0.5, 1, 2, 3, 5, 8, 20, 50, 90], 
               'gamma': ['scale', 'auto', 0.01, 0.03, 0.04, 0.043, 0.045, 0.048, 0.05, 0.053, 0.055, 0.058, 0.06, 0.08, 0.0001, 0.001, 0.1, 1, 10], 
               'tol':[1e-2, 1e-3, 1e-4, 1e-5], 
               'kernel': ['linear']}, #rbfSVM
]

:### Change the `estimator` in GridSearch to the estimator you are using.

In [46]:
# Define Grid Search class
from sklearn.model_selection import GridSearchCV
gridSearch = GridSearchCV(estimator=svm_classifier, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                          n_jobs=-1,
                          cv=loocv, # uses Leave One Out CV
                          refit=True, verbose=1)

# Main Driver Code: **Non Linear SVM (RBF)**

In [47]:
# Type the name of the Algorithm that you are using
# This will be used while Writing the Scores in .txt file
# LDA, LinearSVM, SVM (RBF), SVM (polynomial), LogisticRegression, RandomForest
algorith_you_are_using = 'linear' 

In [27]:
'''svm = SVC(kernel='linear', verbose=False, C=1)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Mid_Mid_Met
X.shape =  (72, 48)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    9.1s finished

[2022-09-22 14:02:35] Features: 1/48 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done  47 out of  47 | elapsed:    4.8s finished

[2022-09-22 14:02:40] Features: 2/48 -- score: 0.7916666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 out of  46 | elapsed:    6.0s finished

[2022-09-22 14:02:46] Features: 3/48 -- score: 0.8333333333333334[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 out of  45 | elapsed:    3.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    4.0s finished

[2022-09-22 14:02:50] 


sfs.k_score_ =  0.9444444444444444
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 6, 11, 15, 16, 17, 18, 20, 22, 24, 26, 27, 28, 29, 33, 35, 37, 39, 40, 41, 42, 44, 46, 47)
[After SFS] X.shape =  (72, 29)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.80555556 0.80555556 0.80555556 0.80555556

best_classifier =  SVC(C=1, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Mid_Mid_Met, 0.944, 0.944, 0.944, 0.944, 0.944, 17.000, 0.944, 0.889

##############################################################


In [34]:
'''svm = SVC(kernel='linear', C=30, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Mid_Mid_Met
X.shape =  (72, 48)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    6.7s finished

[2022-09-22 14:09:33] Features: 1/48 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done  47 out of  47 | elapsed:   10.9s finished

[2022-09-22 14:09:44] Features: 2/48 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done  46 out of  46 | elapsed:   10.5s finished

[2022-09-22 14:09:55] Features: 3/48 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   10.8s finished

[2022-09-22 14:10:06] Featu


sfs.k_score_ =  0.9027777777777778
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 46, 47)
[After SFS] X.shape =  (72, 43)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.80555556 0.80555556 0.80555556 0.80555556

best_classifier =  SVC(C=2, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Mid_Mid_Met, 0.903, 0.861, 0.944, 0.939, 0.872, 6.800, 0.907, 0.808

##############################################################


In [41]:
'''svm = SVC(kernel='linear', C=60, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Mid_Mid_Met
X.shape =  (72, 48)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    8.1s finished

[2022-09-22 14:23:42] Features: 1/48 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   11.5s
[Parallel(n_jobs=-1)]: Done  47 out of  47 | elapsed:   14.0s finished

[2022-09-22 14:23:56] Features: 2/48 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done  46 out of  46 | elapsed:   10.6s finished

[2022-09-22 14:24:07] Features: 3/48 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done  45 out o


sfs.k_score_ =  0.8888888888888888
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)
[After SFS] X.shape =  (72, 48)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.80555556 0.80555556 0.80555556 0.80555556

best_classifier =  SVC(C=0.5, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Mid_Mid_Met, 0.889, 0.833, 0.944, 0.938, 0.850, 5.667, 0.895, 0.783

##############################################################


In [48]:
'''svm = SVC(kernel='linear', verbose=False, C=90)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  Cal_Mid_Mid_Met
X.shape =  (72, 48)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    7.7s finished

[2022-09-22 14:48:46] Features: 1/48 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done  47 out of  47 | elapsed:   16.3s finished

[2022-09-22 14:49:02] Features: 2/48 -- score: 0.7777777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   10.5s
[Parallel(n_jobs=-1)]: Done  46 out of  46 | elapsed:   12.3s finished

[2022-09-22 14:49:15] Features: 3/48 -- score: 0.8194444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   15.9s
[Parallel(n_jobs=-1)]: Done  45 out o


sfs.k_score_ =  0.9166666666666666
sfs.k_feature_idx_ =  (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47)
[After SFS] X.shape =  (72, 44)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.75       0.75       0.75       0.75      

best_classifier =  SVC(C=0.5, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Mid_Mid_Met, 0.917, 0.889, 0.944, 0.941, 0.895, 8.500, 0.919, 0.835

##############################################################


# **Verification**

In [49]:
# get the names of the feature subset selected using the Feature Selection algorithm.
sfs_feature_idx = [0, 1, 2, 3, 4, 5, 6, 11, 15, 16, 17, 18, 20, 22, 24, 26, 27, 28, 29, 33, 35, 37, 39, 40, 41, 42, 44, 46, 47]
print("Number of Features selected: ", len(sfs_feature_idx))

segments["Cal_Mid_Mid_Met"].iloc[:, sfs_feature_idx].head()

Number of Features selected:  29


Unnamed: 0,Min Stance_Cal_Mid_X,TimeMin Stance_Cal_Mid_X,Max Stance_Cal_Mid_X,TimeMax Stance_Cal_Mid_X,Min Swing_Cal_Mid_X,TimeMin Swing_Cal_Mid_X,Max Swing_Cal_Mid_X,TimeMax Stance_Cal_Mid_Y,TimeMax Swing_Cal_Mid_Y,Min Stance_Cal_Mid_Z,...,TimeMin Stance_Mid_Met_Y,TimeMax Stance_Mid_Met_Y,TimeMin Swing_Mid_Met_Y,TimeMax Swing_Mid_Met_Y,Min Stance_Mid_Met_Z,TimeMin Stance_Mid_Met_Z,Max Stance_Mid_Met_Z,Min Swing_Mid_Met_Z,Max Swing_Mid_Met_Z,TimeMax Swing_Mid_Met_Z
0,16.543676,61.403507,31.437777,50.877182,16.543676,61.403507,20.926363,58.771931,61.403507,9.231457,...,58.771931,3.508768,61.403507,66.666656,-23.606091,0.0,-20.13785,-23.394588,-20.139137,61.403507
1,24.506611,61.674011,37.306679,51.101334,23.302162,61.674011,33.323246,1.762134,82.173958,14.356872,...,58.149784,3.524247,61.674011,99.108696,-23.526367,1.762134,-20.109074,-23.622841,-16.627419,87.339455
2,15.469666,57.142849,32.50647,42.857124,12.905558,59.183662,23.2164,57.142849,59.183662,-8.341793,...,55.102032,1.020407,57.142849,77.551018,-15.88074,57.142849,-11.222691,-16.584131,-14.079123,81.632645
3,16.562412,56.565655,37.728943,41.414127,14.14655,59.595955,25.851536,55.555553,56.565655,-0.155559,...,56.565655,44.444427,56.565655,94.949493,-22.793062,56.565655,-16.171328,-24.00285,-19.738171,94.949486
4,26.373043,58.823544,39.538559,45.691296,24.647732,61.081284,31.471199,0.0,92.739426,8.962367,...,58.363976,0.905202,59.271114,100.0,-17.196751,29.411772,-13.273298,-18.737881,-16.84441,79.411774


In [50]:
temp = pd.DataFrame(segments["Cal_Mid_Mid_Met"].keys().to_numpy(), columns=["FeatureNames"])

features = temp.iloc[sfs_feature_idx]
features

Unnamed: 0,FeatureNames
0,Min Stance_Cal_Mid_X
1,TimeMin Stance_Cal_Mid_X
2,Max Stance_Cal_Mid_X
3,TimeMax Stance_Cal_Mid_X
4,Min Swing_Cal_Mid_X
5,TimeMin Swing_Cal_Mid_X
6,Max Swing_Cal_Mid_X
11,TimeMax Stance_Cal_Mid_Y
15,TimeMax Swing_Cal_Mid_Y
16,Min Stance_Cal_Mid_Z


In [51]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.iloc[:, sfs_feature_idx].values
  X = sc.fit_transform(X) # Standard Scaler
  print("X.shape = ", X.shape)

  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])

  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")

Running:  Cal_Mid_Mid_Met
X.shape =  (72, 29)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.80555556 0.80555556 0.80555556 0.80555556

best_classifier =  SVC(C=1, kernel='linear', tol=0.01)

##############################################################

linear, Cal_Mid_Mid_Met, 0.944, 0.944, 0.944, 0.944, 0.944, 17.000, 0.944, 0.889

##############################################################


In [52]:
search_results.best_params_

{'C': 1, 'gamma': 'scale', 'kernel': 'linear', 'tol': 0.01}

In [53]:
search_results.best_score_

0.9444444444444444

In [None]:
##############################################################################
//////////////////////////////////////////////////////////////////////////////
##############################################################################