# **Mounting Google Drive**

In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
import os
os.getcwd()

'/gdrive'

#### Move to the Dataset Dircetory in My Drive

In [4]:
os.chdir("/gdrive/MyDrive/Autism_code/Young_vs_Old/TS")
!pwd

/gdrive/MyDrive/Autism_code/Young_vs_Old/TS


#### Loading the Kinematics dataset

In [6]:
# importing necessary packages
import matplotlib.pyplot as plt  # for making plots / graphs
import pandas as pd              # for reading the .csv file and related operations
import numpy as np               # for working with arrays (multi-dimensional)  

# read the dataset
df = pd.read_csv("./TS_Kinematics_SSF_MSF_Planar_dataset_2022-reduced_vars.csv")
df = df.loc[:, "Participant" : "Stride_Length_Mean"]

# now, the whole dataset csv dataset file is saved into `df` variable.
print("df.shape = ", df.shape)
df.head(3)

df.shape =  (72, 12)


Unnamed: 0,Participant,Age group,Processed speed,SIDE,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,P001,Y,N,L,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,P001,Y,N,R,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,P002,Y,N,L,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [7]:
# print the columns of the data frame
df.columns

Index(['Participant', 'Age group', 'Processed speed', 'SIDE',
       'Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       'Steps_Per_Minute_Mean', 'Stride_Length_Mean'],
      dtype='object')

### Label encode target variable - `y`

In [8]:
# First, look at the target variable
print(df.loc[:, "Age group"].values.shape)
print(df.loc[:, "Age group"].values)

(72,)
['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O' 'O'
 'O' 'O' 'O' 'O' 'O' 'O' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'O' 'O' 'O' 'O' 'O' 'O']


In [9]:
# Perform Data Preprocessing
# Label Encoding the class variables 
# Here, we replace the "Control" and "Autism" keywords with 0 and 1 values, respectively.
df["Age group"] = df["Age group"].replace({'O': 0, 'Y': 1})
df.head(3)

Unnamed: 0,Participant,Age group,Processed speed,SIDE,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,P001,1,N,L,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,P001,1,N,R,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,P002,1,N,L,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [10]:
df.drop(columns=["Participant", "Processed speed", "SIDE"], axis=1, inplace=True)
print(df.shape)
df.head(3)

(72, 9)


Unnamed: 0,Age group,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,1,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,1,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,1,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637


In [11]:
# saving the target variables into `y` variable.
y = df.loc[:, "Age group"].values
print("y.shape = ", y.shape)
print("y = ", y)

y.shape =  (72,)
y =  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]


In [12]:
# Perform Data Preprocessing- Data Standardization
# Defining a Standard Scaler for scaling the values in the dataset
# in the range of [-a, +a], i.e. scale values to a smaller range.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [13]:
df.shape

(72, 9)

In [14]:
# Define the different segments from dataset to be used.
segments = {
    'TS': df.loc[:,'Cycle_Time_Mean':'Stride_Length_Mean'],
}


In [15]:
print(segments["TS"].shape)
segments["TS"].head()

(72, 8)


Unnamed: 0,Cycle_Time_Mean,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Single Support Time,Time to TO,Steps_Per_Minute_Mean,Stride_Length_Mean
0,1.135,0.682212,1.246092,0.26,0.44,0.7,105.263123,1.410565
1,1.135,0.73205,1.246092,0.26,0.435,0.7,106.203018,1.418065
2,0.985,0.701486,1.412893,0.135,0.42,0.56,122.448975,1.378637
3,0.985,0.690197,1.412893,0.135,0.43,0.56,121.224518,1.404761
4,1.113333,0.608685,1.133143,0.19,0.455,0.65,105.263168,1.233512


In [16]:
segments["TS"].columns

Index(['Cycle_Time_Mean', 'Step_Length_Mean', 'Speed',
       'Double_Limb_Support_Time_Ave', 'Single Support Time', 'Time to TO',
       'Steps_Per_Minute_Mean', 'Stride_Length_Mean'],
      dtype='object')

# Defining **Cross Validation** method to be used

In [17]:
# Define Leave-One-Out CV
from sklearn.model_selection import LeaveOneOut
loocv = LeaveOneOut()

# # Define Repeated Stratified k-fold CV
# from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
# rskf_cv = RepeatedStratifiedKFold(n_splits=8, n_repeats=5, random_state=36851234)

# Defning the **Classifer** to be used

In [39]:
# Define the Classifier to be used for Sequential Feature Selection (SFS)

# # Apply Linear LDA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# lda = LinearDiscriminantAnalysis(solver='svd', n_components=None)

# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# Define SVM classifier with RBF kernel
from sklearn.svm import SVC
svm = SVC(kernel='rbf', C=90, verbose=False)


# Defining the **Feature Selection** algorithm to be used

In [40]:
!pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [41]:
# Define the Sequential Feature Selection class
# https://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

# Below is the code for applying Forward Feature Selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
sfs = SFS(estimator=svm, 
            k_features=(1,15),
            forward=True, floating=False,
            verbose=2,
            scoring=('accuracy'),
            cv=loocv,
            n_jobs=-1)

# **Hyper-Parameter Optimization** for Non-Linear SVC (RBF)

In [42]:
# Define the Classifier and Parameter Grid to be used for GridSearch and final Evaluation
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
from sklearn.svm import SVC
svm_classifier = SVC()

param_grid = [
              {'C': [0.01, 0.1, 0, 0.5, 1, 2, 3, 5, 8, 20, 50, 90], 
               'gamma': ['scale', 'auto', 0.01, 0.03, 0.04, 0.043, 0.045, 0.048, 0.05, 0.053, 0.055, 0.058, 0.06, 0.08, 0.0001, 0.001, 0.1, 1, 10], 
               'tol':[1e-2, 1e-3, 1e-4, 1e-5], 
               'kernel': ['rbf']}, #rbfSVM
]

:### Change the `estimator` in GridSearch to the estimator you are using.

In [43]:
# Define Grid Search class
from sklearn.model_selection import GridSearchCV
gridSearch = GridSearchCV(estimator=svm_classifier, 
                          param_grid=param_grid, 
                          scoring='accuracy',
                          n_jobs=-1,
                          cv=loocv, # uses Leave One Out CV
                          refit=True, verbose=1)

# Main Driver Code: **Non Linear SVM (RBF)**

In [44]:
# Type the name of the Algorithm that you are using
# This will be used while Writing the Scores in .txt file
# LDA, LinearSVM, SVM (RBF), SVM (polynomial), LogisticRegression, RandomForest
algorith_you_are_using = 'SVM (RBF)' 

In [24]:
'''svm = SVC(kernel='rbf', verbose=False, C=1)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS
X.shape =  (72, 8)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:    3.3s finished

[2022-09-21 13:41:16] Features: 1/8 -- score: 0.6527777777777778[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    1.4s finished

[2022-09-21 13:41:17] Features: 2/8 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    1.2s finished

[2022-09-21 13:41:18] Features: 3/8 -- score: 0.7361111111111112[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.1s finished

[2022-09-21 13:41:19] Features: 4/8 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel


sfs.k_score_ =  0.75
sfs.k_feature_idx_ =  (0, 1, 3, 6)
[After SFS] X.shape =  (72, 4)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=1, tol=0.01)

##############################################################

SVM (RBF), TS, 0.750, 0.667, 0.833, 0.800, 0.714, 2.500, 0.769, 0.507

##############################################################


In [31]:
'''svm = SVC(kernel='rbf', C=30, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS
X.shape =  (72, 8)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:    0.8s finished

[2022-09-21 13:43:23] Features: 1/8 -- score: 0.6944444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    0.7s finished

[2022-09-21 13:43:23] Features: 2/8 -- score: 0.6805555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.9s finished

[2022-09-21 13:43:24] Features: 3/8 -- score: 0.7361111111111112[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.5s finished

[2022-09-21 13:43:25] Features: 4/8 -- score: 0.75[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel


sfs.k_score_ =  0.75
sfs.k_feature_idx_ =  (0, 1, 2, 3)
[After SFS] X.shape =  (72, 4)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=20, gamma=10, tol=0.01)

##############################################################

SVM (RBF), TS, 0.792, 0.722, 0.861, 0.839, 0.756, 3.100, 0.805, 0.589

##############################################################


In [38]:
'''svm = SVC(kernel='rbf', C=60, verbose=False)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS
X.shape =  (72, 8)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:    0.8s finished

[2022-09-21 13:45:33] Features: 1/8 -- score: 0.6666666666666666[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    1.0s finished

[2022-09-21 13:45:34] Features: 2/8 -- score: 0.6805555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.6s finished

[2022-09-21 13:45:34] Features: 3/8 -- score: 0.7361111111111112[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.7s finished

[2022-09-21 13:45:35] Features: 4/8 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent work


sfs.k_score_ =  0.7361111111111112
sfs.k_feature_idx_ =  (1, 2, 3)
[After SFS] X.shape =  (72, 3)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits
best_classifier =  SVC(C=20, gamma=10, tol=0.01)

##############################################################

SVM (RBF), TS, 0.792, 0.722, 0.861, 0.839, 0.756, 3.100, 0.805, 0.589

##############################################################


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

In [45]:
'''svm = SVC(kernel='rbf', verbose=False, C=90)'''
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.values
  X = sc.fit_transform(X) # Apply Standard Scaler
  print("X.shape = ", X.shape)

  # Apply Sequetial Forward Feature Selection (SFS)
  sfs.k_features = (1, X.shape[1])
  sfs.fit(X, y)
  print("\nsfs.k_score_ = ", sfs.k_score_)
  print("sfs.k_feature_idx_ = ", sfs.k_feature_idx_)
  
  # Apply Grid Search on the Most Significant Parameters
  X = sfs.transform(X)
  print("[After SFS] X.shape = ", X.shape)
  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])
  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")


Running:  TS
X.shape =  (72, 8)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:    0.8s finished

[2022-09-21 13:48:08] Features: 1/8 -- score: 0.6805555555555556[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    0.8s finished

[2022-09-21 13:48:09] Features: 2/8 -- score: 0.6944444444444444[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.8s finished

[2022-09-21 13:48:10] Features: 3/8 -- score: 0.7222222222222222[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.6s finished

[2022-09-21 13:48:11] Features: 4/8 -- score: 0.7361111111111112[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent work


sfs.k_score_ =  0.7361111111111112
sfs.k_feature_idx_ =  (1, 2, 3, 6)
[After SFS] X.shape =  (72, 4)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=3, gamma=10, tol=0.01)

##############################################################

SVM (RBF), TS, 0.819, 0.833, 0.806, 0.811, 0.829, 4.833, 0.817, 0.639

##############################################################


# **Verification**

In [46]:
# get the names of the feature subset selected using the Feature Selection algorithm.
sfs_feature_idx = [1, 2, 3, 6]
print("Number of Features selected: ", len(sfs_feature_idx))

segments["TS"].iloc[:, sfs_feature_idx].head()

Number of Features selected:  4


Unnamed: 0,Step_Length_Mean,Speed,Double_Limb_Support_Time_Ave,Steps_Per_Minute_Mean
0,0.682212,1.246092,0.26,105.263123
1,0.73205,1.246092,0.26,106.203018
2,0.701486,1.412893,0.135,122.448975
3,0.690197,1.412893,0.135,121.224518
4,0.608685,1.133143,0.19,105.263168


In [47]:
temp = pd.DataFrame(segments["TS"].keys().to_numpy(), columns=["FeatureNames"])

features = temp.iloc[sfs_feature_idx]
features

Unnamed: 0,FeatureNames
1,Step_Length_Mean
2,Speed
3,Double_Limb_Support_Time_Ave
6,Steps_Per_Minute_Mean


In [48]:
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from utils import *

for (key, value) in segments.items():
  print("Running: ", key)

  X = value.iloc[:, sfs_feature_idx].values
  X = sc.fit_transform(X) # Standard Scaler
  print("X.shape = ", X.shape)

  search_results = gridSearch.fit(X, y)
  
  # Get the Best Classfier (Best Parameters) after Grid Search
  best_classifier = search_results.best_estimator_
  print("best_classifier = ", best_classifier)
  
  # Apply LOOCV to get classification scores
  y_true_list, y_pred_list = [], []
  for train_idx, test_idx in loocv.split(X, y):
      x_train, y_train = X[train_idx], y[train_idx]
      x_test, y_test = X[test_idx], y[test_idx]
      
      best_classifier.fit(x_train, y_train)
      
      y_pred = best_classifier.predict(x_test)

      y_true_list.append(y_test[:])
      y_pred_list.append(y_pred[:])

  print("\n##############################################################\n")

  print("{}, {}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(algorith_you_are_using, 
                                                                                          key, 
                                                                                          accuracy_score(y_true_list, y_pred_list),
                                                                                          get_specificity(y_true_list, y_pred_list),
                                                                                          get_sensitivity(y_true_list, y_pred_list),
                                                                                          get_NPV(y_true_list, y_pred_list),
                                                                                          get_PPV(y_true_list, y_pred_list),
                                                                                          get_PLR(y_true_list, y_pred_list),
                                                                                          f1_score(y_true_list, y_pred_list, labels=[0, 1]),
                                                                                          get_MCC(y_true_list, y_pred_list)                                                                                               
                                                                                          ))

  print("\n##############################################################")

Running:  TS
X.shape =  (72, 4)
Fitting 72 folds for each of 912 candidates, totalling 65664 fits


5472 fits failed out of a total of 65664.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5472 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 255, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.7/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    random_seed=random_seed,
  File "sklearn/svm/_libsvm.pyx", line 189, in sklearn.svm._libsvm.fit
ValueError: C <= 0

 0.         0.         0.         0.        

best_classifier =  SVC(C=3, gamma=10, tol=0.01)

##############################################################

SVM (RBF), TS, 0.819, 0.833, 0.806, 0.811, 0.829, 4.833, 0.817, 0.639

##############################################################


In [49]:
search_results.best_params_

{'C': 3, 'gamma': 10, 'kernel': 'rbf', 'tol': 0.01}

In [50]:
search_results.best_score_

0.8194444444444444

In [None]:
##############################################################################
//////////////////////////////////////////////////////////////////////////////
##############################################################################