### Import Libraries

In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC

### Data Preprocessing

In [2]:
# Read the training table from its CSV file into a DataFrame.
TRAINING_CSV = "fingertapping_features_severity_diagnosis_June13_2023.csv"
data = pd.read_csv(TRAINING_CSV)

In [3]:
# Drop every row whose Rating is 3 or 4; all other rows are kept.
excluded_ratings = [3, 4]
data = data[~data["Rating"].isin(excluded_ratings)]

In [4]:
# Feature matrix: every column in the label range that starts at
# 'wrist_mvmnt_x_median' and ends at 'acceleration_min_trimmed' (both inclusive).
feature_columns = slice('wrist_mvmnt_x_median', 'acceleration_min_trimmed')
X = data.loc[:, feature_columns]
# Target vector: the `Rating` column.
y = data.Rating

In [5]:
# Sanity check: list the distinct labels left in `y` to confirm that
# ratings 3 and 4 were removed by the filter above.
y.unique()

array([2, 1, 0], dtype=int64)

In [6]:
# Bare expression so the notebook renders the selected feature columns for inspection.
X

Unnamed: 0,wrist_mvmnt_x_median,wrist_mvmnt_x_quartile_range,wrist_mvmnt_x_mean,wrist_mvmnt_x_min,wrist_mvmnt_x_max,wrist_mvmnt_x_stdev,wrist_mvmnt_y_median,wrist_mvmnt_y_quartile_range,wrist_mvmnt_y_mean,wrist_mvmnt_y_min,...,acceleration_median_denoised,acceleration_quartile_range_denoised,acceleration_mean_denoised,acceleration_min_denoised,acceleration_max_denoised,acceleration_stdev_denoised,acceleration_median_trimmed,acceleration_quartile_range_trimmed,acceleration_mean_trimmed,acceleration_min_trimmed
0,4.238080,4.234098,4.289725,0.000000,20.329711,4.199564,8.462262,8.810459,8.439464,0.000000,...,3000.508475,3328.705707,3797.272636,38.649003,17384.22069,3188.396515,2902.806466,2197.490376,3247.331833,38.649003
1,2.353139,2.676330,2.737808,0.000000,11.511819,2.270555,5.643571,6.659994,6.449475,0.000000,...,2074.411105,4407.711040,4231.595947,3.828037,28014.35277,5268.006648,2115.152334,4026.730182,4382.923307,3.828037
2,2.414284,2.435714,2.619451,0.110869,6.318423,1.598481,12.464668,12.223106,16.709834,0.000000,...,4902.070427,6221.253794,6756.041732,41.366761,28398.30213,5453.057264,5403.850546,6790.927527,6488.592104,41.366761
4,1.203146,1.683577,1.357783,0.040405,5.137711,1.139386,6.464794,6.574934,8.495438,0.129821,...,6362.892566,8143.989417,6707.772619,73.478845,26801.32366,5497.433033,7672.996334,8910.334726,7411.409728,378.726081
5,5.755463,5.646298,5.600528,0.857679,13.127449,3.345745,39.757940,45.290742,45.473910,1.725471,...,9716.032294,20740.004580,14993.616410,25.327804,70621.28255,15002.921040,11399.550350,15132.957030,14600.263440,830.640224
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
484,5.822107,7.858378,7.062416,0.000000,24.293398,5.330605,4.772874,6.205787,5.783200,0.000000,...,3124.178205,2828.799036,3539.250480,42.803587,12010.49146,2560.318215,3431.163832,2619.396821,4008.245174,182.951658
485,17.507193,20.122659,20.965597,0.000000,61.567459,15.703871,9.398466,11.636656,11.136031,0.000000,...,5276.174247,8537.451062,6394.156159,32.891530,18372.68331,5051.514221,8444.112719,8676.255282,7660.203811,56.358143
486,21.281412,29.798872,27.151632,0.673401,111.377486,23.390194,12.417981,17.365955,15.991592,0.000000,...,5656.541808,7423.169092,7461.457152,131.592786,40064.40252,6629.017663,5641.395905,7449.876557,7575.331504,252.536458
487,6.009283,9.672707,9.769867,0.000000,51.870359,11.877773,3.968994,8.742207,7.988162,0.000000,...,6464.642804,6949.858834,8065.853413,27.631639,31695.41192,6373.651200,5570.699815,10221.545720,9024.355184,42.025952


In [8]:
# Replace missing feature values (NaN) with the mean of their column.
# NOTE(review): the imputer is fitted on every row before the train/test split
# further down, so held-out rows contribute to the imputation means.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X = imputer.fit_transform(X)  # X is rebound to the transformed output

In [9]:
# One-hot view of the ratings, printed for inspection only; the cells visible
# in this notebook keep using the integer labels in `y`.
one_hot_encoded_data = pd.get_dummies(y)
print(one_hot_encoded_data)

     0  1  2
0    0  0  1
1    0  0  1
2    0  1  0
4    1  0  0
5    1  0  0
..  .. .. ..
484  0  0  1
485  0  0  1
486  0  0  1
487  0  0  1
488  1  0  0

[430 rows x 3 columns]


In [10]:
# Hold out 5% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=0,
)

In [11]:
# Print the shape of each split, in the order: train X, test X, train y, test y.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(408, 114)
(22, 114)
(408,)
(22,)


In [12]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Model Training

In [13]:
# Hyper-parameter grid for the SVM search. Two sub-grids are explored:
#   * RBF kernel    - every combination of gamma and C
#   * linear kernel - C only
c_values = [1, 10, 100, 1000]
rbf_grid = dict(kernel=['rbf'], gamma=[1e-3, 1e-4], C=list(c_values))
linear_grid = dict(kernel=['linear'], C=list(c_values))
params_grid = [rbf_grid, linear_grid]

In [14]:
# Search the parameter grid with 5-fold cross-validation on the training split.
svm_model = GridSearchCV(estimator=SVC(), param_grid=params_grid, cv=5)
svm_model.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']},
                         {'C': [1, 10, 100, 1000], 'kernel': ['linear']}])

In [15]:
# Keep the best estimator found by the grid search for later predictions.
final_model = svm_model.best_estimator_
# Predict labels for the held-out test rows.
Y_pred = final_model.predict(X_test)
# Bare expression so the notebook renders the predicted labels.
Y_pred

array([1, 2, 0, 2, 1, 1, 0, 2, 2, 1, 2, 1, 2, 2, 0, 2, 1, 0, 2, 1, 1, 1],
      dtype=int64)

In [16]:
# Score the tuned search object on both splits and report the two values.
train_score = svm_model.score(X_train, y_train)
test_score = svm_model.score(X_test, y_test)
for label, value in (('Train Score: ', train_score), ('Test Score: ', test_score)):
    print(label, value)

Train Score:  0.7132352941176471
Test Score:  0.6363636363636364


### Model Prediction

In [17]:
# Load the features of the new sample(s) to classify.
# NOTE: this rebinds `data`, so the training DataFrame loaded earlier is no
# longer reachable under that name.
data=pd.read_csv("features.csv")

In [18]:
# Select the same label range of feature columns that was used for training.
# NOTE: this rebinds `X`, which previously held the imputed training features.
X=data.loc[:, 'wrist_mvmnt_x_median':'acceleration_min_trimmed']

In [19]:
# The model was trained on features that were mean-imputed and then scaled,
# so new samples must go through the same fitted `imputer` and `scaler`
# before prediction. Feeding raw values would present the model with inputs
# on a different scale from the one it was trained on, and any NaN would be
# left unhandled.
X_new = scaler.transform(imputer.transform(X))
final_model.predict(X_new)

array([1], dtype=int64)