## Imports

In [448]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.feature_selection import SelectKBest, f_regression
import math


In [449]:
data = pd.read_csv('participants.csv')

In [450]:
data.head()

Unnamed: 0,Mobility upper limb,Average time pegboard (vr),Instance lost focus,Average time pegboard (physic),SSQ Total score,Delay before completing a cycle finger-nose-finger,Average time of completion (s) in Everyday Task,Average score in Everyday task,Finger follow low jerk,Mean RMSE (cm),...,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Velocities Maximum EyeTrackingFix,Standard Deviation EyeTrackingFollow,Silhouette Coefficient EyeTrackingMultiple,Stage Ataxia,FARS,Upper limb FARS subscore,mFARS score,FARS ADL Custom score
0,18,300.0,1000,300.0,0,10.0,60.0,0.0,25000.0,2.0,...,18.248162,5.97334,494.994382,27.475287,0.549672,5.0,103.0,27,81.0,25
1,28,300.0,1000,300.0,3,10.0,60.0,0.0,25000.0,2.0,...,32.839674,10.314756,1051.185873,45.853511,0.435157,5.0,88.0,21,72.0,25
2,79,70.59,256,26.45,2,1.875704,19.999866,0.903333,1839.0,0.819764,...,22.993862,4.03216,1188.792757,27.533485,0.760832,0.0,0.0,0,0.0,0
3,80,39.105,123,15.35,0,1.343185,16.749916,0.969167,1410.0,0.773877,...,19.776123,1.982214,885.950206,36.829619,0.860001,0.0,0.0,0,0.0,0
4,34,300.0,549,169.5,0,7.760111,21.81095,0.018333,19886.0,2.0,...,49.626329,15.495516,1037.040361,53.38908,0.551815,5.0,87.0,20,65.0,12


## Select Features

In [451]:
features = data[[
    #"Mobility upper limb",
    "Average time pegboard (vr)",
    #"Instance lost focus",
    #"Average time pegboard (physic)",
    #"SSQ Total score",	
    #"Delay before completing a cycle finger-nose-finger",
    "Average time of completion (s) in Everyday Task",
    "Average score in Everyday task",
    #"Finger follow low jerk",	
    #"Mean RMSE (cm)",	
    #"Rhythm Variance",	
    #"Everyday Task Low Jerk",	
    #"Pegboard Low Jerk",	
    #"Finger Nose Low Jerk",	
    #"Finger Nose Average Jerk",	
    #"Finger Follow Average Jerk",	
    #"Everyday Task Average Jerk",	
    #"Pegboard Average Jerk",	
    "Time completion (MQ2)",	
    "Saccade Initial Velocity EyeTrackingMultiple Average",
    "Saccade Initial Velocity EyeTrackingMultiple Median",
    "Saccade Peak Velocity EyeTrackingMultiple Average",
    "Saccade Peak Velocity EyeTrackingMultiple Median",
    "Saccade Amplitude EyeTrackingMultiple Average",
    "Saccade Amplitude EyeTrackingMultiple Median",
    "SWJ Rate EyeTrackingFix",
    "SWJ Average Amplitude EyeTrackingFix",
    "SWJ Median Amplitude EyeTrackingFix",	
    "SWJ Average Initial Velocity EyeTrackingFix",
    "SWJ Median Initial Velocity EyeTrackingFix",
    "SWJ Average Peak Velocity EyeTrackingFix",	
    "SWJ Median Peak Velocity EyeTrackingFix",
    "SWJ Average Time EyeTrackingFix",	
    "SWJ Median Time EyeTrackingFix",	
    "Saccades Rate EyeTrackingFix",
    "Saccades Average Amplitude EyeTrackingFix",
    "Saccades Median Amplitude EyeTrackingFix",
    "Velocities Average EyeTrackingMultiple",
    "Velocities Median EyeTrackingMultiple",
    "Velocities Maximum EyeTrackingFix",
    "Standard Deviation EyeTrackingFollow",
    "Silhouette Coefficient EyeTrackingMultiple"
    
    #"Stage Ataxia",
    #"FARS",
    #"Upper limb FARS subscore",
    #"mFARS score",
    #"FARS ADL Custom score"
]]

# Drop rows with NaN values
#features = features.dropna()

# Drop rows with -1
# new_features = new_features[~new_features.isin([-1]).any(axis=1)]

prediction = data[["FARS"]]

In [452]:
features.head()


Unnamed: 0,Average time pegboard (vr),Average time of completion (s) in Everyday Task,Average score in Everyday task,Time completion (MQ2),Saccade Initial Velocity EyeTrackingMultiple Average,Saccade Initial Velocity EyeTrackingMultiple Median,Saccade Peak Velocity EyeTrackingMultiple Average,Saccade Peak Velocity EyeTrackingMultiple Median,Saccade Amplitude EyeTrackingMultiple Average,Saccade Amplitude EyeTrackingMultiple Median,...,SWJ Average Time EyeTrackingFix,SWJ Median Time EyeTrackingFix,Saccades Rate EyeTrackingFix,Saccades Average Amplitude EyeTrackingFix,Saccades Median Amplitude EyeTrackingFix,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Velocities Maximum EyeTrackingFix,Standard Deviation EyeTrackingFollow,Silhouette Coefficient EyeTrackingMultiple
0,300.0,60.0,0.0,60.0,90.559742,80.171462,106.438821,102.564313,2.137318,1.93185,...,299.0,322.5,79.206508,0.719015,0.710496,18.248162,5.97334,494.994382,27.475287,0.549672
1,300.0,60.0,0.0,60.0,157.872702,138.726112,179.998889,157.979484,3.148326,2.697997,...,299.402439,322.0,6.845821,2.600866,1.996309,32.839674,10.314756,1051.185873,45.853511,0.435157
2,70.59,19.999866,0.903333,24.65,90.464844,67.423719,109.69775,82.227604,1.741679,1.266178,...,274.8,221.0,194.791295,0.944705,0.842702,22.993862,4.03216,1188.792757,27.533485,0.760832
3,39.105,16.749916,0.969167,20.67,90.81634,41.880108,108.989981,45.971041,2.019536,1.016118,...,277.333333,307.0,49.236478,0.316776,0.300297,19.776123,1.982214,885.950206,36.829619,0.860001
4,300.0,21.81095,0.018333,60.0,261.259666,233.08058,303.538059,295.409185,7.174046,5.971508,...,292.818182,278.0,2.01891,1.412191,1.412191,49.626329,15.495516,1037.040361,53.38908,0.551815


In [453]:
prediction.head()

Unnamed: 0,FARS
0,103.0
1,88.0
2,0.0
3,0.0
4,87.0


## Train models (Linear Regression model with Leave one out cross validation)

In [454]:
loo = LeaveOneOut()

rmse_list = []
predictions = []
actual_values = []

selector = SelectKBest(score_func=f_regression, k=10)  # Select top 5 features based on f_regression

for train_index, test_index in loo.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = prediction.iloc[train_index], prediction.iloc[test_index]

    # Apply feature selection on the training set
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_test_selected = selector.transform(X_test)

    model = LinearRegression()
    model.fit(X_train_selected, y_train)
    
    y_pred = model.predict(X_test_selected)

    #mFARS score
    #y_pred = min(y_pred, [[93.0]])  # Cap the prediction at 80
    #y_pred = max(y_pred, [[0]])  # Cap the prediction at 0

    #Stage Ataxia
    #y_pred = min(y_pred, [[5.5]])  # Cap the prediction at 80
    #y_pred = max(y_pred, [[0]])  # Cap the prediction at 0

    #FARS
    y_pred = min(y_pred, [[125.0]])  # Cap the prediction at 80
    y_pred = max(y_pred, [[0]])  # Cap the prediction at 0

    #Mobility upper limb
    #y_pred = min(y_pred, [[80.0]])  # Cap the prediction at 80
    #y_pred = max(y_pred, [[0]])  # Cap the prediction at 0

    mse = mean_squared_error(y_test, y_pred)
    rmse_list.append(np.sqrt(mse))
    
    predictions.append(y_pred[0])  # Store prediction for left-out participant
    actual_values.append(y_test.values[0])  # Store actual value for left-out participant

    selected_features = features.columns[selector.get_support()]
    print("Selected Features:", selected_features)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Average',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'Velocities Median EyeTrackingMultiple',
       'Standard Deviation EyeTrackingFollow',
       'Silhouette Coefficient EyeTrackingMultiple'],
      dtype='object')
Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'SWJ Average Amplitude EyeTrackingFix',
       'SWJ Average Peak Velocity EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coeffic

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Peak Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'SWJ Average Peak Velocity EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMultiple'],
      dtype='object')
Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Peak Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Average',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'Velocities Median EyeTrackingMultiple',
       'S

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Peak Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'SWJ Average Peak Velocity EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMultiple'],
      dtype='object')
Selected Features: Index(['Average time pegboard (vr)',
       'Average time of completion (s) in Everyday Task',
       'Average score in Everyday task', 'Time completion (MQ2)',
       'Saccade Initial Velocity EyeTrackingMultiple Median',
       'Saccade Amplitude EyeTrackingMultiple Median',
       'SWJ Rate EyeTrackingFix', 'SWJ Average Peak Velocity EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMult

## Evaluate

In [455]:
# Evaluate the models using root mean squared error
average_rmse = np.mean(rmse_list)
print("Average Root Mean Squared Error:", average_rmse)

# You can also access the coefficients and intercept of the linear regression model
coefficients = model.coef_
intercept = model.intercept_
print("Coefficients:", coefficients)
print("Intercept:", intercept)

Average Root Mean Squared Error: 14.386235137639986
Coefficients: [[ 7.84291986e-02  8.34045536e-01 -7.51580621e+01 -1.58836619e+00
  -3.13764093e-01 -3.95233354e-01  4.91451622e+01 -1.46585636e-01
  -5.85406485e+00 -1.52069454e+02]]
Intercept: [223.41943833]


## Compare predictions to actual values

In [456]:
# Print comparison of predicted values and actual values
for i in range(len(predictions)):
    print("Participant", i+1)
    print("Predicted Mobility:", predictions[i])
    print("Actual Mobility:", actual_values[i])
    print("---")

Participant 1
Predicted Mobility: [122.33300541]
Actual Mobility: [103.]
---
Participant 2
Predicted Mobility: [107.2549897]
Actual Mobility: [88.]
---
Participant 3
Predicted Mobility: [5.14919029]
Actual Mobility: [0.]
---
Participant 4
Predicted Mobility: [10.73748266]
Actual Mobility: [0.]
---
Participant 5
Predicted Mobility: [83.91666634]
Actual Mobility: [87.]
---
Participant 6
Predicted Mobility: [24.25274454]
Actual Mobility: [31.]
---
Participant 7
Predicted Mobility: [94.40581515]
Actual Mobility: [95.]
---
Participant 8
Predicted Mobility: [0]
Actual Mobility: [0.]
---
Participant 9
Predicted Mobility: [31.51584198]
Actual Mobility: [0.]
---
Participant 10
Predicted Mobility: [25.28240102]
Actual Mobility: [39.33333333]
---
Participant 11
Predicted Mobility: [40.28247206]
Actual Mobility: [38.]
---
Participant 12
Predicted Mobility: [14.6328678]
Actual Mobility: [34.]
---
Participant 13
Predicted Mobility: [3.81204461]
Actual Mobility: [42.]
---
Participant 14
Predicted Mob