## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.feature_selection import SelectKBest, f_regression
import math


In [2]:
# Load the participant table; the relative path is resolved against the
# notebook's current working directory.
data = pd.read_csv(filepath_or_buffer='participants.csv')

In [3]:
# Preview the first five rows of the raw table (5 is head()'s default).
data.head(n=5)

Unnamed: 0,Mobility upper limb,Average time pegboard (vr),Instance lost focus,Average time pegboard (physic),SSQ Total score,Delay before completing a cycle finger-nose-finger,Average time of completion (s) in Everyday Task,Average score in Everyday task,Finger follow low jerk,Mean RMSE (cm),...,SWJ Average Peak Velocity EyeTrackingFix,SWJ Median Time EyeTrackingFix,Saccades Average Amplitude EyeTrackingFix,Saccades Median Amplitude EyeTrackingFix,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Silhouette Coefficient EyeTrackingMultiple,Standard Deviation EyeTrackingFollow,FARS,Stage Ataxia
0,18,300.0,1000,300.0,0,10.0,60.0,0.0,25000,2.0,...,59.4777,322.5,1.4441,0.4732,18.2482,5.9733,0.5497,27.475287,103.0,5.0
1,28,300.0,1000,300.0,3,10.0,60.0,0.0,25000,2.0,...,57.5956,322.0,1.051,0.6943,32.8397,10.3148,0.4352,45.853511,88.0,5.0
2,79,70.59,256,26.45,2,1.8757,19.9999,0.9033,1839,0.8198,...,31.6246,221.0,0.6323,0.3359,22.9939,4.0322,0.7608,27.533485,0.0,0.0
3,80,39.105,123,15.35,0,1.3432,16.7499,0.9692,1410,0.7739,...,30.9888,307.0,0.7439,0.1575,19.7761,1.9822,0.86,36.829619,0.0,0.0
4,34,300.0,549,169.5,0,7.7601,21.811,0.0183,19886,1.5,...,66.9496,278.0,1.843,1.4365,49.6263,15.4955,0.5518,53.38908,87.0,5.0


## Select Features

In [4]:
# Predictor columns fed to the model, in the order they are selected from
# `data` (the order fixes the column positions seen by later cells).
feature_columns = [
    "Average time pegboard (vr)",
    "SSQ Total score",
    "Average score in Everyday task",
    "Time completion (MQ2)",
    "SWJ Rate EyeTrackingFix",
    "SWJ Average Amplitude EyeTrackingFix",
    "SWJ Median Amplitude EyeTrackingFix",
    "SWJ Average Peak Velocity EyeTrackingFix",
    "SWJ Median Time EyeTrackingFix",
    "Saccades Average Amplitude EyeTrackingFix",
    "Saccades Median Amplitude EyeTrackingFix",
    "Velocities Average EyeTrackingMultiple",
    "Velocities Median EyeTrackingMultiple",
    "Silhouette Coefficient EyeTrackingMultiple",
    "Standard Deviation EyeTrackingFollow",
]

# Not used as predictors (kept here for reference):
#   "Instance lost focus", "Average time pegboard (physic)",
#   "Delay before completing a cycle finger-nose-finger",
#   "Average time of completion (s) in Everyday Task",
#   "Finger follow low jerk", "Mean RMSE (cm)", "Rhythm Variance",
#   "Everyday Task Low Jerk", "Pegboard Low Jerk", "Finger Nose Low Jerk",
#   "Finger Nose Average Jerk", "Finger Follow Average Jerk",
#   "Everyday Task Average Jerk", "Pegboard Average Jerk",
#   "FARS", "Stage Ataxia", "Mobility upper limb" (the target, see below)

features = data[feature_columns]

# No rows are removed in this cell (neither NaN rows nor rows containing -1),
# so `features` and `prediction` stay aligned row for row with `data`.

# Regression target, kept as a one-column DataFrame (note the double brackets).
prediction = data[['Mobility upper limb']]

In [5]:
# Preview the first five rows of the selected predictors.
features.head(n=5)


Unnamed: 0,Average time pegboard (vr),SSQ Total score,Average score in Everyday task,Time completion (MQ2),SWJ Rate EyeTrackingFix,SWJ Average Amplitude EyeTrackingFix,SWJ Median Amplitude EyeTrackingFix,SWJ Average Peak Velocity EyeTrackingFix,SWJ Median Time EyeTrackingFix,Saccades Average Amplitude EyeTrackingFix,Saccades Median Amplitude EyeTrackingFix,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Silhouette Coefficient EyeTrackingMultiple,Standard Deviation EyeTrackingFollow
0,300.0,0,0.0,60.0,25.6886,2.3162,2.199,59.4777,322.5,1.4441,0.4732,18.2482,5.9733,0.5497,27.475287
1,300.0,3,0.0,60.0,16.5105,1.3149,0.8234,57.5956,322.0,1.051,0.6943,32.8397,10.3148,0.4352,45.853511
2,70.59,2,0.9033,24.65,10.7028,0.8186,0.6224,31.6246,221.0,0.6323,0.3359,22.9939,4.0322,0.7608,27.533485
3,39.105,0,0.9692,20.67,6.4221,0.9665,0.699,30.9888,307.0,0.7439,0.1575,19.7761,1.9822,0.86,36.829619
4,300.0,0,0.0183,60.0,22.208,1.5661,1.05,66.9496,278.0,1.843,1.4365,49.6263,15.4955,0.5518,53.38908


In [6]:
# Preview the first five rows of the regression target.
prediction.head(n=5)

Unnamed: 0,Mobility upper limb
0,18
1,28
2,79
3,80
4,34


## Train models (linear regression with per-fold feature selection, evaluated by leave-one-out cross-validation)

In [7]:
# Number of predictors kept by the univariate filter in every fold.
N_SELECTED_FEATURES = 7

loo = LeaveOneOut()

mse_list = []       # squared error for each held-out participant
predictions = []    # model output for each held-out participant
actual_values = []  # observed target for each held-out participant

# Keep the N_SELECTED_FEATURES columns with the highest univariate F-score
# against the target. The selector is re-fitted inside every fold below.
selector = SelectKBest(score_func=f_regression, k=N_SELECTED_FEATURES)

for train_index, test_index in loo.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = prediction.iloc[train_index], prediction.iloc[test_index]

    # Fit the selector on the training rows only, so the held-out participant
    # never influences which features are chosen.
    # f_regression expects a 1-D target: handing it the one-column DataFrame
    # raises a DataConversionWarning on every fold, so flatten it here.
    X_train_selected = selector.fit_transform(X_train, y_train.values.ravel())
    X_test_selected = selector.transform(X_test)

    # The regression is still fitted on the 2-D target so that y_pred,
    # model.coef_ and model.intercept_ keep the shapes later cells display.
    model = LinearRegression()
    model.fit(X_train_selected, y_train)

    y_pred = model.predict(X_test_selected)
    #y_pred = min(y_pred, [[80.0]])  # Cap the prediction at 80
    # Each test fold holds a single row, so this is that row's squared error.
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

    predictions.append(y_pred[0])  # Store prediction for left-out participant
    actual_values.append(y_test.values[0])  # Store actual value for left-out participant

    # Names of the columns the selector kept in this fold (can differ per fold).
    selected_features = features.columns[selector.get_support()]
    print("Selected Features:", selected_features)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Selected Features: Index(['Average time pegboard (vr)', 'Average score in Everyday task',
       'Time completion (MQ2)', 'Saccades Median Amplitude EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMultiple',
       'Standard Deviation EyeTrackingFollow'],
      dtype='object')
Selected Features: Index(['Average time pegboard (vr)', 'Average score in Everyday task',
       'Time completion (MQ2)', 'SWJ Rate EyeTrackingFix',
       'Saccades Average Amplitude EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMultiple'],
      dtype='object')
Selected Features: Index(['Average time pegboard (vr)', 'Average score in Everyday task',
       'Time completion (MQ2)', 'SWJ Rate EyeTrackingFix',
       'Saccades Average Amplitude EyeTrackingFix',
       'Velocities Median EyeTrackingMultiple',
       'Silhouette Coefficient EyeTrackingMultiple'],
      dtype='object')
Selected Feat

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


## Evaluate

In [8]:
# Evaluate the models using mean squared error
average_mse = np.mean(mse_list)
print("Average Mean Squared Error:", average_mse)

# You can also access the coefficients and intercept of the linear regression model
coefficients = model.coef_
intercept = model.intercept_
print("Coefficients:", coefficients)
print("Intercept:", intercept)

Average Mean Squared Error: 96.92588258358622
Coefficients: [[ 1.37858363e-02  2.80149928e+01 -9.71749017e-01 -4.34693147e-01
  -5.13569444e+00  2.11917047e+00 -2.18320311e+01]]
Intercept: [87.19628079]


## Compare predictions to actual values

In [9]:
# Print comparison of predicted values and actual values
for i in range(len(predictions)):
    print("Participant", i+1)
    print("Predicted Mobility:", predictions[i])
    print("Actual Mobility:", actual_values[i])
    print("---")

Participant 1
Predicted Mobility: [8.00196]
Actual Mobility: [18]
---
Participant 2
Predicted Mobility: [37.34905361]
Actual Mobility: [28]
---
Participant 3
Predicted Mobility: [73.00659111]
Actual Mobility: [79]
---
Participant 4
Predicted Mobility: [71.68269184]
Actual Mobility: [80]
---
Participant 5
Predicted Mobility: [37.58792174]
Actual Mobility: [34]
---
Participant 6
Predicted Mobility: [71.07829463]
Actual Mobility: [75]
---
Participant 7
Predicted Mobility: [23.09496247]
Actual Mobility: [11]
---
Participant 8
Predicted Mobility: [76.10724387]
Actual Mobility: [79]
---
Participant 9
Predicted Mobility: [69.63317988]
Actual Mobility: [79]
---
Participant 10
Predicted Mobility: [74.34785056]
Actual Mobility: [72]
---
Participant 11
Predicted Mobility: [60.69523085]
Actual Mobility: [61]
---
Participant 12
Predicted Mobility: [80.54452683]
Actual Mobility: [61]
---
Participant 13
Predicted Mobility: [70.6674156]
Actual Mobility: [64]
---
Participant 14
Predicted Mobility: [78.