## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
import math


In [2]:
data = pd.read_csv('participants.csv')

In [3]:
data.head()

Unnamed: 0,Mobility upper limb,Average time pegboard (vr),Instance lost focus,Average time pegboard (physic),SSQ Total score,Delay before completing a cycle finger-nose-finger,Average time of completion (s) in Everyday Task,Average score in Everyday task,Finger follow low jerk,Mean RMSE (cm),...,SWJ Rate EyeTrackingFix,SWJ Average Amplitude EyeTrackingFix,SWJ Median Amplitude EyeTrackingFix,SWJ Average Peak Velocity EyeTrackingFix,SWJ Median Time EyeTrackingFix,Saccades Average Amplitude EyeTrackingFix,Saccades Median Amplitude EyeTrackingFix,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Silhouette Coefficient EyeTrackingMultiple
0,18,300.0,1000,300.0,0,10.0,60.0,0.0,25000,2.0,...,25.688597,2.316221,2.198963,59.477715,322.5,1.444087,0.473204,18.248162,5.97334,0.549672
1,28,300.0,1000,300.0,3,10.0,60.0,0.0,25000,2.0,...,16.51051,1.314898,0.823428,57.595581,322.0,1.051043,0.694261,32.839674,10.314756,0.435157
2,79,70.59,256,26.45,2,1.875704,19.999866,0.903333,1839,0.819764,...,10.702818,0.818572,0.622396,31.624557,221.0,0.632306,0.335854,22.993862,4.03216,0.760832
3,80,39.105,123,15.35,0,1.343185,16.749916,0.969167,1410,0.773877,...,6.422149,0.966502,0.69903,30.988788,307.0,0.743894,0.15754,19.776123,1.982214,0.860001
4,34,300.0,549,169.5,0,7.760111,21.81095,0.018333,19886,2.0,...,22.208015,1.566103,1.049978,66.949573,278.0,1.843023,1.436511,49.626329,15.495516,0.551815


## Select Features

In [4]:
features = data[["Average time pegboard (vr)", 
"Instance lost focus", 
"Average time pegboard (physic)", 
"SSQ Total score",
"Delay before completing a cycle finger-nose-finger", 
"Average time of completion (s) in Everyday Task", 
"Average time pegboard (vr)", 
"Average score in Everyday task", 
"Finger follow low jerk", 
"Mean RMSE (cm)", 
"Rhythm Variance", 
"Everyday Task Low Jerk", 
"Pegboard Low Jerk", 
"Finger Nose Low Jerk", 
"Finger Nose Average Jerk",
"Finger Follow Average Jerk",
"Everyday Task Average Jerk", 
"Pegboard Average Jerk", 
"Time completion (MQ2)", 
"SWJ Rate EyeTrackingFix", 
"SWJ Average Amplitude EyeTrackingFix", 
"SWJ Median Amplitude EyeTrackingFix", 
"SWJ Average Peak Velocity EyeTrackingFix", 
"SWJ Median Time EyeTrackingFix", 
"Saccades Average Amplitude EyeTrackingFix", 
"Saccades Median Amplitude EyeTrackingFix", 
"Velocities Average EyeTrackingMultiple", 
"Velocities Median EyeTrackingMultiple", 
"Silhouette Coefficient EyeTrackingMultiple"]]

# Drop rows with NaN values
#features = features.dropna()

# Drop rows with -1
# new_features = new_features[~new_features.isin([-1]).any(axis=1)]

mobility_score = data[['Mobility upper limb']]

In [5]:
features.head()


Unnamed: 0,Average time pegboard (vr),Instance lost focus,Average time pegboard (physic),SSQ Total score,Delay before completing a cycle finger-nose-finger,Average time of completion (s) in Everyday Task,Average time pegboard (vr).1,Average score in Everyday task,Finger follow low jerk,Mean RMSE (cm),...,SWJ Rate EyeTrackingFix,SWJ Average Amplitude EyeTrackingFix,SWJ Median Amplitude EyeTrackingFix,SWJ Average Peak Velocity EyeTrackingFix,SWJ Median Time EyeTrackingFix,Saccades Average Amplitude EyeTrackingFix,Saccades Median Amplitude EyeTrackingFix,Velocities Average EyeTrackingMultiple,Velocities Median EyeTrackingMultiple,Silhouette Coefficient EyeTrackingMultiple
0,300.0,1000,300.0,0,10.0,60.0,300.0,0.0,25000,2.0,...,25.688597,2.316221,2.198963,59.477715,322.5,1.444087,0.473204,18.248162,5.97334,0.549672
1,300.0,1000,300.0,3,10.0,60.0,300.0,0.0,25000,2.0,...,16.51051,1.314898,0.823428,57.595581,322.0,1.051043,0.694261,32.839674,10.314756,0.435157
2,70.59,256,26.45,2,1.875704,19.999866,70.59,0.903333,1839,0.819764,...,10.702818,0.818572,0.622396,31.624557,221.0,0.632306,0.335854,22.993862,4.03216,0.760832
3,39.105,123,15.35,0,1.343185,16.749916,39.105,0.969167,1410,0.773877,...,6.422149,0.966502,0.69903,30.988788,307.0,0.743894,0.15754,19.776123,1.982214,0.860001
4,300.0,549,169.5,0,7.760111,21.81095,300.0,0.018333,19886,2.0,...,22.208015,1.566103,1.049978,66.949573,278.0,1.843023,1.436511,49.626329,15.495516,0.551815


In [6]:
mobility_score.head()

Unnamed: 0,Mobility upper limb
0,18
1,28
2,79
3,80
4,34


## Train models (Leave one out cross validation)

In [7]:
loo = LeaveOneOut()

mse_list = []
predictions = []
actual_values = []

for train_index, test_index in loo.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = mobility_score.iloc[train_index], mobility_score.iloc[test_index]
    
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)
    
    predictions.append(y_pred[0])  # Store prediction for left-out participant
    actual_values.append(y_test.values[0])  # Store actual value for left-out participant

## Evaluate

In [8]:
# Evaluate the models using mean squared error
average_mse = np.mean(mse_list)
print("Average Mean Squared Error:", average_mse)

# You can also access the coefficients and intercept of the linear regression model
coefficients = model.coef_
intercept = model.intercept_
print("Coefficients:", coefficients)
print("Intercept:", intercept)

Average Mean Squared Error: 29349.50964418605
Coefficients: [[ 0.03899976  0.01361722  0.06246569 -2.99747692 -0.4541032  -0.19738985
   0.03899976  0.35605576 -0.01580374 -1.01792232 -0.00764812 -0.00954139
   0.00838514 -0.03769124  0.0164464   0.22340355 -0.01462021 -0.21244131
   0.6938846  -1.26826542  0.24180497 -0.56012149  0.14513153 -0.01559303
   1.95498787  0.30989419 -0.75130964  4.31554913 -0.14182018]]
Intercept: [129.68213802]


## Compare predictions to actual values

In [9]:
# Print comparison of predicted values and actual values
for i in range(len(predictions)):
    print("Participant", i+1)
    print("Predicted Mobility:", predictions[i])
    print("Actual Mobility:", actual_values[i])
    print("---")

Participant 1
Predicted Mobility: [-0.53312262]
Actual Mobility: [18]
---
Participant 2
Predicted Mobility: [39.91649562]
Actual Mobility: [28]
---
Participant 3
Predicted Mobility: [52.09084193]
Actual Mobility: [79]
---
Participant 4
Predicted Mobility: [46.95692351]
Actual Mobility: [80]
---
Participant 5
Predicted Mobility: [-744.06733729]
Actual Mobility: [34]
---
Participant 6
Predicted Mobility: [118.70291797]
Actual Mobility: [75]
---
Participant 7
Predicted Mobility: [26.46071186]
Actual Mobility: [11]
---
Participant 8
Predicted Mobility: [80.14346568]
Actual Mobility: [79]
---
Participant 9
Predicted Mobility: [72.88577012]
Actual Mobility: [79]
---
Participant 10
Predicted Mobility: [89.79955447]
Actual Mobility: [72]
---
Participant 11
Predicted Mobility: [84.6368716]
Actual Mobility: [61]
---
Participant 12
Predicted Mobility: [45.43083903]
Actual Mobility: [61]
---
Participant 13
Predicted Mobility: [49.01984338]
Actual Mobility: [64]
---
Participant 14
Predicted Mobilit