In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer

# Load the dataset
file_path = 'updated_EVERSION_VP01-VP13_14_06_2024.xlsx'
df = pd.read_excel(file_path)

# Display the first few rows of the dataset to understand its structure
df.head()

Unnamed: 0,# Test,ID,Last name,Name,Last & first names,Date,#,Time[s],Step[cm],Stance phase[s],...,Contact phase [%],Foot flat[s],Foot flat [%],Propulsive phase[s],Propulsive phase [%],Walking point[cm],Walking point gap[cm],Step width[cm],Walking base[cm],PCI
0,1.0,LLmllhFglv3voXib8ylb,EVERSION2,p13,EVERSION2 p13,2024-06-14 15:04:50.490,1 L,0.0,52.0,0.704,...,16.5,0.38,54.0,0.208,29.5,,,,,
1,,,,,,,2 R,0.588,83.0,0.646,...,21.1,0.3,46.4,0.21,32.5,,,17.8,6.0,
2,,,,,,,3 L,1.124,86.0,0.646,...,14.9,0.346,53.6,0.204,31.5,-0.5,-0.5,18.8,7.0,
3,,,,,,,4 R,1.664,82.0,0.62,...,14.2,0.41,66.1,0.122,19.7,-6.3,-5.8,7.3,4.0,
4,,,,,,,5 L,2.182,88.0,0.654,...,11.6,0.448,68.5,0.13,19.9,-12.5,-6.2,19.8,9.0,


In [2]:
# Step 2: Data Preprocessing
# Select only relevant columns
# Assuming columns like 'Time[s]', 'Step[cm]', 'Stance phase[s]', etc., are present and relevant for gait event classification
relevant_columns = [
    'Time[s]', 'Step[cm]', 'Stance phase[s]', 'Swing phase[s]',
    'Single support [s]', 'Double support [s]', 'Step times[s]',
    'Load response[s]', 'Gait cycle[s]', 'Stride[cm]', 'Speed[m/s]'
]
X = df[relevant_columns]

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X = pd.DataFrame(X_imputed, columns=relevant_columns)

In [3]:
X

Unnamed: 0,Time[s],Step[cm],Stance phase[s],Swing phase[s],Single support [s],Double support [s],Step times[s],Load response[s],Gait cycle[s],Stride[cm],Speed[m/s]
0,0.000,52.000000,0.704000,0.414817,0.414817,0.253109,0.544857,0.12778,1.084151,153.227222,1.441979
1,0.588,83.000000,0.646000,0.414817,0.420000,0.226000,0.588000,0.11600,1.084151,135.000000,1.441979
2,1.124,86.000000,0.646000,0.420000,0.430000,0.216000,0.536000,0.11000,1.124000,169.000000,1.500000
3,1.664,82.000000,0.620000,0.430000,0.412000,0.208000,0.540000,0.10600,1.076000,168.000000,1.560000
4,2.182,88.000000,0.654000,0.412000,0.426000,0.228000,0.518000,0.10200,1.058000,170.000000,1.610000
...,...,...,...,...,...,...,...,...,...,...,...
3293,4.350,89.000000,0.718000,0.480000,0.472000,0.246000,0.594000,0.12000,1.184000,180.000000,1.520000
3294,4.942,91.000000,0.736000,0.472000,0.492000,0.244000,0.592000,0.12600,1.186000,180.000000,1.520000
3295,5.560,90.000000,0.724000,0.492000,0.496000,0.228000,0.618000,0.11800,1.210000,181.000000,1.500000
3296,6.174,96.000000,0.778000,0.496000,0.414817,0.253109,0.614000,0.11000,1.232000,186.000000,1.510000


In [5]:
y = df['Gait cycle[s]']
y

0         NaN
1         NaN
2       1.124
3       1.076
4       1.058
        ...  
3293    1.184
3294    1.186
3295    1.210
3296    1.232
3297      NaN
Name: Gait cycle[s], Length: 3298, dtype: float64

In [6]:
# Target variable (Assuming we have a column 'Gait Cycle' which classifies different gait events)
# Fill missing values with mean for numerical columns
imputer = SimpleImputer(strategy='mean')
y_imputed = imputer.fit_transform(pd.DataFrame(y))

In [7]:
y_imputed

array([[1.084151],
       [1.084151],
       [1.124   ],
       ...,
       [1.21    ],
       [1.232   ],
       [1.084151]])

In [9]:
# Convert to pandas Series with custom index
n = len(y_imputed)
index = range(0, n)
y = pd.Series(y_imputed[:,0], index=index)

In [10]:
y

0       1.084151
1       1.084151
2       1.124000
3       1.076000
4       1.058000
          ...   
3293    1.184000
3294    1.186000
3295    1.210000
3296    1.232000
3297    1.084151
Length: 3298, dtype: float64

In [15]:
from sklearn.ensemble import RandomForestRegressor

# Step 4: Train the Model as a Regressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Step 5: Model Evaluation
y_pred = regressor.predict(X_test)

# If you still want to evaluate accuracy, consider using regression metrics like Mean Squared Error (MSE)
from sklearn.metrics import mean_squared_error, r2_score

print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R^2 Score:", r2_score(y_test, y_pred))

Mean Squared Error: 8.188971212121227e-05
R^2 Score: 0.9817175078508268


In [16]:
len(y_pred)

660

In [17]:
y_pred

array([0.93286 , 1.034   , 1.084151, 1.17828 , 1.084151, 1.042   ,
       1.084151, 1.084151, 1.118   , 1.084151, 1.208   , 1.13602 ,
       1.084151, 1.032   , 1.084151, 1.044   , 1.086   , 1.16402 ,
       1.026   , 1.032   , 1.084151, 1.068   , 1.032   , 1.084151,
       1.15    , 1.162   , 1.084   , 1.196   , 1.084151, 1.084151,
       1.084151, 1.062   , 1.112   , 1.084151, 1.002   , 1.114   ,
       1.08    , 0.9383  , 1.012   , 1.084151, 1.15992 , 1.068   ,
       1.054   , 1.084151, 1.126   , 1.052   , 1.22236 , 1.066   ,
       1.01    , 1.084151, 0.99602 , 1.068   , 1.026   , 1.16402 ,
       1.024   , 1.084151, 1.102   , 1.084151, 1.084151, 1.082   ,
       1.084151, 1.084151, 1.064   , 1.084151, 1.064   , 1.062   ,
       0.99    , 1.032   , 1.084151, 1.084151, 1.092   , 1.002   ,
       1.084151, 1.19998 , 1.13002 , 0.97184 , 1.082   , 1.08    ,
       1.1     , 1.05    , 1.21012 , 1.016   , 1.084   , 1.046   ,
       1.22604 , 1.22466 , 1.068   , 1.09    , 1.084151, 1.1  