# K-Fold Cross-Validation...

### Python Libraries

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense,Dropout
from tensorflow.keras.optimizers import Adam,SGD
from matplotlib import pyplot as plt
# from imblearn.over_sampling import SMOTE

### Load All The Data....

In [7]:
# Location of the source workbook; adjust to wherever the dataset lives.
file_path = 'New_Data/Final_Dataset.xlsx'
# Parse the workbook into a DataFrame.
data_01 = pd.read_excel(io=file_path)

### Creating Synthetic Data ....

In [8]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from tsaug import AddNoise, TimeWarp, Drift

# How many augmented rows to generate on top of the original ones.
num_synthetic_samples = 20

# Label-based slice from the 'ROM_angle' column through the column labelled 90,
# converted to a NumPy array. Column 0 of the array is therefore the ROM angle.
Data = data_01.loc[:, 'ROM_angle':90].values

# Observed ROM-angle range in the original rows (first column of Data).
rom_angle_column = Data[:, 0]
rom_angle_min, rom_angle_max = rom_angle_column.min(), rom_angle_column.max()

# Function to constrain ROM angle within original Min and Max range
def constrain_rom_angle(augmented_sample, rom_angle_min, rom_angle_max):
    """Clamp the first column of ``augmented_sample`` to the given bounds.

    Parameters
    ----------
    augmented_sample : 2-D array
        Rows whose column 0 is to be limited. The array is modified in place.
    rom_angle_min, rom_angle_max : scalar
        Lower and upper limits applied to column 0.

    Returns
    -------
    The same ``augmented_sample`` object that was passed in.
    """
    clipped_first_column = np.clip(augmented_sample[:, 0], rom_angle_min, rom_angle_max)
    # Write the limited values back into the caller's array.
    augmented_sample[:, 0] = clipped_first_column
    return augmented_sample

# Augmentation pipeline: three tsaug augmenters chained with `+`
augmenter = (
    TimeWarp(n_speed_change=1, max_speed_ratio=1.2)
    + AddNoise(scale=0.005)
    + Drift(max_drift=0.01, n_drift_points=2)
)

# Build the synthetic rows one at a time.
# NOTE(review): no random seed is set in this cell, so the generated rows are
# expected to differ between runs.
# NOTE(review): every sampled row still carries the ROM angle in column 0, so the
# angle passes through the augmenter together with the signal columns and is only
# clipped afterwards - confirm this is intended.
synthetic_samples = []
for _ in range(num_synthetic_samples):
    row_index = np.random.choice(Data.shape[0], size=1, replace=False)
    sampled_rows = Data[row_index]
    augmented_sample = constrain_rom_angle(
        augmenter.augment(sampled_rows), rom_angle_min, rom_angle_max
    )
    synthetic_samples.append(augmented_sample)

# Stack the synthetic rows underneath the original ones
synthetic_samples = np.vstack(synthetic_samples)
augmented_dataset = np.vstack((Data, synthetic_samples))

data = pd.DataFrame(augmented_dataset)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,172,173,174,175,176,177,178,179,180,181
0,74.0,310.0,437.034965,518.473193,562.940559,579.062937,575.4662,562.794872,552.722611,548.293706,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,150.0,1371.293706,1259.902098,1244.452214,1302.843823,1412.97669,1552.750583,1676.762238,1789.277389,1886.198135,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,157.0,1842.937063,1915.902098,1990.107226,2060.264569,2121.086247,2167.284382,2189.783217,2182.948718,2142.102564,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,155.0,1605.195804,1746.097902,1851.216783,1925.229604,1972.81352,1998.645688,2017.405594,2005.682984,1985.494172,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,160.0,2072.496503,2132.825175,2151.382284,2141.973193,2118.403263,2094.477855,2103.848485,2135.745921,2196.694639,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Extract EEG features and target...

In [9]:
# Split the augmented frame into the target (column 0) and the EEG inputs
y = data.loc[:, 0].values        # target ROM angle
X = data.loc[:, 1:181].values    # label-based slice over columns 1 through 181

# Range of the raw target, printed as a quick sanity check on extreme values
min_rom, max_rom = y.min(), y.max()
print(f"Max ROM Angle: {max_rom}, Min ROM Angle: {min_rom}")

# Min-max normalise the target with that range
rom_span = max_rom - min_rom
y = (y - min_rom) / rom_span


Max ROM Angle: 160.0, Min ROM Angle: 58.0


### Normalise the features...

In [10]:
# Min-max scale the feature matrix.
# NOTE(review): the scaler is fitted on every row of X before any train/test
# split takes place - confirm this is acceptable for the evaluation.
scaler = MinMaxScaler()
X_normalised = scaler.fit_transform(X)

# Insert a length-1 middle axis for the GRU: (samples, features) -> (samples, 1, features)
n_samples, n_features = X_normalised.shape
X_reshaped = X_normalised.reshape(n_samples, 1, n_features)

In [11]:
# Drop the singleton middle axis again to get the scaled features back as 2-D
reshaped_array = np.squeeze(X_reshaped, axis=1)
print(reshaped_array.shape)

(141, 181)


In [12]:
# Display the 2-D array of scaled features produced by the squeeze above
reshaped_array

array([[0.20673893, 0.18459689, 0.20932943, ..., 0.01874275, 0.01432755,
        0.01903523],
       [0.58111975, 0.52798749, 0.51615789, ..., 0.01874275, 0.01432755,
        0.01903523],
       [0.74749615, 0.80174278, 0.83130223, ..., 0.01874275, 0.01432755,
        0.01903523],
       ...,
       [0.8590131 , 0.8980592 , 0.90242785, ..., 0.99839537, 1.        ,
        1.        ],
       [0.43106968, 0.49243426, 0.49743707, ..., 0.01528978, 0.00441135,
        0.01052527],
       [0.13444078, 0.04949431, 0.04490939, ..., 0.02138965, 0.01728156,
        0.02147942]])

### Split data into training and testing sets ...

In [13]:
# Split data into training and testing sets (10% of the samples are held out).
# random_state is fixed so that the split - and every result derived from it -
# is identical after a kernel restart and full re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped, y, test_size=0.1, random_state=42
)

In [14]:
import numpy as np

# Collapse the singleton middle axis of both splits: (n, 1, f) -> (n, f)
X_X, X_Xtest = (np.squeeze(split_part, axis=1) for split_part in (X_train, X_test))

print(X_X)

[[0.74749615 0.80174278 0.83130223 ... 0.01874275 0.01432755 0.01903523]
 [0.72757637 0.75593499 0.75179746 ... 0.01874275 0.01432755 0.01903523]
 [0.68260825 0.6987256  0.69712506 ... 0.01874275 0.01432755 0.01903523]
 ...
 [0.83173646 0.87532356 0.89281865 ... 0.01874275 0.01432755 0.01903523]
 [0.1245609  0.05253727 0.04543045 ... 0.01874275 0.01432755 0.01903523]
 [0.41377388 0.39083844 0.40490973 ... 0.01874275 0.01432755 0.01903523]]


In [15]:
# Helper that undoes the min-max normalisation of the target
def invers_prediction(predic, rom_min=None, rom_max=None):
    """Map min-max normalised ROM values back to the original angle scale.

    Parameters
    ----------
    predic : array-like or scalar
        Normalised values. Lists, tuples and scalars are accepted as well as
        NumPy arrays; the input is iterated along its first axis.
    rom_min, rom_max : float, optional
        Bounds that were used for the normalisation. When omitted, the
        notebook-level ``min_rom`` / ``max_rom`` are used, which keeps existing
        one-argument calls working unchanged.

    Returns
    -------
    list
        One de-normalised entry per element of ``predic``.
    """
    # Fall back to the notebook-level range only when no explicit range is given
    if rom_min is None:
        rom_min = min_rom
    if rom_max is None:
        rom_max = max_rom

    # atleast_1d lets plain lists and scalars through without a .shape attribute
    values = np.atleast_1d(predic)
    span = rom_max - rom_min
    return [value * span + rom_min for value in values]

## KNN Model...

In [16]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Ten-fold cross-validation; apart from n_splits, KFold keeps its defaults
kf = KFold(n_splits=10)

# Nearest-neighbour regressor that uses a single neighbour
model = KNeighborsRegressor(n_neighbors=1)

# Per-fold scores, computed after mapping values back through invers_prediction
R2z, MSEz, MAEz, RMSEz = [], [], [], []

# NOTE(review): the folds are cut from `X` (the unscaled feature matrix), not from
# `X_normalised`, and the loop rebinds X_train / X_test / y_train / y_test that the
# earlier train_test_split cell created - confirm both are intended.
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Refit on this fold's training rows, then predict its held-out rows
    predictions_knn = model.fit(X_train, y_train).predict(X_test)

    # Undo the target normalisation for predictions and ground truth alike
    inv_pred_knn = invers_prediction(predictions_knn)
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test, inv_pred_knn)
    mse = mean_squared_error(inver_y_test, inv_pred_knn)
    mae = mean_absolute_error(inver_y_test, inv_pred_knn)
    rmse = np.sqrt(mse)

    for fold_scores, fold_value in ((R2z, r2), (MSEz, mse), (MAEz, mae), (RMSEz, rmse)):
        fold_scores.append(fold_value)

# Mean of every metric over the folds
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.762824
Average MSE : 146.066040
Average MAE : 7.333468
Average RMSE : 9.737788


### Results Table...

In [17]:
# One row per metric first, then flip so that every fold becomes a row
pd_result_table_knn = pd.DataFrame(
    [R2z, MSEz, MAEz, RMSEz],
    index=["R2", "MSE", "MAE", "RMSE"],
)
transposed_table_knn = pd_result_table_knn.T
transposed_table_knn

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.974465,25.0,4.066667,5.0
1,0.961012,35.438515,4.565042,5.953026
2,0.944427,33.263403,4.745976,5.767443
3,0.959832,35.334028,4.208385,5.944243
4,0.980469,23.57547,3.822842,4.855458
5,0.985681,16.046331,2.821385,4.005787
6,0.620214,181.942765,9.045978,13.488616
7,0.348042,282.982205,11.620697,16.822075
8,-0.050183,764.919303,21.837409,27.657175
9,0.904284,62.158377,6.600302,7.884058


## Support Vector Regression (SVR) Model...

In [18]:
from sklearn.model_selection import KFold
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Ten-fold cross-validation; apart from n_splits, KFold keeps its defaults
kf = KFold(n_splits=10)

# Support vector regressor with an RBF kernel
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# Per-fold scores, computed after mapping values back through invers_prediction
R2z, MSEz, MAEz, RMSEz = [], [], [], []

# NOTE(review): the folds are cut from `X` (the unscaled feature matrix), not from
# `X_normalised`, and the loop rebinds X_train / X_test / y_train / y_test that the
# earlier train_test_split cell created - confirm both are intended.
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Refit on this fold's training rows, then predict its held-out rows
    predictions_svr = model.fit(X_train, y_train).predict(X_test)

    # Undo the target normalisation for predictions and ground truth alike
    inv_pred_svr = invers_prediction(predictions_svr)
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test, inv_pred_svr)
    mse = mean_squared_error(inver_y_test, inv_pred_svr)
    mae = mean_absolute_error(inver_y_test, inv_pred_svr)
    rmse = np.sqrt(mse)

    for fold_scores, fold_value in ((R2z, r2), (MSEz, mse), (MAEz, mae), (RMSEz, rmse)):
        fold_scores.append(fold_value)

# Mean of every metric over the folds
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.847322
Average MSE : 108.527703
Average MAE : 7.485172
Average RMSE : 9.048916


In [19]:
# One row per metric first, then flip so that every fold becomes a row
pd_result_table_svr = pd.DataFrame(
    [R2z, MSEz, MAEz, RMSEz],
    index=["R2", "MSE", "MAE", "RMSE"],
)
transposed_table_svr = pd_result_table_svr.T
transposed_table_svr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.866517,130.688835,9.694259,11.431922
1,0.956701,39.357682,4.968323,6.27357
2,0.947143,31.637847,4.769598,5.624753
3,0.970198,26.215627,4.248352,5.12012
4,0.95897,49.525462,6.000234,7.037433
5,0.957203,47.961068,6.126958,6.925393
6,0.889643,52.868264,5.685723,7.271057
7,0.768049,100.678152,8.024231,10.03385
8,0.239365,554.022125,19.685898,23.537675
9,0.919431,52.32197,5.648144,7.233393


## Random Forest Regressor Model...

In [20]:
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Ten-fold cross-validation; apart from n_splits, KFold keeps its defaults
kf = KFold(n_splits=10)

# Random forest of 100 trees with a fixed random_state
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Per-fold scores, computed after mapping values back through invers_prediction
R2z, MSEz, MAEz, RMSEz = [], [], [], []

# NOTE(review): the folds are cut from `X` (the unscaled feature matrix), not from
# `X_normalised`, and the loop rebinds X_train / X_test / y_train / y_test that the
# earlier train_test_split cell created - confirm both are intended.
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Refit on this fold's training rows, then predict its held-out rows
    predictions_rfr = model.fit(X_train, y_train).predict(X_test)

    # Undo the target normalisation for predictions and ground truth alike
    inv_pred_rfr = invers_prediction(predictions_rfr)
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test, inv_pred_rfr)
    mse = mean_squared_error(inver_y_test, inv_pred_rfr)
    mae = mean_absolute_error(inver_y_test, inv_pred_rfr)
    rmse = np.sqrt(mse)

    for fold_scores, fold_value in ((R2z, r2), (MSEz, mse), (MAEz, mae), (RMSEz, rmse)):
        fold_scores.append(fold_value)

# Mean of every metric over the folds
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.875875
Average MSE : 82.496590
Average MAE : 5.316693
Average RMSE : 7.317638


In [21]:
# One row per metric first, then flip so that every fold becomes a row
pd_result_table_rfr = pd.DataFrame(
    [R2z, MSEz, MAEz, RMSEz],
    index=["R2", "MSE", "MAE", "RMSE"],
)
transposed_table_rfr = pd_result_table_rfr.T
transposed_table_rfr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.963969,35.276679,4.362947,5.939417
1,0.973259,24.306349,3.273128,4.930147
2,0.952404,28.488789,3.524919,5.337489
3,0.979266,18.238883,3.247864,4.270701
4,0.990908,10.974766,2.297037,3.312818
5,0.992256,8.678874,2.276938,2.945993
6,0.843668,74.893378,4.353311,8.654096
7,0.819083,78.526871,5.796232,8.861539
8,0.308687,503.529926,18.606018,22.439472
9,0.935247,42.051391,5.428532,6.484704
