# K-Fold Cross-Validation...

### Python Libraries...

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense,Dropout
from tensorflow.keras.optimizers import Adam,SGD
from matplotlib import pyplot as plt
# from imblearn.over_sampling import SMOTE

### Load All The Data....

In [3]:
# Load the raw dataset from an Excel workbook into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
file_path = 'New_Data/Final_Dataset.xlsx'  # Replace with your file path
# NOTE(review): a later cell selects the columns labelled 'ROM_angle' through 90 from
# this frame, so the workbook is expected to provide those column labels.
data_01 = pd.read_excel(file_path)

### Creating Synthetic Data ....

In [6]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from tsaug import AddNoise, TimeWarp, Drift

# Original dataset as a NumPy array: the columns labelled 'ROM_angle' through 90.
# Column 0 is therefore the ROM angle, which is used as the regression target further
# down, so this array holds the target plus the features (not the features only).
Data = data_01.loc[:, 'ROM_angle':90].values 

# Number of synthetic samples to generate
num_synthetic_samples = 20

# Extract Min and Max ROM angle from the original data; these bounds are used to
# clip the ROM angle of every augmented sample.
rom_angle_min = Data[:, 0].min()
rom_angle_max = Data[:, 0].max()

# Function to constrain ROM angle within original Min and Max range
def constrain_rom_angle(augmented_sample, rom_angle_min, rom_angle_max):
    """Clamp the first column of a 2-D sample array to the given bounds.

    Parameters:
        augmented_sample: 2-D array whose column 0 holds the ROM angle.
        rom_angle_min: lowest value allowed in column 0.
        rom_angle_max: highest value allowed in column 0.

    Returns:
        The same array object that was passed in; column 0 is overwritten in place.
    """
    # Raise values below the floor first, then cap the result at the ceiling.
    floored = np.maximum(augmented_sample[:, 0], rom_angle_min)
    augmented_sample[:, 0] = np.minimum(floored, rom_angle_max)
    return augmented_sample

# Augmentation pipeline: time warping, additive noise and drift chained into one augmenter.
augmenter = (
    TimeWarp(n_speed_change=1, max_speed_ratio=1.2) +
    AddNoise(scale=0.005) +
    Drift(max_drift=0.01, n_drift_points=2)
)
# Generate synthetic samples, one augmented row per iteration.
# NOTE(review): no random seed is set in the visible cells, so the rows drawn (and
# presumably the augmentation itself) differ on every run - confirm whether the
# reported results need to be reproducible.
synthetic_samples = []
for _ in range(num_synthetic_samples):
    # Draw one row index; indexing with the length-1 index array keeps the row 2-D.
    # A fresh draw is made on every iteration, so the same original row can be picked
    # more than once (replace=False has no effect when size=1).
    sampled_rows = Data[np.random.choice(Data.shape[0], size=1, replace=False)]
    # NOTE(review): the whole row, including the ROM angle in column 0, is passed to the
    # augmenter, so the target value is presumably perturbed together with the signal -
    # confirm that altering the label is intended.
    augmented_sample = augmenter.augment(sampled_rows)
    # Clip the (possibly altered) ROM angle back into the range seen in the original data.
    augmented_sample = constrain_rom_angle(augmented_sample, rom_angle_min, rom_angle_max)
    synthetic_samples.append(augmented_sample)

# Combine original and synthetic samples: the synthetic rows are appended after all
# original rows, so they occupy the last num_synthetic_samples positions.
synthetic_samples = np.vstack(synthetic_samples)
augmented_dataset = np.vstack([Data, synthetic_samples])

# No column names are supplied, so the frame gets integer column labels
# (0 is the ROM angle, the remaining labels are the signal columns).
data = pd.DataFrame(augmented_dataset)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,172,173,174,175,176,177,178,179,180,181
0,74.0,310.0,437.034965,518.473193,562.940559,579.062937,575.4662,562.794872,552.722611,548.293706,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,150.0,1371.293706,1259.902098,1244.452214,1302.843823,1412.97669,1552.750583,1676.762238,1789.277389,1886.198135,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,157.0,1842.937063,1915.902098,1990.107226,2060.264569,2121.086247,2167.284382,2189.783217,2182.948718,2142.102564,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,155.0,1605.195804,1746.097902,1851.216783,1925.229604,1972.81352,1998.645688,2017.405594,2005.682984,1985.494172,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,160.0,2072.496503,2132.825175,2151.382284,2141.973193,2118.403263,2094.477855,2103.848485,2135.745921,2196.694639,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Extract EEG features and target...

In [7]:
# Column 0 holds the ROM angle (the regression target); columns 1..181 hold the
# signal values used as features (described as EEG data in the original notes).
y = data.loc[:, 0].values
X = data.loc[:, 1:181].values

# Report the target range; these two bounds are reused later to undo the scaling.
min_rom, max_rom = y.min(), y.max()
print(f"Max ROM Angle: {max_rom}, Min ROM Angle: {min_rom}")

# Min-max scale the target into [0, 1].
y = (y - min_rom) / (max_rom - min_rom)


Max ROM Angle: 160.0, Min ROM Angle: 58.0


### Normalise the features...

In [8]:
# Scale every feature column into [0, 1]; the fitted scaler is kept for later reuse.
scaler = MinMaxScaler().fit(X)
X_normalised = scaler.transform(X)

# Insert a length-1 timestep axis for the GRU: (samples, features) -> (samples, 1, features).
_, n_features = X_normalised.shape
X_reshaped = np.expand_dims(X_normalised, axis=1)

In [9]:
# Drop the singleton timestep axis again to get back a (samples, features) matrix.
reshaped_array = np.squeeze(X_reshaped, axis=1)
print(reshaped_array.shape)

(141, 181)


In [10]:
# Show the scaled 2-D feature matrix as the cell output (NumPy abbreviates large arrays).
# NOTE(review): a slice such as the first few rows would keep the notebook output shorter.
reshaped_array

array([[0.20673893, 0.18459689, 0.20932943, ..., 0.01699169, 0.01960058,
        0.01008617],
       [0.58111975, 0.52798749, 0.51615789, ..., 0.01699169, 0.01960058,
        0.01008617],
       [0.74749615, 0.80174278, 0.83130223, ..., 0.01699169, 0.01960058,
        0.01008617],
       ...,
       [0.74788263, 0.74870827, 0.74258165, ..., 0.02840509, 0.05374281,
        0.03241743],
       [0.71031243, 0.88138272, 0.88450959, ..., 0.00875709, 0.        ,
        0.00856225],
       [0.74966693, 0.81210863, 0.84048277, ..., 0.02809236, 0.02711534,
        0.04453706]])

### Split data into training and testing sets ...

In [11]:
# Split data into training and testing sets (10% of the samples are held out).
# A fixed random_state makes the shuffle, and therefore the split, identical on every
# run, so results obtained from this hold-out set can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped, y, test_size=0.1, random_state=42
)

In [12]:
import numpy as np

# Collapse the length-1 timestep axis so each split is a plain (samples, features) matrix.
X_X = np.squeeze(X_train, axis=1)
X_Xtest = np.squeeze(X_test, axis=1)

print(X_X)

[[0.42896471 0.36271526 0.35750794 ... 0.01699169 0.01960058 0.01008617]
 [0.19956041 0.10940815 0.09813841 ... 0.01699169 0.01960058 0.01008617]
 [0.49229111 0.49467565 0.50812377 ... 0.01699169 0.01960058 0.01008617]
 ...
 [0.67497582 0.68170928 0.67886874 ... 0.01699169 0.01960058 0.01008617]
 [0.61721957 0.61548249 0.6174794  ... 0.01699169 0.01960058 0.01008617]
 [0.6636308  0.73088181 0.77260144 ... 0.01699169 0.01960058 0.01008617]]


In [13]:
# Undo the min-max scaling of the target so values are back in ROM-angle units.
def invers_prediction(predic, min_value=None, max_value=None):
    """Map min-max scaled target values back to their original range.

    Parameters:
        predic: array-like of scaled values (predictions or true targets).
        min_value: lower bound used when the target was scaled. When omitted,
            the notebook-level ``min_rom`` is used.
        max_value: upper bound used when the target was scaled. When omitted,
            the notebook-level ``max_rom`` is used.

    Returns:
        A list with one rescaled entry per element along the first axis of
        ``predic``, computed as ``value * (max - min) + min``.
    """
    # Fall back to the notebook globals only when no explicit bounds are given, so
    # existing single-argument calls keep working unchanged.
    low = min_rom if min_value is None else min_value
    high = max_rom if max_value is None else max_value

    # Vectorised rescale; asarray also lets plain lists/tuples be passed in.
    restored = np.asarray(predic) * (high - low) + low
    return list(restored)

## KNN Model...

In [14]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation: 10 folds taken in row order (KFold does not shuffle
# unless asked to).
# NOTE(review): the synthetic rows were generated from the original rows before this
# split and sit at the end of X, so a fold's test rows can have augmented copies among
# its training rows - confirm this is acceptable for the reported scores.
kf = KFold(n_splits=10)

# Initialise model
# NOTE(review): X is the unscaled feature matrix (X_normalised is not used here);
# a distance-based model is sensitive to feature scale - confirm this is intended.
model = KNeighborsRegressor(n_neighbors=1)

# Per-fold metrics, all computed on ROM angles mapped back to their original units.
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    # Fold-local names are used on purpose: assigning to X_train / X_test / y_train /
    # y_test here would overwrite the hold-out split made by train_test_split and break
    # any cell that is (re-)run afterwards and expects that split.
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Train and evaluate (fit() refits the model from scratch on every fold).
    model.fit(X_fold_train, y_fold_train)
    predictions_knn = model.predict(X_fold_test)

    # Undo the target scaling so the errors are expressed in ROM-angle units.
    inv_pred_knn = invers_prediction(predictions_knn)
    inver_y_test = invers_prediction(y_fold_test)

    r2 = r2_score(inver_y_test, inv_pred_knn)
    mae = mean_absolute_error(inver_y_test, inv_pred_knn)
    mse = mean_squared_error(inver_y_test, inv_pred_knn)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# Average of each metric over the folds
print(f"Average R2 : {np.mean(R2z):.6f}")
print(f"Average MSE : {np.mean(MSEz):.6f}")
print(f"Average MAE : {np.mean(MAEz):.6f}")
print(f"Average RMSE : {np.mean(RMSEz):.6f}")

Average R2 : 0.833469
Average MSE : 97.471649
Average MAE : 5.892066
Average RMSE : 8.098967


### Results Table...

In [15]:
# One row per metric first, then flipped so each row is a fold and each column a metric.
knn_metric_names = ["R2", "MSE", "MAE", "RMSE"]
knn_metric_values = [R2z, MSEz, MAEz, RMSEz]
pd_result_table_knn = pd.DataFrame(knn_metric_values, index=knn_metric_names)
transposed_table_knn = pd_result_table_knn.T
transposed_table_knn

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.978656,20.896855,3.637743,4.571308
1,0.977683,20.285714,3.285714,4.503967
2,0.939358,36.297493,4.781435,6.02474
3,0.968038,28.115759,3.855887,5.30243
4,0.98115,22.753631,3.750531,4.770077
5,0.979798,22.640057,3.217935,4.758157
6,0.837158,78.012114,5.544937,8.832447
7,0.599047,174.033657,8.153914,13.192182
8,0.11508,537.439791,18.044135,23.182748
9,0.958724,34.241423,4.648425,5.851617


## Support Vector Regression Model...

In [16]:
from sklearn.model_selection import KFold
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation: 10 folds taken in row order (KFold does not shuffle
# unless asked to).
# NOTE(review): the synthetic rows were generated from the original rows before this
# split and sit at the end of X, so a fold's test rows can have augmented copies among
# its training rows - confirm this is acceptable for the reported scores.
kf = KFold(n_splits=10)

# Initialise model
# NOTE(review): X is the unscaled feature matrix (X_normalised is not used here);
# an RBF-kernel SVR is sensitive to feature scale - confirm this is intended.
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# Per-fold metrics, all computed on ROM angles mapped back to their original units.
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    # Fold-local names are used on purpose: assigning to X_train / X_test / y_train /
    # y_test here would overwrite the hold-out split made by train_test_split and break
    # any cell that is (re-)run afterwards and expects that split.
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Train and evaluate (fit() refits the model from scratch on every fold).
    model.fit(X_fold_train, y_fold_train)
    predictions_svr = model.predict(X_fold_test)

    # Undo the target scaling so the errors are expressed in ROM-angle units.
    inv_pred_svr = invers_prediction(predictions_svr)
    inver_y_test = invers_prediction(y_fold_test)

    r2 = r2_score(inver_y_test, inv_pred_svr)
    mae = mean_absolute_error(inver_y_test, inv_pred_svr)
    mse = mean_squared_error(inver_y_test, inv_pred_svr)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# Average of each metric over the folds
print(f"Average R2 : {np.mean(R2z):.6f}")
print(f"Average MSE : {np.mean(MSEz):.6f}")
print(f"Average MAE : {np.mean(MAEz):.6f}")
print(f"Average RMSE : {np.mean(RMSEz):.6f}")

Average R2 : 0.872431
Average MSE : 84.330272
Average MAE : 6.453420
Average RMSE : 8.098677


In [17]:
# One row per metric first, then flipped so each row is a fold and each column a metric.
svr_metric_names = ["R2", "MSE", "MAE", "RMSE"]
svr_metric_values = [R2z, MSEz, MAEz, RMSEz]
pd_result_table_svr = pd.DataFrame(svr_metric_values, index=svr_metric_names)
transposed_table_svr = pd_result_table_svr.T
transposed_table_svr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.880788,116.716901,8.468126,10.80356
1,0.96237,34.20433,4.332857,5.848447
2,0.966066,20.311398,3.395496,4.506817
3,0.987101,11.346626,2.736462,3.368475
4,0.96742,39.325656,5.028527,6.271017
5,0.965511,38.650737,5.236807,6.216972
6,0.934138,31.552274,4.246053,5.617141
7,0.762263,103.189849,7.727724,10.15824
8,0.397892,365.6789,15.654461,19.122733
9,0.90076,82.326044,7.707689,9.07337


## Random Forest Regressor Model...

In [18]:
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation: 10 folds taken in row order (KFold does not shuffle
# unless asked to).
# NOTE(review): the synthetic rows were generated from the original rows before this
# split and sit at the end of X, so a fold's test rows can have augmented copies among
# its training rows - confirm this is acceptable for the reported scores.
kf = KFold(n_splits=10)

# Initialise model (fixed random_state so the forest is built the same way on every run)
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Per-fold metrics, all computed on ROM angles mapped back to their original units.
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    # Fold-local names are used on purpose: assigning to X_train / X_test / y_train /
    # y_test here would overwrite the hold-out split made by train_test_split and break
    # any cell that is (re-)run afterwards and expects that split.
    X_fold_train, X_fold_test = X[train_index], X[test_index]
    y_fold_train, y_fold_test = y[train_index], y[test_index]

    # Train and evaluate (fit() refits the model from scratch on every fold).
    model.fit(X_fold_train, y_fold_train)
    predictions_rfr = model.predict(X_fold_test)

    # Undo the target scaling so the errors are expressed in ROM-angle units.
    inv_pred_rfr = invers_prediction(predictions_rfr)
    inver_y_test = invers_prediction(y_fold_test)

    r2 = r2_score(inver_y_test, inv_pred_rfr)
    mae = mean_absolute_error(inver_y_test, inv_pred_rfr)
    mse = mean_squared_error(inver_y_test, inv_pred_rfr)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# Average of each metric over the folds
print(f"Average R2 : {np.mean(R2z):.6f}")
print(f"Average MSE : {np.mean(MSEz):.6f}")
print(f"Average MAE : {np.mean(MAEz):.6f}")
print(f"Average RMSE : {np.mean(RMSEz):.6f}")

Average R2 : 0.879854
Average MSE : 73.685708
Average MAE : 4.949798
Average RMSE : 6.924416


In [19]:
# One row per metric first, then flipped so each row is a fold and each column a metric.
rfr_metric_names = ["R2", "MSE", "MAE", "RMSE"]
rfr_metric_values = [R2z, MSEz, MAEz, RMSEz]
pd_result_table_rfr = pd.DataFrame(rfr_metric_values, index=rfr_metric_names)
transposed_table_rfr = pd_result_table_rfr.T
transposed_table_rfr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.974277,25.184392,3.81105,5.018405
1,0.981725,16.611689,3.279112,4.075744
2,0.937172,37.605879,3.885652,6.132363
3,0.990682,8.196572,2.307594,2.862966
4,0.991054,10.798191,2.516204,3.28606
5,0.98386,18.087034,3.206893,4.252885
6,0.836449,78.351682,3.904582,8.851649
7,0.905894,40.846872,4.47615,6.391156
8,0.25927,449.86883,16.237434,21.210112
9,0.938153,51.30594,5.873304,7.162816
