# K-Fold Cross-Validation...

### Python Library ...

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense,Dropout
from tensorflow.keras.optimizers import Adam,SGD
from matplotlib import pyplot as plt
from imblearn.over_sampling import SMOTE

### Load All The Data....

In [2]:
# Load the data
file_path = 'All_Data.xlsx'  # Replace with your file path
data_01 = pd.read_excel(file_path)

### Creating Synthetic Data ....

In [3]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Original dataset (features only)
# X = np.array([[1, 2], [2, 3], [3, 4], [5, 6]])
Data = data_01.loc[:, 'Average ROM Angle':90].values 

# Number of synthetic samples to generate
num_synthetic_samples = 100

# Fit KNN to find nearest neighbours
knn = NearestNeighbors(n_neighbors=2)  # Use 2 neighbours for simplicity
knn.fit(Data)

# Generate synthetic samples
synthetic_samples = []
for _ in range(num_synthetic_samples):
    # Randomly pick a data point from X
    idx = np.random.randint(0, Data.shape[0])
    point = Data[idx]

    # Find neighbours of the point
    neighbours = knn.kneighbors([point], return_distance=False)[0]
    
    # Randomly choose one neighbour
    neighbour = Data[neighbours[np.random.randint(1, len(neighbours))]]  # Skip self (idx=0)
    
    # Interpolate between the point and the neighbour
    alpha = np.random.uniform(0, 1)  # Random weight for interpolation
    synthetic_point = (point + alpha * (neighbour - point))
    
    synthetic_samples.append(synthetic_point)

# Combine original and synthetic samples
synthetic_samples = np.array(synthetic_samples)
augmented_dataset = np.vstack([Data, synthetic_samples])

data = pd.DataFrame(augmented_dataset)
# data.head()

### Extract EEG features and target...

In [4]:
# Extract EEG features and target
X = data.loc[:, 1:181].values  # EEG data (columns D to MI) iloc[:,3:]
y = data.loc[:, 0].values  # Target ROM angle (column C)
max_rom = y.max()
min_rom = y.min()
# Check for extreme values in target
print(f"Max ROM Angle: {max_rom}, Min ROM Angle: {min_rom}")

# Normalise the target variable
y = (y - min_rom) / (max_rom - min_rom)


Max ROM Angle: 155.0, Min ROM Angle: 105.0


### Normalise the features...

In [5]:
# Normalise the features
scaler = MinMaxScaler()
X_normalised = scaler.fit_transform(X)

# Reshape X for GRU (samples, timesteps, features)
n_features = X_normalised.shape[1]
X_reshaped = X_normalised.reshape(X_normalised.shape[0], 1, n_features)

In [6]:
reshaped_array = X_reshaped.squeeze(axis=1)  # Remove the second dimension
print(reshaped_array.shape)

(127, 181)


In [7]:
reshaped_array

array([[0.96870379, 0.92956243, 0.93430456, ..., 1.        , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       [0.34401183, 0.03094984, 0.        , ..., 0.07936508, 0.07277167,
        0.02661783],
       ...,
       [0.57891363, 0.5418544 , 0.38562723, ..., 0.27525091, 0.27088822,
        0.26612368],
       [0.3674107 , 0.30414019, 0.27093747, ..., 0.        , 0.        ,
        0.        ],
       [0.25911364, 0.22050788, 0.21840768, ..., 0.02811203, 0.03211936,
        0.03515522]])

### Split data into training and testing sets ...

In [8]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.1)

In [9]:
import numpy as np

# Reshape to 2D
X_X = X_train.squeeze(axis=1)
X_Xtest = X_test.squeeze(axis=1)

print(X_X)

[[0.5744209  0.58057631 0.53545904 ... 0.56434676 0.55848596 0.58217338]
 [0.28912903 0.62853852 0.63077364 ... 0.         0.         0.        ]
 [0.76193493 0.74893486 0.78870141 ... 0.62869958 0.62762527 0.6378476 ]
 ...
 [0.22577671 0.16723609 0.14957408 ... 0.         0.         0.        ]
 [0.22399174 0.1663633  0.14904421 ... 0.         0.         0.        ]
 [0.23100459 0.17803939 0.16264516 ... 0.         0.         0.        ]]


In [10]:
# Function for invers metrixes...
def invers_prediction(predic):
    prz = predic
    Predictions = []
    for i in range(prz.shape[0]):
        pred = prz[i]*(max_rom-min_rom) + min_rom
        Predictions.append(pred)

    return Predictions

## KNN Model...

In [46]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation
kf = KFold(n_splits=10)

# Initialise model
model = KNeighborsRegressor(n_neighbors=1)

# Perform k-fold
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train and evaluate
    model.fit(X_train, y_train)
    predictions_knn = model.predict(X_test)

    inv_pred_knn = invers_prediction(predictions_knn )
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test,inv_pred_knn)
    mae = mean_absolute_error(inver_y_test,inv_pred_knn)
    mse = mean_squared_error(inver_y_test,inv_pred_knn)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# print(R2z)
# print(MSEz)
# print(MAEz)
# print(RMSEz)
# print(" \n")

# Average accuracy
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.948014
Average MSE : 7.760204
Average MAE : 1.381786
Average RMSE : 2.006526


### Results Table...

In [47]:
pd_result_table_knn = pd.DataFrame(data = [R2z,MSEz,MAEz,RMSEz], index = ["R2","MSE","MAE","RMSE"])
transposed_table_knn = pd_result_table_knn.transpose()
transposed_table_knn

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.693911,59.129093,4.430968,7.689544
1,0.923459,3.56903,1.501328,1.889188
2,0.979217,0.979315,0.801429,0.989604
3,0.980338,2.582316,1.138268,1.606959
4,0.994687,0.605803,0.55887,0.778334
5,0.969648,3.346397,1.352539,1.829316
6,0.987076,1.0947,0.728928,1.046279
7,0.988542,1.028425,0.743954,1.014113
8,0.987118,2.003031,1.216768,1.415285
9,0.976144,3.263933,1.344803,1.806636


## 	Support Vector Regression 

In [48]:
from sklearn.model_selection import KFold
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation
kf = KFold(n_splits=10)

# Initialise model
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

# Perform k-fold
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train and evaluate
    model.fit(X_train, y_train)
    predictions_svr = model.predict(X_test)

    inv_pred_svr = invers_prediction(predictions_svr)
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test,inv_pred_svr)
    mae = mean_absolute_error(inver_y_test,inv_pred_svr)
    mse = mean_squared_error(inver_y_test,inv_pred_svr)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# print(R2z)
# print(MSEz)
# print(MAEz)
# print(RMSEz)
# print(" \n")

# Average accuracy
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.803696
Average MSE : 18.509195
Average MAE : 3.389419
Average RMSE : 3.995176


In [49]:
pd_result_table_svr = pd.DataFrame(data = [R2z,MSEz,MAEz,RMSEz], index = ["R2","MSE","MAE","RMSE"])
transposed_table_svr = pd_result_table_svr.transpose()
transposed_table_svr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.656371,66.381072,6.840942,8.147458
1,0.313708,32.001188,5.052802,5.656959
2,0.685755,14.807428,3.424892,3.848042
3,0.942447,7.558939,1.945472,2.749352
4,0.899167,11.497509,2.797446,3.390798
5,0.913922,9.490316,2.573259,3.080636
6,0.905561,7.999321,2.096159,2.828307
7,0.903377,8.672771,2.515685,2.944957
8,0.91437,13.314672,3.272796,3.648928
9,0.902287,13.36873,3.374741,3.656327


## Random Forest Regressor Model...

In [11]:
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Define k-fold cross-validation
kf = KFold(n_splits=10)

# Initialise model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Perform k-fold
R2z = []
MSEz = []
MAEz = []
RMSEz = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train and evaluate
    model.fit(X_train, y_train)
    predictions_rfr = model.predict(X_test)

    inv_pred_rfr = invers_prediction(predictions_rfr)
    inver_y_test = invers_prediction(y_test)

    r2 = r2_score(inver_y_test,inv_pred_rfr)
    mae = mean_absolute_error(inver_y_test,inv_pred_rfr)
    mse = mean_squared_error(inver_y_test,inv_pred_rfr)
    rmse = np.sqrt(mse)

    R2z.append(r2)
    MSEz.append(mse)
    MAEz.append(mae)
    RMSEz.append(rmse)

# print(R2z)
# print(MSEz)
# print(MAEz)
# print(RMSEz)
# print(" \n")

# Average accuracy
print(f"Average R2 : {sum(R2z) / len(R2z):.6f}")
print(f"Average MSE : {sum(MSEz) / len(MSEz):.6f}")
print(f"Average MAE : {sum(MAEz) / len(MAEz):.6f}")
print(f"Average RMSE : {sum(RMSEz) / len(RMSEz):.6f}")

Average R2 : 0.901105
Average MSE : 8.388585
Average MAE : 1.847534
Average RMSE : 2.487754


In [12]:
pd_result_table_rfr = pd.DataFrame(data = [R2z,MSEz,MAEz,RMSEz], index = ["R2","MSE","MAE","RMSE"])
transposed_table_rfr = pd_result_table_rfr.transpose()
transposed_table_rfr

Unnamed: 0,R2,MSE,MAE,RMSE
0,0.823812,34.035417,4.658643,5.833988
1,0.697794,14.091627,2.840062,3.753882
2,0.97193,1.742091,0.913886,1.319883
3,0.943781,4.190285,1.382041,2.047018
4,0.948241,3.883101,1.321665,1.970558
5,0.89316,4.867509,1.611156,2.206243
6,0.991379,1.426605,0.818089,1.194406
7,0.767034,16.429321,3.215079,4.05331
8,0.986421,1.053367,0.70037,1.026337
9,0.987504,2.166532,1.014344,1.471915
