In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored in Drive can be
# opened through ordinary file paths under that directory.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
# Read the feature matrix CSV from the mounted Drive folder into `df`
# and print its first five rows as a quick sanity check.
feature_matrix_path = '/content/drive/MyDrive/Colab Notebooks/Feature matrix.csv'
df = pd.read_csv(feature_matrix_path)
print(df.head(5))

   id  label     avg_X  max_X  min_X  X_median     avg_Y  max_Y  min_Y  \
0  15    0.0  19.21875    7.0    1.0      19.0 -42.50625  -64.0  -27.0   
1  15    0.0  17.93750   22.0   15.0      18.0 -43.73750  -45.0  -41.0   
2  15    0.0 -27.60625  -37.0  -20.0     -27.0   9.96250    9.0   -1.0   
3  15    0.0 -28.59375  -38.0  -17.0     -29.0  10.63750    9.0   -1.0   
4  15    0.0 -50.07500  -99.0  -13.0     -48.0 -29.84375    6.0   -1.0   

   Y_median  ...   min_EDA  EDA_median   avg_HR  max_HR  min_HR  HR_median  \
0     -43.0  ...  3.801695    3.976439  103.190  103.27  103.07     103.20   
1     -44.0  ...  3.547946    3.619714  103.188  103.30  103.00     103.25   
2      10.0  ...  4.737687    4.808173  109.362  110.00  108.68     109.38   
3      10.0  ...  4.615938    4.645414  112.220  112.35  111.90     112.28   
4     -29.0  ...  4.038785    4.157969   99.768   99.92   99.62      99.77   

   avg_TEMP  max_TEMP  min_TEMP  TEMP_median  
0    33.876     33.95     33.81        

In [None]:
# Count how many rows carry each value of the 'label' column; the printed
# counts show how unevenly the classes are represented.
class_counts = df['label'].value_counts()
print(class_counts)

2.0    53081
0.0    13551
1.0     4851
Name: label, dtype: int64


XGBoost

In [None]:
import pandas as pd
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder


# Grid search over XGBoost hyper-parameters, scored with
# leave-one-participant-out cross-validation.
#
# For every (max_depth, learning_rate, n_estimators) combination, each
# participant is held out once; the training rows are resampled and scaled,
# a model is fitted, and the per-fold metrics are printed. The combination
# with the highest mean fold accuracy is kept in best_params / best_accuracy.

# Encode labels as integer class ids.
# NOTE: this overwrites df['label'] in place.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Prepare features and target variable
X = df.drop(columns=['id', 'label'])  # Exclude 'id' and 'label' columns
y = df['label']
participants = df['id']  # Group key: all rows of one participant form one test fold

# Define oversampler (SMOTE)
oversampler = SMOTE(random_state=42)

# Define undersampler (RandomUnderSampler)
# NOTE(review): with default sampling strategies SMOTE presumably already
# equalizes all class sizes, which would leave nothing for this step to
# remove -- confirm whether a custom sampling_strategy was intended.
undersampler = RandomUnderSampler(random_state=42)

# Implement leave-one-participant-out cross-validation
logo = LeaveOneGroupOut()

# Lists to store the per-fold evaluation metrics. They are re-created for
# every parameter combination below, so after the search they hold the folds
# of the LAST combination evaluated (not necessarily the best one).
accuracies = []
precisions = []
recalls = []
f1_scores = []
conf_matrices = []

# Not updated anywhere in this cell; kept in case a later cell refers to it.
fold_number = 1  # Counter for fold number

# XGBoost parameters for tuning
params = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [100, 200, 300]
}

best_params = None
best_accuracy = 0

# Iterate over different parameter combinations
for max_depth in params['max_depth']:
    for learning_rate in params['learning_rate']:
        for n_estimators in params['n_estimators']:
            print(f"Training XGBoost with max_depth={max_depth}, learning_rate={learning_rate}, n_estimators={n_estimators}:")

            # Start every combination with empty metric lists. Without this
            # reset the mean below would be a running average over all
            # combinations evaluated so far, and the comparison against
            # best_accuracy would not reflect the current combination.
            accuracies = []
            precisions = []
            recalls = []
            f1_scores = []
            conf_matrices = []

            for train_index, test_index in logo.split(X, y, groups=participants):
                # split() yields positional indices, so use .iloc for both
                # X and y; plain y[...] would look the values up by index
                # label and only works for a default RangeIndex.
                X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]

                # Apply SMOTE for oversampling to address class imbalance
                # (training rows only; the held-out participant is untouched)
                X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

                # Apply Random Under Sampling to the majority class
                X_train_resampled, y_train_resampled = undersampler.fit_resample(X_train_resampled, y_train_resampled)

                # Normalize features using StandardScaler; the scaler is
                # fitted on the training fold only and then applied to the
                # test fold, so no test statistics leak into training.
                scaler = StandardScaler()
                X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
                X_test_scaled = scaler.transform(X_test)

                # Train XGBoost model
                xgb = XGBClassifier(max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators)
                xgb.fit(X_train_resampled_scaled, y_train_resampled)

                # Evaluate model
                y_pred = xgb.predict(X_test_scaled)

                # Calculate evaluation metrics. A held-out participant may
                # lack some classes (or the model may never predict one);
                # zero_division=0 scores those undefined terms as 0, which is
                # the value scikit-learn falls back to anyway, but without
                # emitting a warning on every such fold.
                accuracy = accuracy_score(y_test, y_pred)
                precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
                recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
                f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
                conf_matrix = confusion_matrix(y_test, y_pred)

                # Print evaluation metrics and test participant ID
                test_id = participants.iloc[test_index].iloc[0]
                print(f"Test ID: {test_id}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}")

                # Store evaluation metrics
                accuracies.append(accuracy)
                precisions.append(precision)
                recalls.append(recall)
                f1_scores.append(f1)
                conf_matrices.append(conf_matrix)

            # Mean accuracy over the folds of THIS combination only
            mean_accuracy = sum(accuracies) / len(accuracies)
            print(f"Mean accuracy: {mean_accuracy}")

            # Save best parameters
            if mean_accuracy > best_accuracy:
                best_accuracy = mean_accuracy
                best_params = {'max_depth': max_depth, 'learning_rate': learning_rate, 'n_estimators': n_estimators}

print("Best parameters:", best_params)
print("Best accuracy:", best_accuracy)


Training XGBoost with max_depth=3, learning_rate=0.01, n_estimators=100:
Test ID: 15, Accuracy: 0.7277406073082862, Precision: 0.9020293851759812, Recall: 0.7277406073082862, F1-score: 0.8055657124687493
Test ID: 5C, Accuracy: 0.598487642936186, Precision: 0.6220526008715797, Recall: 0.598487642936186, F1-score: 0.6080527239695745
Test ID: 6B, Accuracy: 0.48985167837626853, Precision: 0.8929551383368798, Recall: 0.48985167837626853, F1-score: 0.5524241627449027
Test ID: 6D, Accuracy: 0.8434712084347121, Precision: 0.7220242108130566, Recall: 0.8434712084347121, F1-score: 0.7780369410550431
Test ID: 7A, Accuracy: 0.6907302038880986, Precision: 0.6468110398328624, Recall: 0.6907302038880986, F1-score: 0.6477962190283598
Test ID: 7E, Accuracy: 0.4035197988686361, Precision: 0.25036968652589536, Recall: 0.4035197988686361, F1-score: 0.30900978775878535
Test ID: 83, Accuracy: 0.733224793580649, Precision: 0.7665721502486013, Recall: 0.733224793580649, F1-score: 0.749385492034822
Test ID: 8B

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.04307334109429569, Precision: 1.0, Recall: 0.04307334109429569, F1-score: 0.08258928571428571
Test ID: F5, Accuracy: 0.7839381720430108, Precision: 0.8636975189214959, Recall: 0.7839381720430108, F1-score: 0.8218873357678278
Training XGBoost with max_depth=3, learning_rate=0.01, n_estimators=200:
Test ID: 15, Accuracy: 0.6963458569222851, Precision: 0.901077120204872, Recall: 0.6963458569222851, F1-score: 0.7851658522077741
Test ID: 5C, Accuracy: 0.5756178531907046, Precision: 0.5649171531467868, Recall: 0.5756178531907046, F1-score: 0.5699411667208174
Test ID: 6B, Accuracy: 0.2576112412177986, Precision: 0.8815381140675844, Recall: 0.2576112412177986, F1-score: 0.2556802142628813
Test ID: 6D, Accuracy: 0.8442822384428224, Precision: 0.7221288115046713, Recall: 0.8442822384428224, F1-score: 0.7784425798600635
Test ID: 7A, Accuracy: 0.6659554291133238, Precision: 0.6231924396031182, Recall: 0.6659554291133238, F1-score: 0.6332177289808699
Test ID: 7E, Accuracy: 

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.030558789289871945, Precision: 1.0, Recall: 0.030558789289871945, F1-score: 0.059305280994069474
Test ID: F5, Accuracy: 0.5551075268817204, Precision: 0.8518672660264944, Recall: 0.5551075268817205, F1-score: 0.6668846802213341
Training XGBoost with max_depth=3, learning_rate=0.01, n_estimators=300:
Test ID: 15, Accuracy: 0.6855378281008749, Precision: 0.9022901173128548, Recall: 0.6855378281008749, F1-score: 0.7780079906130707
Test ID: 5C, Accuracy: 0.5719291774253044, Precision: 0.5717061640774178, Recall: 0.5719291774253044, F1-score: 0.5718175210940313
Test ID: 6B, Accuracy: 0.24726775956284153, Precision: 0.8807254215456117, Recall: 0.24726775956284153, F1-score: 0.23943551567123844
Test ID: 6D, Accuracy: 0.8453636117869695, Precision: 0.7222680138739525, Recall: 0.8453636117869695, F1-score: 0.7789828769616228
Test ID: 7A, Accuracy: 0.661688003793267, Precision: 0.6209680728547569, Recall: 0.661688003793267, F1-score: 0.6316451070995521
Test ID: 7E, Accur

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.02590221187427241, Precision: 1.0, Recall: 0.02590221187427241, F1-score: 0.050496453900709226
Test ID: F5, Accuracy: 0.5456989247311828, Precision: 0.8475783865865468, Recall: 0.5456989247311828, F1-score: 0.6595881897925063
Training XGBoost with max_depth=3, learning_rate=0.05, n_estimators=100:
Test ID: 15, Accuracy: 0.7076685537828101, Precision: 0.9046122071538426, Recall: 0.7076685537828101, F1-score: 0.7928655738766583
Test ID: 5C, Accuracy: 0.5599409811877536, Precision: 0.5684545550668318, Recall: 0.5599409811877536, F1-score: 0.5639856616603571
Test ID: 6B, Accuracy: 0.2773224043715847, Precision: 0.8828767946804289, Recall: 0.2773224043715847, F1-score: 0.285761535127366
Test ID: 6D, Accuracy: 0.8380643417139767, Precision: 0.7306196792720874, Recall: 0.8380643417139767, F1-score: 0.776742696766227
Test ID: 7A, Accuracy: 0.6709340919867236, Precision: 0.6289818457047076, Recall: 0.6709340919867236, F1-score: 0.6379240487195191
Test ID: 7E, Accuracy: 

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.038707799767171126, Precision: 1.0, Recall: 0.038707799767171126, F1-score: 0.07453068086298682
Test ID: F5, Accuracy: 0.6058467741935484, Precision: 0.8550759401548167, Recall: 0.6058467741935485, F1-score: 0.7058333367625479
Training XGBoost with max_depth=3, learning_rate=0.05, n_estimators=200:
Test ID: 15, Accuracy: 0.7380339680905815, Precision: 0.9093939473827318, Recall: 0.7380339680905815, F1-score: 0.8129167252298551
Test ID: 5C, Accuracy: 0.5627074880118038, Precision: 0.5785787382160547, Recall: 0.5627074880118038, F1-score: 0.569855412931166
Test ID: 6B, Accuracy: 0.3805620608899297, Precision: 0.8873224753324338, Recall: 0.3805620608899297, F1-score: 0.4270340168921274
Test ID: 6D, Accuracy: 0.8513111651797783, Precision: 0.8196561781314416, Recall: 0.8513111651797783, F1-score: 0.8247196634150831
Test ID: 7A, Accuracy: 0.6819582740635373, Precision: 0.6427588679191487, Recall: 0.6819582740635373, F1-score: 0.6491017792003236
Test ID: 7E, Accuracy

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.06169965075669383, Precision: 1.0, Recall: 0.06169965075669383, F1-score: 0.1162280701754386
Test ID: F5, Accuracy: 0.6414650537634409, Precision: 0.8590002511928136, Recall: 0.6414650537634409, F1-score: 0.7316012431233593
Training XGBoost with max_depth=3, learning_rate=0.05, n_estimators=300:
Test ID: 15, Accuracy: 0.7627380339680906, Precision: 0.9176514771278655, Recall: 0.7627380339680906, F1-score: 0.8295147381142812
Test ID: 5C, Accuracy: 0.566396163777204, Precision: 0.5831529120560814, Recall: 0.566396163777204, F1-score: 0.573870707886432
Test ID: 6B, Accuracy: 0.452576112412178, Precision: 0.8890359194464525, Recall: 0.452576112412178, F1-score: 0.5122696195032557
Test ID: 6D, Accuracy: 0.8445525817788592, Precision: 0.8403099996912411, Recall: 0.8445525817788592, F1-score: 0.8423219735857994
Test ID: 7A, Accuracy: 0.6781650071123755, Precision: 0.643931202636165, Recall: 0.6781650071123755, F1-score: 0.6516901646808182
Test ID: 7E, Accuracy: 0.4198

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.04831199068684517, Precision: 1.0, Recall: 0.04831199068684517, F1-score: 0.09217101610216547
Test ID: F5, Accuracy: 0.6451612903225806, Precision: 0.8532381556948981, Recall: 0.6451612903225806, F1-score: 0.7337559295010931
Training XGBoost with max_depth=3, learning_rate=0.1, n_estimators=100:
Test ID: 15, Accuracy: 0.7313432835820896, Precision: 0.9096178287955251, Recall: 0.7313432835820896, F1-score: 0.8086614727082718
Test ID: 5C, Accuracy: 0.5623386204352637, Precision: 0.5749338600480245, Recall: 0.5623386204352637, F1-score: 0.5681509830759743
Test ID: 6B, Accuracy: 0.41217798594847777, Precision: 0.8887844772525384, Recall: 0.41217798594847777, F1-score: 0.4655221944180622
Test ID: 6D, Accuracy: 0.8418491484184915, Precision: 0.8108721310518694, Recall: 0.8418491484184915, F1-score: 0.8199642849718828
Test ID: 7A, Accuracy: 0.6769796111901375, Precision: 0.6384627832844707, Recall: 0.6769796111901375, F1-score: 0.6461813444429408
Test ID: 7E, Accuracy

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.026484284051222353, Precision: 1.0, Recall: 0.026484284051222353, F1-score: 0.05160192798412249
Test ID: F5, Accuracy: 0.635752688172043, Precision: 0.861018320639069, Recall: 0.635752688172043, F1-score: 0.7276726653936748
Training XGBoost with max_depth=3, learning_rate=0.1, n_estimators=200:
Test ID: 15, Accuracy: 0.7786927431806485, Precision: 0.921291584854388, Recall: 0.7786927431806485, F1-score: 0.8399921840863858
Test ID: 5C, Accuracy: 0.5745112504610844, Precision: 0.5875229799534485, Recall: 0.5745112504610844, F1-score: 0.5804529760645943
Test ID: 6B, Accuracy: 0.5856752537080406, Precision: 0.8862265309746228, Recall: 0.5856752537080406, F1-score: 0.6486428771273072
Test ID: 6D, Accuracy: 0.8469856718031901, Precision: 0.8672869840672398, Recall: 0.8469856718031901, F1-score: 0.8550374821093212
Test ID: 7A, Accuracy: 0.6787577050734945, Precision: 0.6476613593061928, Recall: 0.6787577050734945, F1-score: 0.6554251334774882
Test ID: 7E, Accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.050349243306169966, Precision: 1.0, Recall: 0.050349243306169966, F1-score: 0.09587143252978662
Test ID: F5, Accuracy: 0.636760752688172, Precision: 0.8500253725926484, Recall: 0.636760752688172, F1-score: 0.7277165435225025
Training XGBoost with max_depth=3, learning_rate=0.1, n_estimators=300:
Test ID: 15, Accuracy: 0.8003088008234689, Precision: 0.9232664037473798, Recall: 0.8003088008234689, F1-score: 0.8536409212743836
Test ID: 5C, Accuracy: 0.5857617115455551, Precision: 0.5977333229941674, Recall: 0.5857617115455551, F1-score: 0.5912344463472651
Test ID: 6B, Accuracy: 0.631928181108509, Precision: 0.8761685939889717, Recall: 0.631928181108509, F1-score: 0.6909614087161382
Test ID: 6D, Accuracy: 0.7985942146526088, Precision: 0.8660408651005013, Recall: 0.7985942146526088, F1-score: 0.8202181867031016
Test ID: 7A, Accuracy: 0.6839734471313419, Precision: 0.653115066305103, Recall: 0.6839734471313419, F1-score: 0.6601077914882719
Test ID: 7E, Accuracy: 0.4

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.0989522700814901, Precision: 1.0, Recall: 0.0989522700814901, F1-score: 0.18008474576271183
Test ID: F5, Accuracy: 0.6478494623655914, Precision: 0.8509279232328986, Recall: 0.6478494623655914, F1-score: 0.735387303701071
Training XGBoost with max_depth=5, learning_rate=0.01, n_estimators=100:
Test ID: 15, Accuracy: 0.7606793618116315, Precision: 0.9121942863685893, Recall: 0.7606793618116315, F1-score: 0.827503110619438
Test ID: 5C, Accuracy: 0.600147547030616, Precision: 0.5946794875484421, Recall: 0.600147547030616, F1-score: 0.5973036543279245
Test ID: 6B, Accuracy: 0.4088602654176425, Precision: 0.8588262800828151, Recall: 0.4088602654176425, F1-score: 0.46701062109222347
Test ID: 6D, Accuracy: 0.8361719383617194, Precision: 0.7210750606382252, Recall: 0.8361719383617194, F1-score: 0.7743700665923462
Test ID: 7A, Accuracy: 0.6676149834044571, Precision: 0.623354157869573, Recall: 0.6676149834044571, F1-score: 0.6330582295154525
Test ID: 7E, Accuracy: 0.385

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.13242142025611175, Precision: 1.0, Recall: 0.13242142025611175, F1-score: 0.23387304034952452
Test ID: F5, Accuracy: 0.7493279569892473, Precision: 0.8615430766869545, Recall: 0.7493279569892473, F1-score: 0.8013875577218027
Training XGBoost with max_depth=5, learning_rate=0.01, n_estimators=200:
Test ID: 15, Accuracy: 0.7092125579001544, Precision: 0.9128816039646507, Recall: 0.7092125579001544, F1-score: 0.7944039249446463
Test ID: 5C, Accuracy: 0.6012541497602361, Precision: 0.5919514456824255, Recall: 0.6012541497602361, F1-score: 0.5962889145613558
Test ID: 6B, Accuracy: 0.3665105386416862, Precision: 0.8522273831297135, Recall: 0.3665105386416862, F1-score: 0.41595797150230457
Test ID: 6D, Accuracy: 0.8461746417950797, Precision: 0.7902714916267637, Recall: 0.8461746417950797, F1-score: 0.7954556807921535
Test ID: 7A, Accuracy: 0.6536273115220483, Precision: 0.6335991647801141, Recall: 0.6536273115220483, F1-score: 0.6413538140872383
Test ID: 7E, Accuracy

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.0689755529685681, Precision: 1.0, Recall: 0.0689755529685681, F1-score: 0.1290498230329431
Test ID: F5, Accuracy: 0.5520833333333334, Precision: 0.8382257662670823, Recall: 0.5520833333333333, F1-score: 0.6652125476533964
Training XGBoost with max_depth=5, learning_rate=0.01, n_estimators=300:
Test ID: 15, Accuracy: 0.7334019557385486, Precision: 0.917856730455219, Recall: 0.7334019557385486, F1-score: 0.8107124972159533
Test ID: 5C, Accuracy: 0.5927701954998156, Precision: 0.5848593353817324, Recall: 0.5927701954998156, F1-score: 0.5886024181348712
Test ID: 6B, Accuracy: 0.41198282591725216, Precision: 0.8565620962165065, Recall: 0.41198282591725216, F1-score: 0.47114661502144345
Test ID: 6D, Accuracy: 0.8556366585563666, Precision: 0.8234940061332038, Recall: 0.8556366585563666, F1-score: 0.8220717119315265
Test ID: 7A, Accuracy: 0.670697012802276, Precision: 0.6407166913967819, Recall: 0.670697012802276, F1-score: 0.6493913377255185
Test ID: 7E, Accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.06548311990686845, Precision: 1.0, Recall: 0.06548311990686845, F1-score: 0.12291723572794318
Test ID: F5, Accuracy: 0.5756048387096774, Precision: 0.842722460257147, Recall: 0.5756048387096774, F1-score: 0.6832333275576398
Training XGBoost with max_depth=5, learning_rate=0.05, n_estimators=100:
Test ID: 15, Accuracy: 0.7637673700463201, Precision: 0.9190514477092387, Recall: 0.7637673700463201, F1-score: 0.8303370743245755
Test ID: 5C, Accuracy: 0.5918480265584655, Precision: 0.5880639325714073, Recall: 0.5918480265584655, F1-score: 0.5899064637569821
Test ID: 6B, Accuracy: 0.4564793130366901, Precision: 0.856851224381004, Recall: 0.4564793130366901, F1-score: 0.5218885668701254
Test ID: 6D, Accuracy: 0.8737496620708299, Precision: 0.8622318249546982, Recall: 0.8737496620708299, F1-score: 0.8659292907704327
Test ID: 7A, Accuracy: 0.6722380275011854, Precision: 0.6377712631728176, Recall: 0.6722380275011854, F1-score: 0.6464857568217636
Test ID: 7E, Accuracy: 0

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.07508731082654249, Precision: 1.0, Recall: 0.07508731082654249, F1-score: 0.1396859772604223
Test ID: F5, Accuracy: 0.6018145161290323, Precision: 0.8464097630392059, Recall: 0.6018145161290323, F1-score: 0.7027390629172181
Training XGBoost with max_depth=5, learning_rate=0.05, n_estimators=200:
Test ID: 15, Accuracy: 0.7920741121976326, Precision: 0.9240933219596217, Recall: 0.7920741121976326, F1-score: 0.8487138474471042
Test ID: 5C, Accuracy: 0.5916635927701955, Precision: 0.601845124241534, Recall: 0.5916635927701955, F1-score: 0.5963705654003582
Test ID: 6B, Accuracy: 0.5690866510538641, Precision: 0.8335113558013391, Recall: 0.5690866510538641, F1-score: 0.6372477418593882
Test ID: 6D, Accuracy: 0.8369829683698297, Precision: 0.8767925745531495, Recall: 0.8369829683698297, F1-score: 0.8505462084028861
Test ID: 7A, Accuracy: 0.6752015173067805, Precision: 0.6405715831679574, Recall: 0.6752015173067805, F1-score: 0.648822857382315
Test ID: 7E, Accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.10681024447031431, Precision: 1.0, Recall: 0.10681024447031431, F1-score: 0.19300552195635023
Test ID: F5, Accuracy: 0.644489247311828, Precision: 0.8496511488720119, Recall: 0.644489247311828, F1-score: 0.7329847058720135
Training XGBoost with max_depth=5, learning_rate=0.05, n_estimators=300:
Test ID: 15, Accuracy: 0.7946474523932063, Precision: 0.9242484949778131, Recall: 0.7946474523932063, F1-score: 0.8503276532931565
Test ID: 5C, Accuracy: 0.5898192548874954, Precision: 0.604171837383078, Recall: 0.5898192548874954, F1-score: 0.5962310178494535
Test ID: 6B, Accuracy: 0.594847775175644, Precision: 0.8240677852695351, Recall: 0.594847775175644, F1-score: 0.6603395444641965
Test ID: 6D, Accuracy: 0.7977831846444985, Precision: 0.875950585145652, Recall: 0.7977831846444985, F1-score: 0.821149695309325
Test ID: 7A, Accuracy: 0.6767425320056899, Precision: 0.6426676660042566, Recall: 0.6767425320056899, F1-score: 0.6506760904625534
Test ID: 7E, Accuracy: 0.4098

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.14668218859138532, Precision: 1.0, Recall: 0.14668218859138532, F1-score: 0.2558375634517766
Test ID: F5, Accuracy: 0.645497311827957, Precision: 0.8497724781538336, Recall: 0.6454973118279571, F1-score: 0.7336814453000631
Training XGBoost with max_depth=5, learning_rate=0.1, n_estimators=100:
Test ID: 15, Accuracy: 0.7910447761194029, Precision: 0.9266930061482106, Recall: 0.7910447761194029, F1-score: 0.8484621871214363
Test ID: 5C, Accuracy: 0.5962744374769458, Precision: 0.6060177023514567, Recall: 0.5962744374769458, F1-score: 0.6007836608364077
Test ID: 6B, Accuracy: 0.5620608899297423, Precision: 0.8246912415900095, Recall: 0.5620608899297423, F1-score: 0.6314532727356341
Test ID: 6D, Accuracy: 0.8350905650175723, Precision: 0.874160510606107, Recall: 0.8350905650175723, F1-score: 0.8485925411205801
Test ID: 7A, Accuracy: 0.6729492650545282, Precision: 0.6423815484523882, Recall: 0.6729492650545282, F1-score: 0.650858113418571
Test ID: 7E, Accuracy: 0.41

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.13125727590221187, Precision: 1.0, Recall: 0.13125727590221187, F1-score: 0.23205556984821193
Test ID: F5, Accuracy: 0.6394489247311828, Precision: 0.8521121725308396, Recall: 0.6394489247311828, F1-score: 0.7297315922290889
Training XGBoost with max_depth=5, learning_rate=0.1, n_estimators=200:
Test ID: 15, Accuracy: 0.7740607308286156, Precision: 0.9283879837482093, Recall: 0.7740607308286156, F1-score: 0.8379593872110712
Test ID: 5C, Accuracy: 0.5888970859461453, Precision: 0.6066403796993454, Recall: 0.5888970859461453, F1-score: 0.5965947939896568
Test ID: 6B, Accuracy: 0.6065573770491803, Precision: 0.80720951119356, Recall: 0.6065573770491803, F1-score: 0.6701440635866865
Test ID: 6D, Accuracy: 0.7807515544741822, Precision: 0.8818707419897946, Recall: 0.7807515544741822, F1-score: 0.8086674334817937
Test ID: 7A, Accuracy: 0.6888335704125178, Precision: 0.6599977349497187, Recall: 0.6888335704125178, F1-score: 0.6664036530461876
Test ID: 7E, Accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.21071012805587894, Precision: 1.0, Recall: 0.21071012805587894, F1-score: 0.34807692307692306
Test ID: F5, Accuracy: 0.6646505376344086, Precision: 0.8524488721113626, Recall: 0.6646505376344085, F1-score: 0.7468127811940869
Training XGBoost with max_depth=5, learning_rate=0.1, n_estimators=300:
Test ID: 15, Accuracy: 0.7828100874935666, Precision: 0.9248718662094062, Recall: 0.7828100874935666, F1-score: 0.8430604673496067
Test ID: 5C, Accuracy: 0.5846551088159351, Precision: 0.6056315303225481, Recall: 0.5846551088159351, F1-score: 0.5935294584593217
Test ID: 6B, Accuracy: 0.610655737704918, Precision: 0.8026798260027022, Recall: 0.610655737704918, F1-score: 0.6733256264677921
Test ID: 6D, Accuracy: 0.7539875642065423, Precision: 0.8858592298614038, Recall: 0.7539875642065423, F1-score: 0.7876615325985854
Test ID: 7A, Accuracy: 0.6843290659080132, Precision: 0.6508230760522067, Recall: 0.6843290659080132, F1-score: 0.6574700592681414
Test ID: 7E, Accuracy: 0.

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.24039580908032596, Precision: 1.0, Recall: 0.24039580908032596, F1-score: 0.38761145002346314
Test ID: F5, Accuracy: 0.6801075268817204, Precision: 0.8541728492000256, Recall: 0.6801075268817204, F1-score: 0.7571604079516435
Training XGBoost with max_depth=7, learning_rate=0.01, n_estimators=100:
Test ID: 15, Accuracy: 0.7313432835820896, Precision: 0.9287624048315167, Recall: 0.7313432835820896, F1-score: 0.8100339749820588
Test ID: 5C, Accuracy: 0.6202508299520472, Precision: 0.6122031131278551, Recall: 0.6202508299520472, F1-score: 0.6159460079146676
Test ID: 6B, Accuracy: 0.42096018735363, Precision: 0.7870081317176927, Recall: 0.42096018735363, F1-score: 0.4951146271671198
Test ID: 6D, Accuracy: 0.8277912949445796, Precision: 0.7272048977140662, Recall: 0.8277912949445796, F1-score: 0.7718865512898446
Test ID: 7A, Accuracy: 0.5906827880512091, Precision: 0.6107524011999427, Recall: 0.5906827880512091, F1-score: 0.5992501475408633
Test ID: 7E, Accuracy: 0.4

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.04976717112922002, Precision: 1.0, Recall: 0.04976717112922002, F1-score: 0.09481563626282229
Test ID: F5, Accuracy: 0.5510752688172043, Precision: 0.8676602962699762, Recall: 0.5510752688172043, F1-score: 0.6624406372808245
Training XGBoost with max_depth=7, learning_rate=0.01, n_estimators=200:
Test ID: 15, Accuracy: 0.766340710241894, Precision: 0.927273779509065, Recall: 0.766340710241894, F1-score: 0.8328976436173038
Test ID: 5C, Accuracy: 0.6099225378089266, Precision: 0.6049445109147354, Recall: 0.6099225378089266, F1-score: 0.6073341970489522
Test ID: 6B, Accuracy: 0.44379391100702575, Precision: 0.7948975448873238, Recall: 0.44379391100702575, F1-score: 0.5187409322772754
Test ID: 6D, Accuracy: 0.8559070018924033, Precision: 0.8310720083968532, Recall: 0.8559070018924033, F1-score: 0.8361715677680169
Test ID: 7A, Accuracy: 0.6390469416785206, Precision: 0.6301851679939561, Recall: 0.6390469416785206, F1-score: 0.6342297717705874
Test ID: 7E, Accuracy: 

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.059080325960419094, Precision: 1.0, Recall: 0.059080325960419094, F1-score: 0.1115691123935147
Test ID: F5, Accuracy: 0.5752688172043011, Precision: 0.8575695245779777, Recall: 0.5752688172043011, F1-score: 0.6825519110480239
Training XGBoost with max_depth=7, learning_rate=0.01, n_estimators=300:
Test ID: 15, Accuracy: 0.7812660833762224, Precision: 0.9267869110983417, Recall: 0.7812660833762224, F1-score: 0.8423415527166909
Test ID: 5C, Accuracy: 0.6032829214312062, Precision: 0.6015568552347322, Recall: 0.6032829214312062, F1-score: 0.6024084296099295
Test ID: 6B, Accuracy: 0.47931303669008585, Precision: 0.8037443125608071, Recall: 0.47931303669008585, F1-score: 0.5544821751440011
Test ID: 6D, Accuracy: 0.8605028386050284, Precision: 0.8560428482593567, Recall: 0.8605028386050284, F1-score: 0.858103016886796
Test ID: 7A, Accuracy: 0.6680891417733523, Precision: 0.6439900635152471, Recall: 0.6680891417733523, F1-score: 0.6521102654266746
Test ID: 7E, Accurac

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.06431897555296857, Precision: 1.0, Recall: 0.06431897555296857, F1-score: 0.12086409625375995
Test ID: F5, Accuracy: 0.6028225806451613, Precision: 0.8483393435534964, Recall: 0.6028225806451613, F1-score: 0.7035144115890083
Training XGBoost with max_depth=7, learning_rate=0.05, n_estimators=100:
Test ID: 15, Accuracy: 0.7678847143592383, Precision: 0.9219782823732312, Recall: 0.7678847143592383, F1-score: 0.8332941623537766
Test ID: 5C, Accuracy: 0.5903725562523054, Precision: 0.5960726787875157, Recall: 0.5903725562523054, F1-score: 0.593105692011647
Test ID: 6B, Accuracy: 0.5589383294301327, Precision: 0.8100683465608287, Recall: 0.5589383294301327, F1-score: 0.6294907698436775
Test ID: 6D, Accuracy: 0.8567180319005137, Precision: 0.8831453536314465, Recall: 0.8567180319005137, F1-score: 0.8661851130609758
Test ID: 7A, Accuracy: 0.6761498340445709, Precision: 0.645870215527189, Recall: 0.6761498340445709, F1-score: 0.6539636358868067
Test ID: 7E, Accuracy: 0

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.07275902211874273, Precision: 1.0, Recall: 0.07275902211874273, F1-score: 0.1356483993488877
Test ID: F5, Accuracy: 0.6323924731182796, Precision: 0.8503668417479792, Recall: 0.6323924731182796, F1-score: 0.7247097529265549
Training XGBoost with max_depth=7, learning_rate=0.05, n_estimators=200:
Test ID: 15, Accuracy: 0.7853834276891405, Precision: 0.9256914962522498, Recall: 0.7853834276891405, F1-score: 0.8447839439987057
Test ID: 5C, Accuracy: 0.5852084101807451, Precision: 0.6000317170727241, Recall: 0.5852084101807451, F1-score: 0.5918275249951801
Test ID: 6B, Accuracy: 0.60967993754879, Precision: 0.8182749268157414, Recall: 0.60967993754879, F1-score: 0.6728929400019267
Test ID: 6D, Accuracy: 0.8048121113814545, Precision: 0.8881675417970819, Recall: 0.8048121113814545, F1-score: 0.8281595502124582
Test ID: 7A, Accuracy: 0.6801801801801802, Precision: 0.6484182751212579, Recall: 0.6801801801801802, F1-score: 0.6559563835771793
Test ID: 7E, Accuracy: 0.39

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.1108847497089639, Precision: 1.0, Recall: 0.1108847497089639, F1-score: 0.19963321980613044
Test ID: F5, Accuracy: 0.6592741935483871, Precision: 0.8518321358769603, Recall: 0.6592741935483871, F1-score: 0.7431685583083484
Training XGBoost with max_depth=7, learning_rate=0.05, n_estimators=300:
Test ID: 15, Accuracy: 0.7833247555326814, Precision: 0.9255699998184331, Recall: 0.7833247555326814, F1-score: 0.8434774193143217
Test ID: 5C, Accuracy: 0.5852084101807451, Precision: 0.6032050167838172, Recall: 0.5852084101807451, F1-score: 0.5930255465934297
Test ID: 6B, Accuracy: 0.62256049960968, Precision: 0.8130952820989708, Recall: 0.62256049960968, F1-score: 0.6833091030090038
Test ID: 6D, Accuracy: 0.7677750743444174, Precision: 0.8866111591405201, Recall: 0.7677750743444174, F1-score: 0.7988025992523062
Test ID: 7A, Accuracy: 0.6829065908013277, Precision: 0.6475435872937527, Recall: 0.6829065908013277, F1-score: 0.6542461285404015
Test ID: 7E, Accuracy: 0.406

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.15075669383003493, Precision: 1.0, Recall: 0.15075669383003493, F1-score: 0.2620131512392514
Test ID: F5, Accuracy: 0.6750672043010753, Precision: 0.8536185185954994, Recall: 0.6750672043010751, F1-score: 0.7538070954778655
Training XGBoost with max_depth=7, learning_rate=0.1, n_estimators=100:
Test ID: 15, Accuracy: 0.7838394235717961, Precision: 0.9276061358399645, Recall: 0.7838394235717961, F1-score: 0.8440687405134946
Test ID: 5C, Accuracy: 0.5881593507930652, Precision: 0.600833926450578, Recall: 0.5881593507930652, F1-score: 0.5939104878856695
Test ID: 6B, Accuracy: 0.6163153786104606, Precision: 0.8209269464128345, Recall: 0.6163153786104606, F1-score: 0.6784159740377791
Test ID: 6D, Accuracy: 0.7929170045958367, Precision: 0.88062049062898, Recall: 0.7929170045958367, F1-score: 0.8180171761997285
Test ID: 7A, Accuracy: 0.6734234234234234, Precision: 0.6384006298877813, Recall: 0.6734234234234234, F1-score: 0.6469382041403775
Test ID: 7E, Accuracy: 0.41

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.1108847497089639, Precision: 1.0, Recall: 0.1108847497089639, F1-score: 0.19963321980613044
Test ID: F5, Accuracy: 0.6565860215053764, Precision: 0.851084721867472, Recall: 0.6565860215053764, F1-score: 0.7412896137320335
Training XGBoost with max_depth=7, learning_rate=0.1, n_estimators=200:
Test ID: 15, Accuracy: 0.784354091610911, Precision: 0.9283056177413632, Recall: 0.784354091610911, F1-score: 0.844482780450303
Test ID: 5C, Accuracy: 0.5868683142751752, Precision: 0.605981491473587, Recall: 0.5868683142751752, F1-score: 0.5950742726878988
Test ID: 6B, Accuracy: 0.6190476190476191, Precision: 0.8107878308798503, Recall: 0.6190476190476191, F1-score: 0.6803917749984293
Test ID: 6D, Accuracy: 0.7364152473641524, Precision: 0.8823874693231607, Recall: 0.7364152473641524, F1-score: 0.7731815836727791
Test ID: 7A, Accuracy: 0.6735419630156472, Precision: 0.6366018009207577, Recall: 0.6735419630156472, F1-score: 0.6451029400784959
Test ID: 7E, Accuracy: 0.45945

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.179860302677532, Precision: 1.0, Recall: 0.179860302677532, F1-score: 0.30488406512086824
Test ID: F5, Accuracy: 0.6737231182795699, Precision: 0.8530352311974717, Recall: 0.6737231182795699, F1-score: 0.752849403000218
Training XGBoost with max_depth=7, learning_rate=0.1, n_estimators=300:
Test ID: 15, Accuracy: 0.7812660833762224, Precision: 0.9267869110983417, Recall: 0.7812660833762224, F1-score: 0.8423415527166909
Test ID: 5C, Accuracy: 0.5791220951678347, Precision: 0.602503210684365, Recall: 0.5791220951678347, F1-score: 0.5888577404256735
Test ID: 6B, Accuracy: 0.6098750975800156, Precision: 0.8030511695288197, Recall: 0.6098750975800156, F1-score: 0.6727112672655828
Test ID: 6D, Accuracy: 0.7174912138415788, Precision: 0.8776714591055518, Recall: 0.7174912138415788, F1-score: 0.7574206157170534
Test ID: 7A, Accuracy: 0.6686818397344713, Precision: 0.6297496050828735, Recall: 0.6686818397344713, F1-score: 0.6391394293904106
Test ID: 7E, Accuracy: 0.4852

  _warn_prf(average, modifier, msg_start, len(result))


Test ID: EG, Accuracy: 0.23661233993015135, Precision: 1.0, Recall: 0.23661233993015135, F1-score: 0.38267827724170395
Test ID: F5, Accuracy: 0.6848118279569892, Precision: 0.8542498060082031, Recall: 0.6848118279569894, F1-score: 0.7602039558054652
Best parameters: {'max_depth': 7, 'learning_rate': 0.1, 'n_estimators': 300}
Best accuracy: 0.5612822742389483


KNN

In [None]:
# Merge the three original classes into two groups: labels 0.0 and 1.0
# become 'low stress', label 2.0 becomes 'high stress'.
# NOTE: this overwrites df['label'] in place.
stress_level_names = {0.0: 'low stress', 1.0: 'low stress', 2.0: 'high stress'}
df['label'] = df['label'].replace(stress_level_names)

# Show how many rows each merged class contains
class_counts = df['label'].value_counts()
print(class_counts)


high stress    53081
low stress     18402
Name: label, dtype: int64


In [None]:
import pandas as pd
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE


# Encode labels into a separate Series instead of overwriting df['label'].
# Other cells filter df on the string labels ('high stress' / 'low stress'), so
# replacing the column with integer codes here would break them when the notebook
# is run top to bottom.
label_encoder = LabelEncoder()
y = pd.Series(label_encoder.fit_transform(df['label']), index=df.index, name='label')

# Prepare features and the grouping variable
X = df.drop(columns=['id', 'label'])  # Exclude 'id' and 'label' columns
participants = df['id']

# Define oversampler (SMOTE); the fixed seed makes every call on the same fold
# produce the same synthetic samples.
oversampler = SMOTE(random_state=42)

# Implement leave-one-participant-out cross-validation
logo = LeaveOneGroupOut()

# Oversampling and scaling depend only on the fold, not on the KNN
# hyper-parameters, so each fold is prepared once and reused for every parameter
# combination instead of being recomputed inside the grid loops.
# (This keeps one resampled copy of the training data per participant in memory.)
# NOTE(review): SMOTE runs on the unscaled features, so its neighbour search is
# dominated by the features with the largest numeric range; consider scaling
# before oversampling. The original order is kept here so results are unchanged.
folds = []
for train_index, test_index in logo.split(X, y, groups=participants):
    # split() yields positional indices, so use .iloc for both X and y
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Apply SMOTE to the training participants only
    X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

    # Fit the scaler on the training data and apply the same transform to the test data
    scaler = StandardScaler()
    X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)

    folds.append((X_train_resampled_scaled, y_train_resampled, X_test_scaled, y_test))

# KNN parameters for tuning
params = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}

best_params = None
best_accuracy = 0

# Iterate over different parameter combinations
for n_neighbors in params['n_neighbors']:
    for weights in params['weights']:
        for p in params['p']:
            print(f"Tuning KNN with n_neighbors={n_neighbors}, weights={weights}, p={p}:")

            # Accuracy on each held-out participant
            accuracies = []

            for X_train_resampled_scaled, y_train_resampled, X_test_scaled, y_test in folds:
                # Train KNN model
                knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
                knn.fit(X_train_resampled_scaled, y_train_resampled)

                # Evaluate on the held-out participant
                y_pred = knn.predict(X_test_scaled)
                accuracy = accuracy_score(y_test, y_pred)
                accuracies.append(accuracy)

            # Unweighted mean over participants for the current parameter combination
            mean_accuracy = sum(accuracies) / len(accuracies)
            print(f"Mean Accuracy: {mean_accuracy}")

            # Save best parameters
            if mean_accuracy > best_accuracy:
                best_accuracy = mean_accuracy
                best_params = {'n_neighbors': n_neighbors, 'weights': weights, 'p': p}

print("Best parameters:", best_params)
print("Best accuracy:", best_accuracy)


Tuning KNN with n_neighbors=3, weights=uniform, p=1:
Mean Accuracy: 0.5276802014997548
Tuning KNN with n_neighbors=3, weights=uniform, p=2:
Mean Accuracy: 0.5251555228615005
Tuning KNN with n_neighbors=3, weights=distance, p=1:
Mean Accuracy: 0.5277310862575271
Tuning KNN with n_neighbors=3, weights=distance, p=2:
Mean Accuracy: 0.5251143585021238
Tuning KNN with n_neighbors=5, weights=uniform, p=1:
Mean Accuracy: 0.5187308412800442
Tuning KNN with n_neighbors=5, weights=uniform, p=2:
Mean Accuracy: 0.5159948025980079
Tuning KNN with n_neighbors=5, weights=distance, p=1:
Mean Accuracy: 0.5190395890602619
Tuning KNN with n_neighbors=5, weights=distance, p=2:
Mean Accuracy: 0.5159894567236095
Tuning KNN with n_neighbors=7, weights=uniform, p=1:
Mean Accuracy: 0.510814724358096
Tuning KNN with n_neighbors=7, weights=uniform, p=2:
Mean Accuracy: 0.5072149176414887
Tuning KNN with n_neighbors=7, weights=distance, p=1:
Mean Accuracy: 0.511721965170768
Tuning KNN with n_neighbors=7, weights=d

In [None]:
# Display the DataFrame to inspect its current contents (the notebook renders a truncated view of the first and last rows).
df


Unnamed: 0,id,label,avg_X,max_X,min_X,X_median,avg_Y,max_Y,min_Y,Y_median,...,min_EDA,EDA_median,avg_HR,max_HR,min_HR,HR_median,avg_TEMP,max_TEMP,min_TEMP,TEMP_median
0,15,low stress,19.21875,7.0,1.0,19.0,-42.50625,-64.0,-27.0,-43.0,...,3.801695,3.976439,103.190,103.27,103.07,103.20,33.876,33.95,33.81,33.87
1,15,low stress,17.93750,22.0,15.0,18.0,-43.73750,-45.0,-41.0,-44.0,...,3.547946,3.619714,103.188,103.30,103.00,103.25,33.966,33.99,33.93,33.97
2,15,low stress,-27.60625,-37.0,-20.0,-27.0,9.96250,9.0,-1.0,10.0,...,4.737687,4.808173,109.362,110.00,108.68,109.38,34.110,34.13,34.09,34.11
3,15,low stress,-28.59375,-38.0,-17.0,-29.0,10.63750,9.0,-1.0,10.0,...,4.615938,4.645414,112.220,112.35,111.90,112.28,34.148,34.16,34.13,34.15
4,15,low stress,-50.07500,-99.0,-13.0,-48.0,-29.84375,6.0,-1.0,-29.0,...,4.038785,4.157969,99.768,99.92,99.62,99.77,34.336,34.37,34.31,34.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71478,F5,high stress,-32.43125,-9.0,-10.0,-20.0,-13.86875,-9.0,-10.0,-14.0,...,3.378974,3.728609,80.550,81.05,80.05,80.57,34.590,34.61,34.57,34.59
71479,F5,high stress,8.08750,9.0,0.0,9.0,60.28750,62.0,57.0,61.0,...,3.706447,3.848606,87.034,87.08,86.97,87.05,34.594,34.61,34.57,34.59
71480,F5,high stress,-22.76250,9.0,-1.0,-20.0,-18.54375,8.0,-1.0,-7.0,...,10.031460,10.031460,92.774,92.82,92.70,92.80,34.322,34.34,34.29,34.33
71481,F5,high stress,-11.18125,-9.0,-10.0,-11.0,-61.88750,-65.0,-58.0,-62.0,...,8.944329,8.968663,91.470,91.55,91.40,91.47,34.160,34.18,34.15,34.16


Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.utils import resample

# Work out which class is the majority and which the minority from the data itself,
# so this cell does not depend on whether df['label'] currently holds the string
# names ('high stress' / 'low stress') or integer codes.
label_counts = df['label'].value_counts()  # sorted by frequency, largest first
if len(label_counts) != 2:
    raise ValueError(
        "Expected exactly two classes in df['label'], "
        f"found {len(label_counts)}: {list(label_counts.index)}"
    )
majority_label, minority_label = label_counts.index

# Separate majority and minority classes
df_majority = df[df['label'] == majority_label]
df_minority = df[df['label'] == minority_label]

# Upsample minority class
df_minority_upsampled = resample(df_minority,
                                 replace=True,     # sample with replacement
                                 n_samples=len(df_majority),    # to match majority class
                                 random_state=42) # reproducible results

# Combine majority class with upsampled minority class
df_balanced = pd.concat([df_majority, df_minority_upsampled])

# Extract features and target variable from balanced dataset
X_balanced = df_balanced.drop(['id', 'label'], axis=1)
y_balanced = df_balanced['label']

# Random Forest with fixed hyper-parameters (no tuning is performed in this cell)
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, class_weight='balanced')

# Define leave-one-group-out cross-validation (one group per participant id)
logo = LeaveOneGroupOut()

# Perform leave-one-group-out cross-validation on balanced data.
# NOTE(review): the minority class is upsampled before the folds are formed, so every
# held-out participant is scored on duplicated minority rows and the report's
# support is balanced rather than the real class distribution. Duplicates keep
# their participant id, so they never cross the train/test boundary.
y_pred_loo_balanced = cross_val_predict(rf_classifier, X_balanced, y_balanced, groups=df_balanced['id'], cv=logo)

# Evaluate classification report after leave-one-group-out cross-validation
print("Classification Report After Leave one out cross-validation on balanced data:")
print(classification_report(y_balanced, y_pred_loo_balanced))

# Train the model on the entire balanced dataset
rf_classifier.fit(X_balanced, y_balanced)

# Predict on the same rows the model was just trained on.
# NOTE(review): this is training-set (resubstitution) performance, not an estimate
# of how the model generalises to an unseen participant.
y_pred_balanced = rf_classifier.predict(X_balanced)

# Evaluate classification report after training on entire balanced dataset
print("\nClassification Report after training on entire balanced dataset:")
print(classification_report(y_balanced, y_pred_balanced))


Classification Report After Leave one out cross-validation on balanced data:
              precision    recall  f1-score   support

 high stress       0.53      0.68      0.60     53081
  low stress       0.56      0.41      0.47     53081

    accuracy                           0.54    106162
   macro avg       0.55      0.54      0.53    106162
weighted avg       0.55      0.54      0.53    106162


Classification Report after training on entire balanced dataset:
              precision    recall  f1-score   support

 high stress       0.83      0.89      0.86     53081
  low stress       0.88      0.81      0.85     53081

    accuracy                           0.85    106162
   macro avg       0.85      0.85      0.85    106162
weighted avg       0.85      0.85      0.85    106162



In [None]:
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

# Work out which class is the majority and which the minority from the data itself,
# so this cell does not depend on whether df['label'] currently holds the string
# names ('high stress' / 'low stress') or integer codes.
label_counts = df['label'].value_counts()  # sorted by frequency, largest first
if len(label_counts) != 2:
    raise ValueError(
        "Expected exactly two classes in df['label'], "
        f"found {len(label_counts)}: {list(label_counts.index)}"
    )
majority_label, minority_label = label_counts.index

# Separate majority and minority classes
df_majority = df[df['label'] == majority_label]
df_minority = df[df['label'] == minority_label]

# Upsample minority class
df_minority_upsampled = resample(df_minority,
                                 replace=True,     # sample with replacement
                                 n_samples=len(df_majority),    # to match majority class
                                 random_state=42) # reproducible results

# Combine majority class with upsampled minority class
df_balanced = pd.concat([df_majority, df_minority_upsampled])

# Extract features and target variable from balanced dataset
X_balanced = df_balanced.drop(['id', 'label'], axis=1)
y_balanced = df_balanced['label']

# PCA keeping enough components to explain 80% of the variance
pca = PCA(n_components=0.80)

# Full-data projection, kept for inspection only (e.g. pca.n_components_); it is
# NOT used for the cross-validated predictions below. The features are
# standardised first because PCA is variance-based and the raw columns live on
# very different scales (heart rate around 100, EDA around 4).
X_pca = pca.fit_transform(StandardScaler().fit_transform(X_balanced))

# Random Forest with fixed hyper-parameters (no tuning is performed in this cell)
rf_classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42, class_weight='balanced')

# Chain scaling, PCA and the classifier so that cross-validation refits the scaler
# and the projection on the training participants of each fold only; the held-out
# participant therefore never influences the preprocessing.
pca_rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', pca),
    ('rf', rf_classifier),
])

# Define leave-one-group-out cross-validation (one group per participant id)
logo = LeaveOneGroupOut()

# Perform leave-one-group-out cross-validation; cross_val_predict clones the
# pipeline for every fold, so the full-data fit of `pca` above is not reused.
# NOTE(review): the minority class is still upsampled before the folds are formed,
# so each held-out participant is scored on duplicated minority rows.
y_pred_loo_pca = cross_val_predict(pca_rf_pipeline, X_balanced, y_balanced, groups=df_balanced['id'], cv=logo)

# Evaluate classification report after leave-one-group-out cross-validation on PCA-transformed data
print("Classification Report After Leave one out cross-validation on PCA-transformed data:")
print(classification_report(y_balanced, y_pred_loo_pca))


Classification Report After Leave one out cross-validation on PCA-transformed data:
              precision    recall  f1-score   support

 high stress       0.48      0.59      0.53     53081
  low stress       0.47      0.37      0.42     53081

    accuracy                           0.48    106162
   macro avg       0.48      0.48      0.47    106162
weighted avg       0.48      0.48      0.47    106162

