In [None]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
import pandas as pd
import random

# Create the RFE object and compute a cross-validated score.
from sklearn.svm import SVC

from sklearn import svm
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import RFECV, f_classif
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

# Read the ECG table; the first CSV column becomes the row index.
data_path = r'E:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\ecg_data.csv'
data = pd.read_csv(data_path, index_col=0)

# Every column but the last is a feature; the last column holds the labels.
x, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Repetition indices iterated by the outer and inner loops (five of each).
outer = range(5)
inner = range(5)

# Collects one entry per (outer, inner) run, keyed by a descriptive label.
best_params_list = dict()

# Repeated hold-out evaluation of a random forest.
#
# For every outer repetition the data is split 80/20 (stratified) into a
# training part and a test part. For every inner repetition a stratified 85 %
# subset of that training part is drawn, a randomized hyperparameter search
# (5-fold cross-validation, scored by ROC AUC) is run on it, and the refitted
# best model is scored on the outer test part. After each outer repetition all
# results collected so far are written to 'best_hyperparameters.csv' in the
# current working directory, so an interrupted run still leaves a usable file.
#
# NOTE(review): the same outer test part is scored once per inner repetition,
# so the five rows of one outer repetition are not independent estimates.
for outer_rand in tqdm(outer, desc='Outer Loop'):
    # Seed the split with the loop value: every repetition gets a different
    # but reproducible partition.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, stratify=y, shuffle=True,
        random_state=outer_rand)

    # Inner loop for hyperparameter tuning
    for inner_rand in tqdm(inner, desc='Inner Loop', leave=True):
        run_key = f"Outer {outer_rand} - Inner {inner_rand}"

        # One seed per (outer, inner) pair, shared by the candidate sampling,
        # the forest and the search, so a single run can be reproduced.
        run_seed = 1000 * outer_rand + inner_rand
        rng = random.Random(run_seed)

        # X_train_val / y_train_val are not used further down; only the 85 %
        # training subset is passed to the search.
        X_train_train, X_train_val, y_train_train, y_train_val = train_test_split(
            X_train, y_train, test_size=0.15, stratify=y_train, shuffle=True,
            random_state=inner_rand)

        # Three randomly drawn candidate values per hyperparameter
        param_grid_rf = {
            'n_estimators': [rng.randint(50, 200) for _ in range(3)],
            'max_depth': [rng.randint(5, 30) for _ in range(3)],
            'min_samples_split': [rng.randint(2, 10) for _ in range(3)],  # must be an integer >= 2
            'min_samples_leaf': [rng.randint(1, 4) for _ in range(3)],  # must be >= 1
            'max_samples': [rng.uniform(0.5, 1) for _ in range(3)],
            'bootstrap': [True]  # Required for max_samples
        }

        # Randomized Search for efficiency
        rf_model = RandomForestClassifier(random_state=run_seed)
        random_search = RandomizedSearchCV(
            estimator=rf_model,
            param_distributions=param_grid_rf,
            scoring='roc_auc',
            cv=5,
            n_iter=10,  # Randomly sample 10 hyperparameter combinations
            n_jobs=-1,  # Use all available CPU cores
            random_state=run_seed,
        )

        # Fit the search
        random_search.fit(X_train_train, y_train_train)

        # Copy, so that adding metrics to the stored record neither mutates
        # random_search.best_params_ nor leaks into the printout below.
        best_params = dict(random_search.best_params_)

        # Best model evaluation on the outer test part
        best_model = random_search.best_estimator_
        y_pred = best_model.predict(X_test)
        # Column 1 is the probability of classes_[1], the greater label, which
        # is the class roc_auc_score treats as positive for binary targets.
        y_score = best_model.predict_proba(X_test)[:, 1]

        # Confusion matrix of a binary problem: [[tn, fp], [fn, tp]]
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

        sensitivity = tp / (tp + fn)  # True Positive Rate
        specificity = tn / (tn + fp)  # True Negative Rate

        # AUC needs a continuous score; with hard 0/1 predictions it would
        # collapse to (sensitivity + specificity) / 2.
        auc_score = roc_auc_score(y_test, y_score)
        f1 = f1_score(y_test, y_pred)
        accuracy = accuracy_score(y_test, y_pred)

        # Store hyperparameters and metrics together (one CSV row per run)
        best_params_list[run_key] = {
            **best_params,
            'auc': auc_score,
            'f1': f1,
            'sensitivity': sensitivity,
            'specificity': specificity,
            'accuracy': accuracy,
        }
        print(f"\nOuter {outer_rand}, Inner {inner_rand} -> Best Hyperparameters: {best_params}")
        print(f"Test Set Accuracy: {accuracy:.4f}")

    # Checkpoint: rewrite the CSV with everything collected so far
    results_df = pd.DataFrame.from_dict(best_params_list, orient='index')
    results_csv_path = os.path.join(os.getcwd(), 'best_hyperparameters.csv')
    results_df.to_csv(results_csv_path)
    print(f"\nBest parameters saved to {results_csv_path} after Outer {outer_rand} completion")

# Final overview: one line per stored run, showing its label and record.
print("\nBest Parameters Summary:")
for run_label in best_params_list:
    print(f"{run_label}: {best_params_list[run_label]}")

Outer Loop:   0%|          | 0/5 [00:00<?, ?it/s]


Outer 0, Inner 0 -> Best Hyperparameters: {'n_estimators': 173, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_samples': 0.6160136651902284, 'max_depth': 22, 'bootstrap': True, 'auc': 0.5271834885476969, 'f1': 0.12121212121212122, 'sensitivity': 0.06896551724137931, 'specificity': 0.9854014598540146, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 0, Inner 1 -> Best Hyperparameters: {'n_estimators': 150, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_samples': 0.8470185682894085, 'max_depth': 12, 'bootstrap': True, 'auc': 0.5135917442738486, 'f1': 0.06451612903225806, 'sensitivity': 0.034482758620689655, 'specificity': 0.9927007299270073, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 0, Inner 2 -> Best Hyperparameters: {'n_estimators': 72, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_samples': 0.610830439318528, 'max_depth': 17, 'bootstrap': True, 'auc': 0.5062924742008559, 'f1': 0.06060606060606061, 'sensitivity': 0.034482758620689655, 'specificity': 0.9781021897810219, 'accuracy': 0.8132530120481928}
Test Set Accuracy: 0.8133





Outer 0, Inner 3 -> Best Hyperparameters: {'n_estimators': 74, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_samples': 0.7048787602831459, 'max_depth': 21, 'bootstrap': True, 'auc': 0.5308331235841933, 'f1': 0.125, 'sensitivity': 0.06896551724137931, 'specificity': 0.9927007299270073, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313


Inner Loop: 100%|██████████| 5/5 [01:18<00:00, 15.73s/it]
Outer Loop:  20%|██        | 1/5 [01:18<05:14, 78.70s/it]


Outer 0, Inner 4 -> Best Hyperparameters: {'n_estimators': 175, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.8543691342930962, 'max_depth': 11, 'bootstrap': True, 'auc': 0.5308331235841933, 'f1': 0.125, 'sensitivity': 0.06896551724137931, 'specificity': 0.9927007299270073, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313

Best parameters saved to e:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\TM10007_Machine_Learning_Group_4-1\best_hyperparameters.csv after Outer 0 completion





Outer 1, Inner 0 -> Best Hyperparameters: {'n_estimators': 69, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_samples': 0.5609306166560242, 'max_depth': 1, 'bootstrap': True, 'auc': 0.5, 'f1': 0.0, 'sensitivity': 0.0, 'specificity': 1.0, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 1, Inner 1 -> Best Hyperparameters: {'n_estimators': 188, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_samples': 0.9041586730182516, 'max_depth': 2, 'bootstrap': True, 'auc': 0.5, 'f1': 0.0, 'sensitivity': 0.0, 'specificity': 1.0, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 1, Inner 2 -> Best Hyperparameters: {'n_estimators': 192, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_samples': 0.5926045728289887, 'max_depth': 21, 'bootstrap': True, 'auc': 0.5517241379310345, 'f1': 0.1875, 'sensitivity': 0.10344827586206896, 'specificity': 1.0, 'accuracy': 0.8433734939759037}
Test Set Accuracy: 0.8434





Outer 1, Inner 3 -> Best Hyperparameters: {'n_estimators': 148, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_samples': 0.8077955168994329, 'max_depth': 10, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373


Inner Loop: 100%|██████████| 5/5 [01:04<00:00, 12.96s/it]
Outer Loop:  40%|████      | 2/5 [02:23<03:31, 70.55s/it]


Outer 1, Inner 4 -> Best Hyperparameters: {'n_estimators': 175, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_samples': 0.947725229969246, 'max_depth': 28, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373

Best parameters saved to e:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\TM10007_Machine_Learning_Group_4-1\best_hyperparameters.csv after Outer 1 completion





Outer 2, Inner 0 -> Best Hyperparameters: {'n_estimators': 76, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_samples': 0.825693657971359, 'max_depth': 25, 'bootstrap': True, 'auc': 0.5172413793103449, 'f1': 0.06666666666666667, 'sensitivity': 0.034482758620689655, 'specificity': 1.0, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313





Outer 2, Inner 1 -> Best Hyperparameters: {'n_estimators': 94, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_samples': 0.5844085217192332, 'max_depth': 2, 'bootstrap': True, 'auc': 0.5, 'f1': 0.0, 'sensitivity': 0.0, 'specificity': 1.0, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 2, Inner 2 -> Best Hyperparameters: {'n_estimators': 158, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_samples': 0.8209581674086108, 'max_depth': 19, 'bootstrap': True, 'auc': 0.5308331235841933, 'f1': 0.125, 'sensitivity': 0.06896551724137931, 'specificity': 0.9927007299270073, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313





Outer 2, Inner 3 -> Best Hyperparameters: {'n_estimators': 183, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_samples': 0.6520727280159915, 'max_depth': 23, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373


Inner Loop: 100%|██████████| 5/5 [01:03<00:00, 12.75s/it]
Outer Loop:  60%|██████    | 3/5 [03:27<02:14, 67.48s/it]


Outer 2, Inner 4 -> Best Hyperparameters: {'n_estimators': 191, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_samples': 0.7226877696702521, 'max_depth': 12, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373

Best parameters saved to e:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\TM10007_Machine_Learning_Group_4-1\best_hyperparameters.csv after Outer 2 completion





Outer 3, Inner 0 -> Best Hyperparameters: {'n_estimators': 75, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_samples': 0.8007066877970783, 'max_depth': 15, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373





Outer 3, Inner 1 -> Best Hyperparameters: {'n_estimators': 186, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_samples': 0.6156543665851987, 'max_depth': 9, 'bootstrap': True, 'auc': 0.5172413793103449, 'f1': 0.06666666666666667, 'sensitivity': 0.034482758620689655, 'specificity': 1.0, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313





Outer 3, Inner 2 -> Best Hyperparameters: {'n_estimators': 155, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_samples': 0.6642561569604244, 'max_depth': 3, 'bootstrap': True, 'auc': 0.5, 'f1': 0.0, 'sensitivity': 0.0, 'specificity': 1.0, 'accuracy': 0.8253012048192772}
Test Set Accuracy: 0.8253





Outer 3, Inner 3 -> Best Hyperparameters: {'n_estimators': 172, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_samples': 0.951523923759013, 'max_depth': 28, 'bootstrap': True, 'auc': 0.5517241379310345, 'f1': 0.1875, 'sensitivity': 0.10344827586206896, 'specificity': 1.0, 'accuracy': 0.8433734939759037}
Test Set Accuracy: 0.8434


Inner Loop: 100%|██████████| 5/5 [01:12<00:00, 14.60s/it]
Outer Loop:  80%|████████  | 4/5 [04:40<01:09, 69.67s/it]


Outer 3, Inner 4 -> Best Hyperparameters: {'n_estimators': 174, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_samples': 0.6145610160917565, 'max_depth': 26, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373

Best parameters saved to e:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\TM10007_Machine_Learning_Group_4-1\best_hyperparameters.csv after Outer 3 completion





Outer 4, Inner 0 -> Best Hyperparameters: {'n_estimators': 158, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_samples': 0.9816142558114177, 'max_depth': 3, 'bootstrap': True, 'auc': 0.5172413793103449, 'f1': 0.06666666666666667, 'sensitivity': 0.034482758620689655, 'specificity': 1.0, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313





Outer 4, Inner 1 -> Best Hyperparameters: {'n_estimators': 98, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_samples': 0.9679956887832186, 'max_depth': 10, 'bootstrap': True, 'auc': 0.5344827586206896, 'f1': 0.12903225806451613, 'sensitivity': 0.06896551724137931, 'specificity': 1.0, 'accuracy': 0.8373493975903614}
Test Set Accuracy: 0.8373





Outer 4, Inner 2 -> Best Hyperparameters: {'n_estimators': 155, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_samples': 0.9458282514735021, 'max_depth': 28, 'bootstrap': True, 'auc': 0.5517241379310345, 'f1': 0.1875, 'sensitivity': 0.10344827586206896, 'specificity': 1.0, 'accuracy': 0.8433734939759037}
Test Set Accuracy: 0.8434





Outer 4, Inner 3 -> Best Hyperparameters: {'n_estimators': 171, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_samples': 0.6086927995857327, 'max_depth': 22, 'bootstrap': True, 'auc': 0.5172413793103449, 'f1': 0.06666666666666667, 'sensitivity': 0.034482758620689655, 'specificity': 1.0, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313


Inner Loop: 100%|██████████| 5/5 [01:19<00:00, 15.82s/it]
Outer Loop: 100%|██████████| 5/5 [05:59<00:00, 71.91s/it]


Outer 4, Inner 4 -> Best Hyperparameters: {'n_estimators': 113, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_samples': 0.9487066776178257, 'max_depth': 24, 'bootstrap': True, 'auc': 0.5172413793103449, 'f1': 0.06666666666666667, 'sensitivity': 0.034482758620689655, 'specificity': 1.0, 'accuracy': 0.8313253012048193}
Test Set Accuracy: 0.8313

Best parameters saved to e:\OneDrive\School\Technical Medicine\TM Jaar 1\Machine learning\TM10007_Machine_Learning_Group_4\TM\TM10007_Machine_Learning_Group_4-1\best_hyperparameters.csv after Outer 4 completion

Best Parameters Summary:
Outer 0 - Inner 0: {'n_estimators': 173, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_samples': 0.6160136651902284, 'max_depth': 22, 'bootstrap': True, 'auc': 0.5271834885476969, 'f1': 0.12121212121212122, 'sensitivity': 0.06896551724137931, 'specificity': 0.9854014598540146, 'accuracy': 0.8253012048192772}
Outer 0 - Inner 1: {'n_estimators': 150, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_s


