# In [4]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
import pandas as pd
import random

# Create the RFE object and compute a cross-validated score.
from sklearn.svm import SVC

from sklearn import svm
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import RFECV, f_classif
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt

# Read ecg_data.csv from the current working directory; the first CSV column
# is used as the DataFrame index.
data_path = os.path.join(os.getcwd(), 'ecg_data.csv')
data = pd.read_csv(data_path, index_col=0)

# Features are every column but the last; the last column holds the labels.
x, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Values iterated by the outer and the inner loop (five each).
outer = range(5)
inner = range(5)

# Per-run results, keyed by a run label; populated later in the script.
best_params_list = dict()

# Repeated random-split evaluation of a random forest:
#   * outer loop: seeded 80/20 train/test split of (x, y)
#   * inner loop: seeded 85/15 split of the training part, followed by a
#     randomized hyperparameter search (5-fold CV, accuracy) on the larger part
# Every (outer, inner) run stores its best hyperparameters together with the
# sensitivity, specificity and accuracy of the best estimator on the outer
# test split. Results are written to CSV after each completed outer iteration.

# Sorted label values of the full data set. Passing them to confusion_matrix
# keeps the matrix shape fixed even if a split or a prediction vector lacks a
# class, so the four-way unpacking below cannot fail for binary labels.
class_labels = np.unique(y)

# Checkpoint file; loop-invariant, so it is resolved once.
results_csv_path = os.path.join(os.getcwd(), 'best_hyperparameters.csv')

for outer_rand in tqdm(outer, desc='Outer Loop'):
    # The loop value doubles as the split seed so each outer split is
    # reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=outer_rand)
    # Skip splits whose training part has fewer than 5% samples labelled 1.
    if (np.sum(y_train == 1) / len(y_train)) * 100 < 5.0:
        print(f'Skipping Outer {outer_rand} due to class imbalance.')
        continue

    # Inner loop for hyperparameter tuning
    for inner_rand in tqdm(inner, desc='Inner Loop', leave=True):
        run_key = f"Outer {outer_rand} - Inner {inner_rand}"
        # One seed per (outer, inner) pair, shared by the grid sampling, the
        # forest and the search. Distinct per pair as long as the inner values
        # are 0..len(inner)-1, which holds for the ranges defined above.
        run_seed = outer_rand * len(inner) + inner_rand
        # Private generator: reproducible without reseeding the global
        # `random` module state.
        rng = random.Random(run_seed)

        # NOTE(review): X_train_val / y_train_val are not used in this section;
        # the best model is scored on the outer test split instead. Confirm
        # whether the validation part was meant to be used for model selection.
        X_train_train, X_train_val, y_train_train, y_train_val = train_test_split(
            X_train, y_train, test_size=0.15, shuffle=True,
            random_state=inner_rand)

        # Three randomly drawn candidate values per hyperparameter
        param_grid_rf = {
            'n_estimators': [rng.randint(50, 200) for _ in range(3)],
            'max_depth': [rng.randint(1, 30) for _ in range(3)],
            'min_samples_split': [rng.randint(2, 10) for _ in range(3)],  # Must be an integer >= 2
            'min_samples_leaf': [rng.randint(1, 4) for _ in range(3)],  # Must be >= 1
            'max_samples': [rng.uniform(0.5, 1) for _ in range(3)],  # Fraction of samples per tree
            'bootstrap': [True]  # Required for max_samples
        }

        # Randomized Search for efficiency
        rf_model = RandomForestClassifier(random_state=run_seed)
        random_search = RandomizedSearchCV(
            estimator=rf_model,
            param_distributions=param_grid_rf,
            scoring='accuracy',
            cv=5,
            n_iter=10,  # Randomly sample 10 hyperparameter combinations
            n_jobs=-1,  # Use all available CPU cores
            random_state=run_seed,
        )

        # Fit the search
        random_search.fit(X_train_train, y_train_train)

        # Copy the best hyperparameters: the search object's own dict must not
        # be mutated, and the printed hyperparameters must not contain metrics.
        best_params = dict(random_search.best_params_)

        # Best model evaluation on the outer test split
        best_model = random_search.best_estimator_
        y_pred = best_model.predict(X_test)

        # Confusion matrix with a fixed label order (see class_labels above)
        tn, fp, fn, tp = confusion_matrix(
            y_test, y_pred, labels=class_labels).ravel()

        # Sensitivity (true positive rate) and specificity (true negative
        # rate); NaN when the test split has no samples of the relevant class.
        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else float('nan')
        specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')
        accuracy = accuracy_score(y_test, y_pred)

        # Store hyperparameters and metrics together (same keys/columns as
        # before: hyperparameters first, then the three metrics).
        best_params_list[run_key] = {
            **best_params,
            'sensitivity': sensitivity,
            'specificity': specificity,
            'accuracy': accuracy,
        }
        print(f"\nOuter {outer_rand}, Inner {inner_rand} -> Best Hyperparameters: {best_params}")
        print(f"Test Set Accuracy: {accuracy:.4f}")
    # Save all results collected so far (acts as a checkpoint per outer run)
    results_df = pd.DataFrame.from_dict(best_params_list, orient='index')
    results_df.to_csv(results_csv_path)
    print(f"\nBest parameters saved to {results_csv_path} after Outer {outer_rand} completion")
# Final report: one "<run label>: <stored result dict>" line per recorded run.
print("\nBest Parameters Summary:")
for run_label in best_params_list:
    print(f"{run_label}: {best_params_list[run_label]}")

# Notebook output: KeyboardInterrupt (execution was interrupted)