# In [None]:
import os
import time
import random
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from sklearn.svm import SVC
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, f1_score, precision_recall_curve, auc as pr_auc
from scipy.stats import uniform, loguniform
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

# Load dataset: the CSV is read from the current working directory, with its
# first column used as the row index. Every column except the last becomes the
# feature matrix X; the last column becomes the label vector y.
data_path = os.path.join(os.getcwd(), 'ecg_data.csv')
data = pd.read_csv(data_path, index_col=0)

features_frame = data.iloc[:, :-1]
target_series = data.iloc[:, -1]
X = features_frame.values
y = target_series.values

# The feature count excludes the trailing label column.
n_samples, n_columns = data.shape
print(f'The number of samples: {n_samples}')
print(f'The number of features: {n_columns - 1}')

# Loop ranges: ten outer train/test repetitions, each running ten
# hyperparameter trials in the inner search.
outer_range = range(10)
inner_range = range(10)

# Result collectors, filled once per outer iteration:
#   best_params_dict / best_model_dict are keyed by "Outer <i>";
#   precision_list / recall_list / auc_list hold the per-iteration PR data.
best_params_dict, best_model_dict = {}, {}
precision_list, recall_list, auc_list = [], [], []

# Hyperparameter sampling distributions. They are frozen once here instead of
# being rebuilt for every trial.
c_dist = loguniform(0.01, 1000)
gamma_dist = loguniform(0.0001, 100)
# scipy's uniform is parameterised as (loc, scale) and samples from
# [loc, loc + scale]; the scale is chosen so the upper bound is exactly 1e-2.
tol_dist = uniform(1e-5, 1e-2 - 1e-5)

# Outer loop: repeated stratified hold-out evaluation. Each iteration runs its
# own random hyperparameter search (scored by inner cross-validation), refits
# the winning configuration on the full outer training split and evaluates it
# once on the held-out test split.
for outer_rand in tqdm(outer_range, desc='Outer Loop', total=len(outer_range)):
    # Split into train/test; the loop index doubles as the split seed so every
    # iteration gets a different but reproducible partition.
    X_train_outer, X_test_outer, y_train_outer, y_test_outer = train_test_split(
        X, y, test_size=0.2, stratify=y, shuffle=True, random_state=outer_rand)

    # Scale and apply PCA. Both transformers are fitted on the outer training
    # split only and then applied to the test split.
    # NOTE(review): they are fitted before the inner CV split, so the inner
    # validation folds contribute to the scaling/PCA statistics. Wrapping the
    # preprocessing and the SVC in a Pipeline would remove that coupling.
    scaler = RobustScaler(quantile_range=(25, 75))
    X_train_outer = scaler.fit_transform(X_train_outer)
    X_test_outer = scaler.transform(X_test_outer)

    pca = PCA(n_components=0.99, random_state=42)
    X_train_outer = pca.fit_transform(X_train_outer)
    X_test_outer = pca.transform(X_test_outer)

    # Inner CV setup
    inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # Seeded generators make the random search reproducible per outer
    # iteration, consistent with the seeded splits above.
    py_rng = random.Random(outer_rand)
    np_rng = np.random.RandomState(outer_rand)

    best_inner_auc = -1
    best_inner_params = None

    # Randomized hyperparameter search
    for _trial in tqdm(inner_range, desc=f'Inner Loop for Outer {outer_rand}', leave=False):
        params = {
            'C': float(c_dist.rvs(random_state=np_rng)),
            'gamma': float(gamma_dist.rvs(random_state=np_rng)),
            'class_weight': py_rng.choice(['balanced', None]),
            'shrinking': py_rng.choice([True, False]),
            'tol': float(tol_dist.rvs(random_state=np_rng)),
        }

        auc_scores = []
        for train_idx, val_idx in inner_cv.split(X_train_outer, y_train_outer):
            X_train_fold, X_val_fold = X_train_outer[train_idx], X_train_outer[val_idx]
            y_train_fold, y_val_fold = y_train_outer[train_idx], y_train_outer[val_idx]

            # Optional oversampling of the training fold (currently disabled):
            # smote = SMOTE(random_state=42)
            # X_train_fold, y_train_fold = smote.fit_resample(X_train_fold, y_train_fold)

            # random_state fixes the internal CV that probability=True uses
            # for probability calibration.
            model = SVC(kernel='rbf', probability=True, random_state=42, **params)
            model.fit(X_train_fold, y_train_fold)
            y_val_proba = model.predict_proba(X_val_fold)[:, 1]
            auc_scores.append(roc_auc_score(y_val_fold, y_val_proba))

        # Only the hyperparameters are remembered here; the fold models were
        # each trained on a subset of the outer training split.
        mean_auc = np.mean(auc_scores)
        if mean_auc > best_inner_auc:
            best_inner_auc = mean_auc
            best_inner_params = params.copy()

    if best_inner_params is None:
        raise RuntimeError(
            f"Outer {outer_rand}: hyperparameter search produced no valid configuration")

    # Refit the selected configuration on the complete outer training split so
    # the evaluated model uses all available training data, not just the last
    # inner fold of the winning trial.
    best_inner_model = SVC(kernel='rbf', probability=True, random_state=42, **best_inner_params)
    best_inner_model.fit(X_train_outer, y_train_outer)

    # Test on outer test set
    y_pred = best_inner_model.predict(X_test_outer)
    y_pred_proba = best_inner_model.predict_proba(X_test_outer)[:, 1]

    # Metrics (the four-way unpacking requires a binary confusion matrix)
    tn, fp, fn, tp = confusion_matrix(y_test_outer, y_pred).ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    auc_score = roc_auc_score(y_test_outer, y_pred_proba)
    f1 = f1_score(y_test_outer, y_pred)
    accuracy = accuracy_score(y_test_outer, y_pred)

    # Precision-recall
    precision, recall, _ = precision_recall_curve(y_test_outer, y_pred_proba)
    pr_score = pr_auc(recall, precision)
    precision_list.append(precision)
    recall_list.append(recall)
    auc_list.append(pr_score)

    # Save best parameters + metrics
    best_params_dict[f"Outer {outer_rand}"] = best_inner_params.copy()
    best_params_dict[f"Outer {outer_rand}"].update({
        'auc': auc_score,
        'f1': f1,
        'sensitivity': sensitivity,
        'specificity': specificity,
        'accuracy': accuracy,
        'pr_auc': pr_score
    })
    best_model_dict[f"Outer {outer_rand}"] = best_inner_model

    print(f"\nOuter {outer_rand} -> Best Hyperparameters: {best_inner_params}")

    # Save results after each outer loop so partial results survive an
    # interrupted run; both files are overwritten with the cumulative data.
    results_df = pd.DataFrame.from_dict(best_params_dict, orient='index')
    results_csv_path = os.path.join(os.getcwd(), 'svm_best_hyperparameters.csv')
    results_df.to_csv(results_csv_path)

    pr_data = {
        "precision_list": precision_list,
        "recall_list": recall_list,
        "auc_list": auc_list
    }
    pr_data_path = os.path.join(os.getcwd(), 'svm_precision_recall_data.pkl')
    with open(pr_data_path, 'wb') as f:
        pickle.dump(pr_data, f)

    print(f"Saved precision-recall data to {pr_data_path}")

# Plot mean PR curve ± std
# Every per-iteration curve is resampled onto a shared recall grid so the
# curves can be averaged point by point.
mean_recall = np.linspace(0, 1, 100)
interp_precisions = []

for fold_precision, fold_recall in zip(precision_list, recall_list):
    # np.interp needs increasing x-coordinates, so order the points by recall.
    ascending = np.argsort(fold_recall)
    interp_precisions.append(
        np.interp(
            mean_recall,
            fold_recall[ascending],
            fold_precision[ascending],
            left=1.0,
            right=0.0,
        )
    )

stacked_precisions = np.asarray(interp_precisions)
mean_precision = stacked_precisions.mean(axis=0)
std_precision = stacked_precisions.std(axis=0)

# Band of one standard deviation around the mean curve.
band_lower = mean_precision - std_precision
band_upper = mean_precision + std_precision

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(
    mean_recall,
    mean_precision,
    label=f"Mean PR Curve (AUC = {np.mean(auc_list):.2f})",
    color='darkred',
)
ax.fill_between(
    mean_recall,
    band_lower,
    band_upper,
    alpha=0.2,
    color='red',
    label="±1 Std. Dev.",
)
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("SVM Precision-Recall Curve (Mean ± Std)")
ax.legend(loc="lower left")
ax.grid()
fig.tight_layout()

# Write the figure to the working directory before showing it.
plot_path = os.path.join(os.getcwd(), 'svm_outer_loop_pr_curve.png')
fig.savefig(plot_path)
plt.show()

print(f"Saved PR curve plot to {plot_path}")
