In [None]:
import numpy as np
import pandas as pd
import warnings
# Install a global 'ignore' filter so warnings of every category are suppressed.
warnings.simplefilter('ignore')

# **ECG Dataset**


### **MITBIH**


In [None]:
# Read the train and test CSVs; neither file has a header row, so columns are
# labelled by integer position.
mitbih_train, mitbih_test = (
    pd.read_csv(csv_path, header=None) for csv_path in ('', '')
)

In [None]:
# Column 187 is used as the label below; give it a readable name and show
# how many rows fall into each class.
mitbih_train = mitbih_train.rename({187: 'target'}, axis='columns')
mitbih_train['target'].value_counts()

In [None]:
# Same renaming for the test split, followed by its per-class row counts.
mitbih_test = mitbih_test.rename({187: 'target'}, axis='columns')
mitbih_test['target'].value_counts()

In [None]:
# Print a summary of the training frame (index range, column dtypes, memory usage).
mitbih_train.info()

In [None]:
# Print a summary of the test frame (index range, column dtypes, memory usage).
mitbih_test.info()

In [None]:
# Features are the first 187 columns of each frame; the label is the 'target' column.
X_train_mitbih, y_train_mitbih = mitbih_train.iloc[:, :187], mitbih_train['target']
X_test_mitbih, y_test_mitbih = mitbih_test.iloc[:, :187], mitbih_test['target']

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the features. The scaler's mean/variance are learned from the
# training split only and then re-used for the test split, so both splits live
# on the same scale and no test-set statistics leak into preprocessing.
scaler = StandardScaler()

X_train_scaled_mitbih = scaler.fit_transform(X_train_mitbih)
X_test_scaled_mitbih = scaler.transform(X_test_mitbih)

In [None]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Sweep the number of PCA components from 1 to 45 and record the test accuracy
# of a default-parameter KNN classifier at each setting.
knn_accuracies_mitbih = []

for i in range(1, 46):
    # PCA is fitted on the scaled training data and then applied to the test data.
    pca = PCA(n_components=i)
    X_train_trf_mitbih = pca.fit_transform(X_train_scaled_mitbih)
    X_test_trf_mitbih = pca.transform(X_test_scaled_mitbih)

    # fit() returns the estimator itself, so construction and training chain.
    knn = KNeighborsClassifier().fit(X_train_trf_mitbih, y_train_mitbih)
    y_pred_mitbih = knn.predict(X_test_trf_mitbih)

    accuracy = accuracy_score(y_test_mitbih, y_pred_mitbih)
    knn_accuracies_mitbih.append(accuracy)

    print(f'{i} PCA -> Accuracy: {accuracy}, Appended: {knn_accuracies_mitbih[-1]}')

In [None]:
# Display the recorded test accuracies, one entry per PCA component count (1-45).
knn_accuracies_mitbih

In [None]:
# `pca` is the last model fitted in the sweep above, i.e. the 45-component one.
# Report how much variance each component explains and the running total.
print('Explained Variance Ratio: ', pca.explained_variance_ratio_)
print('Cumulative Sum of Explained Variance Ratio: ', np.cumsum(pca.explained_variance_ratio_))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
from IPython.display import HTML

# Fixed axis limits and labels so the accuracy curve grows inside a stable frame.
fig, ax = plt.subplots(figsize=(6, 4))
ax.set(
    xlim=(0, 45),
    ylim=(min(knn_accuracies_mitbih) - 0.05, max(knn_accuracies_mitbih) + 0.02),
    title='MITBIH Animated KNN Accuracy vs PCA Components',
    xlabel='Number of PCA Components',
    ylabel='Accuracy',
)
(line,) = ax.plot([], [], 'b-o', lw=2)
ax.grid(True)


def init():
    """Start the animation from an empty curve."""
    line.set_data([], [])
    return (line,)


def update(frame):
    """Show the first ``frame + 1`` accuracies against component counts 1..frame+1."""
    shown = frame + 1
    line.set_data(list(range(1, shown + 1)), knn_accuracies_mitbih[:shown])
    return (line,)


# One frame per recorded accuracy; play once at 150 ms per frame.
ani = animation.FuncAnimation(
    fig,
    update,
    init_func=init,
    frames=len(knn_accuracies_mitbih),
    interval=150,
    blit=True,
    repeat=False,
)

ani.save('', writer='pillow')

# Last expression of the cell: renders the interactive player inline.
HTML(ani.to_jshtml())

In [None]:
from imblearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, f1_score

In [None]:
# Cross-validation setup: macro-averaged F1 scored over 10 shuffled, stratified folds.
macro_f1 = make_scorer(f1_score, average='macro')
skf = StratifiedKFold(random_state=42, shuffle=True, n_splits=10)
scaler = StandardScaler()

In [None]:
# KNN pipeline: standardize, project onto 45 principal components, then classify.
knn_steps = [
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=45)),
    ('knn', KNeighborsClassifier()),
]
pipe_knn = Pipeline(steps=knn_steps)

In [None]:
# Score the KNN pipeline on the unscaled training features; the pipeline's own
# scaler and PCA steps are fitted inside each fold.
print("KNN Macro-F1:")
scores_knn = cross_val_score(
    pipe_knn,
    X_train_mitbih,
    y_train_mitbih,
    scoring=macro_f1,
    cv=skf,
)
print(scores_knn, scores_knn.mean())

In [None]:
from datetime import datetime
import joblib

# Metadata stored alongside the saved KNN model; the timestamp is taken when
# this cell runs.
metadata_common = dict(
    created_at=f"{datetime.now():%Y-%m-%d %H:%M:%S}",
    dataset="MIT-BIH Arrhythmia Dataset",
    cv_folds=10,
    scoring="Macro-F1",
    preprocessing=["StandardScaler", "PCA-45"],
)

In [None]:
# Fit on the raw features: the pipeline's own 'scaler' step standardizes them,
# which matches how the pipeline was cross-validated. Fitting on pre-scaled
# data would make that step learn a near-identity transform and mis-scale raw
# inputs at prediction time.
pipe_knn.fit(X_train_mitbih, y_train_mitbih)

In [None]:
# Bundle the fitted KNN pipeline with its mean CV score and the shared metadata.
knn_bundle = {
    "pipeline": pipe_knn,
    "model_type": "KNeighborsClassifier",
    "macro_f1_score": scores_knn.mean(),
    **metadata_common
}

# Persist the whole bundle (not just the pipeline) so the file has the same
# layout as the other saved models: a dict with the estimator under "pipeline".
joblib.dump(knn_bundle, '')

### **Using XGBoost and Random Forest**


In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import make_scorer, f1_score
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
import torch

In [None]:
# Cross-validation setup: macro-averaged F1 scored over 10 shuffled, stratified folds.
macro_f1 = make_scorer(f1_score, average='macro')
skf = StratifiedKFold(random_state=42, shuffle=True, n_splits=10)
scaler = StandardScaler()

# Use the GPU for XGBoost when PyTorch reports CUDA as available, else the CPU.
if torch.cuda.is_available():
    xgb_device = 'cuda'
else:
    xgb_device = 'cpu'

In [None]:
# XGBoost pipeline: standardize, oversample classes 1 and 3 with SMOTE up to the
# given sample counts, then fit a histogram-based gradient-boosted classifier.
# `use_label_encoder` is intentionally not passed: XGBoost versions that accept
# `device=` no longer have that parameter and only report it as unused.
pipe_xgb = Pipeline([
    ('scale', scaler),
    ('smote', SMOTE(sampling_strategy={1: 5000, 3: 1000}, random_state=42)),
    ('clf', XGBClassifier(
        eval_metric='mlogloss',
        random_state=42,
        tree_method='hist',
        device=xgb_device
    ))
])

In [None]:
# Score the XGBoost pipeline with the shared folds and macro-F1 scorer.
print("XGBoost Macro-F1:")
scores_xgb = cross_val_score(
    pipe_xgb,
    X_train_mitbih,
    y_train_mitbih,
    scoring=macro_f1,
    cv=skf,
)
print(scores_xgb, scores_xgb.mean())

In [None]:
# Random-forest pipeline: standardize, oversample classes 1 and 3 with SMOTE,
# then fit a 100-tree forest using all available cores.
rf_steps = [
    ('scale', StandardScaler()),
    ('smote', SMOTE(random_state=42, sampling_strategy={1: 5000, 3: 1000})),
    ('clf', RandomForestClassifier(n_jobs=-1, random_state=42, n_estimators=100)),
]
pipe_rf = Pipeline(steps=rf_steps)

In [None]:
# Score the random-forest pipeline with the shared folds and macro-F1 scorer.
print("RandomForest Macro-F1:")
scores_rf = cross_val_score(
    pipe_rf,
    X_train_mitbih,
    y_train_mitbih,
    scoring=macro_f1,
    cv=skf,
)
print(scores_rf, scores_rf.mean())

In [None]:
from datetime import datetime
import joblib

# Metadata stored alongside the saved XGBoost and random-forest models; the
# timestamp is taken when this cell runs.
metadata_common = dict(
    created_at=f"{datetime.now():%Y-%m-%d %H:%M:%S}",
    dataset="MIT-BIH Arrhythmia Dataset",
    sampling_strategy={1: 5000, 3: 1000},
    cv_folds=10,
    scoring="Macro-F1",
    preprocessing=["StandardScaler", "SMOTE"],
)

In [None]:
# Fit the XGBoost pipeline on the full (unscaled) training set; scaling and
# SMOTE happen inside the pipeline.
pipe_xgb.fit(X_train_mitbih, y_train_mitbih)

In [None]:
# Bundle the fitted XGBoost pipeline with its mean CV score, then append the
# shared metadata entries and write the bundle to disk.
xgb_bundle = {
    "pipeline": pipe_xgb,
    "model_type": "XGBoostClassifier",
    "macro_f1_score": scores_xgb.mean(),
}
xgb_bundle.update(metadata_common)

joblib.dump(xgb_bundle, "")

In [None]:
# Fit the random-forest pipeline on the full (unscaled) training set; scaling
# and SMOTE happen inside the pipeline.
pipe_rf.fit(X_train_mitbih, y_train_mitbih)

In [None]:
# Bundle the fitted random-forest pipeline with its mean CV score, then append
# the shared metadata entries and write the bundle to disk.
rf_bundle = {
    "pipeline": pipe_rf,
    "model_type": "RandomForestClassifier",
    "macro_f1_score": scores_rf.mean(),
}
rf_bundle.update(metadata_common)

joblib.dump(rf_bundle, "")

In [None]:
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML, display

# Animate the per-fold Macro-F1 scores of both models, revealing one fold per frame.
folds = np.arange(1, 11)

mean_xgb = np.mean(scores_xgb)
mean_rf = np.mean(scores_rf)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlim(0.5, 10.5)
ax.set_ylim(min(min(scores_xgb), min(scores_rf)) - 0.05, max(max(scores_xgb), max(scores_rf)) + 0.05)
ax.set_xlabel('Fold Number')
ax.set_ylabel('Macro-F1 Score')
ax.set_title('ECG Macro-F1 Scores Across Folds: XGBoost vs RandomForest')
ax.set_xticks(folds)
ax.grid(True)

# Animated score curves start empty; the dashed mean lines are static.
line_xgb, = ax.plot([], [], 'o-', label='XGBoost', color='blue')
line_rf, = ax.plot([], [], 's-', label='RandomForest', color='green')
mean_line_xgb = ax.axhline(mean_xgb, color='blue', linestyle='--', alpha=0.5, label='XGBoost Mean')
mean_line_rf = ax.axhline(mean_rf, color='green', linestyle='--', alpha=0.5, label='RandomForest Mean')
ax.legend()

def init():
    """Clear both score curves before the first frame is drawn."""
    line_xgb.set_data([], [])
    line_rf.set_data([], [])
    return line_xgb, line_rf

def update(frame):
    """Extend both curves so they cover folds 1 through ``frame + 1``."""
    line_xgb.set_data(folds[:frame+1], scores_xgb[:frame+1])
    line_rf.set_data(folds[:frame+1], scores_rf[:frame+1])
    return line_xgb, line_rf

ani = FuncAnimation(fig, update, frames=len(folds), init_func=init, blit=True, interval=1000)

# A bare HTML(...) expression is rendered only when it is the last line of a
# cell; here it is not, so display it explicitly. Otherwise the player is
# built and silently discarded.
display(HTML(ani.to_jshtml()))
ani.save('', writer='pillow')

plt.show()

In [None]:
# Load the saved XGBoost bundle and separate the estimator from its metadata.
xgb_model_bundle = joblib.load("")
xgb_pipe = xgb_model_bundle["pipeline"]
xgb_meta = dict(xgb_model_bundle)
del xgb_meta["pipeline"]

# Predict labels for the held-out test features.
y_pred_xgb = xgb_pipe.predict(X_test_mitbih)

# Show everything stored next to the pipeline.
print("XGB Metadata:", xgb_meta)


In [None]:
# These two metrics are not imported anywhere earlier in the notebook, so
# import them here to keep the cell runnable on a fresh kernel.
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix of the XGBoost test predictions (rows: true, columns: predicted).
cm_xgb = confusion_matrix(y_test_mitbih, y_pred_xgb)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_xgb, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix (XGBoost)')
plt.show()

# Per-class precision / recall / F1 on the test split.
print("\nClassification Report (XGBoost):")
print(classification_report(y_test_mitbih, y_pred_xgb))