In [3]:
# Google Colab helper module; not expected to be importable in other environments.
from google.colab import files
# Prompt for local files to upload into the runtime. The recorded output of this
# cell shows ECG record files (.dat/.hea) plus ptbxl_database.csv and
# scp_statements.csv being saved under their original names; the two CSVs are
# the ones the main program reads later.
# `uploaded` keeps whatever files.upload() returns (presumably a mapping of
# filename -> file content — confirm against the Colab documentation).
uploaded = files.upload()

Saving 01000_lr.dat to 01000_lr.dat
Saving 01000_lr.hea to 01000_lr.hea
Saving 01001_lr.dat to 01001_lr.dat
Saving ptbxl_database.csv to ptbxl_database.csv
Saving scp_statements.csv to scp_statements.csv


In [7]:
# ==============================================
# Subject: 23CSE301 | Lab Session 08
# Task A1 – Implement Stacking Classifier (PTB-XL ECG)
# ==============================================

# === STEP 1: INSTALL REQUIRED LIBRARIES ===
!pip install scikit-learn pandas numpy

# === STEP 2: IMPORTS ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------------------------------------
# Function 1 – Load and Preprocess PTB-XL ECG Data
# ---------------------------------------------------------
def load_and_preprocess_ecg_data(ptbxl_csv, scp_csv):
    """Load ECG metadata and return a leakage-free, scaled train/test split.

    The split is performed *before* imputation and scaling so that the fill
    values (column means) and the scaler statistics are estimated from the
    training rows only and then applied unchanged to the test rows.

    Parameters
    ----------
    ptbxl_csv : str or path-like
        Path to the metadata CSV. It must contain an ``scp_codes`` column.
    scp_csv : str or path-like
        Accepted for backward compatibility with existing callers. It is not
        read: the previous implementation loaded it into a variable that was
        never used.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` where the ``X`` entries are
        the arrays produced by ``StandardScaler`` and the ``y`` entries are
        pandas Series named ``'target'`` (0 = Normal, 1 = Abnormal).

    Raises
    ------
    ValueError
        If none of the candidate feature columns is present in the CSV.
    """
    data = pd.read_csv(ptbxl_csv)

    # Rows without a diagnosis string cannot be labelled.
    data = data.dropna(subset=['scp_codes'])

    # Binary target: 0 when the diagnosis string mentions 'NORM', 1 otherwise.
    y = data['scp_codes'].apply(lambda x: 0 if 'NORM' in str(x) else 1).rename('target')

    # Keep only the candidate features that actually exist in this CSV.
    candidate_features = ['age', 'sex', 'height', 'weight', 'heart_rate']
    numeric_features = [col for col in candidate_features if col in data.columns]
    if not numeric_features:
        raise ValueError(
            "None of the expected feature columns "
            f"{candidate_features} were found in {ptbxl_csv!r}"
        )
    X = data[numeric_features]

    # Split first (same seed / stratification as before, so the same rows end
    # up in each partition) ...
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # ... then impute with statistics learned from the training rows only.
    train_means = X_train.mean()
    X_train = X_train.fillna(train_means)
    X_test = X_test.fillna(train_means)

    # Fit the scaler on the training rows and reuse it for the test rows.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return [X_train_scaled, X_test_scaled, y_train, y_test]

# ---------------------------------------------------------
# Function 2 – Build Stacking Classifier
# ---------------------------------------------------------
def build_stacking_classifier():
    """Assemble the stacking ensemble used for the ECG task.

    Four level-0 learners (random forest, gradient boosting, AdaBoost and an
    RBF-kernel SVM with probability estimates enabled) feed a logistic
    regression meta-learner. The stack is configured with ``cv=5`` and
    ``n_jobs=-1``. Every estimator is seeded with 42.

    Returns
    -------
    StackingClassifier
        The unfitted ensemble.
    """
    seed = 42

    # Level-0 learners, registered under the short names used by the stack.
    level0 = []
    level0.append(('rf', RandomForestClassifier(n_estimators=100, random_state=seed)))
    level0.append(('gb', GradientBoostingClassifier(random_state=seed)))
    level0.append(('ada', AdaBoostClassifier(random_state=seed)))
    level0.append(('svm', SVC(kernel='rbf', probability=True, random_state=seed)))

    # Level-1 learner that combines the base models' outputs.
    blender = LogisticRegression(max_iter=1000, random_state=seed)

    stack = StackingClassifier(
        estimators=level0,
        final_estimator=blender,
        cv=5,
        n_jobs=-1,
    )
    return stack

# ---------------------------------------------------------
# Function 3 – Train and Evaluate Model
# ---------------------------------------------------------
def train_and_evaluate(model, X_train, X_test, y_train, y_test):
    """Train ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model
        Estimator exposing ``fit`` and ``predict``.
    X_train, y_train
        Features and labels used for fitting.
    X_test, y_test
        Held-out features and labels used for scoring.

    Returns
    -------
    tuple
        ``(accuracy, report)``: the accuracy score and the text
        classification report, labelled 'Normal' / 'Abnormal'.
    """
    class_labels = ['Normal', 'Abnormal']

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, target_names=class_labels),
    )

# ---------------------------------------------------------
# === MAIN PROGRAM ===
# ---------------------------------------------------------
# Input files (uploaded to the working directory in the first cell).
ptbxl_csv, scp_csv = "ptbxl_database.csv", "scp_statements.csv"

# Load -> build -> fit/evaluate.
splits = load_and_preprocess_ecg_data(ptbxl_csv, scp_csv)
X_train, X_test, y_train, y_test = splits
stacking_clf = build_stacking_classifier()
accuracy, report = train_and_evaluate(stacking_clf, X_train, X_test, y_train, y_test)

# Report accuracy as a percentage rounded to two decimals, then the full report.
accuracy_pct = round(accuracy * 100, 2)
print("🎯 ECG Stacking Classifier Accuracy:", accuracy_pct, "%")
print("\nClassification Report:\n", report)


🎯 ECG Stacking Classifier Accuracy: 70.8 %

Classification Report:
               precision    recall  f1-score   support

      Normal       0.71      0.57      0.63      1903
    Abnormal       0.71      0.82      0.76      2457

    accuracy                           0.71      4360
   macro avg       0.71      0.69      0.69      4360
weighted avg       0.71      0.71      0.70      4360

