In [3]:
from google.colab import files
uploaded = files.upload()

Saving 01000_lr.dat to 01000_lr.dat
Saving 01000_lr.hea to 01000_lr.hea
Saving 01001_lr.dat to 01001_lr.dat
Saving ptbxl_database.csv to ptbxl_database.csv
Saving scp_statements.csv to scp_statements.csv


In [8]:
# ==============================================
# Subject: 23CSE301 | Lab Session 08
# Task A2 – Implement Pipeline with Stacking Classifier (PTB-XL ECG)
# ==============================================

# === STEP 1: IMPORT REQUIRED LIBRARIES ===
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np

# ---------------------------------------------------------
# Function 1 – Load and Prepare Data
# ---------------------------------------------------------
def load_ecg_data(ptbxl_csv, scp_csv):
    """Load PTB-XL ECG metadata and prepare input features + labels."""
    data = pd.read_csv(ptbxl_csv)
    scp = pd.read_csv(scp_csv)

    # Ensure valid diagnosis
    data = data.dropna(subset=['scp_codes'])

    # Binary label: 0 = Normal, 1 = Abnormal
    data['target'] = data['scp_codes'].apply(lambda x: 0 if 'NORM' in str(x) else 1)

    # Select numeric features
    numeric_features = [col for col in ['age', 'sex', 'height', 'weight', 'heart_rate'] if col in data.columns]
    X = data[numeric_features].fillna(data[numeric_features].mean())
    y = data['target']

    return train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# ---------------------------------------------------------
# Function 2 – Build Stacking Model
# ---------------------------------------------------------
def build_stacking_model():
    """Creates base and meta models for stacking classifier."""
    base_models = [
        ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
        ('gb', GradientBoostingClassifier(random_state=42)),
        ('ada', AdaBoostClassifier(random_state=42)),
        ('svm', SVC(kernel='rbf', probability=True, random_state=42))
    ]
    meta_model = LogisticRegression(max_iter=1000, random_state=42)

    return StackingClassifier(
        estimators=base_models,
        final_estimator=meta_model,
        cv=3,
        n_jobs=-1
    )

# ---------------------------------------------------------
# Function 3 – Build Pipeline
# ---------------------------------------------------------
def build_pipeline(model):
    """Combine preprocessing + model into one unified pipeline."""
    pipeline = Pipeline([
        ('scaler', StandardScaler()),   # Step 1: Scale features
        ('classifier', model)           # Step 2: Stacking classifier
    ])
    return pipeline

# ---------------------------------------------------------
# Function 4 – Train and Evaluate Pipeline
# ---------------------------------------------------------
def train_and_evaluate_pipeline(pipeline, X_train, X_test, y_train, y_test):
    """Trains and evaluates the ECG classification pipeline."""
    print("⏳ Training ECG pipeline...")
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, target_names=['Normal', 'Abnormal'])

    return acc, report

# ---------------------------------------------------------
# MAIN PROGRAM
# ---------------------------------------------------------
ptbxl_csv = "ptbxl_database.csv"
scp_csv   = "scp_statements.csv"

X_train, X_test, y_train, y_test = load_ecg_data(ptbxl_csv, scp_csv)
stack_model = build_stacking_model()
ecg_pipeline = build_pipeline(stack_model)

accuracy, report = train_and_evaluate_pipeline(ecg_pipeline, X_train, X_test, y_train, y_test)

print("\n🎯 ECG Pipeline Accuracy:", round(accuracy * 100, 2), "%")
print("\nClassification Report:\n", report)


⏳ Training ECG pipeline...

🎯 ECG Pipeline Accuracy: 70.83 %

Classification Report:
               precision    recall  f1-score   support

      Normal       0.71      0.56      0.63      1903
    Abnormal       0.71      0.82      0.76      2457

    accuracy                           0.71      4360
   macro avg       0.71      0.69      0.69      4360
weighted avg       0.71      0.71      0.70      4360

