In [4]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.utils.class_weight import compute_class_weight # Import for handling imbalance
import matplotlib.pyplot as plt

# --- STAGE 1: (No changes here, it's already working well) ---
print("--- STAGE 1: Building and Training Beat Classifier (Model A) on MIT-BIH Data ---")
# ... (The entire Stage 1 code is exactly the same as the previous optimized version) ...
try:
    mit_train_df = pd.read_csv("mitbih_train.csv", header=None)
    mit_test_df = pd.read_csv("mitbih_test.csv", header=None)
    mit_df = pd.concat([mit_train_df, mit_test_df], axis=0)
    print("MIT-BIH dataset loaded successfully.")
except FileNotFoundError:
    print("ERROR: Make sure 'mitbih_train.csv' and 'mitbih_test.csv' are in the same folder.")
    exit()
X_mit = mit_df.iloc[:, :-1].values
y_mit = mit_df.iloc[:, -1].values
y_mit_cat = to_categorical(y_mit)
X_train_mit, X_test_mit, y_train_mit, y_test_mit = train_test_split(X_mit, y_mit_cat, test_size=0.2, random_state=42)
model_A = Sequential([ Dense(64, activation='relu', input_shape=(187,)), Dense(32, activation='relu'), Dense(5, activation='softmax') ])
model_A.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print("Training Model A...")
model_A.fit(X_train_mit, y_train_mit, epochs=10, batch_size=128, verbose=1, validation_split=0.1)
loss_A, acc_A = model_A.evaluate(X_test_mit, y_test_mit, verbose=0)
print(f"\nModel A (Beat Classifier) Test Accuracy: {acc_A * 100:.2f}%")
print("-" * 50 + "\n")


# --- STAGE 3: BUILD AND TRAIN THE DIAGNOSTIC MODEL (MODEL B) ---
print("--- STAGE 3: Building and Training Diagnostic Model (B) on PTBDB Data ---")

# Step 3.1: Load PTBDB Data
try:
    ptb_normal_df = pd.read_csv("ptbdb_normal.csv", header=None)
    ptb_abnormal_df = pd.read_csv("ptbdb_abnormal.csv", header=None)
    print("PTBDB dataset loaded successfully.")
except FileNotFoundError:
    print("ERROR: Make sure 'ptbdb_normal.csv' and 'ptbdb_abnormal.csv' are in the same folder.")
    exit()

ptb_df = pd.concat([ptb_normal_df, ptb_abnormal_df])
ptb_df = shuffle(ptb_df, random_state=42)
X_ptb_raw = ptb_df.iloc[:, :-1].values
y_ptb = ptb_df.iloc[:, -1].values

# Step 3.2: Optimized Feature Engineering
print("Applying optimized feature engineering pipeline to PTBDB data...")
prediction_probs = model_A.predict(X_ptb_raw)
predicted_classes = np.argmax(prediction_probs, axis=1)
X_ptb_featured = np.concatenate([prediction_probs, predicted_classes.reshape(-1, 1)], axis=1)
print("Feature engineering complete.")

# Step 3.3: Prepare Data for Model B
X_train_ptb, X_test_ptb, y_train_ptb, y_test_ptb = train_test_split(X_ptb_featured, y_ptb, test_size=0.2, random_state=42)

# **IMPROVEMENT: CALCULATE CLASS WEIGHTS TO HANDLE IMBALANCE**
# This will calculate how much to penalize errors for each class
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_ptb), # The unique classes are 0 (Normal) and 1 (Abnormal)
    y=y_train_ptb
)
# The result needs to be a dictionary for Keras
class_weights_dict = dict(enumerate(class_weights))
print("\nClass Imbalance Detected. Calculated weights to compensate:")
print(f"  Weight for class 0 (Normal): {class_weights_dict[0]:.2f}")
print(f"  Weight for class 1 (Abnormal): {class_weights_dict[1]:.2f}")
# **END OF IMPROVEMENT**

# Step 3.4: Build and Train Model B
model_B = Sequential([
    Dense(16, activation='relu', input_shape=(X_train_ptb.shape[1],)),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_B.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print("\nTraining Model B (with class weights)...")
# **IMPROVEMENT: PASS THE WEIGHTS TO THE .fit() METHOD**
history_B = model_B.fit(
    X_train_ptb, y_train_ptb,
    epochs=20,
    batch_size=64,
    verbose=1,
    validation_split=0.1,
    class_weight=class_weights_dict # Here's the magic!
)

# Step 3.5: Evaluate Model B
loss_B, acc_B = model_B.evaluate(X_test_ptb, y_test_ptb, verbose=0)
print(f"\nModel B (Diagnostic Model) Test Accuracy: {acc_B * 100:.2f}%")
print("-" * 50 + "\n")

# --- STAGE 4: (No changes here) ---
print("--- STAGE 4: Live Test of the Full Pipeline ---")
def run_live_pipeline(raw_signal, model_a, model_b):
    signal_reshaped = np.reshape(raw_signal, (1, 187))
    live_probs = model_a.predict(signal_reshaped, verbose=0)[0]
    live_class = np.argmax(live_probs)
    live_features = np.append(live_probs, live_class)
    live_features_reshaped = np.reshape(live_features, (1, -1))
    diagnostic_prob = model_b.predict(live_features_reshaped, verbose=0)[0][0]
    return live_class, diagnostic_prob

raw_ptb_test_signals = train_test_split(X_ptb_raw, y_ptb, test_size=0.2, random_state=42)[1]
sample_raw_signal = raw_ptb_test_signals[15]
print(f"Input Raw ECG Signal (first 10 values): {sample_raw_signal[:10]}...")
predicted_beat_idx, diagnostic_probability = run_live_pipeline(sample_raw_signal, model_A, model_B)
beat_type_names = ['Normal (N)', 'Supraventricular (S)', 'Ventricular (V)', 'Fusion (F)', 'Unknown (Q)']
diagnosis = "Abnormal (Myocardial Infarction Detected)" if diagnostic_probability > 0.5 else "Normal"
print(f"\n[Model A Analysis]: The most likely beat type is '{beat_type_names[predicted_beat_idx]}'")
print(f"[Model B Analysis]: Probability of being Abnormal is {diagnostic_probability:.4f}")
print(f"\n>>> Final Diagnosis: {diagnosis}")
print("-" * 50)


--- STAGE 1: Building and Training Beat Classifier (Model A) on MIT-BIH Data ---
MIT-BIH dataset loaded successfully.
Training Model A...
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9044 - loss: 0.3625 - val_accuracy: 0.9407 - val_loss: 0.2202
Epoch 2/10
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9457 - loss: 0.1968 - val_accuracy: 0.9552 - val_loss: 0.1617
Epoch 3/10
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9556 - loss: 0.1605 - val_accuracy: 0.9633 - val_loss: 0.1434
Epoch 4/10
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9610 - loss: 0.1407 - val_accuracy: 0.9671 - val_loss: 0.1366
Epoch 5/10
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9649 - loss: 0.1271 - val_accuracy: 0.9681 - val_loss: 0.1243
Epoch 6/10
[1m616/616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9678 - loss: 0.1186 - val_accuracy: 0.9677 - val_loss: 0.1227
Epoch 7/10
[1m616/616[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5095 - loss: 0.6827 - val_accuracy: 0.4266 - val_loss: 0.6654
Epoch 2/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4198 - loss: 0.6618 - val_accuracy: 0.4026 - val_loss: 0.6817
Epoch 3/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4308 - loss: 0.6528 - val_accuracy: 0.7142 - val_loss: 0.6283
Epoch 4/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4611 - loss: 0.6447 - val_accuracy: 0.4601 - val_loss: 0.6431
Epoch 5/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4549 - loss: 0.6376 - val_accuracy: 0.4910 - val_loss: 0.6244
Epoch 6/20
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4643 - loss: 0.6333 - val_accuracy: 0.4712 - val_loss: 0.6362
Epoch 7/20
[1m164/164[0m [32m━━━━━━━