<a href="https://colab.research.google.com/github/Harshithaprudhivi/Team-15-project/blob/main/100Hzclassify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
from pathlib import Path

# Mount Google Drive; the data and checkpoint paths used further down live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [None]:
# ✅ ECG Classification Pipeline
import ast

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.models import Sequential

In [None]:
# STEP 1: Load ECG signal data (12-lead, 100Hz)
# The array is memory-mapped read-only rather than read into RAM up front.
# NOTE(review): the file is called "Y_100Hz.npy" yet is used as the signal tensor X — confirm it is the intended file.
SIGNALS_PATH = "/content/drive/MyDrive/PTB_processed_batches/Y_100Hz.npy"
X = np.load(SIGNALS_PATH, mmap_mode='r')
print("Loaded X shape:", X.shape)  # Should be (21847, 1000, 12)

Loaded X shape: (21847, 1000, 12)


In [None]:
# Step 2: Load metadata
# Later steps read the "filename_lr" and "scp_codes" columns of this table.
METADATA_CSV = "/content/drive/MyDrive/PTB-data/ptbxl_database.csv"
df = pd.read_csv(METADATA_CSV)

In [None]:
# ✅ Step 3: Sort metadata to match X
# Order the rows by low-rate filename so that row i of df is meant to describe X[i].
# NOTE(review): assumes X was written in ascending `filename_lr` order — confirm against the preprocessing step.
df = df.sort_values("filename_lr").reset_index(drop=True)

# `scp_codes` is stored in the CSV as the text of a Python dict literal.
# ast.literal_eval parses literals only, whereas eval would execute any expression found in the file.
# Values that are already parsed are passed through, so re-running this cell does not fail.
df["scp_codes"] = df["scp_codes"].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)

# Keep only the code names (the dict keys), in the order they appear; the dict values are not used.
df["diagnostic_codes"] = df["scp_codes"].apply(lambda codes: list(codes))


In [None]:
# ✅ Step 4: Use top 20 diagnostic classes
top_20_labels = [
    'SR', 'NORM', 'ABQRS', 'IMI', 'ASMI', 'LVH', 'NDT', 'LAFB', 'AFIB', 'ISC_',
    'PVC', 'IRBBB', 'STD_', 'VCLVH', 'STACH', '1AVB', 'IVCD', 'SARRH', 'NST_', 'ISCAL'
]
# Set copy for membership tests; the list itself is kept because its order defines the label columns later.
top_20_set = frozenset(top_20_labels)
df["filtered_labels"] = df["diagnostic_codes"].apply(
    lambda codes: [c for c in codes if c in top_20_set]
)

# Records carrying none of the top-20 codes are dropped. The SAME rows must be dropped from X:
# the removed records are scattered through the table, so merely truncating X to the new length
# would pair most signals with another record's labels.
# NOTE(review): assumes X[i] belongs to row i of the sorted metadata — confirm.
keep_mask = (df["filtered_labels"].str.len() > 0).to_numpy()
if len(X) < len(df):
    raise ValueError(
        f"X has {len(X)} rows but the metadata has {len(df)}; cannot align signals with labels."
    )
if len(X) != len(df):
    print(
        f"Warning: X has {len(X)} rows but the metadata has {len(df)}; "
        "rows of X beyond the metadata length are dropped."
    )
# Integer-array indexing copies the selected rows out of the memory-mapped file into RAM.
X = X[np.flatnonzero(keep_mask)]
df = df[keep_mask].reset_index(drop=True)


In [None]:
# ✅ Step 5: Match ECG data to filtered labels
# Trim X to as many rows as there are labelled records, then report the resulting count.
# NOTE(review): this keeps the leading rows only; it pairs signals with labels correctly only if
# X already holds exactly the records that remain in df — confirm.
n_labelled = len(df)
X = X[:n_labelled]
print(len(X))

21417


In [None]:
# ✅ Step 6: One-hot encode using top 20 labels
# Multi-hot encoding: one column per entry of top_20_labels (in that order),
# set to 1 where the record carries that code.
mlb = MultiLabelBinarizer(classes=top_20_labels)
Y = mlb.fit_transform(df["filtered_labels"])

# Guard the invariants the training cells depend on.
assert Y.shape == (len(df), len(top_20_labels)), f"unexpected label matrix shape {Y.shape}"
assert len(X) == len(Y), f"{len(X)} signals vs {len(Y)} label rows"


In [None]:
# ✅ Step 7: Train-test split
# Hold out 20% of the records for validation; the fixed seed makes the split repeatable.
VAL_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)


In [None]:
# ✅ Step 8: Define classifier model
# Two Conv1D + max-pooling stages, then a dense head with one sigmoid unit per label.
model = Sequential([
    Conv1D(32, 5, activation='relu', input_shape=(1000, 12)),  # matches the per-record shape printed in Step 1
    MaxPooling1D(2),
    Conv1D(64, 5, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(len(top_20_labels), activation='sigmoid')  # Multi-label output
])

# Keras chooses the variant behind the string 'accuracy' from the output shape; for a 20-wide
# output that is categorical accuracy (argmax of prediction vs argmax of target), which says
# nothing useful about multi-hot targets. BinaryAccuracy scores every label independently at a
# 0.5 threshold; name='accuracy' keeps the log and history keys exactly as before.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[BinaryAccuracy(name='accuracy')],
)


In [None]:
# ✅ Step 9: Train and save best model
# With save_best_only=True the checkpoint file is rewritten only when the monitored quantity
# improves (Keras monitors val_loss unless told otherwise).
CHECKPOINT_PATH = "/content/drive/MyDrive/classifier_model_20labels_100Hz.keras"
checkpoint = ModelCheckpoint(CHECKPOINT_PATH, save_best_only=True)

fit_options = dict(
    epochs=30,
    batch_size=32,
    validation_data=(X_val, Y_val),
    callbacks=[checkpoint],
)
history = model.fit(X_train, Y_train, **fit_options)

print("🎉 Training complete. Model saved to Google Drive.")

Epoch 1/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 104ms/step - accuracy: 0.7484 - loss: 0.3020 - val_accuracy: 0.7773 - val_loss: 0.2735
Epoch 2/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 97ms/step - accuracy: 0.7863 - loss: 0.2754 - val_accuracy: 0.7773 - val_loss: 0.2749
Epoch 3/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 90ms/step - accuracy: 0.7847 - loss: 0.2669 - val_accuracy: 0.7773 - val_loss: 0.2772
Epoch 4/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 94ms/step - accuracy: 0.7859 - loss: 0.2545 - val_accuracy: 0.7771 - val_loss: 0.2834
Epoch 5/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 90ms/step - accuracy: 0.7816 - loss: 0.2388 - val_accuracy: 0.7731 - val_loss: 0.2966
Epoch 6/30
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 90ms/step - accuracy: 0.7809 - loss: 0.2099 - val_accuracy: 0.7612 - val_loss: 0.3143
Epoch 7/30
[1m