# In [None]:
import os
import numpy as np
import wfdb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# ---------------------------
# Data Loading & Preprocessing
# ---------------------------
def _scale_and_fit(ecg_signal, fixed_length):
    """Min-max scale a 1-D signal to [0, 1] and force it to ``fixed_length`` samples."""
    ecg_signal = np.asarray(ecg_signal, dtype=float)
    lowest = np.min(ecg_signal)
    span = np.max(ecg_signal) - lowest

    if span == 0:
        # Flat trace: dividing by a zero range would fill the sample with NaN.
        scaled = np.zeros_like(ecg_signal)
    else:
        scaled = (ecg_signal - lowest) / span

    if len(scaled) > fixed_length:
        return scaled[:fixed_length]
    # Zero-pad at the end so every sample has the same length.
    return np.pad(scaled, (0, fixed_length - len(scaled)), mode='constant')


def load_ecg_data(base_dir, fixed_length=1000):
    """Load every WFDB record found under ``base_dir`` as a fixed-length sample.

    ``base_dir`` is expected to contain one sub-directory per patient. Each
    ``.dat`` file inside a patient directory is read with ``wfdb.rdsamp``;
    only the first signal column is kept. The signal is min-max scaled to
    [0, 1], then truncated or zero-padded to ``fixed_length`` samples.

    Args:
        base_dir: Directory holding the per-patient sub-directories.
        fixed_length: Number of samples every returned signal is cut or
            padded to.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_records, fixed_length, 1)`` and ``y`` has shape ``(n_records,)``.
        A label is 1 when the file name contains ``'lre'`` and 0 otherwise.

    Records that cannot be read, or that contain no samples, are reported on
    stdout and skipped.
    """
    X, y = [], []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made afterwards) reproducible.
    for patient in sorted(os.listdir(base_dir)):
        patient_path = os.path.join(base_dir, patient)
        if not os.path.isdir(patient_path):
            continue

        for file in sorted(os.listdir(patient_path)):
            if not file.endswith('.dat'):
                continue

            # Strip only the final extension so record names that contain
            # dots are not truncated.
            record_name = os.path.splitext(file)[0]
            file_path = os.path.join(patient_path, record_name)

            # Best-effort read: one unreadable record must not abort the whole
            # load, but Ctrl-C / SystemExit must still propagate.
            try:
                signals, _ = wfdb.rdsamp(file_path)
            except Exception as exc:
                print(f"Skipping corrupt/invalid file: {file_path} ({exc})")
                continue

            # Use the first lead (modify if multi-lead needed)
            ecg_signal = signals[:, 0]
            if ecg_signal.size == 0:
                print(f"Skipping empty record: {file_path}")
                continue

            X.append(_scale_and_fit(ecg_signal, fixed_length))
            y.append(1 if 'lre' in file else 0)  # Label based on filename

    # Trailing axis of size 1 gives the (samples, timesteps, features) layout
    # that the LSTM input expects.
    X = np.array(X, dtype=float).reshape(len(X), fixed_length, 1)
    y = np.array(y, dtype=int)
    return X, y

# Read every patient recording into memory.
# BASE_DIR must point at the directory that holds the per-patient folders.
BASE_DIR = 'path/to/patient_folders'  # Update this path
X_ecg, y_ecg = load_ecg_data(BASE_DIR)

# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the shuffle identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_ecg,
    y_ecg,
    random_state=42,
    test_size=0.2,
)

# ---------------------------
# LSTM Model Architecture
# ---------------------------
# Two stacked LSTM layers, each followed by dropout, ending in a single
# sigmoid unit that outputs the probability of the positive class.
model = Sequential([
    # Declare the (timesteps, features) input with an explicit Input object;
    # passing `input_shape=` to the first layer is deprecated in recent Keras.
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    # return_sequences=True hands the full sequence to the next LSTM layer.
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the single sigmoid output and 0/1 labels.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# ---------------------------
# Training & Evaluation
# ---------------------------
# Stop when the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 50 epochs; 20% of the training samples are set aside for
# validation, which provides the `val_loss` monitored above.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stop],
)

# Score the trained model on the held-out test set and report accuracy as a
# percentage.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")