In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from, and
# save the trained model under, /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import ast

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Load the ECG dataset CSV from the mounted Drive folder.
DATASET_PATH = "/content/drive/MyDrive/ECG/dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Parse stringified lists/dicts into usable Python objects
def parse_column(col):
    """Turn a column of stringified Python literals into real objects.

    Parameters
    ----------
    col : pandas.Series
        Each cell is a string such as ``"[...]"`` / ``"{...}"``, a missing
        value (NaN/None), or an already-parsed container.

    Returns
    -------
    pandas.Series
        Same index as ``col``. Strings are parsed with ``ast.literal_eval``
        (literals only, no code execution), missing values become ``[]``,
        and already-parsed containers are returned unchanged so the cell that
        calls this can be re-run without errors.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    def _parse_cell(cell):
        # Already parsed (e.g. the notebook cell was executed twice): keep it.
        # pd.isna() on a container returns an array, so this check must come first.
        if isinstance(cell, (list, tuple, dict, set)):
            return cell
        if pd.isna(cell):
            return []
        return ast.literal_eval(cell)

    return col.apply(_parse_cell)

# Replace the stringified columns with their parsed Python objects.
for column_name in ('segments_hr', 'segments_br', 'bad_ecg'):
    df[column_name] = parse_column(df[column_name])

# Feature engineering functions
def extract_mean(segment_list):
    """Return the mean of the ``'value'`` entries in a list of segment dicts.

    Segments without a ``'value'`` key are ignored. Returns 0 when the list is
    empty/falsy or when no segment carries a ``'value'``, so the "no data"
    case always yields the same sentinel instead of NaN plus a NumPy
    "mean of empty slice" warning.
    """
    if not segment_list:
        return 0
    values = [seg['value'] for seg in segment_list if 'value' in seg]
    if not values:
        return 0
    return np.mean(values)

def extract_std(segment_list):
    """Return the population standard deviation of the ``'value'`` entries.

    Segments without a ``'value'`` key are ignored. Returns 0 when the list is
    empty/falsy or when no segment carries a ``'value'``, so the "no data"
    case always yields the same sentinel instead of NaN plus a NumPy warning.
    Uses ``np.std`` defaults (ddof=0).
    """
    if not segment_list:
        return 0
    values = [seg['value'] for seg in segment_list if 'value' in seg]
    if not values:
        return 0
    return np.std(values)

# Per-row mean / standard deviation of the 'value' entries in each segment list.
df['mean_hr'] = df['segments_hr'].apply(extract_mean)
df['std_hr'] = df['segments_hr'].apply(extract_std)

df['mean_br'] = df['segments_br'].apply(extract_mean)
df['std_br'] = df['segments_br'].apply(extract_std)

# A zero, negative or missing duration cannot serve as a denominator: dividing
# by it would produce inf/NaN ratios that break scaling and training later on.
# Mask such durations to NaN so the affected rows are removed by dropna below.
valid_duration = df['duration'].where(df['duration'] > 0)

# Number of entries in the stringified 'ecg_pulses' list (0 when missing).
df['pulse_count'] = df['ecg_pulses'].apply(lambda x: len(ast.literal_eval(x)) if pd.notna(x) else 0)
df['pulse_density'] = df['pulse_count'] / valid_duration

# Total length of the flagged intervals; each interval is indexed as (start, end).
# sum() over an empty list is already 0, so no special case is needed.
df['bad_ecg_coverage'] = df['bad_ecg'].apply(lambda x: sum(interval[1] - interval[0] for interval in x))
df['bad_ecg_ratio'] = df['bad_ecg_coverage'] / valid_duration

# Label: Bad ECG if bad coverage > 100 seconds
BAD_COVERAGE_THRESHOLD = 100
df['label'] = (df['bad_ecg_coverage'] > BAD_COVERAGE_THRESHOLD).astype(int)

# Drop rows with a missing value in any column that is later fed to the model.
df = df.dropna(subset=[
    'duration', 'weight', 'age',
    'mean_hr', 'std_hr',
    'mean_br', 'std_br',
    'pulse_count', 'pulse_density',
    'bad_ecg_ratio',
])

# Model inputs (column order defines the network's input order) and target.
# NOTE(review): the label is a threshold on bad_ecg_coverage, and
# bad_ecg_ratio * duration recovers that coverage, so these two inputs together
# leak the target. Consider dropping 'bad_ecg_ratio' when retraining (the
# inference cell's 10-value sample would then need to shrink accordingly).
features = [
    'duration',
    'weight',
    'age',
    'mean_hr',
    'std_hr',
    'mean_br',
    'std_br',
    'pulse_count',
    'pulse_density',
    'bad_ecg_ratio',
]
X = df.loc[:, features]
y = df.loc[:, 'label']

# Hold out 20% of the rows as a test set, stratified on the label, *before*
# fitting the scaler so no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fit the standardisation on the training rows only, then apply it to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed the Python, NumPy and Keras-backend RNGs so weight initialisation,
# dropout masks and batch shuffling are repeatable across Restart & Run All.
# Same seed value as the train/test split's random_state.
set_random_seed(42)

# Model: fully connected binary classifier with a sigmoid output.
model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks: stop once val_loss has not improved for 10 epochs and roll back
# to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train; 10% of the training data is held out for validation (drives early stopping).
model.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate on the held-out test set: sigmoid outputs above 0.5 count as class 1.
y_pred = model.predict(X_test_scaled).flatten()
y_pred_classes = (y_pred > 0.5).astype(int)

test_accuracy_pct = round(accuracy_score(y_test, y_pred_classes) * 100, 2)
print("\n✅ Test Accuracy:", test_accuracy_pct, "%")
print(classification_report(y_test, y_pred_classes))

# Save model
model.save('/content/drive/MyDrive/ecg_model.keras')  # recommended
model.save('/content/drive/MyDrive/ecg_model.h5')     # optional legacy

# The network was trained on standardized inputs, so the saved model is only
# usable on new data together with the scaler statistics. Persist them (and the
# feature order) next to the model; load with np.load('.../ecg_scaler.npz').
np.savez(
    '/content/drive/MyDrive/ecg_scaler.npz',
    mean=scaler.mean_,
    scale=scaler.scale_,
    feature_names=np.array(features),
)

print("✅ Model saved in both .keras and .h5 formats.")
print("✅ Scaler statistics saved to ecg_scaler.npz.")


In [None]:
from tensorflow import keras
import numpy as np

# Reload the saved model from Drive (adjust the path if it was saved elsewhere).
model = keras.models.load_model('/content/drive/MyDrive/ecg_model.keras')

# Smoke-test input: a single made-up row of 10 values, one per training
# feature, already in the standardized space the network was trained on.
# Real samples must be scaled the same way as the training data first.
sample_values = [0.1, -0.2, 0.3, 0.0, 0.1, 0.5, -0.4, 0.2, 0.3, 0.1]
sample_input = np.array([sample_values])

# Sigmoid probability for the sample, shape (1, 1).
prediction = model.predict(sample_input)

# Threshold at 0.5 to obtain the class label.
predicted_class = (prediction > 0.5).astype("int32")
print("✅ Predicted class:", predicted_class[0][0])

model.summary()
