# Complete ECG Heartbeat Classification with a 1D-CNN

This notebook contains the complete, end-to-end code for training a 1D Convolutional Neural Network on the MIT-BIH Arrhythmia dataset. It includes all steps from loading the real data to final model evaluation.

### Step 1: Import Libraries

In [3]:
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

### Step 2: Load the Real Kaggle Dataset

In [6]:
try:
    df = pd.read_csv('mitbih_train.csv', header=None)
    print("Successfully loaded 'mitbih_train.csv'")
except FileNotFoundError:
    print("Error: 'mitbih_train.csv' not found. Please ensure it's in the same directory as this notebook.")
    exit()

# The last column (187) is the label, the rest are features.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

print("\nDataset Shape:", X.shape)
print("Labels Shape:", y.shape)

Successfully loaded 'mitbih_train.csv'

Dataset Shape: (87554, 187)
Labels Shape: (87554,)


### Step 3: Split and Scale the Data

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data splitting and scaling complete.")

Data splitting and scaling complete.


### Step 4: Reshape Data for the CNN

A 1D-CNN expects a 3D input of shape `(samples, timesteps, channels)`.

In [11]:
X_train_reshaped = np.expand_dims(X_train_scaled, axis=-1)
X_test_reshaped = np.expand_dims(X_test_scaled, axis=-1)

print("Reshaped training data shape:", X_train_reshaped.shape)

Reshaped training data shape: (70043, 187, 1)


### Step 5: Prepare for Imbalanced Data

We calculate class weights to ensure the model pays more attention to the less frequent heartbeat categories.

In [15]:
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = dict(enumerate(class_weights))

print("Class Weights Dictionary:", class_weights_dict)

Class Weights Dictionary: {0: 0.2416234023837039, 1: 7.878852643419573, 2: 3.0256155507559397, 3: 27.307212475633527, 4: 2.722759961127308}


### Step 6: Define Callbacks

In [18]:
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3, 
    verbose=1,
    factor=0.5, 
    min_lr=0.00001
)

callbacks_list = [early_stop, learning_rate_reduction]

### Step 7: Build the 1D-CNN Model

In [21]:
# Note: The number of output neurons must match the number of classes.
num_classes = len(np.unique(y))

model = Sequential([
    Conv1D(filters=64, kernel_size=6, activation='relu', padding='same', input_shape=(X_train_reshaped.shape[1], 1)),
    MaxPooling1D(pool_size=3),
    Dropout(0.3),

    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax') # Softmax for multi-class classification
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Step 8: Train the Model

In [24]:
history = model.fit(
    X_train_reshaped, 
    y_train,
    epochs=50,
    batch_size=128,
    validation_data=(X_test_reshaped, y_test),
    class_weight=class_weights_dict, 
    callbacks=callbacks_list
)

Epoch 1/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.6255 - loss: 0.8719 - val_accuracy: 0.8056 - val_loss: 0.5218 - learning_rate: 0.0010
Epoch 2/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - accuracy: 0.7790 - loss: 0.4593 - val_accuracy: 0.8462 - val_loss: 0.4574 - learning_rate: 0.0010
Epoch 3/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 21ms/step - accuracy: 0.8174 - loss: 0.3838 - val_accuracy: 0.8798 - val_loss: 0.3769 - learning_rate: 0.0010
Epoch 4/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 23ms/step - accuracy: 0.8262 - loss: 0.3516 - val_accuracy: 0.8992 - val_loss: 0.3320 - learning_rate: 0.0010
Epoch 5/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 38ms/step - accuracy: 0.8458 - loss: 0.3409 - val_accuracy: 0.9081 - val_loss: 0.2773 - learning_rate: 0.0010
Epoch 6/50
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

### Step 9: Final Evaluation and Saving

In [26]:
print("\n--- Final Model Evaluation ---")
loss, accuracy = model.evaluate(X_test_reshaped, y_test)

print(f"Final Test Accuracy: {accuracy*100:.2f}%")
print(f"Final Test Loss: {loss:.4f}")

# Save the final, trained model
model.save("ecg_cnn_model_final.h5")
print("\nModel saved as ecg_cnn_model_final.h5")


--- Final Model Evaluation ---
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.9410 - loss: 0.1591




Final Test Accuracy: 94.19%
Final Test Loss: 0.1594

Model saved as ecg_cnn_model_final.h5
