In [1]:
!pip install pandas numpy tensorflow scikit-learn matplotlib



In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shayanfazeli/heartbeat")

Downloading from https://www.kaggle.com/api/v1/datasets/download/shayanfazeli/heartbeat?dataset_version_number=1...


100%|██████████| 98.8M/98.8M [00:01<00:00, 68.6MB/s]

Extracting files...





In [3]:
print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/shayanfazeli/heartbeat/versions/1


In [2]:
# prompt: save the above file which is in .cache to my workspace

import shutil
import os

# Assuming 'path' variable holds the path to the downloaded dataset from the previous code
# Replace with the actual path variable if it's different


# Specify the destination directory within your workspace
destination_directory = "sample_data"  # Change to your desired directory

# Check if the destination directory exists, create it if not
if not os.path.exists(destination_directory):
    os.makedirs(destination_directory)

# Copy the downloaded files to the destination directory
try:
  shutil.copytree(path, os.path.join(destination_directory, "heartbeat_dataset"), dirs_exist_ok=True) # Copy to workspace, handle existing dir
  print(f"Files copied successfully to: {os.path.join(destination_directory, 'heartbeat_dataset')}")

except FileNotFoundError:
    print(f"Error: Source directory '{path}' not found.")

except Exception as e:
    print(f"An error occurred during copying: {e}")

Files copied successfully to: sample_data/heartbeat_dataset


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

In [10]:
# Step 1: Load the dataset
def load_data(file_path):
    data = pd.read_csv(file_path, header=None)
    X = data.iloc[:, :-1].values  # Features (first 187 columns)
    y = data.iloc[:, -1].values   # Labels (last column)
    return X, y

# Step 2: Preprocess the data
def preprocess_data(X, y):
    # Normalize features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Reshape features for Conv1D (samples, timesteps, features)
    X = X.reshape((X.shape[0], X.shape[1], 1))

    # One-hot encode labels
    encoder = OneHotEncoder(sparse_output=False)
    y = encoder.fit_transform(y.reshape(-1, 1))
    return X, y



In [5]:
data=pd.read_csv("sample_data/heartbeat_dataset/mitbih_train.csv")

In [6]:
data.head()

Unnamed: 0,9.779411554336547852e-01,9.264705777168273926e-01,6.813725233078002930e-01,2.450980395078659058e-01,1.544117629528045654e-01,1.911764740943908691e-01,1.519607901573181152e-01,8.578431606292724609e-02,5.882352963089942932e-02,4.901960864663124084e-02,...,0.000000000000000000e+00.79,0.000000000000000000e+00.80,0.000000000000000000e+00.81,0.000000000000000000e+00.82,0.000000000000000000e+00.83,0.000000000000000000e+00.84,0.000000000000000000e+00.85,0.000000000000000000e+00.86,0.000000000000000000e+00.87,0.000000000000000000e+00.88
0,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.659459,0.186486,0.07027,0.07027,0.059459,0.056757,0.043243,0.054054,0.045946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.925414,0.665746,0.541436,0.276243,0.196133,0.077348,0.071823,0.060773,0.066298,0.058011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.967136,1.0,0.830986,0.586854,0.356808,0.248826,0.14554,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.927461,1.0,0.626943,0.193437,0.094991,0.072539,0.043178,0.053541,0.093264,0.189983,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Step 3: Build the model
def build_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(filters=32, kernel_size=5, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.3),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model



In [8]:
# Step 4: Train the model
def train_model(model, X_train, y_train, X_val, y_val):
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=[early_stopping]
    )
    return history



In [11]:
# Step 5: Main script
if __name__ == "__main__":
    # Load and preprocess the data
    file_path = "sample_data/heartbeat_dataset/mitbih_train.csv"  # Replace with your dataset path
    X, y = load_data(file_path)
    X, y = preprocess_data(X, y)

    # Split into training, validation, and testing sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Build and train the model
    input_shape = X_train.shape[1:]  # (188, 1)
    num_classes = y.shape[1]
    model = build_model(input_shape, num_classes)
    print(model.summary())

    print("Training the model...")
    history = train_model(model, X_train, y_train, X_val, y_val)

    # Save the trained model
    model.save("ekg_model.h5")
    print("Model saved as ekg_model.h5.")

    # Evaluate the model
    print("Evaluating the model on the test set...")
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {test_acc * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
Training the model...
Epoch 1/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 21ms/step - accuracy: 0.9060 - loss: 0.3237 - val_accuracy: 0.9568 - val_loss: 0.1522
Epoch 2/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 21ms/step - accuracy: 0.9560 - loss: 0.1596 - val_accuracy: 0.9674 - val_loss: 0.1128
Epoch 3/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - accuracy: 0.9641 - loss: 0.1320 - val_accuracy: 0.9721 - val_loss: 0.0951
Epoch 4/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - accuracy: 0.9693 - loss: 0.1130 - val_accuracy: 0.9742 - val_loss: 0.0903
Epoch 5/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - accuracy: 0.9709 - loss: 0.1053 - val_accuracy: 0.9763 - val_loss: 0.0801
Epoch 6/50
[1m1916/1916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 20ms/step - accuracy: 0.9733 - loss: 0.0931 - val_accuracy: 0.9



Model saved as ekg_model.h5.
Evaluating the model on the test set...
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9841 - loss: 0.0547
Test Accuracy: 98.36%
