<a href="https://colab.research.google.com/github/G-C-T-RAMA-LAKSHMI/Comparative-Analysis-of-CNN-RNN-and-LSTM-for-ECG-Classification-/blob/main/comparision_of_differnt_model_conditions_for_ecg-classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, SimpleRNN, LSTM, Dense, Flatten, Dropout, MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE

In [2]:
# Load dataset
data = pd.read_csv('/content/filtered_ptbxl_data.csv')

def extract_labels(scp_codes):
    if isinstance(scp_codes, str) and 'NORM' in scp_codes:
        return 0  # Normal ECG
    elif isinstance(scp_codes, str) and 'IMI' in scp_codes:
        return 1  # Infarction ECG
    return -1  # Exclude other cases

data['label'] = data['scp_codes'].apply(extract_labels)
data = data[data['label'] != -1]  # Filter only relevant classes

In [3]:
# Ensure 'label' exists before dropping columns
if 'label' in data.columns:
    X = data.drop(columns=['scp_codes', 'label'])
    y = data['label']
else:
    raise KeyError("Column 'label' not found in dataset")

In [4]:
# Select only numeric columns for SMOTE
X_numeric = X.select_dtypes(include=[np.number])
# Impute missing values using the mean
imputer = SimpleImputer(strategy='mean')
X_numeric_imputed = imputer.fit_transform(X_numeric)

In [5]:
# Handle class imbalance using SMOTE
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X_numeric_imputed, y)

In [6]:
# Normalize data
scaler = StandardScaler()
X_resampled = scaler.fit_transform(X_resampled)


In [7]:
# Reshape for models
X_resampled = np.expand_dims(X_resampled, axis=-1)

In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [9]:
# Assuming X_train, X_test, y_train, y_test are already defined
# Assuming X_train.shape[1] is your sequence length
input_shape = (X_train.shape[1], 1)  # Replace X_train with your actual data


In [10]:
# Define model configurations
#conv1D(cl_values) dense(dl_values)
configurations = [
    {'cl_values': [8], 'dl_values': [8]},
    {'cl_values': [8, 16], 'dl_values': [16, 8]},
    {'cl_values': [8, 16], 'dl_values': [32, 16, 8]},
    {'cl_values': [8, 16, 32], 'dl_values': [32, 16, 8]},
    {'cl_values': [8, 16, 32], 'dl_values': [64, 32, 16, 8]}
]

In [11]:
# Function to build and train a model
def build_and_train_model(model_type, kernel_size, config):
    model = Sequential()
    if model_type == 'CNN':
        model.add(Conv1D(config['cl_values'][0], kernel_size=kernel_size, activation='relu', input_shape=input_shape, padding='same'))
        model.add(MaxPooling1D())
        for cl_value in config['cl_values'][1:]:
            model.add(Conv1D(cl_value, kernel_size=kernel_size, activation='relu', padding='same'))
            model.add(MaxPooling1D())
    elif model_type == 'RNN':
        model.add(SimpleRNN(config['cl_values'][0], activation='relu', input_shape=input_shape, return_sequences=True))
        model.add(MaxPooling1D())
        for cl_value in config['cl_values'][1:]:
            model.add(SimpleRNN(cl_value, activation='relu', return_sequences=True))
            model.add(MaxPooling1D())
    elif model_type == 'LSTM':
        model.add(LSTM(config['cl_values'][0], activation='relu', input_shape=input_shape, return_sequences=True))
        model.add(MaxPooling1D())
        for cl_value in config['cl_values'][1:]:
            model.add(LSTM(cl_value, activation='relu', return_sequences=True))
            model.add(MaxPooling1D())

    model.add(Flatten())
    for dl_value in config['dl_values']:
        model.add(Dense(dl_value, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [12]:
# Create accuracy tables for each kernel size
kernel_sizes = [3, 5, 7]
accuracy_tables = {}

for kernel_size in kernel_sizes:
    all_accuracies = []
    for model_type in ['CNN', 'RNN', 'LSTM']:
        for i, config in enumerate(configurations):
            accuracy = build_and_train_model(model_type, kernel_size, config)
            all_accuracies.append([model_type, i + 1, accuracy])

    df_accuracies = pd.DataFrame(all_accuracies, columns=['Model Type', 'Case', 'Accuracy'])
    accuracy_table = pd.pivot_table(df_accuracies, values='Accuracy', index='Case', columns='Model Type')
    accuracy_tables[kernel_size] = accuracy_table

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)


In [13]:
# Print the accuracy tables
for kernel_size, accuracy_table in accuracy_tables.items():
    print(f"\nAccuracy Table (Kernel Size = {kernel_size}):")
    print(accuracy_table)


Accuracy Table (Kernel Size = 3):
Model Type       CNN      LSTM       RNN
Case                                    
1           0.770199  0.769412  0.780168
2           0.783316  0.754984  0.770986
3           0.773610  0.764428  0.774659
4           0.782529  0.764428  0.776758
5           0.786726  0.501049  0.771773

Accuracy Table (Kernel Size = 5):
Model Type       CNN      LSTM       RNN
Case                                    
1           0.771773  0.764953  0.762592
2           0.785939  0.757870  0.761018
3           0.785677  0.769937  0.770199
4           0.788562  0.766789  0.781480
5           0.791186  0.770986  0.783578

Accuracy Table (Kernel Size = 7):
Model Type       CNN      LSTM       RNN
Case                                    
1           0.769675  0.768101  0.757345
2           0.786464  0.775971  0.760493
3           0.796170  0.751836  0.781480
4           0.795645  0.742392  0.767576
5           0.775708  0.760755  0.782791


In [14]:
best_accuracies = {}

for kernel_size, accuracy_table in accuracy_tables.items():
    # Find the maximum accuracy for each model type
    max_accuracies = accuracy_table.max()

    # Get the case corresponding to the maximum accuracy for each model type
    best_cases = accuracy_table.idxmax()

    # Store the best accuracy, model type, and case for the current kernel size
    best_accuracies[kernel_size] = {
        'CNN': {'accuracy': max_accuracies['CNN'], 'case': best_cases['CNN']},
        'RNN': {'accuracy': max_accuracies['RNN'], 'case': best_cases['RNN']},
        'LSTM': {'accuracy': max_accuracies['LSTM'], 'case': best_cases['LSTM']}
    }

# Print the best accuracies for each kernel size
for kernel_size, best_accuracy_info in best_accuracies.items():
    print(f"\nBest Accuracies (Kernel Size = {kernel_size}):")
    for model_type, info in best_accuracy_info.items():
        print(f"{model_type}: Accuracy = {info['accuracy']:.4f}, Case = {info['case']}")


Best Accuracies (Kernel Size = 3):
CNN: Accuracy = 0.7867, Case = 5
RNN: Accuracy = 0.7802, Case = 1
LSTM: Accuracy = 0.7694, Case = 1

Best Accuracies (Kernel Size = 5):
CNN: Accuracy = 0.7912, Case = 5
RNN: Accuracy = 0.7836, Case = 5
LSTM: Accuracy = 0.7710, Case = 5

Best Accuracies (Kernel Size = 7):
CNN: Accuracy = 0.7962, Case = 3
RNN: Accuracy = 0.7828, Case = 5
LSTM: Accuracy = 0.7760, Case = 2
