# CNN Model (multiple) for MIT data
As we concluded before, for MIT data, we apply the following preprocessing:   
resampling: Oversampling \
rescaling: MinMax Scaler

If you don't have the original files: run the notebook 'preprocessing_mit_multipleclass_minmax_oversampling.ipynb'    
Input file: (The preprocessed data)   
mitbih_train_multipleclass_minmax_oversampling.csv
mitbih_test_multipleclass_minmax_oversampling.csv

Output: CNN model trained  
model_mit_multiple_raw_cnn.h5


In [5]:
import sys
import os

data_path = ''
model_output_path = ''
# check if the enviorment is Google Colab

if 'google.colab' in sys.modules:
    print("Running on Google Colab")
    # Install required libraries
    !pip install scikit-learn -q
    !pip install pandas -q
    !pip install numpy -q
    !pip install imbalanced-learn -q
    !pip install matplotlib -q
    !pip install seaborn -q

    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    # set the path where the csv file stored in your google drive.
    data_path = '/content/drive/MyDrive/data/'
    model_output_path = data_path

else:
    print("Running on local environment")

    current_path = os.getcwd()
    print("Current working directory:", current_path)
    data_path = '../data/processed/'
    model_output_path = '../models/'

Running on local environment
Current working directory: g:\Meine Ablage\heartbeat-analysis-ai\notebooks


## Read data


In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessed train/test splits, located under `data_path`.
RawFiles = {
    'train': f"{data_path}mitbih_train_multipleclass_minmax_oversampling.csv",
    'test': f"{data_path}mitbih_test_multipleclass_minmax_oversampling.csv",
}

# Base path of the trained model; the file extension is appended when saving.
OutputFiles = {
    'model': f"{model_output_path}model_mit_multiple_raw_cnn",
}

# Both files are comma-separated with a header row.
train = pd.read_csv(RawFiles['train'], sep=',', header=0)
test = pd.read_csv(RawFiles['test'], sep=',', header=0)

# The 'target' column holds the label; every other column is a feature.
X_train, y_train = train.drop(columns='target'), train['target']
X_test, y_test = test.drop(columns='target'), test['target']



In [7]:
# Convert the feature tables to NumPy arrays.
# np.asarray accepts both the DataFrame produced by the loading cell and an
# array that was already converted, so this cell can safely be re-run
# (calling `.values` on an ndarray raises AttributeError).
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# Reshape data to (samples, timesteps, features) with a single feature per
# timestep. An already reshaped 3-D array keeps its shape.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Check the shape
print("X_train shape:", X_train.shape)  # Should be (num_samples, timesteps, 1)
print("X_test shape:", X_test.shape)    # Should be (num_samples, timesteps, 1)


X_train shape: (289884, 187, 1)
X_test shape: (20284, 187, 1)


In [8]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the integer labels of both splits with the same number of
# classes, matching the 4-unit softmax output and the categorical
# cross-entropy loss used by the model.
n_classes = 4
y_train, y_test = (
    to_categorical(labels, num_classes=n_classes)
    for labels in (y_train, y_test)
)

# 1D CNN with MinMax Scaler and Oversampling

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
from sklearn.metrics import roc_curve, confusion_matrix, classification_report, f1_score

# Seed NumPy and TensorFlow so that runs are repeatable.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Define the 1D CNN model for multi-class classification.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which makes Keras emit a warning in
# Sequential models.
model = Sequential()
model.add(tf.keras.Input(shape=(X_train.shape[1], 1)))

# Convolution block 1
# NOTE(review): 187 filters equals the number of timesteps per sample - confirm
# this is the intended filter count and not a mix-up with the sequence length.
model.add(Conv1D(187, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.3))

# Convolution block 2
model.add(Conv1D(64, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.4))

# Classification head: one softmax unit per class
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))

# Compile model (labels are one-hot encoded, hence categorical cross-entropy)
optimizer = tf.keras.optimizers.Adamax(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])
model.summary()

# Callbacks
# Stop when val_loss has not improved by at least 0.01 for 10 epochs and
# restore the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.01,
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True
)

# Multiply the learning rate by 0.1 when val_loss has stalled for 3 epochs,
# then wait 4 epochs before monitoring again.
reduce_learning_rate = ReduceLROnPlateau(
    monitor="val_loss",
    min_delta=0.01,
    patience=3,
    factor=0.1,
    cooldown=4,
    verbose=1
)

# Train Model
# NOTE(review): the test split doubles as validation data here, so early
# stopping and the learning-rate schedule are tuned on the same data that is
# later reported as test performance. Consider a dedicated validation split.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, reduce_learning_rate],
    verbose=1
)

# Train Scores
# The model is compiled with the metric name 'accuracy', so the history keys
# are 'accuracy' / 'val_accuracy' (see the training log); the former
# 'sparse_categorical_accuracy' keys do not exist and raised a KeyError.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Values of the last epoch that was run.
print(f"Final Loss: {train_loss[-1]:.4f}, Val Loss: {val_loss[-1]:.4f}, "
      f"Accuracy: {train_accuracy[-1]:.4f}, Val Accuracy: {val_accuracy[-1]:.4f}")

# Prediction
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Get class predictions
print(f"Predicted Classes: {y_pred_classes[:10]}")



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m18118/18118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 7ms/step - AUC: 0.9349 - accuracy: 0.7682 - loss: 0.6145 - val_AUC: 0.9803 - val_accuracy: 0.8781 - val_loss: 0.3323 - learning_rate: 0.0010
Epoch 2/100
[1m18118/18118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 7ms/step - AUC: 0.9838 - accuracy: 0.8849 - loss: 0.2973 - val_AUC: 0.9904 - val_accuracy: 0.9169 - val_loss: 0.2304 - learning_rate: 0.0010
Epoch 3/100
[1m18118/18118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 8ms/step - AUC: 0.9890 - accuracy: 0.9084 - loss: 0.2396 - val_AUC: 0.9923 - val_accuracy: 0.9313 - val_loss: 0.1946 - learning_rate: 0.0010
Epoch 4/100
[1m18118/18118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 8ms/step - AUC: 0.9911 - accuracy: 0.9209 - loss: 0.2114 - val_AUC: 0.9928 - val_accuracy: 0.9271 - val_loss: 0.1877 - learning_rate: 0.0010
Epoch 5/100
[1m18118/18118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 8ms/step - AUC: 

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Plotting the Loss
plt.plot(train_loss, label='Train Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Model Loss by Epoch')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.show()

# Plotting the Accuracy
plt.plot(train_accuracy, label='Train Accuracy')
plt.plot(val_accuracy, label='Validation Accuracy')
plt.title('Model Accuracy by Epoch')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# Accuracy rises over the epochs, so the lower-right corner keeps the legend
# away from the curves.
plt.legend(loc='lower right')
plt.show()

# Making predictions
# The softmax output holds one probability per class, so the predicted label
# is the index of the largest probability. Thresholding at 0.5 is a
# binary-classification rule: it would return an (n_samples, 4) indicator
# matrix (possibly all zeros for a sample) instead of class labels.
y_pred = model.predict(X_test)
y_pred_class = np.argmax(y_pred, axis=1)


In [None]:
# Predicted label = index of the highest class probability in each row.
y_pred_class = y_pred.argmax(axis=1)

# True label = position of the 1 in each one-hot encoded row of `y_test`.
y_test_class = y_test.argmax(axis=1)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, f1_score

# Compute all evaluation results first, then print them in one place.
conf_matrix = confusion_matrix(y_test_class, y_pred_class)
class_report = classification_report(y_test_class, y_pred_class)
# Weighted average: per-class F1 scores weighted by class support.
weighted_f1 = f1_score(y_test_class, y_pred_class, average='weighted')

print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)
print(f"F1 Score: {weighted_f1:.4f}")

# Save model

In [None]:
# Save the model in HDF5 format
model_path = OutputFiles.get('model') + '.h5'  # Append .h5 extension

# Make sure the target folder exists before saving, so that a missing
# output directory does not make the save fail after a long training run.
# An empty dirname means "current directory" and needs no creation.
output_dir = os.path.dirname(model_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

model.save(model_path)
print(f"Model saved to {model_path}")


In [None]:
from datetime import datetime
# Print the current timestamp, marking when the notebook run reached this cell.
finished_at = datetime.now()
print("Current time:", finished_at)