<h1>Building a Sequential Multi-Class Categorization CNN Model:</h1>

In [1]:
from __future__ import print_function
import pandas as pd  # previously aliased as `np`, which the numpy import below immediately overwrote
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

import keras
from keras.utils import to_categorical # type: ignore
from keras.models import Sequential # type: ignore
from keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, GlobalAveragePooling1D, Input # type: ignore
import tensorflow as tf

import os

In [2]:
# Project-local helpers. In the cells visible here, `load_database` is the only name
# that is not imported explicitly elsewhere, so it presumably comes from this wildcard.
# NOTE(review): prefer `from Scripts.ptbxl_mc import load_database` once it is confirmed
# that no other cell relies on additional names exported by the module.
from Scripts.ptbxl_mc import *

<h3>Checking For Available GPUs:</h3>

In [3]:
# Ask TensorFlow which physical GPU devices it can see; the next cell treats a falsy
# result as "no GPUs".
gpus = tf.config.list_physical_devices('GPU')

In [4]:
if gpus:
    # List every physical GPU by name
    for gpu in gpus:
        print(f'GPU Name: {gpu.name}')

    # Summarise the device counts once, instead of repeating the same line for each GPU
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), 'Physical GPUs, ', len(logical_gpus), 'Logical GPUs')
else:
    print('No GPUs found')

No GPUs found


<h3>Loading PTB-XL Database from ".npz" File:</h3>

In [5]:
# Dataset location, resolved relative to the working directory (the project root) so the
# notebook is not tied to one machine's absolute path. This mirrors how chkpt_folder is built.
npz_file = os.path.join(os.getcwd(), 'Processed Datasets', 'multiclass_dataset.npz')

In [6]:
# Load the preprocessed dataset through the project helper.
# NOTE(review): presumably `ptbxl_df` holds per-record metadata (later cells read its
# 'superclasses' column) and `signals_df` the signal samples -- confirm against load_database.
ptbxl_df, signals_df = load_database(npz_file)

<h3>Splitting Data into Training and Testing Sets:</h3>

In [7]:
# Convert the superclass labels (as strings) and the signals (as floats) into numpy arrays
superclasses = ptbxl_df['superclasses'].to_numpy(dtype=str)
signals = signals_df.to_numpy(dtype=float)

In [8]:
# X holds the model inputs (signals); Y holds the targets (superclass labels)
X, Y = signals, superclasses

In [9]:
# Hold out 20% of the records for testing; the fixed random_state keeps the split reproducible.
# NOTE(review): the split is not stratified although the class counts printed further down
# are imbalanced -- consider stratify=Y if per-class proportions should match across splits.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [10]:
# Report the (inputs, labels) shapes for the training split, then for the testing split
for features, targets in ((x_train, y_train), (x_test, y_test)):
    print(features.shape, targets.shape)

(17110, 400) (17110,)
(4278, 400) (4278,)


<h3>Counting Instances of each Superclass:</h3>

In [11]:
from collections import Counter


def _print_label_counts(heading, label_array):
    '''
    Prints `heading` followed by one "label: count" line per distinct label in `label_array`
    (in order of first appearance), and returns the Counter so the tallies can be reused.
    '''
    print(heading)
    tally = Counter(label_array)

    for label, num in tally.items():
        print(f'{label}: {num}')

    return tally


# Counting instances in training dataset
train_counter = _print_label_counts('Training Set Instances:\n', y_train)

# Counting instances in testing dataset
test_counter = _print_label_counts('\nTesting Set Instances:\n', y_test)

Training Set Intances:

STTC: 3039
CD: 3012
NORM: 7365
MI: 2732
HYP: 962

Testing Set Intances:

NORM: 1804
CD: 768
STTC: 781
MI: 665
HYP: 260


<h3>Encoding Both Training and Testing Labels:</h3>

In [12]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the training labels only, then apply that
# same mapping to both splits
label_encoder = LabelEncoder().fit(y_train)

y_train_encoded, y_test_encoded = (label_encoder.transform(split) for split in (y_train, y_test))

In [13]:
# Decode the integer labels back to their superclass names so both forms can be compared
train_labels_decoded = label_encoder.inverse_transform(y_train_encoded)
test_labels_decoded = label_encoder.inverse_transform(y_test_encoded)

# Encoded labels first, original labels underneath, for each split
print('Encoded Training Labels:', y_train_encoded)
print('Original Training Labels:', train_labels_decoded)

print('\nEncoded Testing Labels:', y_test_encoded)
print('Original Testing Labels:', test_labels_decoded)

Encoded Training Labels: [4 0 4 ... 0 4 0]
Original Training Labels: ['STTC' 'CD' 'STTC' ... 'CD' 'STTC' 'CD']

Encoded Testing Labels: [3 3 0 ... 3 3 3]
Original Testing Labels: ['NORM' 'NORM' 'CD' ... 'NORM' 'NORM' 'NORM']


In [14]:
# Converting training and testing labels into one-hot encoding.
# The width comes from the fitted encoder instead of a hard-coded 5, so it always matches
# the number of distinct superclasses found in the training labels.
num_classes = len(label_encoder.classes_)
y_train = to_categorical(y_train_encoded, num_classes)
y_test = to_categorical(y_test_encoded, num_classes)

In [15]:
# Preview the one-hot encoded training labels
y_train

array([[0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       ...,
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.]])

In [16]:
# Preview the one-hot encoded testing labels
y_test

array([[0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.]])

In [17]:
# Confirm the shapes of the one-hot label arrays for both splits
print(y_train.shape, y_test.shape)

(17110, 5) (4278, 5)


<h3>Defining How to Plot Graphs:</h3>

In [18]:
def plot_graphs(history, metrics=('loss', 'Accuracy', 'AUC')):
    '''
    Plots one Training vs. Validation graph per metric using a sequential model's history. By default
    this gives 3 graphs: Loss, Accuracy, and AUC.

    Parameters:
        history: object returned by model.fit(); its `epoch` list and `history` dict are read.
        metrics: keys of `history.history` to plot. For each key the matching 'val_<key>' series is
                 drawn as well when it is present, so a history recorded without validation data
                 still plots instead of raising a KeyError.
    '''
    for metric in metrics:
        # 'loss' -> 'Loss'; names that already start with a capital letter are left unchanged
        title = metric[:1].upper() + metric[1:]

        # Training curve in blue
        plt.plot(history.epoch, history.history[metric], 'b', label=f'Training {title}')

        # Validation curve in green, only when it was recorded
        validation_key = f'val_{metric}'
        if validation_key in history.history:
            plt.plot(history.epoch, history.history[validation_key], 'g', label=f'Validation {title}')

        plt.xlabel('Epoch')
        plt.legend()
        plt.title(title)
        plt.show()

<h3>Defining Functions for Evaluating the Model:</h3>

In [19]:
def evaluate_model(model, test_data, test_labels):
    '''
    Runs model.evaluate() on the given test data and labels, prints the accuracy (second entry
    of the result) followed by the loss (first entry), and returns the full evaluation result.
    '''
    results = model.evaluate(test_data, test_labels, verbose=1)

    accuracy_value = results[1]
    loss_value = results[0]

    print('\nAccuracy on ORIGINAL test data: %0.2f' % accuracy_value)
    print('\nLoss on ORIGINAL test data: %0.2f' % loss_value)

    return results

In [20]:
def model_predict(model, test_data, test_labels) -> tuple:
    '''
    Runs the model on the test data and reduces both the predictions and the supplied labels to
    class indices by taking the argmax along axis 1.

    Returns a tuple (true class indices, predicted class indices).
    '''
    class_probabilities = model.predict(test_data)

    # The true class is the position of the largest entry in each label row
    true_classes = np.argmax(test_labels, axis=1)

    # The predicted class is the one the model assigns the highest probability to
    predicted_classes = np.argmax(class_probabilities, axis=1)

    return true_classes, predicted_classes

In [21]:
def show_confusion_matrix(validations, predictions, labels):
    '''
    Draws the confusion matrix of `validations` (true labels) against `predictions` as an
    annotated seaborn heatmap, using `labels` for the tick marks on both axes.

    Shows the plot and returns the figure that holds the heatmap.
    '''
    counts = confusion_matrix(validations, predictions)

    # Heatmap appearance: integer annotations in each cell, white grid lines between cells
    heatmap_style = dict(cmap="YlGnBu",
                         linecolor='white',
                         linewidths=1,
                         xticklabels=labels,
                         yticklabels=labels,
                         annot=True,
                         fmt="d")

    plt.figure(figsize=(6, 4))
    heatmap_axes = sns.heatmap(counts, **heatmap_style)

    plt.title("Confusion Matrix")
    plt.ylabel("True Label")
    plt.xlabel("Predicted Label")

    # Grab the figure before showing it so it can be handed back to the caller
    heatmap_figure = heatmap_axes.get_figure()
    plt.show()

    return heatmap_figure

<h3>Generating the CNN Model:</h3>

In [22]:
def generate_1D_model():
    '''
    Builds and returns a Keras sequential model that takes inputs of shape (400, 1).

    The network stacks 14 ReLU-activated 1D convolutional layers, with a max-pooling layer after the
    eighth one, followed by global average pooling, dropout, and a 5-unit softmax output layer that
    predicts the most probable superclass present within a data point.
    '''
    # (layer name, filters, kernel size) for the convolutions before the max-pooling layer...
    convs_before_pool = [('conv0', 32, 18), ('conv1', 32, 18),
                         ('conv2', 64, 18), ('conv3', 64, 18),
                         ('conv4', 128, 18), ('conv5', 128, 18),
                         ('conv6', 256, 18), ('conv7', 256, 18)]

    # ...and for the convolutions after it
    convs_after_pool = [('conv8', 32, 18), ('conv9', 32, 18),
                        ('conv10', 64, 18), ('conv11', 64, 18),
                        ('conv12', 128, 18), ('conv13', 256, 3)]

    model = Sequential([Input(shape=(400,1))])

    for layer_name, filters, kernel_size in convs_before_pool:
        model.add(Conv1D(filters, kernel_size, name=layer_name, activation='relu'))

    model.add(MaxPooling1D(3, name='max1'))

    for layer_name, filters, kernel_size in convs_after_pool:
        model.add(Conv1D(filters, kernel_size, name=layer_name, activation='relu'))

    # Classification head
    model.add(GlobalAveragePooling1D(name='gap1'))
    model.add(Dropout(0.5, name='drop1'))
    model.add(Dense(5, name='dense1', activation='softmax'))

    return model

<h3>Defining Callback for Model Training:</h3>

In [23]:
# Checkpoint folder: a fixed sub-directory chain underneath the current working directory
chkpt_subdirs = ('Training Sessions', 'Multiclass', 'MC_CNN_Weights')
chkpt_folder = os.path.join(os.getcwd(), *chkpt_subdirs)

# Checkpoint path: file name template inside that folder, containing an {epoch} placeholder
chkpt_filename = 'CustomCNN_Lead2_{epoch}.weights.h5'
chkpt_path = os.path.join(chkpt_folder, chkpt_filename)

In [24]:
# Show the resolved checkpoint folder and file template, one per line
print(chkpt_folder, chkpt_path, sep='\n')

c:\Users\loera\OneDrive\Python Projects\College\ECG\Training Sessions\Multiclass\MC_CNN_Weights
c:\Users\loera\OneDrive\Python Projects\College\ECG\Training Sessions\Multiclass\MC_CNN_Weights\CustomCNN_Lead2_{epoch}.weights.h5


In [25]:
# Defining how to save model training checkpoints
checkpoint_options = dict(
    filepath=chkpt_path,
    save_weights_only=True,   # store the weights only, not the whole model
    monitor='val_Accuracy',   # validation series of the metric named 'Accuracy'
    mode='max',               # want the max accuracy
    save_best_only=True,
    verbose=1,                # prints message for saved weights
)

model_checkpoint_callback = keras.callbacks.ModelCheckpoint(**checkpoint_options)

<h3>Compiling and Training the Model:</h3>

In [26]:
# Build a fresh, uncompiled instance of the CNN defined above
model = generate_1D_model()

In [27]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Compiling the model

# NOTE(review): beta_1=0.009 and beta_2=0.8 are far from Adam's usual defaults (0.9 / 0.999) --
# confirm these values are intended rather than a typo.
adam_optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.009, beta_2=0.8, epsilon=1e-08)

# The metric names given here are the keys that plot_graphs reads from the training history
# ('Accuracy', 'AUC') and that the checkpoint callback monitors ('val_Accuracy').
tracked_metrics = [
    keras.metrics.CategoricalAccuracy(name='Accuracy'),
    keras.metrics.Recall(name='Recall'),
    keras.metrics.Precision(name='Precision'),
    keras.metrics.AUC(name='AUC'),
]

model.compile(loss=keras.losses.CategoricalCrossentropy(),
              optimizer=adam_optimizer,
              metrics=tracked_metrics)

In [None]:
# Training the model.
# This cell has to run: `history` is needed by the plotting cell further down, and without
# fitting, the evaluation cells would score a network with untrained weights.
# NOTE(review): the test split doubles as the validation data, so the checkpoint callback
# selects its best weights on test data -- consider carving out a separate validation split.
history = model.fit(x_train,
                    y_train,
                    epochs=50,
                    batch_size=512,
                    validation_data=(x_test, y_test),
                    callbacks=[model_checkpoint_callback])

<h3>Evaluating the Model:</h3>

In [None]:
# Score the model on the test split; evaluate_model prints accuracy and loss and returns the scores
print('\n--- Evaluate Model ---\n')
scores = evaluate_model(model, x_test, y_test)

In [None]:
# scores[1] is the first compiled metric, which was named 'Accuracy' at compile time.
# The label is written out explicitly instead of read from model.metrics_names[1], because
# newer Keras releases can report an aggregate 'compile_metrics' entry at that position,
# which would mislabel the value.
print('%s: %.2f' % ('Accuracy', scores[1]*100))

In [None]:
print('\n--- Generate Model Predictions ---\n')
# This cell rebinds y_test from one-hot rows to integer class indices, which is the form the
# F1 and confusion-matrix cells below consume. The one-hot input is therefore rebuilt from
# y_test_encoded instead of read from y_test, so re-running the cell does not fail when
# model_predict takes the argmax along axis 1 of an already 1-D array.
y_test, y_hat = model_predict(model=model,
                              test_data=x_test,
                              test_labels=to_categorical(y_test_encoded))

In [None]:
# Micro-averaged F1 score of the predicted class indices against the true class indices
results_1 = f1_score(y_test, y_hat, average='micro')
print('F1 Score:', results_1)

<h3>Plotting Model's Metrics:</h3>

In [None]:
# Plot the loss, accuracy and AUC curves; `history` must come from a completed model.fit() run
plot_graphs(history)

<h3>Computing Confusion Matrices:</h3>

In [None]:
# Class names known to the fitted encoder, used as tick labels for the confusion matrix
labels = label_encoder.classes_

In [None]:
# The returned figure gets its own name: binding it to `confusion_matrix` would shadow the
# sklearn function of that name, which show_confusion_matrix calls internally, so running
# this cell (or the helper) a second time would fail.
confusion_matrix_fig = show_confusion_matrix(y_test, y_hat, labels)