In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import sklearn.metrics as metrics
import seaborn as sns
from sklearn.model_selection import train_test_split

2024-02-05 04:48:14.940511: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-05 04:48:14.963034: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-05 04:48:14.963057: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-05 04:48:14.963073: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-05 04:48:14.967544: I tensorflow/core/platform/cpu_feature_g

In [2]:
def train_val_test_split(X, y, train_size, val_size, test_size, random_state=None):
    """Split ``X`` and ``y`` into stratified train, validation and test subsets.

    The split is done in two stages with ``sklearn``'s ``train_test_split``:
    first the training portion is separated, then the remainder is divided
    between validation and test. Both stages stratify on the labels.

    Args:
        X: Feature array; the first axis indexes samples.
        y: Labels aligned with ``X``; also used as the stratification target.
        train_size: Share of the data for training, forwarded as ``train_size``
            to the first split.
        val_size: Share of the data intended for validation.
        test_size: Share of the data intended for testing.
        random_state: Optional seed (or random state object) forwarded to both
            ``train_test_split`` calls. The default ``None`` keeps the split
            unseeded, exactly as before; pass an integer for a reproducible split.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    # Stage 1: separate the training portion; everything else goes to a hold-out pool.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, train_size=train_size, stratify=y, random_state=random_state
    )

    # Stage 2: ``test_size`` is a share of the whole data set, so re-express it
    # as a share of the hold-out pool before splitting that pool.
    test_fraction = test_size / (test_size + val_size)
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout,
        y_holdout,
        test_size=test_fraction,
        stratify=y_holdout,
        random_state=random_state,
    )
    return X_train, y_train, X_val, y_val, X_test, y_test

def process_data_for_conv2D(X):
    """Append a trailing single-channel axis to every 2-D sample in ``X``.

    Args:
        X: Iterable of 2-D samples (for example an array of shape
            ``(n_samples, height, width)``).

    Returns:
        ``np.float32`` array in which each sample has shape
        ``(height, width, 1)``.
    """
    X_conv2D = []
    for sample in X:
        sample = np.asarray(sample)
        # The target shape is passed positionally: the ``newshape=`` keyword of
        # ``np.reshape`` is deprecated in recent NumPy releases.
        X_conv2D.append(np.reshape(sample, (sample.shape[0], sample.shape[1], 1)))
    return np.array(X_conv2D, dtype=np.float32)

def data_iter(X, y, batch_size):
    """Yield ``(X_batch, y_batch)`` pairs forever, cycling through the samples in order.

    Batches are taken in index order without shuffling; the last batch of each
    pass may be smaller than ``batch_size``. After the final batch the iterator
    starts again from the first sample.

    Args:
        X: Array whose first axis indexes samples (must support ``X[idx, :]``).
        y: Array aligned with ``X`` (must support ``y[idx, :]``).
        batch_size: Maximum number of samples per batch; must be positive.

    Yields:
        Tuple ``(X[idx, :], y[idx, :])`` for consecutive index chunks.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` has no samples.
            Because this is a generator, the error surfaces on the first
            ``next()`` call. Without this check the endless loop below would
            spin forever without ever yielding.
    """
    n_samples = X.shape[0]
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if n_samples == 0:
        raise ValueError("cannot iterate over an empty dataset")

    idx = list(range(n_samples))
    while True:
        for start in range(0, n_samples, batch_size):
            # List slicing already clamps at the end, so no explicit min() is needed.
            batch_idx = idx[start:start + batch_size]
            yield X[batch_idx, :], y[batch_idx, :]


In [23]:
# define some blocks for NN

def base_conv_block(n_conv_filters, kernel_size):
    """Return a callable applying Conv2D -> BatchNormalization -> ReLU.

    Args:
        n_conv_filters: Number of filters for the ``Conv2D`` layer.
        kernel_size: Kernel size for the ``Conv2D`` layer.

    Returns:
        A function that takes an input tensor and returns the block's output.
        New layers are created each time that function is called.
    """
    def apply_block(input_):
        # 'same' padding is requested on the convolution.
        convolved = tf.keras.layers.Conv2D(n_conv_filters, kernel_size, padding='same')(input_)
        normalised = tf.keras.layers.BatchNormalization()(convolved)
        return tf.keras.layers.Activation('relu')(normalised)

    return apply_block

def base_model_cnn(input_shape, num_conv_filters = [32, 64, 128], kernel_size = (2, 2), max_pool_kernel = (1, 3), num_dense_units=128, num_classes=2):
    """Build a CNN classifier as a functional ``tf.keras.Model``.

    The network stacks one ``base_conv_block`` followed by ``MaxPooling2D`` per
    entry of ``num_conv_filters``, then flattens and applies a ReLU dense
    layer, dropout (rate 0.2) and a softmax output layer.

    Args:
        input_shape: Shape of a single input sample, passed to ``Input``.
        num_conv_filters: Filter count of each convolutional stage, in order.
        kernel_size: Convolution kernel size shared by all stages.
        max_pool_kernel: Pool size passed to every ``MaxPooling2D`` layer.
        num_dense_units: Width of the hidden dense layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The model; it is not compiled here.
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=input_shape)

    # Convolutional feature extractor: one conv block + pooling per filter count.
    features = inputs
    for filters in num_conv_filters:
        features = base_conv_block(filters, kernel_size)(features)
        features = layers.MaxPooling2D(max_pool_kernel)(features)

    # Classification head.
    flattened = layers.Flatten()(features)
    hidden = layers.Dense(num_dense_units, activation='relu')(flattened)
    hidden = layers.Dropout(0.2)(hidden)
    class_probs = layers.Dense(num_classes, activation='softmax')(hidden)

    return tf.keras.Model(inputs=inputs, outputs=class_probs)
    

In [24]:
# Load the STFT features and their labels from the .npy files on disk.
X_stft = np.load("stft/stft_features.npy")
y = np.load("stft/labels.npy")

# Add the trailing channel axis that Conv2D layers expect, then report both shapes.
X_stft = process_data_for_conv2D(X_stft)
print(X_stft.shape, y.shape, sep="\n")

(3855, 188, 257, 1)
(3855, 2)


In [25]:
# --- Model architecture ---
conv_filters = [32, 64, 128, 256]
kernel_size = (3, 3)
max_pool_kernel = (1, 4)
num_dense_units = 512
num_classes = 2

# --- Data split fractions (train / validation / test) ---
train_size, val_size, test_size = 0.7, 0.15, 0.15

# --- Optimisation ---
batch_size = 32
epochs = 100
learning_rate = 0.01

# --- Output locations for the best-model checkpoints and the CSV training logs ---
path = "weights/"
base_file_name = "cnn_model.hdf5"
base_csv_name = "cnn_logs.csv"

# Make sure the output directory exists before anything is written to it.
if not os.path.exists(path):
    print(f"Create directory {path}")
    os.makedirs(path)

# Module-level metric histories, each starting as its own empty list.
(
    train_loss_record,
    train_acc_record,
    val_loss_record,
    val_acc_record,
    test_loss_record,
    test_acc_record,
) = ([] for _ in range(6))

# Number of cross-validation rounds.
k_fold = 5


def kfold_training(X, y, k_fold, train_size, val_size, test_size, base_model_cnn, base_file_name, base_csv_name, path, learning_rate, epochs, batch_size, num_dense_units, num_classes, kernel_size, max_pool_kernel, conv_filters):
    """Train and evaluate a fresh CNN on ``k_fold`` independently drawn splits.

    Every round draws a new stratified train/validation/test split with
    ``train_val_test_split``, builds a model with ``base_model_cnn``, trains it
    and evaluates it on all three subsets. The per-round loss, accuracy, AUC,
    precision and recall are averaged over the rounds and printed.

    Note that the splits are drawn independently each round rather than being
    disjoint folds of one partition, and that the evaluated model is the one
    left in memory after ``fit`` (the checkpoint written by ``ModelCheckpoint``
    is not reloaded).

    Args:
        X: Feature array; axes 1-3 give the model input shape.
        y: Labels aligned with ``X``.
        k_fold: Number of train/evaluate rounds.
        train_size, val_size, test_size: Split fractions forwarded to
            ``train_val_test_split``.
        base_model_cnn: Model factory called with ``input_shape``,
            ``num_dense_units``, ``num_classes``, ``kernel_size``,
            ``max_pool_kernel`` and ``num_conv_filters``.
        base_file_name: Checkpoint file name; prefixed with ``"{i}_fold_"``.
        base_csv_name: CSV log file name; prefixed with ``"{i}_fold_"``.
        path: Directory in which checkpoints and CSV logs are written.
        learning_rate: Initial learning rate of the Adam optimizer.
        epochs: Maximum number of epochs per round.
        batch_size: Mini-batch size passed to ``fit``.
        num_dense_units, num_classes, kernel_size, max_pool_kernel,
        conv_filters: Architecture settings forwarded to ``base_model_cnn``.

    Returns:
        None. The averaged metrics are only printed.
    """
    # ``model.evaluate`` reports the loss first, then the metrics in the order
    # they are passed to ``compile`` below.
    metric_names = ("loss", "acc", "auc", "precision", "recall")
    records = {
        split: {name: [] for name in metric_names}
        for split in ("train", "val", "test")
    }
    # TODO: F1 score and specificity are not tracked yet.

    for i in range(k_fold):
        print(f"Start {i+1}th-fold in {k_fold} cross validation")

        # Split the data handed in by the caller (not a notebook-level global),
        # so the function works for whichever dataset it is given.
        X_train, y_train, X_val, y_val, X_test, y_test = train_val_test_split(X, y, train_size, val_size, test_size)

        file_name = os.path.join(path, f"{i}_fold_{base_file_name}")
        csv_path = os.path.join(path, f"{i}_fold_{base_csv_name}")

        callbacks = [
            # Halve the learning rate when the training loss plateaus.
            tf.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, min_lr=0.0001),
            # Keep only the weights with the best validation accuracy on disk.
            tf.keras.callbacks.ModelCheckpoint(file_name, monitor="val_accuracy", save_best_only=True, mode="max"),
            # Stop once the training loss has not improved by 0.01 for 10 epochs.
            tf.keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.01, patience=10, mode="min"),
            # Per-epoch metrics log.
            tf.keras.callbacks.CSVLogger(csv_path),
        ]
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        model = base_model_cnn(input_shape=(X_train.shape[1], X_train.shape[2], X_train.shape[3]),
                                num_dense_units=num_dense_units, num_classes=num_classes,
                                kernel_size=kernel_size, max_pool_kernel=max_pool_kernel,
                                num_conv_filters=conv_filters)

        model.compile(loss="categorical_crossentropy", optimizer=optimizer,
                      metrics=["accuracy", "AUC", tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

        model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                  validation_data=(X_val, y_val), callbacks=callbacks,
                  verbose=1)

        # Evaluate on each subset and file every value under its metric name.
        subsets = {"train": (X_train, y_train), "val": (X_val, y_val), "test": (X_test, y_test)}
        for split, (X_split, y_split) in subsets.items():
            values = model.evaluate(X_split, y_split, verbose=0)
            for name, value in zip(metric_names, values):
                records[split][name].append(value)

    # Average each metric over the rounds.
    avg = {
        split: {name: np.mean(values) for name, values in per_metric.items()}
        for split, per_metric in records.items()
    }
    tr, va, te = avg["train"], avg["val"], avg["test"]

    print(
        f"{k_fold}-fold cv train loss avg: {tr['loss']:.4f}, train acc avg: {tr['acc']:.4f}, "
        f"val loss avg: {va['loss']:.4f}, val acc avg: {va['acc']:.4f}, "
        f"test loss avg: {te['loss']:.4f}, test acc avg: {te['acc']:.4f} \n "
        f"            train auc avg: {tr['auc']:.4f}, val auc avg: {va['auc']:.4f}, test auc avg: {te['auc']:.4f} \n "
        f"                train precision avg: {tr['precision']:.4f}, val precision avg: {va['precision']:.4f}, test precision avg: {te['precision']:.4f} \n "
        f"                    train recall avg: {tr['recall']:.4f}, val recall avg: {va['recall']:.4f}, test recall avg: {te['recall']:.4f}"
    )
    



In [26]:
# Run the cross-validation routine on the full STFT dataset with the settings defined above.
kfold_training(
    X=X_stft,
    y=y,
    k_fold=k_fold,
    train_size=train_size,
    val_size=val_size,
    test_size=test_size,
    base_model_cnn=base_model_cnn,
    base_file_name=base_file_name,
    base_csv_name=base_csv_name,
    path=path,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=batch_size,
    num_dense_units=num_dense_units,
    num_classes=num_classes,
    kernel_size=kernel_size,
    max_pool_kernel=max_pool_kernel,
    conv_filters=conv_filters,
)

Start 1th-fold in 5 cross validation
Epoch 1/100

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Start 3th-fold in 5 cross validat

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Start 4th-fold in 5 cross validation
Epoch 1/100

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Start 5th-fold in 5 cross validation
Epoch 1/100

  saving_api.save_model(


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
5-fold cv train loss avg: 0.0347, train acc avg: 0.9881, val loss avg: 0.1881, val acc avg: 0.9526, test loss a

In [None]:
# # for each patient in patients, do k-fold cross validation
# patients = ["a01r", "a02r", "a03r", "a04r", "b01r", "c02r"]

# for patient in patients:
#     print(f"Start training for patient {patient}")
#     X_stft = np.load(f"stft_individual/{patient}_stft_features.npy")
#     y = np.load(f"stft_individual/{patient}_labels.npy")

#     X_stft = process_data_for_conv2D(X_stft)
#     print(X_stft.shape)
#     print(y.shape)
#     path = f"weights/{patient}/"
#     if not os.path.exists(path):
#         print(f"Create directory {path}")
#         os.makedirs(path)
#     kfold_training(X_stft, y, k_fold, train_size, val_size, test_size, base_model_cnn, base_file_name, base_csv_name, path, learning_rate, epochs, batch_size, num_dense_units, num_classes, kernel_size, max_pool_kernel, conv_filters)

In [15]:
from tensorflow.keras.utils import plot_model

# The models trained by kfold_training are local to that function, so no
# ``model`` variable exists at notebook scope. Build an untrained model with the
# same architecture settings purely so its structure can be drawn.
architecture_model = base_model_cnn(
    input_shape=X_stft.shape[1:],
    num_dense_units=num_dense_units,
    num_classes=num_classes,
    kernel_size=kernel_size,
    max_pool_kernel=max_pool_kernel,
    num_conv_filters=conv_filters,
)

# plot_model needs the pydot package and a Graphviz installation to render the diagram.
plot_model(architecture_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
