# **import libraries, data and label**

In [None]:
# Import necessary libraries and packages
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import StratifiedKFold, KFold
import keras
from keras import regularizers
from keras.optimizers import Adagrad, SGD, Adamax, RMSprop, Adam
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import BatchNormalization, GlobalAveragePooling1D
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.layers.convolutional import Conv1D, MaxPooling1D, AveragePooling1D
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import scipy.io as sio
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
import matplotlib.pyplot as plt
from keras.models import load_model
!pip install tensorflow_addons
from tensorflow_addons.optimizers import CyclicalLearningRate

# Mount Google Drive so the dataset folder is reachable from the Colab runtime
from google.colab import drive

drive.mount('/content/drive/')

# Fix the NumPy RNG seed so later shuffles are reproducible
np.random.seed(10)

# Dataset constants: number of records, samples kept per record, number of classes
size = 840
big = 50000
number_of_classes = 2

# Locate the label file (its name starts with 'w') and load the 'wlbl' variable as a flat vector
mypath = '/content/drive/My Drive/valdata/'
onlyfiles = [name for name in listdir(mypath) if isfile(join(mypath, name))]
ll = [name for name in onlyfiles if name[0] == 'w']
lbl = sio.loadmat(mypath + ll[0])['wlbl']
lbl = np.reshape(lbl, (840,))

# Pre-allocate the fixed-length signal matrix, one row per record
X = np.zeros((size, big))

In [None]:
# Load data and gather information
onlyfiles = [f for f in listdir(mypath) if (isfile(join(mypath, f)))]
# Signal files are recognised by an 'm' as their fifth character; the length
# guard keeps shorter file names from raising IndexError.
mats = sorted(f for f in onlyfiles if len(f) > 4 and f[4] == 'm')

# Resize data to a uniform shape: every record ends up with exactly `big` samples
for i in range(size):
    # Load the first row of the 'val' variable of the i-th signal file
    dummy = sio.loadmat(mypath + mats[i])['val'][0, :]
    missing = big - len(dummy)
    if missing <= 0:
        # Long enough already: keep the first `big` samples
        X[i, :] = dummy[0:big]
    else:
        # Pad by repeating the part of the record that starts at sample 5000
        b = dummy[5000:(missing + 5000)]
        if len(b) == 0:
            # A record of 5000 samples or fewer has nothing to repeat; the
            # previous loop would have spun forever here.
            raise ValueError(
                "Record {} has only {} samples; cannot pad it to {}".format(mats[i], len(dummy), big))
        # Append as many copies as needed to reach `big`, then trim the excess.
        # (The previous `while len(goal) != big` loop never terminated when the
        # copies overshot `big` instead of landing exactly on it.)
        repeats = -(-missing // len(b))  # ceiling division
        goal = np.hstack([dummy] + [b] * repeats)
        X[i, :] = goal[:big]

print(' Done ')

# Save reshaped data to a CSV file
np.savetxt("/content/drive/My Drive/valdata/X_reshaped.csv", X, delimiter = ",")

# **data preparation**

In [None]:
# Function for picking the right class among outputs
def change(x):
    """
    Transforms the output predictions into single class output by selecting the class with the highest probability for each sample.

    Ties are resolved in favour of the lowest class index.

    Args:
    x (numpy.ndarray): Array of shape (number of samples, number of classes) representing predicted probabilities.

    Returns:
    numpy.ndarray: 1-D array of integers representing the selected classes.
    """
    # np.argmax replaces the per-row max()/list.index() loop, and the builtin
    # `int` replaces `np.int`, which was removed in NumPy 1.24.
    return np.argmax(np.asarray(x), axis=1).astype(int)


#windowing data
# Each of the 840 records is cut into 96 windows of 2500 samples whose start
# advances by 500 samples; the windows of record k occupy rows 96*k .. 96*k+95.
Xnew = np.zeros((80640, 2500))
for k in range(840):
    for j, start in enumerate(range(0, 96 * 500, 500)):
        Xnew[96 * k + j] = X[k, start:start + 2500]

#windowing label
# Every window inherits the label of the record it was cut from
lbl_new = np.zeros(80640)
for k in range(840):
    lbl_new[96 * k:96 * (k + 1)] = lbl[k]

#int labels to binary
# One-hot encode: label value v sets column v - 1
Label_set = np.zeros((80640, number_of_classes))
for i, label in enumerate(lbl_new):
    Label_set[i, int(label) - 1] = 1

#normalization
# Standardise with the mean and standard deviation of the whole windowed array,
# then add a trailing channel axis
global_mean = Xnew.mean()
global_std = Xnew.std()
Xnew = (Xnew - global_mean) / global_std
Xnew = Xnew.reshape((80640, 2500, 1))

In [None]:
#train and test split - 5th slot as test
# Shuffle windows and labels with one shared random permutation
values = list(range(80640))
permutations = np.random.permutation(values)
Xnew = Xnew[permutations]
Label_set = Label_set[permutations]

# First 80% of the shuffled windows train the model; the last 20% is held out
held_out_start = int(0.8 * 80640)
X_train, X_test = Xnew[:held_out_start], Xnew[held_out_start:]
Y_train, Y_test = Label_set[:held_out_start], Label_set[held_out_start:]
sizee = len(X_test)

# Split the held-out slot: first half for validation, second half for testing
val = 0.5
n_val = int(val * sizee)
X_val, X_test = X_test[:n_val], X_test[n_val:]
Y_val, Y_test = Y_test[:n_val], Y_test[n_val:]

In [None]:
#train and test split - 4th slot as test
# Shuffle windows and labels with one shared random permutation
values = list(range(80640))
permutations = np.random.permutation(values)
Xnew = Xnew[permutations]
Label_set = Label_set[permutations]

# Rows in [60%, 80%) are held out; everything before and after is training data
slot_start = int(0.6 * 80640)
slot_end = int(0.8 * 80640)
X_train1, X_train2 = Xnew[:slot_start], Xnew[slot_end:]
Y_train1, Y_train2 = Label_set[:slot_start], Label_set[slot_end:]
X_train = np.concatenate((X_train1, X_train2), axis=0)
Y_train = np.concatenate((Y_train1, Y_train2), axis=0)
X_test = Xnew[slot_start:slot_end]
Y_test = Label_set[slot_start:slot_end]
sizee = len(X_test)

# Split the held-out slot: first half for validation, second half for testing
val = 0.5
n_val = int(val * sizee)
X_val, X_test = X_test[:n_val], X_test[n_val:]
Y_val, Y_test = Y_test[:n_val], Y_test[n_val:]

In [None]:
#train and test split - 3rd slot as test
# Shuffle windows and labels with one shared random permutation
values = list(range(80640))
permutations = np.random.permutation(values)
Xnew = Xnew[permutations]
Label_set = Label_set[permutations]

# Rows in [40%, 60%) are held out; everything before and after is training data
slot_start = int(0.4 * 80640)
slot_end = int(0.6 * 80640)
X_train1, X_train2 = Xnew[:slot_start], Xnew[slot_end:]
Y_train1, Y_train2 = Label_set[:slot_start], Label_set[slot_end:]
X_train = np.concatenate((X_train1, X_train2), axis=0)
Y_train = np.concatenate((Y_train1, Y_train2), axis=0)
X_test = Xnew[slot_start:slot_end]
Y_test = Label_set[slot_start:slot_end]
sizee = len(X_test)

# Split the held-out slot: first half for validation, second half for testing
val = 0.5
n_val = int(val * sizee)
X_val, X_test = X_test[:n_val], X_test[n_val:]
Y_val, Y_test = Y_test[:n_val], Y_test[n_val:]

In [None]:
#train and test split - 2nd slot as test
# Shuffle windows and labels with one shared random permutation
values = list(range(80640))
permutations = np.random.permutation(values)
Xnew = Xnew[permutations]
Label_set = Label_set[permutations]

# Rows in [20%, 40%) are held out; everything before and after is training data
slot_start = int(0.2 * 80640)
slot_end = int(0.4 * 80640)
X_train1, X_train2 = Xnew[:slot_start], Xnew[slot_end:]
Y_train1, Y_train2 = Label_set[:slot_start], Label_set[slot_end:]
X_train = np.concatenate((X_train1, X_train2), axis=0)
Y_train = np.concatenate((Y_train1, Y_train2), axis=0)
X_test = Xnew[slot_start:slot_end]
Y_test = Label_set[slot_start:slot_end]
sizee = len(X_test)

# Split the held-out slot: first half for validation, second half for testing
val = 0.5
n_val = int(val * sizee)
X_val, X_test = X_test[:n_val], X_test[n_val:]
Y_val, Y_test = Y_test[:n_val], Y_test[n_val:]

In [None]:
#train and test split - 1st slot as test
# Shuffle windows and labels with one shared random permutation
values = list(range(80640))
permutations = np.random.permutation(values)
Xnew = Xnew[permutations]
Label_set = Label_set[permutations]

# The first 20% of the shuffled windows is held out; the remaining 80% trains the model
slot_end = int(0.2 * 80640)
X_test, X_train = Xnew[:slot_end], Xnew[slot_end:]
Y_test, Y_train = Label_set[:slot_end], Label_set[slot_end:]
sizee = len(X_test)

# Split the held-out slot: first half for validation, second half for testing
val = 0.5
n_val = int(val * sizee)
X_val, X_test = X_test[:n_val], X_test[n_val:]
Y_val, Y_test = Y_test[:n_val], Y_test[n_val:]

# **Functions**

In [None]:
#Plot run properties
def pretty_plot(history, field, fn):
    """
    Plots a training metric and its validation counterpart over epochs.

    Two figures are shown: the complete run, with dotted guide lines at the best
    validation epoch and value, and a close-up of the last 15% of epochs with a
    guide line at the best validation value only.

    Args:
    history: Object whose `history` attribute maps metric names to per-epoch value sequences.
    field (str): Metric to plot; the key 'val_' + field must also be present.
    fn (callable): Receives the validation series and returns the index of its best epoch.
    """
    def plot(data, val_data, best_index, best_value, title):
        # Epochs are numbered from 1 on the x axis
        epochs = range(1, len(data) + 1)
        plt.plot(epochs, data, label = 'train')
        plt.plot(epochs, val_data, label = 'validation')
        if best_index is not None:
            plt.axvline(x = best_index + 1, linestyle = ':', c = "#777777")
        if best_value is not None:
            plt.axhline(y = best_value, linestyle = ':', c = "#777777")
        plt.xlabel('Epoch')
        plt.ylabel(field)
        plt.xticks(range(0, len(data), 20))
        plt.title(title)
        plt.legend()
        plt.show()

    data = history.history[field]
    val_data = history.history['val_' + field]
    tail = int(0.15 * len(data))

    best_index = fn(val_data)
    best_value = val_data[best_index]

    plot(data, val_data, best_index, best_value, "{} over epochs (best {:06.4f})".format(field, best_value))
    # For runs shorter than 7 epochs `tail` is 0 and `data[-0:]` is the whole
    # series, which used to be re-plotted under a "last 0 epochs" title; skip
    # the close-up in that case.
    if tail > 0:
        plot(data[-tail:], val_data[-tail:], None, best_value, "{} over last {} epochs".format(field, tail))


#learning rate finder
from keras.callbacks import Callback
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt


class LRFinder(Callback):
    """
    Learning-rate range test as a Keras callback.

    While `fit` runs, the optimizer learning rate is stepped through a geometric
    sequence from `min_lr` to `max_lr`, changing every `batches_lr_update`
    batches. A momentum-smoothed loss is recorded at each step and plotted
    against the learning rate when training ends.

    Args:
        min_lr: First learning rate of the sweep.
        max_lr: Last learning rate of the sweep.
        mom: Smoothing factor applied to the batch loss (0 disables smoothing).
        stop_multiplier: Training stops once the smoothed loss exceeds
            `best_loss * stop_multiplier`. Defaults to `-20 * mom / 3 + 10`.
        reload_weights: If True, the initial weights are saved to 'tmp.hdf5',
            restored at every learning-rate step and again when training ends.
        batches_lr_update: Number of batches spent on each learning rate.

    Up-to date version: https://github.com/WittmannF/LRFinder
    Example of usage:
        from keras.models import Sequential
        from keras.layers import Flatten, Dense
        from keras.datasets import fashion_mnist
        !git clone https://github.com/WittmannF/LRFinder.git
        from LRFinder.keras_callback import LRFinder
        # 1. Input Data
        (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
        mean, std = X_train.mean(), X_train.std()
        X_train, X_test = (X_train-mean)/std, (X_test-mean)/std
        # 2. Define and Compile Model
        model = Sequential([Flatten(),
                            Dense(512, activation='relu'),
                            Dense(10, activation='softmax')])
        model.compile(loss='sparse_categorical_crossentropy', \
                      metrics=['accuracy'], optimizer='sgd')
        # 3. Fit using Callback
        lr_finder = LRFinder(min_lr=1e-4, max_lr=1)
        model.fit(X_train, y_train, batch_size=128, callbacks=[lr_finder], epochs=2)
    """

    def __init__(self, min_lr, max_lr, mom=0.9, stop_multiplier=None,
                 reload_weights=True, batches_lr_update=5):
        # Let the base Callback set up its own state before ours
        super().__init__()
        self.min_lr = min_lr
        self.max_lr = max_lr
        self.mom = mom
        self.reload_weights = reload_weights
        self.batches_lr_update = batches_lr_update
        if stop_multiplier is None:
            self.stop_multiplier = -20 * self.mom / 3 + 10  # 4 if mom=0.9
            # 10 if mom=0
        else:
            self.stop_multiplier = stop_multiplier

    def on_train_begin(self, logs=None):
        """Build the learning-rate schedule and snapshot the initial weights."""
        p = self.params
        try:
            n_iterations = p['epochs'] * p['samples'] // p['batch_size']
        except (KeyError, TypeError):
            # 'samples'/'batch_size' are absent or None; fall back to step counts
            n_iterations = p['steps'] * p['epochs']

        # One learning rate per group of `batches_lr_update` batches
        self.learning_rates = np.geomspace(self.min_lr, self.max_lr,
                                           num = n_iterations // self.batches_lr_update + 1)
        self.losses = []
        self.iteration = 0
        self.best_loss = 0
        if self.reload_weights:
            self.model.save_weights('tmp.hdf5')

    def on_batch_end(self, batch, logs=None):
        """Record the smoothed loss, advance the learning rate and check the stop rule."""
        logs = logs or {}
        loss = logs.get('loss')

        if self.iteration != 0:  # Make loss smoother using momentum
            loss = self.losses[-1] * self.mom + loss * (1 - self.mom)

        if self.iteration == 0 or loss < self.best_loss:
            self.best_loss = loss

        if self.iteration % self.batches_lr_update == 0:  # Move to the next lr every `batches_lr_update` batches

            if self.reload_weights:
                # Each learning rate starts from the same initial weights
                self.model.load_weights('tmp.hdf5')

            lr = self.learning_rates[self.iteration // self.batches_lr_update]
            K.set_value(self.model.optimizer.lr, lr)

            self.losses.append(loss)

        if loss > self.best_loss * self.stop_multiplier:  # Stop criteria
            self.model.stop_training = True

        self.iteration += 1

    def on_train_end(self, logs=None):
        """Restore the initial weights (if requested) and plot loss against learning rate."""
        if self.reload_weights:
            self.model.load_weights('tmp.hdf5')

        plt.figure(figsize = (12, 6))
        plt.plot(self.learning_rates[:len(self.losses)], self.losses)
        plt.xlabel("Learning Rate")
        plt.ylabel("Loss")
        plt.xscale('log')
        plt.show()

# **model structure**

In [None]:
# Define a Sequential model
model = Sequential()

# Four convolutional stages with 32, 64, 128 and 256 filters. Each stage is two
# same-padded Conv1D layers (kernel size 4, ReLU), an average pooling layer
# (pool size 4, stride 2) and batch normalization. Only the very first layer
# declares the input shape (2500 samples, 1 channel).
for stage, n_filters in enumerate((32, 64, 128, 256)):
    first_layer_kwargs = {'input_shape': (2500, 1)} if stage == 0 else {}
    model.add(Conv1D(n_filters, 4, strides=1, padding='same', activation='relu', **first_layer_kwargs))
    model.add(Conv1D(n_filters, 4, strides=1, padding='same', activation='relu'))
    model.add(AveragePooling1D(pool_size=4, strides=2))
    model.add(BatchNormalization())

# Global Average Pooling layer collapses the time axis
model.add(GlobalAveragePooling1D())

# Fully connected head: 256 ReLU units, dropout 0.2, 2-way softmax output
model.add(Dense(256, kernel_initializer='normal', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, kernel_initializer='normal', activation='softmax'))

# **train model**

In [None]:
# Define the optimizer for the model
# `learning_rate` is the supported keyword; the `lr` alias is deprecated, and
# the training cell below already uses `learning_rate`.
optimizer = keras.optimizers.Adamax(learning_rate = 0.001)

# Compile the model with the specified optimizer, loss function, and metrics
model.compile(optimizer = optimizer, loss = 'mse', metrics = ['accuracy'])

# Define the learning rate finder (sweeps the learning rate from 1e-12 up to 10)
lr_finder = LRFinder(min_lr = 1e-12, max_lr = 1e+1)

# Train the model to find the learning rate using the lr_finder
model.fit(X_train, Y_train, batch_size = 128, callbacks = [lr_finder], epochs = 5)

In [None]:
# Set up the optimizer
opt = Adamax(
    learning_rate=0.001,
    decay=0.001,
    clipvalue=0.5,
    epsilon=1e-07,
)

# Compile the model with the specified optimizer, loss function, and metrics
model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])

# Define the path for saving the trained model
pathh = '/content/drive/My Drive/Conv_models'

# Checkpoint callback: overwrite the saved model only when validation accuracy improves
checkpointer = ModelCheckpoint(
    filepath=pathh,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Train for 280 epochs on shuffled batches of 128, validating on the validation split
hist = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    batch_size=128,
    epochs=280,
    verbose=2,
    shuffle=True,
    callbacks=[checkpointer],
)

# Save the training history to a CSV file for analysis
history_frame = pd.DataFrame(hist.history)
history_frame.to_csv(path_or_buf='/content/drive/My Drive/Conv_models/History.csv')

# **train information**

In [None]:
#plot run info
# The best epoch is the one with minimum validation loss / maximum validation accuracy
for metric_name, pick_best in (('loss', np.argmin), ('accuracy', np.argmax)):
    pretty_plot(hist, metric_name, pick_best)

In [None]:
#test
# Reload the model saved by the checkpoint callback and score it on the test split
testmodel = load_model('/content/drive/My Drive/Conv_models')
test_metrics = testmodel.evaluate(X_test, Y_test)
tst_loss, tst_acc = test_metrics

In [None]:
#print model summary
# Shows the layer-by-layer description of the in-memory `model`
model.summary()

In [None]:
# Predict class probabilities for the test split using the in-memory model
Y_pred = model.predict(X_test, verbose=0, max_queue_size=10, workers=1, use_multiprocessing=False)

# Collapse the one-hot targets and the probability rows to class indices once
true_classes = change(Y_test)
predicted_classes = change(Y_pred)

# Confusion matrix (rows follow the true class, columns the predicted class)
cm = confusion_matrix(true_classes, predicted_classes)

# Accuracy, plus the per-row recall of the confusion matrix:
# row 0 is reported as sensitivity and row 1 as specificity
accuracy_model = accuracy_score(true_classes, predicted_classes)
row0_total = cm[0, 0] + cm[0, 1]
row1_total = cm[1, 0] + cm[1, 1]
Sensitivity_model = cm[0, 0] / row0_total
Specificity_model = cm[1, 1] / row1_total

# Area Under the ROC Curve, computed from the one-hot targets and the raw probabilities
auc = roc_auc_score(
    Y_test,
    Y_pred,
    average='macro',
    sample_weight=None,
    max_fpr=None,
    multi_class='raise',
    labels=None,
)

# Print the evaluation metrics
report = classification_report(true_classes, predicted_classes)
print('\n\n\n\n roc_auc_score : \n', auc)
print('\n\n confusion_matrix : \n', cm)
print('\n\n test accuracy is :', accuracy_model)
print(' Sensitivity :', Sensitivity_model)
print(' Specificity :', Specificity_model)
print('\n\n classification report : \n', report)