In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers
import pandas as pd
import numpy as np
from google.colab import drive
import gc
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Mount Google Drive at /content/drive; the dataset paths read later in this
# notebook live under this mount point.
drive.mount('/content/drive')

In [None]:
def clear_variable(var_list):
    """Remove the named variables from the global namespace and run the garbage collector.

    Args:
        var_list: Either a single variable name (str) or an iterable of
            variable names. Names that are not currently defined are ignored.

    Returns:
        None.
    """
    # A str is itself iterable, so looping over it directly would try to remove
    # one-character names ('r', 'e', 'a', ...) instead of the variable that was
    # meant. Wrap a bare string so it is treated as exactly one name.
    if isinstance(var_list, str):
        var_list = [var_list]

    for var in var_list:
        globals().pop(var, None)  # Remove from global scope; no error if absent
    gc.collect()  # Run garbage collection

In [None]:
# Number of subset files; they are numbered starting from 1.
NUM_SUBSETS = 20

# File names of the subsets: Shuffled_Subset1.h5 ... Shuffled_Subset20.h5
file_list = [f'Shuffled_Subset{subset_no}.h5' for subset_no in range(1, NUM_SUBSETS + 1)]

In [None]:
# Directory on the mounted Drive that holds the subset files named in file_list.
DATA_DIR = '/content/drive/MyDrive/ML_DL_Datasets/DNA_Datasets/Shuffled_Datasets/Covid_Shuffled_Balanced'

# Row boundaries applied to every file:
#   rows [0, TRAIN_END)        -> training
#   rows [TRAIN_END, VAL_END)  -> validation
#   rows [VAL_END, end)        -> test
# NOTE(review): assumes every file has more than VAL_END rows — TODO confirm.
TRAIN_END = 700
VAL_END = 800

X_train_list, X_test_list, X_val_list = [], [], []
y_train_list, y_test_list, y_val_list = [], [], []

for file_name in file_list:
  file_path = f'{DATA_DIR}/{file_name}'
  read_data = pd.read_hdf(file_path) # Read the current dataset

  # Every column except 'Class' is a feature; each row is reshaped to (30900, 4).
  data_reshaped = np.array(read_data.drop('Class', axis=1)).reshape(read_data.shape[0],30900,4)
  data_labels = read_data['Class']
  # clear_variable iterates over its argument, so the name is passed inside a
  # list; a bare string would be walked character by character and the
  # variable would never be removed.
  clear_variable(['read_data'])

  X_train_list.append(data_reshaped[:TRAIN_END])
  X_val_list.append(data_reshaped[TRAIN_END:VAL_END])
  X_test_list.append(data_reshaped[VAL_END:])
  clear_variable(['data_reshaped'])

  y_train_list.append(data_labels[:TRAIN_END])
  y_val_list.append(data_labels[TRAIN_END:VAL_END])
  y_test_list.append(data_labels[VAL_END:])
  clear_variable(['data_labels'])

# Stack the per-file pieces into single arrays. Each temporary list is dropped
# right after it has been concatenated so the pieces do not stay referenced
# longer than needed.
X_train = np.concatenate(X_train_list, axis=0)
clear_variable(['X_train_list'])

X_test = np.concatenate(X_test_list, axis=0)
clear_variable(['X_test_list'])

X_val = np.concatenate(X_val_list, axis=0)
clear_variable(['X_val_list'])

y_train = np.concatenate(y_train_list, axis=0)
clear_variable(['y_train_list'])

y_test = np.concatenate(y_test_list, axis=0)
clear_variable(['y_test_list'])

y_val = np.concatenate(y_val_list, axis=0)
clear_variable(['y_val_list'])

In [None]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """Build (without compiling) the sequential 2D-CNN classifier.

    Layer order: Reshape -> 3 x (Conv2D + MaxPooling2D) -> Flatten ->
    Dense(256) -> Dropout(0.5) -> Dense(8) -> Dense(num_classes, softmax).

    Args:
        input_shape: Two-element shape of a single sample; its first two
            entries are used as the first two dimensions of the reshaped input.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The assembled, uncompiled ``models.Sequential`` instance.
    """
    # (filters, pool_size) of each Conv2D -> MaxPooling2D stage, in order.
    # The last stage pools only along the first spatial axis.
    conv_stages = (
        (32, (2, 2)),
        (64, (2, 2)),
        (128, (2, 1)),
    )

    net = models.Sequential()

    # Add a trailing axis of size 1 so the 2D convolutions receive a 3D sample.
    target_shape = (input_shape[0], input_shape[1], 1)
    net.add(layers.Reshape(target_shape, input_shape=input_shape))

    # Convolutional feature extractor
    for n_filters, pool in conv_stages:
        net.add(layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', padding='same'))
        net.add(layers.MaxPooling2D(pool_size=pool))

    # Classifier head
    net.add(layers.Flatten())
    net.add(layers.Dense(256, activation='relu'))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(8, activation='relu'))
    net.add(layers.Dense(num_classes, activation='softmax'))

    return net


In [None]:
# Learning rate schedule used by the LearningRateScheduler callback
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    Args:
        epoch: Epoch index supplied by the callback.
        lr: Learning rate currently in effect.

    Returns:
        0.00001 when epoch > 40, 0.0001 when 30 < epoch <= 40, and the
        unchanged ``lr`` otherwise.
    """
    if epoch > 40:
        return 0.00001
    if epoch > 30:
        return 0.0001
    return lr

# Wrap the schedule function in a Keras callback so it can be handed to fit()
lr_scheduler = callbacks.LearningRateScheduler(scheduler)

# Per-sample input shape (matches the (30900, 4) shape the data is reshaped to
# when it is loaded) and the size of the softmax output layer
input_shape = (30900, 4)
num_classes = 8

# Build the network
cnn_model = create_cnn_model(input_shape, num_classes)

# Compile with Adam (initial learning rate 0.001). The sparse categorical
# cross-entropy loss is used with the labels passed to fit()/evaluate() as-is.
cnn_model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
cnn_model.summary()

In [None]:
# Fit the model on the training split for 50 epochs in batches of 64, reporting
# metrics on the validation split; lr_scheduler supplies the learning rate.
history = cnn_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[lr_scheduler],
)

In [None]:
# Evaluate the trained model on the held-out test split.
# The model trained in this notebook is `cnn_model`; no `cnn_model_2` is defined
# anywhere, so referring to it raises NameError on a fresh kernel.
# evaluate() returns the loss followed by the compiled metric (accuracy).
loss, accuracy = cnn_model.evaluate(X_test, y_test)

# Print the evaluation results
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

In [None]:
# Run the model on the test split; the predicted label of each sample is the
# index of its largest output value.
predictions = cnn_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Build the per-class report as a dict, then turn it into a table with one row
# per class/average and values rounded to 4 decimals.
class_report = classification_report(y_test, predicted_labels, output_dict=True)
class_report_df = pd.DataFrame(class_report).T.round(4)

print("Classification Report:", class_report_df.to_string(), sep="\n")