In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers
import pandas as pd
import numpy as np
from google.colab import drive
import gc
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Mount Google Drive at /content/drive; the dataset and model paths used
# later in this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def clear_variable(var_list):
    """Remove global variables by name, then run the garbage collector.

    Args:
        var_list: Either a single variable name (``str``) or an iterable of
            variable names. Names that are not currently defined are ignored.

    A bare string is treated as ONE name. Without that guard, iterating over
    ``'read_data'`` yields its individual characters, so the intended variable
    survives while unrelated single-letter globals get removed instead.
    """
    names = [var_list] if isinstance(var_list, str) else list(var_list)
    for name in names:
        globals().pop(name, None)  # Missing names are skipped silently
    gc.collect()  # Run garbage collection

In [None]:
# File names of the 20 subsets, numbered 1 through 20.
file_list = [f'Shuffled_Subset{subset_no}.h5' for subset_no in range(1, 21)]

In [None]:
# Directory on Drive that holds the subset files named in `file_list`.
dataset_dir = '/content/drive/MyDrive/ML_DL_Datasets/DNA_Datasets/Shuffled_Datasets/Covid_Shuffled_Balanced'

X_train_list, X_test_list, X_val_list = [], [], []
y_train_list, y_test_list, y_val_list = [], [], []

for file_name in file_list:
    read_data = pd.read_hdf(f'{dataset_dir}/{file_name}')  # Read the current subset

    # Every column except 'Class' is a feature; each row becomes a (30900, 4) array.
    data_reshaped = read_data.drop('Class', axis=1).to_numpy().reshape(len(read_data), 30900, 4)
    data_labels = read_data['Class'].to_numpy()
    # `del` is used on purpose: clear_variable('read_data') iterates over the
    # characters of the string and therefore never removed the variable.
    del read_data

    # Positional split per subset: rows 0-699 train, 700-799 validation, 800+ test.
    X_train_list.append(data_reshaped[:700])
    X_val_list.append(data_reshaped[700:800])
    X_test_list.append(data_reshaped[800:])

    y_train_list.append(data_labels[:700])
    y_val_list.append(data_labels[700:800])
    y_test_list.append(data_labels[800:])

# Drop the names still bound to the last subset's arrays.
del data_reshaped, data_labels

# Stack the per-subset pieces; each list is released as soon as it has been merged.
X_train = np.concatenate(X_train_list, axis=0)
del X_train_list

X_test = np.concatenate(X_test_list, axis=0)
del X_test_list

X_val = np.concatenate(X_val_list, axis=0)
del X_val_list

y_train = np.concatenate(y_train_list, axis=0)
del y_train_list

y_test = np.concatenate(y_test_list, axis=0)
del y_test_list

y_val = np.concatenate(y_val_list, axis=0)
del y_val_list

gc.collect()  # Run garbage collection once all intermediates are unreferenced

# Features and labels of every split must stay aligned.
assert len(X_train) == len(y_train), "train features/labels length mismatch"
assert len(X_val) == len(y_val), "validation features/labels length mismatch"
assert len(X_test) == len(y_test), "test features/labels length mismatch"

In [None]:
# Disabled earlier draft: the definition below is wrapped in a string literal,
# so it is never executed. It has no Dense(8) layer ahead of the softmax output.
'''from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    model = models.Sequential()

    # Reshape input to 3D
    model.add(layers.Reshape((input_shape[0], input_shape[1], 1), input_shape=input_shape))

    # Convolutional Layers
    model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 1)))  # Adjusted pool size

    # Flatten layer
    model.add(layers.Flatten())

    # Dense Layers
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(num_classes, activation='softmax'))

    return model
'''

# **First Model**
Three hidden dense layers (256, 128 and 8 units) ahead of the softmax output layer

In [None]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """Build the first CNN variant: three conv/pool stages, three hidden dense layers.

    Args:
        input_shape: Per-sample shape; its first two entries are used and a
            trailing axis of size 1 is appended so Conv2D can be applied.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    def conv_stage(n_filters, pool):
        # A 3x3, same-padded ReLU convolution followed by max pooling.
        return [
            layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D(pool_size=pool),
        ]

    def dense_stage(units):
        # A fully connected ReLU layer followed by dropout at rate 0.5.
        return [layers.Dense(units, activation='relu'), layers.Dropout(0.5)]

    # Input reshape: add the channel axis.
    stack = [layers.Reshape((input_shape[0], input_shape[1], 1), input_shape=input_shape)]

    # Convolutional part.
    stack += conv_stage(32, (2, 2))
    stack += conv_stage(64, (2, 2))
    stack += conv_stage(128, (2, 1))  # Adjusted pool size: only the first axis is pooled

    # Classifier head.
    stack.append(layers.Flatten())
    stack += dense_stage(256)
    stack += dense_stage(128)
    stack.append(layers.Dense(8, activation='relu'))  # no dropout after this layer
    stack.append(layers.Dense(num_classes, activation='softmax'))

    model = models.Sequential()
    for layer in stack:
        model.add(layer)
    return model


In [None]:
# Learning rate scheduler callback
def scheduler(epoch, lr):
    """Step-wise learning-rate schedule for ``LearningRateScheduler``.

    Args:
        epoch: Epoch index supplied by the callback.
        lr: Learning rate currently in effect.

    Returns:
        ``lr`` unchanged while ``epoch <= 30``, 0.0001 for ``30 < epoch <= 40``
        and 0.00001 once ``epoch > 40``.
    """
    if epoch > 40:
        return 0.00001
    if epoch > 30:
        return 0.0001
    return lr

# Callback that applies `scheduler` while training
lr_scheduler = callbacks.LearningRateScheduler(scheduler)

# Per-sample input shape (30900 rows x 4 columns) and number of target classes
input_shape = (30900, 4)
num_classes = 8

# Build the first model and compile it with Adam at an initial learning rate of 0.001
cnn_model = create_cnn_model(input_shape, num_classes)
cnn_model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
cnn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 30900, 4, 1)       0         
                                                                 
 conv2d (Conv2D)             (None, 30900, 4, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15450, 2, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 15450, 2, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7725, 1, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 7725, 1, 128)      7

In [None]:
# Fit the first model: 50 epochs, mini-batches of 64, validated on the validation split
history = cnn_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[lr_scheduler],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Measure loss and accuracy of the first model on the test split
loss, accuracy = cnn_model.evaluate(x=X_test, y=y_test)

# Show both metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.0696132704615593
Test Accuracy: 0.9800994992256165


In [None]:
# Class probabilities for the test split, reduced to the most probable class index
predictions = cnn_model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)

# Confusion matrix, a blank separator, then the per-class report
print(confusion_matrix(y_test, predicted_labels))
print('\n')
print(classification_report(y_test, predicted_labels))

[[328   1   0   0   0   0   0   1]
 [  0 416   0   0   0   0   4   0]
 [  0   0 360   0   0   0   0   0]
 [  0   2   0 358   0   0   0   0]
 [  1   0   0   0 136   0   3   0]
 [  0   0   0   0   0 160   0   0]
 [  1   7   0   0   0   0 140  12]
 [  0   5   0   0   0   0   3  72]]


              precision    recall  f1-score   support

           0       0.99      0.99      0.99       330
           1       0.97      0.99      0.98       420
           2       1.00      1.00      1.00       360
           3       1.00      0.99      1.00       360
           4       1.00      0.97      0.99       140
           5       1.00      1.00      1.00       160
           6       0.93      0.88      0.90       160
           7       0.85      0.90      0.87        80

    accuracy                           0.98      2010
   macro avg       0.97      0.97      0.97      2010
weighted avg       0.98      0.98      0.98      2010



In [None]:
# Write the trained first model to Drive as a single .h5 file
cnn_model.save('/content/drive/MyDrive/ML_Models/CNN_Model_Covid_(First_Model).h5')

  saving_api.save_model(


# **Second Model**
Two hidden dense layers (256 and 8 units) ahead of the softmax output layer

In [None]:
from tensorflow.keras import layers, models

def create_cnn_model_2(input_shape, num_classes):
    """Build the second CNN variant: three conv/pool stages, two hidden dense layers.

    Args:
        input_shape: Per-sample shape; its first two entries are used and a
            trailing axis of size 1 is appended so Conv2D can be applied.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    model = models.Sequential()

    # Add the channel axis expected by Conv2D
    model.add(layers.Reshape((input_shape[0], input_shape[1], 1), input_shape=input_shape))

    # (filters, pool_size) of each convolution stage, in order; the last
    # stage pools along the first axis only.
    conv_plan = ((32, (2, 2)), (64, (2, 2)), (128, (2, 1)))
    for n_filters, pool in conv_plan:
        model.add(layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', padding='same'))
        model.add(layers.MaxPooling2D(pool_size=pool))

    # Classifier head: flatten, 256-unit layer with dropout, 8-unit layer, softmax
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(8, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model


In [None]:
# Learning rate scheduler callback
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch index.

    The incoming ``lr`` is kept while ``epoch <= 30``; it is replaced by
    0.0001 for ``30 < epoch <= 40`` and by 0.00001 when ``epoch > 40``.
    """
    return 0.00001 if epoch > 40 else (0.0001 if epoch > 30 else lr)

# Callback that applies `scheduler` while training
lr_scheduler = callbacks.LearningRateScheduler(scheduler)

# Per-sample input shape (30900 rows x 4 columns) and number of target classes
input_shape = (30900, 4)
num_classes = 8

# Build the second model and compile it with Adam at an initial learning rate of 0.001
cnn_model_2 = create_cnn_model_2(input_shape, num_classes)
cnn_model_2.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
cnn_model_2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_1 (Reshape)         (None, 30900, 4, 1)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 30900, 4, 32)      320       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 15450, 2, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 15450, 2, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 7725, 1, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 7725, 1, 128)     

In [None]:
# Fit the second model: 50 epochs, mini-batches of 64, validated on the validation split
history_2 = cnn_model_2.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[lr_scheduler],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Measure loss and accuracy of the second model on the test split
loss, accuracy = cnn_model_2.evaluate(x=X_test, y=y_test)

# Show both metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.042395804077386856
Test Accuracy: 0.9890547394752502


In [None]:
# Class probabilities for the test split, reduced to the most probable class index
predictions = cnn_model_2.predict(X_test)
predicted_labels = predictions.argmax(axis=1)

# Confusion matrix, a blank separator, then the per-class report
print(confusion_matrix(y_test, predicted_labels))
print('\n')
print(classification_report(y_test, predicted_labels))

[[330   0   0   0   0   0   0   0]
 [  1 416   0   2   1   0   0   0]
 [  2   0 358   0   0   0   0   0]
 [  0   1   0 356   0   0   3   0]
 [  0   1   0   0 139   0   0   0]
 [  0   0   0   0   0 160   0   0]
 [  1   3   0   0   2   0 153   1]
 [  1   2   0   0   1   0   0  76]]


              precision    recall  f1-score   support

           0       0.99      1.00      0.99       330
           1       0.98      0.99      0.99       420
           2       1.00      0.99      1.00       360
           3       0.99      0.99      0.99       360
           4       0.97      0.99      0.98       140
           5       1.00      1.00      1.00       160
           6       0.98      0.96      0.97       160
           7       0.99      0.95      0.97        80

    accuracy                           0.99      2010
   macro avg       0.99      0.98      0.99      2010
weighted avg       0.99      0.99      0.99      2010



In [None]:
# Write the trained second model to Drive as a single .h5 file
cnn_model_2.save('/content/drive/MyDrive/ML_Models/CNN_Model_Covid_(Second_Model).h5')

  saving_api.save_model(


# **Final Model**


In [None]:
from tensorflow.keras import layers, models

def create_cnn_model(input_shape, num_classes):
    """Build the final CNN: three conv/pool stages and two hidden dense layers.

    NOTE: this rebinds ``create_cnn_model``, replacing the variant with three
    hidden dense layers defined earlier in the notebook.

    Args:
        input_shape: Per-sample shape; its first two entries are used and a
            trailing axis of size 1 is appended so Conv2D can be applied.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The assembled (uncompiled) Sequential model.
    """
    return models.Sequential([
        # Add the channel axis expected by Conv2D
        layers.Reshape((input_shape[0], input_shape[1], 1), input_shape=input_shape),

        # Convolution stage 1
        layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Convolution stage 2
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Convolution stage 3 (pooling along the first axis only)
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(pool_size=(2, 1)),

        # Classifier head
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(8, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])


In [None]:
from tensorflow.keras.utils import Sequence

class DataGenerator(Sequence):
    """Serve ``(data, labels)`` mini-batches to Keras from in-memory arrays.

    Args:
        data: Array of samples; must support indexing with an integer array.
        labels: Array of targets with the same length as ``data``.
        batch_size: Number of samples per batch (positive integer).
        shuffle: When True, the sample order is re-drawn at construction and
            on every ``on_epoch_end`` call.
        drop_remainder: When True (the default, which keeps the historical
            behaviour) a trailing partial batch is discarded, so up to
            ``batch_size - 1`` samples are never served. Pass False to also
            serve the final, shorter batch - advisable for validation and
            test data so that every sample is scored.
        **kwargs: Forwarded to the base-class constructor.

    Raises:
        ValueError: If ``batch_size`` is not positive, or if ``data`` and
            ``labels`` differ in length.
    """

    def __init__(self, data, labels, batch_size=32, shuffle=True, drop_remainder=True, **kwargs):
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.on_epoch_end()  # builds the initial index order

    def __len__(self):
        """Number of batches served per pass over the data."""
        n_samples = len(self.data)
        if self.drop_remainder:
            return n_samples // self.batch_size
        return -(-n_samples // self.batch_size)  # ceiling division keeps the partial batch

    def __getitem__(self, index):
        """Return batch number ``index`` as a ``(batch_data, batch_labels)`` pair."""
        # Slicing past the end is safe, so the last batch may simply be shorter.
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_data = self.data[indices]
        batch_labels = self.labels[indices]
        return batch_data, batch_labels

    def on_epoch_end(self):
        """Reset the index order, shuffling it when ``shuffle`` is enabled."""
        self.indices = np.arange(len(self.data))
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
# Learning rate scheduler callback
def scheduler(epoch, lr):
    """Learning-rate schedule used by the ``LearningRateScheduler`` callback.

    Args:
        epoch: Epoch index supplied by the callback.
        lr: Learning rate currently in effect.

    Returns:
        0.00001 when ``epoch > 40``, 0.0001 when ``30 < epoch <= 40``,
        otherwise ``lr`` unchanged.
    """
    # Thresholds are checked from the latest phase backwards.
    for threshold, rate in ((40, 0.00001), (30, 0.0001)):
        if epoch > threshold:
            return rate
    return lr

# Callback that applies `scheduler` while training
lr_scheduler = callbacks.LearningRateScheduler(scheduler)

# Per-sample input shape (30900 rows x 4 columns) and number of target classes
input_shape = (30900, 4)
num_classes = 8

# Build the final model and compile it with Adam at an initial learning rate of 0.001
cnn_model_3 = create_cnn_model(input_shape, num_classes)
cnn_model_3.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
cnn_model_3.summary()

In [None]:
# Mini-batch size shared by the three generators
batch_size = 64

# One generator per split; only the training generator shuffles its sample order.
# NOTE: DataGenerator.__len__ rounds the batch count down, so up to
# batch_size - 1 trailing samples of each split are left out of every pass.
train_generator = DataGenerator(data=X_train, labels=y_train, batch_size=batch_size, shuffle=True)
val_generator = DataGenerator(data=X_val, labels=y_val, batch_size=batch_size, shuffle=False)
test_generator = DataGenerator(data=X_test, labels=y_test, batch_size=batch_size, shuffle=False)

# Fit the final model for 50 epochs, feeding batches from the generators
history_3 = cnn_model_3.fit(
    train_generator,
    epochs=50,
    validation_data=val_generator,
    callbacks=[lr_scheduler],
)

In [None]:
# Measure loss and accuracy of the final model on the batches served by test_generator
loss, accuracy = cnn_model_3.evaluate(x=test_generator)

# Show both metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

In [None]:
# Predict using the model and keep the most probable class per sample
predictions = cnn_model_3.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# test_generator is created with shuffle=False, so predictions follow the
# order of y_test. The generator may serve fewer samples than y_test holds
# (a trailing partial batch can be dropped), so the labels are truncated to
# the number of predictions actually produced.
true_labels = y_test[:len(predicted_labels)]
assert len(true_labels) == len(predicted_labels), "predictions outnumber the available labels"

# Print confusion matrix and classification report
print(confusion_matrix(true_labels, predicted_labels))
print('\n')
print(classification_report(true_labels, predicted_labels))

## **Testing Final Model**

In [None]:
# Load a previously saved model from Drive for the evaluation cells that follow.
# NOTE(review): no cell visible in this notebook writes 'CNN_Model_Covid_(Balanced).h5'
# - confirm which trained model this file holds. The assignment also rebinds
# `cnn_model`, the name used for the first model earlier in the notebook.
cnn_model = tf.keras.models.load_model('/content/drive/MyDrive/ML_Models/CNN_Model_Covid_(Balanced).h5')

In [None]:
# Mini-batch size shared by the three generators
batch_size = 64

# One generator per split; only the training generator shuffles its sample order.
# NOTE: DataGenerator.__len__ rounds the batch count down, so up to
# batch_size - 1 trailing samples of each split are left out of every pass.
train_generator = DataGenerator(data=X_train, labels=y_train, batch_size=batch_size, shuffle=True)
val_generator = DataGenerator(data=X_val, labels=y_val, batch_size=batch_size, shuffle=False)
test_generator = DataGenerator(data=X_test, labels=y_test, batch_size=batch_size, shuffle=False)

In [None]:
# Measure loss and accuracy of the loaded model on the batches served by test_generator
loss, accuracy = cnn_model.evaluate(x=test_generator)

# Show both metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.0493064783513546
Test Accuracy: 0.9931955933570862


In [None]:
# Predict using the model and keep the most probable class per sample
predictions = cnn_model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# test_generator is created with shuffle=False, so predictions follow the
# order of y_test. The generator may serve fewer samples than y_test holds
# (a trailing partial batch can be dropped), so the labels are truncated to
# the number of predictions actually produced.
true_labels = y_test[:len(predicted_labels)]
assert len(true_labels) == len(predicted_labels), "predictions outnumber the available labels"

# Compute the classification report as a table rounded to 4 decimals
class_report = classification_report(true_labels, predicted_labels, output_dict=True)
class_report_df = pd.DataFrame(class_report).transpose()
class_report_df = class_report_df.round(4)

print("Classification Report:")
print(class_report_df.to_string())

Classification Report:
              precision  recall  f1-score    support
0                0.9958  1.0000    0.9979   474.0000
1                0.9888  0.9833    0.9860   359.0000
2                1.0000  0.9966    0.9983   597.0000
3                0.9940  0.9940    0.9940   497.0000
4                0.9982  0.9928    0.9955   554.0000
5                0.9960  0.9960    0.9960   497.0000
6                0.9842  0.9977    0.9909   436.0000
7                0.9855  0.9838    0.9846   554.0000
accuracy         0.9932  0.9932    0.9932     0.9932
macro avg        0.9928  0.9930    0.9929  3968.0000
weighted avg     0.9932  0.9932    0.9932  3968.0000


In [None]:
# Predict using the model and keep the most probable class per sample
predictions = cnn_model.predict(test_generator)
predicted_labels = np.argmax(predictions, axis=1)

# test_generator is created with shuffle=False, so predictions follow the
# order of y_test. The generator may serve fewer samples than y_test holds
# (a trailing partial batch can be dropped), so the labels are truncated to
# the number of predictions actually produced.
true_labels = y_test[:len(predicted_labels)]
assert len(true_labels) == len(predicted_labels), "predictions outnumber the available labels"

# Print confusion matrix and classification report
print(confusion_matrix(true_labels, predicted_labels))
print('\n')
print(classification_report(true_labels, predicted_labels))

[[474   0   0   0   0   0   0   0]
 [  0 353   0   0   1   0   0   5]
 [  0   0 595   0   0   1   0   1]
 [  0   0   0 494   0   1   2   0]
 [  0   0   0   1 550   0   2   1]
 [  2   0   0   0   0 495   0   0]
 [  0   0   0   0   0   0 435   1]
 [  0   4   0   2   0   0   3 545]]


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       474
           1       0.99      0.98      0.99       359
           2       1.00      1.00      1.00       597
           3       0.99      0.99      0.99       497
           4       1.00      0.99      1.00       554
           5       1.00      1.00      1.00       497
           6       0.98      1.00      0.99       436
           7       0.99      0.98      0.98       554

    accuracy                           0.99      3968
   macro avg       0.99      0.99      0.99      3968
weighted avg       0.99      0.99      0.99      3968

