<a href="https://colab.research.google.com/github/KelvinMarques/IA_predict_covid/blob/main/GS_IA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np # linear algebra
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Dataset source (Kaggle): https://www.kaggle.com/datasets/plameneduardo/sarscov2-ctscan-dataset

In [None]:
# Location of the image folders and the loader settings used by both splits.
data_dir = '/content/Dataset'
img_height = 224
img_width = 224
batch_size = 64

# Explicit class order passed to the loader (controls label assignment).
class_names = ['Covid', 'Non-Covid']

# Arguments common to the training and validation loaders. Both calls share
# the same `validation_split`, `seed` and `shuffle` so that they describe
# two halves of one and the same split.
_split_kwargs = dict(
    labels='inferred',
    label_mode='binary',
    class_names=class_names,
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    interpolation='nearest',
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs)

valid_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs)

Found 1284 files belonging to 2 classes.
Using 1028 files for training.
Found 1284 files belonging to 2 classes.
Using 256 files for validation.


In [None]:
# Mount Google Drive inside the Colab runtime.
# NOTE(review): the visible cells read images from '/content/Dataset', not
# from the mount point - confirm whether this mount is still required.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Image preprocessing


In [None]:
# Preprocessing stage: multiplies every pixel value by 1/255. Declaring the
# input shape here fixes the image size the downstream model expects.
pixel_scaling = layers.Rescaling(1./255, input_shape=[img_height, img_width, 3])
rescale = tf.keras.Sequential([pixel_scaling])

Model definition


In [None]:
# (filters, kernel_size) of each convolutional block, in network order.
# Every block is a 'same'-padded ReLU convolution followed by max pooling.
conv_specs = [(32, 5), (64, 3), (128, 3)]

# The network starts with the rescaling stage defined earlier.
model_layers = [rescale]

for n_filters, kernel in conv_specs:
    model_layers.append(
        layers.Conv2D(filters=n_filters, kernel_size=kernel, activation="relu", padding='same')
    )
    model_layers.append(layers.MaxPool2D())

# Classifier head: three ReLU dense layers (with dropout after the first
# two) ending in a single sigmoid unit for the binary label.
model_layers.extend([
    layers.Flatten(),
    layers.Dense(units=128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(units=64, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(units=32, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
])

model = keras.Sequential(model_layers)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 224, 224, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 224, 224, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2  (None, 112, 112, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 56, 56, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 128)      

Fitting

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Cache the decoded batches after the first pass and overlap input loading
# with training via prefetching.
# NOTE(review): `cache()` replays the batches in the order of the first pass,
# so the training order is the same every epoch; consider adding a
# `.shuffle(...)` after `.cache()` for the training pipeline.
train_ds_preprocess = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
valid_ds_preprocess = valid_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Stop on the *validation* loss: the training loss keeps decreasing while the
# model overfits, so monitoring it never triggers an early stop for the right
# reason. `restore_best_weights` rolls the model back to the best epoch
# instead of keeping the weights of the last (already degraded) one.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

# `history` keeps the per-epoch metrics returned by `fit`.
history = model.fit(
    train_ds_preprocess,
    validation_data=valid_ds_preprocess,
    epochs=50,
    verbose=1,
    callbacks=[callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50


## Results

### Confusion matrix


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.metrics import accuracy_score

CLASSES = train_ds.class_names

# The sigmoid output is thresholded into label 1, i.e. the second entry of
# CLASSES. For the error metrics below the class of interest is 'Covid', so
# it is the one treated as "positive".
POSITIVE = CLASSES.index('Covid')
NEGATIVE = 1 - POSITIVE

# Collect predictions and ground-truth labels in ONE pass over the dataset so
# that they are aligned by construction, instead of relying on two separate
# iterations of a shuffled dataset yielding the same order.
batch_probabilities, batch_labels = [], []
for images, labels in valid_ds_preprocess:
    batch_probabilities.append(model.predict_on_batch(images))
    batch_labels.append(labels.numpy())

probabilities = np.concatenate(batch_probabilities)
predictions = (probabilities.ravel() > 0.5).astype(int).tolist()
labels_valid = np.concatenate(batch_labels).ravel().astype(int).tolist()

# conf_matrix[actual, predicted]; integer counts so the cells render as
# "12" rather than "12.0".
conf_matrix = np.zeros([2, 2], dtype=int)
for actual, predicted in zip(labels_valid, predictions):
    conf_matrix[actual, predicted] += 1

true_pos = conf_matrix[POSITIVE, POSITIVE]
false_pos = conf_matrix[NEGATIVE, POSITIVE]  # healthy scan flagged as Covid
false_neg = conf_matrix[POSITIVE, NEGATIVE]  # Covid scan that was missed

accuracy = accuracy_score(labels_valid, predictions)
# F1 = 2*TP / (2*TP + FP + FN); defined as 0 when there is nothing to score.
f1_denominator = 2 * true_pos + false_pos + false_neg
f1_score = (2 * true_pos / f1_denominator) if f1_denominator else 0.0

# False positive / negative are reported as a fraction of all validation
# samples (not as per-class rates).
print('Val Accuracy: %.2f' % accuracy)
print('False positive: %.2f' % (false_pos/len(predictions)))
print('False negative: %.2f' % (false_neg/len(predictions)))
print('F1-score: %.2f' % (f1_score))

fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i, s=str(conf_matrix[i, j]), va='center', ha='center', size='xx-large')

plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)

ax.set_yticks([0,1])
ax.set_yticklabels(CLASSES)
ax.set_xticks([0,1])
ax.set_xticklabels(CLASSES)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

## Categorization results


In [None]:
# Grid dimensions for the preview figure.
rows = cols = 3

# Re-batch the validation data so that each element holds exactly one grid's
# worth of images, then keep an iterator so the next cell can be re-run to
# page through the set.
images_per_grid = rows * cols
dataset = valid_ds_preprocess.unbatch().batch(images_per_grid)
batch = iter(dataset)

In [None]:
# Draw the next group of validation images with "actual -> predicted" titles;
# misclassified images get a red title. Re-run the cell to see the next group.
img, label = next(batch)
probabilities = model.predict(img)

# The last group of the dataset may hold fewer than rows*cols images.
n_images = len(probabilities)

# squeeze=False keeps `ax` two-dimensional even for a 1x1 or single-row grid.
fig, ax = plt.subplots(rows, cols, figsize=(10, 10), squeeze=False)

# `ax.flat` walks the grid row by row, matching the order of the images.
for idx, axis in enumerate(ax.flat):
    axis.xaxis.set_major_locator(ticker.NullLocator())
    axis.yaxis.set_major_locator(ticker.NullLocator())

    if idx >= n_images:
        # No image left for this cell: leave it blank instead of failing.
        axis.axis('off')
        continue

    actual_name = CLASSES[int(label[idx][0])]
    predicted_name = CLASSES[int(probabilities[idx][0] > 0.5)]
    title_color = 'black' if actual_name == predicted_name else 'red'

    axis.set_title(actual_name + ' -> ' + predicted_name, color=title_color)
    # The dataset yields float pixels in the 0..255 range, which imshow would
    # clip to [0, 1] (washed-out image); cast to uint8 so they display as-is.
    axis.imshow(img[idx].numpy().astype("uint8"))