## Importing necessary libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.optimizers as Optimizer
from tensorflow.keras import layers 
from tensorflow.keras.applications.vgg16 import VGG16


from sklearn.model_selection import train_test_split
import sklearn.metrics
from sklearn.metrics import roc_curve, auc, precision_recall_curve


import numpy as np

# Fix the seed of the `numpy` pseudo-random generator.
# This helps make results repeatable every time you run the code.
np.random.seed(1000)

import os
import cv2 as cv
import plotly.express as px

ModuleNotFoundError: No module named 'cv2'

## Data Preprocessing

In [None]:
# Load every PNG image from the 'Parasitized' and 'Uninfected' folders, resize it
# to SIZE x SIZE, and store the pixel array in `dataset` with a matching class id
# in `label` (0 = parasitized, 1 = uninfected).

image_directory = 'images/'
SIZE = 224
dataset = []  # One resized image array per entry.
label = []    # Class id for the image at the same index in `dataset`.


def _load_class_images(class_folder, class_label):
    """Append every readable PNG found in one class folder to `dataset`/`label`.

    Args:
        class_folder: Folder name relative to `image_directory`.
        class_label: Integer class id stored in `label` for each loaded image.

    Returns:
        The sorted list of all file names found in the folder (PNG or not).
    """
    folder = os.path.join(image_directory, class_folder)
    # os.listdir returns names in arbitrary order; sorting keeps the dataset
    # order (and therefore the seeded train/test split) repeatable across runs.
    image_names = sorted(os.listdir(folder))
    for image_name in image_names:
        # Only the final extension is inspected, so names without a dot no
        # longer raise IndexError and names with several dots are not skipped.
        if os.path.splitext(image_name)[1].lower() != '.png':
            continue
        image = cv.imread(os.path.join(folder, image_name))
        if image is None:
            # cv.imread signals an unreadable file by returning None instead of
            # raising; skip it rather than letting cv.resize crash.
            print('Skipping unreadable image:', os.path.join(folder, image_name))
            continue
        dataset.append(np.array(cv.resize(image, (SIZE, SIZE))))
        label.append(class_label)
    return image_names


# Parasitized cells are labelled 0, uninfected cells are labelled 1.
parasitized_images = _load_class_images('Parasitized/', 0)
uninfected_images = _load_class_images('Uninfected/', 1)

## Model definition

In [None]:
def vgg16_pretrain_cnn_model(img_size):
    """Build and compile a binary classifier on top of a frozen VGG16 base.

    Args:
        img_size: Height and width of the square, 3-channel input images
            (e.g. 224).

    Returns:
        The compiled Keras model. Its summary is printed as a side effect.
    """
    # Architecture reference:
    # https://www.analyticsvidhya.com/blog/2020/08/top-4-pre-trained-models-for-image-classification-with-python-code/
    input_shape = (img_size, img_size, 3)
    backbone = VGG16(
        weights='imagenet',
        include_top=False,  # drop VGG16's own fully connected classifier
        input_shape=input_shape,
    )

    # Freeze every pretrained layer so only the new head is trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head: flatten -> Dense(512, ReLU) -> Dropout(0.5) -> sigmoid unit.
    head = layers.Flatten()(backbone.output)
    head = layers.Dense(512, activation='relu')(head)
    head = layers.Dropout(0.5)(head)
    output = layers.Dense(1, activation='sigmoid')(head)

    model = tf.keras.models.Model(backbone.input, output)
    adam = Optimizer.Adam(learning_rate=0.0001)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    model.summary()
    return model

## Evaluation Metrics

In [None]:
def metrics(test_y, predictions):
    """Score predicted class labels against the true labels.

    Args:
        test_y: Ground-truth labels.
        predictions: Predicted labels, aligned with `test_y`.

    Returns:
        A tuple `(accuracy, precision, recall, f1)` as computed by the
        corresponding `sklearn.metrics` scoring functions.
    """
    scorers = (
        sklearn.metrics.accuracy_score,
        sklearn.metrics.precision_score,
        sklearn.metrics.recall_score,
        sklearn.metrics.f1_score,
    )
    return tuple(score(test_y, predictions) for score in scorers)

In [None]:
def show_roc_and_pr_curve(test_y, probas,a,p,r,f1):
    """Display the ROC curve and the Precision-Recall curve as plotly area charts.

    Args:
        test_y: Ground-truth binary labels.
        probas: Predicted scores/probabilities for the positive class.
        a: Accuracy, shown in the ROC title.
        p: Precision, shown in the Precision-Recall title.
        r: Recall, shown in the Precision-Recall title.
        f1: F1 score, shown in the Precision-Recall title.

    Returns:
        None. Both figures are rendered with `.show()`.
    """
    # The thresholds returned by both curve helpers are not needed for plotting.
    fpr, tpr, _ = roc_curve(test_y, probas)
    precision, recall, _ = precision_recall_curve(test_y, probas)

    roc_auc = auc(fpr, tpr)
    # Area under the PR curve must be integrated over (recall, precision);
    # the ROC AUC was previously reported here by mistake.
    pr_auc = auc(recall, precision)

    roc_fig = px.area(
        x=fpr, y=tpr,
        title=f'ROC Curve (AUC={roc_auc:.4f}), Accuracy={a:.2f}',
        labels=dict(x='False Positive Rate', y='True Positive Rate'),
        width=700, height=500
    )
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    roc_fig.add_shape(
        type='line', line=dict(dash='dash'),
        x0=0, x1=1, y0=0, y1=1
    )

    # Keep both axes on the same scale so the plot stays square.
    roc_fig.update_yaxes(scaleanchor="x", scaleratio=1)
    roc_fig.update_xaxes(constrain='domain')
    roc_fig.show()

    pr_fig = px.area(
        x=recall, y=precision,
        title=f'Precision-Recall Curve (AUC={pr_auc:.4f}), Precision={p:.2f}, Recall={r:.2f}, F1={f1:.2f}',
        labels=dict(x='Recall', y='Precision'),
        width=700, height=500
    )
    # Dashed diagonal from (0, 1) to (1, 0) as a visual reference.
    pr_fig.add_shape(
        type='line', line=dict(dash='dash'),
        x0=0, x1=1, y0=1, y1=0
    )
    pr_fig.update_yaxes(scaleanchor="x", scaleratio=1)
    pr_fig.update_xaxes(constrain='domain')
    pr_fig.show()

In [None]:
### Split the dataset
# Split the dataset into training and testing dataset.
# 1. Training data: 80%
# 2. Testing data: 20%

TEST_FRACTION = 0.20        # share of the data held out for testing
VALIDATION_FRACTION = 0.1   # share of the training data used for validation
BATCH_SIZE = 32
EPOCHS = 15
DECISION_THRESHOLD = 0.5    # probability above which a sample is predicted as class 1

X_train, X_test, y_train, y_test = train_test_split(dataset, label, test_size = TEST_FRACTION, random_state = 0)

# train_test_split returns Python lists when given lists. Keras treats a list of
# arrays as several separate model inputs, so both image sets are stacked into a
# single array before being handed to fit/predict.
X_train = np.array(X_train)
X_test = np.array(X_test)

# Labels as int32 column vectors, one row per sample.
y_train = np.asarray(y_train).astype('int32').reshape((-1,1))
y_test = np.asarray(y_test).astype('int32').reshape((-1,1))

model = vgg16_pretrain_cnn_model(SIZE)

model.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, validation_split = VALIDATION_FRACTION, shuffle = False)

# Turn the sigmoid outputs into a flat vector of probabilities, then into 0/1 labels.
preds = model.predict(X_test)
flatten_preds_probas = preds.flatten()
actual_preds = (flatten_preds_probas > DECISION_THRESHOLD).astype("int32")

accuracy, precision, recall, f1_score = metrics(y_test,actual_preds)
print("Accuracy: ", accuracy, "Precision: ", precision, "Recall: ", recall, "F1_score: ", f1_score)
show_roc_and_pr_curve(y_test, flatten_preds_probas,accuracy,precision,recall,f1_score)