<a href="https://colab.research.google.com/github/AntonisGantzos/Tensorflow-ML_Projects/blob/main/Malaria_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Malaria detection using Tensorflow
The aim of this project is to use the TensorFlow framework to create, train, validate and subsequently deploy a Convolutional Neural Network (CNN) model that is given an image of a patient's blood sample and predicts whether the patient suffers from malaria or not.

In [None]:
!pip install -q tensorflow-datasets tensorflow

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_datasets as tfds
print(f"Tensorflow is using version : {tf.__version__}")


Tensorflow is using version : 2.17.0


In [None]:
# Load the dataset together with its metadata.
# shuffle_files is deliberately off: the train/validation/test subsets are
# carved out with take()/skip(), which only yields disjoint subsets when the
# dataset produces its examples in the same order on every iteration.
dataset, dataset_info = tfds.load('malaria', with_info=True, as_supervised=True, shuffle_files=False, split=['train'])

In [None]:
# split=['train'] was passed as a list, so `dataset` is a list with one entry
# per requested split - here a single entry.
# The iterable dataset itself is therefore dataset[0].
dataset, dataset[0]

In [None]:
dataset_info

In [None]:
# Fractions of the data assigned to the train, test and validation subsets.
TRAIN_RATIO, TEST_RATIO, VALIDATION_RATIO = 0.8, 0.1, 0.1

# Echo the chosen ratios so they are visible in the cell output.
for _split_name, _split_ratio in (
  ("Train", TRAIN_RATIO),
  ("Test", TEST_RATIO),
  ("Validation", VALIDATION_RATIO),
):
  print(f"{_split_name} ratio : {_split_ratio}")

In [None]:
#create a function to automate data split
def splits(dataset, TRAIN_RATIO, VALIDATION_RATIO, TEST_RATIO):
  """Split a dataset into consecutive train, validation and test subsets.

  The first ``int(TRAIN_RATIO * size)`` elements form the train subset, the
  next ``int(VALIDATION_RATIO * size)`` elements the validation subset, and
  every remaining element the test subset.

  Args:
    dataset: Dataset to split. It must support ``len()``, ``take()``,
      ``skip()`` and ``as_numpy_iterator()``.
    TRAIN_RATIO: Fraction of the elements assigned to the train subset.
    VALIDATION_RATIO: Fraction of the elements assigned to the validation
      subset.
    TEST_RATIO: Kept for symmetry with the other ratios but not used for
      sizing; the test subset is whatever remains after train and validation.

  Returns:
    A ``(train_dataset, val_dataset, test_dataset)`` tuple.
  """
  DATASET_SIZE = len(dataset)
  print(f"dataset size : {DATASET_SIZE}")

  # Compute each boundary once so the three subsets agree on where they meet.
  train_size = int(TRAIN_RATIO * DATASET_SIZE)
  val_size = int(VALIDATION_RATIO * DATASET_SIZE)

  # Only one sample per subset is printed below: dumping the complete dataset
  # would pull every element into memory just to produce unreadable output.
  train_dataset = dataset.take(train_size)
  print(f"train_dataset : {list(train_dataset.take(1).as_numpy_iterator())}")

  val_dataset = dataset.skip(train_size).take(val_size)
  print(f"validation dataset : {list(val_dataset.take(1).as_numpy_iterator())}")

  test_dataset = dataset.skip(train_size + val_size)
  print(f"test dataset : {list(test_dataset.take(1).as_numpy_iterator())}")

  return train_dataset, val_dataset, test_dataset

In [None]:
train_dataset, val_dataset, test_dataset = splits(dataset[0].take(1500), TRAIN_RATIO, VALIDATION_RATIO, TEST_RATIO)

# Data Visualization

In [None]:
# Preview up to 16 training images in a 4x4 grid, each titled with its class name.
label_feature = dataset_info.features['label']
for position, (image, label) in enumerate(train_dataset.take(16), start=1):
  ax = plt.subplot(4, 4, position)
  plt.imshow(image)
  plt.title(label_feature.int2str(label))
  plt.axis('off')

# Data Preprocessing

In CNN models, images are often resized to a specific size like (224, 224, 3) for the following reasons:

- Consistency: CNNs require all input images to have the same dimensions, so resizing standardizes the input.

- Pretrained Models: Many popular models (e.g., VGG, ResNet) are trained on ImageNet, which uses 224x224 images.

- Efficiency: Keeping the image size fixed helps reduce computational cost while preserving important spatial information.
- Channels: The third dimension (3) corresponds to RGB color channels.

In [None]:
# Side length, in pixels, that every image is resized to.
IMG_SIZE = 224


def resize_rescale(image, label):
  """Resize `image` to IMG_SIZE x IMG_SIZE and divide its values by 255.

  The label is returned unchanged so the function can be used with
  `Dataset.map` on (image, label) pairs.
  """
  resized = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
  return resized / 255.0, label

In [None]:
# Run the same resize/rescale preprocessing over each of the three subsets.
train_dataset, test_dataset, val_dataset = (
  subset.map(resize_rescale)
  for subset in (train_dataset, test_dataset, val_dataset)
)

In [None]:
for image, label in train_dataset.take(1):
  print(f"image shape : {image.shape}")
  print(f"label : {label}")
  print(image, label)

In [None]:
# Now that the images are resized and rescaled, shuffle the training dataset,
# group it into batches of 32 and prefetch, before constructing the neural
# network model.
# NOTE(review): buffer_size=8 presumably gives only a very local shuffle - confirm this is intended.
train_dataset = train_dataset.shuffle(buffer_size=8, reshuffle_each_iteration = True).batch(batch_size=32).prefetch(tf.data.AUTOTUNE)

In [None]:
# The validation data goes through a shuffle -> batch(32) -> prefetch pipeline.
val_dataset = val_dataset.shuffle(buffer_size=8, reshuffle_each_iteration = True).batch(batch_size=32).prefetch(tf.data.AUTOTUNE)

In [None]:
# The test data goes through a shuffle -> batch(32) -> prefetch pipeline.
test_dataset = test_dataset.shuffle(buffer_size=8, reshuffle_each_iteration = True).batch(batch_size=32).prefetch(tf.data.AUTOTUNE)

# CNN Model Implementation

Resources on how a CNN Model works

- CNN Explainer : https://poloclub.github.io/cnn-explainer/
- Convolutional Neural Networks (CNNs) explained : https://youtu.be/YRhxdVk_sIs?si=puUIKMWZIy-jqnPC
- Convolution Introduction : Collaborative filtering, embeddings, and more  https://course17.fast.ai/lessons/lesson4.html
- We will be copying the architecture of the LeNet CNN : https://youtu.be/PcGCpxstTCg?si=zV3Dd57ipjnepFCq

Basically, convolution in the network occurs between our input and a randomly initialized tensor (the kernel) whose shape we set. We take the dot product of the kernel with a patch of the input of the same shape, and repeat the process for every position of the kernel over the input.

The dot product (also known as the scalar product) of two matrices is a way to multiply corresponding elements of two arrays (vectors or matrices) and then sum the results.

In the context of matrices, the dot product is commonly used in matrix multiplication, particularly for multiplying rows by columns to generate elements of the result matrix.


In [None]:
from keras.layers import Dense, InputLayer, Conv2D, Flatten, BatchNormalization, MaxPool2D
from keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
import numpy as np

# LeNet-style binary classifier: two Conv2D -> BatchNormalization -> MaxPool2D
# stages followed by a small fully connected head ending in one sigmoid unit.
# The input shape is declared with tf.keras.Input because passing input_shape
# to InputLayer is deprecated in recent Keras releases.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    # 1st convolution layer and pool layer
    Conv2D(filters=6, kernel_size=3, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),
    # 2nd convolution layer and pool layer
    Conv2D(filters=6, kernel_size=3, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),
    # Fully connected head
    Flatten(),
    Dense(100, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='relu'),
    BatchNormalization(),
    # A single sigmoid unit produces the binary-classification probability.
    Dense(1, activation='sigmoid'),
])

model.summary()

The reason the activation functions in the hidden layers were changed from sigmoid to ReLU (Rectified Linear Unit) while keeping the last layer's activation function as sigmoid is based on the specific roles and properties of these activation functions in neural networks:

# Hidden Layers with ReLU:
ReLU is commonly used in hidden layers because it addresses some key problems that arise with the sigmoid activation:
- Avoiding Vanishing Gradients: Sigmoid functions can cause vanishing gradients, where gradients become very small during backpropagation, leading to slow or stalled learning. ReLU avoids this by keeping the gradient large for positive values.
- Better Performance: ReLU is computationally simple and more efficient than sigmoid for deeper networks because it introduces non-linearity while maintaining positive outputs for positive inputs and keeping zero for negative inputs. This helps the model learn complex patterns effectively.
- Faster Convergence: ReLU tends to result in faster training and more efficient learning because it doesn't saturate like sigmoid does.

# Final Layer with Sigmoid:
Sigmoid is still used in the last layer because this layer is responsible for outputting a probability or binary classification result:
- Output Between 0 and 1: Sigmoid maps its input to a value between 0 and 1, making it ideal for binary classification tasks. Since the model’s output is a single value (Dense(1)), it needs to represent the probability of one class versus another.
- Binary Classification: In your case, the last layer produces a single output, and with the sigmoid activation, this value represents the probability that the input belongs to one class (e.g., 1) or the other class (e.g., 0).

# Summary:
ReLU is used in the hidden layers because it helps prevent vanishing gradients, improves efficiency, and leads to faster convergence in deeper networks.
Sigmoid is kept in the final layer because it is well-suited for binary classification problems, where you want an output between 0 and 1 to represent a probability.

In [None]:
#set up loss and optimizers
#since we are working on a binary classification problem we will use BinaryCrossEntropyLoss
loss = tf.keras.losses.BinaryCrossentropy()
optim = tf.keras.optimizers.Adam(learning_rate=0.01)
#metrics = ['accuracy']

In [None]:
model.compile(optimizer=optim, loss=loss, metrics=['accuracy'])

In [None]:
optim.learning_rate

In [None]:
# Peek at the first batch only, to confirm the shapes that are fed to the model.
for data, label in train_dataset.take(1):
    print("Data shape:", data.shape)
    print("Label shape:", label.shape)

During training and evaluation we will need to be wary that our model does not suffer from :
- overfitting
- underfitting

This source explains both issues and how to resolve them in detail : https://www.youtube.com/watch?v=W-0-u6XVbE4

Additional Resources : https://www.geeksforgeeks.org/underfitting-and-overfitting-in-machine-learning/

For this particular model, experimentation has shown that the following adjustments produce good results in terms of the metrics that we used:
- reduce the dataset records to around 2000
- reduce the kernel size of our CNN model to 3
- reduce the filters of our CNN model to 6 in both layers. Both of these adjustments are made to reduce the complexity of the model
- add ```BatchNormalization()``` to normalize the input (for more on how normalization works see https://github.com/AntonisGantzos/Tensorflow-ML_Projects/blob/main/Second_hand_car_prices_prediction_(TensorFlow_Regression_project).ipynb)
- adjust the optimizer's learning rate to about 0.01
- set the epochs that the model will train to around 15

In [None]:
history = model.fit(train_dataset,validation_data=val_dataset, epochs=15, verbose = 1)

In [None]:
# Plot the training and validation loss recorded for each epoch.
import matplotlib.pyplot as plt
for history_key in ('loss', 'val_loss'):
  plt.plot(history.history[history_key])

plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.legend(['train', 'val'], loc='upper right')
plt.show()

In [None]:
# Plot the training and validation accuracy recorded for each epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# Accuracy curves usually end near the top of the axes, so the legend goes in
# the lower right corner to stay clear of them.
plt.legend(['train', 'val'], loc='lower right')
# Render the figure explicitly, as the loss plot does, instead of leaving the
# Legend object as the cell's trailing output.
plt.show()

# Model Evaluation

In [None]:
model.evaluate(test_dataset)

In [None]:
def parasite_or_not(x):
  """Map a score to a class letter: "P" when `x` is below 0.5, otherwise "U"."""
  return "P" if x < 0.5 else "U"

In [None]:
#a probability closer to 0 means that our input image is most likely that of an infected patient while a probability closer to 1 means it is probably an image of an uninfected
parasite_or_not(model.predict(test_dataset.take(1))[0][0])

In [None]:
# For up to 9 test batches, show the first image of the batch with a title of
# the form "<predicted>  : <actual>".
for i, (image, label) in enumerate(test_dataset.take(9)):
  predicted = parasite_or_not(model.predict(image)[0][0])
  actual = parasite_or_not(label.numpy()[0])
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(image[0])
  plt.title(f"{predicted}  : {actual}")
  plt.axis('off')

# Save and Load the Model

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Define the path where you want to save the model on Google Drive.
# NOTE(review): the .h5 suffix presumably selects the legacy HDF5 format in
# recent Keras releases - confirm whether the native .keras format is preferable.
model_save_path = '/content/drive/My Drive/model_name.h5'

# Save the entire model (weights + configuration)
model.save(model_save_path)
print(f"Model saved to {model_save_path}")


In [None]:
# Load the saved model from Google Drive
model_load_path = '/content/drive/My Drive/model_name.h5'
loaded_model = tf.keras.models.load_model(model_load_path)
print(f"Model {loaded_model} loaded successfully.")

## Implementing Functional API
So far we have only been using the Sequential API for the creation of our models. This works fine in classification problems, but for models with more than one input and output (which are used in types of problems like finding the position of the parasitic cell in a given image, for example), the Functional API is our best approach. Note that the cells below build the model by subclassing `Layer` and `Model`, a closely related way of composing layers freely.

It also allows us to create more complex models that provide better results. A good example of such an architecture is ResNet (https://www.geeksforgeeks.org/residual-networks-resnet-deep-learning/)

Additional Resources
- ResNet (actually) explained in under 10 minutes :
https://www.youtube.com/watch?v=o_3mboe1jYI

In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import Model, Input, Layer
from keras.layers import Dense, InputLayer, Conv2D, Flatten, BatchNormalization, MaxPool2D
class Feature_Extractor(keras.layers.Layer):
  """Convolutional feature extractor with two Conv2D -> BatchNormalization -> MaxPool2D stages.

  The second stage uses twice as many filters as the first. Only the layers
  that `call` actually applies are created, so the layer holds no sub-layers
  that never take part in the forward pass.

  Args:
    filters: Number of filters of the first convolution.
    kernel_size: Kernel size shared by both convolutions.
    strides: Stride of both convolutions; each pooling layer uses `2 * strides`.
    padding: Padding mode of both convolutions.
    activation: Activation applied by both convolutions.
    pool_size: Window size of both pooling layers.
    **kwargs: Forwarded to `keras.layers.Layer` (for example `name`).
  """

  def __init__(self, filters, kernel_size, strides, padding, activation, pool_size, **kwargs):
    super().__init__(**kwargs)

    # Stage 1
    self.conv_1 = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation)
    self.batch_1 = BatchNormalization()
    self.pool_1 = MaxPool2D(pool_size=pool_size, strides=2 * strides)

    # Stage 2: same configuration with twice the number of filters.
    self.conv_2 = Conv2D(filters=filters * 2, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation)
    self.batch_2 = BatchNormalization()
    self.pool_2 = MaxPool2D(pool_size=pool_size, strides=2 * strides)

  def build(self, input_shape):
    # No weights are created manually here; all sub-layers are standard layers.
    # Calling the parent implementation marks this layer as built.
    super().build(input_shape)

  def call(self, x):
    """Apply both stages to `x` and return the resulting feature maps."""
    x = self.conv_1(x)
    x = self.batch_1(x)
    x = self.pool_1(x)

    x = self.conv_2(x)
    x = self.batch_2(x)
    x = self.pool_2(x)

    return x

feature_extractor = Feature_Extractor(8, 3, 1, "valid", "relu", 2)
#feature_extractor.build(input_shape=(1, 224, 224, 3))  # Specify the input shape (batch size, height, width, channels)
x = tf.zeros([1, 224, 224, 3])  # A sample input
feature_extractor(x)


In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import Model, Input
from keras.layers import Dense, InputLayer, Conv2D, Flatten, BatchNormalization, MaxPool2D
from keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
functional_api_input = Input(shape= (IMG_SIZE,IMG_SIZE,3), name = "Input Image")

class Model_v_2(keras.models.Model):
  """Subclassed model: a Feature_Extractor followed by a dense head with one sigmoid output."""

  def __init__(self):
    super().__init__()

    # Convolutional front end.
    self.feature_extractor = Feature_Extractor(8, 3, 1, "valid", "relu", 2)

    # Collapse the feature maps before the dense head.
    self.flatten = Flatten()

    # Dense head: two hidden layers, each followed by batch normalization.
    self.dense_1 = Dense(100, activation = "relu")
    self.batch_1 = BatchNormalization()

    self.dense_2 = Dense(10, activation = "relu")
    self.batch_2 = BatchNormalization()

    # Single sigmoid unit as the output layer.
    self.dense_3 = Dense(1, activation = "sigmoid")

  def call(self, x):
    """Pass `x` through every stage in order and return the final output."""
    stages = (
      self.feature_extractor,
      self.flatten,
      self.dense_1,
      self.batch_1,
      self.dense_2,
      self.batch_2,
      self.dense_3,
    )
    for stage in stages:
      x = stage(x)
    return x

model_v_2 = Model_v_2()
model_v_2(tf.zeros([1, 224, 224, 3]))  # Specify the input shape (batch size, height, width, channels)
model_v_2.summary()

In [None]:
model_v_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=loss, metrics=['accuracy'])

In [None]:
history = model_v_2.fit(train_dataset,validation_data=val_dataset, epochs=15, verbose = 1)