<a href="https://colab.research.google.com/github/Jorgecardetegit/Malaria_Analysis_TFDS/blob/main/Basic_Model_Deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


## Model Deployment

In [10]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, InputLayer, BatchNormalization, Input, Dropout, RandomFlip, Resizing, Rescaling
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TruePositives, TrueNegatives, Precision, Recall, AUC
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import CSVLogger, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.regularizers  import L2, L1
from tensorflow.train import Example, Features, Feature, BytesList, FloatList, Int64List

import tensorflow_datasets as tfds

from sklearn.metrics import confusion_matrix, roc_curve

from collections import defaultdict
from PIL import Image

import os
import io
import random

### Parameters definition

In [11]:
# Load the "train" split of the TFDS "malaria" dataset as (image, label) pairs,
# together with the dataset metadata. Because `split` is given as a
# one-element list, the first returned value is a one-element list of
# datasets; it is unpacked straight into `dataset`.
(dataset,), info_dataset = tfds.load(
    "malaria",
    split=["train"],
    shuffle_files=True,
    with_info=True,
    as_supervised=True,
)

In [12]:
# Human-readable names for the two classes.
# NOTE(review): presumably index 0 = "infected" and 1 = "uninfected" — confirm
# against the label names reported by `info_dataset`.
CLASS_NAMES = ["infected", "uninfected"]

# Central place for every tunable value used in the notebook.
CONFIGURATION = dict(
    # Input pipeline
    BATCH_SIZE=32,
    IM_HEIGHT=256,
    IM_WIDTH=256,
    NORMALIZATION=255,          # pixel values are divided by this factor
    # Optimisation
    LEARNING_RATE=0.001,
    N_EPOCHS=100,
    # Regularisation
    DROPOUT_RATE=0.3,
    REGULARIZATION_RATE=0.0,
    # Convolutional feature extractor
    N_FILTERS=6,
    KERNEL_SIZE=3,
    N_STRIDES=1,
    POOL_SIZE=2,
    # Dense head
    N_DENSE_1=100,
    N_DENSE_2=10,
    NUM_CLASSES=2,
)

### Resizing and Rescaling (normalization)

In [4]:
# Shared preprocessing stack, built layer by layer:
#   1. resize every image to IM_HEIGHT x IM_WIDTH with bilinear interpolation
#   2. multiply pixel values by 1/NORMALIZATION
resize_rescale_layers = Sequential()
resize_rescale_layers.add(
    Resizing(
        CONFIGURATION["IM_HEIGHT"],
        CONFIGURATION["IM_WIDTH"],
        interpolation="bilinear",
    )
)
resize_rescale_layers.add(Rescaling(1./CONFIGURATION["NORMALIZATION"]))

@tf.function
def map_fn(images, labels):
    """Run the images through `resize_rescale_layers`; labels pass through untouched.

    Args:
        images: image tensor handed over by `Dataset.map`.
        labels: labels paired with `images`.

    Returns:
        A `(processed_images, labels)` tuple.
    """
    return resize_rescale_layers(images), labels

In [5]:
# Apply the resize/rescale preprocessing to every element; AUTOTUNE leaves the
# degree of parallelism to tf.data.
dataset = dataset.map(
    map_func=map_fn,
    num_parallel_calls=tf.data.AUTOTUNE,
)

### Data splitting

In [6]:
def splits(dataset, TRAIN_RATIO, VAL_RATIO, TEST_RATIO, seed = None):
  """Shuffle `dataset` once and cut it into train / validation / test subsets.

  The shuffle uses `reshuffle_each_iteration=False`. With the tf.data default
  (`True`) the dataset is re-shuffled every time it is iterated, so the
  `take`/`skip` boundaries would select different elements on each pass and
  the three subsets would leak into each other between epochs.

  Args:
    dataset: a sized dataset (must support `len()`, `shuffle`, `take`, `skip`).
    TRAIN_RATIO: fraction of the elements assigned to the training subset.
    VAL_RATIO: fraction of the elements assigned to the validation subset.
    TEST_RATIO: accepted for symmetry but not used; the test subset receives
      every element left after the train and validation subsets.
    seed: optional shuffle seed, for a reproducible split.

  Returns:
    A `(train_dataset, val_dataset, test_dataset)` tuple.
  """
  DATASET_SIZE = len(dataset)
  train_size = int(TRAIN_RATIO*DATASET_SIZE)
  val_size = int(VAL_RATIO*DATASET_SIZE)

  # NOTE(review): the shuffle buffer is as large as the dataset, so every
  # element is buffered at once — check that this fits in memory.
  dataset = dataset.shuffle(DATASET_SIZE, seed=seed, reshuffle_each_iteration=False)

  train_dataset = dataset.take(train_size)

  val_test_dataset = dataset.skip(train_size)
  val_dataset = val_test_dataset.take(val_size)

  test_dataset = val_test_dataset.skip(val_size)
  return train_dataset, val_dataset, test_dataset

TRAIN_RATIO = 0.8
VAL_RATIO = 0.1
TEST_RATIO = 0.1

In [7]:
# Partition the preprocessed dataset with the ratios defined above.
train_dataset, val_dataset, test_dataset = splits(
    dataset,
    TRAIN_RATIO=TRAIN_RATIO,
    VAL_RATIO=VAL_RATIO,
    TEST_RATIO=TEST_RATIO,
)

### Efficient data storage (TFRecords)

In [None]:
# @title TFRecord files creator

def create_example(image, label):
  """Pack one (image, label) pair into a serialized `tf.train.Example`.

  Args:
    image: bytes stored as a single-element BytesList under the key 'images'.
    label: integer stored as a single-element Int64List under the key 'labels'.

  Returns:
    The Example serialized with `SerializeToString()`.
  """
  feature_map = {
      'images': Feature(bytes_list=BytesList(value=[image])),
      'labels': Feature(int64_list=Int64List(value=[label])),
  }
  return Example(features=Features(feature=feature_map)).SerializeToString()

def encode_image(image, label):
  """Convert `image` to uint8 and JPEG-encode it; `label` is returned unchanged.

  Returns:
    An `(encoded_jpeg, label)` tuple.
  """
  as_uint8 = tf.image.convert_image_dtype(image, dtype=tf.uint8)
  return tf.io.encode_jpeg(as_uint8), label

# JPEG-encode every training image before it is written to disk.
encoded_dataset = train_dataset.map(encode_image)

NUM_SHARDS = 10
PATH = '/content/drive/MyDrive/Computer vision projects/Malaria disease/Malaria dataset/shard_{:02d}.tfrecord'

# Write all shards in ONE pass over the dataset, sending example i to shard
# i % NUM_SHARDS (the same assignment `Dataset.shard` would make).
# Iterating the dataset once per shard would re-run the whole pipeline
# NUM_SHARDS times and, if the element order differs between passes (e.g. an
# upstream shuffle that reshuffles on each iteration), could write some
# examples to several shards and others to none.
shard_writers = [tf.io.TFRecordWriter(PATH.format(shard_number)) for shard_number in range(NUM_SHARDS)]
try:
  for example_index, (encoded_image, encoded_label) in enumerate(encoded_dataset.as_numpy_iterator()):
    example = create_example(encoded_image, encoded_label)
    shard_writers[example_index % NUM_SHARDS].write(example)
finally:
  # Always flush and close every shard file, even if writing failed midway.
  for file_writer in shard_writers:
    file_writer.close()

In [None]:
# @title TFRecord Dataset creation
# Read the serialized examples back from the shard files.
# NOTE(review): only shards 0 .. NUM_SHARDS-3 are listed, so the last two
# shard files are never read — confirm this is intentional.
recons_dataset = tf.data.TFRecordDataset(
    filenames=list(map(PATH.format, range(NUM_SHARDS-2))))

def parse_tfrecords(example):
    """Decode one serialized Example into an `(image, label)` pair.

    Args:
        example: a serialized Example holding a string feature "images" and an
            int64 feature "labels".

    Returns:
        `(image, label)`: the JPEG decoded with 3 channels and converted to
        float32, and the int64 label.
    """
    schema = {
        "images": tf.io.FixedLenFeature([], tf.string),
        "labels": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, schema)

    decoded = tf.io.decode_jpeg(parsed["images"], channels = 3)
    image = tf.image.convert_image_dtype(decoded, dtype = tf.float32)

    return image, parsed["labels"]

# Parse -> batch -> prefetch, one stage per statement.
parsed_dataset = recons_dataset.map(parse_tfrecords)
parsed_dataset = parsed_dataset.batch(CONFIGURATION["BATCH_SIZE"])
parsed_dataset = parsed_dataset.prefetch(tf.data.AUTOTUNE)

### Data optimization

In [None]:
# Training pipeline: reshuffle within a 1024-element buffer on every epoch,
# then batch and prefetch.
train_dataset = (
    train_dataset
    .shuffle(buffer_size = 1024, reshuffle_each_iteration = True)
    .batch(CONFIGURATION["BATCH_SIZE"])
    .prefetch(tf.data.AUTOTUNE)
    )

# Validation pipeline: batch and prefetch only (no shuffling).
val_dataset = (
    val_dataset
    .batch(CONFIGURATION["BATCH_SIZE"])
    .prefetch(tf.data.AUTOTUNE)
    )

# Test pipeline. This must start from `test_dataset`: starting from the
# already batched `val_dataset` would throw the test split away and batch the
# validation data a second time.
test_dataset = (
    test_dataset
    .batch(CONFIGURATION["BATCH_SIZE"])
    .prefetch(tf.data.AUTOTUNE)
    )

### Callbacks

### CSV logger

In [None]:
# Append per-epoch training logs to a CSV file.
# NOTE(review): this is an absolute Windows path, while the TFRecord cell
# writes under /content/drive — confirm the log location for the environment
# the notebook actually runs in.
csv_callback = CSVLogger(
    filename = r"C:\Users\jorge\OneDrive\Imágenes\Documentos\Malaria Detection\Model logs\logs.csv",
    separator = ",",
    append = True,
)

### EarlyStopping

In [None]:
# Stop training once the validation loss has not improved for 3 epochs.
es_callback = EarlyStopping(
    # Quantity to watch.
    monitor = "val_loss",
    # An absolute change smaller than min_delta counts as no improvement.
    min_delta = 0,
    # Number of epochs without improvement after which training is stopped.
    patience = 3,
    verbose = 0,
    # "min": stop when the quantity stops decreasing; "max": stop when it stops
    # increasing; "auto": direction chosen by the callback.
    mode = "auto",
    # Optional baseline the monitored quantity has to improve on.
    baseline = None,
    # False: keep the weights from the last training step instead of restoring
    # those of the best epoch.
    restore_best_weights = False,
)

Reference: [Which parameters should be used for early stopping? (Stack Overflow)](https://stackoverflow.com/questions/43906048/which-parameters-should-be-used-for-early-stopping)

### Learning Rate Scheduler

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: hold for the first two epochs, then decay exponentially.

    Args:
        epoch: epoch index passed by the callback.
        lr: current learning rate.

    Returns:
        The learning rate to use, as a plain Python float: `lr` unchanged
        while `epoch <= 1`, otherwise `lr * exp(-0.1)`.
    """
    if epoch <= 1:
        return float(lr)
    # Computed with NumPy and cast to float so that no tf.Tensor is returned;
    # Keras versions that require a float from the schedule reject tensors.
    return float(lr * np.exp(-0.1))

# Wrap `scheduler` in a Keras callback; verbose=1 reports each update.
scheduler_callback = LearningRateScheduler(
    schedule = scheduler,
    verbose = 1,
)

- https://www.jeremyjordan.me/nn-learning-rate/
- https://datascience.stackexchange.com/questions/410/choosing-a-learning-rate
- https://proceedings.neurips.cc/paper_files/paper/2018/file/a41b3bb3e6b050b6c9067c67f663b915-Paper.pdf


### ReduceLROnPlateau

In [None]:
# Multiply the learning rate by 0.3 whenever "val_accuracy" has not improved
# for 5 epochs.
# NOTE(review): EarlyStopping above uses patience 3 on "val_loss", so training
# may stop before this callback gets a chance to fire — confirm the interplay.
plateau_callback = ReduceLROnPlateau(
    monitor = "val_accuracy",
    factor = 0.3,
    patience = 5,
    verbose = 1,
)

## Model definition

### Custom loss class

In [None]:
class CustomBCE(tf.keras.losses.Loss):
  """Binary cross-entropy multiplied by a constant factor.

  Args:
    FACTOR: scalar every per-sample loss value is multiplied by.
    name: optional loss name forwarded to the Keras base class.
    **kwargs: further base-class arguments (e.g. `reduction`).
  """
  def __init__(self, FACTOR, name = None, **kwargs):
    super(CustomBCE, self).__init__(name = name, **kwargs)
    self.FACTOR = FACTOR

  def call(self, y_true, y_pred):
    """Return the scaled per-sample binary cross-entropy.

    The functional `binary_crossentropy` is used instead of a
    `BinaryCrossentropy` object so the values are NOT reduced here: the base
    class applies the reduction (and any sample weights) afterwards, which it
    cannot do correctly on an already averaged scalar.
    """
    per_sample_bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    return per_sample_bce * self.FACTOR

  def get_config(self):
    """Include FACTOR in the config so the loss can be re-created from it."""
    config = super(CustomBCE, self).get_config()
    config.update({"FACTOR": self.FACTOR})
    return config

### Custom metrics class

In [None]:
class CustomAccuracy(tf.keras.metrics.Metric):
  """Binary accuracy accumulated over all batches seen since the last reset.

  Args:
    name: metric name, forwarded to the Keras base class.
    FACTOR: scalar the per-sample accuracy is multiplied by before the
      non-zero entries are counted.
  """
  def __init__(self, name = 'Custom_Accuracy', FACTOR = 1):
    # Forward `name` so the metric is reported under the requested name.
    super(CustomAccuracy, self).__init__(name = name)
    self.FACTOR = FACTOR
    # Running totals; the accuracy is derived from them in `result`.
    self.correct = self.add_weight(name = 'correct', initializer = 'zeros')
    self.total = self.add_weight(name = 'total', initializer = 'zeros')

  def update_state(self, y_true, y_pred, sample_weight = None):
    """Add one batch to the running totals (`sample_weight` is ignored)."""
    # Fully qualified call: `binary_accuracy` is not imported by name.
    output = tf.keras.metrics.binary_accuracy(tf.cast(y_true, dtype = tf.float32), y_pred)*self.FACTOR
    # Accumulate instead of overwriting, so the result covers every batch
    # rather than only the most recent one.
    self.correct.assign_add(tf.math.count_nonzero(output, dtype = tf.float32))
    # tf.size also works on symbolic tensors, unlike len().
    self.total.assign_add(tf.cast(tf.size(output), dtype = tf.float32))

  def result(self):
    """Return correct / total, or 0 when nothing has been accumulated."""
    return tf.math.divide_no_nan(self.correct, self.total)

  def reset_state(self):
    """Clear the running totals."""
    self.correct.assign(0.)
    self.total.assign(0.)

  def reset_states(self):
    """Alias of `reset_state`, kept for callers that use the older name."""
    self.reset_state()

# Scaling factor handed to the custom loss / metric classes.
FACTOR = 1

### Model metrics definition

In [None]:
# Metrics reported during training and evaluation, one per line.
metrics = [
    # Confusion-matrix counts
    TruePositives(name='tp'),
    FalsePositives(name='fp'),
    TrueNegatives(name='tn'),
    FalseNegatives(name='fn'),
    # Aggregate scores
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]

## Model Architecture (LeNet)

### Feature extractor (sequential API)

In [13]:
# Convolutional feature extractor: two identical stages of
# Conv2D (3x3, stride 1, valid padding, ReLU, L2 0.01) -> BatchNormalization
# -> MaxPool2D (2x2, stride 2), with 6 and then 16 filters.
# NOTE(review): the literals below duplicate CONFIGURATION entries (N_FILTERS,
# KERNEL_SIZE, N_STRIDES, POOL_SIZE), and the L2 rate 0.01 differs from
# CONFIGURATION["REGULARIZATION_RATE"] (0.0) — confirm which value is intended.
feature_extractor_seq_model = tf.keras.Sequential()
feature_extractor_seq_model.add(
    InputLayer(input_shape = (CONFIGURATION["IM_HEIGHT"], CONFIGURATION["IM_WIDTH"], 3)))

for n_filters in (6, 16):
    feature_extractor_seq_model.add(
        Conv2D(filters = n_filters, kernel_size = 3, strides = 1, padding = 'valid',
               activation = 'relu', kernel_regularizer = L2(l2 = 0.01)))
    feature_extractor_seq_model.add(BatchNormalization())
    feature_extractor_seq_model.add(MaxPool2D(pool_size = 2, strides = 2))

In [14]:
# Print the layer-by-layer output shapes and parameter counts of the feature extractor.
feature_extractor_seq_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 254, 254, 6)       168       
                                                                 
 batch_normalization_2 (Bat  (None, 254, 254, 6)       24        
 chNormalization)                                                
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 127, 127, 6)       0         
 g2D)                                                            
                                                                 
 conv2d_3 (Conv2D)           (None, 125, 125, 16)      880       
                                                                 
 batch_normalization_3 (Bat  (None, 125, 125, 16)      64        
 chNormalization)                                                
                                                      

### Callable Model (functional API)

In [15]:
# Full classifier assembled with the functional API:
# input -> convolutional feature extractor -> flatten -> two dense stages
# (Dense + Dropout + BatchNormalization) -> single sigmoid unit.
func_input = Input(
    shape = (CONFIGURATION["IM_HEIGHT"], CONFIGURATION["IM_WIDTH"], 3),
    name = "Input Image",
)

x = Flatten()(feature_extractor_seq_model(func_input))

# Dense stages with 100 and then 10 units.
# NOTE(review): these sizes duplicate CONFIGURATION["N_DENSE_1"] / ["N_DENSE_2"].
for n_units in (100, 10):
    x = Dense(n_units, activation = "relu", kernel_regularizer = L2(l2 = 0.01))(x)
    x = Dropout(CONFIGURATION["DROPOUT_RATE"])(x)
    x = BatchNormalization()(x)

func_output = Dense(1, activation = "sigmoid", kernel_regularizer = L2(l2 = 0.01))(x)

lenet_model = Model(func_input, func_output, name = "Lenet_Model")

In [16]:
# Print the layer-by-layer output shapes and parameter counts of the full model.
lenet_model.summary()

Model: "Lenet_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input Image (InputLayer)    [(None, 256, 256, 3)]     0         
                                                                 
 sequential_1 (Sequential)   (None, 62, 62, 16)        1136      
                                                                 
 flatten (Flatten)           (None, 61504)             0         
                                                                 
 dense (Dense)               (None, 100)               6150500   
                                                                 
 dropout (Dropout)           (None, 100)               0         
                                                                 
 batch_normalization_4 (Bat  (None, 100)               400       
 chNormalization)                                                
                                                       

### Model training

In [None]:
# Compile with Adam, the custom scaled BCE loss and the metrics list defined above.
# The loss object is passed directly: wrapping it as
# BinaryCrossentropy(CustomBCE(FACTOR)) would hand it over as the first
# positional argument (`from_logits`), where a truthy object makes the loss
# treat the sigmoid outputs as logits.
lenet_model.compile(
    optimizer = Adam(learning_rate = CONFIGURATION['LEARNING_RATE']),
    loss = CustomBCE(FACTOR),          # custom loss class defined above
    metrics = metrics,                 # metrics list defined above
)

In [None]:
# Train for at most N_EPOCHS epochs, validating after each one; the callbacks
# handle CSV logging, early stopping and the two learning-rate policies.
history = lenet_model.fit(
    x = train_dataset,
    epochs = CONFIGURATION['N_EPOCHS'],
    validation_data = val_dataset,
    callbacks = [
        csv_callback,
        es_callback,
        scheduler_callback,
        plateau_callback,
    ],
    verbose = 1,
)

Reference: [Regularization in Neural Networks (Pinecone)](https://www.pinecone.io/learn/regularization-in-neural-networks/)