# Code details

## Summary
This notebook contains the main classes that make up our neural network:
- #### The Model 
Class that sets up the architecture of our Convolutional Neural Network (CNN).
- #### The Loader
Class that takes charge of the pre-processing of the training and testing datasets.
- #### The Main 
Entry point that initializes the run parameters (image size, data paths, number of epochs and batch size), then trains and saves the model.

## CNN Model

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
from tensorflow import keras

import tensorflow as tf
import datetime


class CnnModel:
    """Small convolutional classifier built with ``keras.Sequential``.

    The network stacks three ``Conv2D`` layers (the first two each followed by
    max-pooling and dropout), then flattens the feature maps and feeds them to
    two ``Dense`` layers. The last ``Dense`` layer has ``classNumber`` units and
    no activation, so the model outputs raw logits; the loss is therefore built
    with ``from_logits=True``.
    """

    def __init__(self, fileNameNeural, inputShape, classNumber, active_log=False):
        """Build and compile the model.

        Args:
            fileNameNeural: Base file name stored on the instance (without
                extension).
            inputShape: Shape of one input sample, passed as ``input_shape`` to
                the first ``Conv2D`` layer.
            classNumber: Number of units of the final ``Dense`` layer.
            active_log: When true, a TensorBoard callback writing to a
                timestamped directory under ``logs/fit/`` is created.
        """
        self.fileNameNeural = fileNameNeural
        self.inputShape = inputShape
        self.classNumber = classNumber
        self.active_log = active_log

        # Logging attributes always exist; they stay None when logging is off.
        self.log_dir = None
        self.tensorboard_callback = None

        # set cnn model
        self.model = keras.Sequential([
            keras.layers.Conv2D(32, 3, activation='relu', input_shape=self.inputShape),
            keras.layers.MaxPooling2D(2),
            keras.layers.Dropout(0.2),
            keras.layers.Conv2D(64, 3, activation='relu'),
            keras.layers.MaxPooling2D(2),
            keras.layers.Dropout(0.2),
            keras.layers.Conv2D(64, 3, activation='relu'),
            keras.layers.Flatten(),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(self.classNumber)
        ])

        # compile model (done exactly once)
        self.model.compile(optimizer='adam',
                           loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                           metrics=['accuracy'])

        if self.active_log:
            # complete logs: one directory per run, named after the creation time
            self.log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            self.tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=self.log_dir, histogram_freq=1)

A convolutional neural network is composed of 2 types of layers:
- convolutional layers, which combine convolution, max-pooling and dropout operations
```python
keras.layers.Conv2D(32, 3, activation='relu', input_shape=self.inputShape),
keras.layers.MaxPooling2D(2),
keras.layers.Dropout(0.2),
```
- a classic (fully connected) neural network that starts by flattening the output of the convolutional layers and ends with one output per class of the dataset
```python
keras.layers.Flatten(),
keras.layers.Dense(64, activation='relu'),
keras.layers.Dense(self.classNumber)
```


In [2]:
class CnnModel(CnnModel):
    """Adds training, persistence and evaluation methods to ``CnnModel``.

    The class subclasses the ``CnnModel`` defined in an earlier cell and reuses
    its name, so the methods below become available on ``CnnModel`` objects
    created after this cell has run.
    """

    def train(self, train_data, train_label, test_data, test_label, epochNumber):
        """Fit the model for ``epochNumber`` epochs.

        ``(test_data, test_label)`` is passed to ``fit`` as validation data.
        The TensorBoard callback is attached only when ``self.active_log`` is
        true.
        """
        fit_options = {
            'validation_data': (test_data, test_label),
            'epochs': epochNumber,
        }
        if self.active_log:
            fit_options['callbacks'] = [self.tensorboard_callback]
        self.model.fit(train_data, train_label, **fit_options)

    def load(self, fileName):
        """Replace ``self.model`` with the model stored in ``fileName + '.h5'``.

        The loaded model is compiled again with the same optimizer, loss and
        metric as in the constructor.
        """
        model_path = fileName + '.h5'
        self.model = keras.models.load_model(model_path)
        logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        self.model.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])

    def save(self):
        """Write the current model to ``self.fileNameNeural + '.h5'``."""
        target_path = self.fileNameNeural + '.h5'
        self.model.save(target_path)

    def evaluate(self, test_images, test_labels):
        """Evaluate the model on the given data and print accuracy and loss."""
        results = self.model.evaluate(test_images, test_labels, verbose=3)
        # The model is compiled with a single metric, so results is (loss, accuracy).
        test_loss, test_acc = results
        print('\nTest accuracy:', test_acc)
        print('\nLoss: ', test_loss)


##  Loader

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import pathlib
import numpy as np
import matplotlib.pyplot as plt
import os


class Loader:
    """Loads images from a directory tree and returns one batch as NumPy arrays.

    Files are listed with the glob ``<pathToData>/*/*``. The name of each file's
    parent directory is compared against ``className`` to build a boolean
    one-hot label.
    """

    def __init__(self, pathToData, batchSize, height, width, className):
        """Store the loading parameters and list the files to load.

        Args:
            pathToData: Root directory of the dataset.
            batchSize: Number of samples per batch.
            height: Stored as ``IMG_HEIGHT``.
            width: Stored as ``IMG_WIDTH``.
            className: Sequence of class names; each is compared with the
                parent directory name of a file.
        """
        # loading parameter
        self.BATCH_SIZE = batchSize
        self.IMG_HEIGHT = height
        self.IMG_WIDTH = width
        # load data dir
        self.data_dir = pathlib.Path(pathToData)
        # class names used to build and display the labels
        self.CLASS_NAMES = className
        self.list_ds = tf.data.Dataset.list_files(str(self.data_dir/'*/*'))

    def print_list_ds_n(self, n):
        """Print the first ``n`` entries of the file-path dataset."""
        for f in self.list_ds.take(n):
            print(f)

    def show_batch(self, image_batch, label_batch, n):
        """Plot the first ``n`` images of a batch, titled with their class name.

        Args:
            image_batch: Indexable collection of images accepted by
                ``plt.imshow``.
            label_batch: Indexable collection of one-hot labels; the entry
                equal to 1 selects the class name.
            n: Number of images to display.
        """
        # CLASS_NAMES may be a plain list, which cannot be indexed with a
        # boolean mask; an ndarray can.
        class_names = np.asarray(self.CLASS_NAMES)
        columns = 5
        # Keep the 5x5 grid for up to 25 images; add rows beyond that so the
        # subplot index never exceeds the grid size.
        rows = max(5, -(-n // columns))
        plt.figure(figsize=(10, 10))
        for index in range(n):
            plt.subplot(rows, columns, index + 1)
            plt.imshow(image_batch[index])
            plt.title(class_names[label_batch[index] == 1][0].title())
            plt.axis('off')
        plt.show()

    def get_label(self, file_path):
        """Return a boolean tensor marking which class name matches the file.

        The class is taken from the second-to-last path component, i.e. the
        name of the directory that contains the file.
        """
        parts = tf.strings.split(file_path, os.path.sep)
        return tf.equal(self.CLASS_NAMES, parts[-2])

    def decode_img(self, img):
        """Decode a JPEG byte string into a resized float32 image tensor."""
        # channels=1 decodes to grayscale; use channels=3 for RGB images
        img = tf.image.decode_jpeg(img, channels=1)
        img = tf.image.convert_image_dtype(img, tf.float32)
        # NOTE(review): tf.image.resize documents its size as [height, width],
        # while this passes [width, height]. The order is kept because the
        # caller builds its input shape as (IMG_WIDTH, IMG_HEIGHT, 1); it only
        # matters for non-square images. Confirm before changing either side.
        return tf.image.resize(img, [self.IMG_WIDTH, self.IMG_HEIGHT])

    def process_path(self, file_path):
        """Map a file path to an ``(image, label)`` pair."""
        label = self.get_label(file_path)
        img = tf.io.read_file(file_path)
        img = self.decode_img(img)
        return img, label

    def prepare_for_training(self, ds, cache=True, shuffle_buffer_size=40):
        """Shuffle, repeat, batch and prefetch a dataset.

        Args:
            ds: Dataset of ``(image, label)`` pairs.
            cache: ``True`` to cache in memory, a string to cache to that
                file name, or a false value to disable caching.
            shuffle_buffer_size: Buffer size passed to ``ds.shuffle``.

        Returns:
            The transformed dataset, which repeats indefinitely.
        """
        if cache:
            if isinstance(cache, str):
                ds = ds.cache(cache)
            else:
                ds = ds.cache()
        ds = ds.shuffle(buffer_size=shuffle_buffer_size)
        # Repeat forever
        ds = ds.repeat()
        ds = ds.batch(self.BATCH_SIZE)
        # `prefetch` lets the dataset fetch batches in the background while the model
        # is training.
        ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

        return ds

    def resize_label(self, data):
        """Convert one-hot label rows into an array of class indexes.

        Every truthy position of every row contributes its column index, in
        row order. A row with no truthy entry contributes nothing, so the
        result can be shorter than ``data`` if a label matched no class.
        """
        indexes = [
            position
            for row in data
            for position in range(len(row))
            if row[position]
        ]
        return np.array(indexes)

    def load_data(self):
        """Load one batch and return it as ``(images, label_indexes)`` arrays.

        Only the first batch produced by the pipeline is returned.
        """
        # Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
        labeled_ds = self.list_ds.map(self.process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        train_ds = self.prepare_for_training(labeled_ds, cache=False)
        image_batch, label_batch = next(iter(train_ds))
        images = image_batch.numpy()
        labels = label_batch.numpy()
        print("image batch:", type(images))
        print("label batch:", type(labels))

        # To inspect the batch visually, call
        # self.show_batch(images, labels, n) here with the number of images to plot.
        return images, self.resize_label(labels)


### Main function

In [5]:

if __name__ == "__main__":
    # Run configuration
    fileName = 'neural_network'
    DATA_PATH_TRAIN = 'resource/data/train'
    DATA_PATH_TEST = 'resource/data/test'
    BATCH_SIZE = 400
    IMG_HEIGHT = 224
    IMG_WIDTH = 224
    inputShape = (IMG_WIDTH, IMG_HEIGHT, 1)
    EPOCHS = 20
    CLASS_NAME = ['NORMAL', 'BACTERIA', 'VIRUS']

    # init model; the number of outputs follows the class list so the two
    # cannot drift apart
    model = CnnModel(fileName, inputShape, len(CLASS_NAME), True)

    # replace the actual model with an existing one from h5 format file
    # model.load('neural_network')

    # load train data (a single batch of BATCH_SIZE images)
    loader_train = Loader(DATA_PATH_TRAIN, BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, CLASS_NAME)
    train_data, train_label = loader_train.load_data()
    # -1 lets NumPy infer the number of samples from the batch itself
    train_data = train_data.reshape((-1,) + inputShape)

    # load test data (a single batch of BATCH_SIZE images)
    loader_test = Loader(DATA_PATH_TEST, BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, CLASS_NAME)
    test_data, test_label = loader_test.load_data()
    test_data = test_data.reshape((-1,) + inputShape)

    # train model, using the test batch as validation data
    model.train(train_data, train_label, test_data, test_label, EPOCHS)
    # save model in a .h5 file
    model.save()


image batch: <class 'numpy.ndarray'>
label batch: <class 'numpy.ndarray'>
image batch: <class 'numpy.ndarray'>
label batch: <class 'numpy.ndarray'>
Train on 400 samples, validate on 400 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
