# Notebook de test du préprocessing du dataset

In [None]:
import numpy as np              #For handling arrays
import pandas as pd             # For handling data
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPooling2D, BatchNormalization, MaxPool2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.utils import plot_model

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import onnx
import glob
import matplotlib.pyplot as plt

In [None]:
# Print the installed TensorFlow version so the run can be reproduced later.
print(tf.__version__)

In [None]:
# --- Notebook configuration -------------------------------------------------
# All three splits live under the same dataset root directory.
dataset_root = "../../../datasets/original_500_500/"
train_path = dataset_root + "train/"
valid_path = dataset_root + "val/"
test_path = dataset_root + "test/"

# Size the images are loaded at; also used for the model's input shape.
img_width = img_height = 500

# Number of images per batch produced by the dataset loaders.
batch_size = 32

# Name under which the trained model is saved (appended to "../keras/").
model_path = "model_v2_03"

In [None]:
def _load_split(directory, shuffle):
    """Load one dataset split from ``directory`` as batched grayscale images.

    Args:
        directory: Folder containing one sub-folder per class
            ("NORMAL" and "PNEUMONIE").
        shuffle: Whether the files are shuffled when the dataset is iterated.

    Returns:
        The dataset returned by ``image_dataset_from_directory``, yielding
        ``(images, labels)`` batches of ``batch_size`` elements.
    """
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        class_names=["NORMAL", "PNEUMONIE"],
        color_mode="grayscale",
        batch_size=batch_size,
        shuffle=shuffle,
        # The API expects (height, width), not (width, height).
        image_size=(img_height, img_width),
    )


train = _load_split(train_path, shuffle=True)
val = _load_split(valid_path, shuffle=True)
# The test split must keep a fixed order: its labels and its predictions are
# obtained in two separate passes over the dataset and have to line up.
test = _load_split(test_path, shuffle=False)

In [None]:
#image_gen = ImageDataGenerator(
#                                  rescale = 1./255,
#                                  shear_range = 0.2,
#                                  zoom_range = 0.2,
#                                  horizontal_flip = True,          
#                               )
# Create Image Data Generator for Test/Validation Set
#test_data_gen = ImageDataGenerator(rescale = 1./255)

#train = image_gen.flow_from_directory(
#      train_path,
#      target_size=(img_height, img_width),
#      color_mode='grayscale',
#      class_mode='binary',
#      batch_size=batch_size,
#      )
#test = test_data_gen.flow_from_directory(
#      test_path,
#      target_size=(img_height, img_width),
#      color_mode='grayscale',
#      shuffle=False, 
#setting shuffle as False just so we can later compare it with predicted values without having indexing problem 
#      class_mode='binary',
#      batch_size=batch_size,
#      )
#val = test_data_gen.flow_from_directory(
#      valid_path,
#      target_size=(img_height, img_width),
#      color_mode='grayscale',
#      class_mode='binary', 
      #batch_size=batch_size,
#      )

In [None]:
def _labels_of(dataset):
    """Return every label of ``dataset`` concatenated along axis 0.

    The dataset is iterated once in full; only the second element of each
    ``(images, labels)`` batch is kept.
    """
    return np.concatenate([batch_labels for _, batch_labels in dataset], axis=0)


# NOTE(review): for a split loaded with shuffle=True, the label order obtained
# here presumably differs from the order of any later pass over that split --
# confirm before comparing these labels with predictions.
train_label, val_label, test_label = (
    _labels_of(split) for split in (train, val, test)
)

In [None]:
# Binary classifier: five Conv2D(3x3, ReLU) + 2x2 max-pooling stages
# (32, 32, 32, 64, 64 filters), then two dense ReLU layers and a single
# sigmoid output unit.
# NOTE(review): no pixel rescaling is applied in the model or the visible
# loading code, so the network presumably receives raw 0-255 intensities --
# confirm whether a 1/255 normalisation is wanted.
cnn = Sequential()

# Only the first layer of a Sequential model needs the input shape, and it is
# ordered (height, width, channels); the images are single-channel grayscale.
cnn.add(Conv2D(32, (3, 3), activation="relu", input_shape=(img_height, img_width, 1)))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining convolutional stages infer their input shape from the previous layer.
for filters in (32, 32, 64, 64):
    cnn.add(Conv2D(filters, (3, 3), activation="relu"))
    cnn.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head.
cnn.add(Flatten())
cnn.add(Dense(units=128, activation="relu"))
cnn.add(Dense(units=64, activation="relu"))
cnn.add(Dense(units=1, activation="sigmoid"))

cnn.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
cnn.summary()

In [None]:
# Stop training once the validation loss has not improved for 3 epochs.
early = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=3,
)

# Multiply the learning rate by 0.3 after 2 epochs without validation-loss
# improvement, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

callbacks_list = [early, learning_rate_reduction]

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights computed from the training labels, stored as a
# {class label: weight} mapping.
class_ids = np.unique(train_label)
weights = compute_class_weight(
    class_weight="balanced",
    classes=class_ids,
    y=train_label,
)
cw = {class_id: weight for class_id, weight in zip(class_ids, weights)}

In [None]:
# Train for at most 100 epochs, validating on `val` after each epoch; the
# callbacks may lower the learning rate or stop the run early.
history = cnn.fit(
    train,
    validation_data=val,
    epochs=100,
    callbacks=callbacks_list,
    # class_weight=cw,  # class weighting is currently disabled
)

In [None]:
# Raw sigmoid outputs of the model on the test split.
preds = cnn.predict(test, verbose=1)

# Binarise on a copy so `preds` keeps the raw scores: values <= 0.5 become 0,
# values > 0.5 become 1. Both masks are taken from the untouched scores.
at_or_below_half = preds <= 0.5
above_half = preds > 0.5
predictions = preds.copy()
predictions[at_or_below_half] = 0
predictions[above_half] = 1

In [None]:
#from sklearn.metrics import classification_report,confusion_matrix
#cm = pd.DataFrame(data=confusion_matrix(test.classes, predictions, labels=[0, 1]),index=["Actual Normal", "Actual Pneumonia"],
#columns=["Predicted Normal", "Predicted Pneumonia"])
#import seaborn as sns
#sns.heatmap(cm,annot=True,fmt="d")

#print(classification_report(y_true=test.classes,y_pred=predictions,target_names =['NORMAL','PNEUMONIA']))

In [None]:
# `evaluate` returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; index 1 is therefore the accuracy.
test_accu = cnn.evaluate(test)
print('The testing accuracy is :', test_accu[1]*100, '%')

# Same measurement on the validation split, now labelled as such (it was
# previously printed as "testing accuracy").
new_test = cnn.evaluate(val)
print('The validation accuracy is :', new_test[1]*100, '%')

In [None]:
# Persist the trained model under ../keras/<model_path>.
# NOTE(review): the target has no file extension; some Keras versions may
# require ".keras" or ".h5" -- confirm against the installed version.
export_target = f"../keras/{model_path}"
cnn.save(export_target)

In [None]:
# Plot the per-epoch training/validation loss and accuracy and save the figure
# to "plot.png".
# The number of epochs actually run is read from the history: early stopping
# can end training at any epoch, and a hard-coded count makes plt.plot fail
# with mismatched x/y lengths.
N = len(history.history["loss"])
epoch_axis = np.arange(0, N)

plt.style.use("ggplot")
plt.figure()
for metric_key, curve_label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epoch_axis, history.history[metric_key], label=curve_label)
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot.png")