<a href="https://colab.research.google.com/github/GuillBla/Internship_Report_Code/blob/main/Eyeballer_Xception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

A JavaScript snippet that clicks the Colab connect button every 60 seconds, to work around the disconnection limits imposed by Google Colab

In [None]:
# function ConnectButton(){
#     console.log("Connect pushed"); 
#     document.querySelector("#top-toolbar > colab-connect-button").shadowRoot.querySelector("#connect").click() 
# }
# setInterval(ConnectButton,60000);

In [None]:
!pip install livelossplot
!pip install split-folders

In [None]:
import splitfolders
from google.colab import drive
import zipfile
import os,shutil
from os import listdir
from os.path import isfile, join
from keras.models import Sequential
from keras.models import Model
from tensorflow.keras.optimizers import RMSprop, Adam
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Dropout, Flatten,Conv2D, GlobalAveragePooling2D
from keras import regularizers
from pathlib import Path
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from livelossplot.inputs.keras import PlotLossesCallback
# import matplotlib.pyplot as plt
# from tensorflow.keras.utils import plot_model

In [None]:
# Mount Google Drive at /content/drive so the dataset archive stored there can be read.
drive.mount('/content/drive')

If the following folders already exist from a previous run, they need to be removed first (uncomment the cell below if that is the case)

In [None]:
# # Clear the environment
# shutil.rmtree(r'/content/topic_classification_splitted')
# shutil.rmtree(r'/content/topic_classification')

In [None]:
# Extract the dataset archive stored on Google Drive into /content.
# The context manager closes the archive even if the extraction fails.
with zipfile.ZipFile('/content/drive/MyDrive/topic_classification.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

I split the dataset into two folders : 
1. One containing the training set : /content/topic_classification_splitted/train
2. One containing the testing set : /content/topic_classification_splitted/test

Note that I will split the first folder in two to create a validation set.

In [None]:
# Copy (move=False) the images into an 80% / 20% split under
# /content/topic_classification_splitted, using a fixed seed.
splitfolders.ratio(r"/content/topic_classification",
                   output=r"/content/topic_classification_splitted",
                   seed=2504, ratio=(.8, .2), group_prefix=None, move=False)

# The 20% part (the "val" folder) is used as the held-out test set, so rename it.
os.rename("/content/topic_classification_splitted/val", "/content/topic_classification_splitted/test")

# Class names in sorted order. listdir() returns entries in arbitrary order,
# whereas the generators index the classes from a sorted directory listing;
# sorting here keeps the names aligned with those label indices.
names_classes = sorted(listdir('/content/topic_classification_splitted/test'))


I implement data augmentation on the training data and I split the training dataset in two using the ratio val_ratio

In [None]:
# --- Data pipeline settings -------------------------------------------------
BATCH_SIZE = 64
val_ratio = 0.3            # share of the train folder reserved for validation
target_size = (224, 224)   # images are resized to this (height, width)

download_dir = Path(r"/content/topic_classification_splitted")
train_data_dir = download_dir.joinpath('train')
test_data_dir = download_dir.joinpath('test')

# One class per entry of the train folder, in sorted order; this list fixes
# the label index assigned to each class by every generator below.
class_subset = sorted(os.listdir(train_data_dir))

# --- Training / validation generators ---------------------------------------
# Both subsets come from the same ImageDataGenerator: validation_split
# partitions the train folder and `subset` selects which part is read.
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=val_ratio,
    # Augmentation options, currently disabled:
    #   width_shift_range=0.5,
    #   height_shift_range=0.5,
    #   horizontal_flip=True,
    #   vertical_flip=True,
)

# Options shared by the training and validation flows.
_train_flow_options = dict(
    target_size=target_size,
    class_mode='categorical',
    classes=class_subset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
)

traingen = train_generator.flow_from_directory(train_data_dir,
                                               subset='training',
                                               **_train_flow_options)

validgen = train_generator.flow_from_directory(train_data_dir,
                                               subset='validation',
                                               **_train_flow_options)

# --- Test generator ----------------------------------------------------------
# Same preprocessing, no validation split. class_mode=None means no labels are
# generated, and shuffle=False keeps the order of the files fixed.
test_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

testgen = test_generator.flow_from_directory(
    test_data_dir,
    target_size=target_size,
    class_mode=None,
    classes=class_subset,
    batch_size=1,
    shuffle=False,
    seed=42,
)

A function to build the architecture of the model: Eyeballer layers on top of a pre-trained model. It takes as input:
1. The size of the images (it must match the target_size used previously)
2. The number of categories in the dataset, which is also the size of the output layer
3. The optimizer instantiated for training (default 'RMSProp')
4. The number of layers from the pre-trained model to unfreeze. If set to 0, all of the pre-trained layers are frozen

In [None]:
def create_model(input_shape, n_classes, optimizer=None, fine_tune=None):
  """Build and compile an Xception-based image classifier.

  The network is an ImageNet-initialised Xception base (without its top),
  followed by global average pooling, two Dense/Dropout stages and a softmax
  output layer.

  Args:
    input_shape: Shape of the input images, e.g. (224, 224, 3).
    n_classes: Number of categories; this is the size of the output layer.
    optimizer: Optimizer used to compile the model. When None (default), an
      RMSprop optimizer with learning_rate=0.0001 is created for this call.
    fine_tune: Number of layers, counted from the end of the Xception base,
      to leave trainable. 0 freezes the whole base. None (default) does not
      touch the trainable flags of the base at all, which is the behaviour
      this function had before the parameter existed.

  Returns:
    The compiled model (categorical cross-entropy loss, accuracy metric).

  Raises:
    ValueError: If fine_tune is negative.
  """
  base_model = Xception(weights='imagenet', include_top=False, input_shape=input_shape)

  if fine_tune is not None:
    if fine_tune < 0:
      raise ValueError("fine_tune must be None or a non-negative integer")
    # Freeze everything except the last `fine_tune` layers of the base
    # (layers[:-0] would be empty, hence the explicit 0 case).
    frozen_layers = base_model.layers if fine_tune == 0 else base_model.layers[:-fine_tune]
    for layer in frozen_layers:
      layer.trainable = False

  # Classification head stacked on top of the base's feature maps.
  model = base_model.output
  model = GlobalAveragePooling2D()(model)
  model = Dense(256, activation="relu", name="HiddenLayer1")(model)
  model = Dropout(0.5)(model)
  model = Dense(128, activation="relu", name="HiddenLayer2", kernel_regularizer=regularizers.l2(0.01))(model)
  model = Dropout(0.2)(model)
  # The output size follows n_classes instead of a hard-coded 27.
  model = Dense(n_classes, activation="softmax", name="OutputLayer")(model)

  model = Model(inputs=base_model.input, outputs=model)

  if optimizer is None:
    # A fresh optimizer per call, so two models never share optimizer state.
    optimizer = RMSprop(
      learning_rate=0.0001,
      rho=0.9,
      momentum=0.0,
      epsilon=1e-07,
      centered=False,  # True may help but is computationally expensive
      name="RMSprop",
    )

  model.compile(optimizer=optimizer,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

  return model

In [None]:
# Derive the model dimensions from the data pipeline instead of repeating
# magic numbers: three colour channels on top of the generators' target_size,
# and one output unit per class known to the training generator.
input_shape = target_size + (3,)
n_classes = len(traingen.class_indices)

# Whole batches per epoch (a trailing partial batch is not counted).
n_steps = traingen.samples // BATCH_SIZE
n_val_steps = validgen.samples // BATCH_SIZE
n_epochs = 30

# Build and compile the model with create_model's default settings.
model = create_model(input_shape, n_classes)

Callbacks

In [None]:
# Live plot of the training curves (livelossplot).
plot_loss_1 = PlotLossesCallback()

# Write a checkpoint to disk, keeping only the best one seen so far.
tl_checkpoint_1 = ModelCheckpoint(
    filepath='tl_model_eyeballer.weights.best.hdf5',
    verbose=1,
    save_best_only=True,
)

# Stop once the validation loss has not improved for 10 epochs and roll the
# model back to its best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train on the generators. batch_size is deliberately not passed: the
# generators already yield batches of BATCH_SIZE, and Keras documents that
# batch_size must not be specified for generator / Sequence inputs.
history = model.fit(traingen,
                    epochs=n_epochs,
                    validation_data=validgen,
                    steps_per_epoch=n_steps,
                    validation_steps=n_val_steps,
                    callbacks=[tl_checkpoint_1, early_stop, plot_loss_1],
                    verbose=1)

In [None]:
# Generate predictions on the test set.
# Start from the best checkpoint saved during training.
model.load_weights('tl_model_eyeballer.weights.best.hdf5')

# Label index of every test image, as recorded by the test generator.
true_classes = testgen.classes

# Invert the training generator's {class name: index} mapping into
# {index: class name}.
class_indices = {index: label for label, index in traingen.class_indices.items()}

# One row of class scores per test image; the predicted class is the
# position of the highest score in each row.
preds = model.predict(testgen)
pred_classes = preds.argmax(axis=1)

In [None]:
# The score results.
# Name the report rows from the {index: class name} mapping of the training
# generator, so each name is guaranteed to match its label index (a raw
# directory listing has no guaranteed order). Passing `labels` keeps the
# report valid even when a class is absent from the test predictions.
label_ids = sorted(class_indices)
target_names = [class_indices[i] for i in label_ids]
print(classification_report(true_classes, pred_classes,
                            labels=label_ids, target_names=target_names))

accuracy = accuracy_score(true_classes, pred_classes)
print("Eyeballer Model Accuracy : {:.2f}%".format(accuracy * 100))

# Macro-averaged precision and recall, and the harmonic mean of the two.
p = precision_score(true_classes, pred_classes, average='macro')
r = recall_score(true_classes, pred_classes, average='macro')
# Guard against 0/0 when both precision and recall are zero.
F1 = 2 * (p * r) / (p + r) if (p + r) > 0 else 0.0
print("Precision = " + str(p))
print("Recall = " + str(r))
print("F1 score = " + str(F1))