# Image classification

### Classification using VGG16 (frozen - 224*224)

In [None]:
!pip install livelossplot

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets, sklearn.model_selection
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from keras.layers import Dense, Flatten
from keras.models import Model
from keras.optimizers import Adam
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import plot_model
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Dataset locations (Kaggle input directories).
# Exactly one train/test pair is active at a time; the commented-out pairs
# below are alternative datasets kept so the notebook can be switched quickly.
#src_path_train = '/kaggle/input/spamassassin/images_test/train'
#src_path_test = '/kaggle/input/spamassassin/images_test/val'

#src_path_train = '/kaggle/input/img-dataset/img_spamassassin/train'
#src_path_test = '/kaggle/input/img-dataset/img_spamassassin/test'

#src_path_train = '/kaggle/input/img-dataset/img_fusion/train'
#src_path_test = '/kaggle/input/img-dataset/img_fusion/test'
#src_path_test2 = '/kaggle/input/img-dataset/img_fusion/test2'

#src_path_train = '/kaggle/input/img-dataset/img_duo/train'
#src_path_test = '/kaggle/input/img-dataset/img_duo/test'

#src_path_train = '/kaggle/input/img-dataset/img_duo2/train'
#src_path_test = '/kaggle/input/img-dataset/img_duo2/test'

# Active dataset: img_duo3
src_path_train = '/kaggle/input/img-dataset/img_duo3/train'
src_path_test = '/kaggle/input/img-dataset/img_duo3/test'

#src_path_train = '/kaggle/input/img-dataset/img_fusion2/train'
#src_path_test = '/kaggle/input/img-dataset/img_fusion2/test'
#src_path_test2 = '/kaggle/input/img-dataset/img_fusion2/test2'


# Shared settings:
#   batch_size - number of images per batch produced by every generator
#   IMSIZE     - size the images are resized to (passed as target_size to the
#                generators and extended with a channel count for the VGG16 input)
batch_size = 256
IMSIZE = [224,224]


# Image generators. All of them scale pixel values by 1/255.
#
# train_gen  : rescaling plus random augmentation (rotation, zoom, shifts,
#              shear, horizontal flip); reserves 20% of the data for validation.
# train_gen2 : rescaling only; reserves 20% of the data for validation.
# test_gen   : rescaling only, no validation split.
train_gen = ImageDataGenerator(
    validation_split=0.20,
    rescale=1 / 255.0,
    fill_mode="nearest",
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.05,
    zoom_range=0.05,
    width_shift_range=0.05,
    height_shift_range=0.05,
)

train_gen2 = ImageDataGenerator(validation_split=0.20, rescale=1 / 255.0, fill_mode="nearest")

test_gen = ImageDataGenerator(rescale=1 / 255.0, fill_mode="nearest")


# Build the directory iterators.
# The training directory is read twice through train_gen2: once for the
# 'training' subset and once for the 'validation' subset of its 20% split.
# (Swap train_gen2 for train_gen to read the same data with augmentation.)
train_generator = train_gen2.flow_from_directory(
    directory=src_path_train,
    subset='training',
    batch_size=batch_size,
    target_size=IMSIZE,
    shuffle=True,
)

validation_generator = train_gen2.flow_from_directory(
    directory=src_path_train,
    subset='validation',
    batch_size=batch_size,
    target_size=IMSIZE,
    shuffle=True,
)

# The test iterator is not shuffled, so its batches follow the order of
# test_generator.classes.
test_generator = test_gen.flow_from_directory(
    directory=src_path_test,
    batch_size=batch_size,
    target_size=IMSIZE,
    shuffle=False,
)

In [None]:
# Show the first two images of the test dataset.
# Both images are taken from ONE batch: fetching a new batch per image would
# display the first image of two different batches instead of the first two
# images of the dataset.
images, _ = next(test_generator)
for image in images[:2]:
    print(image.shape)   # per-image shape, expected (224, 224, 3)
    plt.imshow(image)
    plt.show()

# Rewind the iterator so that this preview does not leave it positioned on the
# second batch.
test_generator.reset()

In [None]:
# Number of classes, i.e. the number of units in the softmax output layer
NBCLASSES = 2


def create_model(freeze_base=True):
    """Build and compile a VGG16-based classifier with a new softmax head.

    The convolutional part of VGG16 (ImageNet weights, no top) is followed by
    a Flatten layer and a Dense softmax layer with NBCLASSES units. The model
    is compiled with categorical cross-entropy, Adam (learning rate 0.001)
    and the accuracy metric, and its summary is printed.

    Args:
        freeze_base: when True (default), every VGG16 layer is marked as
            non-trainable so that only the new Dense head is fitted, as the
            "frozen" wording of this section's title announces. Pass False
            to fine-tune the whole network.

    Returns:
        The compiled Keras Model.
    """
    vgg = VGG16(input_shape=IMSIZE + [3], weights='imagenet', include_top=False)

    if freeze_base:
        # Must be done before compile() so the optimizer ignores these weights.
        for layer in vgg.layers:
            layer.trainable = False

    # New classification head on top of the VGG16 feature maps
    features = Flatten()(vgg.output)
    predictions = Dense(NBCLASSES, activation='softmax')(features)

    model = Model(inputs=vgg.input, outputs=predictions)

    model.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )

    model.summary()

    return model


mymodel = create_model()

# Print an image representing the model
#plot_model(mymodel, to_file='cnn_model.png', show_shapes=True, show_layer_names=True)

In [None]:
from livelossplot.inputs.keras import PlotLossesCallback

# Live loss/metric plotting callback.
# NOTE(review): it is instantiated here but not included in the callbacks list
# of the fit() call in this notebook - confirm whether that is intended.
plot_loss_1 = PlotLossesCallback()

# Maximum number of epochs; training stops earlier when the validation loss has
# not improved for 5 consecutive epochs, restoring the best weights seen.
epochs = 50
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Checkpoint the best weights (lowest validation loss) under the 'test4' prefix.
# save_weights_only=True matches how the checkpoint is consumed afterwards
# (model.load_weights('test4')) and avoids serialising the full model each time
# the validation loss improves.
tl_checkpoint = ModelCheckpoint(
    filepath='test4',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
%%time 

# Train the model on the training subset and evaluate it on the validation
# subset after each epoch.
# The step counts use len(generator), i.e. the number of batches in one pass
# including the final partial batch. Floor-dividing the sample count by
# batch_size would drop that last batch and yields 0 steps whenever a subset
# holds fewer than batch_size images, which breaks validation-based callbacks.
history = mymodel.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    callbacks=[tl_checkpoint, early_stop],
    verbose=1,
)

In [None]:
# Inspect the first weight array (the kernel) of the model's final layer
final_layer = mymodel.layers[-1]
last_layer_weights = final_layer.get_weights()[0]

print(
    "Shape of the weights of the kernels of the last trained layer :",
    last_layer_weights.shape,
)
print(
    "Weights of the kernels of the last trained layer :\n",
    last_layer_weights,
)


In [None]:
import seaborn as sns
sns.set_style('darkgrid')


def tr_plot(tr_data, start_epoch):
    """Plot training/validation loss and accuracy curves of a training run.

    Two side-by-side charts are drawn: loss on the left, accuracy on the
    right. The epoch with the lowest validation loss and the epoch with the
    highest validation accuracy are highlighted with a blue marker.

    Args:
        tr_data: object exposing a ``history`` dict with the keys
            'accuracy', 'loss', 'val_accuracy' and 'val_loss' (one value per
            epoch), such as the value returned by ``model.fit``.
        start_epoch: number of epochs already completed before this run;
            used as an offset for the epoch numbers on the x axis.

    Returns:
        The zero-based index (within this run) of the epoch with the lowest
        validation loss.
    """
    tacc = tr_data.history['accuracy']
    tloss = tr_data.history['loss']
    vacc = tr_data.history['val_accuracy']
    vloss = tr_data.history['val_loss']

    # 1-based epoch numbers, shifted by the epochs already done
    Epochs = list(range(start_epoch + 1, start_epoch + len(tacc) + 1))

    index_loss = np.argmin(vloss)  # epoch with the lowest validation loss
    val_lowest = vloss[index_loss]
    index_acc = np.argmax(vacc)    # epoch with the highest validation accuracy
    acc_highest = vacc[index_acc]

    best_loss_epoch = index_loss + 1 + start_epoch
    best_acc_epoch = index_acc + 1 + start_epoch
    sc_label = 'best epoch= ' + str(best_loss_epoch)
    vc_label = 'best epoch= ' + str(best_acc_epoch)

    plt.style.use('fivethirtyeight')
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(25, 10))

    # Left chart: loss
    axes[0].plot(Epochs, tloss, 'r', label='Training loss')
    axes[0].plot(Epochs, vloss, 'g', label='Validation loss')
    axes[0].scatter(best_loss_epoch, val_lowest, s=150, c='blue', label=sc_label)
    axes[0].scatter(Epochs, tloss, s=100, c='red')
    axes[0].set_title('Training and Validation Loss')
    axes[0].set_xlabel('Epochs', fontsize=18)
    axes[0].set_ylabel('Loss', fontsize=18)
    axes[0].legend()

    # Right chart: accuracy
    axes[1].plot(Epochs, tacc, 'r', label='Training Accuracy')
    axes[1].scatter(Epochs, tacc, s=100, c='red')
    axes[1].plot(Epochs, vacc, 'g', label='Validation Accuracy')
    axes[1].scatter(best_acc_epoch, acc_highest, s=150, c='blue', label=vc_label)
    axes[1].set_title('Training and Validation Accuracy')
    axes[1].set_xlabel('Epochs', fontsize=18)
    axes[1].set_ylabel('Accuracy', fontsize=18)
    axes[1].legend()

    # tight_layout must be *called*; the bare attribute reference did nothing.
    plt.tight_layout()
    plt.show()
    return index_loss


loss_index = tr_plot(history, 0)

In [None]:
# Restore the checkpointed weights stored under 'test4'
mymodel.load_weights('test4')

# Ground-truth labels of the test set and an index -> class-name lookup
true_classes = test_generator.classes
class_indices = {index: name for name, index in test_generator.class_indices.items()}

# Class scores for every test image, then the index of the highest score
vgg_preds = mymodel.predict(test_generator)
vgg_pred_classes = vgg_preds.argmax(axis=1)

In [None]:
from sklearn.metrics import accuracy_score

# Share of test images whose predicted class equals the true class
vgg_acc = accuracy_score(y_true=true_classes, y_pred=vgg_pred_classes)
print("VGG16 Model Accuracy: {:.2f}%".format(100 * vgg_acc))

In [None]:
nb_test = test_generator.samples
# NOTE(review): assumes class index 0 is ham and index 1 is spam - confirm
# against test_generator.class_indices.
target_names = ['Ham','Spam']

# Predict once over the whole test iterator and let Keras infer the number of
# batches. The former predict_generator(..., 1 + nb_test // batch_size) call is
# deprecated and requested one batch too many whenever nb_test is an exact
# multiple of batch_size, which would make the number of predictions differ
# from the number of true labels.
Y_pred = mymodel.predict(test_generator)
y_pred = np.argmax(Y_pred, axis=1)


# Show the confusion matrix
print('Confusion Matrix')
cm = confusion_matrix(test_generator.classes, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
disp.plot(cmap=plt.cm.Blues)
plt.show()


# Print the per-class evaluation metrics
print('Classification Report')
print(classification_report(test_generator.classes, y_pred, target_names=target_names, digits=4))