# Training_Classification_Model_VGG19_on_custom_Dataset

In [None]:
# Install essential libraries # OS is python's standard library. So no need to download it.

!pip install tensorflow
!pip install numpy
!pip install pandas
!pip install opencv-python        
!pip install matplotlib
!pip install scikit-learn

In [None]:
# Import the required packages

from tensorflow.keras.layers import Input, Lambda, Dense, Flatten,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt

# Images Pre-processing

In [None]:
# VGG19 model inputs are color images of dimensions 224×224 (resizing is required)

# Setting the paths of the Dataset
train_path = "../Classification_Custom_Dataset/train"
test_path = "../Classification_Custom_Dataset/test"
val_path = "../Classification_Custom_Dataset/val"


def _load_images(root_dir, size=(224, 224)):
    """Load every image found in the class sub-folders of ``root_dir``.

    The class folders and the files inside each folder are visited in sorted
    order. ``os.listdir`` itself returns entries in arbitrary order, whereas the
    labels used later in this notebook come from
    ``ImageDataGenerator.flow_from_directory(...).classes``, which Keras
    documents as being in alphanumeric order. Sorting here keeps image ``i``
    and label ``i`` referring to the same file.

    Parameters
    ----------
    root_dir : str
        Directory that contains one sub-folder per class.
    size : tuple of int
        ``(width, height)`` passed to ``cv2.resize``.

    Returns
    -------
    list
        One resized image array (as returned by ``cv2.resize``) per file.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot decode an entry of a class folder. Raising
        (instead of skipping) keeps the image list aligned with the labels.
    """
    images = []
    for folder in sorted(os.listdir(root_dir)):
        sub_path = os.path.join(root_dir, folder)
        # Ignore stray files next to the class folders (e.g. .DS_Store).
        if not os.path.isdir(sub_path):
            continue
        for file_name in sorted(os.listdir(sub_path)):
            image_path = os.path.join(sub_path, file_name)
            img_arr = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img_arr is None:
                raise ValueError(f"cv2.imread could not decode: {image_path}")
            images.append(cv2.resize(img_arr, size))
    return images


# Getting the images of the training, testing and validation sets
x_train = _load_images(train_path)
x_test = _load_images(test_path)
x_val = _load_images(val_path)

In [None]:
# Turn each list of resized images (x_train, x_test, x_val) into one NumPy array

train_x, test_x, val_x = (
    np.array(image_list) for image_list in (x_train, x_test, x_val)
)

In [None]:
# Normalization: rescale the pixel values from the 0-255 range to the 0-1 range.
#
# The scaled arrays are derived from the raw image lists (x_train, x_test, x_val)
# rather than from train_x/test_x/val_x themselves, so re-running this cell
# always gives the same result instead of dividing by 255 a second time.
# float32 is requested explicitly: dividing the integer image arrays by 255.0
# would otherwise produce float64 arrays that take twice the memory.

train_x = np.asarray(x_train, dtype=np.float32) / 255.0
test_x = np.asarray(x_test, dtype=np.float32) / 255.0
val_x = np.asarray(x_val, dtype=np.float32) / 255.0

In [None]:
# ImageDataGenerator is used to obtain the labels of each dataset split:
# flow_from_directory walks the class folders and records the label of every
# image it finds there.

# Options shared by the three directory iterators
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse',
)

train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory(train_path, **flow_options)
test_set = test_datagen.flow_from_directory(test_path, **flow_options)
val_set = val_datagen.flow_from_directory(val_path, **flow_options)

In [None]:
# Assigning the classes to train_y, test_y, and val_y.
# The intent is that the label of the image train_x[i] is train_y[i]; that only
# holds if the images were loaded in the same order in which the generator
# enumerated the files, and if both saw exactly the same set of files.

train_y = training_set.classes
test_y = test_set.classes
val_y = val_set.classes

# Cheap sanity check: a length mismatch means the image arrays and the label
# arrays were built from different sets of files.
assert len(train_x) == len(train_y), f"train: {len(train_x)} images but {len(train_y)} labels"
assert len(test_x) == len(test_y), f"test: {len(test_x)} images but {len(test_y)} labels"
assert len(val_x) == len(val_y), f"val: {len(val_x)} images but {len(val_y)} labels"

***Class 0 is AI-images***     |     ***Class 1 is Real_images***

In [None]:
# Show the class labels: mapping from class name to class number

print(training_set.class_indices)

In [None]:
# Print the integer class labels that flow_from_directory recorded for the test images
print (test_set.classes)

In [None]:
# Show the shapes of the label arrays (train_y, test_y, val_y).
# All of them are expected to be one-dimensional: one label per image.

train_y.shape,test_y.shape,val_y.shape

# Model Building and Training

In [None]:
# Step 1 : Model Building

# Load VGG-19 with ImageNet weights and without its original top layers
# (include_top=False); the input is a 224x224 image with 3 channels.
IMAGE_SIZE = [224,224]
vgg = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=[*IMAGE_SIZE, 3],
)

# Freeze every pre-trained layer so its weights are not updated during training.
for pretrained_layer in vgg.layers:
    pretrained_layer.trainable = False

In [None]:
# Customize the model: put a new output layer on top of the VGG-19 base,
# sized for the 2 classes of this problem.

# Layers of the new head: flatten the base's output, then a 2-unit softmax layer
# (use more units here for a problem with more classes).
flatten_layer = Flatten()
output_layer = Dense(2, activation='softmax')

x = flatten_layer(vgg.output)
prediction = output_layer(x)

model = Model(inputs=vgg.input, outputs=prediction)

In [None]:
# View the structure of the model after adding the new output layer
model.summary()

In [None]:
# Step 2 : Compiling the model
# The labels in this notebook are integer class ids (the generators use
# class_mode='sparse'), not one-hot vectors, so the loss is sparse categorical
# cross-entropy.
# The optimizer is Adam with its default settings; accuracy is tracked as a metric.

model.compile(
    optimizer="adam",
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Step 3 : Fitting the model
# Early stopping is meant to limit overfitting: training is halted once the
# validation loss has stopped decreasing for `patience` epochs.
# NOTE(review): the fit call in this notebook trains for only 5 epochs; with
# patience=5 the callback presumably can never trigger in such a run.
# Consider a smaller patience or more epochs.

from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    verbose=1,
)

In [None]:
# Model training: fit on the training arrays, validating on the validation arrays
# after each epoch; the per-epoch metrics are kept in `history`.

history = model.fit(
    x=train_x,
    y=train_y,
    batch_size=32,
    epochs=5,
    shuffle=True,
    validation_data=(val_x, val_y),
    callbacks=[early_stop],
)

In [None]:
# Visualize the performance of the model on the training and validation datasets
# with one accuracy graph and one loss graph. Each figure is saved to a PNG file
# and then shown.

# (training key, validation key, training label, validation label, output file)
curves = (
    ('accuracy', 'val_accuracy', 'train acc', 'val acc', 'vgg-acc-rps-1.png'),
    ('loss', 'val_loss', 'train loss', 'val loss', 'vgg-loss-rps-1.png'),
)

for train_key, val_key, train_label, val_label, out_file in curves:
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.savefig(out_file)
    plt.show()

In [None]:
# Step-4 : Model Evaluation
# Measure the trained model on the held-out test dataset.

model.evaluate(x=test_x, y=test_y, batch_size=32)

In [None]:
# Check the model's efficiency by observing its classification report and confusion matrix.

from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import numpy as np

# Predict: the model outputs one score per class for every test image;
# argmax over the class axis picks the predicted class number.
y_pred = model.predict(test_x)
y_pred = np.argmax(y_pred, axis=1)

# scikit-learn expects the true labels first and the predictions second.
# With the arguments the other way round, precision and recall are swapped in
# the report and the confusion matrix is transposed.

# Classification report
print(classification_report(test_y, y_pred))

# Confusion matrix (rows: true class, columns: predicted class)
print(confusion_matrix(test_y, y_pred))

In [None]:
# Show the predicted labels next to the original labels
print("Predicted labels\n", y_pred)
print("\nOriginal labels\n", test_y)

# Positions in the test set where the prediction differs from the original label
wrong_idx = np.flatnonzero(y_pred != test_y)

# Count the mis-classified images per original class (class 0 is treated as
# AI-images, every other label as Real images)
mis_AI_images = int(np.count_nonzero(test_y[wrong_idx] == 0))
mis_Real_images = len(wrong_idx) - mis_AI_images

# Keep the original (un-normalized) images that were mis-classified
misclassified_images = [x_test[position] for position in wrong_idx]

if len(wrong_idx) > 0:
    print(f"\nTesting the model : Out of {len(y_pred)} images , {mis_AI_images}"
          " AI-images have been mis-classified as Real images, and "
          f"{mis_Real_images} Real images have been mis-classified as AI-images.")


In [None]:
# Display the images that the model misclassified

from PIL import Image

# The loop runs for any number of misclassified images, including exactly one,
# and does nothing when the list is empty.
# The images come from cv2 and are converted from BGR to RGB channel order
# before being handed to PIL for display.
for bgr_image in misclassified_images:
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    # Named `pil_image` so it does not overwrite the `image` module imported
    # from tensorflow.keras.preprocessing at the top of the notebook.
    pil_image = Image.fromarray(rgb_image)
    display(pil_image)

In [None]:
# Save our trained model

# Make sure the target folder exists before writing the file, so this cell does
# not depend on the folder having been created by hand.
# NOTE(review): some Keras versions may create the folder themselves - confirm.
os.makedirs('save', exist_ok=True)

model.save('save/New_vgg19_model.h5')