<a href="https://colab.research.google.com/github/Tihara-Jay/FYP-model-implementation/blob/main/Fine_tuning_of_VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Label encoding: spam = 1, ham = 0.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Flatten, Activation, BatchNormalization, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline

Data preprocessing to make the data suitable to be passed into the VGG16 model.

In [None]:
# Mount Google Drive at /content/drive; the dataset and model paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folder of the phishing image dataset on the mounted Drive,
# with one sub-folder per class derived from it.
DATASET_PATH = '/content/drive/MyDrive/FYP/Dataset/Phishing'
ham_dir = f'{DATASET_PATH}/Ham'
spam_dir = f'{DATASET_PATH}/Spam'

In [None]:
# List each class folder once, keeping both the file names and how many there are.
spam_files, ham_files = os.listdir(spam_dir), os.listdir(ham_dir)
num_spam_files, num_ham_files = len(spam_files), len(ham_files)

# Report the per-class totals so class balance can be checked at a glance.
print("Number of spam images in spam_dir:", num_spam_files)
print("Number of ham images in ham_dir:", num_ham_files)

Number of spam images in spam_dir: 740
Number of ham images in ham_dir: 740


In [None]:
# Class folders (same values as declared earlier) plus the folder that holds
# the train / validation / test split consumed by the generators.
ham_dir, spam_dir, model_dir = (
    '/content/drive/MyDrive/FYP/Dataset/Phishing/' + leaf
    for leaf in ('Ham', 'Spam', 'Model')
)

In [None]:
# Sub-folders of model_dir, one per data split.
train_dir, test_dir, validation_dir = (
    f'{model_dir}/{split}' for split in ('train', 'test', 'validation')
)

Data augmentation

In [None]:
# Training-time augmentation: random rotation, shifts, shear, zoom and horizontal flips;
# fill_mode="nearest" selects how pixels uncovered by a transform are filled.
trainAug = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Validation/testing generator: constructed with defaults, i.e. no random augmentation options.
valAug = ImageDataGenerator()

# ImageNet per-channel mean (RGB order), attached to both generators.
# NOTE(review): Keras appears to subtract `.mean` only when the generator is built with
# featurewise_center=True; as constructed here the assignment below is likely a no-op and
# images reach the network un-centred. Confirm against the ImageDataGenerator docs, and if
# it is enabled, apply the same subtraction wherever single images are fed to model.predict.
mean = np.array([123.68, 116.779, 103.939], dtype="float32")
trainAug.mean = valAug.mean = mean

In [None]:
BATCH_SIZE = 32

# Training batches: drawn from train_dir through the augmenting generator, shuffled.
trainGen = trainAug.flow_from_directory(
    directory=train_dir,
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="binary",
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Validation batches: same image size and label mode, no augmentation options.
valGen = valAug.flow_from_directory(
    directory=validation_dir,
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="binary",
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test batches: shuffle=False so predictions can later be compared
# position-by-position with testGen.classes.
testGen = valAug.flow_from_directory(
    directory=test_dir,
    target_size=(224, 224),
    color_mode="rgb",
    class_mode="binary",
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Number of training images discovered by the generator.
num_images = trainGen.n
print("Number of images:", num_images)

Found 1184 images belonging to 2 classes.
Found 148 images belonging to 2 classes.
Found 148 images belonging to 2 classes.
Number of images: 1184


In [None]:
# All Keras symbols come from tensorflow.keras so that the regularizer objects belong to the
# same Keras implementation as the layers they are attached to (the stand-alone `keras`
# package can be a different version from the one bundled with TensorFlow).
from tensorflow.keras import regularizers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# Convolutional base: VGG16 with ImageNet weights and without its original classifier
# (include_top=False), fed with 224x224 RGB inputs.
baseModel = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))

# New classification head: flatten -> 256-unit ReLU layer with L2 weight penalty -> dropout
# -> single sigmoid unit for the binary spam/ham output.
headModel = baseModel.output
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(256, activation="relu", kernel_regularizer=regularizers.l2(0.001))(headModel)
headModel = Dropout(0.3)(headModel)
headModel = Dense(1, activation="sigmoid")(headModel)

# Place the head on top of the base model to obtain the full network.
model = Model(inputs=baseModel.input, outputs=headModel)

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the VGG16 base.
baseModel.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Mark every layer of the VGG16 base as non-trainable before the first training pass.
for base_layer in baseModel.layers:
    base_layer.trainable = False

In [None]:
from imutils import paths

# Count the image files found under each split folder.
totalTrain, totalVal, totalTest = (
    sum(1 for _image_path in paths.list_images(split_dir))
    for split_dir in (train_dir, validation_dir, test_dir)
)

# One total per line: train, validation, test.
print(totalTrain, totalVal, totalTest, sep="\n")

1184
148
148


Steps per epoch = total number of samples // batch size (integer division).

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Watch val_loss with a patience of 5 epochs; restore_best_weights=True asks Keras to
# roll the model back to its best-scoring weights when training is stopped.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)


In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by 0.1 after 4 epochs without improvement of the
# callback's monitored metric; verbose=1 logs each reduction.
lr_schedule = ReduceLROnPlateau(
    factor=0.1,
    patience=4,
    verbose=1,
)

In [None]:
# Target path for the training checkpoints, and the folder that contains it.
checkpoint_path = "/content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints"
checkpoint_dir = os.path.split(checkpoint_path)[0]

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback writing to checkpoint_path: weights only, and only when the
# monitored metric improves (save_best_only=True); verbose=1 logs each save decision.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Compile for binary classification with Adam at a learning rate of 1e-4.
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=1e-4), metrics=["accuracy"])

print("Training head...")
# validation_steps uses len(valGen) (number of batches, including the final partial one).
# Flooring totalVal // BATCH_SIZE would skip the remainder images on every validation pass
# whenever the validation set size is not a multiple of BATCH_SIZE, which makes val_loss
# (monitored by the callbacks below) depend on which images happen to be left out.
H = model.fit(
    x=trainGen,
    steps_per_epoch=totalTrain // BATCH_SIZE,
    validation_data=valGen,
    validation_steps=len(valGen),
    epochs=50,
    callbacks=[early_stopping, lr_schedule, cp_callback])


Training head...
Epoch 1/50
Epoch 1: val_loss improved from inf to 3.55400, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 2/50
Epoch 2: val_loss improved from 3.55400 to 2.51601, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 3/50
Epoch 3: val_loss improved from 2.51601 to 1.66076, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 4/50
Epoch 4: val_loss improved from 1.66076 to 1.48520, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 5/50
Epoch 5: val_loss improved from 1.48520 to 1.31640, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 6/50
Epoch 6: val_loss improved from 1.31640 to 1.19642, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 7/50
Epoch 7: val_loss improved from 1.19642 to 1.11303, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Check

In [None]:
# Save the model after the head-training phase as an HDF5 (.h5) file on Drive.
model.save('/content/drive/MyDrive/FYP/Implementation/Attempt1/21_03_vgg16_head.h5')

  saving_api.save_model(


In [None]:
# Reload the head-trained model from the file written by the preceding model.save call, so
# that a fresh run of the notebook continues from its own artefact instead of depending on
# a differently named file that no cell in this notebook creates.
model = tf.keras.models.load_model('/content/drive/MyDrive/FYP/Implementation/Attempt1/21_03_vgg16_head.h5')

In [None]:
# Evaluate silently on the test generator; the result unpacks into the loss followed by
# the single metric the model was compiled with (accuracy).
test_metrics = model.evaluate(testGen, verbose=0)
test_loss, test_accuracy = test_metrics

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 1.0341585874557495
Test Accuracy: 0.75


Reference: "Fine-tuning with Keras and Deep Learning" (PyImageSearch), https://pyimagesearch.com/2019/06/03/fine-tuning-with-keras-and-deep-learning/

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_training(H, N=None):
    """Plot the loss and accuracy curves stored in a training history.

    Args:
        H: Object exposing a ``history`` dict with the per-epoch series
            "loss", "val_loss", "accuracy" and "val_accuracy" (for example
            the value returned by ``model.fit``).
        N: Number of epochs to plot. Defaults to every recorded epoch. A value
            larger than the recorded count is clamped to it, because training
            may end before the requested number of epochs (e.g. when an
            early-stopping callback fires); a smaller value plots only the
            first ``N`` epochs.
    """
    history = H.history

    # The x axis must have exactly as many points as each plotted series.
    recorded_epochs = len(history["loss"])
    if N is None or N > recorded_epochs:
        N = recorded_epochs
    epochs = np.arange(0, N)

    plt.style.use("ggplot")
    plt.figure()
    for key, label in (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc"),
    ):
        plt.plot(epochs, history[key][:N], label=label)
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.show()

Unfreeze the final set of convolutional layers of the base model (together with the new head) for fine-tuning.

In [None]:
# Rewind both generators before the second training phase.
trainGen.reset()
valGen.reset()

# Freeze everything except the last eight layers of the model, which become trainable.
for layer in model.layers[:-8]:
    layer.trainable = False
for layer in model.layers[-8:]:
    layer.trainable = True

# Show the resulting trainable flag of every layer.
for layer in model.layers:
    print("{}: {}".format(layer, layer.trainable))

<keras.src.engine.input_layer.InputLayer object at 0x7c2980171600>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297fcd9960>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297fcda0e0>: False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7c297fcdb070>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297fcdb6d0>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297fcdbf10>: False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7c297e3fcf10>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297fcdb520>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297e3fdc00>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297e3fe710>: False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7c297e3ff7c0>: False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7c297e3ffd90>: False
<keras.src.layers.convolutional.conv

In [None]:
# Re-compile so the updated trainable flags take effect, again with Adam at 1e-4.
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=1e-4), metrics=["accuracy"])

# validation_steps uses len(valGen) (number of batches, including the final partial one).
# Flooring totalVal // BATCH_SIZE would skip the remainder images on every validation pass
# whenever the validation set size is not a multiple of BATCH_SIZE, which makes val_loss
# (monitored by the callbacks below) depend on which images happen to be left out.
H = model.fit(
    x=trainGen,
    steps_per_epoch=totalTrain // BATCH_SIZE,
    validation_data=valGen,
    validation_steps=len(valGen),
    epochs=50,
    callbacks=[early_stopping, lr_schedule, cp_callback])

Epoch 1/50
Epoch 1: val_loss did not improve from 0.59088
Epoch 2/50
Epoch 2: val_loss did not improve from 0.59088
Epoch 3/50
Epoch 3: val_loss did not improve from 0.59088
Epoch 4/50
Epoch 4: val_loss did not improve from 0.59088
Epoch 5/50
Epoch 5: val_loss improved from 0.59088 to 0.57247, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 6/50
Epoch 6: val_loss did not improve from 0.57247
Epoch 7/50
Epoch 7: val_loss did not improve from 0.57247
Epoch 8/50
Epoch 8: val_loss did not improve from 0.57247
Epoch 9/50
Epoch 9: val_loss improved from 0.57247 to 0.56913, saving model to /content/drive/MyDrive/FYP/Implementation/Attempt1/Checkpoints
Epoch 10/50
Epoch 10: val_loss did not improve from 0.56913
Epoch 11/50
Epoch 11: val_loss did not improve from 0.56913
Epoch 12/50
Epoch 12: val_loss did not improve from 0.56913
Epoch 13/50
Epoch 13: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.

Epoch 13: val_loss did not improve from 

In [None]:
# Save the model after the fine-tuning phase as an HDF5 (.h5) file on Drive.
model.save('/content/drive/MyDrive/FYP/Implementation/Attempt1/Accuracy/bat_VGG16_finetune.h5')

In [None]:
# Rewind the test generator, then evaluate with a progress bar (verbose=1).
testGen.reset()
test_metrics = model.evaluate(testGen, verbose=1)
test_loss, test_accuracy = test_metrics

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 0.4893653988838196
Test Accuracy: 0.7635135054588318


In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the full model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Path of a single demo image on Drive, used below for a one-off prediction.
image_path = '/content/drive/MyDrive/FYP/PSPD_Submission_20200601_w1833519/Demo_Images/Non_spam.jpeg'

In [None]:
from tensorflow.keras.preprocessing import image

# Load the demo image resized to the 224x224 model input size, convert it to a NumPy
# array and prepend a batch axis so it can be handed to model.predict.
img = image.load_img(image_path, target_size=(224, 224))
img_array = image.img_to_array(img)[np.newaxis, ...]

In [None]:
# Run the model on the single-image batch and show the raw sigmoid output.
predictions = model.predict(img_array)
print(predictions)

[[0.00035218]]


In [None]:
# Outputs above the threshold are reported as SPAM, everything else as HAM.
threshold = 0.5
print("SPAM" if predictions > threshold else "HAM")

HAM


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Rewind the (unshuffled) test generator and collect the model output for every test image.
testGen.reset()
predictions = model.predict(testGen)

# Turn sigmoid outputs into 0/1 labels at a 0.5 cut-off; ground truth comes from the generator.
predicted_labels = (predictions > 0.5).astype(int)
true_labels = testGen.classes

# Compute the four evaluation metrics on the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    metric(true_labels, predicted_labels)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.75
Precision: 0.7466666666666667
Recall: 0.7567567567567568
F1 Score: 0.7516778523489932


In [None]:
# Load a saved fine-tuned model from Drive, replacing the in-memory `model`.
# NOTE(review): this file name ('checkpts_bat_VGG16_finetune.h5') differs from the one
# written by the earlier model.save call ('bat_VGG16_finetune.h5') and is not created by
# any cell visible in this notebook — confirm it exists and is the intended artefact.
model = tf.keras.models.load_model('/content/drive/MyDrive/FYP/Implementation/Attempt1/Accuracy/checkpts_bat_VGG16_finetune.h5')