# **Code for the CBIR (Content-Based Image Retrieval) test**

---



Project Name: Raava: Google Chrome Extensions Artificial Intelligence for Phishing Email Prevention and Image Forgery Detection

Student Name: Mokhamad Fikri Alfawaid

Student Id: 2500125A

Major: IT Cyber Security

Year: 2021

University of Glasgow

# **Code**

In [None]:
# import library requirements
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from skimage.metrics import structural_similarity as compare_ssim
import numpy as np
from PIL import Image
from pathlib import Path
import numpy as np
import pandas as pd
import cv2 as cv

In [None]:
# create VGG16 feature extractor
class FeatureExtractor:
  """VGG16 (ImageNet weights) cut at its 'fc1' layer, used as an image descriptor."""

  def __init__(self):
    vgg = VGG16(weights='imagenet')
    fc1_output = vgg.get_layer('fc1').output
    # re-wire the network so that prediction stops at the 'fc1' layer
    self.model = Model(inputs=vgg.input, outputs=fc1_output)

  def extract(self, img):
    """
    Compute a normalised VGG16 'fc1' descriptor for a single image.

    Args:
      img: image object offering ``resize`` and ``convert`` (the notebook
        passes the result of ``PIL.Image.open``).
    Returns:
      (np.ndarray): deep feature with the shape=(4096, ), divided by its
      Euclidean norm.
    """
    # VGG takes a 224x224 RGB input
    rgb_img = img.resize((224, 224)).convert('RGB')
    # (H, W, C) array -> batch of one image: (1, H, W, C)
    batch = np.expand_dims(image.img_to_array(rgb_img), axis=0)
    batch = preprocess_input(batch)
    # the prediction has one row per image in the batch; keep the only row
    raw_feature = self.model.predict(batch)[0]
    norm = np.linalg.norm(raw_feature)
    return raw_feature / norm

In [None]:
# extract 100 original images database
fe = FeatureExtractor()
original_dir = Path('./drive/MyDrive/Image_Forgery_Detection/Original_Image')
og_feature_dir = Path('./drive/MyDrive/Image_Forgery_Detection/OG_Feature_Extraction')
# np.save cannot create missing folders, so make sure the target exists first
og_feature_dir.mkdir(parents=True, exist_ok=True)
for img_path in sorted(original_dir.glob('*.jpg')):
  # extract feature; the context manager closes the image file once the
  # descriptor is computed instead of leaving one open handle per image
  with Image.open(img_path) as img:
    feature = fe.extract(img=img)
  feature_path = og_feature_dir/(img_path.stem + '.npy')
  # save feature
  np.save(feature_path, feature)

In [None]:
# extract whole images database
fe = FeatureExtractor()
whole_dir = Path('./drive/MyDrive/Image_Forgery_Detection/Whole')
whole_feature_dir = Path('./drive/MyDrive/Image_Forgery_Detection/Whole_Feature_Extracted')
# np.save cannot create missing folders, so make sure the target exists first
whole_feature_dir.mkdir(parents=True, exist_ok=True)
for img_path in sorted(whole_dir.glob('*.jpg')):
  # extract feature; the context manager closes the image file once the
  # descriptor is computed instead of leaving one open handle per image
  with Image.open(img_path) as img:
    feature = fe.extract(img=img)
  feature_path = whole_feature_dir/(img_path.stem + '.npy')
  # save feature
  np.save(feature_path, feature)

In [None]:
# saved descriptors of the original images (one .npy file per image)
og_feature_files = list(Path('./drive/MyDrive/Image_Forgery_Detection/OG_Feature_Extraction').glob('*.npy'))

# features_1[k] and img_paths_1[k] refer to the same image: the .jpg path is
# rebuilt from the stem of the .npy file
features_1 = [np.load(npy_file) for npy_file in og_feature_files]
img_paths_1 = [
    Path('./drive/MyDrive/Image_Forgery_Detection/Original_Image')/(npy_file.stem + ".jpg")
    for npy_file in og_feature_files
]

OG_features = np.array(features_1)

In [None]:
# saved descriptors of the whole image database (one .npy file per image)
whole_feature_files = list(Path('./drive/MyDrive/Image_Forgery_Detection/Whole_Feature_Extracted').glob('*.npy'))

# features_2[k] and img_paths_2[k] refer to the same image: the .jpg path is
# rebuilt from the stem of the .npy file
features_2 = [np.load(npy_file) for npy_file in whole_feature_files]
img_paths_2 = [
    Path('./drive/MyDrive/Image_Forgery_Detection/Whole')/(npy_file.stem + ".jpg")
    for npy_file in whole_feature_files
]

whole_features = np.array(features_2)

In [None]:
# CBIR max euclidean distance test function
def checkScoreDifference(image):
  """
  Euclidean distance between a query image and its nearest original image.

  The query descriptor is computed with the module-level extractor ``fe`` and
  compared against every row of the module-level ``OG_features`` array.

  Args:
    image: path of the query image, as accepted by ``Image.open``.
  Returns:
    The smallest distance when it is greater than 0; otherwise the string
    "not found" (also returned when ``OG_features`` is empty).
  """
  # close the file handle as soon as the descriptor has been computed
  with Image.open(image) as imaji:
    query_features = fe.extract(imaji)
  # nothing to compare against: the subtraction below would fail
  if len(OG_features) == 0:
    return "not found"
  dists = np.linalg.norm(OG_features - query_features, axis=1)
  # index of the closest original image. The previous version also looked up
  # its path through an undefined name (`img_paths`), which raised NameError;
  # the path was never returned, so only the distance is kept.
  nearest = np.argsort(dists)[0]
  difference = dists[nearest]
  if difference > 0:
    return difference
  return "not found"

# **Check CBIR Upper Threshold**

In [None]:
# check upper threshold
extractedResult = []
for img_path in sorted(Path('./drive/MyDrive/Image_Forgery_Detection/Forged_Image').glob('*.jpg')):
  extractedResult.append(checkScoreDifference(img_path))

# checkScoreDifference returns the string "not found" instead of a number for
# some queries; those entries cannot be ordered against floats, so only the
# numeric distances take part in the maximum.
numeric_distances = [dist for dist in extractedResult if not isinstance(dist, str)]

# np.max is used (and the result is not stored under the name `max`) so the
# built-in max() is neither shadowed nor required to be intact in the kernel.
upper_threshold = np.max(numeric_distances) if numeric_distances else float('nan')

print("Euclidean distance:")
print("Array length: {:.0f}".format(len(extractedResult)))
print("Upper threshold: {:.7f}".format(upper_threshold))

Euclidean distance:
Array length: 100
Upper threshold: 0.7611508


**Check the possibility of false positives (FP)**

In [None]:
# check possibility of FP
def checkInRange(image, threshold=0.7611508, top_k=4):
  """
  List the nearest database images whose distance to the query is in range.

  The query descriptor is computed with the module-level extractor ``fe`` and
  compared against every row of ``whole_features``; file names come from
  ``img_paths_2``.

  Args:
    image: path of the query image, as accepted by ``Image.open``.
    threshold: largest Euclidean distance still counted as "in range". The
      default is the upper threshold printed by the CBIR threshold check.
    top_k: number of nearest neighbours to report (default 4).
  Returns:
    A flat list with two entries per neighbour, nearest first: the file name
    and the distance, or "Not in range" and "-" when the distance exceeds
    ``threshold``.
  """
  # close the file handle as soon as the descriptor has been computed
  with Image.open(image) as imaji:
    query_features = fe.extract(imaji)
  dists = np.linalg.norm(whole_features - query_features, axis=1)
  nearest_ids = np.argsort(dists)[:top_k]

  item = []
  for idx in nearest_ids:
    if dists[idx] <= threshold:
      # Path.name gives the bare file name on every platform; splitting the
      # string on 'Whole/' only worked with forward-slash separators
      item.append(img_paths_2[idx].name)
      item.append(dists[idx])
    else:
      item.append("Not in range")
      item.append("-")
  return item

def checkImage(image):
  """
  Build one result row for a query image.

  Args:
    image: path of the query image.
  Returns:
    A list whose first entry is the query's file name, followed by the
    entries returned by ``checkInRange(image)``.
  """
  # label the row with the actual query file; it used to be hard-coded to
  # "10 E.jpg", so every row of the result table carried the same name
  result = [Path(image).name]

  # pass image for CBIR check
  result.extend(checkInRange(image))
  return result
  
# one result row per forged image, in sorted file-name order
forged_dir = Path('./drive/MyDrive/Image_Forgery_Detection/Forged_Image')
extractedResult = [checkImage(img_path) for img_path in sorted(forged_dir.glob('*.jpg'))]

# query name followed by four (image name, distance) column pairs
feature_names = [
    'Query_Image',
    'Exactly_the_same', 'Euclidean Distance',
    'Similar_Image_Pair', 'Euclidean Distance',
    '1st_False_Image_Pair', 'Euclidean Distance',
    '2nd_False_Image_Pair', 'Euclidean Distance',
]

resultExtracted = pd.DataFrame(extractedResult, columns=feature_names)
# show the complete table: no column or row truncation
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
display(resultExtracted)

Unnamed: 0,Query_Image,Exactly_the_same,Euclidean Distance,Similar_Image_Pair,Euclidean Distance.1,1st_False_Image_Pair,Euclidean Distance.2,2nd_False_Image_Pair,Euclidean Distance.3
0,10 E.jpg,10 E.jpg,0.0,10 O.jpg,0.45033,Not in range,-,Not in range,-
1,10 E.jpg,100 E.jpg,0.0,100 O.jpg,0.203433,Not in range,-,Not in range,-
2,10 E.jpg,11 E.jpg,0.0,11 O.jpg,0.64795,Not in range,-,Not in range,-
3,10 E.jpg,12 E.jpg,0.0,12 O.jpg,0.277712,Not in range,-,Not in range,-
4,10 E.jpg,13 E.jpg,0.0,13 O.jpg,0.245573,Not in range,-,Not in range,-
5,10 E.jpg,14 E.jpg,0.0,14 O.jpg,0.378995,Not in range,-,Not in range,-
6,10 E.jpg,15 E.jpg,0.0,15 O.jpg,0.326788,Not in range,-,Not in range,-
7,10 E.jpg,16 E.jpg,0.0,16 O.jpg,0.416509,Not in range,-,Not in range,-
8,10 E.jpg,17 E.jpg,0.0,17 O.jpg,0.285064,Not in range,-,Not in range,-
9,10 E.jpg,18 E.jpg,0.0,18 O.jpg,0.228911,Not in range,-,Not in range,-


# **Apply Second Filter (SSIM Filtration)**

In [None]:
# SSIM filtration test
def rescaleImage(frame, scale=None):
  """
  Resize an image to a fixed 500x500 size with area interpolation.

  Args:
    frame: image array accepted by ``cv.resize``.
    scale: accepted but not used by this function.
  Returns:
    The resized image returned by ``cv.resize``.
  """
  target_size = (500, 500)  # (width, height)
  return cv.resize(frame, target_size, interpolation=cv.INTER_AREA)
def detectSSIMScore(path1, path2):
  """
  Structural similarity (SSIM) score between two image files.

  Both images are loaded with OpenCV, resized with ``rescaleImage`` and
  converted to grayscale before being compared.

  Args:
    path1: path of the first image file.
    path2: path of the second image file.
  Returns:
    The SSIM score returned by ``compare_ssim``.
  Raises:
    FileNotFoundError: if either file cannot be read by ``cv.imread``.
  """
  grays = []
  for path in (path1, path2):
    loaded = cv.imread(path)
    # cv.imread signals an unreadable/missing file by returning None rather
    # than raising; fail here with the offending path instead of a cryptic
    # error inside cv.resize
    if loaded is None:
      raise FileNotFoundError("Could not read image: {}".format(path))
    grays.append(cv.cvtColor(rescaleImage(loaded), cv.COLOR_BGR2GRAY))

  # only the score is needed, so the full difference map is not requested
  return compare_ssim(grays[0], grays[1])

# file paths (as strings, which cv.imread expects) in sorted file-name order
imageOriginalArray1 = [
    str(img_path)
    for img_path in sorted(Path('./drive/MyDrive/Image_Forgery_Detection/Original_Image').glob('*.jpg'))
]
imageForgedArray1 = [
    str(img_path)
    for img_path in sorted(Path('./drive/MyDrive/Image_Forgery_Detection/Forged_Image').glob('*.jpg'))
]

# Originals and forgeries are paired by their position in the sorted lists.
# zip() walks however many pairs exist instead of assuming exactly 100 files
# (the fixed range raised IndexError with fewer images).
ssimScore = [
    detectSSIMScore(path1=original_path, path2=forged_path)
    for original_path, forged_path in zip(imageOriginalArray1, imageForgedArray1)
]

# Despite its name (kept so existing references still work), ssimMax holds the
# LOWEST score over all pairs. It used to be seeded from the leaked loop
# variable `i`; min() does not depend on leftover state.
ssimMax = min(ssimScore) if ssimScore else float('nan')

print("Lowest SSIM threshold is: {}".format(ssimMax))

Lowest SSIM threshold is: 0.6606496658828713


**Proof of SSIM filtration**

In [None]:
# SSIM filter proof using three image pairs that slip through the CBIR filtration
checkImage1 = "./drive/MyDrive/Image_Forgery_Detection/Forged_Image/36 E.jpg"
checkImage2 = "./drive/MyDrive/Image_Forgery_Detection/Original_Image/56 O.jpg"
checkImage3 = "./drive/MyDrive/Image_Forgery_Detection/Forged_Image/56 E.jpg"
checkImage4 = "./drive/MyDrive/Image_Forgery_Detection/Original_Image/15 O.jpg"
checkImage5 = "./drive/MyDrive/Image_Forgery_Detection/Forged_Image/78 E.jpg"
checkImage6 = "./drive/MyDrive/Image_Forgery_Detection/Original_Image/56 O.jpg"

# (message template, first image, second image) for each false-positive pair
fp_checks = [
    ("SSIM score on the first FP: {}", checkImage1, checkImage2),
    ("SSIM score on the second FP: {}", checkImage3, checkImage4),
    ("SSIM score on the third FP: {}", checkImage5, checkImage6),
]

for template, first_path, second_path in fp_checks:
  print(template.format(detectSSIMScore(first_path, second_path)))

SSIM score on the first FP: 0.12162511065650397
SSIM score on the second FP: 0.508247072073602
SSIM score on the third FP: 0.6377620623164818
