In [1]:
# Cell to import libraries
import numpy as np
import random
from PIL import Image
from PIL.ImageOps import invert
import numpy as np
from torch import Tensor
from torch.utils.data import Dataset
import pickle
import torch

In [2]:
# Import model functions and load the pre-trained model
from Model import SiameseConvNet, distance_metric
from torch import load
from torch.utils.data import DataLoader

# Inference runs on the CPU; `map_location=device` asks torch.load to place the
# checkpoint's tensors on that device.
device = torch.device('cpu')
model = SiameseConvNet()
# Hand torch.load the checkpoint path instead of an `open(...)` handle so that torch
# opens and closes the file itself (the handle created here before was never closed).
model.load_state_dict(load('Models/model_large_epoch_20', map_location=device))

<All keys matched successfully>

In [3]:
# Create path for both genuine and forgery images
# Both are printf-style templates with two %d slots; the dataset-creation cell fills
# them with (person id, signature id).
base_path_genuine = 'Datasets/cedar1/full_org/original_%d_%d.png'
base_path_forgery = 'Datasets/cedar1/full_forg/forgeries_%d_%d.png'

In [4]:
# Create test dataset. The rationale for number of samples is in the appendix of the report
# Every iteration picks one person and one anchor signature, then emits two pairs that
# share that anchor: (anchor, other genuine signature) labelled 1 and
# (anchor, forgery) labelled 0.
test = []
n_samples_of_each_class = 2025
n_persons = 55            # person ids are drawn from 1..n_persons
n_signs_per_person = 24   # signature ids are drawn from 1..n_signs_per_person

# Private, seeded generator: the sampled pairs (and therefore the reported accuracy)
# are identical on every Restart & Run All, and the global `random` state is untouched.
pair_rng = random.Random(0)

for _ in range(n_samples_of_each_class):
    anchor_person = pair_rng.randint(1, n_persons)
    anchor_sign = pair_rng.randint(1, n_signs_per_person)
    # The genuine partner must be a different signature of the same person; choosing
    # from the remaining ids replaces the draw-until-different retry loop.
    pos_sign = pair_rng.choice(
        [sign for sign in range(1, n_signs_per_person + 1) if sign != anchor_sign]
    )
    neg_sign = pair_rng.randint(1, n_signs_per_person)
    anchor_path = base_path_genuine % (anchor_person, anchor_sign)
    positive = [anchor_path, base_path_genuine % (anchor_person, pos_sign), 1]
    negative = [anchor_path, base_path_forgery % (anchor_person, neg_sign), 0]
    test.append(positive)
    test.append(negative)


In [5]:
# Preprocessing and Dataloaders
# Preprocessing involves resizing and inverting the image before binarizing with a threshold intensity of 50
def invert_image(path):
	"""Load an image and return it as an inverted, binarized array.

	The image is converted to 8-bit grayscale, resized with PIL to size
	[220, 155], inverted, and then thresholded: pixels whose inverted intensity
	is <= 50 become 0 and every other pixel becomes 255.

	Args:
		path: Location of the image file, passed to `Image.open`.

	Returns:
		A 2-D numpy array holding only the values 0 and 255, with the dtype that
		`np.array` produced for the inverted image.
	"""
	# The context manager closes the underlying file as soon as the pixel data has
	# been copied out; the handle was previously left open until garbage collection.
	with Image.open(path) as image_file:
		grayscale = image_file.convert('L').resize([220, 155])
		image_array = np.array(invert(grayscale))
	# Vectorized threshold: same result as visiting every pixel in a Python double
	# loop, without tens of thousands of interpreter-level iterations per image.
	binarized = np.where(image_array <= 50, 0, 255)
	return binarized.astype(image_array.dtype)

def convert_to_image_tensor(image_array):
	"""Divide a pixel array by 255.0 and return it as a tensor viewed as (1, 220, 155).

	Args:
		image_array: Numeric numpy array with 220 * 155 elements.

	Returns:
		A `Tensor` of shape (1, 220, 155) holding `image_array / 255.0`.
	"""
	# NOTE(review): `view` only reinterprets the flat buffer, it does not transpose.
	# `invert_image` resizes with PIL size [220, 155], which presumably yields an
	# array of shape (155, 220) -- confirm this layout matches what the model was
	# trained on before changing it.
	scaled = image_array / 255.0
	as_tensor = Tensor(scaled)
	return as_tensor.view(1, 220, 155)


# Persist the sampled pairs so that TestDataset can reload the same index.
# A later cell defines a function that is also named `test`. If this cell is re-run
# after that one, `test` is no longer the list of pairs and pickle would silently
# store a reference to the function instead, so fail fast with a clear message.
if not isinstance(test, list):
	raise TypeError('`test` must be the list of image pairs; re-run the dataset creation cell first')
with open('test_index_CEDAR.pkl', 'wb') as test_index_file:
	pickle.dump(test, test_index_file)


class TestDataset(Dataset):
	"""Dataset of image pairs described by a pickled index file.

	Each index entry is a sequence `[path_a, path_b, label]`. Only the index is
	held in memory; the images are opened and preprocessed on access.
	"""

	def __init__(self, index_path='test_index_CEDAR.pkl'):
		"""Load the list of pairs.

		Args:
			index_path: Pickle file containing the list of pairs. Defaults to
				'test_index_CEDAR.pkl', the file this class always read before.
		"""
		# NOTE: unpickling can execute arbitrary code; only point this at index
		# files that were produced locally.
		with open(index_path, 'rb') as test_index_file:
			self.pairs = pickle.load(test_index_file)

	def __getitem__(self, index):
		"""Return `[X, Y, label]` for the pair stored at `index`.

		X and Y are the two images of the pair after `invert_image` followed by
		`convert_to_image_tensor`; `label` is the third element of the entry.
		"""
		item = self.pairs[index]
		X = convert_to_image_tensor(invert_image(item[0]))
		Y = convert_to_image_tensor(invert_image(item[1]))
		return [X, Y, item[2]]

	def __len__(self):
		"""Return the number of pairs in the index."""
		return len(self.pairs)
  

In [6]:
# Compute accuracy as average of True Positive and True Negative rates
def compute_accuracy_roc(predictions, labels, step=0.001):
    """Find the distance threshold that maximizes balanced accuracy.

    A threshold `d` is swept from min(predictions) up to max(predictions) in
    increments of `step`. Entries with distance <= d are treated as predicted
    "same" (label 1) and entries with distance > d as predicted "different"
    (label 0). The accuracy at each threshold is the mean of the true-positive
    rate and the true-negative rate.

    If only one of the two classes occurs in `labels`, the accuracy is the rate
    of that class alone. This avoids dividing by a zero class count, which
    previously produced NaN and made the function silently return (0, 0).

    Args:
        predictions: Array of distances; flattened before use.
        labels: Array of 0/1 labels, one per prediction; flattened before use.
        step: Threshold increment. Defaults to 0.001.

    Returns:
        A tuple `(max_acc, d_optimal)`: the best accuracy found and the first
        threshold that reaches it. Both are 0 if no threshold scores above 0.

    Raises:
        ValueError: If `predictions` is empty.
    """
    distances = np.asarray(predictions).ravel()
    targets = np.asarray(labels).ravel()
    if distances.size == 0:
        raise ValueError('predictions must not be empty')

    dmax = np.max(distances)
    dmin = np.min(distances)
    nsame = np.sum(targets == 1)
    ndiff = np.sum(targets == 0)
    has_same = nsame > 0
    has_diff = ndiff > 0

    max_acc = 0
    d_optimal = 0
    # `dmax + step` as the stop value is meant to let the sweep reach the largest distance.
    for d in np.arange(dmin, dmax + step, step):
        tpr = float(np.sum(targets[distances <= d] == 1)) / nsame if has_same else 0.0
        tnr = float(np.sum(targets[distances > d] == 0)) / ndiff if has_diff else 0.0

        if has_same and has_diff:
            acc = 0.5 * (tpr + tnr)
        else:
            # At most one class is present, so at most one of the rates is non-zero.
            acc = tpr + tnr

        # Strict comparison keeps the smallest threshold among ties.
        if acc > max_acc:
            max_acc = acc
            d_optimal = d

    return max_acc, d_optimal



In [7]:
# Compute the accuracy on the test data set
# Running totals updated by test(): summed per-batch accuracy, summed per-batch
# threshold, and the number of batches seen.
batch_avg_acc, batch_avg_d, n_batch = 0, 0, 0


def test():
    """Evaluate the loaded model on the pickled test pairs, one batch at a time.

    For every batch of 10 shuffled pairs the model embeds both images,
    `distance_metric` turns the embeddings into distances, and
    `compute_accuracy_roc` returns the best accuracy and threshold for that
    batch. The results are printed and added to the module-level accumulators
    `batch_avg_acc`, `batch_avg_d` and `n_batch`.

    Returns:
        None. Results are reported through the printed lines and the globals.
    """
    # NOTE(review): this function shares its name with the list of pairs built in
    # the dataset-creation cell, so defining it rebinds `test`.
    # NOTE(review): the threshold is chosen per batch using that batch's own
    # labels, so the averaged accuracy is presumably optimistic -- confirm this
    # is the intended metric.
    global batch_avg_acc, batch_avg_d, n_batch
    model.eval()

    test_dataset = TestDataset()
    loader = DataLoader(test_dataset, batch_size=10, shuffle=True)

    # Inference only: without no_grad() autograd would record a graph for every batch.
    with torch.no_grad():
        for batch_index, data in enumerate(loader):
            A, B = data[0], data[1]
            labels = data[2].long()

            # Call the module itself rather than .forward() so registered hooks run.
            f_a, f_b = model(A, B)
            dist = distance_metric(f_a, f_b)

            acc, d = compute_accuracy_roc(dist.detach().numpy(), labels.detach().numpy())
            print('Max accuracy for batch {} = {} at d = {}'.format(batch_index, acc, d))
            batch_avg_acc += acc
            batch_avg_d += d
            n_batch += 1


# Run the evaluation, then report the per-batch results averaged over all batches.
print('CEDAR:')
test()
batch_means = (batch_avg_acc / n_batch, batch_avg_d / n_batch)
print('Avg acc across all batches={} at d={}'.format(*batch_means))


CEDAR:
Max accuracy for batch 0 = 0.6666666666666666 at d = 0.26400374642014524
Max accuracy for batch 1 = 0.75 at d = 0.27408815485239046
Max accuracy for batch 2 = 0.8333333333333333 at d = 0.16687067449092874
Max accuracy for batch 3 = 0.8 at d = 0.16015798848867424
Max accuracy for batch 4 = 0.8333333333333333 at d = 0.1090470126271248
Max accuracy for batch 5 = 0.7 at d = 0.10216946506500246
Max accuracy for batch 6 = 0.6666666666666666 at d = 0.06750118732452393
Max accuracy for batch 7 = 0.6666666666666667 at d = 0.20017619964480415
Max accuracy for batch 8 = 0.7619047619047619 at d = 0.18272349041700375
Max accuracy for batch 9 = 0.9 at d = 0.1633798882663251
Max accuracy for batch 10 = 0.75 at d = 0.1890811891555787
Max accuracy for batch 11 = 1.0 at d = 0.2322803023755552
Max accuracy for batch 12 = 0.6666666666666667 at d = 0.15807620523870003
Max accuracy for batch 13 = 0.625 at d = 0.0449083037674427
Max accuracy for batch 14 = 0.8571428571428572 at d = 0.17124111729860314