In [1]:
# Cell to import libraries

import numpy as np
import random
from PIL import Image
from PIL.ImageOps import invert
import numpy as np
from torch import Tensor
from torch.utils.data import Dataset
import pickle
import torch
from torch.utils.data import DataLoader

In [2]:
# Import model functions and load the pre-trained model

from Model import SiameseConvNet, distance_metric
from torch import load

device = torch.device('cpu')
model = SiameseConvNet()
model.load_state_dict(load(open('Models/model_large_epoch_20', 'rb'), map_location=device))


<All keys matched successfully>

In [3]:
# Create path for both genuine and forgery images

base_path_genuine = 'Datasets/BHSig260-Bengali/Genuine/B-S-%d-G-%s.tif'
base_path_forgery = 'Datasets/BHSig260-Bengali/Forgery/B-S-%d-F-%s.tif'


In [4]:
# Create test dataset. The rationale for number of samples is in the appendix of the report

test = []
n_samples_of_each_class = 3682

for _ in range(n_samples_of_each_class):
    anchor_person = random.randint(1, 100)
    anchor_sign = random.randint(1, 24)
    pos_sign = random.randint(1, 24)
    while (anchor_sign == pos_sign):
        pos_sign = random.randint(1, 24)
    neg_sign = random.randint(1, 30)
    if anchor_sign < 10:
        anchor_sign = "0" + str(anchor_sign)
    if pos_sign < 10:
        pos_sign = "0" + str(pos_sign)
    if neg_sign < 10:
        neg_sign = "0" + str(neg_sign)
    positive = [base_path_genuine % (anchor_person, anchor_sign), base_path_genuine % (anchor_person, pos_sign), 1]
    negative = [base_path_genuine % (anchor_person, anchor_sign), base_path_forgery % (anchor_person, neg_sign), 0]
    test.append(positive)
    test.append(negative)


In [5]:
# Preprocessing and Dataloaders
# Preprocessing involves resizing and inverting the image before binarizing with a threshold intensity of 50

def invert_image(path):
	image_file = Image.open(path) # open colour image
	image_file = image_file.convert('L').resize([220, 155])
	image_file = invert(image_file)
	image_array = np.array(image_file)
	for i in range(image_array.shape[0]):
		for j in range(image_array.shape[1]):
			if image_array[i][j]<=50:
				image_array[i][j]=0
			else:
				image_array[i][j]=255
	return image_array

def convert_to_image_tensor(image_array):
	image_array = image_array/255.0
	return Tensor(image_array).view(1, 220, 155)


with open('test_index_BHSigB.pkl', 'wb') as test_index_file:
	pickle.dump(test, test_index_file)


class TestDataset(Dataset):

	def __init__(self):
		with open('test_index_BHSigB.pkl', 'rb') as test_index_file:
			self.pairs = pickle.load(test_index_file)

	def __getitem__(self, index):
		item = self.pairs[index]
		X = convert_to_image_tensor(invert_image(item[0]))
		Y = convert_to_image_tensor(invert_image(item[1]))
		return [X, Y, item[2]]

	def __len__(self):
		return len(self.pairs)
  




In [6]:
# Compute accuracy as average of True Positive and True Negative rates

def compute_accuracy_roc(predictions, labels):
    dmax = np.max(predictions)
    dmin = np.min(predictions)
    nsame = np.sum(labels == 1)
    ndiff = np.sum(labels == 0)
    step = 0.001
    max_acc = 0

    d_optimal = 0
    for d in np.arange(dmin, dmax + step, step):
        idx1 = predictions.ravel() <= d
        idx2 = predictions.ravel() > d

        tpr = float(np.sum(labels[idx1] == 1)) / nsame
        tnr = float(np.sum(labels[idx2] == 0)) / ndiff

        acc = 0.5 * (tpr + tnr)

        if acc > max_acc:
            max_acc = acc
            d_optimal = d

    return max_acc, d_optimal



In [7]:
# Compute the accuracy on the test data set

batch_avg_acc = 0
batch_avg_d = 0
n_batch = 0


def test():
    model.eval()
    global batch_avg_acc, batch_avg_d, n_batch

    test_dataset = TestDataset()
    loader = DataLoader(test_dataset, batch_size=10, shuffle=True)

    for batch_index, data in enumerate(loader):
        A = data[0]
        B = data[1]
        labels = data[2].long()

        f_a, f_b = model.forward(A, B)
        dist = distance_metric(f_a, f_b)

        acc, d = compute_accuracy_roc(dist.detach().numpy(), labels.detach().numpy())
        print('Max accuracy for batch {} = {} at d = {}'.format(batch_index, acc, d))
        batch_avg_acc += acc
        batch_avg_d += d
        n_batch += 1


print('BHSigB:')
test()
print('Avg acc across all batches={} at d={}'.format(batch_avg_acc / n_batch, batch_avg_d / n_batch))


BHSigB:
Max accuracy for batch 0 = 0.625 at d = 0.058779806345701235
Max accuracy for batch 1 = 0.7619047619047619 at d = 0.13451007428765305
Max accuracy for batch 2 = 0.7 at d = 0.07343119269609455
Max accuracy for batch 3 = 0.625 at d = 0.01796450838446617
Max accuracy for batch 4 = 0.8333333333333333 at d = 0.05984264904260639
Max accuracy for batch 5 = 0.6000000000000001 at d = 0.0511943150162697
Max accuracy for batch 6 = 0.7142857142857143 at d = 0.0487057643979788
Max accuracy for batch 7 = 0.875 at d = 0.11201131370663643
Max accuracy for batch 8 = 0.5952380952380952 at d = 0.2193247511833908
Max accuracy for batch 9 = 0.875 at d = 0.10728269095718868
Max accuracy for batch 10 = 0.8571428571428572 at d = 0.09730710491538053
Max accuracy for batch 11 = 0.9166666666666667 at d = 0.15428865750879062
Max accuracy for batch 12 = 0.75 at d = 0.11694645085930833
Max accuracy for batch 13 = 0.6111111111111112 at d = 0.2834645841121676
Max accuracy for batch 14 = 0.6666666666666666 at 

  tpr = float(np.sum(labels[idx1] == 1)) / nsame


Max accuracy for batch 736 = 0 at d = 0
Avg acc across all batches=0.7674223579074327 at d=0.10732307312194281
