In [2]:
from PIL import Image
import numpy as np
import os
import keras
from keras import ops
from keras import layers
from keras import models
from keras import regularizers

import pprint
import matplotlib.pyplot as plt
import skimage as ski


# Data Preprocessing #

### Clean Resized Data ###

In [3]:
FORG_DATA_PATH = "signatures/resized_forg"
ORG_DATA_PATH = "signatures/resized_org"


'''
For each image, 
    converts to black and white, 
    turns into an np array, and 
    stores it in dict as:
        forgeries[IMAGE_NAME]
            or
        originals[IMAGE_NAME]
'''
forgeries = {}
for filename in os.listdir(FORG_DATA_PATH):
    name = filename.split(".")[0]
    filepath = FORG_DATA_PATH + "/" + filename
    with Image.open(filepath) as img:
        # Manual conversion to B&W using otsu thresholding
        greyscale = img.convert('L')
        threshold = ski.filters.threshold_otsu(np.asarray(greyscale))
        thresholded = greyscale.point( lambda p: 255 if p > threshold else 0 )
        black_and_white = thresholded.convert('1')  

        forgeries[name] = np.array(black_and_white)

originals = {}
for filename in os.listdir(ORG_DATA_PATH):
    name = filename.split(".")[0]
    filepath = ORG_DATA_PATH + "/" + filename
    with Image.open(filepath) as img:
        # Manual conversion to B&W using otsu thresholding
        greyscale = img.convert('L')
        hreshold = ski.filters.threshold_otsu(np.asarray(greyscale))
        thresholded = greyscale.point( lambda p: 255 if p > threshold else 0 )
        black_and_white = thresholded.convert('1') 

        originals[name] = np.array(black_and_white)

### Format into testing/training data ###

In [4]:
# Reformat into sig_dict[user_id] -> {"originals": {}, "forgeries", {}}

sig_dict = {}

for key in forgeries.keys():
    user_id = key.split("_")[1]
    sig_number = key.split("_")[2]

    if user_id in sig_dict.keys():
        sig_dict[user_id]["forgeries"][sig_number] = forgeries[key]
    else:
        sig_dict[user_id] = {"forgeries" : {sig_number : forgeries[key]},
                             "originals" : {}
                             }

for key in originals.keys():
    user_id = key.split("_")[1]
    sig_number = key.split("_")[2]
    sig_dict[user_id]["originals"][sig_number] = originals[key]
    # sig_dict[user_id]["originals"][sig_number] = "org"

'''    
Current format:
sig_dict = {
    [user number] : { 
            "forgeries" : {"1": [first_forgery],
                           "2": [second_forgery],
                           ...},
            "originals" : "1": [first_original],
                           "2": [second_original],
                           ...}
            }
    "2" : { ...
    }
'''


'    \nCurrent format:\nsig_dict = {\n    [user number] : { \n            "forgeries" : {"1": [first_forgery],\n                           "2": [second_forgery],\n                           ...},\n            "originals" : "1": [first_original],\n                           "2": [second_original],\n                           ...}\n            }\n    "2" : { ...\n    }\n'

In [5]:
x_train, y_train = [], []
x_test, y_test = [], []

# labels: 1 == genuine:genuine, 0 == genuine:forgery
# The first 44 users will be train and the remaining 11 will be in testing

for user_id in sig_dict.keys():
    user_sigs = sig_dict[user_id]

    if int(user_id) <= 44: 
        # TRAINING DATA

        for reference_sig_id in user_sigs["originals"]:
            reference_original = user_sigs["originals"][reference_sig_id]

            # Iterate through all other signatures and make a data point
            # Originals
            for org_id in user_sigs["originals"]:
                if org_id == reference_sig_id: continue
                org_image = user_sigs["originals"][org_id]

                x_train.append((reference_original, org_image))
                y_train.append(1)
            
            # Forgeries
            for forg_id in user_sigs["forgeries"]:
                forg_image = user_sigs["forgeries"][forg_id]

                x_train.append((reference_original, forg_image))
                y_train.append(0)


    else:
        #TESTING
        
        for reference_sig_id in user_sigs["originals"]:
            reference_original = user_sigs["originals"][reference_sig_id]

            # Iterate through all other signatures and make a data point
            # Originals
            for org_id in user_sigs["originals"]:
                if org_id == reference_sig_id: continue
                org_image = user_sigs["originals"][org_id]

                x_test.append((reference_original, org_image))
                y_test.append(1)
            
            # Forgeries
            for forg_id in user_sigs["forgeries"]:
                forg_image = user_sigs["forgeries"][forg_id]

                x_test.append((reference_original, forg_image))
                y_test.append(0)

x_train = np.array(x_train, dtype='float32')
y_train = np.array(y_train, dtype='float32')
x_test = np.array(x_test, dtype='float32')
y_test = np.array(y_test, dtype='float32')
    

In [6]:
x_train_1 = x_train[:, 0]
x_train_2 = x_train[:, 1]

x_test_1 = x_test[:, 0]
x_test_2 = x_test[:, 1]

# Model Construction #

In [9]:
@keras.saving.register_keras_serializable()
def euclidean_dist(vectors):
    x, y = vectors
    return ops.sqrt(ops.maximum(ops.sum(ops.square(x - y), axis=1, keepdims=True), keras.backend.epsilon()))

In [10]:
@keras.saving.register_keras_serializable()
def loss(margin=1):
    # Contrastive loss = mean( (1-true_value) * square(prediction) +
    #                         true_value * square( max(margin-prediction, 0) ))
    @keras.saving.register_keras_serializable()
    def contrastive_loss(y_true, y_pred):
        return ops.mean((1 - y_true) * ops.square(y_pred) + (y_true) * ops.square(ops.maximum(margin - (y_pred), 0)))

    return contrastive_loss

In [13]:
siamese_network = keras.saving.load_model('best_model.keras')

In [12]:
# Predict similarity scores on the test set
y_pred = siamese_network.predict((x_test_1, x_test_2)).flatten()

# Set a threshold for classification (e.g., 0.5)
threshold = 0.5

# Initialize counters
total_forgeries = 0
false_positives = 0

total_genuines = 0
false_negatives = 0

# Go through each prediction
for i in range(len(y_test)):
    actual_label = y_test[i]     # 0 = forgery, 1 = genuine
    predicted_score = y_pred[i]  # closer to 1 = predicted genuine, closer to 0 = predicted forgery

    if actual_label == 0:  # It's a forgery
        total_forgeries += 1
        if predicted_score >= threshold:
            false_positives += 1  # Predicted genuine, but it's a forgery

    elif actual_label == 1:  # It's genuine
        total_genuines += 1
        if predicted_score < threshold:
            false_negatives += 1  # Predicted forgery, but it's genuine

# Compute rates
fp_rate = false_positives / total_forgeries if total_forgeries > 0 else 0
fn_rate = false_negatives / total_genuines if total_genuines > 0 else 0

# Print results
print(f"False Positive Rate (forgeries predicted as genuine): {fp_rate * 100:.2f}%")
print(f"False Negative Rate (genuines predicted as forgery): {fn_rate * 100:.2f}%")


[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 375ms/step
False Positive Rate (forgeries predicted as genuine): 32.62%
False Negative Rate (genuines predicted as forgery): 23.55%
