Imports

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, Input, backend as K
from tensorflow.keras.models import load_model
import cv2
import csv
import sys

Set paths

In [None]:
# Root folder of the dataset: one sub-folder per person, each holding that
# person's images. Set the SIAMESE_DATASET_DIR environment variable to use a
# different location without editing the notebook; the original path remains
# the default.
DATASET_DIR = os.environ.get("SIAMESE_DATASET_DIR", "/home/emizu/Desktop/SiamezeDataset")
# Saved siamese model that this notebook loads for inference.
MODEL_FILE = "siamese_model.keras"
# CSV file that receives one row per evaluated image pair.
RESULTS_FILE = "test_predictions.csv"

Set the image resize dimensions (and thus the nominal input size)

In [None]:
# Every loaded image is resized to this square resolution.
IMG_HEIGHT = IMG_WIDTH = 105
# Number of colour channels per image.
# NOTE(review): not referenced by the cells visible here — confirm it is still needed.
IMG_CHANNELS = 3

Function to load the dataset into a dictionary with person names(strings) as keys and image lists as values

In [None]:
def prepare_dataset(dataset_dir=None):
    """Load every person's images from the dataset directory.

    Each immediate sub-folder of the dataset directory is treated as one
    person, and the folder name becomes the dictionary key.

    Args:
        dataset_dir: Optional dataset root. When omitted, the module-level
            ``DATASET_DIR`` is used.

    Returns:
        dict mapping person name -> list of images as returned by
        ``load_images_from_folder``. People with fewer than two loaded images
        are left out. An empty dict is returned when the directory does not
        exist, so the caller can report the problem instead of hitting an
        unhandled ``FileNotFoundError``.
    """
    root = DATASET_DIR if dataset_dir is None else dataset_dir
    if not os.path.isdir(root):
        return {}

    dataset = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the person
    # order (and therefore any seeded random sampling downstream) reproducible.
    for person in sorted(os.listdir(root)):
        person_path = os.path.join(root, person)
        if not os.path.isdir(person_path):
            continue
        images = load_images_from_folder(person_path)
        if len(images) > 1:
            dataset[person] = images
    return dataset

Function to obtain all images of a person. It resizes them, normalizes them, groups them into a list and returns that list. Each entry of the list is a tuple of the image's filename and the actual image data.

In [None]:
def load_images_from_folder(folder):
    """Read, resize and normalise every image file found directly in ``folder``.

    Args:
        folder: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        list of ``(filename, image)`` tuples ordered by filename. Each image is
        resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``, converted to float32 and
        divided by 255. Files that OpenCV cannot decode are skipped silently.
    """
    valid_extensions = ('.png', '.jpg', '.jpeg')
    images = []
    # os.listdir returns entries in arbitrary order; sorting makes the image
    # order deterministic so a seeded shuffle yields the same split every run.
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(valid_extensions):
            continue
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            continue
        # cv2.resize expects the target size as (width, height).
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
        # Normalize pixel values to [0, 1].
        images.append((filename, img.astype("float32") / 255.0))
    return images

Function to split the dataset. It returns one sub-dataset, in the form of a dictionary, meant for live testing.

In [None]:
def split_dataset(dataset, train_fraction=0.8, seed=None):
    """Return the held-out (test) portion of every person's images.

    Each person's image list is shuffled and the trailing part, starting at
    index ``int(train_fraction * n)``, is kept as that person's test images.

    Args:
        dataset: dict mapping person name -> list of images.
        train_fraction: Share of each person's images that is *excluded* from
            the test set. The default of 0.8 keeps the last 20%.
        seed: Optional seed for a private random generator, giving a
            reproducible split. When omitted, the global ``random`` state is
            used.

    Returns:
        dict mapping person name -> list of test images.

    NOTE(review): the shuffle here is independent of whatever split the
    training run used, so test images may overlap with training images unless
    both sides share a seed — confirm against the training notebook.
    """
    rng = random if seed is None else random.Random(seed)
    test_set = {}
    for person, images in dataset.items():
        # Shuffle a copy so the caller's lists are not reordered as a side effect.
        shuffled = list(images)
        rng.shuffle(shuffled)
        test_set[person] = shuffled[int(train_fraction * len(shuffled)):]
    return test_set

Function to create pairs of images (tuples) from a dataset. Each pair contains 2 images, a label (1 for positive pairs and 0 for negative pairs) and metadata about the images (which is never fed into the model). The metadata is meant to provide additional information when the output file is generated and for debugging purposes.
A positive pair is a pair containing two images of the same person.
A negative pair is a pair containing two images of two different persons.

In [None]:
def make_pairs(data_dict):
    """Build a shuffled, balanced list of positive and negative image pairs.

    Args:
        data_dict: dict mapping person name -> list of ``(filename, image)``
            tuples.

    Returns:
        list of ``(img1, img2, label, (person1, person2, filename1, filename2))``
        tuples in random order. ``label`` is 1 when both images belong to the
        same person and 0 otherwise. Every unordered combination of two images
        of the same person yields one positive pair. The same number of
        negative pairs is drawn at random with replacement, so duplicates are
        possible. No negative pairs are produced when fewer than two people
        have images.
    """
    positive_pairs = []
    for person, images in data_dict.items():
        for i, (fname_a, img_a) in enumerate(images):
            for fname_b, img_b in images[i + 1:]:
                positive_pairs.append((img_a, img_b, 1, (person, person, fname_a, fname_b)))

    # Only people with at least one image can take part in a negative pair.
    # Filtering once up front (rather than retrying inside the sampling loop)
    # guarantees termination even when some image lists are empty.
    eligible = [person for person, images in data_dict.items() if images]

    negative_pairs = []
    if len(eligible) >= 2:
        for _ in range(len(positive_pairs)):
            # random.sample picks two *distinct* people in one step.
            person1, person2 = random.sample(eligible, 2)
            fname1, img1 = random.choice(data_dict[person1])
            fname2, img2 = random.choice(data_dict[person2])
            negative_pairs.append((img1, img2, 0, (person1, person2, fname1, fname2)))

    all_pairs = positive_pairs + negative_pairs
    random.shuffle(all_pairs)
    return all_pairs

Logical layer of the siamese network that compares the 2 inputs (their feature vectors).
This layer also needs to be defined in this script, as its logic is not stored in the saved siamese network.
Rather, when the siamese network is created and saved, only a reference is saved, indicating that a logical layer with the defined inputs and outputs should be linked; the function itself must therefore be supplied again at load time.

In [None]:
def euclidean_distance(vects):
    """Return the Euclidean distance between two batches of feature vectors.

    Args:
        vects: Pair ``(x, y)`` of tensors to compare.

    Returns:
        Tensor holding the square root of the squared differences summed along
        axis 1 (the summed axis is kept with size 1).
    """
    first, second = vects
    squared_diff = K.square(first - second)
    summed = K.sum(squared_diff, axis=1, keepdims=True)
    # Floor the sum at epsilon before taking the square root.
    floored = K.maximum(summed, K.epsilon())
    return K.sqrt(floored)

Load the model

In [None]:
# Stop early with a clear message when the trained model file is missing.
if not os.path.exists(MODEL_FILE):
    print(f"Model file {MODEL_FILE} not found. Exiting.")
    sys.exit(1)

print("Loading saved model...")
# The saved model only references the distance function by name, so its
# implementation has to be handed back to Keras through custom_objects.
model = load_model(MODEL_FILE, custom_objects={'euclidean_distance': euclidean_distance})

Prepare the test dataset for live predictions

In [None]:
print("Loading and preparing dataset for testing...")
dataset = prepare_dataset()
if not dataset:
    # Nothing usable was loaded, so there is nothing to evaluate.
    print("Dataset not found or no valid sub-folders/images. Exiting.")
    sys.exit(1)

# Keep only the held-out share of each person's images for evaluation.
test_set = split_dataset(dataset)
print("Creating test pairs...")
test_pairs = make_pairs(test_set)

Run the live predictions

In [None]:
print("Running live predictions on test pairs...")

# Number of pairs sent to the model per predict() call. Predicting in chunks
# avoids the per-call overhead of predicting one pair at a time, while keeping
# only one chunk of stacked images in memory.
PREDICT_BATCH_SIZE = 64

# One (file_info, true_label, prediction) tuple per test pair, in pair order.
predictions = []
for start in range(0, len(test_pairs), PREDICT_BATCH_SIZE):
    batch = test_pairs[start:start + PREDICT_BATCH_SIZE]
    # Stack the individual images into two arrays with a leading batch axis.
    left_images = np.stack([pair[0] for pair in batch])
    right_images = np.stack([pair[1] for pair in batch])
    batch_preds = model.predict(
        [left_images, right_images],
        batch_size=PREDICT_BATCH_SIZE,
        verbose=0,  # one progress bar per chunk would flood the cell output
    )
    for pair, pred_row in zip(batch, batch_preds):
        true_label, file_info = pair[2], pair[3]
        # The first element of each output row is the model's score for the pair.
        predictions.append((file_info, true_label, pred_row[0]))

In [None]:
Save the predictions in a CSV file

In [None]:
print(f"Saving predictions to {RESULTS_FILE} ...")

# Column order of the output file; each data row follows the same layout.
CSV_HEADER = ["Person1", "Person2", "File1", "File2", "TrueLabel", "Prediction"]

# newline='' lets the csv module control line endings itself.
with open(RESULTS_FILE, mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(CSV_HEADER)
    # Flatten each (file_info, true_label, pred) record into one CSV row.
    writer.writerows(
        [name_a, name_b, fname_a, fname_b, label, score]
        for (name_a, name_b, fname_a, fname_b), label, score in predictions
    )

print("Predictions saved to CSV.")