Imports

In [1]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, Input, backend as K
from tensorflow.keras.optimizers import Adam
import cv2
import csv
import sys

ModuleNotFoundError: No module named 'cv2'

Enable GPU boost (on-demand GPU memory growth)

In [None]:
# Turn on memory growth for every GPU TensorFlow can see (nothing happens without a GPU).
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Report the problem and let the notebook carry on.
        print(err)
    else:
        # Only reached when memory growth was enabled on all GPUs.
        print("GPU boost set.")

Set paths

In [None]:
# Root folder of the dataset: one sub-folder per person, each holding that person's images.
# The location can be overridden with the SIAMESE_DATASET_DIR environment variable so the
# notebook is not tied to one machine; the original path stays the default.
DATASET_DIR = os.environ.get("SIAMESE_DATASET_DIR", "/home/emizu/Desktop/SiamezeDataset")
# File the trained model is saved to.
MODEL_FILE = "siamese_model.keras"

Set the image resize dimensions (and thus the nominal input size of the network)

In [None]:
# Target height and width (in pixels) images are resized to, and the channel count
# used in the dataset signature.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 105, 105, 3

Function to load the dataset into a dictionary with person names (strings) as keys and lists of images as values.

In [None]:
def prepare_dataset():
    """Load every person's images from ``DATASET_DIR``.

    Each sub-directory of ``DATASET_DIR`` is treated as one person and its
    images are loaded with ``load_images_from_folder``. Persons with fewer
    than two loadable images are skipped.

    Returns:
        dict: Maps the person (sub-directory name) to the list returned by
        ``load_images_from_folder``. Empty when ``DATASET_DIR`` is not an
        existing directory or no person has at least two images.
    """
    dataset = {}
    # A missing dataset folder yields an empty result, so the caller's
    # "dataset not found" handling runs instead of os.listdir raising.
    if not os.path.isdir(DATASET_DIR):
        return dataset
    # Sorted so the person order does not depend on the file system.
    for person in sorted(os.listdir(DATASET_DIR)):
        person_path = os.path.join(DATASET_DIR, person)
        if not os.path.isdir(person_path):
            continue
        images = load_images_from_folder(person_path)
        if len(images) > 1:
            dataset[person] = images
    return dataset

Function to obtain all images of a person. It resizes and normalizes them, groups them into a list and returns that list. Each image is stored as a tuple of the image file name and the actual image data.

In [None]:
def load_images_from_folder(folder):
    """Read, resize and normalize every PNG/JPG/JPEG image found in ``folder``.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        list: ``(filename, image)`` tuples, where ``image`` is a float32 array
        resized to ``IMG_WIDTH`` x ``IMG_HEIGHT`` and scaled by 1/255.
        Files that ``cv2.imread`` cannot read are left out.
    """
    accepted_suffixes = ('.png', '.jpg', '.jpeg')
    loaded = []
    for entry in os.listdir(folder):
        # The extension check is case-insensitive.
        if not entry.lower().endswith(accepted_suffixes):
            continue
        pixels = cv2.imread(os.path.join(folder, entry))
        if pixels is None:
            # Nothing could be read from this file; skip it.
            continue
        resized = cv2.resize(pixels, (IMG_WIDTH, IMG_HEIGHT))
        # Scale pixel values to [0, 1].
        loaded.append((entry, resized.astype("float32") / 255.0))
    return loaded

Function to split the dataset. Returns two sub-datasets in the form of two dictionaries, one for training (60% of each person's images) and one for validation (20%); the remaining images are not used.

In [None]:
def split_dataset(dataset, train_frac=0.6, val_frac=0.2):
    """Split every person's images into a training and a validation part.

    The images of each person are shuffled (on a copy, so the lists inside
    ``dataset`` keep their order) and then cut into consecutive slices.
    Images beyond the two slices are not returned.

    Args:
        dataset: dict mapping person -> list of images.
        train_frac: Fraction of each person's images used for training.
        val_frac: Fraction of each person's images used for validation.

    Returns:
        tuple: ``(train_set, val_set)``, two dicts with the same keys as
        ``dataset``. A slice may be empty when a person has few images,
        because the slice sizes are rounded down.

    Raises:
        ValueError: If a fraction is negative or the fractions sum to more
            than 1.
    """
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1 + 1e-9:
        raise ValueError("train_frac and val_frac must be >= 0 and sum to at most 1")

    train_set, val_set = {}, {}
    for person, images in dataset.items():
        # Shuffle a copy: the caller's lists must not be reordered as a side effect.
        shuffled = list(images)
        random.shuffle(shuffled)
        n = len(shuffled)
        train_end = int(train_frac * n)
        val_end = train_end + int(val_frac * n)
        train_set[person] = shuffled[:train_end]
        val_set[person] = shuffled[train_end:val_end]
    return train_set, val_set


Function to create pairs of images (tuples) from a dataset. Each pair contains 2 images, a label (1 for positive pairs and 0 for negative pairs) and metadata about the images (which is not fed into the model under any circumstance). The metadata is meant to serve as additional information when the output file is generated and for debugging purposes.
A positive pair is a pair containing two images of the same person.
A negative pair is a pair containing 2 images of 2 different persons.

In [None]:
def make_pairs(data_dict):
    """Build a shuffled list of positive and negative image pairs.

    Positive pairs are all two-image combinations within one person. The
    same number of negative pairs is then drawn at random, each from two
    different persons.

    Args:
        data_dict: dict mapping person -> list of ``(filename, image)`` tuples.

    Returns:
        list: Tuples ``(image_a, image_b, label, meta)`` with ``label`` 1 for
        positive and 0 for negative pairs, and
        ``meta = (person_a, person_b, filename_a, filename_b)``. When fewer
        than two persons have images, no negative pair can be formed and only
        the positive pairs are returned.
    """
    positive_pairs = []
    for person, images in data_dict.items():
        for i, (fname_a, img_a) in enumerate(images):
            for fname_b, img_b in images[i + 1:]:
                positive_pairs.append((img_a, img_b, 1, (person, person, fname_a, fname_b)))

    # Only persons that actually have images can take part in a negative pair.
    # Filtering once up front avoids retrying forever on empty persons and
    # rebuilding the candidate list for every pair.
    eligible = [person for person, images in data_dict.items() if images]

    negative_pairs = []
    if len(eligible) >= 2:
        while len(negative_pairs) < len(positive_pairs):
            # Two distinct persons, drawn uniformly.
            person1, person2 = random.sample(eligible, 2)
            fname1, img1 = random.choice(data_dict[person1])
            fname2, img2 = random.choice(data_dict[person2])
            negative_pairs.append((img1, img2, 0, (person1, person2, fname1, fname2)))

    all_pairs = positive_pairs + negative_pairs
    random.shuffle(all_pairs)
    return all_pairs

Function to build the base neural network for the Siamese network. It consists of convolutional layers for image processing and a dense layer that outputs the feature vector of an input (image).

In [None]:
def build_base_network(input_shape):
    """Create the convolutional encoder applied to each image of a pair.

    The network is a chain of six ReLU convolutions with three max-pooling
    layers in between, followed by a flatten and a 5000-unit sigmoid dense
    layer whose output is the feature vector.

    Args:
        input_shape: Shape of a single input image.

    Returns:
        Model: Keras model mapping an image to its 5000-value feature vector.
    """
    def conv(filters, kernel):
        # Square-kernel ReLU convolution.
        return layers.Conv2D(filters, (kernel, kernel), activation='relu')

    image_in = Input(shape=input_shape)
    feature_stack = (
        conv(100, 10),
        conv(125, 10),
        layers.MaxPooling2D(),
        conv(150, 7),
        layers.MaxPooling2D(),
        conv(175, 4),
        conv(250, 4),
        layers.MaxPooling2D(),
        conv(250, 2),
        layers.Flatten(),
        layers.Dense(5000, activation='sigmoid'),
    )

    # Apply the layers one after another, in the order listed above.
    features = image_in
    for layer in feature_stack:
        features = layer(features)
    return Model(image_in, features)

In [None]:
Distance layer of the Siamese network: computes the Euclidean distance between the feature vectors of the two inputs.

In [None]:
def euclidean_distance(vects):
    """Euclidean distance between two batches of feature vectors.

    Args:
        vects: Sequence of exactly two tensors; they are subtracted
            element-wise and reduced over axis 1.

    Returns:
        Tensor of distances with the reduced axis kept (``keepdims=True``).
    """
    left, right = vects
    squared_gap = K.square(left - right)
    total = K.sum(squared_gap, axis=1, keepdims=True)
    # Floor the sum at epsilon before taking the square root.
    return K.sqrt(K.maximum(total, K.epsilon()))

Function to create the entire Siamese neural network. On top of the shared base network and the distance layer (a Lambda layer), a final dense layer with a single sigmoid unit is added to produce the output.

In [None]:
def build_siamese_model(input_shape):
    """Assemble the two-input Siamese model.

    Both inputs go through the same base network (shared weights). The
    Euclidean distance between the two feature vectors is then fed to a
    single sigmoid unit.

    Args:
        input_shape: Shape of a single input image.

    Returns:
        Model: Keras model taking ``[image_a, image_b]`` and producing one
        sigmoid output per pair.
    """
    shared_encoder = build_base_network(input_shape)

    input_a, input_b = Input(shape=input_shape), Input(shape=input_shape)

    # One encoder instance is called twice, so both branches share weights.
    embeddings = [shared_encoder(input_a), shared_encoder(input_b)]

    distance = layers.Lambda(euclidean_distance)(embeddings)
    outputs = layers.Dense(1, activation='sigmoid')(distance)
    return Model([input_a, input_b], outputs)

Function to generate batches from pairs. This function decides what elements from a pair are fed into the model

In [None]:
def generate_batch(pairs, batch_size=32):
    """Endlessly yield shuffled batches built from ``pairs``.

    Only the two images and the label of each pair are emitted; the metadata
    element is never yielded. The pairs are reshuffled before every pass, on
    a private copy, so the caller's list keeps its order.

    Args:
        pairs: Sequence of ``(image_a, image_b, label, meta)`` tuples.
        batch_size: Maximum number of pairs per batch; the last batch of a
            pass may be smaller.

    Yields:
        tuple: ``((imgs_a, imgs_b), labels)`` where the images are stacked
        into arrays and ``labels`` is a float32 array.

    Raises:
        ValueError: On the first ``next()`` call, if ``pairs`` is empty or
            ``batch_size`` is smaller than 1. Without this check the
            generator would loop forever and never yield.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if len(pairs) == 0:
        raise ValueError("cannot generate batches from an empty list of pairs")

    # Shuffle a copy so the caller's list is not reordered behind its back.
    order = list(pairs)
    while True:
        random.shuffle(order)
        for start in range(0, len(order), batch_size):
            batch = order[start:start + batch_size]
            imgs_a = np.array([pair[0] for pair in batch])
            imgs_b = np.array([pair[1] for pair in batch])
            # float32 matches the label dtype declared in the dataset signature.
            labels = np.array([pair[2] for pair in batch], dtype="float32")
            yield (imgs_a, imgs_b), labels

Load and split dataset

In [None]:
print("Loading and preparing dataset...")
dataset = prepare_dataset()
if dataset:
    # At least one person was loaded: split into training and validation parts.
    train_set, val_set = split_dataset(dataset)
else:
    # Nothing usable was loaded; stop here.
    print("Dataset not found or no valid sub-folders/images. Exiting.")
    sys.exit(1)

Create pairs for training and validation

In [None]:
print("Creating pairs for training and validation...")
# Training pairs are built first, then validation pairs.
train_pairs, val_pairs = make_pairs(train_set), make_pairs(val_set)

Create final training and validation dataset variants with output signatures as required by tensorflow

In [None]:
def _pairs_to_dataset(pairs):
    """Wrap ``generate_batch(pairs, batch_size)`` in a ``tf.data.Dataset``.

    Args:
        pairs: List of ``(image_a, image_b, label, meta)`` tuples.

    Returns:
        tf.data.Dataset: Yields ``((imgs_a, imgs_b), labels)`` with float32
        image batches of shape ``(None, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)``
        and float32 labels of shape ``(None,)``.
    """
    image_spec = tf.TensorSpec(shape=(None, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=tf.float32)
    label_spec = tf.TensorSpec(shape=(None,), dtype=tf.float32)
    return tf.data.Dataset.from_generator(
        # batch_size is looked up when this lambda is called, not when the
        # dataset is created; it is assigned in a later cell and must be set
        # before the dataset is iterated.
        lambda: generate_batch(pairs, batch_size),
        output_signature=((image_spec, image_spec), label_spec),
    )


# Both datasets are built the same way; only the list of pairs differs.
train_dataset = _pairs_to_dataset(train_pairs)
val_dataset = _pairs_to_dataset(val_pairs)

Get input shape from one image sample

In [None]:
# First person in the training set -> first (filename, image) tuple -> image array.
sample = next(iter(train_set.values()))[0][1]
# The shape of that single image is used as the model's input shape.
input_shape = sample.shape

In [None]:
Build the model

In [None]:
print("Building new Siamese model...")
model = build_siamese_model(input_shape)
# Adam optimizer; binary cross-entropy is computed against the 0/1 pair labels.
optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Set training parameters 

In [None]:
# Number of pairs per batch. The dataset generator lambdas created in an earlier cell
# read this name only when the datasets are iterated, so it must be set before training.
batch_size = 50
# Number of epochs passed to model.fit.
epochs = 15
# Batches drawn per epoch for training and for validation. These are fixed values that do
# not depend on how many pairs exist; the batch generator loops over its pairs endlessly.
steps_per_epoch = 1000
validation_steps = 1000

Model training

In [None]:
print("Starting training...")
# Keep the object returned by fit so the per-epoch metrics remain available after
# training; assigning it also stops the cell from echoing a bare object repr.
history = model.fit(
    train_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_dataset,
    validation_steps=validation_steps
)

Save the model

In [None]:
print("Training completed. Saving model...")
# Write the trained model to the file named by MODEL_FILE.
model.save(MODEL_FILE)
print(f"Model saved to {MODEL_FILE}")