In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split

In [None]:
# Report whether TensorFlow can see a GPU.
# The previous check referenced `tf.test.is_gpu_available` without calling it, so the
# (always truthy) function object made this cell print "GPU" unconditionally. That
# helper is also deprecated; `tf.config.list_physical_devices` is the supported query
# and returns an empty list when no GPU is visible.
if tf.config.list_physical_devices("GPU"):
    print("GPU")
else:
    print("CPU")

In [None]:
from tensorflow.python.client import device_lib

# Display every device TensorFlow reports for this machine (last expression of the
# cell, so the notebook renders the returned list).
# NOTE(review): `tensorflow.python.*` is an internal namespace; confirm whether
# `tf.config.list_physical_devices()` would be sufficient here.
device_lib.list_local_devices()

In [None]:
def load_dataset(dataset_root):
    """Collect file paths and integer class labels from a folder-per-class dataset.

    Every immediate sub-directory of ``dataset_root`` is treated as one class.
    Classes are sorted by name and numbered from 0; files inside each class are
    sorted by name as well, so the result is deterministic for a given tree.

    Args:
        dataset_root: Directory whose sub-directories each hold one class.

    Returns:
        A tuple ``(x, y)`` of equal-length lists: ``x`` holds the file paths and
        ``y`` the index of the class each file belongs to.
    """
    classes = sorted(
        entry for entry in os.listdir(dataset_root)
        if os.path.isdir(os.path.join(dataset_root, entry))
    )
    x = []
    y = []
    for class_id, cls in enumerate(classes):
        cls_dir = os.path.join(dataset_root, cls)
        # Keep regular files only: a nested directory inside a class folder would
        # otherwise be returned as an "image" path and fail when it is opened.
        cls_images = sorted(
            name for name in os.listdir(cls_dir)
            if os.path.isfile(os.path.join(cls_dir, name))
        )
        x.extend(os.path.join(cls_dir, name) for name in cls_images)
        y.extend([class_id] * len(cls_images))
    return x, y

In [None]:
# Load image paths (x) and integer class labels (y) from the folder-per-class dataset.
dataset_root = '/kaggle/input/facedatasets'  # Replace with the path to your dataset
x, y = load_dataset(dataset_root)

# Split into 80% train and 20% hold-out, then halve the hold-out into validation and
# test (10% each). Both splits use a fixed random_state so they are repeatable.
# The split is per image and not stratified or grouped, so a class can contribute
# images to more than one subset.
x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.2, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=42)

In [None]:
# Sanity check: total number of collected paths plus the first five paths and labels.
print("Number of images:", len(x))
print("Some image paths:", x[:5])
print("Corresponding labels:", y[:5])

In [None]:
def create_pairs(x, y, rng=None):
    """Build positive and negative sample pairs for siamese training.

    For every sample, up to two pairs are emitted, in this order:

    * a positive pair (label ``1``) with a different, randomly chosen sample of the
      same class -- skipped when the class has only one sample;
    * a negative pair (label ``0``) with a randomly chosen sample of a randomly
      chosen other class -- skipped when there is only one class.

    Args:
        x: Sequence of samples (image paths in this notebook).
        y: Sequence of hashable class labels, aligned with ``x``.
        rng: Optional ``random.Random`` instance used for all random choices.
            Defaults to the module-level ``random`` generator; pass a seeded
            instance to get reproducible pairs.

    Returns:
        A tuple ``(pair_images, pair_labels)`` where ``pair_images`` is a list of
        two-element lists ``[sample, partner]`` and ``pair_labels`` the matching
        list of 1/0 labels.
    """
    chooser = random if rng is None else rng

    # Index the samples of each class in a single pass instead of rescanning `y`
    # once per class.
    class_indices = {}
    for i, label in enumerate(y):
        class_indices.setdefault(label, []).append(i)
    # Hoisted out of the loop: the set of labels is the same for every sample.
    all_labels = list(class_indices)
    has_other_class = len(all_labels) > 1

    pair_images = []
    pair_labels = []

    for idx, (img_path, label) in enumerate(zip(x, y)):
        # Add a positive pair
        positive_options = class_indices[label]
        if len(positive_options) > 1:  # Ensure there is more than one image in the class
            positive_idx = idx
            while positive_idx == idx:  # re-draw until the partner is another sample
                positive_idx = chooser.choice(positive_options)
            pair_images.append([img_path, x[positive_idx]])
            pair_labels.append(1)

        # Add a negative pair
        if has_other_class:  # Ensure there is at least one other class
            negative_label = label
            while negative_label == label:  # re-draw until the class differs
                negative_label = chooser.choice(all_labels)
            negative_idx = chooser.choice(class_indices[negative_label])
            pair_images.append([img_path, x[negative_idx]])
            pair_labels.append(0)

    return pair_images, pair_labels

In [None]:
# Build positive/negative pairs from the training split only.
pairs_train, labels_train = create_pairs(x_train, y_train)

In [None]:
# Peek at the first ten training pairs and their 1 (same class) / 0 (different class) labels.
print(pairs_train[:10], labels_train[:10])

In [None]:
# Build pairs from the validation split only; a positive pair is produced only for
# samples whose class has at least two images inside this split.
pairs_val, labels_val = create_pairs(x_val, y_val)

In [None]:
# Build pairs from the test split only; a positive pair is produced only for
# samples whose class has at least two images inside this split.
pairs_test, labels_test = create_pairs(x_test, y_test)

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

def visualize(pairs, labels, to_show=6, num_col=3, predictions=None, test=False):
    """Visualizes pairs of images with labels (and predictions, if test dataset).

    Each pair is drawn side by side in one subplot of a grid.

    Arguments:
        pairs: Sequence of ``[path_1, path_2]`` image-path pairs.
        labels: Sequence of labels aligned with ``pairs``.
        to_show: Requested number of pairs. It is rounded to a whole number of
            rows of ``num_col`` cells (at least one row); if fewer pairs are
            available, the remaining cells are left blank.
        num_col: Number of grid columns.
        predictions: Model outputs aligned with ``pairs``; element ``[i][0]`` is
            shown in the title. Required when ``test`` is true.
        test: If true, titles show the true label and the prediction; otherwise
            only the label.

    Returns:
        None.

    Raises:
        ValueError: If ``test`` is true but ``predictions`` is ``None``.
    """
    if test and predictions is None:
        raise ValueError("predictions must be provided when test=True")

    num_row = max(to_show // num_col, 1)
    to_show = num_row * num_col

    # Never index past the data that actually exists.
    num_available = min(to_show, len(pairs), len(labels))
    if test:
        num_available = min(num_available, len(predictions))

    # squeeze=False keeps `axes` two-dimensional for every grid shape, including a
    # single row, a single column, or a single cell.
    fig, axes = plt.subplots(
        num_row, num_col, figsize=(5 * num_col, 5 * num_row), squeeze=False
    )
    for i, ax in enumerate(axes.flat):  # row-major, same order as the pairs
        ax.set_axis_off()
        if i >= num_available:
            continue  # blank cell: fewer pairs than grid slots

        # Context managers close the image files once the pixels are copied.
        with Image.open(pairs[i][0]) as img1, Image.open(pairs[i][1]) as img2:
            # Canvas is as tall as the taller image so neither one is cropped.
            combined_img = Image.new(
                'RGB', (img1.width + img2.width, max(img1.height, img2.height))
            )
            combined_img.paste(img1, (0, 0))
            combined_img.paste(img2, (img1.width, 0))

        ax.imshow(combined_img)

        if test:
            ax.set_title("True: {} | Pred: {:.5f}".format(labels[i], predictions[i][0]))
        else:
            ax.set_title("Label: {}".format(labels[i]))

    plt.tight_layout()
    plt.show()

In [None]:
# Show the first 24 training pairs in an 8 x 3 grid.
visualize(pairs_train, labels_train, to_show=24, num_col=3)

In [None]:
# Show the first 24 validation pairs in an 8 x 3 grid.
visualize(pairs_val, labels_val, to_show=24, num_col=3)

In [None]:
# Show the first 24 test pairs in an 8 x 3 grid.
visualize(pairs_test, labels_test, to_show=24, num_col=3)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, applications, regularizers

# Euclidean distance between two batches of embeddings (only referenced by the
# commented-out distance-based model variant further down in this cell).
def euclidean_distance(vects):
    """Return the row-wise Euclidean distance between two tensors.

    Arguments:
        vects: A pair ``(a, b)`` of tensors that can be subtracted from each other.

    Returns:
        A tensor with the distances, reduced over axis 1 with that axis kept.
        The squared sum is floored at the Keras epsilon before the square root.
    """
    first, second = vects
    squared_diff = tf.math.square(first - second)
    squared_norm = tf.math.reduce_sum(squared_diff, axis=1, keepdims=True)
    floor = tf.keras.backend.epsilon()
    return tf.math.sqrt(tf.math.maximum(squared_norm, floor))

# Embedding network: MobileNetV2 backbone followed by a small dense head
def create_embedding_network(input_shape):
    """Create the embedding model used by both branches of the siamese network.

    Arguments:
        input_shape: Shape passed to ``MobileNetV2`` as ``input_shape``.

    Returns:
        A ``keras.Sequential`` named ``'Embedding'``: the backbone (ImageNet
        weights, no top, average pooling) followed by Dense(512, relu),
        Dropout(0.5) and Dense(64, relu).
    """
    backbone = applications.MobileNetV2(
        input_shape=input_shape,
        include_top=False,
        weights='imagenet',
        pooling='avg',
    )

    # Fine-tuning setup: mark the backbone trainable, then freeze every layer
    # except its last four.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:-4]:
        frozen_layer.trainable = False

    embedding_layers = [
        backbone,
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(64, activation="relu"),
    ]
    return keras.Sequential(embedding_layers, name='Embedding')

# Input image shape passed to the embedding network and to both model inputs.
input_shape = (250, 250, 3)
embedding_network = create_embedding_network(input_shape)

# Two image inputs, one per element of a pair.
input_1 = layers.Input(shape=input_shape)
input_2 = layers.Input(shape=input_shape)

# Both inputs go through the same `embedding_network` object.
tower_1 = embedding_network(input_1)
tower_2 = embedding_network(input_2)

# Element-wise absolute difference of the two embeddings, then a single sigmoid unit.
# NOTE(review): a Lambda wrapping a Python lambda may complicate reloading the saved
# model -- confirm against the Keras version in use.
merge_layer = layers.Lambda(lambda tensors: tf.math.abs(tensors[0] - tensors[1]))([tower_1, tower_2])
output_layer = layers.Dense(1, activation="sigmoid")(merge_layer)

# Full model: two images in, one sigmoid score out.
siamese = keras.Model(inputs=[input_1, input_2], outputs=output_layer)

# Computing the Euclidean distance as output
#distance = layers.Lambda(euclidean_distance)([tower_1, tower_2])

# Creating the model
#siamese = keras.Model(inputs=[input_1, input_2], outputs=distance)

In [None]:
import tensorflow as tf

def contrastive_loss(margin=1):
    """Return a loss function closed over ``margin``.

    The returned ``loss(y_true, y_pred)`` computes the mean of
    ``(1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2``.
    Samples with ``y_true == 1`` are therefore penalised while the prediction is
    below ``margin``, and samples with ``y_true == 0`` are penalised by the
    squared prediction.

    Arguments:
        margin: Threshold used in the ``y_true == 1`` term.

    Returns:
        A callable ``loss(y_true, y_pred)`` producing a scalar tensor.
    """
    def loss(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        # Term active where the label is 0: squared prediction.
        zero_label_term = (1 - targets) * tf.square(y_pred)
        # Term active where the label is 1: squared shortfall below the margin.
        one_label_term = targets * tf.square(tf.maximum(margin - y_pred, 0))
        return tf.reduce_mean(zero_label_term + one_label_term)

    return loss

In [None]:
import numpy as np
from PIL import Image
import tensorflow as tf

def preprocess_image(image_path, target_size=(250, 250), augment=True):
    """Load one image as a float32 RGB array in [0, 1], optionally augmented.

    Arguments:
        image_path: Path of the image file to load.
        target_size: ``(width, height)`` passed to ``PIL.Image.resize``.
        augment: When true (the default, matching the previous behaviour of always
            augmenting) random flips and colour jitter are applied. Callers that
            need deterministic output, e.g. for validation or test data, can pass
            ``False``.

    Returns:
        A numpy float32 array with three channels and values in [0, 1].
    """
    # The context manager closes the file once the pixels have been copied out.
    with Image.open(image_path) as img:
        # Force three channels: grayscale, palette and RGBA files would otherwise
        # yield arrays whose shape does not match the model input.
        rgb = img.convert("RGB").resize(target_size)
        pixels = np.asarray(rgb, dtype=np.float32)

    # Scale to [0, 1] *before* augmenting: the brightness delta and the HSV-based
    # hue/saturation ops are defined for float images in that range. On a 0-255
    # image the 0.3 brightness delta was negligible and the hue/saturation results
    # were not well defined.
    image = tf.convert_to_tensor(pixels, dtype=tf.float32) / 255.0

    if augment:
        image = tf.image.random_flip_left_right(image)  # Random horizontal flip
        image = tf.image.random_flip_up_down(image)     # Random vertical flip
        image = tf.image.random_brightness(image, max_delta=0.3)  # Random brightness
        image = tf.image.random_contrast(image, lower=0.8, upper=1.2)  # Random contrast
        image = tf.image.random_saturation(image, lower=0.8, upper=1.2)  # Random saturation
        image = tf.image.random_hue(image, max_delta=0.1)  # Random hue
        # Brightness/contrast changes can push values outside the valid range.
        image = tf.clip_by_value(image, 0.0, 1.0)

    return image.numpy()  # numpy array so batches can be stacked with np.array

In [None]:
import numpy as np

def pair_generator(pairs, labels, batch_size):
    """Endlessly yield randomly sampled batches of preprocessed image pairs.

    Each batch is built by drawing ``batch_size`` indices with
    ``np.random.randint`` (so a pair can repeat) and running both paths of the
    drawn pair through ``preprocess_image``.

    Arguments:
        pairs: Sequence of ``[path_1, path_2]`` image-path pairs.
        labels: Sequence of labels aligned with ``pairs``.
        batch_size: Number of pairs per yielded batch.

    Yields:
        ``([first_images, second_images], batch_labels)`` where the two image
        arrays stack the first and second element of every drawn pair and
        ``batch_labels`` is a float32 array.
    """
    while True:
        first_images = []
        second_images = []
        targets = []

        for _ in range(batch_size):
            pick = np.random.randint(0, len(pairs))
            chosen_pair = pairs[pick]
            target = labels[pick]

            # Preprocess the first element before the second, one pair at a time.
            first_images.append(preprocess_image(chosen_pair[0]))
            second_images.append(preprocess_image(chosen_pair[1]))
            targets.append(target)

        # One stacked array per model input, labels cast to float32.
        inputs = [np.array(first_images), np.array(second_images)]
        yield inputs, np.array(targets).astype('float32')

In [None]:
# Compile with the custom contrastive loss (margin 1), the RMSprop optimizer and an
# accuracy metric, then print the layer summary.
siamese.compile(loss=contrastive_loss(margin=1), optimizer="RMSprop", metrics=["accuracy"])
siamese.summary()

In [None]:
#siamese.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#siamese.summary()

In [None]:
# Render the model graph with tensor shapes and layer names.
plot_model(siamese, show_shapes=True, show_layer_names=True)

In [None]:
# Training hyper-parameters.
BATCH_SIZE = 32
EPOCHS = 50

# Infinite batch generators over the training and validation pairs.
train_generator = pair_generator(pairs_train, labels_train, BATCH_SIZE)
validation_generator = pair_generator(pairs_val, labels_val, BATCH_SIZE)

In [None]:
# Train the model. The generators never terminate, so the number of batches per
# epoch is given explicitly: the number of pairs integer-divided by the batch size,
# for both training and validation.
history = siamese.fit(
    train_generator,
    steps_per_epoch=len(pairs_train) // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=len(pairs_val) // BATCH_SIZE,
    epochs=EPOCHS
)

In [None]:
def plt_metric(history, metric, title, has_valid=True):
    """Draw the training curve of one metric, plus its validation curve if present.

    Arguments:
        history: Mapping from metric name to a sequence of per-epoch values (the
            ``history`` attribute of the object returned by ``Model.fit``).
        metric: Key of the training metric in ``history``; the validation curve is
            read from the key ``"val_" + metric``.
        title: Title of the plot.
        has_valid: Whether to also plot the validation curve and add a legend.

    Returns:
        None.
    """
    # Training series first, then (optionally) the validation series.
    series_keys = [metric]
    if has_valid:
        series_keys.append("val_" + metric)

    for key in series_keys:
        plt.plot(history[key])

    if has_valid:
        plt.legend(["train", "validation"], loc="upper left")

    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.show()

In [None]:
# Plot training and validation accuracy per epoch.
plt_metric(history=history.history, metric="accuracy", title="Model accuracy")

In [None]:
# Plot training and validation contrastive loss per epoch.
plt_metric(history=history.history, metric="loss", title="Contrastive Loss")

In [None]:
# Evaluate on batches drawn from the test pairs. The generator samples indices at
# random and applies the same preprocessing as training, so the reported numbers
# can differ between runs.
test_generator = pair_generator(pairs_test, labels_test, BATCH_SIZE)
test_loss, test_accuracy = siamese.evaluate(test_generator, steps=len(pairs_test) // BATCH_SIZE)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

In [None]:
from tensorflow.keras.models import load_model

# Save the trained siamese model in the native `.keras` format.
# NOTE(review): the model uses a Lambda layer and a custom loss closure; reloading
# probably needs `custom_objects` or `compile=False` -- confirm before relying on it.
siamese.save('siamese_model.keras')

In [None]:
# Also save a copy of the same model under an `.h5` file name.
siamese.save('siamese_model.h5')

In [None]:
#predictions = siamese.predict([[pairs_test[i][0] for i in range(len(pairs_test))]
#                               ,[pairs_test[i][1] for i in range(len(pairs_test))]])
#visualize(pairs_test, labels_test, to_show=3, predictions=predictions, test=True)

In [None]:
#gg = [pairs_test[i][0] for i in range(len(pairs_test))]
#wp = [pairs_test[i][1] for i in range(len(pairs_test))]

In [None]:
# Generate a batch of pairs
#pairs_batch, labels_batch = next(pair_generator(pairs_test, labels_test, batch_size=10))

# Predict
#predictions = siamese.predict([np.array([pair[0] for pair in pairs_batch]), 
#                               np.array([pair[1] for pair in pairs_batch])])

# Visualize the predictions along with the true labels using the new function
#visualize_arrays(pairs_batch, labels_batch, predictions=predictions, to_show=10, num_col=3, test=True)

In [None]:
# Generate a batch of pairs
#pairs_batch, labels_batch = next(pair_generator(pairs_test, labels_test, batch_size=1))

# Predict
#predictions = siamese.predict(pairs_batch)

# Optionally: perform further analysis, visualize results, calculate additional metrics, etc.
