In [35]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
from sklearn.datasets import fetch_lfw_pairs
from tensorflow.keras.preprocessing import image_dataset_from_directory
from PIL import Image

In [36]:
def build_embedding_model(embedding_dim=128):
    base_model = tf.keras.applications.ResNet50(
        include_top=False, 
        weights="imagenet", 
        pooling="avg", 
        input_shape=(224, 224, 3)
    )

    # Fine-tune last layers
    base_model.trainable = True
    for layer in base_model.layers[:100]:
        layer.trainable = False

    inputs = layers.Input(shape=(224,224,3))
    x = tf.keras.applications.resnet50.preprocess_input(inputs)
    x = base_model(x, training=True)

    # Deeper embedding head
    x = layers.Dense(512, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(embedding_dim)(x)
    x = layers.Lambda(lambda t: tf.math.l2_normalize(t, axis=1))(x)

    return models.Model(inputs, x, name="EmbeddingModel")

embedding_model = build_embedding_model()

In [37]:
def build_siamese_model(embedding_model):
    input_a = layers.Input(shape=(224,224,3))
    input_b = layers.Input(shape=(224,224,3))

    emb_a = embedding_model(input_a)
    emb_b = embedding_model(input_b)

    # Learnable distance
    cosine = layers.Dot(axes=1, normalize=True)([emb_a, emb_b])

    return models.Model([input_a, input_b], cosine, name="SiameseNet")

siamese_model = build_siamese_model(embedding_model)

In [38]:
class ContrastiveLoss(tf.keras.losses.Loss):
    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def call(self, y_true, y_pred):
        # y_true = 1 (same), 0 (different)
        y_true = tf.cast(y_true, tf.float32)
        loss_same = tf.square(1 - y_pred)       # want similarity → 1
        loss_diff = tf.square(tf.maximum(y_pred - self.margin, 0))
        return tf.reduce_mean(y_true * loss_same + (1 - y_true) * loss_diff)

## Loading and Preprocessing the Dataset

In [39]:
from sklearn.datasets import fetch_lfw_pairs
import tensorflow as tf

lfw_pairs_train = fetch_lfw_pairs(subset='train', color=True, resize=0.5, download_if_missing=True)
lfw_pairs_test  = fetch_lfw_pairs(subset='test', color=True, resize=0.5, download_if_missing=True)

def preprocess(img):
    img = tf.image.resize(img, (224,224))
    img = tf.cast(img, tf.float32)
    return img

def make_dataset(lfw_pairs):
    X1, X2, y = lfw_pairs.pairs[:,0], lfw_pairs.pairs[:,1], lfw_pairs.target
    X1 = np.array([preprocess(img).numpy() for img in X1])
    X2 = np.array([preprocess(img).numpy() for img in X2])
    y  = np.array(y).astype("float32")
    return (X1, X2), y

(train_X1, train_X2), train_y = make_dataset(lfw_pairs_train)
(test_X1, test_X2), test_y   = make_dataset(lfw_pairs_test)

## Compiling and Training the Model

In [40]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_accuracy', 
                                         factor=0.1, 
                                         patience=7, 
                                         verbose=1, 
                                         mode='max', 
                                         min_delta=0.0001)

In [43]:
print("Sample labels:", np.unique(train_y, return_counts=True))


Sample labels: (array([0., 1.], dtype=float32), array([1100, 1100]))


In [41]:
# ----------------------------
# Compile + Train
# ---------------------------

siamese_model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=ContrastiveLoss(margin=0.5),
    metrics=["accuracy"]
)


history = siamese_model.fit(
    [train_X1, train_X2], train_y,
    validation_data=([test_X1, test_X2], test_y),
    epochs=30, batch_size=16, callbacks=[reduce_lr_on_plateau]
)

Epoch 1/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 445ms/step - accuracy: 0.5218 - loss: 0.4691 - val_accuracy: 0.5000 - val_loss: 0.1250 - learning_rate: 0.0010
Epoch 2/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 227ms/step - accuracy: 0.5041 - loss: 0.0693 - val_accuracy: 0.5000 - val_loss: 0.0981 - learning_rate: 0.0010
Epoch 3/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 219ms/step - accuracy: 0.5001 - loss: 0.0702 - val_accuracy: 0.5000 - val_loss: 0.1155 - learning_rate: 0.0010
Epoch 4/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 218ms/step - accuracy: 0.5038 - loss: 0.0691 - val_accuracy: 0.5000 - val_loss: 0.1250 - learning_rate: 0.0010
Epoch 5/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 222ms/step - accuracy: 0.4982 - loss: 0.0679 - val_accuracy: 0.5000 - val_loss: 0.1223 - learning_rate: 0.0010
Epoch 6/30
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

def plot_training_history(history, model, test_data):
    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_data[0], test_data[1], verbose=0)

    # Plot training curves
    plt.figure(figsize=(12,5))

    # ---- Loss ----
    plt.subplot(1,2,1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training & Validation Loss')
    plt.legend()
    plt.grid(True)

    # ---- Accuracy ----
    plt.subplot(1,2,2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training & Validation Accuracy')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

    # Print test metrics
    print(f"Final Test Loss: {test_loss:.4f}")
    print(f"Final Test Accuracy: {test_acc:.4f}")


In [None]:
plot_training_history(history, siamese_model, ([test_X1, test_X2], test_y))

In [None]:
# ----------------------------
# Inference on custom images
# ----------------------------
def compare_faces(img_path1, img_path2, model, threshold=0.5):
    def load_and_preprocess(path):
        img = Image.open(path).convert("RGB")
        img = img.resize((224,224))
        img = np.array(img).astype("float32")
        return img
    
    img1 = load_and_preprocess(img_path1)
    img2 = load_and_preprocess(img_path2)
    
    img1 = np.expand_dims(img1, axis=0)
    img2 = np.expand_dims(img2, axis=0)

    sim = model.predict([img1, img2])[0][0]
    print(f"Similarity score: {sim:.4f}")
    if sim > threshold:
        print("✅ Same person")
    else:
        print("❌ Different persons")
    return sim

In [None]:
siamese_model.save()