In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import os
import zipfile
import cv2

In [2]:
# Download and extract LFW dataset
url = "http://vis-www.cs.umass.edu/lfw/lfw.tgz"
dataset_path = tf.keras.utils.get_file("lfw.tgz", origin=url, extract=True)
lfw_path = os.path.dirname(dataset_path) + '/lfw/'

def load_lfw_dataset():
    images = []
    labels = []
    class_names = []

    # Iterate through subdirectories (each subdirectory represents a person)
    for dir_name in os.listdir(lfw_path):
        person_dir = os.path.join(lfw_path, dir_name)

        # Check if the path is a directory
        if os.path.isdir(person_dir):
            class_names.append(dir_name)
            for filename in os.listdir(person_dir):
                image_path = os.path.join(person_dir, filename)
                # Read and preprocess images
                img = tf.keras.preprocessing.image.load_img(image_path, target_size=(128, 128))
                img_array = tf.keras.preprocessing.image.img_to_array(img)
                images.append(img_array)
                labels.append(len(class_names) - 1)  # Use index of class_names as label

    # Convert lists to numpy arrays
    images = np.array(images)
    labels = np.array(labels)

    return images, labels, class_names

# Load the dataset
images, labels, class_names = load_lfw_dataset()

# Verify the shape of the loaded data
print("Number of images:", len(images))
print("Number of labels:", len(labels))
print("Class names:", class_names)
print("Shape of images array:", images.shape)
print("Shape of labels array:", labels.shape)

Downloading data from http://vis-www.cs.umass.edu/lfw/lfw.tgz
Number of images: 13233
Number of labels: 13233
Class names: ['Paul_Luvera', 'Jelena_Dokic', 'Michael_Sheehan', 'Joey_Buttafuoco', 'Casey_Crowder', 'William_Donaldson', 'Amy_Cotton', 'Astou_Ndiaye-Diatta', 'Albert_Pujols', 'Federico_Trillo', 'Evgeni_Plushenko', 'Laila_Ali', 'Jim_Hahn', 'Claire_Tomalin', 'Alex_King', 'Sergei_Yushenkov', 'Wolfgang_Clement', 'Michael_Milton', 'Bill_Butler', 'Danny_Glover', 'Kim_Dae-jung', 'Andy_Garcia', 'Craig_Fitzgibbon', 'Nora_Ephron', 'Huang_Suey-Sheng', 'Christina_Aguilera', 'Charley_Armey', 'Brian_Griese', 'Robert_Flodquist', 'Minnie_Mendoza', 'David_Caruso', 'Ilie_Nastase', 'Uthai_Pimchaichon', 'Melissa_Stark', 'Kim_Hong-gul', 'Dave_McGinnis', 'Zach_Parise', 'Bob_Geldof', 'Christopher_Conyers', 'Serge_Melac', 'Kent_Rominger', 'Abraham_Foxman', 'Dyab_Abou_Jahjah', 'Takahiro_Mori', 'Cesar_Maia', 'Lane_Odom', 'Meirion_Evans', 'Morris_Watts', 'Sam_Gerald', 'Mike_Matthews', 'Ruano_Pascual', 'J

In [3]:
# Base network (shared weights)
base_network = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu')
])

# Siamese network model
input_1 = tf.keras.layers.Input(shape=(128, 128, 3))
input_2 = tf.keras.layers.Input(shape=(128, 128, 3))

# Encoded feature representations of the inputs
encoded_1 = base_network(input_1)
encoded_2 = base_network(input_2)

# Calculate L2 distance between the encoded representations
distance = tf.keras.layers.Lambda(lambda x: tf.keras.backend.abs(x[0] - x[1]))([encoded_1, encoded_2])

# Output layer
output = layers.Dense(1, activation='sigmoid')(distance)

# Siamese model
siamese_model = tf.keras.models.Model(inputs=[input_1, input_2], outputs=output)

# Compile the model
siamese_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [4]:
def generate_siamese_pairs(images, labels):
    num_classes = len(np.unique(labels))
    pairs = []
    new_labels = []  # Use a different variable name here

    label_indices = [np.where(labels == i)[0] for i in range(num_classes)]

    for idx1 in range(len(images)):
        img1 = images[idx1]
        label1 = labels[idx1]
        img2 = img1 if np.random.randint(0, 2) else images[np.random.choice(label_indices[label1])]
        pairs.append([img1, img2])
        new_labels.append(1 if label1 == labels[idx1] else 0)

    return np.array(pairs), np.array(new_labels)

pairs, new_labels = generate_siamese_pairs(images, labels)

In [6]:
siamese_model.fit([pairs[:, 0], pairs[:, 1]], labels, epochs=2, batch_size=32)


Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7cddda41b310>

In [24]:
# Save the Siamese model as an HDF5 file
siamese_model.save('siamese_model.h5')
print('Siamese model saved as siamese_model.h5')

Siamese model saved as siamese_model.h5


In [21]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the saved Siamese model
loaded_model = load_model('siamese_model.h5')

# Define a function to perform inference on new image pairs
def predict_similarity(image_pair):
    # Normalize pixel values to be between 0 and 1
    img1 = image_pair[0] / 255.0
    img2 = image_pair[1] / 255.0
    
    # Expand dimensions to match model input shape (batch size of 1)
    img1 = tf.expand_dims(img1, axis=0)
    img2 = tf.expand_dims(img2, axis=0)
    
    # Predict the similarity (probability of being the same person)
    similarity = loaded_model.predict([img1, img2])[0][0]
    
    return similarity

In [22]:
# Define a function to read and preprocess images
def load_and_preprocess_images(image_paths):
    images = []
    for path in image_paths:
        # Read the image using OpenCV
        img = cv2.imread(path)
        # Resize the image to match the input size of the model (128x128)
        img = cv2.resize(img, (128, 128))
        # Convert BGR to RGB (OpenCV loads images in BGR format)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Normalize pixel values to be between 0 and 1
        img = img / 255.0
        images.append(img)
    return np.array(images)

# Provide the paths to your test images
image_paths = ["/kaggle/input/pictures/1.jpeg", "/kaggle/input/pictures/2.jpeg"]

# Load and preprocess the images
test_images = load_and_preprocess_images(image_paths)

# Example usage: predict similarity score
similarity_score = predict_similarity(test_images)

# Print the similarity score
print('Similarity Score:', similarity_score)

Similarity Score: 1.0
