In [10]:
import os
import numpy as np
import tensorflow as tf
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Enable GPU memory growth
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print("Error enabling GPU memory growth:", e)

# Paths
base_dir = os.path.join('dataset', 'lfw-deepfunneled', 'lfw-deepfunneled')
pairs_train_path = 'pairsDevTrain.txt'
pairs_test_path = 'pairsDevTest.txt'

# Normalize file and directory names for compatibility
def normalize_name(name):
    return name.replace(' ', '_')

# Function to load pairs from the LFW pairs file
def load_pairs(pairs_path, base_dir):
    pairs = []
    with open(pairs_path, 'r') as f:
        for line in f.readlines()[1:]:
            parts = line.strip().split()
            if len(parts) == 3:  # Positive pair
                person, img1, img2 = parts
                person = normalize_name(person)
                img1_path = os.path.join(base_dir, person, f"{person}_{int(img1):04d}.jpg")
                img2_path = os.path.join(base_dir, person, f"{person}_{int(img2):04d}.jpg")
                if os.path.exists(img1_path) and os.path.exists(img2_path):
                    pairs.append((img1_path, img2_path, 1))
            elif len(parts) == 4:  # Negative pair
                person1, img1, person2, img2 = parts
                person1, person2 = normalize_name(person1), normalize_name(person2)
                img1_path = os.path.join(base_dir, person1, f"{person1}_{int(img1):04d}.jpg")
                img2_path = os.path.join(base_dir, person2, f"{person2}_{int(img2):04d}.jpg")
                if os.path.exists(img1_path) and os.path.exists(img2_path):
                    pairs.append((img1_path, img2_path, 0))
    return pairs

# Load pairs
train_pairs = load_pairs(pairs_train_path, base_dir)
test_pairs = load_pairs(pairs_test_path, base_dir)

# Load VGGFace model for embeddings (ResNet50 backbone)
image_input = Input(shape=(224, 224, 3))
vggface_model = VGGFace(model='resnet50', input_tensor=image_input, include_top=False, pooling='avg')
embedding_model = Model(inputs=vggface_model.input, outputs=vggface_model.output)

# Fine-tune the model
for layer in vggface_model.layers[:-5]:
    layer.trainable = False

# Function to preprocess and augment images
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True
)

def preprocess_image(img_path, augment=False):
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = preprocess_input(img_array, version=1)
    if augment:
        img_array = datagen.random_transform(img_array)
    return img_array

# Function to generate embeddings
def generate_embeddings(pairs, model, augment=False):
    embeddings = []
    labels = []
    for img1_path, img2_path, label in pairs:
        try:
            img1 = preprocess_image(img1_path, augment=augment)
            img2 = preprocess_image(img2_path, augment=augment)
            emb1 = model.predict(np.expand_dims(img1, axis=0), verbose=0)[0]
            emb2 = model.predict(np.expand_dims(img2, axis=0), verbose=0)[0]
            embeddings.append((emb1, emb2))
            labels.append(label)
        except Exception as e:
            print(f"Error processing pair: {img1_path}, {img2_path}: {e}")
    return embeddings, labels

# Generate embeddings for training and testing pairs
train_embeddings, train_labels = generate_embeddings(train_pairs, embedding_model, augment=True)
test_embeddings, test_labels = generate_embeddings(test_pairs, embedding_model, augment=False)

# Normalize embeddings
train_embeddings = [(normalize([emb1])[0], normalize([emb2])[0]) for emb1, emb2 in train_embeddings]
test_embeddings = [(normalize([emb1])[0], normalize([emb2])[0]) for emb1, emb2 in test_embeddings]

# Compute cosine similarity and optimize threshold
def find_best_threshold(embeddings, labels):
    similarities = [cosine_similarity([emb1], [emb2])[0][0] for emb1, emb2 in embeddings]
    best_acc, best_threshold = 0, 0.5
    for threshold in np.arange(0.1, 0.9, 0.01):
        predictions = [1 if sim > threshold else 0 for sim in similarities]
        acc = np.mean(np.array(predictions) == np.array(labels))
        if acc > best_acc:
            best_acc = acc
            best_threshold = threshold
    return best_threshold, best_acc

# Evaluate on training and testing pairs
train_threshold, train_accuracy = find_best_threshold(train_embeddings, train_labels)
test_threshold, test_accuracy = find_best_threshold(test_embeddings, test_labels)

print(f"Train Accuracy: {train_accuracy * 100:.2f}% with Threshold: {train_threshold}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}% with Threshold: {test_threshold}")


Train Accuracy: 85.82% with Threshold: 0.43999999999999984
Test Accuracy: 88.20% with Threshold: 0.45999999999999985
