# Face Recognition Model

## Setup

In [None]:
import cv2
import os
import random
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Conv2D, Dense, MaxPooling2D, Input, Flatten
import shutil
from pathlib import Path
import re
import uuid
from tensorflow.keras.metrics import Precision, Recall
import shutil

In [None]:
# Target model: Model(inputs=[input_img, validation_img], outputs=<1 if same person, 0 otherwise>)

In [None]:
# Let TensorFlow grow GPU memory on demand rather than claiming it all at
# start-up. The device type must be spelled 'GPU': a misspelled type matches
# no devices, so the loop below would silently do nothing.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Dataset locations, relative to the notebook's working directory.
TRAIN = os.path.join('data', 'training')
TEST = os.path.join('data', 'test')
ARCH = os.path.join('data', 'archive')

# Directories used by the webcam-capture cell (ANC, POS) and by the
# duplicate-cleanup call (NEG) further down in this notebook.
# NOTE(review): 'positive' matches the example image paths used in the last
# cell; 'anchor' and 'negative' are assumed by analogy — confirm.
ANC = os.path.join('data', 'anchor')
POS = os.path.join('data', 'positive')
NEG = os.path.join('data', 'negative')

## Preprocess

### Data gathering

In [None]:
def split_data(archive_dir=None, train_dir=None, test_dir=None,
               train_ratio=0.7, seed=None):
    """Copy each person folder from the archive into a training or test split.

    Folders that already exist in either split are left where they are, so
    calling this function again neither crashes in ``shutil.copytree`` nor
    places the same person in both splits.

    Args:
        archive_dir: Directory holding one sub-folder per person.
            Defaults to the module-level ``ARCH``.
        train_dir: Destination for the training split. Defaults to ``TRAIN``.
        test_dir: Destination for the test split. Defaults to ``TEST``.
        train_ratio: Fraction (0..1) of the not-yet-split folders that go to
            the training split.
        seed: Optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        FileNotFoundError: If ``archive_dir`` is not an existing directory.
    """
    archive_dir = ARCH if archive_dir is None else archive_dir
    train_dir = TRAIN if train_dir is None else train_dir
    test_dir = TEST if test_dir is None else test_dir

    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")
    if not os.path.isdir(archive_dir):
        raise FileNotFoundError(f"Archive directory not found: {archive_dir}")

    for d in (train_dir, test_dir):
        Path(d).mkdir(parents=True, exist_ok=True)

    def _subfolders(root):
        # Names of the immediate sub-directories of `root`.
        return {
            name for name in os.listdir(root)
            if os.path.isdir(os.path.join(root, name))
        }

    already_split = _subfolders(train_dir) | _subfolders(test_dir)
    # Sorted first so that a given seed yields the same split regardless of
    # the order in which the OS lists the archive.
    pending = sorted(_subfolders(archive_dir) - already_split)

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(pending)
    split_idx = int(len(pending) * train_ratio)
    train_folders = pending[:split_idx]
    test_folders = pending[split_idx:]

    for folder_name in train_folders:
        shutil.copytree(os.path.join(archive_dir, folder_name),
                        os.path.join(train_dir, folder_name))
        print(f"Copied to training: {folder_name}")

    for folder_name in test_folders:
        shutil.copytree(os.path.join(archive_dir, folder_name),
                        os.path.join(test_dir, folder_name))
        print(f"Copied to testing: {folder_name}")

    if already_split:
        print(f"Skipped {len(already_split)} folders that were already split.")
    print(f"\n✅ Done. {len(train_folders)} folders in training, {len(test_folders)} in test.")

# Populate the training and test directories from the archive.
split_data()

In [None]:
# Cleanup helper: split_data() above was accidentally run twice (once after deleting the output folders and once without), so this removes the resulting duplicate images.
def remove_duplicate_images(directory):
    """Delete ``<name>_1<ext>`` files whose ``<name><ext>`` original exists.

    Only the immediate entries of ``directory`` are examined. A file is
    treated as a duplicate when its name ends in ``_1`` before the extension
    and a file with the same name minus that suffix is present in the same
    directory. Progress and a final count are printed; nothing is returned.

    Args:
        directory: Path of the directory to clean.
    """
    if not os.path.isdir(directory):
        print(f"Error: Directory '{directory}' does not exist.")
        return

    files = os.listdir(directory)
    existing = set(files)  # O(1) membership instead of scanning the list
    pattern = re.compile(r'^(.+)_1(\.[^.]+)$')

    print(f"Scanning directory: {directory}")

    files_to_delete = []
    for file in files:
        match = pattern.match(file)
        if match and f"{match.group(1)}{match.group(2)}" in existing:
            files_to_delete.append(file)

    deleted_count = 0
    for file in files_to_delete:
        try:
            os.remove(os.path.join(directory, file))
        except OSError as e:
            # Best effort: report the failure and keep going.
            print(f"Error deleting {file}: {str(e)}")
        else:
            print(f"Deleted: {file}")
            deleted_count += 1

    print(f"\nTotal duplicate files deleted: {deleted_count}")

# Clean the directory named by NEG (NEG must be defined before this cell runs).
remove_duplicate_images(NEG)

In [None]:
# Interactive webcam capture. Each frame is cropped to a fixed 250x250 window
# and displayed; press 'a' to save it as an anchor image, 'p' to save it as a
# positive image, and 'q' to quit.
CROP_TOP, CROP_LEFT, CROP_SIZE = 120, 200, 250
save_targets = {
    ord('a'): (ANC, 'anchor'),
    ord('p'): (POS, 'positive'),
}

# cv2.imwrite does not create missing directories, so make sure they exist.
for target_dir, _ in save_targets.values():
    os.makedirs(target_dir, exist_ok=True)

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()

        if not ret:
            print("Failed to grab frame")
            break

        frame = frame[CROP_TOP:CROP_TOP + CROP_SIZE,
                      CROP_LEFT:CROP_LEFT + CROP_SIZE, :]
        cv2.imshow('Image Collection', frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print("Quitting...")
            break
        if key in save_targets:
            target_dir, label = save_targets[key]
            img_name = os.path.join(target_dir, f'{uuid.uuid1()}.jpg')
            # imwrite signals failure through its return value, not an
            # exception, so only report success when it actually succeeded.
            if cv2.imwrite(img_name, frame):
                print(f"Saved {label} image: {img_name}")
            else:
                print(f"Failed to save {label} image: {img_name}")
finally:
    # Always free the camera and close the preview window, even if the loop
    # is interrupted or raises.
    cap.release()
    cv2.destroyAllWindows()

In [178]:
def create_pairs_from_directory(directory):
    """Build aligned (anchor, comparison, label) datasets of image paths.

    ``directory`` is expected to contain one sub-folder per person. For every
    person with at least two images:

    * each unordered pair of that person's images becomes a positive pair
      (label 1);
    * each of the person's first 10 images is paired with up to 5 distinct
      randomly sampled images of every other person as negative pairs
      (label 0).

    Anchors for positive and negative pairs are collected in separate lists
    so that element ``i`` of all three returned datasets always describes the
    same pair.

    Args:
        directory: Root directory with one sub-folder of images per person.

    Returns:
        A tuple ``(anchor_ds, comparison_ds, labels_ds)`` of
        ``tf.data.Dataset`` objects of equal length: anchor image paths,
        comparison image paths, and float labels. All positive pairs come
        first, followed by all negative pairs.
    """
    image_exts = ('.jpg', '.jpeg', '.png')

    # List each person's images once instead of re-listing the folders inside
    # the nested negative-pair loops.
    images_by_person = {}
    for person in os.listdir(directory):
        person_path = os.path.join(directory, person)
        if not os.path.isdir(person_path):
            continue
        images_by_person[person] = [
            os.path.join(person_path, f)
            for f in os.listdir(person_path)
            if f.lower().endswith(image_exts)
        ]

    pos_anchor_paths, positive_paths = [], []
    neg_anchor_paths, negative_paths = [], []

    for person, person_images in images_by_person.items():
        if len(person_images) < 2:
            continue

        # Positive pairs: every unordered pair of this person's images.
        for i, anchor_img in enumerate(person_images):
            for positive_img in person_images[i + 1:]:
                pos_anchor_paths.append(anchor_img)
                positive_paths.append(positive_img)

        # Negative pairs: a bounded number of anchors against a bounded
        # sample of every other person's images.
        for anchor_img in person_images[:10]:
            for other_person, other_images in images_by_person.items():
                if other_person == person or not other_images:
                    continue
                sample_size = min(5, len(other_images))
                # Sample without replacement to avoid duplicate pairs.
                for negative_img in random.sample(other_images, sample_size):
                    neg_anchor_paths.append(anchor_img)
                    negative_paths.append(negative_img)

    all_anchor_paths = pos_anchor_paths + neg_anchor_paths
    all_comparison_paths = positive_paths + negative_paths
    all_labels = tf.concat(
        [tf.ones(len(positive_paths)), tf.zeros(len(negative_paths))],
        axis=0,
    )

    # Explicit string dtype keeps the datasets well-typed even when no pairs
    # were found (an empty Python list would otherwise become float32).
    anchor_ds = tf.data.Dataset.from_tensor_slices(
        tf.constant(all_anchor_paths, dtype=tf.string))
    comparison_ds = tf.data.Dataset.from_tensor_slices(
        tf.constant(all_comparison_paths, dtype=tf.string))
    labels_ds = tf.data.Dataset.from_tensor_slices(all_labels)

    return anchor_ds, comparison_ds, labels_ds

### Preprocess

In [179]:
def preprocess(img_path):
    """Load an image file and return it as a 100x100 RGB tensor in [0, 1].

    Args:
        img_path: Path of the image file (string or scalar string tensor).

    Returns:
        The decoded image, resized to 100x100 with 3 channels and scaled by
        1/255.
    """
    byte_img = tf.io.read_file(img_path)
    # channels=3 forces RGB output; without it a grayscale or RGBA file
    # decodes to 1 or 4 channels and no longer matches the model's
    # (100, 100, 3) input.
    # NOTE(review): per the TensorFlow docs this op also decodes PNG files,
    # which the pair builder accepts — confirm for the installed version.
    img = tf.io.decode_jpeg(byte_img, channels=3)
    img = tf.image.resize(img, (100, 100))
    return img / 255.0

In [180]:
def preproc_twin(in_img, valid_img, label):
    """Preprocess both images of a pair and pass the label through unchanged.

    Args:
        in_img: Path of the input (anchor) image.
        valid_img: Path of the validation (comparison) image.
        label: Label associated with the pair.

    Returns:
        A tuple ``(input_tensor, validation_tensor, label)``.
    """
    input_tensor = preprocess(in_img)
    validation_tensor = preprocess(valid_img)
    return input_tensor, validation_tensor, label

In [None]:
def prepare_datasets():
    """Build batched training and validation datasets from ``TRAIN``.

    The (anchor path, comparison path, label) triples are shuffled once, with
    a buffer covering the whole dataset and a fixed order, *before* the 80/20
    split. This keeps the two splits disjoint across epochs and mixes
    positive and negative pairs in both. Only the training split is
    reshuffled on every epoch.

    Returns:
        A tuple ``(train_dataset, val_dataset)`` of ``tf.data.Dataset``
        objects yielding batches of 16
        ``(input_image, validation_image, label)`` elements.
    """
    anchor_ds, comparison_ds, labels_ds = create_pairs_from_directory(TRAIN)

    dataset = tf.data.Dataset.zip((anchor_ds, comparison_ds, labels_ds))
    dataset_size = int(tf.data.experimental.cardinality(dataset).numpy())
    train_size = int(dataset_size * 0.8)

    if dataset_size > 1:
        # Shuffling path strings is cheap, so the buffer can span everything.
        # An explicit seed with reshuffle_each_iteration=False makes every
        # iterator see the same order, so take()/skip() below never overlap.
        split_seed = random.randrange(2 ** 31)
        dataset = dataset.shuffle(
            buffer_size=dataset_size,
            seed=split_seed,
            reshuffle_each_iteration=False,
        )

    # Decode images after the split and cache the decoded tensors per split.
    train_dataset = dataset.take(train_size).map(preproc_twin).cache()
    val_dataset = dataset.skip(train_size).map(preproc_twin).cache()

    # Per-epoch reshuffle of the training examples only.
    train_dataset = train_dataset.shuffle(buffer_size=1024)
    train_dataset = train_dataset.batch(16)
    train_dataset = train_dataset.prefetch(8)

    val_dataset = val_dataset.batch(16)
    val_dataset = val_dataset.prefetch(8)

    return train_dataset, val_dataset

## Model

### Building

In [None]:
def embeding_make():
    """Build the convolutional embedding network shared by both branches.

    Four convolution blocks (the first three followed by 2x2 max pooling)
    feed a 4096-unit sigmoid dense layer.

    Returns:
        A Keras ``Model`` named ``'embedding'`` mapping a (100, 100, 3) image
        to a 4096-dimensional vector.
    """
    # Underscore instead of a space: whitespace is not a valid character in
    # TensorFlow scope/op names on some versions.
    in_ = Input(shape=(100, 100, 3), name="in_img")

    # MaxPooling2D's first positional argument is pool_size. The previous
    # MaxPooling2D(64, (2,2)) therefore pooled over 64x64 windows with
    # stride 2. A 2x2 window with the default stride (= pool size) and
    # 'same' padding gives the same output shapes but pools locally.
    c1 = Conv2D(64, (10, 10), activation='relu')(in_)
    p1 = MaxPooling2D((2, 2), padding='same')(c1)

    c2 = Conv2D(128, (7, 7), activation='relu')(p1)
    p2 = MaxPooling2D((2, 2), padding='same')(c2)

    c3 = Conv2D(128, (4, 4), activation='relu')(p2)
    p3 = MaxPooling2D((2, 2), padding='same')(c3)

    c4 = Conv2D(256, (4, 4), activation='relu')(p3)
    f1 = Flatten()(c4)
    d1 = Dense(4096, activation='sigmoid')(f1)

    return Model(inputs=in_, outputs=d1, name='embedding')

In [None]:
# Build the embedding network once; make_model() applies this same instance
# to both of its inputs.
embedding = embeding_make()

In [None]:
class L1Dist(Layer):
    """Keras layer returning the element-wise absolute difference of two tensors."""

    def __init__(self, **kwargs):
        # Forward kwargs (e.g. name, dtype, trainable) to the base Layer so
        # they take effect, including when the layer is rebuilt from its
        # saved config by load_model.
        super().__init__(**kwargs)

    def call(self, in_embed, valid_embed):
        """Return ``|in_embed - valid_embed|`` element-wise."""
        return tf.math.abs(in_embed - valid_embed)

In [None]:
def make_model():  # Siamese
    """Assemble the siamese verification model.

    Both inputs go through the module-level ``embedding`` network; the
    element-wise L1 distance between the two embeddings feeds a single
    sigmoid unit.

    Returns:
        A Keras ``Model`` taking ``[input_img, validation_img]`` (each
        (100, 100, 3)) and producing one sigmoid score per pair.
    """
    input_img = Input(name='input_img', shape=(100, 100, 3))
    validation_img = Input(name='validation_img', shape=(100, 100, 3))

    # The layer name is passed to the constructor: `name` is a read-only
    # property on tf.keras layers, so assigning to it afterwards fails.
    distance_layer = L1Dist(name='distance')
    distances = distance_layer(embedding(input_img), embedding(validation_img))

    classifier = Dense(1, activation='sigmoid')(distances)

    return Model(inputs=[input_img, validation_img], outputs=classifier, name='SimaneseNetwork')

In [None]:
# Instantiate the siamese network used by the checkpoint, evaluation and
# save cells below.
siamese_model = make_model()

### Training

In [None]:
# Binary cross-entropy loss and an Adam optimiser with learning rate 1e-4.
binary_cross_loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
# Checkpoint files are written under ./training_checkpoints with the
# filename prefix 'ckpt'.
checkpoints = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoints, 'ckpt')
# Track the optimiser and the model together in one checkpoint object.
checkpoint = tf.train.Checkpoint(opt=opt, siamese_model=siamese_model)

In [182]:
@tf.function
def t_step(batch, model, optimizer, loss_fn):
    """Run one optimisation step on a single batch.

    Args:
        batch: Tuple ``(anchor_img, comparison_img, y_true)``.
        model: Model called as ``model([anchor_img, comparison_img])``.
        optimizer: Optimiser whose ``apply_gradients`` updates the model.
        loss_fn: Callable ``loss_fn(y_true, y_pred)`` returning the loss.

    Returns:
        The loss computed for this batch.
    """
    anchor_img, comparison_img, y_true = batch

    # Record the forward pass so gradients can be taken afterwards.
    with tf.GradientTape() as tape:
        y_pred = model([anchor_img, comparison_img], training=True)
        loss = loss_fn(y_true, y_pred)

    trainable = model.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return loss

In [183]:
def train(data, epochs):
    """Train the module-level ``siamese_model`` on ``data``.

    Uses the module-level optimiser ``opt`` and loss ``binary_cross_loss``,
    the same objects tracked by the module-level ``checkpoint``, and saves a
    checkpoint every 10 epochs.

    Args:
        data: Batched dataset yielding ``(anchor, comparison, label)``
            tuples; must support ``len()``.
        epochs: Number of passes over ``data``.
    """
    for epoch in range(1, epochs + 1):
        print('\n Epoch {}/{}'.format(epoch, epochs))
        progbar = tf.keras.utils.Progbar(len(data))
        for idx, batch in enumerate(data):
            # t_step requires the model, optimiser and loss explicitly.
            t_step(batch, siamese_model, opt, binary_cross_loss)
            progbar.update(idx + 1)

        if epoch % 10 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

In [None]:
def retrain_model(model, epochs=50):
    """Train ``model`` on freshly prepared datasets and report metrics.

    Builds the datasets with ``prepare_datasets()``, creates a new Adam
    optimiser (1e-4) and binary cross-entropy loss, and for each epoch prints
    the mean training loss, training accuracy and validation accuracy. A
    checkpoint is written under ``./checkpoints`` every 10 epochs.

    Args:
        model: Model called as ``model([anchor, comparison])``.
        epochs: Number of training epochs.

    Returns:
        The same ``model`` object after training.
    """
    train_dataset, val_dataset = prepare_datasets()

    loss_fn = tf.keras.losses.BinaryCrossentropy()
    optimizer = tf.keras.optimizers.Adam(1e-4)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')
    val_accuracy = tf.keras.metrics.BinaryAccuracy(name='val_accuracy')

    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    checkpoint_prefix = os.path.join('./checkpoints', "ckpt")

    for epoch_number in range(1, epochs + 1):
        print(f"\nEpoch {epoch_number}/{epochs}")

        # Training metrics are per-epoch, so clear them first.
        for metric in (train_loss, train_accuracy):
            metric.reset_states()

        progress_bar = tf.keras.utils.Progbar(len(train_dataset))
        for step, (anchors, comparisons, labels) in enumerate(train_dataset, start=1):
            loss = t_step((anchors, comparisons, labels), model, optimizer, loss_fn)

            train_loss(loss)
            # Accuracy is measured with a second, inference-mode forward pass.
            train_accuracy(labels, model([anchors, comparisons], training=False))
            progress_bar.update(step)

        val_accuracy.reset_states()
        for anchors, comparisons, labels in val_dataset:
            val_accuracy(labels, model([anchors, comparisons], training=False))

        print(f"Loss: {train_loss.result():.4f}, Accuracy: {train_accuracy.result():.4f}, Val Accuracy: {val_accuracy.result():.4f}")
        if epoch_number % 10 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

    return model

In [None]:
# Number of epochs passed to train() in the next cell.
epochs = 50

In [None]:
# Build the batched training set that train() iterates over; the validation
# split is kept in val_data for later use.
train_data, val_data = prepare_datasets()
train(train_data, epochs)

### Test / Evaluate model

In [None]:
# Build a shuffled, batched evaluation set from the held-out TEST directory.
# NOTE(review): assumes the TEST split is the intended evaluation data — confirm.
test_anchor_ds, test_comparison_ds, test_labels_ds = create_pairs_from_directory(TEST)
test_data = tf.data.Dataset.zip((test_anchor_ds, test_comparison_ds, test_labels_ds))
# Shuffle the path triples (not decoded images) so a batch mixes both labels.
test_pair_count = int(tf.data.experimental.cardinality(test_data).numpy())
test_data = test_data.shuffle(buffer_size=max(1, test_pair_count))
test_data = test_data.map(preproc_twin).batch(16).prefetch(8)

# Take one batch and score it with the siamese model.
test_in, test_val, y_true = next(test_data.as_numpy_iterator())
y_hat = siamese_model.predict([test_in, test_val])

In [None]:
# Threshold the scores at 0.5 to get hard 0/1 predictions.
(y_hat > 0.5).astype(int).flatten().tolist()

In [None]:
# Ground-truth labels of the same batch, for comparison with the predictions.
y_true

In [None]:
# Recall of the predictions y_hat against the labels y_true for this batch.
m = Recall()
m.update_state(y_true, y_hat)
m.result().numpy()

In [None]:
# Precision of the predictions y_hat against the labels y_true for this batch.
m = Precision()
m.update_state(y_true, y_hat)
m.result().numpy()

In [None]:
# Show the first input image and its comparison image side by side.
plt.figure(figsize=(10, 8))
for position, image in enumerate((test_in[0], test_val[0]), start=1):
    plt.subplot(1, 2, position)
    plt.imshow(image)
plt.show()

In [None]:
# Save the siamese model to an HDF5 file in the working directory.
siamese_model.save('face_verification.h5')

In [None]:
# Reload the saved model; the custom L1Dist layer and the loss class are
# supplied through custom_objects.
model = tf.keras.models.load_model(
    'face_verification.h5',
    custom_objects={
        'L1Dist': L1Dist,
        'BinaryCrossentropy': tf.losses.BinaryCrossentropy,
    },
)

In [None]:
def test_on_two_images(img1_path, img2_path, model, threshold=0.5):
    """Score two image files with ``model`` and print the verdict.

    Args:
        img1_path: Path of the first image.
        img2_path: Path of the second image.
        model: Model called as ``model.predict([img1, img2])`` returning one
            score per pair.
        threshold: Score above which the pair is reported as a match.
    """
    # Reuse the notebook's preprocess() so inference inputs are prepared
    # exactly like the training inputs, then add the batch dimension.
    img1 = tf.expand_dims(preprocess(img1_path), axis=0)
    img2 = tf.expand_dims(preprocess(img2_path), axis=0)

    # Predict similarity
    score = float(model.predict([img1, img2])[0][0])
    print(f"Similarity score: {score:.4f}")

    if score > threshold:
        print("✅ Match: Likely the same person")
    else:
        print("❌ No Match: Likely different people")

# Example usage: compare two saved images with the reloaded model.
# NOTE(review): these are absolute paths on the author's machine — adjust
# them before running elsewhere.
test_on_two_images('C:/Users/lokna/Projects/MyReactNativeApp/extensions/face_auth/data/positive/496e69d0-3499-11f0-a4ad-e8fb1c79b654.jpg', 'C:/Users/lokna/Projects/MyReactNativeApp/extensions/face_auth/data/positive/4a5df055-3499-11f0-ae89-e8fb1c79b654.jpg', model)
