# Face Recognition Model Training
## Overview
This notebook trains a face recognition model using:
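- The `face_recognition` library for face detection and embedding extraction, with OpenCV for image loading, color conversion, and drawing.
- TensorFlow and keras-vggface are installed, but the code below does not use them for feature extraction.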
- Cosine Similarity for recognition.
The embeddings are computed from a custom dataset and saved as `face_embeddings.pkl`.

## Setup
Install dependencies and import libraries.

In [3]:

!pip install tensorflow==2.9.0
!pip install keras==2.9
!pip install face_recognition
!pip install keras_applications==1.0.8
!pip install keras-vggface
!pip install h5py
!pip install matplotlib
!pip install scikit-learn
!pip install kaggle

Collecting tensorflow==2.9.0
  Downloading tensorflow-2.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting flatbuffers<2,>=1.12 (from tensorflow==2.9.0)
  Downloading flatbuffers-1.12-py2.py3-none-any.whl.metadata (872 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow==2.9.0)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting keras<2.10.0,>=2.9.0rc0 (from tensorflow==2.9.0)
  Downloading keras-2.9.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras-preprocessing>=1.1.1 (from tensorflow==2.9.0)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting tensorboard<2.10,>=2.9 (from tensorflow==2.9.0)
  Downloading tensorboard-2.9.1-py3-none-any.whl.metadata (1.9 kB)
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0 (from tensorflow==2.9.0)
  Downloading tensorflow_estimator-2.9.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting google-auth-oauthlib<0.5,>=0.4.1 (from tensorboard<2.10,>=

In [4]:
import os
import cv2
import numpy as np
import pickle
import face_recognition
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
import kagglehub

# Fetch the latest published version of the celebrity face dataset.
# The value returned by kagglehub is used below as the dataset's local path.
dataset_handle = "omchoksi04/celebrity-faceid-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)


In [None]:

# File the pickled embeddings are written to / read from
model_path = "face_embeddings.pkl"
# Root folder of the downloaded dataset (one sub-folder per person is expected)
dataset_path = path


In [16]:
def extract_faces(image):
    """Locate faces in ``image`` with ``face_recognition.face_locations``.

    Returns the detected locations as given by the library, or ``None``
    when nothing was detected (so callers can use a simple truthiness test).
    """
    locations = face_recognition.face_locations(image)
    return locations if locations else None

In [17]:
def get_embeddings():
    """Extract one face embedding per usable image in the dataset.

    Walks ``dataset_path``, treating every sub-directory as one person and
    every entry inside it as a candidate image. For each image that can be
    read and in which at least one face is found, the encoding of the first
    detected face is stored together with the sub-directory name.

    Entries that OpenCV cannot decode (non-image files, nested folders,
    corrupt images) are skipped instead of aborting the whole run.

    Returns:
        tuple: ``(encodings, names)`` - two parallel lists; ``names[i]`` is
        the person label for ``encodings[i]``.
    """
    encodings = []
    names = []

    for person in os.listdir(dataset_path):
        person_path = os.path.join(dataset_path, person)
        if not os.path.isdir(person_path):
            continue

        for img_name in os.listdir(person_path):
            img_path = os.path.join(person_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals "could not read" by returning None rather
                # than raising; passing that on to cvtColor would crash.
                continue

            rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            faces = extract_faces(rgb_img)
            if not faces:
                continue

            encoding = face_recognition.face_encodings(rgb_img, faces)
            if encoding:
                # Only the first detected face of each image is kept.
                encodings.append(encoding[0])
                names.append(person)

    return encodings, names



In [18]:
# Build the embedding database and persist it
def train():
    """Compute embeddings via ``get_embeddings`` and pickle them to ``model_path``.

    The file holds a dict with two keys, ``"encodings"`` and ``"names"``,
    containing the two lists returned by ``get_embeddings``.
    """
    print("Training on dataset...")
    face_vectors, labels = get_embeddings()
    with open(model_path, "wb") as out_file:
        pickle.dump({"encodings": face_vectors, "names": labels}, out_file)
    print("Model saved!")

# Build the embeddings from dataset_path and write them to model_path.
train()



Training on dataset...
Model saved!


In [19]:
# Read the saved embedding database back from disk
def load_trained_model():
    """Unpickle ``model_path`` and return ``(encodings, names)``.

    The file is expected to contain a dict with the keys ``"encodings"``
    and ``"names"``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load files you created.
    with open(model_path, "rb") as model_file:
        payload = pickle.load(model_file)
    known_encodings, known_names = payload["encodings"], payload["names"]
    return known_encodings, known_names

def recognize_face(frame, threshold=0.6, database=None):
    """Detect faces in a BGR frame, label them, and draw the result on the frame.

    Each detected face is encoded and compared against the known encodings
    with cosine similarity. The best-scoring known face supplies the label
    when its similarity exceeds ``threshold``; otherwise the face is labelled
    ``"Unknown"``. A green box and the label are drawn onto ``frame`` itself.

    Args:
        frame: Image in OpenCV's BGR channel order. It is modified in place.
        threshold: Minimum cosine similarity (exclusive) for accepting a match.
            NOTE(review): 0.6 is the value the original code used; confirm it
            is selective enough for these embeddings when scored by cosine
            similarity.
        database: Optional ``(encodings, names)`` pair. When omitted, the
            database is read with ``load_trained_model()``; pass it explicitly
            to avoid re-reading the pickle for every frame of a video stream.

    Returns:
        The same ``frame`` object, annotated.
    """
    if database is None:
        known_encodings, known_names = load_trained_model()
    else:
        known_encodings, known_names = database

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    faces = face_recognition.face_locations(rgb_frame)
    face_encodings = face_recognition.face_encodings(rgb_frame, faces)

    # cosine_similarity raises on an empty reference set, so with nothing to
    # compare against every face is simply reported as unknown.
    has_known_faces = len(known_encodings) > 0

    for face_encoding, face_location in zip(face_encodings, faces):
        name = "Unknown"
        if has_known_faces:
            similarities = cosine_similarity([face_encoding], known_encodings)[0]
            best_match = int(np.argmax(similarities))
            if similarities[best_match] > threshold:
                name = known_names[best_match]

        # face_recognition reports boxes as (top, right, bottom, left).
        top, right, bottom, left = face_location
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.putText(frame, name, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return frame


2025-03-13 11:19:38.867 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-03-13 11:19:38.872 Session state does not function when running a script without `streamlit run`
