In [1]:
import os
import cv2
import numpy as np
import face_recognition
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pickle

In [3]:
# Directory containing the celebrity images
dataset_dir = r"C:\Users\Sarav\Imarticus Learning (Mega Sync)\Internships\CodeClause\Facial Recognition System\Celebrity Faces Dataset"

In [5]:
# Prepare lists for face embeddings and labels
face_embeddings = []
labels = []
celebrity_names = os.listdir(dataset_dir)

In [9]:
# Loop through each celebrity folder and process images
for label, celebrity_name in enumerate(celebrity_names):
    celebrity_folder = os.path.join(dataset_dir, celebrity_name)
    print(f"Processing celebrity: {celebrity_name} ({label + 1}/{len(celebrity_names)})")

    # Loop through each image in the celebrity folder
    for image_name in os.listdir(celebrity_folder):
        image_path = os.path.join(celebrity_folder, image_name)
        print(f"  Processing image: {image_name}")

         # Load image and convert to RGB (face_recognition expects RGB)
        image = cv2.imread(image_path)
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Find face locations and embeddings
        face_locations = face_recognition.face_locations(rgb_image)
        face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
        
        # If faces are detected, add embeddings to the list
        if face_encodings:
            for encoding in face_encodings:
                face_embeddings.append(encoding)
                labels.append(label)

Processing celebrity: Angelina Jolie (1/17)
  Processing image: 001_fe3347c0.jpg
  Processing image: 002_8f8da10e.jpg
  Processing image: 003_57612506.jpg
  Processing image: 004_f61e7d0c.jpg
  Processing image: 005_582c121a.jpg
  Processing image: 006_9135205d.jpg
  Processing image: 007_cabbfcbb.jpg
  Processing image: 008_d1f87068.jpg
  Processing image: 009_fb3e6174.jpg
  Processing image: 010_f99d79e3.jpg
  Processing image: 011_7344ca35.jpg
  Processing image: 012_cfcd4007.jpg
  Processing image: 013_95ecbd39.jpg
  Processing image: 014_0d29db88.jpg
  Processing image: 015_8bac79b5.jpg
  Processing image: 016_8945d6ca.jpg
  Processing image: 017_e28ea9d4.jpg
  Processing image: 018_fcafe1a8.jpg
  Processing image: 019_57ab290d.jpg
  Processing image: 020_4c4b655f.jpg
  Processing image: 021_6e419870.jpg
  Processing image: 022_b497b92e.jpg
  Processing image: 023_7781dd1c.jpg
  Processing image: 024_ca32be97.jpg
  Processing image: 025_41cee764.jpg
  Processing image: 026_2828fca

In [11]:
# Convert labels to a numpy array
labels = np.array(labels)

In [13]:
# Train a classifier using Support Vector Machine (SVM)
X_train, X_test, y_train, y_test = train_test_split(face_embeddings, labels, test_size=0.2, random_state=42)

In [15]:
svm_classifier = SVC(kernel='linear', probability=True)
svm_classifier.fit(X_train, y_train)

In [17]:
# Evaluate on the test set
y_pred = svm_classifier.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

Accuracy: 0.9892183288409704


In [19]:
# Save the trained model
with open('celebrity_face_recognition_model.pkl', 'wb') as f:
    pickle.dump(svm_classifier, f)

In [21]:
# Save the celebrity names for reference
with open('celebrity_names.pkl', 'wb') as f:
    pickle.dump(celebrity_names, f)

In [23]:
# Function for face recognition
def recognize_face(image_path):
    # Load the image
    image = cv2.imread(image_path)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Find face locations and embeddings
    face_locations = face_recognition.face_locations(rgb_image)
    face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
    
    # If faces are found, try to predict the label
    if face_encodings:
        for encoding in face_encodings:
            prediction = svm_classifier.predict([encoding])
            prob = svm_classifier.predict_proba([encoding])
            max_prob_index = np.argmax(prob)
            celebrity_name = celebrity_names[max_prob_index]
            print(f"Recognized: {celebrity_name}")
        return celebrity_name
    else:
        print("No face found.")
        return None

In [31]:
# Test the recognition with a new image
test_image_path = r"C:\Users\Sarav\Imarticus Learning (Mega Sync)\Internships\CodeClause\Facial Recognition System\pirates.jpeg"  # Replace with the path to a test image
recognized_celebrity = recognize_face(test_image_path)

Recognized: Johnny Depp
