In [1]:
import os
import cv2
import numpy as np
import torch
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from facenet_pytorch import InceptionResnetV1
from ultralytics import YOLO

In [2]:
DATASET_PATH = r'D:\Studies\Quantic Tech Internship\facerecog\celebfaces'
IMG_SIZE = 160

In [3]:
detector = YOLO('yolov8n-face.pt')

In [4]:
facenet = InceptionResnetV1(pretrained='vggface2').eval()

In [5]:
def detect_and_crop(image_path):
    img = cv2.imread(image_path)
    results = detector(img)
    if results[0].boxes.shape[0] == 0:
        return None
    xyxy = results[0].boxes[0].xyxy[0].cpu().numpy().astype(int)
    x1, y1, x2, y2 = xyxy
    face = img[y1:y2, x1:x2]
    face = cv2.resize(face, (IMG_SIZE, IMG_SIZE))
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    return face

In [6]:
embeddings = []
labels = []

for person in os.listdir(DATASET_PATH):
    person_dir = os.path.join(DATASET_PATH, person)
    for img_name in os.listdir(person_dir):
        img_path = os.path.join(person_dir, img_name)
        face = detect_and_crop(img_path)
        if face is not None:
            face_tensor = torch.tensor(face).permute(2, 0, 1).unsqueeze(0).float() / 255.0
            emb = facenet(face_tensor).detach().numpy()[0]
            embeddings.append(emb)
            labels.append(person)


0: 640x544 1 face, 83.7ms
Speed: 4.2ms preprocess, 83.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 544)

0: 640x640 1 face, 77.6ms
Speed: 3.3ms preprocess, 77.6ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x544 2 faces, 71.4ms
Speed: 3.1ms preprocess, 71.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 544)

0: 640x544 1 face, 63.9ms
Speed: 2.1ms preprocess, 63.9ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 544)

0: 640x608 1 face, 80.8ms
Speed: 3.7ms preprocess, 80.8ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 608)

0: 640x544 3 faces, 65.2ms
Speed: 3.0ms preprocess, 65.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 544)

0: 640x640 1 face, 65.8ms
Speed: 3.2ms preprocess, 65.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 faces, 72.2ms
Speed: 3.8ms preprocess, 72.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 6

In [7]:
from sklearn.preprocessing import normalize
X = normalize(embeddings)

In [8]:
le = LabelEncoder()
X = np.array(embeddings)
y = le.fit_transform(labels)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42,stratify=y)

clf = SVC(kernel='linear', probability=True)
clf.fit(X, y)

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


In [9]:
from sklearn.metrics import classification_report

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))

                    precision    recall  f1-score   support

    Angelina Jolie       0.89      1.00      0.94        24
         Brad Pitt       1.00      0.96      0.98        25
 Denzel Washington       1.00      1.00      1.00        25
      Hugh Jackman       1.00      1.00      1.00        25
 Jennifer Lawrence       1.00      0.96      0.98        25
       Johnny Depp       1.00      1.00      1.00        25
      Kate Winslet       1.00      1.00      1.00        25
 Leonardo DiCaprio       1.00      1.00      1.00        25
         Megan Fox       1.00      1.00      1.00        25
   Natalie Portman       1.00      0.96      0.98        24
     Nicole Kidman       1.00      1.00      1.00        25
  Robert Downey Jr       0.96      0.92      0.94        25
    Sandra Bullock       0.93      1.00      0.96        25
Scarlett Johansson       1.00      1.00      1.00        50
        Tom Cruise       1.00      0.92      0.96        25
         Tom Hanks       1.00      1.00

In [10]:
def predict_image(image_path):
    face = detect_and_crop(image_path)
    if face is None:
        return "No face detected"
    face_tensor = torch.tensor(face).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    emb = facenet(face_tensor).detach().numpy()
    probs = clf.predict_proba(emb)[0]
    pred_label = clf.classes_[np.argmax(probs)]
    confidence = np.max(probs)
    return pred_label, confidence

In [11]:
TEST_PATH = r'D:\Studies\Quantic Tech Internship\facerecog\tomcruisetest.jpg'
print(predict_image(TEST_PATH))


0: 384x640 1 face, 71.2ms
Speed: 2.1ms preprocess, 71.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
(14, 0.6693890469569015)


In [12]:
TEST_PATH = r"D:\Studies\Quantic Tech Internship\facerecog\tomhankstest.jpg"
print(predict_image(TEST_PATH))


0: 448x640 1 face, 82.4ms
Speed: 1.6ms preprocess, 82.4ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)
(15, 0.9464387279355716)


In [17]:
import joblib
joblib.dump(clf, "svm_face_recognizer.pkl")
joblib.dump(le, "label_encoder.pkl")

['label_encoder.pkl']