In [1]:
pwd

'D:\\'

In [2]:
pip install insightface


Note: you may need to restart the kernel to use updated packages.


In [43]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from insightface.app import FaceAnalysis
import torch

# Dataset base path
# dataset_path = r'D:\E Driv Data\DATASETS\CASIA-3D-FACE-DATASET'

dataset_path = r"D:\Face classification\Face DATASETS\105_classes_pins_dataset"

# List class folders named '001', '002', ... '123'
class_names = sorted([d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))])
print(f"Found {len(class_names)} classes: {class_names[:5]} ... {class_names[-5:]}")

image_paths = []
labels = []

# Map class name to label index
class_to_idx = {name: idx for idx, name in enumerate(class_names)}

for class_name in class_names:
    class_dir = os.path.join(dataset_path, class_name)
    # Include .bmp extension
    image_files = [f for f in os.listdir(class_dir) if f.lower().endswith(('.jpg', '.jpeg', '.bmp'))]
    print(f"Class '{class_name}': {len(image_files)} images")
    for img_file in image_files:
        image_paths.append(os.path.join(class_dir, img_file))
        labels.append(class_to_idx[class_name])

print(f"Total images found: {len(image_paths)}")

# Initialize insightface FaceAnalysis
providers = ['CUDAExecutionProvider'] if torch.cuda.is_available() else ['CPUExecutionProvider']
app = FaceAnalysis(name='buffalo_l', providers=providers)
app.prepare(ctx_id=0 if torch.cuda.is_available() else -1)
app.det_size = (320, 320)  # Adjust detection size as needed

embedding_list = []
label_list = []
skip_count = 0

print("Extracting embeddings...")
for img_path, label in tqdm(zip(image_paths, labels), total=len(image_paths)):
    img = cv2.imread(img_path)
    if img is None:
        skip_count += 1
        continue
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    faces = app.get(img_rgb)
    if not faces:
        skip_count += 1
        continue

    embedding = faces[0].embedding
    embedding_list.append(embedding)
    label_list.append(label)

print(f"✅ Total valid embeddings extracted: {len(embedding_list)}")
print(f"⚠️ Skipped images (no face detected or load failed): {skip_count}")

if len(embedding_list) == 0:
    raise ValueError("❌ No embeddings extracted. Check dataset images or detection size.")

# Prepare numpy arrays
X = np.array(embedding_list)
y = np.array(label_list)




Found 105 classes: ['pins_Adriana Lima', 'pins_Alex Lawther', 'pins_Alexandra Daddario', 'pins_Alvaro Morte', 'pins_Amanda Crew'] ... ['pins_kiernen shipka', 'pins_margot robbie', 'pins_melissa fumero', 'pins_scarlett johansson', 'pins_tom ellis']
Class 'pins_Adriana Lima': 213 images
Class 'pins_Alex Lawther': 152 images
Class 'pins_Alexandra Daddario': 225 images
Class 'pins_Alvaro Morte': 139 images
Class 'pins_Amanda Crew': 117 images
Class 'pins_Andy Samberg': 196 images
Class 'pins_Anne Hathaway': 203 images
Class 'pins_Anthony Mackie': 124 images
Class 'pins_Avril Lavigne': 162 images
Class 'pins_Ben Affleck': 126 images
Class 'pins_Bill Gates': 122 images
Class 'pins_Bobby Morley': 138 images
Class 'pins_Brenton Thwaites': 209 images
Class 'pins_Brian J. Smith': 102 images
Class 'pins_Brie Larson': 169 images
Class 'pins_Chris Evans': 166 images
Class 'pins_Chris Hemsworth': 159 images
Class 'pins_Chris Pratt': 176 images
Class 'pins_Christian Bale': 154 images
Class 'pins_Cris

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
100%|████████████████████████████████████████████████████████████████████████████| 17534/17534 [23:58<00:00, 12.19it/s]

✅ Total valid embeddings extracted: 1449
⚠️ Skipped images (no face detected or load failed): 16085





In [44]:
# Split data stratified
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


In [45]:
# # Train SVM classifier
# clf = SVC(kernel='linear', probability=True)
# clf.fit(X_train, y_train)

# # Predict & evaluate
# y_pred = clf.predict(X_test)
# print("Accuracy:", (y_pred == y_test).mean())
# print("Classification report:\n", classification_report(y_test, y_pred, target_names=class_names))


Accuracy: 0.9379310344827586
Classification report:
                              precision    recall  f1-score   support

          pins_Adriana Lima       1.00      1.00      1.00         2
          pins_Alex Lawther       1.00      1.00      1.00         3
    pins_Alexandra Daddario       1.00      1.00      1.00         2
          pins_Alvaro Morte       1.00      1.00      1.00         2
           pins_Amanda Crew       1.00      1.00      1.00         3
          pins_Andy Samberg       1.00      1.00      1.00         5
         pins_Anne Hathaway       1.00      1.00      1.00         3
        pins_Anthony Mackie       1.00      0.80      0.89         5
         pins_Avril Lavigne       1.00      1.00      1.00         2
           pins_Ben Affleck       1.00      1.00      1.00         2
            pins_Bill Gates       1.00      1.00      1.00         3
          pins_Bobby Morley       1.00      1.00      1.00         2
      pins_Brenton Thwaites       1.00      1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [47]:
# from sklearn.ensemble import RandomForestClassifier

# clf = RandomForestClassifier(n_estimators=100, random_state=42)
# clf.fit(X_train, y_train)


In [49]:
# from sklearn.neighbors import KNeighborsClassifier

# clf = KNeighborsClassifier(n_neighbors=5)
# clf.fit(X_train, y_train)


In [51]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500, random_state=42)
clf.fit(X_train, y_train)


In [52]:
# Predict & evaluate
y_pred = clf.predict(X_test)
print("Accuracy:", (y_pred == y_test).mean())
print("Classification report:\n", classification_report(y_test, y_pred, target_names=class_names))


Accuracy: 0.9275862068965517
Classification report:
                              precision    recall  f1-score   support

          pins_Adriana Lima       0.67      1.00      0.80         2
          pins_Alex Lawther       1.00      1.00      1.00         3
    pins_Alexandra Daddario       1.00      1.00      1.00         2
          pins_Alvaro Morte       1.00      1.00      1.00         2
           pins_Amanda Crew       1.00      1.00      1.00         3
          pins_Andy Samberg       0.71      1.00      0.83         5
         pins_Anne Hathaway       1.00      1.00      1.00         3
        pins_Anthony Mackie       1.00      0.80      0.89         5
         pins_Avril Lavigne       1.00      1.00      1.00         2
           pins_Ben Affleck       1.00      1.00      1.00         2
            pins_Bill Gates       1.00      1.00      1.00         3
          pins_Bobby Morley       1.00      0.50      0.67         2
      pins_Brenton Thwaites       1.00      1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
