<a href="https://colab.research.google.com/github/Sabastain-Wakoyi/Wakoyi-Tolulope/blob/main/Wakoyi_Tolulope_Facenet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install facenet-pytorch



In [None]:
!pip install scikit-learn



In [None]:
!pip install fastcore -U
!pip install Pillow -U

Collecting Pillow
  Downloading pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.1 kB)
Downloading pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Pillow
  Attempting uninstall: Pillow
    Found existing installation: pillow 10.2.0
    Uninstalling pillow-10.2.0:
      Successfully uninstalled pillow-10.2.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
facenet-pytorch 2.6.0 requires Pillow<10.3.0,>=10.2.0, but you have pillow 11.0.0 which is incompatible.[0m[31m
[0mSuccessfully installed Pillow-11.0.0


In [None]:
# Import necessary libraries
import torch
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
import numpy as np
from collections import Counter

In [None]:
from sklearn.datasets import fetch_lfw_people # import fetch_lfw_people from sklearn.datasets


# Load the pre-trained FaceNet model (InceptionResnetV1 with VGGFace2 weights),
# put it in inference mode and move it to the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Prepare the LFW dataset, keeping only people with enough samples per class.
min_faces_per_person = 20  # Ensure at least 20 images per person

# Deterministic preprocessing only. FaceNet is used here as a frozen feature
# extractor and every image is transformed exactly once *before* the
# train/test split, so random flips/rotations/colour jitter would not act as
# training-time augmentation: they would merely distort the test images too
# and make the reported numbers change from run to run.
transform = transforms.Compose([
    transforms.Resize((160, 160)),  # Resize to FaceNet's input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Use fetch_lfw_people from sklearn.datasets instead of torchvision.datasets.
lfw_people = fetch_lfw_people(min_faces_per_person=min_faces_per_person,
                              resize=0.4,  # 0.4 is also the default resize ratio
                              color=True)  # keep the three colour channels

# Images, integer labels and the label -> person-name lookup table.
images = lfw_people.images
target = lfw_people.target
target_names = lfw_people.target_names

# Check the class distribution (the dataset is heavily imbalanced).
label_counts = Counter(target)
print(f"Number of people: {len(label_counts)}")
print(f"Min faces per person: {min(label_counts.values())}")
print(f"Max faces per person: {max(label_counts.values())}")
print(f"Average faces per person: {np.mean(list(label_counts.values())):.2f}")

# Convert every image to a PIL image and apply the preprocessing pipeline.
# The loader returns floating-point pixel arrays; how ToPILImage treats float
# ndarrays depends on the torchvision version, so the pixels are converted to
# uint8 explicitly. The value scale is detected from the data because it is
# presumably [0, 1] on recent scikit-learn releases and 0-255 on older ones.
pixel_scale = 255.0 if np.max(images) <= 1.0 else 1.0
to_pil = transforms.ToPILImage()

X = []
for img in images:
    img_uint8 = np.clip(np.rint(img * pixel_scale), 0, 255).astype(np.uint8)
    X.append(transform(to_pil(img_uint8)))

X = torch.stack(X)
y = target  # Use the target variable directly

# Split into train and test sets, stratified so that every person appears in
# both splits in proportion to their number of images.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# Extract embeddings using FaceNet
def extract_embeddings(images, batch_size=64):
    """Compute FaceNet embeddings for a collection of preprocessed images.

    Relies on the module-level ``facenet`` model and ``device`` defined earlier
    in the notebook. Images are pushed through the model in mini-batches
    instead of one at a time, which avoids one forward pass and one
    host/device transfer per image.

    Args:
        images: Either a tensor whose first dimension indexes the images, or
            an iterable of equally-shaped image tensors.
        batch_size: Number of images per forward pass. Must be >= 1.

    Returns:
        A NumPy array with one flattened embedding per row, in input order.
        An empty input yields an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Materialise generic iterables so they can be sliced into batches.
    if not isinstance(images, torch.Tensor):
        images = list(images)

    chunks = []
    with torch.no_grad():  # inference only, no autograd bookkeeping
        for start in range(0, len(images), batch_size):
            batch = images[start:start + batch_size]
            if not isinstance(batch, torch.Tensor):
                batch = torch.stack(batch)
            output = facenet(batch.to(device))
            # One flat row per image, matching the per-image flatten() layout.
            chunks.append(output.cpu().numpy().reshape(len(batch), -1))

    if not chunks:
        return np.array([])
    return np.concatenate(chunks, axis=0)

# Embed both splits with the frozen FaceNet model.
X_train_embeddings = extract_embeddings(X_train)
X_test_embeddings = extract_embeddings(X_test)

# L2-normalise each embedding. normalize() works row by row, so no statistics
# are shared between the training and the test split.
X_train_embeddings = normalize(X_train_embeddings)
X_test_embeddings = normalize(X_test_embeddings)

# Train the classifier with hyperparameter tuning.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.01],
    'kernel': ['rbf']
}

grid = GridSearchCV(
    # class_weight='balanced' compensates for the very uneven number of images
    # per person.
    SVC(class_weight='balanced', probability=True, random_state=42),
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1
)

grid.fit(X_train_embeddings, y_train)

# Best configuration, already refitted on the full training split.
clf = grid.best_estimator_

# Per-fold accuracy of the winning configuration. The grid search has already
# evaluated it on the same stratified 5 folds that a fresh
# cross_val_score(clf, ..., cv=5) would use, so the scores are read from
# cv_results_ instead of fitting the model five more times.
# NOTE: these folds were also used to select the hyperparameters, so this
# figure is optimistic; the held-out test set below is the unbiased estimate.
cv_scores = np.array([
    grid.cv_results_[f"split{fold}_test_score"][grid.best_index_]
    for fold in range(grid.n_splits_)
])
print(f"Cross-validated accuracy: {np.mean(cv_scores):.4f} ± {np.std(cv_scores):.4f}")

# Make predictions on the test set.
y_pred = clf.predict(X_test_embeddings)

# Calculate and display metrics.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

print("\nDetailed classification report:")
# Passing the label indices explicitly keeps the report aligned with
# target_names even if some person is missing from the test split or from the
# predictions.
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(lfw_people.target_names)),
    target_names=lfw_people.target_names
))




Number of people: 62
Min faces per person: 20
Max faces per person: 530
Average faces per person: 48.76
Cross-validated accuracy: 0.9982 ± 0.0009
Accuracy: 1.00

Detailed classification report:
                           precision    recall  f1-score   support

         Alejandro Toledo       1.00      1.00      1.00        10
             Alvaro Uribe       1.00      1.00      1.00         9
          Amelie Mauresmo       1.00      1.00      1.00         5
             Andre Agassi       1.00      1.00      1.00         9
           Angelina Jolie       1.00      1.00      1.00         5
             Ariel Sharon       1.00      1.00      1.00        19
    Arnold Schwarzenegger       1.00      1.00      1.00        11
     Atal Bihari Vajpayee       1.00      1.00      1.00         6
             Bill Clinton       1.00      1.00      1.00         7
             Carlos Menem       1.00      1.00      1.00         5
             Colin Powell       1.00      1.00      1.00        59
 

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Summarise test-set performance with four aggregate scores. Precision,
# recall and F1 are averaged over the classes, weighted by class support.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Emit the header followed by one "<name>: <value>" line per metric.
print("Overall Metrics:")
print("\n".join([
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
]))


Overall Metrics:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
