In [9]:
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Fetch the Labeled Faces in the Wild dataset, keeping only people that have
# at least 20 pictures and applying a resize ratio of 0.4 to every picture.
lfw_people = datasets.fetch_lfw_people(resize=0.4, min_faces_per_person=20)

# Hold out 25% of the samples for testing. Stratifying on the target keeps
# the per-person proportions the same in the training and test partitions.
train_data, test_data, train_labels, test_labels = train_test_split(
    lfw_people.data,
    lfw_people.target,
    stratify=lfw_people.target,
    test_size=0.25,
    random_state=28,
)

# Eigenfaces pipeline: standardize the pixel features, project them onto
# 150 whitened principal components, then classify with an RBF-kernel SVM
# whose class weights are balanced.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=150, whiten=True, random_state=28)),
    ('svm', SVC(kernel='rbf', class_weight='balanced')),
])

# Candidate SVM hyper-parameters; the `svm__` prefix routes each entry to
# the 'svm' step of the pipeline.
param_grid = dict(
    svm__C=[0.1, 1, 10, 100, 1000],
    svm__gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)

# Exhaustive 5-fold cross-validated search over the grid, fitted on the
# training partition only (`fit` returns the fitted search object itself).
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5).fit(
    train_data, train_labels
)

print(f"Best parameters found: {grid.best_params_}")
print(f"Best cross-validation score: {grid.best_score_:.2f}")

# Predict the held-out samples with the best estimator found by the search.
predictions = grid.predict(test_data)

# Per-person precision / recall / F1 on the test partition; metrics that
# would divide by zero are reported as 0 instead of raising a warning.
print(
    classification_report(
        y_true=test_labels,
        y_pred=predictions,
        target_names=lfw_people.target_names,
        zero_division=0,
    )
)


Best parameters found: {'svm__C': 100, 'svm__gamma': 0.001}
Best cross-validation score: 0.59
                           precision    recall  f1-score   support

         Alejandro Toledo       0.70      0.70      0.70        10
             Alvaro Uribe       0.83      0.56      0.67         9
          Amelie Mauresmo       0.67      0.40      0.50         5
             Andre Agassi       0.33      0.44      0.38         9
           Angelina Jolie       0.50      0.20      0.29         5
             Ariel Sharon       0.62      0.79      0.70        19
    Arnold Schwarzenegger       0.36      0.45      0.40        11
     Atal Bihari Vajpayee       0.33      0.17      0.22         6
             Bill Clinton       0.50      0.43      0.46         7
             Carlos Menem       0.50      0.60      0.55         5
             Colin Powell       0.72      0.80      0.76        59
            David Beckham       0.40      0.25      0.31         8
          Donald Rumsfeld       0.

In [8]:
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Fetch the Labeled Faces in the Wild dataset, keeping only people that have
# at least 50 pictures and applying a resize ratio of 0.4 to every picture.
lfw_people = datasets.fetch_lfw_people(resize=0.4, min_faces_per_person=50)

# Hold out 25% of the samples for testing. Stratifying on the target keeps
# the per-person proportions the same in the training and test partitions.
train_data, test_data, train_labels, test_labels = train_test_split(
    lfw_people.data,
    lfw_people.target,
    stratify=lfw_people.target,
    test_size=0.25,
    random_state=28,
)

# Eigenfaces pipeline: standardize the pixel features, project them onto
# 150 whitened principal components, then classify with an RBF-kernel SVM
# whose class weights are balanced.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=150, whiten=True, random_state=28)),
    ('svm', SVC(kernel='rbf', class_weight='balanced')),
])

# Candidate SVM hyper-parameters; the `svm__` prefix routes each entry to
# the 'svm' step of the pipeline.
param_grid = dict(
    svm__C=[0.1, 1, 10, 100, 1000],
    svm__gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)

# Exhaustive 5-fold cross-validated search over the grid, fitted on the
# training partition only (`fit` returns the fitted search object itself).
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5).fit(
    train_data, train_labels
)

print(f"Best parameters found: {grid.best_params_}")
print(f"Best cross-validation score: {grid.best_score_:.2f}")

# Predict the held-out samples with the best estimator found by the search.
predictions = grid.predict(test_data)

# Per-person precision / recall / F1 on the test partition; metrics that
# would divide by zero are reported as 0 instead of raising a warning.
print(
    classification_report(
        y_true=test_labels,
        y_pred=predictions,
        target_names=lfw_people.target_names,
        zero_division=0,
    )
)


Best parameters found: {'svm__C': 10, 'svm__gamma': 0.001}
Best cross-validation score: 0.81
                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.74      0.70        19
     Colin Powell       0.83      0.81      0.82        59
  Donald Rumsfeld       0.70      0.87      0.78        30
    George W Bush       0.92      0.87      0.90       133
Gerhard Schroeder       0.65      0.63      0.64        27
      Hugo Chavez       0.65      0.61      0.63        18
   Jacques Chirac       0.54      0.54      0.54        13
    Jean Chretien       0.92      0.86      0.89        14
    John Ashcroft       0.73      0.85      0.79        13
Junichiro Koizumi       0.88      0.93      0.90        15
  Serena Williams       0.57      0.62      0.59        13
       Tony Blair       0.76      0.72      0.74        36

         accuracy                           0.79       390
        macro avg       0.74      0.75      0.74       390
     weighted avg   