In [2]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Step 1: Fetch the LFW faces dataset, keeping only people who have at
# least 60 images.
faces = fetch_lfw_people(min_faces_per_person=60)

# Step 2: Show which fields the returned dictionary-like object exposes
field_names = faces.keys()
print("Field names (keys to the dictionary):", field_names)


Field names (keys to the dictionary): dict_keys(['data', 'images', 'target', 'target_names', 'DESCR'])


In [3]:
# Step 3: Show the free-text description that ships with the dataset
description = faces.DESCR
print("Dataset description:", description)

Dataset description: .. _labeled_faces_in_the_wild_dataset:

The Labeled Faces in the Wild face recognition dataset
------------------------------------------------------

This dataset is a collection of JPEG pictures of famous people collected
over the internet, all details are available on the official website:

    http://vis-www.cs.umass.edu/lfw/

Each picture is centered on a single face. The typical task is called
Face Verification: given a pair of two pictures, a binary classifier
must predict whether the two images are from the same person.

An alternative task, Face Recognition or Face Identification is:
given the picture of the face of an unknown person, identify the name
of the person by referring to a gallery of previously seen pictures of
identified persons.

Both Face Verification and Face Recognition are tasks that are typically
performed on the output of a model trained to perform Face Detection. The
most popular model for Face Detection is called Viola-Jones and is
imp

In [4]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Step 1: Fetch the dataset (only people with at least 60 images each)
faces = fetch_lfw_people(min_faces_per_person=60)

# Step 4: Inspect the feature matrix and the class names
pixel_rows = faces.data

# Raw feature values (NumPy abbreviates the large array with "...")
print(" The data looks like:", pixel_rows)

# Dimensions of the feature matrix: (number of images, features per image)
print("The data shape is:", pixel_rows.shape)

# Names of the people that the integer targets refer to
print("The target names are:", faces.target_names)

 The data looks like: [[0.53333336 0.52418303 0.49673203 ... 0.00653595 0.00653595 0.00130719]
 [0.28627452 0.20784314 0.2535948  ... 0.96993464 0.95032686 0.9346406 ]
 [0.31633988 0.3895425  0.275817   ... 0.4261438  0.7895425  0.9555555 ]
 ...
 [0.11633987 0.11111111 0.10196079 ... 0.5660131  0.579085   0.5542484 ]
 [0.19346406 0.21045752 0.29150328 ... 0.6875817  0.6575164  0.5908497 ]
 [0.12418301 0.09673203 0.10849673 ... 0.12941177 0.16209151 0.29150328]]
The data shape is: (1348, 2914)
The target names are: ['Ariel Sharon' 'Colin Powell' 'Donald Rumsfeld' 'George W Bush'
 'Gerhard Schroeder' 'Hugo Chavez' 'Junichiro Koizumi' 'Tony Blair']


In [1]:
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Step 1: Load the data (only people with at least 60 images each)
faces = fetch_lfw_people(min_faces_per_person=60)

# Step 5: Divide the data into features (X) and target (y)
X = faces.data    # one row of pixel features per image
y = faces.target  # integer class id; indexes into faces.target_names

# Step 6: Split the data into training and testing sets
# 70% train / 30% test; random_state pins the shuffle so the split is reproducible.
# NOTE(review): the classes are imbalanced (test support ranges from 16 to 146),
# so stratify=y may be worth adding. It is not applied here because it would
# change the split and therefore the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 7: Declare SVM model with kernel='rbf', class_weight='balanced'
# class_weight='balanced' scales the penalty C per class inversely to class
# frequency, so the rarer people are not drowned out by the frequent ones.
svm_model = SVC(kernel='rbf', class_weight='balanced')

# Step 8: Exhaustive grid search with 10-fold cross-validation.
# GridSearchCV evaluates EVERY (C, gamma) pair in param_grid (4 x 4 = 16
# candidates, 10 fits each); it does not sample random combinations
# (that would be RandomizedSearchCV).
param_grid = {'C': [1, 5, 10, 50], 'gamma': [0.001, 0.0005, 0.01, 0.1]}
grid_search = GridSearchCV(svm_model, param_grid, cv=10)
grid_search.fit(X_train, y_train)

# Get the best model from the grid search
# (with the default refit=True it has been refitted on the whole training set)
best_model = grid_search.best_estimator_

# Step 9: Predict on the test set using the best model
y_pred = best_model.predict(X_test)

# Step 10: Model performances
# Pass the class ids explicitly so that each row of the report is tied to the
# right name even if some class happens to be absent from y_test / y_pred.
# Without `labels`, classification_report infers the classes from the data and
# a missing class would misalign (or reject) target_names.
labels = list(faces.target_names)
label_ids = list(range(len(labels)))
print(classification_report(y_test, y_pred, labels=label_ids, target_names=labels))

                   precision    recall  f1-score   support

     Ariel Sharon       0.59      0.76      0.67        17
     Colin Powell       0.80      0.83      0.81        84
  Donald Rumsfeld       0.67      0.81      0.73        36
    George W Bush       0.90      0.86      0.88       146
Gerhard Schroeder       0.69      0.71      0.70        28
      Hugo Chavez       1.00      0.63      0.77        27
Junichiro Koizumi       0.89      1.00      0.94        16
       Tony Blair       0.82      0.78      0.80        51

         accuracy                           0.81       405
        macro avg       0.79      0.80      0.79       405
     weighted avg       0.83      0.81      0.82       405



In [None]:
Observations about the model performance 

1. Precision:
   - The model shows varying precision across different classes, ranging from 0.59 to 1.00. It indicates that the model's ability to correctly classify instances differs by class.
   - Some classes have relatively low precision (e.g., Ariel Sharon), meaning there are more false positives in those predictions.
   - Other classes, like Hugo Chavez, have very high precision (1.00), meaning that every test image the model labeled as this class was actually that person.

2. Recall:
   - The recall also differs across classes, ranging from 0.63 to 1.00. This shows that the model's ability to identify all instances of a particular class differs.
   - The class Hugo Chavez has relatively low recall (0.63), which means that the model misses about 37% of the actual instances of this class.
   - Classes like Junichiro Koizumi have a perfect recall of 1.00, indicating that the model correctly identifies all instances of this class.

3. F1-Score:
   - The F1-score is the harmonic mean of precision and recall. It reflects the balance between the two metrics.
   - Classes with high F1-scores, such as George W Bush, have a strong balance between precision and recall.
   - Classes with lower F1-scores may have an imbalance between precision and recall.

4. Accuracy:
   - The overall accuracy of the model is 0.81, indicating that the model correctly classifies approximately 81% of the instances in the test set.

5. Macro and Weighted Averages:
   - The macro-average F1-score is 0.79. It is the unweighted mean of the per-class F1-scores, so every class counts equally regardless of how many test instances it has.
   - The weighted-average F1-score is 0.82. It weights each class's F1-score by its support (number of test instances), so larger classes such as George W Bush contribute more.

In summary, the model performs reasonably well at classifying faces, but its performance varies across classes: it is more accurate for some individuals than for others.

