## Eigen Faces Dataset

The dataset used is "Labeled Faces in the Wild" (LFW):
  http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz 

In [1]:
from time import time
import logging
import pylab as pl
import numpy as np

## Importing Libraries

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# RandomizedPCA was removed from scikit-learn in 0.20; PCA(svd_solver="randomized")
# is its replacement. Guard the legacy import so this cell still runs on current
# releases while keeping the name defined for older installs.
try:
    from sklearn.decomposition import RandomizedPCA
except ImportError:
    RandomizedPCA = None

## Downloading the dataset if not present on disk

In [5]:
# Load the LFW people dataset, restricted to people with at least 70 images,
# with every image resized by a factor of 0.4.
labelled_faces_people = fetch_lfw_people(
    resize=0.4,
    min_faces_per_person=70,
)

In [6]:
# Shape of the image stack; unpacked below as (n_samples, h, w).
labelled_faces_people.images.shape

(1288, 50, 37)

### Image dimensions: number of samples, image height (h) and width (w)

In [7]:
# Unpack the image stack's dimensions: sample count, then per-image height and
# width. h and w are reused later to reshape flat vectors back into images.
n_samples, h, w = labelled_faces_people.images.shape

In [8]:
# Seed NumPy's global random number generator so later draws from np.random
# are repeatable.
np.random.seed(42)

### Input Data

In [9]:
# Feature matrix: one row per image. The recorded shapes (1288, 50, 37) and
# (1288, 1850) indicate each row is an image flattened to h * w values.
X = labelled_faces_people.data

In [10]:
# Peek at the raw values; NumPy abbreviates the repr of a large array.
X

array([[ 254.        ,  254.        ,  251.66667175, ...,   87.33333588,
          88.66666412,   86.66666412],
       [  39.66666794,   50.33333206,   47.        , ...,  117.66666412,
         115.        ,  133.66667175],
       [  89.33333588,  104.        ,  126.        , ...,  175.33332825,
         183.33332825,  183.        ],
       ..., 
       [  86.        ,   80.33333588,   74.66666412, ...,   44.        ,
          49.66666794,   44.66666794],
       [  50.33333206,   65.66666412,   88.        , ...,  197.        ,
         179.33332825,  166.33332825],
       [  30.        ,   27.        ,   32.66666794, ...,   35.        ,
          35.33333206,   61.        ]], dtype=float32)

In [11]:
# (number of samples, number of features per sample)
X.shape

(1288, 1850)

### Number of Features

In [12]:
# Second axis of X: the number of features (columns) per sample.
n_features = X.shape[1]

### Output Data

In [13]:
# Target vector, one entry per sample.
# NOTE(review): presumably integer ids that index into target_names — confirm.
y = labelled_faces_people.target

In [14]:
# Should have one entry per row of X.
y.shape

(1288,)

### Names of People whose images are added as input

In [15]:
# Names of the people present in the loaded subset.
labelled_faces_people.target_names

array(['Ariel Sharon', 'Colin Powell', 'Donald Rumsfeld', 'George W Bush',
       'Gerhard Schroeder', 'Hugo Chavez', 'Tony Blair'],
      dtype='<U17')

In [16]:
# Keep a short alias for the array of person names.
target_names = labelled_faces_people.target_names

In [17]:
# One class per distinct person in the dataset.
n_classes = len(target_names)

## Splitting into training & test data

In [18]:
# Hold out 25% of the samples for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

## Applying PCA (Eigenfaces)

In [20]:
# Number of principal components (eigenfaces) to keep when fitting PCA.
n_components = 140

In [22]:
X_train.shape  # First entry is the number of training faces (966 in the recorded run)

(966, 1850)

### Extracting top 140 eigenfaces from 966 faces

In [26]:
# Fit PCA on the training faces only, so the test set never influences the basis.
# whiten=True rescales the projected components to unit variance.
# random_state pins the randomized SVD solver: without it the solver draws from
# NumPy's global RNG, so the fit depends on which cells have run since the seed
# was set rather than on this cell alone.
pca = PCA(
    n_components=n_components,
    whiten=True,
    svd_solver="randomized",
    random_state=42,
).fit(X_train)

### Getting principal components of the face data
Reshaping the principal components to get actual images

In [28]:
# Each row of pca.components_ is one principal axis in flattened pixel space;
# reshape every row back into an h x w image. Using -1 takes the eigenface count
# from the fitted model itself, so this cell stays valid even if the
# `n_components` variable was edited after the fit.
eigenfaces = pca.components_.reshape((-1, h, w))

In [34]:
# Project the training faces onto the fitted principal components.
X_train_pca = pca.transform(X_train)

In [35]:
# Project the held-out faces with the same PCA that was fitted on X_train.
X_test_pca = pca.transform(X_test)

In [41]:
# Fraction of total variance explained by the first principal component.
pca.explained_variance_ratio_[0]

0.19346543

In [42]:
# Fraction of total variance explained by the second principal component.
pca.explained_variance_ratio_[1]

0.15116845

In [45]:
# Combined share of variance explained by the first two principal components.
pca.explained_variance_ratio_[:2].sum()

0.34463388