In [None]:
# Draw matplotlib figures directly in the notebook output.
%matplotlib inline
import warnings
# Silence every warning for the rest of the session (this also hides library deprecation notices).
warnings.filterwarnings('ignore')
import numpy as np
# pyplot is aliased as `plotter` throughout this notebook.
import matplotlib.pyplot as plotter

In [None]:

from sklearn.datasets import fetch_lfw_people

# Load the "Labeled Faces in the Wild" people data, keeping only the people
# that have at least 60 pictures.
faces = fetch_lfw_people(min_faces_per_person=60)

# Quick summary: the class names, then the shape of the image stack.
for summary in (faces.target_names, faces.images.shape):
    print(summary)

In [None]:
# Exercise: plot faces from the faces.images matrix, using faces.target_names as labels.
# TIP: use the subplots and imshow functions from matplotlib.

# Two stacked panels, one per face; each panel is tagged with the last word of the person's name.
fig, ax = plotter.subplots(2, 1, figsize=(8, 3))
for panel, picture, person in zip(ax.flat, faces.images, faces.target):
    surname = faces.target_names[person].split()[-1]
    panel.imshow(picture, cmap='bone')
    panel.set(xticks=[], yticks=[])
    panel.set_ylabel(surname, color="black")

In [None]:
from sklearn import svm, metrics

# Flatten every image into a single feature vector (one row per image).
data = faces.images.reshape(len(faces.images), -1)
n_samples = data.shape[0]
print(data.shape)

In [None]:
# Exercise: make a pipeline of an SVM and a RandomizedPCA model with sklearn (make_pipeline command).
# TIP: use the nonlinear Gaussian kernel in the SVM (rbf) and try 50 and 150 PCA components.

from sklearn.svm import SVC

# First step: fit the RBF-kernel SVM alone on faces.data (no PCA in front of it yet).
X = faces.data
y = faces.target
clf = SVC(C=1E6, kernel='rbf')
clf.fit(X, y)

In [None]:
from sklearn.decomposition import PCA

# Fit a 150-component PCA on the full feature matrix and print the variance of each component.
pca = PCA(n_components=150).fit(X)
print(pca.explained_variance_)

In [None]:
# Here we use principal component analysis to extract 150 fundamental components to feed
# into our support vector machine classifier.
# The most straightforward way to do this is to package the preprocessor and the classifier
# into a single pipeline using the make_pipeline function.
# NOTE: the former `RandomizedPCA` import was dropped. The class is not used below
# (PCA with svd_solver='randomized' is), and it was removed from sklearn.decomposition
# in scikit-learn 0.20, so importing it aborted this cell with an ImportError.
from sklearn.pipeline import make_pipeline

# random_state pins the randomized SVD so the components are repeatable from run to run,
# in line with the random_state=42 used for the train/test split further down.
pca = PCA(n_components=150, svd_solver='randomized', random_state=42)
clf = SVC(kernel='rbf', C=1E6)

# Despite its name, this pipeline is PCA followed by an RBF-kernel SVM (name kept as-is).
poly_model = make_pipeline(pca, clf)
poly_model.fit(X, y)

In [None]:
# To test the classifier's output, split the data into a training set and a testing set.
# TIP: use the train_test_split function. It lives in sklearn.model_selection; the old
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and removed in 0.20.

from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Perform a grid search cross-validation to explore combinations of parameters.
# Here we adjust C (which controls the margin hardness) and
# gamma (which controls the size of the radial basis function kernel) in the SVM, to find the best model.
# TIP: use GridSearchCV with the values [1, 5, 10, 50] for C and [0.0001, 0.0005, 0.001, 0.005]
# for gamma, then fit the training data to find the parameters.
# The keys are spelled 'svc__C' / 'svc__gamma' only when the search wraps a whole pipeline;
# this grid is handed to a search over a bare SVC, hence the plain 'C' / 'gamma' keys.
# GridSearchCV is imported from sklearn.model_selection: the old sklearn.grid_search module
# was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV
tuned_parameters = [{'C': [1, 5, 10, 50], 'gamma': [0.0001, 0.0005, 0.001, 0.005], 'kernel': ['rbf']}]


In [None]:
# Print the best parameters found by the grid search. If the optimal values fall at the edges of the grid,
# we would want to expand the grid to make sure we have found the true optimum.
# random_state pins the randomized SVD so the projection is repeatable from run to run,
# in line with the random_state=42 used for the train/test split.
pca = PCA(n_components=50, svd_solver='randomized', whiten=True, random_state=42)

# The search wraps only the SVC and is the last pipeline step, so PCA is fitted once on the
# whole training set and every cross-validation fold shares that projection.
clf = GridSearchCV(SVC(), tuned_parameters, cv=5)
best_model = make_pipeline(pca, clf)
best_model.fit(X_train, y_train)
print(clf.best_params_)

In [None]:
# Use a few test images to check the fit accuracy and plot the images with the estimator-assigned labels, like in 1).
# `a` holds the labels that the fitted best_model pipeline predicts for X_test.
a=best_model.predict(X_test)

In [None]:
def plot_images(X_test, a, y_true=None):
    """Show up to 15 test faces in a 3x5 grid, labelled with the predicted names.

    Each panel's y-label is the last word of the predicted person's name,
    looked up in the notebook-level ``faces.target_names``. The label is black
    when the prediction equals the true label and red otherwise.

    Parameters
    ----------
    X_test : array-like
        Flattened images, one per row. Each row is reshaped to the per-image
        shape of ``faces.images`` before being drawn.
    a : array-like of int
        Predicted class indices, aligned with ``X_test``.
    y_true : array-like of int, optional
        True class indices, aligned with ``X_test``. When omitted, the
        notebook-level ``y_test`` is used, as the two-argument call expects.
    """
    if y_true is None:
        y_true = y_test
    # Take the image shape from the loaded data instead of hard-coding 62x47.
    image_shape = faces.images.shape[1:]
    fig, axes = plotter.subplots(3, 5, figsize=(8, 3))
    for i, axi in enumerate(axes.flat):
        if i >= len(X_test):
            # Fewer images than panels: blank the unused panel instead of failing.
            axi.axis('off')
            continue
        axi.imshow(X_test[i].reshape(image_shape), cmap='bone')
        axi.set(xticks=[], yticks=[])
        # The label must be set inside the loop so that every panel gets one.
        color = "black" if y_true[i] == a[i] else "red"
        axi.set_ylabel(faces.target_names[a[i]].split()[-1], color=color)
                                        
# Draw the grid of test faces using the labels in `a` predicted by best_model.
plot_images(X_test, a)