## Training Model PCA (Eigenface) + SVM

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt


### 1. Preprocessing Dataset
- Detect Face, Crop, Resize & Convert to Grayscale

In [None]:
# Haar-cascade frontal-face detector; the XML path is relative to the working directory.
face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # The constructor does not raise on a bad path, so fail early with a clear message.
    raise IOError("Could not load haarcascades/haarcascade_frontalface_default.xml")

image_list = []  # 100x100 grayscale face crops
labels = []      # person name (dataset sub-folder) for each crop, same order as image_list

dataset_dir = "lfw_dataset"
# sorted() makes the traversal order (and therefore the crop indices) repeatable.
for folder in sorted(os.listdir(dataset_dir)):
    person_dir = os.path.join(dataset_dir, folder)
    if not os.path.isdir(person_dir):
        continue  # skip stray files sitting next to the person folders

    for file in sorted(os.listdir(person_dir)):
        img = cv2.imread(os.path.join(person_dir, file))
        if img is None:
            continue  # cv2.imread returns None for unreadable / non-image files

        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # scaleFactor=1.3, minNeighbors=5
        faces = face_cascade.detectMultiScale(img_gray, 1.3, 5)
        # NOTE(review): every detection in an image receives the folder's label,
        # so a second (background) face would be labelled as the same person.
        for (x, y, w, h) in faces:
            img_face = img_gray[y:y+h, x:x+w]              # crop face image
            img_resize = cv2.resize(img_face, (100, 100))  # resize to 100 x 100 pixel

            image_list.append(img_resize)
            labels.append(folder)  # append label (name) of image

In [None]:
# Number of face crops collected (one per detection, not one per image file).
len(image_list)

- save detected face into folder

In [None]:
# Create the output folder for cropped faces; exist_ok avoids the check-then-create race
# and also builds any missing parent directories.
os.makedirs("croped_face", exist_ok=True)

In [None]:
face_folder = "croped_face"
# Write every crop to croped_face/<label>/<label>_<index>.jpg
for i, (img, label) in enumerate(zip(image_list, labels)):

    # make sure the per-person folder exists
    folder_path = os.path.join(face_folder, label)
    os.makedirs(folder_path, exist_ok=True)

    # Zero-pad the index: "%4d" pads with spaces and leaves blanks inside the file name.
    file_name = label + "_%04d.jpg" % i
    file_path = os.path.join(folder_path, file_name)

    # Remove any previous copy, including the space-padded name written by the old
    # format, so the loading step does not pick up the same face twice.
    legacy_path = os.path.join(folder_path, label + "_%4d.jpg" % i)
    for stale_path in (file_path, legacy_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # cv2.imwrite reports failure through its return value instead of raising
    if not cv2.imwrite(file_path, img):
        raise IOError("Failed to write " + file_path)

### 2. Load Cropped Face Dataset

In [None]:
image_faces = []  # one flattened grayscale face vector per image
labels = []       # person name (sub-folder) for each vector, same order as image_faces

cropped_dir = "croped_face"
# sorted() keeps the sample order stable across machines, so the seeded split is repeatable.
for folder in sorted(os.listdir(cropped_dir)):
    person_dir = os.path.join(cropped_dir, folder)
    if not os.path.isdir(person_dir):
        continue  # ignore stray files next to the person folders

    for file in sorted(os.listdir(person_dir)):
        img = cv2.imread(os.path.join(person_dir, file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue  # cv2.imread returns None for unreadable / non-image files

        image_faces.append(img.flatten())  # 2D image -> 1D feature vector
        labels.append(folder)              # append label (name) of image

In [None]:
image_faces[0].shape # length of one flattened face vector; (10000,) when the file is a 100x100 crop

### 3. Label Encoding (Scikit-Learn)

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Preview only the first few labels instead of dumping the whole list into the output.
labels[:10]

In [None]:
# Fit the encoder on the folder-name labels; fit() hands back the encoder itself.
le = LabelEncoder().fit(labels)

# classes_ lists the distinct names; a name's position is its encoded integer.
label_name = le.classes_
print(label_name)

In [None]:
# Map every string label to its integer class index.
labels_vec = le.transform(labels)

In [None]:
# Show the encoded integer labels.
print(labels_vec)

### 4. Split dataset (75% train, 25% test)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# test_size was 0.30, contradicting both the section title and the inline comment (25%);
# it now matches the documented 75% / 25% split.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(image_faces, dtype=np.float32),  # input data: one flattened face per row
    np.array(labels_vec),                     # target/output data: encoded identity per row
    test_size=0.25,                           # hold out 25% for testing, 75% for training
    random_state=42)                          # fixed seed so the split is repeatable

In [None]:
# Shapes of the train/test feature matrices and their label vectors.
print(X_train.shape, y_train.shape, X_test.shape,  y_test.shape)

### 5. Calculate Eigenface using PCA

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Height and width used to reshape PCA components back into images (faces were resized to 100x100).
h, w = 100, 100

In [None]:
# Requested number of principal components (eigenfaces).
K = 233
# PCA cannot extract more components than min(n_samples, n_features);
# clamp so a small training set does not raise a ValueError.
K = min(K, X_train.shape[0], X_train.shape[1])
# svd_solver='auto' may select the randomized solver; a fixed random_state keeps runs repeatable.
pca = PCA(n_components=K, random_state=42).fit(X_train)

- Show Eigenface

In [None]:
# Reshape each principal component (one row of components_) into an h x w image.
eigenfaces = pca.components_.reshape((K, h, w))

In [None]:
plt.figure(figsize=(15, 10))
# Draw at most the first 20 eigenfaces on a 4 x 5 grid.
for i, face in enumerate(eigenfaces[:20]):
    plt.subplot(4, 5, i + 1)
    plt.imshow(face, cmap="gray")

- Projecting Input data to PCA

In [None]:
# Project both splits onto the components learned from the training set only.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

In [None]:
# Dimensionality of one sample before projection (raw pixel vector).
X_train[0].shape

In [None]:
# Dimensionality of the same sample after projection (one value per kept component).
X_train_pca[0].shape

- save PCA model

In [None]:
import pickle

def save_model(model, filename, path=""):
    """Pickle ``model`` to ``path/filename``.

    Parameters
    ----------
    model : object
        Any picklable object (here: fitted scikit-learn estimators).
    filename : str
        Name of the file to write.
    path : str, optional
        Directory to write into; the empty default means the current
        working directory.
    """
    target = os.path.join(path, filename)
    with open(target, 'wb') as sink:
        pickle.dump(model, sink, protocol=pickle.HIGHEST_PROTOCOL)

def read_model(filename, path=""):
    """Load and return the object pickled at ``path/filename``.

    Parameters
    ----------
    filename : str
        Name of the pickle file to read.
    path : str, optional
        Directory containing the file; the empty default means the
        current working directory.

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    Unpickling can execute arbitrary code, so only load files you
    created yourself or otherwise trust.
    """
    source = os.path.join(path, filename)
    with open(source, 'rb') as stream:
        return pickle.load(stream)

In [None]:
# Persist the fitted PCA to pca_model.pkl in the current working directory.
save_model(pca, "pca_model.pkl")

### 6. Apply to SVM

In [None]:
from sklearn.svm import SVC

In [None]:
# Linear SVM with probability estimates enabled.
# - gamma was dropped: it only applies to the 'rbf', 'poly' and 'sigmoid' kernels and is
#   ignored by kernel='linear', so passing it suggested a tuning knob that did nothing.
# - random_state fixes the data shuffling used for the probability estimates, so
#   predict_proba output is repeatable between runs.
svm = SVC(kernel='linear', probability=True, C=100, random_state=42)

In [None]:
# Train the classifier on the PCA-projected training faces.
svm.fit(X_train_pca, y_train)

- predict result

In [None]:
# Predicted class indices for the held-out (PCA-projected) test faces.
y_pred = svm.predict(X_test_pca)

- save model

In [None]:
# Persist the trained SVM to SVM_scikit_model.pkl in the current working directory.
save_model(svm, "SVM_scikit_model.pkl")

### 7. Model Evaluation

In [None]:
import itertools
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are indexed as true labels and columns as
        predicted labels on the plot.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, optional
        If True, divide each row by its sum so cells show per-true-class
        fractions. Rows that sum to zero are left as zeros.
    title : str, optional
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row total; divide only where the
        # total is non-zero so those rows stay 0 instead of turning into NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize=(6, 6))

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so the number stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run tight_layout after the axis labels exist so they are included in the layout.
    plt.tight_layout()
    plt.show()

In [None]:
# Limit NumPy's printed float precision to two decimals.
np.set_printoptions(precision=2)

# Confusion matrix over every encoded class, whether or not it occurs in the test split.
cnf_matrix = confusion_matrix(
    y_test,
    y_pred,
    labels=np.unique(labels_vec),
)

# Raw-count (non-normalized) heat map
plot_confusion_matrix(
    cnf_matrix,
    classes=label_name,
    normalize=False,
    title='Confusion matrix',
)

In [None]:
# Pass labels= explicitly: without it, classification_report derives the class list from
# y_test/y_pred and raises when that count differs from len(target_names), which
# happens whenever a person has no samples in the test split.
print(classification_report(y_test,
                            y_pred,
                            labels=np.unique(labels_vec),
                            target_names=label_name))