In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os

In [26]:
def collect_dataset(dataset_dir="C:/Users/smart/Downloads/AI_Dataset/"):
    """Load grayscale face images from a folder-per-person dataset.

    Every sub-directory of ``dataset_dir`` is treated as one person. Its
    name becomes the class name and its position in the case-insensitively
    sorted directory listing becomes the integer class id.

    Parameters
    ----------
    dataset_dir : str, optional
        Root folder holding one sub-directory per person. Defaults to the
        location this notebook was originally written against.

    Returns
    -------
    tuple
        ``(images, labels, labels_dic)``: ``images`` is a list of arrays
        read with ``cv2.imread(path, 0)``, ``labels`` is the parallel list
        of integer class ids, and ``labels_dic`` maps each class id to the
        person's folder name.
    """
    import warnings

    images = []
    labels = []
    labels_dic = {}

    # os.listdir gives no ordering guarantee and also returns plain files.
    # Keep only directories and sort them so that class ids are contiguous
    # and identical on every run / machine.
    people = sorted(
        (entry for entry in os.listdir(dataset_dir)
         if os.path.isdir(os.path.join(dataset_dir, entry))),
        key=lambda name: (name.lower(), name),
    )
    for i, person in enumerate(people):
        labels_dic[i] = person
        person_dir = os.path.join(dataset_dir, person)
        for img in sorted(os.listdir(person_dir)):
            if not img.endswith('.jpg'):
                continue
            img_path = os.path.join(person_dir, img)
            image = cv2.imread(img_path, 0)  # flag 0 -> read as grayscale
            if image is None:
                # cv2.imread reports an unreadable/corrupt file by returning
                # None; appending it would break np.array(images) later.
                warnings.warn("Skipping unreadable image: " + img_path)
                continue
            images.append(image)
            labels.append(i)
    return (images, labels, labels_dic)

In [27]:
# Load every image with its integer class id and the id -> person-name map.
images, target, label_dic = collect_dataset()

Data Preprocessing

In [28]:
# Sanity check: the image list and the label list should be the same length.
print(len(images), len(target), sep="\n")

504
504


In [29]:
# Show the class-id -> name mapping, then how many samples each class id has.
print(label_dic)
pd.Series(target).value_counts()

{0: 'smarth', 1: 'Unknown'}


0    300
1    204
dtype: int64

In [30]:
# Stack the per-image arrays into one sample array and convert the label
# list into a matching vector.
X, Y = np.array(images), np.array(target)


In [31]:
# Eyeball the raw pixel values (NumPy abbreviates the repr of large arrays).
X

array([[[125, 101,  50, ..., 251, 255, 251],
        [126,  80,  44, ..., 253, 249, 251],
        [130,  88,  43, ..., 253, 255, 252],
        ...,
        [219, 219, 218, ..., 252, 252, 252],
        [218, 219, 218, ..., 252, 252, 252],
        [218, 219, 218, ..., 252, 252, 252]],

       [[ 65,  58,  75, ..., 253, 252, 252],
        [ 59,  54,  94, ..., 232, 237, 239],
        [ 69,  63,  51, ..., 231, 236, 251],
        ...,
        [230, 227, 227, ..., 253, 253, 253],
        [228, 227, 229, ..., 253, 253, 253],
        [228, 230, 229, ..., 253, 253, 253]],

       [[185, 114,  79, ..., 236, 237, 237],
        [190, 105,  83, ..., 236, 236, 235],
        [154, 105,  64, ..., 236, 236, 236],
        ...,
        [236, 235, 232, ..., 250, 250, 250],
        [230, 231, 231, ..., 250, 250, 250],
        [233, 234, 232, ..., 250, 250, 250]],

       ...,

       [[250, 252, 254, ...,   1,   2,   2],
        [249, 251, 253, ...,   1,   1,   1],
        [250, 252, 253, ...,   1,   1,   0

In [32]:
# Dimensions of the stacked image array.
X.shape

(504, 100, 80)

In [33]:
# Flatten each image into a single row of pixel values (one row per sample).
X_train = np.reshape(X, (len(X), -1))
print(X_train.shape)

(504, 8000)


Feature Scaling and Selection

In [34]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [35]:
# Standardise the flattened pixel features. The fitted scaler is kept in
# `sc` because it is pickled later in this notebook.
# NOTE(review): the scaler is fit on all samples before cross-validation,
# so the validation folds influence the scaling statistics. Consider
# fitting it inside a Pipeline that is passed to GridSearchCV.
sc = StandardScaler()
x_train_sc = sc.fit_transform(X_train)

In [36]:
# A float n_components asks PCA to keep as many components as are needed
# to explain that fraction of the variance (0.98 here).
# NOTE(review): PCA is fit on the full dataset before cross-validation, so
# the CV scores computed later are not from fully held-out data.
pca = PCA(0.98)
X_train_final = pca.fit_transform(x_train_sc)

In [37]:
# Number of principal components PCA actually kept.
pca.n_components_

212

In [38]:
# Shape of the PCA-reduced feature matrix fed to the classifiers.
X_train_final.shape

(504, 212)

Parameter Tuning

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV,cross_val_score,KFold

In [40]:
# Hyper-parameter grid for logistic regression: regularisation constant C
# crossed with the penalty norm.
log_param = dict(
    C=[0.001, 0.01, 0.1, 1, 10],
    penalty=['l1', 'l2'],
)

In [41]:
# Shuffled 5-fold splitter shared by both grid searches. The seed is fixed
# so fold assignment, and therefore the reported CV scores, are
# reproducible after a kernel restart.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [42]:
# Grid search over log_param with 5-fold CV accuracy.
# The grid contains penalty='l1', which the default 'lbfgs' solver rejects;
# those candidates would fail to fit and only the 'l2' half of the grid
# would really be evaluated. 'liblinear' supports both 'l1' and 'l2'.
gd_log = GridSearchCV(
    LogisticRegression(solver='liblinear'),
    param_grid=log_param,
    cv=kf,
    scoring='accuracy',
)

In [43]:
import warnings

# Silence warnings only while this grid search runs. A bare
# warnings.filterwarnings('ignore') would stay active for the rest of the
# kernel session and hide problems raised by later cells.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    gd_log.fit(X_train_final, Y)

# Display the fitted search object, as the bare fit() call used to do.
gd_log

In [44]:
# Best logistic-regression hyper-parameters found by the grid search.
gd_log.best_params_

{'C': 0.01, 'penalty': 'l2'}

In [45]:
# Mean cross-validated accuracy of the best logistic-regression candidate.
gd_log.best_score_

1.0

In [46]:
# Hyper-parameter grid for the SVM: regularisation constant C crossed with
# the kernel type.
sv_param = dict(
    C=[0.001, 0.01, 0.1, 1, 10],
    kernel=['linear', 'rbf'],
)

In [47]:
# Grid search over sv_param with 5-fold CV accuracy.
# probability=True makes SVC fit an internal, randomly shuffled
# cross-validation to calibrate predict_proba; the seed is fixed so the
# fitted (and later pickled) model is reproducible.
gd_svm = GridSearchCV(
    SVC(probability=True, random_state=42),
    param_grid=sv_param,
    cv=kf,
    scoring='accuracy',
)

In [48]:
# Run the SVM grid search on the PCA-reduced features.
gd_svm.fit(X_train_final,Y)

In [49]:
# Best SVM hyper-parameters found by the grid search.
gd_svm.best_params_

{'C': 1, 'kernel': 'rbf'}

In [50]:
# Mean cross-validated accuracy of the best SVM candidate.
gd_svm.best_score_

1.0

In [51]:
# Keep the best SVC from the grid search; this is the model pickled below.
clf=gd_svm.best_estimator_
clf

In [52]:
# NOTE(review): GridSearchCV refits the best estimator on the full data by
# default, so this extra fit looks redundant — confirm before removing.
# The score printed here is measured on the same data the model was fit on,
# so it is not an estimate of generalisation performance.
clf.fit(X_train_final,Y)
print("Training score: ",clf.score(X_train_final,Y))

Training score:  1.0


Deploy

In [53]:
import pickle

In [54]:
# Persist the trained classifier. The context manager guarantees the file
# handle is closed even if pickling raises.
file = 'svm_face_model.pkl'
with open(file, 'wb') as f:
    pickle.dump(clf, f)

print("Model saved sucessfully!!!")

Model saved sucessfully!!!


In [55]:
# Persist the fitted StandardScaler so inference can apply the same
# scaling. The context manager guarantees the file handle is closed even
# if pickling raises.
file1 = 'face_sc.pkl'
with open(file1, 'wb') as f1:
    pickle.dump(sc, f1)

print("Standard scaler file saved sucessfully!!!")

Standard scaler file saved sucessfully!!!


In [56]:
# Persist the fitted PCA so inference can project new images into the same
# component space. The context manager guarantees the file handle is
# closed even if pickling raises.
file2 = 'face_pca.pkl'
with open(file2, 'wb') as f2:
    pickle.dump(pca, f2)

print("PCA file saved sucessfully!!!")

PCA file saved sucessfully!!!
