In [1]:
import numpy as np
import cv2
import pandas as pd
import os
import pickle
from google.colab import drive
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split

In [2]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')
# Project folder on the mounted Drive.
path = '/content/drive/MyDrive/ComputerVision/'
# Show all folders and files in the project folder
os.listdir(path)

Mounted at /content/drive


['Pics', 'Models', 'FaceImage', 'data_face_features.pickle']

In [3]:
# Load the pre-computed face-feature dataset from Drive.
# The context manager closes the file handle, which a bare open() call never does explicitly.
# NOTE(security): pickle.load can execute arbitrary code; only load files you created or trust.
with open("/content/drive/MyDrive/ComputerVision/data_face_features.pickle", mode="rb") as feature_file:
  data = pickle.load(feature_file)

In [4]:
# Convert the stored features and labels to NumPy arrays.
x = np.array(data['data'])
y = np.array(data['label'])

# Inspect the raw arrays (NumPy abbreviates large arrays with "..." when printing).
# NOTE(review): the labels appear to be "<id>_<name>" strings, so this output may expose
# personal data - consider showing only shapes before sharing the notebook.
print(x)
print(y)

[[[ 4.12972440e-04  2.00944394e-01 -7.10193635e-05 ... -6.53045923e-02
    7.79734850e-02  6.92033395e-03]]

 [[ 2.41263434e-02  2.04540446e-01 -7.79361743e-03 ...  2.65857968e-02
    1.21704385e-01  1.49168121e-02]]

 [[ 1.27565507e-02  2.20630527e-01 -1.08428365e-02 ...  9.52625833e-03
    1.50048286e-01  2.36851294e-02]]

 ...

 [[ 6.35003671e-02  1.94789156e-01  3.05566005e-03 ...  6.76212534e-02
    1.15328692e-01  3.71697769e-02]]

 [[ 6.57169968e-02  1.57503560e-01 -4.84599397e-02 ... -3.10451481e-02
    7.40923211e-02  8.18373188e-02]]

 [[ 6.52924404e-02  1.44821167e-01 -4.80778068e-02 ...  5.23325503e-02
    9.11173597e-03  2.66670659e-02]]]
['1721031620_PhanQuocHuy' '1721031620_PhanQuocHuy'
 '1721031620_PhanQuocHuy' ... '151901205_BuiKhanhBinh'
 '151901205_BuiKhanhBinh' '151901205_BuiKhanhBinh']


In [5]:
# Collapse every sample into a flat 128-value feature vector (one row per sample).
x = x.reshape(-1, 128)

# Keep 80% of the samples for training; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=1
)

# Cell output: shapes of the four arrays produced by the split.
tuple(split.shape for split in (x_train, x_test, y_train, y_test))

((3828, 128), (958, 128), (3828,), (958,))

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [7]:
def get_report(model, x_train, y_train, x_test, y_test):
  """Print accuracy and macro-averaged F1 of a fitted model on both splits.

  Args:
    model: fitted estimator exposing ``predict``.
    x_train, y_train: training features and their true labels.
    x_test, y_test: held-out features and their true labels.

  Returns:
    None. Four lines are printed, each score formatted to two decimals.
  """
  train_pred = model.predict(x_train)
  test_pred = model.predict(x_test)

  # Every metric is computed before anything is printed, in display order.
  report_rows = [
      ('Accurency Train = %0.2f', accuracy_score(y_train, train_pred)),
      ('Accurency Test = %0.2f', accuracy_score(y_test, test_pred)),
      ('F1 Score Train = %0.2f', f1_score(y_train, train_pred, average="macro")),
      ('F1 Score Test = %0.2f', f1_score(y_test, test_pred, average="macro")),
  ]

  # Label spelling is kept unchanged so the printed text matches earlier runs.
  for template, value in report_rows:
    print(template % value)

In [8]:
# Logistic regression baseline using the estimator's default settings
model_logistic = LogisticRegression()
model_logistic.fit(x_train, y_train) # fit on the training split

In [9]:
# Print train/test accuracy and macro F1 for the logistic-regression baseline.
get_report(model_logistic, x_train, y_train, x_test, y_test)

Accurency Train = 0.71
Accurency Test = 0.71
F1 Score Train = 0.51
F1 Score Test = 0.51


In [10]:
# Support-vector classifier with class-probability estimates enabled.
# random_state pins the internal shuffling used for those probability estimates,
# using the same seed as the train/test split so results are repeatable.
model_svc = SVC(probability=True, random_state=1)
model_svc.fit(x_train, y_train)
get_report(model_svc, x_train, y_train, x_test, y_test)

# Random forest with 20 trees; seeded because tree construction is randomized and
# unseeded runs report different scores each time the cell is executed.
model_rf = RandomForestClassifier(n_estimators=20, random_state=1)
model_rf.fit(x_train, y_train)
get_report(model_rf, x_train, y_train, x_test, y_test)

Accurency Train = 0.91
Accurency Test = 0.84
F1 Score Train = 0.83
F1 Score Test = 0.75
Accurency Train = 1.00
Accurency Test = 0.76
F1 Score Train = 1.00
F1 Score Test = 0.68


In [11]:
# Soft-voting ensemble built from fresh, unfitted instances of the three classifiers.
# Weights follow the estimator order: logistic=2, svm=3, rf=1.
# The estimator names ('logistic', 'svm', 'rf') are used as parameter prefixes by the
# grid search, so they must stay unchanged.
# The stochastic members are seeded so the ensemble's scores are repeatable across runs.
model_voting = VotingClassifier(estimators=[
    ('logistic', LogisticRegression()),
    ('svm', SVC(probability=True, random_state=1)),
    ('rf', RandomForestClassifier(n_estimators=20, random_state=1)),
], voting='soft', weights=[2,3,1])

In [12]:
# Fit the voting ensemble on the training split, then print its train/test scores.
model_voting.fit(x_train, y_train)
get_report(model_voting, x_train, y_train, x_test, y_test)

Accurency Train = 0.98
Accurency Test = 0.84
F1 Score Train = 0.98
F1 Score Test = 0.75


In [13]:
from sklearn.model_selection import GridSearchCV

# Exhaustive hyper-parameter search over the voting ensemble, scored by accuracy
# with 3-fold cross-validation.
# Keys of the form "<estimator name>__<parameter>" address the estimators registered
# in model_voting; 'voting' is a parameter of the ensemble itself.
# Grid size: 4 * 3 * 3 * 3 * 2 = 216 candidates, i.e. 648 fits with cv=3.
model_grid = GridSearchCV(model_voting, param_grid={
    'svm__C': [3,5,7,10],
    'svm__gamma': [0.1, 0.3, 0.5],
    'rf__n_estimators': [5, 10,20],
    'rf__max_depth': [3, 5, 7],
    'voting': ['soft', 'hard']
}, scoring='accuracy', cv=3, n_jobs=1, verbose=2)

In [14]:
# Run the grid search on the training split; with verbose=2 one "[CV] END" line is logged per fit.
# NOTE: the recorded log shows roughly 6-9 s per fit, so this cell takes a long time to finish.
model_grid.fit(x_train, y_train)

Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   9.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   6.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   8.7s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   6.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   8.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   6.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   7.8s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   6.0s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   7.4s
[

In [15]:
# Keep the estimator selected by the grid search (best-scoring parameter combination).
model_best_estimator = model_grid.best_estimator_

In [16]:
# Cross-validated accuracy achieved by the best parameter combination.
model_grid.best_score_

0.8375130616509927

In [17]:
# Persist the tuned ensemble to Drive. The context manager flushes and closes the file
# deterministically, which a bare open() call does not guarantee.
with open("/content/drive/MyDrive/ComputerVision/ml_face_person_identity.pkl", mode='wb') as model_file:
  pickle.dump(model_best_estimator, model_file)
