In [1]:
# pip install numpy opencv-python pandas


In [2]:
#pip install numpy opencv-python pandas scikit-learn

In [3]:

import numpy as np
import cv2
import pandas as pd
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV

In [4]:
# 1. DATA

# Load the feature dictionary from the pickle file.
# The context manager closes the file handle as soon as loading finishes
# (the bare open(...) call previously left it open until garbage collection).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open("E:/thigiacmaytinh/data_face_features.pickle", mode="rb") as data_file:
    data = pickle.load(data_file)

x = np.array(data['data'])
y = np.array(data['label'])

# Reshape x so that every sample is one row of 128 values
x = x.reshape(-1, 128)

# Split the data: 80% for training, the rest for testing (seeded, so the split is repeatable)
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=0)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((3461, 128), (866, 128), (3461,), (866,))

In [5]:
# 2. Train machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
# Logistic regression
# fit() hands back the fitted estimator, so building and training chain into one statement
model_logistic = LogisticRegression().fit(x_train, y_train)

def get_report(model, x_train, y_train, x_test, y_test):
    """Print accuracy and macro-averaged F1 for a fitted model on both splits.

    Args:
        model: Object exposing ``predict``; called once on each feature set.
        x_train: Training features passed to ``model.predict``.
        y_train: True labels matching ``x_train``.
        x_test: Test features passed to ``model.predict``.
        y_test: True labels matching ``x_test``.

    Returns:
        None. The four metrics are written to stdout, each formatted to two
        decimals, in the order: accuracy train, accuracy test, F1 train, F1 test.
    """
    predicted_train = model.predict(x_train)
    predicted_test = model.predict(x_test)

    # All metrics are computed before anything is printed, so a failure in a
    # metric call leaves no partial report behind.
    report_lines = (
        ('Accurency Train = %0.2f', accuracy_score(y_train, predicted_train)),
        ('Accurency Test = %0.2f', accuracy_score(y_test, predicted_test)),
        ('F1 Score Train = %0.2f', f1_score(y_train, predicted_train, average="macro")),
        ('F1 Score Test = %0.2f', f1_score(y_test, predicted_test, average="macro")),
    )

    for template, metric_value in report_lines:
        print(template % metric_value)

get_report(model_logistic, x_train, y_train, x_test, y_test)

# Support Vector Machines
# probability=True turns on probability estimates; random_state seeds the data
# shuffling used to produce them, so repeated runs give the same estimates.
model_svc = SVC(probability=True, random_state=0)
model_svc.fit(x_train, y_train)

get_report(model_svc, x_train, y_train, x_test, y_test)

# Random forest
# Seeded so the forest (and therefore the reported metrics) is the same on
# every Restart-and-Run-All, matching the seeded train/test split.
model_rf = RandomForestClassifier(n_estimators=20, random_state=0)
model_rf.fit(x_train, y_train)

get_report(model_rf, x_train, y_train, x_test, y_test)
# Voting Classifier
# Soft voting combines the three estimators' class probabilities using the
# weights below (logistic=2, svm=3, rf=1). The stochastic members are seeded
# for reproducibility; the estimator names are referenced by the grid-search
# parameter keys ('svm__...', 'rf__...'), so they must stay unchanged.
model_voting = VotingClassifier(estimators=[
    ('logistic', LogisticRegression()),
    ('svm', SVC(probability=True, random_state=0)),
    ('rf', RandomForestClassifier(n_estimators=20, random_state=0)),
], voting='soft', weights=[2, 3, 1])

model_voting.fit(x_train, y_train)

get_report(model_voting, x_train, y_train, x_test, y_test)

Accurency Train = 0.72
Accurency Test = 0.69
F1 Score Train = 0.52
F1 Score Test = 0.47
Accurency Train = 0.91
Accurency Test = 0.83
F1 Score Train = 0.84
F1 Score Test = 0.72
Accurency Train = 1.00
Accurency Test = 0.76
F1 Score Train = 1.00
F1 Score Test = 0.61
Accurency Train = 0.98
Accurency Test = 0.82
F1 Score Train = 0.98
F1 Score Test = 0.70


In [6]:
# 3. Parameter Tuning
from sklearn.model_selection import GridSearchCV

# Candidate settings for the voting ensemble and its members:
# 4 * 3 * 3 * 3 * 2 = 216 combinations. Keys use the '<estimator name>__<param>'
# form to reach the named sub-estimators of the VotingClassifier.
voting_param_grid = {
    'svm__C': [3, 5, 7, 10],
    'svm__gamma': [0.1, 0.3, 0.5],
    'rf__n_estimators': [5, 10, 20],
    'rf__max_depth': [3, 5, 7],
    'voting': ['soft', 'hard'],
}

# Exhaustive search scored by accuracy with 3-fold cross-validation, run in a
# single process with per-fit progress logging.
model_grid = GridSearchCV(
    estimator=model_voting,
    param_grid=voting_param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=1,
    verbose=2,
)
model_grid.fit(x_train, y_train)

# Keep the best configuration, then show its mean cross-validated score
# as the cell output.
model_best_estimator = model_grid.best_estimator_
model_grid.best_score_


Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   2.8s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   2.7s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=soft; total time=   2.6s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   2.5s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   2.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.1, voting=hard; total time=   2.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   2.5s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   2.4s
[CV] END rf__max_depth=3, rf__n_estimators=5, svm__C=3, svm__gamma=0.3, voting=soft; total time=   2.7s
[

np.float64(0.844266558040888)

In [7]:
# 4. Save Model
# Persist the best estimator from the grid search. The context manager makes
# sure the file is flushed and closed once the dump completes, instead of
# leaving the handle open until garbage collection.
with open("E:/thigiacmaytinh/ml_face_person_identity.pkl", mode='wb') as model_file:
    pickle.dump(model_best_estimator, model_file)