In [1]:
import pickle

import numpy as np
import pandas as pd

from cv2 import cv2
from tqdm import tqdm, trange
from keras.models import load_model
from sklearn.model_selection import StratifiedShuffleSplit

Using TensorFlow backend.


In [2]:
data = pd.read_csv('data/labeled/data_resized.csv')
classes = data['name']
path = data['path']
print(data.shape)

(4439, 3)


In [3]:
stratSplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

In [4]:
splitted = stratSplit.split(path, classes)

In [5]:
for train_idx, test_idx in splitted:
    x_train = path[train_idx]
    x_test = path[test_idx]
    y_train = classes[train_idx]
    y_test = classes[test_idx]

In [6]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(3551,) (888,) (3551,) (888,)


In [7]:
def get_embedding(model, x):
    x = x.astype('float32')
    mean, std = x.mean(), x.std()
    x = (x - mean) / std
    x = np.expand_dims(x, axis=0)
    y = model.predict(x)
    return y[0]

In [8]:
model = load_model('model/facenet_keras.h5')













Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
model.summary()

_________________
Block8_1_Branch_1_Conv2d_0b_1x3 (None, 3, 3, 192)    0           Block8_1_Branch_1_Conv2d_0b_1x3_B
__________________________________________________________________________________________________
Block8_1_Branch_0_Conv2d_1x1 (C (None, 3, 3, 192)    344064      Mixed_7a[0][0]                   
__________________________________________________________________________________________________
Block8_1_Branch_1_Conv2d_0c_3x1 (None, 3, 3, 192)    110592      Block8_1_Branch_1_Conv2d_0b_1x3_A
__________________________________________________________________________________________________
Block8_1_Branch_0_Conv2d_1x1_Ba (None, 3, 3, 192)    576         Block8_1_Branch_0_Conv2d_1x1[0][0
__________________________________________________________________________________________________
Block8_1_Branch_1_Conv2d_0c_3x1 (None, 3, 3, 192)    576         Block8_1_Branch_1_Conv2d_0c_3x1[0
___________________________________________________________________________________________

In [10]:
x_train_feature = []
x_test_feature = []
for item in tqdm(x_train):
    image = cv2.imread(item)
    feature = get_embedding(model, image)
    x_train_feature.append(feature)
for item in tqdm(x_test):
    image = cv2.imread(item)
    feature = get_embedding(model, image)
    x_test_feature.append(feature)

100%|██████████| 3551/3551 [01:25<00:00, 41.30it/s]
100%|██████████| 888/888 [00:20<00:00, 42.44it/s]


In [11]:
x_train_feature = np.array(x_train_feature)
x_test_feature = np.array(x_test_feature)

In [12]:
print(x_train_feature.shape, x_test_feature.shape)

(3551, 128) (888, 128)


In [13]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC

In [14]:
x_encoder = Normalizer(norm='l2')
x_train_normalized = x_encoder.transform(x_train_feature)
x_test_normalized = x_encoder.transform(x_test_feature)

In [15]:
print(x_train_feature.shape, x_test_feature.shape)

(3551, 128) (888, 128)


In [16]:
y_encoder = LabelEncoder()
y_encoder.fit(y_train)
y_train_encoded = y_encoder.transform(y_train)
y_test_encoded = y_encoder.transform(y_test)

In [17]:
encoding = {}
for i in range(len(y_encoder.classes_)):
    encoding[i] = y_encoder.classes_[i]
file = open('label_encoding.pkl', 'wb')
pickle.dump(encoding, file, pickle.HIGHEST_PROTOCOL)
file.close()

In [18]:
print(y_train_encoded.shape, y_test_encoded.shape)
print(y_train_encoded, y_test_encoded)

(3551,) (888,)
[ 8 20 16 ...  1  1 16][ 8 13 13  7 13  7  7 18  4  1 20  7  1 13 16  4 13  4 20 18  1  2 18 13
 22  8 22 13  4  1  6  6 18 13 17  7 20 20  7 19 16  8 17  8 18 13  9  1
  1  9 17 20 20  4 13 13 12 20 18  7 13 13 17 20 13  1  4 16  1  1 13  1
  1  4  7 18  6 17  6  7 13  4 18 16  1  1  6  1 16  7  4  8 13  4 23  1
 20  6 22 13 18 13 16  6  6 17  4 22 24  6  6  4  4  6  4 18 13  6 16 13
 16  6  6  1 23  6 13  4  6  1 17 20  1 16 19  1  6 16 13 13  6 17 17 16
 20  1  6  6  7  4 16  6  9  9 22  1  7 14  7 24  6  7  6 16  8  7  1 13
 13  4  6 13 17  4  7  6  8 17  8  6 18  6 17  6 23  1  1  1  1  6 16  4
  6  4  6 16 19  6 13  7 17 17  6  6  7  9  6  7  6 17  4 23  7 18  6  6
  4  1 13  6 24  9  7 13 18  6 16  2  4  8  4  7 17 13 16  4 17  4  4 16
  8  6 17  1 13 13 13  4  9 16  6 20  1  4  1 16  6 16 13 18  9 13  6  1
  1 17  6  8  1  2  4 13  4 18 13 13 13 11  6 17  8  4 17  4  4 13  7  6
 13  9  6  6  1 20 19 16  7 17 20  9  4 13  6  6  6 16 11  6  7 20  6 13
  6 19  1  6 

In [25]:
classifier = SVC(kernel='linear', probability=True, verbose=True)
classifier.fit(x_train_normalized, y_train_encoded)

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=True, random_state=None,
    shrinking=True, tol=0.001, verbose=True)

In [26]:
train_predict = classifier.predict(x_train_normalized)
test_predict = classifier.predict(x_test_normalized)

In [27]:
score_train = accuracy_score(y_train_encoded, train_predict)
score_test = accuracy_score(y_test_encoded, test_predict)

In [28]:
print(score_train)
print(score_test)

0.9549422697831597
0.9391891891891891


In [29]:
a = classifier.predict_proba(x_test_normalized[1:2])
b = classifier.predict(x_test_normalized[1:2])

In [30]:
file = open('model/classifier.pkl', 'wb')
pickle.dump(classifier, file)
file.close()

In [32]:
np.save('x_train_normalized.npy', x_train_normalized)
np.save('y_train_encoded.npy', y_train_encoded)