In [None]:
%pip install keras_vggface
%pip install mtcnn
%pip install xgboost
%pip install pillow
%pip install pandas
%pip install tqdm
%pip install swifter

In [None]:
import ast
import glob
import os
import pickle

import tensorflow as tf
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
from matplotlib import pyplot
from mtcnn.mtcnn import MTCNN
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
import swifter

from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, make_scorer
import xgboost

In [None]:
def extract_face_old(filename, required_size=(224,224)):
  """Legacy image loader kept for reference.

  Reads ``filename`` with matplotlib, wraps the pixels in a PIL image using
  mode 'RGB', resizes it to ``required_size``, shows the result with
  ``pyplot.imshow`` and returns it as a NumPy array.

  An MTCNN-based face crop used to sit between the read and the resize; it
  was commented out, so the whole picture is used as the "face".

  Args:
    filename: Path of the image file to read.
    required_size: Size tuple passed to ``Image.resize``.

  Returns:
    The resized image as a NumPy array.
  """
  raw_pixels = pyplot.imread(filename)
  # No detection step any more: the full frame is treated as the face.
  resized = Image.fromarray(raw_pixels, 'RGB').resize(required_size)
  # Side effect: draws the resized image on the current matplotlib axes.
  pyplot.imshow(resized)
  return np.asarray(resized)

In [None]:
def extract_face(filename, required_size=(224,224)):
  """Load an image file and return it as a resized 3-channel RGB array.

  The image is converted to RGB before resizing. PNG files may be stored as
  RGBA, greyscale or palette images; without the conversion those produce
  arrays with a different number of channels, which get_embeddings cannot
  reshape to (1, 224, 224, 3).

  Args:
    filename: Path of the image file to open.
    required_size: Size tuple passed to ``Image.resize``.

  Returns:
    NumPy array holding the resized RGB pixels.
  """
  # The context manager releases the file handle even if decoding fails.
  with Image.open(filename) as source:
    image = source.convert('RGB').resize(required_size)
  return np.asarray(image)

In [None]:
def get_embeddings(filenames, preprocess_version=2):
  """Compute the embedding of one image with the module-level ``model``.

  Args:
    filenames: Path of a single image file (despite the plural name); it is
      handed unchanged to extract_face.
    preprocess_version: Forwarded as ``version`` to keras_vggface's
      ``preprocess_input``. keras_vggface documents version 2 for the
      'resnet50' / 'senet50' models and version 1 for 'vgg16'. The extractor
      built in this notebook is 'resnet50', hence the default of 2 (the
      library default of 1 applies the VGG16 statistics).

  Returns:
    The output of ``model.predict`` for the image, flattened to 1-D.
  """
  face = extract_face(filenames)
  # Batch of one image; the hard-coded shape has to agree with extract_face's
  # default size and with the input_shape the model was built with.
  batch = np.asarray(np.reshape(face, (1,224,224,3)), 'float32')
  batch = preprocess_input(batch, version=preprocess_version)
  # `model` is resolved at call time. A later cell rebinds `model` to an
  # XGBoost classifier, so call this only while `model` is the VGGFace network.
  return model.predict(batch).flatten()

In [None]:
# Read the class names (one per line) and map each name to its line index.
# The `with` block closes the file; the original left the handle open.
with open("./Names100Dataset/names100.txt", "r") as f:
  names = [line.rstrip("\n") for line in f]
print(names)
# If a name appears more than once, the index of its last occurrence wins.
name_to_int = {name: number for number, name in enumerate(names)}

In [None]:
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    Args:
        obj: Any picklable object.
        name: Path without extension; '.pkl' is appended.

    The file is opened in binary write mode, so an existing file with the
    same name is replaced. ``pickle.HIGHEST_PROTOCOL`` is used.
    """
    target = name + '.pkl'
    with open(target, 'wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Load and return the object pickled in ``name + '.pkl'``.

    Args:
        name: Path without extension; '.pkl' is appended.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        you created yourself.
    """
    source = name + '.pkl'
    with open(source, 'rb') as stream:
        restored = pickle.load(stream)
    return restored

In [None]:
# Persist the name -> class-index mapping as name_to_int.pkl so later cells
# can reload it without re-reading the names file.
save_obj(obj=name_to_int, name='name_to_int')

## Getting embeddings to prepare for classification

In [None]:
# VGGFace ResNet-50 built without its top (include_top=False) and with
# pooling='avg'; get_embeddings reads this module-level `model`.
# Note: a later cell rebinds `model` to an XGBoost classifier.
model = VGGFace(model='resnet50', include_top=False, input_shape=(224,224,3), pooling='avg')

# glob returns files in arbitrary order; sorting makes the row order (and so
# the saved arrays and cross-validation folds) reproducible between runs.
full_dir = sorted(glob.glob("./Names100Dataset/Names100_Images/*.png"))

names = []
files = []
classes = []
embeddings = []

for filepath in tqdm(full_dir):
    # os.path.basename copes with both '/' and '\\' separators, unlike
    # splitting on '/'.
    file = os.path.basename(filepath)
    # The text before the first underscore of the file name is the class name.
    name = file.split('_')[0]
    files.append(file)
    names.append(name)
    # Raises KeyError if the prefix is not one of the names in name_to_int.
    classes.append(name_to_int[name])
    embeddings.append(get_embeddings(filepath))

# Build the frame in one go instead of filling an empty frame column by column.
df = pd.DataFrame(
    {"Name": names, "File": files, "Class": classes, "Embedding": embeddings},
    columns=['Name', 'File', 'Class', "Embedding"],
)
df.head()

In [None]:
# Overwrite the file instead of appending: with mode='a' every re-run of this
# cell added another header line and a duplicate copy of all rows.
# NOTE(review): the Embedding cells are NumPy arrays and are written as their
# text form, which NumPy abbreviates with '...' for arrays of more than 1000
# elements. If the embeddings are that long this CSV is not a lossless copy;
# the pickled arrays are the reliable one. Please confirm.
df.to_csv('training_embeddings.csv', mode='w', index=False)

In [None]:
# Store features and labels as NumPy arrays in train_X.pkl / train_y.pkl.
save_obj(obj=np.asarray(embeddings), name='train_X')
save_obj(obj=np.asarray(classes), name='train_y')

In [None]:
# Read the CSV written above back into a frame.
# NOTE(review): read_df is not used by any later cell shown here.
read_df = pd.read_csv(filepath_or_buffer='training_embeddings.csv')

In [None]:
# Reload the pickled training data; the labels are flattened to 1-D.
train_X = load_obj(name="train_X")
train_y = np.ravel(load_obj(name="train_y"))
# Reload the name -> class-index mapping saved earlier.
name_to_int = load_obj(name='name_to_int')

In [None]:
# Note: this rebinds `model`, which get_embeddings reads as the VGGFace
# network, so get_embeddings must not be called after this cell has run.
model = xgboost.XGBClassifier(objective="multi:softmax",num_class=100, learning_rate=0.01, n_estimators=100, max_depth=3,
                              subsample=0.8, colsample_bytree=1, gamma=1, n_jobs=-1, verbosity=1)

# 3-fold cross-validated accuracy on the training data.
score_mi = cross_val_score(model, train_X, train_y, cv=3, scoring=make_scorer(accuracy_score))
print("Mean: {}, Std Dev: {}".format(score_mi.mean(), np.std(score_mi)))

# cross_val_score fits clones of the estimator and leaves `model` itself
# unfitted, so fit on the full training set before saving. The original
# pickled an untrained classifier under the name "trained_XGBoost".
model.fit(train_X, train_y)
save_obj(model, "trained_XGBoost")

In [None]:
# Write the current `model` to trained_XGBoost.pkl (same file name as the
# save in the training cell, so this replaces that file).
# NOTE(review): cross_val_score does not fit the estimator it is given; make
# sure model.fit(...) has been called before relying on this file.
save_obj(obj=model, name="trained_XGBoost")