# General Info

dataset: LFW-People (https://www.kaggle.com/atulanandjha/lfwpeople/data)

VGGFace2
(https://machinelearningmastery.com/how-to-perform-face-recognition-with-vggface2-convolutional-neural-network-in-keras/)

In [1]:
import os
import pandas as pd
import numpy as np
import cv2

In [2]:
dataset_dir = "../slnp_faces_leg_55"

In [3]:
_, dirs,_ = next(os.walk(dataset_dir))

In [4]:
folders = pd.DataFrame(dirs)

In [5]:
folders.columns = ["name"]

In [6]:
folders.head()

Unnamed: 0,name
0,105112_b
1,112437_b
2,113247_b
3,114941_b
4,118594_b


## Picture Urls

In [7]:
folders["file"] = folders["name"].apply(lambda x: [f for f in os.listdir(dataset_dir+"/"+x) if f.endswith(".jpg")])

In [8]:
folders.count()

name    514
file    514
dtype: int64

In [9]:
pictures = pd.DataFrame(folders.file.tolist(), index=folders.name).stack().reset_index(level=1, drop=True).reset_index(name='file')[['file','name']]

In [10]:
pictures.count()

file    9138
name    9138
dtype: int64

## Detecting Face

In [11]:
from mtcnn import MTCNN
import matplotlib.pyplot as plt
from IPython.display import Image

Using TensorFlow backend.


In [12]:
detector = MTCNN()

In [13]:
def extract_face(filename, required_size=(224, 224)):

    pixels = cv2.imread(filename)
    if pixels is not None:
        pixels_rgb = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)

        results = detector.detect_faces(pixels_rgb)

        if len(results)>0:

            x1, y1, width, height = results[0]['box']
            x2, y2 = x1 + width, y1 + height
            face = pixels_rgb[y1:y2, x1:x2]

            if face.shape[0]>0 and face.shape[1]>0:
                return cv2.resize(face, required_size)

    return 'no_face'

## Feature Extraction

In [14]:
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace

In [15]:
backbone= 'senet50'

In [16]:
model = VGGFace(model=backbone, include_top=False, input_shape=(224, 224, 3), pooling='avg')

In [17]:
def get_embeddings(filename):
    face = extract_face(filename)
    if face == 'no_face':
        return '-'
    sample = np.asarray(face, 'float32')
    sample = np.expand_dims(sample, axis=0)
    sample = preprocess_input(sample, version=2)
    
    embedding = model.predict(sample)
    
    return embedding[0]

In [18]:
complete_urls = dataset_dir+'/'+pictures['name']+'/'+pictures['file']

In [19]:
embeddings = complete_urls.apply(lambda x: get_embeddings(x))

  This is separate from the ipykernel package so we can avoid doing imports until


In [20]:
embeddings.head()

0    [0.0996729, 0.028490392, 0.20510468, 0.5868249...
1    [0.086341664, 0.13112758, 0.0015521318, 0.2116...
2    [0.10442628, 0.11352078, 0.7367716, 2.401755, ...
3    [0.06998727, 3.4273074, 0.0, 2.3481488, 0.0052...
4    [0.10927686, 0.12103541, 2.4254398, 0.02788562...
dtype: object

In [21]:
pictures["complete_url"] = complete_urls

In [22]:
pictures['embeddings'] = embeddings

In [23]:
pictures.head()

Unnamed: 0,file,name,complete_url,embeddings
0,0.jpg,105112_b,../slnp_faces_leg_55/105112_b/0.jpg,"[0.0996729, 0.028490392, 0.20510468, 0.5868249..."
1,0_0.jpg,105112_b,../slnp_faces_leg_55/105112_b/0_0.jpg,"[0.086341664, 0.13112758, 0.0015521318, 0.2116..."
2,1_0.jpg,105112_b,../slnp_faces_leg_55/105112_b/1_0.jpg,"[0.10442628, 0.11352078, 0.7367716, 2.401755, ..."
3,2_0.jpg,105112_b,../slnp_faces_leg_55/105112_b/2_0.jpg,"[0.06998727, 3.4273074, 0.0, 2.3481488, 0.0052..."
4,3_0.jpg,105112_b,../slnp_faces_leg_55/105112_b/3_0.jpg,"[0.10927686, 0.12103541, 2.4254398, 0.02788562..."


Removendo imagens em que não foi possível extrair os embeddings

In [24]:
invalid = pictures.loc[pictures["embeddings"] == '-'].index

  result = libops.scalar_compare(x.ravel(), y, op)


In [25]:
print("Numero de imagens invalidas: "+str(len(invalid)))

Numero de imagens invalidas: 500


In [26]:
pictures = pictures.drop(invalid)

## Saving DataFrame

In [27]:
pictures.to_pickle(f'faces_leg55_embeddings_{backbone}.pkl')