In [1]:
from dataloader import DiveFaceDataLoader
import keras
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import tensorflow as tf
import keras_vggface
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, Lambda, Activation, ActivityRegularization
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, models, layers, regularizers
from keras.preprocessing import image
from keras_vggface import utils
from keras_vggface.vggface import VGGFace
from tensorflow.python.keras.backend import ndim

import tensorflow as tf
from tensorflow.compat.v1 import InteractiveSession


In [2]:
#load whole dataset
demo_data = DiveFaceDataLoader().LoadData("4K_120")
#Set UP so its usable with keras ImageDataGenerator
demo_data.rename(columns={'Image':'filename'},inplace=True)


In [3]:
my_model = 'resnet50'
resnet = VGGFace(model = my_model)

#Select the last leayer as feature embedding  
last_layer = resnet.get_layer('avg_pool').output
feature_layer = Flatten(name='flatten')(last_layer)
model_vgg=Model(resnet.input, feature_layer)

#Freeze the model
model_vgg.trainable = False


In [4]:
#Create the 3 subsets from the dataset (one per race)
#white people (about 55k images keep one per identity)
white_entries = demo_data.drop(['HN','HA','MN','MA'],axis=1)
white_entries = white_entries[demo_data['HB'] != demo_data['MB']]
white_entries = white_entries.drop_duplicates("Id")
#asian people
asian_entries = demo_data.drop(['HN','HB','MN','MB'],axis=1)
asian_entries = asian_entries[demo_data['HA'] != demo_data['MA']]
#afroamerican people
afr_entries = demo_data.drop(['HA','HB','MB','MA'],axis=1)
afr_entries = afr_entries[demo_data['HN'] != demo_data['MN']]


In [5]:
balanced_classifier = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
balanced_classifier.compile(loss='categorical_crossentropy',metrics=['acc'])

In [6]:
#Preprocessing used for the images
def preprocess(img):
    img = np.expand_dims(img, axis=0)
    return img

In [8]:
wh_mf = white_entries.rename(columns={'HB':'H','MB':'M'})
balanced_dataset_training = wh_mf[wh_mf['H'] == 1].head(500)
balanced_dataset_training = balanced_dataset_training.append(wh_mf[wh_mf['M'] == 1].head(500))
balanced_dataset_eval = wh_mf[wh_mf['H'] == 1].tail(250)
balanced_dataset_eval= balanced_dataset_eval.append(wh_mf[wh_mf['M'] == 1].tail(250)) 
as_mf = asian_entries.rename(columns={'HA':'H','MA':'M'})
balanced_dataset_training = balanced_dataset_training.append(as_mf[as_mf['H'] == 1].head(500))
balanced_dataset_training = balanced_dataset_training.append(as_mf[as_mf['M'] == 1].head(500))
balanced_dataset_eval = balanced_dataset_eval.append(as_mf[as_mf['H'] == 1].tail(250)) 
balanced_dataset_eval= balanced_dataset_eval.append(as_mf[as_mf['M'] == 1].tail(250)) 
af_mf = afr_entries.rename(columns={'HN':'H','MN':'M'})
balanced_dataset_training = balanced_dataset_training.append(af_mf[af_mf['H'] == 1].head(500))
balanced_dataset_training = balanced_dataset_training.append(af_mf[af_mf['M'] == 1].head(500))
balanced_dataset_eval = balanced_dataset_eval.append(af_mf[af_mf['H'] == 1].tail(250)) 
balanced_dataset_eval= balanced_dataset_eval.append(af_mf[af_mf['M'] == 1].tail(250)) 


In [9]:
training_balanced = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(balanced_dataset_training,directory=".",target_size=(224,224),y_col=['H','M'],class_mode='raw')

evaluation_balanced = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(balanced_dataset_eval,directory=".",target_size=(224,224),y_col=['H','M'],class_mode='raw')

white_full = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(wh_mf,directory=".",target_size=(224,224),y_col=['H','M'],class_mode='raw')
asian_full = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(as_mf,directory=".",target_size=(224,224),y_col=['H','M'],class_mode='raw')
black_full = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(af_mf,directory=".",target_size=(224,224),y_col=['H','M'],class_mode='raw')


Found 3000 validated image filenames.
Found 1500 validated image filenames.
Found 8000 validated image filenames.
Found 43409 validated image filenames.
Found 40425 validated image filenames.


In [10]:
balanced_classifier.fit(training_balanced,validation_data=evaluation_balanced,
                               epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fa167d692d0>

In [11]:
#%% Evaluate on other races
#Black
print("Accuracy over Black demographic group")
balanced_classifier.evaluate(black_full)

print("Accuracy over Asian demographic group")
balanced_classifier.evaluate(asian_full)
#White
print("Accuracy over White demographic group")
balanced_classifier.evaluate(white_full)

Accuracy over Black demographic group
Accuracy over Asian demographic group
Accuracy over White demographic group


[0.15064741671085358, 0.9827499985694885]