In [1]:
from dataloader import DiveFaceDataLoader
import keras
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle
import sys
import tensorflow as tf
import keras_vggface
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, Lambda, Activation, ActivityRegularization
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, models, layers, regularizers
from keras.preprocessing import image
from keras_vggface import utils
from keras_vggface.vggface import VGGFace
from tensorflow.python.keras.backend import ndim

import tensorflow as tf
from tensorflow.compat.v1 import InteractiveSession


In [2]:
#load whole dataset
demo_data = DiveFaceDataLoader().LoadData("4K_120")
#Set UP so its usable with keras ImageDataGenerator
demo_data.rename(columns={'Image':'filename'},inplace=True)


In [3]:
my_model = 'resnet50'
resnet = VGGFace(model = my_model)

#Select the last leayer as feature embedding  
last_layer = resnet.get_layer('avg_pool').output
feature_layer = Flatten(name='flatten')(last_layer)
model_vgg=Model(resnet.input, feature_layer)

#Freeze the model
model_vgg.trainable = False


In [4]:
#Create the 3 subsets from the dataset (one per race)
#white people (about 55k images keep one per identity)
white_entries = demo_data.drop(['HN','HA','MN','MA'],axis=1)
white_entries = white_entries[demo_data['HB'] != demo_data['MB']]
white_entries = white_entries.drop_duplicates("Id")
#asian people
asian_entries = demo_data.drop(['HN','HB','MN','MB'],axis=1)
asian_entries = asian_entries[demo_data['HA'] != demo_data['MA']]
#afroamerican people
afr_entries = demo_data.drop(['HA','HB','MB','MA'],axis=1)
afr_entries = afr_entries[demo_data['HN'] != demo_data['MN']]


In [7]:

#Preprocessing used for the images
def preprocess(img):
    img = np.expand_dims(img, axis=0)
    return img

#Split into training and validation
training_split  = 0.8

## White people
indxs = np.random.rand(len(white_entries)) < training_split 
training = white_entries[indxs]
print("Training entries",len(training))
training_data_white = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(training,directory=".",target_size=(224,224),y_col=['HB','MB'],class_mode='raw')
testing = white_entries[~indxs]
print("Testing entries",len(testing))
testing_data_white = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(testing,directory=".",target_size=(224,224),y_col=['HB','MB'],class_mode='raw')

## Black people
indxs = np.random.rand(len(afr_entries)) < training_split 
training = afr_entries[indxs]
print("Training entries",len(training))
training_data_black = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(training,directory=".",target_size=(224,224),y_col=['HN','MN'],class_mode='raw')
testing = afr_entries[~indxs]
print("Testing entries",len(testing))
testing_data_black= ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(testing,directory=".",target_size=(224,224),y_col=['HN','MN'],class_mode='raw')

## Asian people
indxs = np.random.rand(len(asian_entries)) < training_split 
training = asian_entries[indxs]
print("Training entries",len(training))
training_data_asian= ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(training,directory=".",target_size=(224,224),y_col=['HA','MA'],class_mode='raw')
testing = asian_entries[~indxs]
print("Testing entries",len(testing))
testing_data_asian = ImageDataGenerator(preprocessing_function=preprocess).flow_from_dataframe(testing,directory=".",target_size=(224,224),y_col=['HA','MA'],class_mode='raw')



Training entries 6414
Found 6414 validated image filenames.
Testing entries 1586
Found 1586 validated image filenames.
Training entries 32368
Found 32368 validated image filenames.
Testing entries 8057
Found 8057 validated image filenames.
Training entries 34794
Found 34794 validated image filenames.
Testing entries 8615
Found 8615 validated image filenames.


In [8]:

white_gender_class = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
white_gender_class.compile(loss='categorical_crossentropy',metrics=['acc'])

In [10]:
white_gender_class.fit(training_data_white,validation_data=testing_data_white,
                               epochs=3)











Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fcb13c8d470>

In [11]:
#Black
white_gender_class.evaluate(testing_data_black)

#Asian
white_gender_class.evaluate(testing_data_asian)








[0.90187007188797, 0.8748694062232971]

In [12]:
black_gender_class = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
black_gender_class.compile(loss='categorical_crossentropy',metrics=['acc'])

In [13]:
black_gender_class.fit(training_data_black,validation_data=testing_data_black,
                               epochs=3)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fcb12c8a2b0>

In [14]:
#Black
print("Accuracy over White demographic group")
black_gender_class.evaluate(testing_data_white)

#Asian
print("Accuracy over Asian demographic group")
black_gender_class.evaluate(testing_data_asian)


Accuracy over White demographic group
Accuracy over Asian demographic group


[0.5421883463859558, 0.9535693526268005]

In [15]:
asian_gender_class = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
black_gender_class.compile(loss='categorical_crossentropy',metrics=['acc'])

In [16]:
asian_gender_class = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
black_gender_class.compile(loss='categorical_crossentropy',metrics=['acc'])

In [17]:
#Black
print("Accuracy over White demographic group")
black_gender_class.evaluate(testing_data_white)

#Asian
print("Accuracy over Asian demographic group")
black_gender_class.evaluate(testing_data_asian)


Accuracy over White demographic group
Accuracy over Asian demographic group


[0.5421882271766663, 0.9535693526268005]

In [18]:
print(indxs)

[ True False  True ...  True  True  True]


In [20]:
asian_gender_class = keras.Sequential([
    model_vgg,
    keras.layers.Dense(3000,activation="relu"),
    keras.layers.Dense(2,activation="softmax")]
)
asian_gender_class.compile(loss='categorical_crossentropy',metrics=['acc'])

In [21]:
asian_gender_class.fit(training_data_asian,validation_data=testing_data_asian,
                               epochs=3)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fcb11170fd0>

In [22]:
#Black
print("Accuracy over Black demographic group")
asian_gender_class.evaluate(testing_data_black)

#Asian
print("Accuracy over White demographic group")
asian_gender_class.evaluate(testing_data_white)

Accuracy over Black demographic group
Accuracy over White demographic group


[3.9624886512756348, 0.7862547039985657]