In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras.applications.xception import Xception, preprocess_input
from keras.callbacks import ModelCheckpoint
from keras.layers import GlobalAveragePooling2D
from keras.layers import Input, Dense, LeakyReLU
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

# --- Configuration -----------------------------------------------------------
PATH = '/workspace/dataset/'      # prefix used for the attribute CSV (and the image folder in the next cell)
FACE_DEFAULT_SHAPE = (218, 178)   # spatial size handed to the generators and the network input
BS = 32                           # batch size for the data generators

# --- Label tables ------------------------------------------------------------
# Identity list: space separated with no header row, so the two columns are named by hand.
# NOTE(review): this file and list_eval_partition.csv are read from the working
# directory rather than from PATH -- confirm that is intended.
celeb_data = pd.read_csv('identity_CelebA.txt', sep=" ", header=None)
celeb_data.columns = ["image", "label"]

# Attribute table, with every -1 recoded as 0.
attributes = pd.read_csv(PATH + 'list_attr_celeba.csv').replace(-1, 0)

# 0 - train, 1 - validation, 2 - test
train_val_test = pd.read_csv('list_eval_partition.csv', usecols=['partition'])['partition'].values

Using TensorFlow backend.


In [2]:
# NOTE: no ModelCheckpoint callback is wired into training; the model is only
# written to disk by the final cell, after training has finished.

# Keep the filename column plus the attribute columns the classifier predicts.
attributes = attributes[['image_id','Attractive','Bald','Male','Bags_Under_Eyes','Narrow_Eyes',
                         'Oval_Face','Pointy_Nose','Receding_Hairline','Young']]

# Label column names as a plain list (every column except the filename).
features = [col for col in attributes.columns if col != 'image_id']

# Split rows by partition code (0 - train, 1 - validation, 2 - test).
# A fresh 0..n-1 index is necessary for the flow_from_dataframe method;
# drop=True avoids adding a stray 'index' column to the split frames.
df_train = attributes.iloc[train_val_test == 0].reset_index(drop=True)
df_valid = attributes.iloc[train_val_test == 1].reset_index(drop=True)
df_test = attributes.iloc[train_val_test == 2].reset_index(drop=True)

# Training batches get random horizontal flips; validation batches only get the
# Xception preprocessing so that validation metrics are not perturbed by augmentation.
datagen = ImageDataGenerator(horizontal_flip=True, preprocessing_function=preprocess_input)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Arguments shared by both generators.
flow_kwargs = dict(directory=PATH+'img_align_celeba', x_col='image_id', y_col=features,
                   target_size=FACE_DEFAULT_SHAPE, color_mode='rgb', classes=None,
                   class_mode='other', batch_size=BS)

train_gen = datagen.flow_from_dataframe(df_train, shuffle=True, **flow_kwargs)
# Validation order does not affect the aggregated metrics, so no shuffling is needed.
valid_gen = valid_datagen.flow_from_dataframe(df_valid, shuffle=False, **flow_kwargs)


# Convolutional feature extractor: Xception without its classification top and
# with no pretrained weights loaded, followed by global average pooling so each
# image is reduced to a single flat feature vector.
xception = Xception(include_top=False, weights=None, input_shape = FACE_DEFAULT_SHAPE + (3,))
output = GlobalAveragePooling2D()(xception.output)
base_model = Model(xception.input, output, name = 'base_xception')

def get_attr_model(conv_feat_size, num_feat):
    '''
    Build the attribute-classification head.

    Takes the flat feature vector produced by the conv feature extractor and
    passes it through two dense layers (512 and 128 units, each followed by
    LeakyReLU with alpha=0.1) and a final dense layer with sigmoid activation.

    conv_feat_size: length of the incoming feature vector.
    num_feat: number of sigmoid output units.

    Returns a Keras Model named 'attr_classification'.
    '''
    features_in = Input((conv_feat_size,), name = 'input')

    hidden = features_in
    for units in (512, 128):
        hidden = Dense(units)(hidden)
        hidden = LeakyReLU(alpha=0.1)(hidden)

    scores = Dense(num_feat, activation='sigmoid')(hidden)
    return Model(features_in, scores, name = 'attr_classification')

# Per-image input shape (batch dimension stripped) and the width of the pooled
# feature vector that the base model emits.
inp_shape = K.int_shape(base_model.input)[1:]
conv_feat_size = K.int_shape(base_model.output)[-1]

# End-to-end model: image -> base feature extractor -> attribute head.
# (Named image_input so the builtin input() is not shadowed in the notebook.)
image_input = Input( inp_shape )
emb_attr = get_attr_model(conv_feat_size, len(features))
att_model = Model(image_input, emb_attr(base_model(image_input)))

# The head ends in sigmoid units, so binary cross-entropy is applied per output.
# Adam comes from keras.optimizers (imported in the first cell).
att_model.compile(Adam(lr=0.0002), loss = 'binary_crossentropy', metrics=['binary_accuracy'])

# Keep the returned History object so the training curves can be inspected afterwards.
history = att_model.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=50, initial_epoch = 0,
                                  validation_data=valid_gen, validation_steps=len(valid_gen),
                                  use_multiprocessing=True, workers=12)

Found 162770 images.
Found 19867 images.
Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Write the trained attribute model to disk.
trained_model_file = 'attributes.h5'
att_model.save(trained_model_file)