In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, AveragePooling2D, MaxPooling2D, ZeroPadding2D
from tensorflow.keras.layers import MaxPooling2D, Flatten, Dense, Input, add
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dropout

In [2]:
p = Path('celeba-dataset')

df = pd.read_csv(p / 'list_attr_celeba.csv')
columns = df.columns[1:]

df.replace(-1, 0, inplace=True)
print(columns)
print(df.head())

Index(['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes',
       'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair',
       'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin',
       'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones',
       'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard',
       'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline',
       'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair',
       'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick',
       'Wearing_Necklace', 'Wearing_Necktie', 'Young'],
      dtype='object')
     image_id  5_o_Clock_Shadow  Arched_Eyebrows  Attractive  Bags_Under_Eyes  \
0  000001.jpg                 0                1           1                0   
1  000002.jpg                 0                0           0                1   
2  000003.jpg                 0                0           0                0   
3  000

In [3]:
train_datagen = ImageDataGenerator(
                rotation_range = 40,
                width_shift_range = 0.2,
                height_shift_range = 0.2,
                brightness_range = (0.3, 1.0),
                rescale = 1/255,
                shear_range = 0.2,
                zoom_range = 0.2,
                horizontal_flip = True
                )
test_datagen = ImageDataGenerator(rescale = 1/255)

In [9]:
train_y, test_y = train_test_split(df, test_size = 0.3)
valid_y, test_y = train_test_split(test_y, test_size = 0.7)
print(train_y.shape)
print(test_y.shape)
print(valid_y.shape)
print(train_y.head())

(141819, 41)
(42546, 41)
(18234, 41)
          image_id  5_o_Clock_Shadow  Arched_Eyebrows  Attractive  \
96740   096741.jpg                 0                0           1   
140755  140756.jpg                 0                0           1   
194592  194593.jpg                 0                0           0   
129450  129451.jpg                 0                0           0   
95721   095722.jpg                 0                1           1   

        Bags_Under_Eyes  Bald  Bangs  Big_Lips  Big_Nose  Black_Hair  ...  \
96740                 0     0      0         0         0           0  ...   
140755                0     0      0         0         0           0  ...   
194592                0     0      1         0         0           1  ...   
129450                1     0      0         0         1           0  ...   
95721                 0     0      0         0         0           0  ...   

        Sideburns  Smiling  Straight_Hair  Wavy_Hair  Wearing_Earrings  \
96740      

In [11]:
train_generator = train_datagen.flow_from_dataframe(
                dataframe = train_y,
                directory = p / 'img_align_celeba',
                x_col = 'image_id',
                y_col = columns,
                batch_size = 32,
                seed = 42,
                shuffle = True,
                class_mode = 'other',
                target_size = (64, 64)
)

validation_generator = test_datagen.flow_from_dataframe(
                dataframe = valid_y,
                directory = p / 'img_align_celeba',
                x_col = 'image_id',
                y_col = columns,
                batch_size = 64,
                seed = 42,
                shuffle = True,
                class_mode = 'other',
                target_size = (64, 64)
)

test_generator = test_datagen.flow_from_dataframe(
                dataframe = test_y,
                directory = p / 'img_align_celeba',
                x_col = 'image_id',
                batch_size = 1,
                seed = 42,
                shuffle = False,
                class_mode = None,
                target_size = (64, 64)
)

Found 141819 validated image filenames.
Found 18234 validated image filenames.
Found 42546 validated image filenames.


In [4]:
def res_block(layer, filters, stride=(2,2)):
    x = BatchNormalization()(layer)
    x = Activation('relu')(x)
    x = Conv2D(filters, stride, activation = 'relu')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(filters, stride, activation = 'relu')(x)
    return x
# I switched all activations to relu as it's the standard.

In [5]:
inputs = Input(shape=(64, 64, 3))
saved_inputs = inputs

# I've left out the initial batch normalization to keep variance in the input information.
# I've also removed the regularizers.
x = Conv2D(64, (1, 1), use_bias=False)(inputs)

x = BatchNormalization(axis = 1, epsilon = 0.0001, momentum = 0.95)(x)
x = Activation('relu')(x)
x = Conv2D(64, (3, 3), padding="same", use_bias=False)(x) # This layer is a bottleneck.
# Same padding prevents downsampling so we can add the original input back in later.

x = BatchNormalization(axis = 1, epsilon = 0.0001, momentum = 0.95)(x)
x = Activation("relu")(x)
x = Conv2D(128, (1, 1), use_bias=False)(x)

saved_inputs = Conv2D(128, (1, 1), use_bias=False)(saved_inputs)
x = add([x, saved_inputs])

y = res_block(x, 64)
x = Conv2D(64, (2, 2), activation = 'relu')(x)
x = Conv2D(64, (2, 2), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 64)
x = Conv2D(64, (2, 2), activation = 'relu')(x)
x = Conv2D(64, (2, 2), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 128, (1, 1))
x = Conv2D(128, (1, 1), activation = 'relu')(x)
x = Conv2D(128, (1, 1), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 128, (1, 1))
x = Conv2D(128, (1, 1), activation = 'relu')(x)
x = Conv2D(128, (1, 1), activation = 'relu')(x)
x = add([x, y])

x = Dropout(0.2)(x)
x = ZeroPadding2D((1,1))(x)
x = MaxPooling2D((2,2), strides=(2,2))(x)
# I've added dropout and 'blurred' the image with max pooling before the last half of the network.

y = res_block(x, 256, (2, 2))
x = Conv2D(256, (2, 2), activation = 'relu')(x)
x = Conv2D(256, (2, 2), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 256, (2, 2))
x = Conv2D(256, (2, 2), activation = 'relu')(x)
x = Conv2D(256, (2, 2), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 512, (2, 2))
x = Conv2D(512, (2, 2), activation = 'relu')(x)
x = Conv2D(512, (2, 2), activation = 'relu')(x)
x = add([x, y])

y = res_block(x, 512, (2, 2))
x = Conv2D(512, (2, 2), activation = 'relu')(x)
x = Conv2D(512, (2, 2), activation = 'relu')(x)
x = add([x, y])

x = BatchNormalization(axis = 1, epsilon = 0.0001, momentum = 0.95)(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size = (2, 2), strides = (2, 2))(x)
x = Flatten()(x)

output = Dense(40, activation = 'sigmoid', kernel_initializer='he_normal')(x)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
model = Model(inputs, output, name="ResNet_for_faces")
sgd = SGD(momentum=0.9, nesterov=True)
model.compile(optimizer = sgd, loss = 'binary_crossentropy', metrics = ['accuracy'])

In [13]:
train_steps = train_generator.n // train_generator.batch_size
valid_steps = validation_generator.n // validation_generator.batch_size
test_steps = test_generator.n // test_generator.batch_size

print(valid_steps)

284


In [11]:
history = model.fit_generator(generator = train_generator,
                    steps_per_epoch = train_steps,
                    epochs = 5,
                    verbose = 1,
                    callbacks = [EarlyStopping(monitor='val_loss', patience=2, min_delta=0.0001, restore_best_weights=True),
                    ModelCheckpoint(filepath='weights.hdf5', verbose=1, save_best_only=True),
                    CSVLogger('log.csv', append=True, separator=';')],
                    validation_data = validation_generator,
                    validation_steps = valid_steps
                    )

Epoch 1/5
 284/4431 [>.............................] - ETA: 12:04 - loss: 0.3847 - acc: 0.8285
Epoch 00001: val_loss improved from inf to 0.38467, saving model to weights.hdf5
Epoch 2/5
 284/4431 [>.............................] - ETA: 11:51 - loss: 0.3689 - acc: 0.8347
Epoch 00002: val_loss improved from 0.38467 to 0.36886, saving model to weights.hdf5
Epoch 3/5
 284/4431 [>.............................] - ETA: 11:51 - loss: 0.3515 - acc: 0.8438
Epoch 00003: val_loss improved from 0.36886 to 0.35150, saving model to weights.hdf5
Epoch 4/5
 284/4431 [>.............................] - ETA: 11:51 - loss: 0.2939 - acc: 0.8708
Epoch 00004: val_loss improved from 0.35150 to 0.29391, saving model to weights.hdf5
Epoch 5/5
 284/4431 [>.............................] - ETA: 11:51 - loss: 0.2761 - acc: 0.8797
Epoch 00005: val_loss improved from 0.29391 to 0.27605, saving model to weights.hdf5


In [8]:
model.load_weights(r'C:\Users\user\Desktop\Revature\Projects\face_recognition\res_512_weights.h5')

In [27]:
from keras.preprocessing.image import img_to_array
import cv2

image = cv2.imread(r'C:\Users\user\Desktop\Revature\Projects\face_recognition\celeba-dataset\img_align_celeba\000019.jpg')
image = cv2.resize(image, (64,64))
image = image.astype("float") / 255.0
image = img_to_array(image)
image = np.expand_dims(image, axis=0)

prediction = model.predict(image)
print(prediction)

face_features = {}
for x, pred in enumerate(prediction):
    face_features[x] = []
    for i, prob in enumerate(pred):
        if prob >= 0.5:
            face_features[x].append(columns[i])

print(face_features)

[[0.19457582 0.10393882 0.17653105 0.26333618 0.08533549 0.19610411
  0.21308208 0.2839433  0.18760109 0.07670495 0.32755396 0.17299652
  0.12269059 0.13736796 0.11152101 0.12020501 0.12962231 0.11064738
  0.07986185 0.21210691 0.6474339  0.3691056  0.14214575 0.1903778
  0.7027931  0.1759311  0.13535953 0.15727514 0.11320087 0.02121565
  0.1318909  0.17199785 0.2568685  0.16608894 0.15406975 0.44883415
  0.09599724 0.08212063 0.09775853 0.6479239 ]]
{0: ['Male', 'No_Beard', 'Young']}


In [14]:
test_generator.reset()
prediction = model.predict_generator(
    test_generator,
    steps = test_steps,
    verbose = 1
)

   24/42546 [..............................] - ETA: 7:04:46

KeyboardInterrupt: 

In [17]:
face_features = {}
for i, col in enumerate(columns):
    face_features[col] = []
    for pred in prediction:
        face_features[col].append(pred[i])

face_predictions = pd.DataFrame(face_features)

face_predictions.to_csv('face_predictions_res_net.csv')
files.download('face_predictions_res_net.csv')


MessageError: ignored