# Facial Attributes Recognition

#### This project uses the data from CelebFaces Attributes (CelebA) Dataset on Kaggle, which was originally collected by researchers at MMLAB, The Chinese University of Hong Kong (specific reference in Acknowledgment section).

#### The main job of this project is to build and train a deep learning network via Keras for recognizing the 40 different facial attributes.

In [1]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import random
import pickle
from sklearn.utils import shuffle
import glob
import matplotlib.image as mpimg
import gc

## 1. Import data

In [2]:
# Helper that turns nested numeric strings into an integer array.
def trans_str2int(strlist):
    """Convert a nested sequence of numeric strings to an int16 NumPy array.

    Every element of ``strlist`` is walked item by item, each item is passed
    through ``int()``, and the converted rows are stacked with
    ``np.array(..., dtype=np.int16)``.
    """
    converted = [[int(item) for item in row] for row in strlist]
    return np.array(converted, dtype = np.int16)

# Containers shared with the following cells: X receives the images, Y the
# attribute labels and facial_attributes the attribute (column) names.
X = []
Y = []
facial_attributes = []

print('Reading in the labels......')
# newline='' is the way the csv module asks for its input files to be opened.
with open('./data/list_attr_celeba.csv', newline='') as csvfile:
    lines = csv.reader(csvfile)
    # The first row is the header. Its first column is dropped, exactly as it
    # is for every data row below (presumably the image file name - confirm).
    header = next(lines)
    facial_attributes = header[1:]
    # Remaining rows: drop the first column and convert the rest to int16.
    Y = trans_str2int([line[1:] for line in lines])
print('Finish!')

Reading in the labels......
Finish!


In [3]:
print('Reading in the pixel datas......')
# glob.glob() returns paths in arbitrary, filesystem-dependent order, while
# later cells pair X[i] with row i of the label file. Sorting makes the order
# deterministic.
# NOTE(review): assumes the label rows are listed in ascending file-name
# order - confirm against the CSV.
images = sorted(glob.glob('./data/img/*.jpg'))

# Build X from scratch so re-running this cell does not append the images a
# second time.
X = [mpimg.imread(file) for file in images]
print('Finish!')

Reading in the pixel datas......
Finish!


In [4]:
print('Reading in the suggestion for spliting data......')
suggestion = []
# newline='' is the way the csv module asks for its input files to be opened.
with open('./data/list_eval_partition.csv', newline='') as csvfile:
    lines = csv.reader(csvfile)
    next(lines)  # skip the header row
    # Wrap each value in a one-element list so trans_str2int converts the whole
    # field with int(). Passing bare strings made it parse them character by
    # character, which only works for single-digit, non-negative codes.
    # The result keeps the (N, 1) shape the rest of the notebook already sees.
    suggestion = trans_str2int([[line[1]] for line in lines])
print('Finish!')

Reading in the suggestion for spliting data......
Finish!


In [5]:
# Report how many images, label rows and partition codes were read; the split
# below indexes all three with the same position.
for message, collection in (
        ('The length of X is: %d', X),
        ('The length of Y is: %d', Y),
        ('The length of suggestion is: %d', suggestion)):
    print(message % len(collection))

The length of X is: 202599
The length of Y is: 202599
The length of suggestion is: 202599


In [6]:
print('Spliting the data......')
x_train, y_train = [], []
x_valid, y_valid = [], []
x_test, y_test = [], []

# Route every sample by its partition code: 0 -> training, 1 -> validation,
# anything else -> testing.
for i, code in enumerate(suggestion):
    if code == 0:
        images_out, labels_out = x_train, y_train
    elif code == 1:
        images_out, labels_out = x_valid, y_valid
    else:
        images_out, labels_out = x_test, y_test
    images_out.append(X[i])
    labels_out.append(Y[i])
print('Finish!')

Spliting the data......
Finish!


In [7]:
# Report the size of each split.
for message, split in (
        ('The length of training data is: %d', x_train),
        ('The length of validation data is: %d', x_valid),
        ('The length of testing data is: %d', x_test)):
    print(message % len(split))

The length of training data is: 162770
The length of validation data is: 19867
The length of testing data is: 19962


In [8]:
# The split lists now hold the samples; drop the combined containers and ask
# the garbage collector to run.
del X
del Y
gc.collect()

0

In [9]:
## Save the original training images, then release the memory.
# Stack the images as uint8 instead of int16: 8-bit pixel values (0-255) fit
# exactly and the stacked array needs half the memory. This cell previously
# ended in a MemoryError.
# NOTE(review): assumes mpimg.imread yields 8-bit data for these .jpg files - confirm.
x_train = np.array(x_train, dtype = np.uint8)
with open('data_original_xtrain.pickle', 'wb') as file:
    # Protocol 4 supports very large objects (more than 4 GiB).
    pickle.dump({'x_train': x_train}, file, protocol = 4)
# Deleting the name only helps once nothing else references the array, so it
# is done after the dump and no long-lived dict is kept around.
del x_train
gc.collect()

MemoryError: 

In [None]:
# Convert the remaining splits to compact arrays before writing them out.
# Images are stored as uint8 (8-bit pixel values fit exactly and take half the
# memory of int16); labels are stored as int8.
# NOTE(review): assumes mpimg.imread yields 8-bit data for the .jpg files - confirm.
x_valid = np.array(x_valid, dtype = np.uint8)
x_test = np.array(x_test, dtype = np.uint8)
y_train = np.array(y_train, dtype = np.int8)
y_valid = np.array(y_valid, dtype = np.int8)
y_test = np.array(y_test, dtype = np.int8)
data_dict2 = {'y_train': y_train, 'x_valid': x_valid, 'y_valid': y_valid,
              'x_test': x_test, 'y_test': y_test, 'facial_attributes': facial_attributes}

with open('data_original_others.pickle', 'wb') as file:
    # Protocol 4 supports very large objects (more than 4 GiB).
    pickle.dump(data_dict2, file, protocol = 4)

# Release everything exactly once. Deleting the same names a second time
# raises NameError, and the memory is only reclaimable after the dict that
# references the arrays is gone as well.
del y_train, x_valid, y_valid, x_test, y_test, facial_attributes, data_dict2
gc.collect()

In [None]:
## read the data
# Reload the two pickles written by the save cells above.
# pickle.load can execute arbitrary code, so only load files this notebook
# produced itself.
with open('data_original_xtrain.pickle', 'rb') as file:
    x_train = pickle.load(file)['x_train']

with open('data_original_others.pickle', 'rb') as file:
    data_dict2 = pickle.load(file)

y_train = data_dict2['y_train']
x_valid = data_dict2['x_valid']
y_valid = data_dict2['y_valid']
x_test = data_dict2['x_test']
y_test = data_dict2['y_test']
facial_attributes = data_dict2['facial_attributes']
# Drop the container so it does not keep the original arrays alive once the
# names above are rebound later on.
del data_dict2

## 2. Process data

In [None]:
## visualize data

# random.randint(a, b) includes b, so randint(0, len(x_train)) could pick an
# index one past the end. randrange excludes the upper bound.
index = random.randrange(len(x_train))
image = x_train[index].squeeze()

plt.figure(figsize=(1,1))
plt.imshow(image, cmap="gray")


In [None]:
## Normalize the data. x_train caused a memory error, so float16 is used to
## save memory.
def _normalize_pixels(pixels):
    """Return ``(pixels - 128) / 256`` as a float16 array.

    The arithmetic is done in place on a single float16 copy, so no additional
    full-size temporaries are created for the subtraction and the division.
    """
    scaled = np.array(pixels, dtype = np.float16)
    scaled -= 128.
    scaled /= 256.
    return scaled

x_train = _normalize_pixels(x_train)
x_valid = _normalize_pixels(x_valid)
x_test = _normalize_pixels(x_test)

In [None]:
## save the data
# 'x_test' must reference x_test (the name X_test is never defined), and
# facial_attributes is stored too because the reload cell reads that key.
data_dict = {'x_train': x_train, 'y_train': y_train, 'x_valid': x_valid,
             'y_valid': y_valid, 'x_test': x_test, 'y_test': y_test,
             'facial_attributes': facial_attributes}

with open('project_data.pickle', 'wb') as file:
    # Protocol 4 supports very large objects (more than 4 GiB).
    pickle.dump(data_dict, file, protocol = 4)

In [None]:
## read the data

# pickle.load can execute arbitrary code, so only load files this notebook
# produced itself.
with open('project_data.pickle', 'rb') as file:
    data_dict = pickle.load(file)

x_train = data_dict['x_train']
y_train = data_dict['y_train']
x_valid = data_dict['x_valid']
y_valid = data_dict['y_valid']
x_test = data_dict['x_test']
y_test = data_dict['y_test']
# Only overwrite facial_attributes when the pickle actually carries it, so a
# file saved without that key does not raise KeyError here.
if 'facial_attributes' in data_dict:
    facial_attributes = data_dict['facial_attributes']

## 3. Set Keras Parameters

In [None]:
# Training hyperparameters.
# NOTE(review): presumably passed to the Keras training call later in the
# notebook (not visible in this section) - confirm before renaming or tuning.
epoches = 30  # presumably the number of training epochs
batch_size = 125  # presumably the number of samples per batch

### 3.1 Build Network