In [91]:
import pandas as pd
import datetime
import requests
from requests.auth import HTTPBasicAuth
import zipfile
import os
import shutil
import warnings
from sklearn.metrics import classification_report, confusion_matrix

#keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense,Dropout,Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.models import Model,Sequential 
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [61]:
#download files with images
#https://talhassner.github.io/home/projects/Adience/Adience-data.html
BASE_URL = "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification"
DOWNLOAD_FILES = {
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/aligned.tar.gz": "aligned.tar.gz",
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/fold_0_data.txt": "fold_0_data.txt",
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/fold_1_data.txt": "fold_1_data.txt",
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/fold_2_data.txt": "fold_2_data.txt",
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/fold_3_data.txt": "fold_3_data.txt",
          "http://www.cslab.openu.ac.il/download/adiencedb/AdienceBenchmarkOfUnfilteredFacesForGenderAndAgeClassification/fold_4_data.txt": "fold_4_data.txt"
                }

for DOWNLOAD_FILE, FILE_NAME in DOWNLOAD_FILES.items():
    with open(FILE_NAME, 'wb') as file:
        r = requests.get(DOWNLOAD_FILE, auth = HTTPBasicAuth('adiencedb', 'adience'))
        file.write(r.content)
        

In [88]:
#compile all files catalog
fold = pd.read_csv('fold_0_data.txt',sep='\t')
fold.rename(columns={' user_id':'user_id'},inplace=True)
fold['fold'] = 0
for i in range(1,5):
    temp = pd.read_csv('fold_'+str(i)+'_data.txt',sep='\t')
    temp['fold'] = i
    fold = fold.append(temp,ignore_index=True)
    
fold.dropna(subset=['gender'],inplace=True)
fold = fold[['user_id','original_image','face_id','age','gender','fold']]

In [92]:
fold

Unnamed: 0,user_id,original_image,face_id,age,gender,fold
0,30601258@N03,10399646885_67c7d20df9_o.jpg,1,"(25, 32)",f,0
1,30601258@N03,10424815813_e94629b1ec_o.jpg,2,"(25, 32)",m,0
2,30601258@N03,10437979845_5985be4b26_o.jpg,1,"(25, 32)",f,0
3,30601258@N03,10437979845_5985be4b26_o.jpg,3,"(25, 32)",m,0
4,30601258@N03,11816644924_075c3d8d59_o.jpg,2,"(25, 32)",m,0
...,...,...,...,...,...,...
19341,101515718@N03,10587826073_6663f5b654_o.jpg,2280,"(25, 32)",f,4
19342,101515718@N03,10587571495_a61785cd06_o.jpg,2278,"(25, 32)",m,4
19343,101515718@N03,10587571495_a61785cd06_o.jpg,2279,"(25, 32)",f,4
19344,50458575@N08,9426695459_9e8b347604_o.jpg,2281,"(25, 32)",f,4


In [59]:
#creates directories for training, test and validation files
cwd = os.getcwd()
train_faces = os.path.join(cwd,'train')
os.mkdir(train_faces)
test_faces = os.path.join(cwd,'test')
os.mkdir(test_faces)
validation_faces = os.path.join(cwd,'validation')
os.mkdir(validation_faces)

In [70]:
#copy images to each directory
for index, row in fold.loc[fold.fold==0].iterrows():
    src = os.path.join(cwd,'faces',row['user_id'],'coarse_tilt_aligned_face.'+str(row['face_id'])+'.'+row['original_image'])
    dst = os.path.join(train_faces,row['original_image'])
    shutil.copyfile(src,dst)
    
print('imagens de treino copiadas')

for index, row in fold.loc[fold.fold==1].iterrows():
    src = os.path.join(cwd,'faces',row['user_id'],'coarse_tilt_aligned_face.'+str(row['face_id'])+'.'+row['original_image'])
    dst = os.path.join(test_faces,row['original_image'])
    shutil.copyfile(src,dst)

print('imagens de test copiadas')

for index, row in fold.loc[fold.fold==2].iterrows():
    src = os.path.join(cwd,'faces',row['user_id'],'coarse_tilt_aligned_face.'+str(row['face_id'])+'.'+row['original_image'])
    dst = os.path.join(validation_faces,row['original_image'])
    shutil.copyfile(src,dst)

print('imagens de validation copiadas')

imagens de treino copiadas
imagens de test copiadas
imagens de validation copiadas


In [89]:
# Initialising the CNN
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape = (150, 150, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Conv2D(32, (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Flatten())
model.add(Dense(units = 128, activation = 'relu'))
model.add(Dense(units = 3, activation = 'softmax'))
model.compile(loss='categorical_crossentropy', optimazer = optimizers.RMSprop(lr=1e-4), metrics=['acc'])

In [80]:
#vectorizing images 
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_dataframe(
                    fold,
                    x_col='original_image',
                    y_col='gender',
                    directory=train_faces,
                    target_size=(150,150),
                    batch_size=20,
                    class_mode='categorical'
                )

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
                    fold,
                    x_col='original_image',
                    y_col='gender',
                    directory=test_faces,
                    target_size=(150,150),
                    batch_size=20,
                    class_mode='categorical'
                )


Found 4431 validated image filenames belonging to 3 classes.
Found 3692 validated image filenames belonging to 3 classes.


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


In [90]:
#training model
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=15,
                              validation_data = test_generator,
                              validation_steps=15)
model.save('face_gender_detection.h5')

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# if you have the last version of tensorflow, the predict_generator is deprecated.
# you should use the predict method.
# if you do not have the last version, you must use predict_generator
Y_pred = model.predict_generator(test_set, 63) # ceil(num_of_test_samples / batch_size)
Y_pred = (Y_pred>0.5)
print('Confusion Matrix')
print(confusion_matrix(test_set.classes, Y_pred))
print('Classification Report')
target_names = ['Cats', 'Dogs']
print(classification_report(test_set.classes, Y_pred, target_names=target_names))


loss_train = hist.history['accuracy']
loss_val = hist.history['val_accuracy']
epochs = range(1,6)
plt.plot(epochs, loss_train, 'g', label='Training accuracy')
plt.plot(epochs, loss_val, 'b', label='Validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

loss_train = hist.history['loss']
loss_val = hist.history['val_loss']
epochs = range(1,6)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='Validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
Lucas and Francisco 

decide the metric (acc,f1,...?)
finish the viz of this metric
create the model of age classification
organize better the images into our splits
benchmark between papers
post in git 
report

In [None]:
Bruna and Isha

Topic: how children change you vocation?
get deeper in the EDA
What we are going to show
How are we going to show
How to make interactive 
post in git