# Libraries

In [8]:
import numpy as np
import pandas as pd
import glob
from skimage import io

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, classification_report, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Activation, Flatten, Dense, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import keras
from keras.models import load_model

import pickle
import sys
sys.path.insert(1, '../Src/Lib')
from functions import image_augmentation, image_preprocess

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Dataset

In [2]:
# Dataset is divided in 4 files, this concatenates them all

df_list = []
for file_name in glob.glob("../Data/Raw/Archive/*.txt"):
    df_temp = pd.read_csv(file_name, sep="\t")
    df_list.append(df_temp)
df = pd.concat(df_list, axis=0, ignore_index=True)

## Cleaning

In [3]:
# Getting rid of missing values

df = df.dropna()
df = df[df["age"] != "None"]

In [4]:
# This dataset is meant to make the age a classification problem, but we are going to make it a regression one

df["age"].value_counts()

(25, 32)     4953
(0, 2)       2488
(38, 43)     2293
(4, 6)       2140
(8, 12)      2119
(15, 20)     1642
(60, 100)     867
(48, 53)      825
35            293
13            168
22            149
34            105
23             96
45             88
(27, 32)       77
55             76
36             56
(38, 42)       46
57             24
3              18
29             11
(38, 48)        6
58              5
2               3
(8, 23)         1
42              1
46              1
Name: age, dtype: int64

In [5]:
# Creating two lists, one with the current values (keys) and one with the new values (values), latter being the average of the first.

ages_keys = df["age"].value_counts().index
ages_values = []
for x in df["age"].value_counts().index:
    if x.startswith("("):
        x = x.split(", ")
        x[0] = x[0].replace("(","")
        x[1] = x[1].replace(")","")
        x[0] = int(x[0])
        x[1] = int(x[1])
        x = int((x[0]+x[1])/2)
        ages_values.append(x)
    else:
        ages_values.append(int(x))
        
# Age map to use for regression.
ages_map = {}
for key, value in zip(ages_keys, ages_values):
    ages_map[key] = value
    
df["age"] = df["age"].map(ages_map)

In [6]:
# We will later make the model predict a non-binary category

df = df[df["gender"] != "u"]
df["gender"] = df["gender"].apply(lambda x: 1 if x == "m" else 0).astype(int)

In [7]:
# Creating the path to the image

df["face_id"] = df["face_id"].astype(str)
df["path"] = "../Data/Raw/Archive/Faces/"+df["user_id"]+"/coarse_tilt_aligned_face."+df["face_id"]+"."+df["original_image"]

In [8]:
Saving the model for future use

df.to_csv("../Data/Clean/Faces.csv")

## X/y Split

In [9]:
# As we will do two models, we have two targets

X = df["path"]
y_age = df["age"]
y_gender = df["gender"]

## Train/Test Split

In [10]:
# X_train and X_test is the same for both

X_train, X_test, y_train_age, y_test_age = train_test_split(X, y_age, test_size = 0.22, random_state = 22)
X_train, X_test, y_train_gender, y_test_gender = train_test_split(X, y_gender, test_size = 0.22, random_state = 22)

## Image preprocess

In [85]:
# Image greyscaled, downscaled, size adjusted and transformed.

def image_preprocess(path):
    img = tf.io.read_file(np.array(path).ravel()[0])
    img = tf.image.decode_jpeg(img, channels = 1, ratio = 2)
    img = tf.image.resize(img, [64,64])
    img = img / 255 # This part normalizes the image, scaling it down; 255 is the max, while 0 is the min
    return img

In [12]:
# Preparing the data that will be fed to the model, needs to be np.array

X_train_images = np.array([image_preprocess(path) for path in X_train])
X_test_images = np.array([image_preprocess(path) for path in X_test])

In [13]:
Saving both arrays for future use

filename = "../Data/Clean/X_train_images.pkl"
with open(filename, "wb") as file:
    pickle.dump(X_train_images, file)
    
filename = "../Data/Clean/X_test_images.pkl"
with open(filename, "wb") as file:
    pickle.dump(X_test_images, file)

# Models

## Age

In [14]:
model_age = Sequential()
# First layer needs as many nodes as inputs
model_age.add(Conv2D(64,(2,2), activation = "relu", input_shape = (64,64,1)))
model_age.add(MaxPool2D((2,2)))
model_age.add(Conv2D(64,(2,2), activation = "relu"))
model_age.add(MaxPool2D((2,2)))
model_age.add(Conv2D(64,(2,2), activation = "relu"))
model_age.add(MaxPool2D((2,2)))
model_age.add(Flatten())
model_age.add(Dense(64, activation = "relu"))
model_age.add(Dense(1, activation = "relu"))
opt = keras.optimizers.Adam(learning_rate = 0.01)
model_age.compile(optimizer = opt,
              loss = "mse",
              metrics = ["mae", "mse", "mape"])
model_age.summary()

### Callbacks

In [22]:
# This changes the learning rate based on epochs

def scheduler(epoch, lr):
    if epoch < 5:
        return lr
    else:
        return lr - (lr/(epoch))

In [15]:
# Patience
    
early_stop = EarlyStopping(patience=5)

# Checkpoint

checkpoint_path = '../Models/Age_NN6.hdf5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    save_freq='epoch',
    save_weights_only=False,
    verbose=1
)

# Learning rate

schedule = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [16]:
history = model_age.fit(
    X_train_images, y_train_age,
    epochs=100,
    validation_data = (X_test_images, y_test_age),
    batch_size=128,
    verbose=2,
    callbacks=[early_stop, checkpoint, schedule]
)

### Predictions

In [17]:
y_train_pred = model_age.predict(X_train_images)
y_test_pred  = model_age.predict(X_test_images)

display(mean_absolute_error(y_train_age,y_train_pred))
display(mean_absolute_error(y_test_age,y_test_pred))

## Gender

In [18]:
model_gender = Sequential()
# First layer needs as many nodes as inputs
model_gender.add(Conv2D(64,(2,2), activation = "relu", input_shape = (64,64,1)))
model_gender.add(MaxPool2D((2,2)))
model_gender.add(Conv2D(64,(2,2), activation = "relu"))
model_gender.add(MaxPool2D((2,2)))
model_gender.add(Conv2D(64,(2,2), activation = "relu"))
model_gender.add(MaxPool2D((2,2)))
model_gender.add(Flatten())
model_gender.add(Dense(64, activation = "relu"))
model_gender.add(Dense(1, activation = "sigmoid"))
model_gender.compile(optimizer = "adam",
              loss = "binary_crossentropy",
              metrics = "accuracy")
model_gender.summary()

### Callbacks

In [32]:
early_stop = EarlyStopping(patience=5)

checkpoint_path = '../Models/Gender_NN16.hdf5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    save_freq='epoch',
    save_weights_only=False,
    verbose=1
)

schedule = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [33]:
history = model_gender.fit(
    X_train_images, y_train_gender,
    epochs=50,
    validation_data = (X_test_images, y_test_gender),
    batch_size=128,
    verbose=2,
    callbacks=[early_stop, checkpoint, schedule]
)

Epoch 1/50

Epoch 1: saving model to ../Models\Gender_NN16.hdf5
107/107 - 123s - loss: 0.9337 - accuracy: 0.5584 - val_loss: 0.6604 - val_accuracy: 0.6083 - lr: 0.0030 - 123s/epoch - 1s/step
Epoch 2/50

Epoch 2: saving model to ../Models\Gender_NN16.hdf5
107/107 - 110s - loss: 0.6526 - accuracy: 0.6080 - val_loss: 0.6483 - val_accuracy: 0.6156 - lr: 0.0030 - 110s/epoch - 1s/step
Epoch 3/50

Epoch 3: saving model to ../Models\Gender_NN16.hdf5
107/107 - 111s - loss: 0.6484 - accuracy: 0.6167 - val_loss: 0.6288 - val_accuracy: 0.6448 - lr: 0.0030 - 111s/epoch - 1s/step
Epoch 4/50

Epoch 4: saving model to ../Models\Gender_NN16.hdf5
107/107 - 117s - loss: 0.6615 - accuracy: 0.6198 - val_loss: 0.6834 - val_accuracy: 0.5604 - lr: 0.0030 - 117s/epoch - 1s/step
Epoch 5/50

Epoch 5: saving model to ../Models\Gender_NN16.hdf5
107/107 - 104s - loss: 0.6761 - accuracy: 0.5732 - val_loss: 0.6611 - val_accuracy: 0.6182 - lr: 0.0030 - 104s/epoch - 971ms/step
Epoch 6/50

Epoch 6: saving model to ../Mo

### Predictions

In [None]:
y_train_pred = model_gender.predict(X_train_images)
y_test_pred  = model_gender.predict(X_test_images)


# This is for binary_crossentropy (1 neuron final output)
y_train_pred2 = [int(round(y_train_pred[x][0],0)) for x in range(len(y_train_pred))]
y_test_pred2 = [int(round(y_test_pred[x][0],0)) for x in range(len(y_test_pred))]


# This is for sparse_categorical_crossestropy (2 neurons final output)
# y_train_pred2 = np.argmax(y_train_pred, axis=1).reshape(-1,1)
# y_test_pred2 = np.argmax(y_test_pred, axis=1).reshape(-1,1)

print("Kappa score:",cohen_kappa_score(y_train_gender, y_train_pred2))
print(classification_report(y_train_gender, y_train_pred2, zero_division = True))
print("Kappa score:",cohen_kappa_score(y_test_gender, y_test_pred2))
print(classification_report(y_test_gender, y_test_pred2, zero_division = True))


display(model_gender.predict(np.array([image_preprocess("../Data/Test/20Female.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/25Female1.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/25Female2.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/50Female1.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/50Female2.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/50Female3.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/50Female4.jpg")])))

display(model_gender.predict(np.array([image_preprocess("../Data/Test/30Male1.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/30Male2.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/30Male3.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/40Male.jpg")])))
display(model_gender.predict(np.array([image_preprocess("../Data/Test/50Male.jpg")])))

## Data Augmentation

In [2]:
model_age = load_model("../Models/Age_NN3.hdf5")
model_gender = load_model("../Models/Gender_NN3.hdf5")

In [5]:
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2)
])

In [18]:
data_augmentation.save("../Models/Augmentation.hdf5")

ValueError: Weights for model sequential_3 have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.

In [6]:
images = []
image1 = image_preprocess("../Data/Test/25Female1.jpg")
images.append(np.array([image1]))
i = 0
while i < 9:
    image = image_preprocess("../Data/Test/25Female1.jpg")
    image = tf.cast(tf.expand_dims(image, 0), tf.float32)
    image = data_augmentation(image)
    images.append(image)
    i = i+1



In [9]:
predictions_age = []
predictions_female = []
predictions_male = []
for image in images:
    predictions_age.append(model_age.predict(image)[0])
    predictions_female.append(model_gender.predict(image)[0][0])
    predictions_male.append(model_gender.predict(image)[0][1])
    
print(np.mean(predictions_age))
print(np.mean(predictions_female))
print(np.mean(predictions_male))

NameError: name 'images' is not defined

In [11]:
model_gender.predict(np.array([image]))



array([[0.86575  , 0.0824468]], dtype=float32)

In [11]:
image = image_preprocess("../Data/Test/25Female1.jpg")
age, female, male = image_augmentation(image, model_age, model_gender)

print("Age: {}, Female: {}, Male: {}".format(age, female, male))

Age: 13.148493766784668, Female: 0.8690377473831177, Male: 0.12919019162654877
