# Setup

### Imports

In [None]:
import time
import cv2
import os
import itertools
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from zipfile import ZipFile
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras import utils
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from keras.models import load_model

### Mounting Google Drive

In [None]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


## Constants

In [None]:
# Source image folders and the UTKFace metadata CSV on the mounted Google Drive.
face_age_images_path = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/face_age"
utk_face_images_path = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/UTKFace"
utk_dataset_csv_path = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/ageutk_full.csv"

# CSV files written by this notebook: the merged dataset and its train/validation/test splits.
full_dataset_path = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/full_dataset.csv"
test_file_name = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/test.csv"
train_file_name = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/train.csv"
val_file_name = "/content/gdrive/MyDrive/BasementFriends/ageValidation/dataset/val.csv"

# Directory that receives the logs, checkpoints, saved model and exported plots.
model_directory_path = "/content/gdrive/MyDrive/BasementFriends/ageValidation/model"
# Column names of the data frames that hold image paths and age-class labels.
image_path_column_name = "image_path"
age_column_name =  "age"
# File/folder names that are combined with model_directory_path further down.
log_dir_name = "logs"
checkpoint_file_name = "model_checkpoint.h5"
# Field separator used when the split CSV files are written line by line.
separator = ","
plot_name = "plot.png"
final_cnn_name = "final_cnn.h5"
confusion_matrix_name = "confusion_matrix.png"
# Batch size of the tf.data pipelines and number of age classes
# (one-hot depth of the labels and width of the softmax output layer).
batch_size = 16
num_classes = 5

# Dataset


## Preparing training, validation and test sets

### Creating a data frame containing the image paths and the age of the person in each image

Reading the UTKFace dataset

In [None]:
labels = []
image_paths = []

In [None]:
def convert_age(age):
    """Map an age in years to one of the five age-group class indices.

    Groups (upper bounds inclusive):
        0 -> up to 2 years
        1 -> above 2, up to 9
        2 -> above 9, up to 15
        3 -> above 15, up to 45
        4 -> above 45

    Upper-bound comparisons are used so that every non-negative age lands in
    exactly one group: age 0 and fractional ages such as 2.5 no longer fall
    through to the oldest group.

    Args:
        age: Age in years (int or float).

    Returns:
        int: Class index in the range 0..4.

    Raises:
        ValueError: If ``age`` is negative.
    """
    if age < 0:
        raise ValueError(f"age must be non-negative, got {age!r}")
    if age <= 2:
        return 0
    if age <= 9:
        return 1
    if age <= 15:
        return 2
    if age <= 45:
        return 3
    return 4

In [None]:
utk_face_dataset = pd.read_csv(utk_dataset_csv_path, header=0)

In [None]:
utk_face_dataset.head()

Unnamed: 0,files,age,gender
0,26_0_2_20170104023102422.jpg.chip.jpg,26,0
1,22_1_1_20170112233644761.jpg.chip.jpg,22,1
2,21_1_3_20170105003215901.jpg.chip.jpg,21,1
3,28_0_0_20170117180555824.jpg.chip.jpg,28,0
4,17_1_4_20170103222931966.jpg.chip.jpg,17,1


In [None]:
# Turn every UTKFace row into an age-class label and the full path of its image.
labels.extend(convert_age(age) for age in utk_face_dataset['age'])
image_paths.extend(
    os.path.join(utk_face_images_path, str(file_name))
    for file_name in utk_face_dataset['files']
)

In [None]:
# Show the sizes and a short sample instead of dumping every entry of both lists.
print(f"{len(labels)} labels, e.g. {labels[:5]}")
print(f"{len(image_paths)} image paths, e.g. {image_paths[:2]}")

Output hidden; open in https://colab.research.google.com to view.

Reading the face_age dataset:

In [None]:
# The name of each sub-folder is parsed as the age shared by all images inside it.
# Sorting makes the row order reproducible; os.listdir returns entries in arbitrary order.
for age_dir_name in sorted(os.listdir(face_age_images_path)):
  full_dir_path = os.path.join(face_age_images_path, age_dir_name)
  # Skip stray files and folders whose name is not a number instead of crashing in int().
  if not (os.path.isdir(full_dir_path) and age_dir_name.isdigit()):
    continue
  label = convert_age(int(age_dir_name))
  for image_name in sorted(os.listdir(full_dir_path)):
    labels.append(label)
    image_paths.append(os.path.join(full_dir_path, image_name))

In [None]:
dataFrame = pd.DataFrame({image_path_column_name: image_paths,
                          age_column_name: labels})

In [None]:
dataFrame

Unnamed: 0,image_path,age
0,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
1,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
2,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
3,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
...,...,...
32941,/content/gdrive/MyDrive/BasementFriends/ageVal...,0
32942,/content/gdrive/MyDrive/BasementFriends/ageVal...,0
32943,/content/gdrive/MyDrive/BasementFriends/ageVal...,0
32944,/content/gdrive/MyDrive/BasementFriends/ageVal...,0


Saving dataFrame

In [None]:
dataFrame.to_csv(full_dataset_path)

Reading data from csv file

In [None]:
dataFrame = pd.read_csv(full_dataset_path, header=0,usecols={image_path_column_name, age_column_name})

In [None]:
dataFrame.head()

Unnamed: 0,image_path,age
0,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
1,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
2,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
3,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


### Creating training, validation and test sets

In [None]:
# Hold out 30 % of the rows first, then divide that hold-out part 70/30 into the
# validation and test sets (roughly 70 % / 21 % / 9 % of all rows).
train, holdout = train_test_split(dataFrame, test_size=0.3, random_state=20, shuffle=True)
val, test = train_test_split(holdout, test_size=0.3, random_state=20, shuffle=True)

In [None]:
train.shape

(23062, 2)

In [None]:
train.head()

Unnamed: 0,image_path,age
19851,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
32489,/content/gdrive/MyDrive/BasementFriends/ageVal...,0
23878,/content/gdrive/MyDrive/BasementFriends/ageVal...,4
10868,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
17205,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


In [None]:
val.shape

(6918, 2)

In [None]:
val.head()

Unnamed: 0,image_path,age
14957,/content/gdrive/MyDrive/BasementFriends/ageVal...,4
18096,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
31167,/content/gdrive/MyDrive/BasementFriends/ageVal...,1
4529,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
11213,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


In [None]:
test.shape

(2966, 2)

In [None]:
test.head()

Unnamed: 0,image_path,age
16393,/content/gdrive/MyDrive/BasementFriends/ageVal...,1
15083,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
5147,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
15867,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4638,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


Saving test, train and val datasets to csv

In [None]:
def save_to_file(path, data):
  """Write the image-path and age columns of ``data`` to ``path`` as header-less CSV.

  Every row becomes one line ``<image path><separator><age>``; the row index
  is not written.

  Args:
    path: Destination file path; an existing file is overwritten.
    data: DataFrame that contains the ``image_path_column_name`` and
      ``age_column_name`` columns.
  """
  # The context manager closes the file even when a write fails part-way.
  with open(path, "w", encoding="utf-8") as file:
    # Iterating the two columns directly avoids building a Series per row.
    for image_path, age in zip(data[image_path_column_name], data[age_column_name]):
      file.write(f"{image_path}{separator}{age}\n")

In [None]:
test_full_path = test_file_name
train_full_path = train_file_name
val_full_path = val_file_name

In [None]:
save_to_file(test_full_path, test)
save_to_file(val_full_path, val)
save_to_file(train_full_path, train)

### Reading test, train and val sets from csv

In [None]:
test_full_path =  test_file_name
train_full_path = train_file_name
val_full_path = val_file_name

In [None]:
train = pd.read_csv(train_full_path, header=None, usecols=None, names=[image_path_column_name, age_column_name])

In [None]:
train.head()

Unnamed: 0,image_path,age
0,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
1,/content/gdrive/MyDrive/BasementFriends/ageVal...,0
2,/content/gdrive/MyDrive/BasementFriends/ageVal...,4
3,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


In [None]:
val = pd.read_csv(val_full_path, header=None, usecols=None, names=[image_path_column_name, age_column_name])

In [None]:
val.head()

Unnamed: 0,image_path,age
0,/content/gdrive/MyDrive/BasementFriends/ageVal...,4
1,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
2,/content/gdrive/MyDrive/BasementFriends/ageVal...,1
3,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


In [None]:
test = pd.read_csv(test_full_path, header=None, usecols=None, names=[image_path_column_name, age_column_name])

In [None]:
test.head()

Unnamed: 0,image_path,age
0,/content/gdrive/MyDrive/BasementFriends/ageVal...,1
1,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
2,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
3,/content/gdrive/MyDrive/BasementFriends/ageVal...,3
4,/content/gdrive/MyDrive/BasementFriends/ageVal...,3


### Reading images, creating labels

In [None]:
# Image paths and age-class labels of each split as constant tensors; they are
# handed to tf.data.Dataset.from_tensor_slices when the datasets are built.
train_filenames_tensor = tf.constant(list(train[image_path_column_name]))
train_labels_tensor = tf.constant(list(train[age_column_name]))

# Validation split.
val_filenames_tensor = tf.constant(list(val[image_path_column_name]))
val_labels_tensor = tf.constant(list(val[age_column_name]))

# Test split.
test_filenames_tensor = tf.constant(list(test[image_path_column_name]))
test_labels_tensor = tf.constant(list(test[age_column_name]))

In [None]:
def parse_images_and_labels(filename, label):
    """Load one single-channel image from disk and one-hot encode its class label.

    Args:
        filename: Path of the JPEG file to read.
        label: Integer class index of the image.

    Returns:
        Tuple ``(image, one_hot_label)``: the image decoded with one channel
        and a one-hot vector of depth ``num_classes``.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.io.decode_jpeg(raw_bytes, channels=1)
    return pixels, tf.one_hot(label, num_classes)

In [None]:
def _build_dataset(filenames_tensor, labels_tensor, shuffle=False):
    """Create a batched tf.data pipeline that decodes images and one-hot encodes labels.

    Args:
        filenames_tensor: 1-D tensor of image paths.
        labels_tensor: 1-D tensor of class indices, aligned with ``filenames_tensor``.
        shuffle: When True, the (path, label) pairs are reshuffled on every epoch.

    Returns:
        tf.data.Dataset yielding ``(images, one_hot_labels)`` batches of ``batch_size``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((filenames_tensor, labels_tensor))
    if shuffle:
        # Shuffling happens before decoding, so only paths and labels are buffered
        # and the buffer can cover the whole split.
        dataset = dataset.shuffle(buffer_size=len(filenames_tensor),
                                  reshuffle_each_iteration=True)
    dataset = dataset.map(parse_images_and_labels, num_parallel_calls=tf.data.AUTOTUNE)
    # Prefetching overlaps image loading with the training step.
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# Only the training split is shuffled: Keras ignores fit(shuffle=True) for
# tf.data inputs, so the shuffling has to be part of the pipeline itself.
train_dataset = _build_dataset(train_filenames_tensor, train_labels_tensor, shuffle=True)
val_dataset = _build_dataset(val_filenames_tensor, val_labels_tensor)
test_dataset = _build_dataset(test_filenames_tensor, test_labels_tensor)

# Architecture

## Preparing CNN architecture

In [None]:
log_dir = os.path.join(model_directory_path, log_dir_name)
tensorboard = TensorBoard(log_dir=log_dir)

In [None]:
checkpoint_path = os.path.join(model_directory_path, checkpoint_file_name)
checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                             monitor='val_accuracy',
                             save_best_only=True,
                             save_weights_only=False,
                             verbose=1
                            )

In [None]:
# Stop training after 10 epochs without improvement and restore the best weights.
# No `monitor` is passed, so the Keras default (val_loss) is watched, whereas the
# ModelCheckpoint callback in this notebook tracks val_accuracy.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

In [None]:
# Four Conv2D + AveragePooling2D stages with doubling filter counts, followed by
# global average pooling and a dense classifier head with a softmax output.
final_cnn = Sequential([
    Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(200, 200, 1)),
    AveragePooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=3, activation='relu'),
    AveragePooling2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=3, activation='relu'),
    AveragePooling2D(pool_size=(2, 2)),

    Conv2D(filters=256, kernel_size=3, activation='relu'),
    AveragePooling2D(pool_size=(2, 2)),

    GlobalAveragePooling2D(),
    Dense(132, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

final_cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 198, 198, 32)      320       
                                                                 
 average_pooling2d (Average  (None, 99, 99, 32)        0         
 Pooling2D)                                                      
                                                                 
 conv2d_1 (Conv2D)           (None, 97, 97, 64)        18496     
                                                                 
 average_pooling2d_1 (Avera  (None, 48, 48, 64)        0         
 gePooling2D)                                                    
                                                                 
 conv2d_2 (Conv2D)           (None, 46, 46, 128)       73856     
                                                                 
 average_pooling2d_2 (Avera  (None, 23, 23, 128)       0

In [None]:
# The network ends in a num_classes-way softmax and the labels are one-hot encoded,
# so the matching loss is categorical cross-entropy. With 'binary_crossentropy'
# Keras also resolves 'accuracy' to binary accuracy, which overstates
# multi-class performance.
final_cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

## Loading model

In [None]:
# Resume from the saved checkpoint when one exists; otherwise keep the model
# built above so the notebook also runs before any checkpoint has been written.
saved_checkpoint_path = os.path.join(model_directory_path, checkpoint_file_name)
if os.path.exists(saved_checkpoint_path):
    final_cnn = load_model(saved_checkpoint_path)

# Training

In [None]:
# train_dataset and val_dataset are already-batched tf.data pipelines, so neither
# `batch_size` nor `shuffle` is passed: Keras documents that batch_size must not be
# specified for dataset inputs and that `shuffle` is ignored for them.
final_cnn_history = final_cnn.fit(train_dataset,
                                  validation_data=val_dataset,
                                  epochs=100,
                                  callbacks=[tensorboard, checkpoint, early_stopping]
                                 )

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.88450, saving model to /content/gdrive/MyDrive/BasementFriends/ageValidation/model/model_checkpoint.h5


  saving_api.save_model(


Epoch 2/100
Epoch 2: val_accuracy did not improve from 0.88450
Epoch 3/100
Epoch 3: val_accuracy did not improve from 0.88450
Epoch 4/100
Epoch 4: val_accuracy did not improve from 0.88450
Epoch 5/100

NotFoundError: ignored

In [None]:
# Saving the model as an h5 file for possible use later.
# os.path.join supplies the separator that plain concatenation dropped
# (".../model" + "final_cnn.h5" gave ".../modelfinal_cnn.h5").
path = os.path.join(model_directory_path, final_cnn_name)
final_cnn.save(path, save_format='h5')

# Model evaluation

In [None]:
# Per-epoch curves recorded by fit(). The test_* names are what the plotting cell
# reads, but they hold the validation metrics (val_loss / val_accuracy).
history_metrics = final_cnn_history.history
train_loss, train_accuracy = history_metrics['loss'], history_metrics['accuracy']
test_loss, test_accuracy = history_metrics['val_loss'], history_metrics['val_accuracy']

## Plot

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(15,7))
loss_ax, accuracy_ax = ax

# test_loss / test_accuracy are filled from the history's val_loss / val_accuracy,
# so the curves are labelled as validation data rather than test data.
loss_ax.plot(train_loss, label='Train Loss', color='royalblue', marker='o', markersize=5)
loss_ax.plot(test_loss, label='Validation Loss', color='orangered', marker='o', markersize=5)

loss_ax.set_xlabel('Epochs', fontsize=14)
# Neutral label: the curve shows whichever loss the model was compiled with.
loss_ax.set_ylabel('Loss', fontsize=14)

loss_ax.legend(fontsize=14)
loss_ax.tick_params(axis='both', labelsize=12)

accuracy_ax.plot(train_accuracy, label='Train Accuracy', color='royalblue', marker='o', markersize=5)
accuracy_ax.plot(test_accuracy, label='Validation Accuracy', color='orangered', marker='o', markersize=5)

accuracy_ax.set_xlabel('Epochs', fontsize=14)
accuracy_ax.set_ylabel('Accuracy', fontsize=14)

accuracy_ax.legend(fontsize=14)
accuracy_ax.tick_params(axis='both', labelsize=12)

fig.suptitle(x=0.5, y=0.92, t="Lineplots showing loss and accuracy of CNN model by epochs", fontsize=16)

# Exporting plot image in PNG format.
plot_full_path = os.path.join(model_directory_path, plot_name)
fig.savefig(plot_full_path, bbox_inches='tight');

In [None]:
# Evaluating the model on test dataset.

final_cnn_score = final_cnn.evaluate(test_dataset, verbose=1)

In [None]:
# Printing the relevant score summary.

final_cnn_labels = final_cnn.metrics_names
print(f'CNN model {final_cnn_labels[0]} \t\t= {round(final_cnn_score[0], 3)}')
print(f'CNN model {final_cnn_labels[1]} \t= {round(final_cnn_score[1], 3)}')

In [None]:
final_cnn_full_path = os.path.join(model_directory_path, final_cnn_name)
final_cnn.save(final_cnn_full_path, save_format='h5')

In [None]:
final_cnn_pred = final_cnn.predict(test_dataset)
final_cnn_pred = final_cnn_pred.argmax(axis=-1)

In [None]:
# Passing the full label list keeps the matrix num_classes x num_classes even when
# a class is absent from both the test labels and the predictions.
conf_mat = confusion_matrix(list(test[age_column_name]), final_cnn_pred,
                            labels=list(range(num_classes)))
conf_mat

In [None]:
# Defining a function to plot the confusion matrix in a grid for easier visualization.

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix', export_as='confusion_matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix on the current figure and save it as a PNG file.

    Args:
        cm: 2-D NumPy array of counts; rows are drawn as true labels and
            columns as predicted labels.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: When True, every row is divided by its sum before plotting.
            Rows that sum to zero stay all-zero.
        title: Title placed above the plot.
        export_as: Currently unused; kept so existing calls keep working. The
            image is always written to ``confusion_matrix_name`` inside
            ``model_directory_path``.
            NOTE(review): confirm whether this was meant to choose the file name.
        cmap: Matplotlib colormap for the cells.
    """
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero so empty rows do not turn
        # into NaN (which would also make the text-colour threshold NaN).
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=16)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cell annotations: white text on dark cells, black text on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True labels', fontsize=14)
    plt.xlabel('Predicted labels', fontsize=14)
    # tight_layout runs after the axis labels exist so they are part of the layout.
    plt.tight_layout()

    # Exporting plot image in PNG format.
    confusion_matrix_full_path = os.path.join(model_directory_path, confusion_matrix_name)
    plt.savefig(confusion_matrix_full_path, bbox_inches='tight');

In [None]:
# Plotting the confusion matrix using the function defined above.

# One tick label per class, in ascending order. Wrapping the set in a list gave a
# single-element list, so only one tick (labelled with the whole set) was drawn.
cm_plot_labels = sorted(set(train[age_column_name]))

# 16 x 8 inches; the former 160 x 80 inch canvas produced an enormous image.
plt.figure(figsize=(16, 8))
plot_confusion_matrix(conf_mat, cm_plot_labels, normalize=True,
                      title="Confusion Matrix based on predictions from CNN model",
                      export_as="final_cnn_conf_mat_norm"
                     )

plt.show()