# Introduction

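This notebook runs the FaceAge pipeline on the hi-res UTK images stored in the project folder on Google Drive. For each image it localizes a face with MTCNN, estimates an age from the cropped face with a saved Keras model (`faceage_model.h5`), and writes the predictions, together with the age, gender, and race labels parsed from the file names, to a CSV file.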

In [None]:
# mount Google Drive inside the Colab runtime at /content/gdrive
from google.colab import drive
drive.mount('/content/gdrive')

# root folder of the project on the mounted drive; the data and model paths
# used later in the notebook are built from it
project_path = "/content/gdrive/MyDrive/AIM/Projects/FaceAge"

Mounted at /content/gdrive


---


# Pipeline Description

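The pipeline depends on `mtcnn` (face detection), TensorFlow/Keras (model loading and inference), `scikit-image` and `PIL` (image reading and resizing), and `numpy`/`pandas` (array handling and the results table). It has two stages: face localization, followed by FaceAge estimation on the localized face.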

In [None]:
!pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[?25l[K     |▏                               | 10 kB 35.2 MB/s eta 0:00:01[K     |▎                               | 20 kB 36.1 MB/s eta 0:00:01[K     |▍                               | 30 kB 20.3 MB/s eta 0:00:01[K     |▋                               | 40 kB 18.3 MB/s eta 0:00:01[K     |▊                               | 51 kB 17.7 MB/s eta 0:00:01[K     |▉                               | 61 kB 16.2 MB/s eta 0:00:01[K     |█                               | 71 kB 13.8 MB/s eta 0:00:01[K     |█▏                              | 81 kB 15.0 MB/s eta 0:00:01[K     |█▎                              | 92 kB 15.6 MB/s eta 0:00:01[K     |█▌                              | 102 kB 14.2 MB/s eta 0:00:01[K     |█▋                              | 112 kB 14.2 MB/s eta 0:00:01[K     |█▊                              | 122 kB 14.2 MB/s eta 0:00:01[K     |█▉                              | 133 kB 14.2 MB/s eta 0:00:

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
import PIL
import mtcnn
import keras
import numpy as np
import pandas as pd
import tensorflow as tf

from IPython.display import clear_output

from skimage.io import imsave, imread

In [None]:
# report the interpreter and library versions this run was executed with
version_report = (
  ("Python version     : ", sys.version.split('\n')[0]),
  ("TensorFlow version : ", tf.__version__),
  ("Keras version      : ", keras.__version__),
  ("Numpy version      : ", np.__version__),
)

for label, version in version_report:
  print(label, version)

Python version     :  3.7.12 (default, Sep 10 2021, 00:21:48) 
TensorFlow version :  2.7.0
Keras version      :  2.7.0
Numpy version      :  1.19.5


In [None]:
# data and model folders both live directly under the project root
BASE_DATA_PATH, BASE_MODEL_PATH = (
  os.path.join(project_path, subfolder) for subfolder in ("data", "models")
)

---

# Data Processing

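Input images are read from the `utk_hi-res_qa` folder under the project data directory. The age, gender, and race labels and a subject ID are parsed from each file name, which follows the pattern `<age>_<gender>_<race>_<subject ID>.jpg`.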

## Face Localization

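Each image is passed to the MTCNN face detector, and the first detection it returns (bounding box, confidence, and facial keypoints) is stored for the subject. If detection fails, an error is printed and an empty dictionary is stored instead.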

In [None]:
# MTCNN detector shared by all calls; created on first use because building
# a new detector for every image is slow
_MTCNN_DETECTOR = None


def get_face_bbox_from_image(path_to_image, detector=None):
  """Run MTCNN face detection on one image and return the first detection.

  Args:
    path_to_image: path to the image file; must exist.
    detector: optional object exposing `detect_faces(image)`. When omitted,
      a single `mtcnn.mtcnn.MTCNN()` instance is created on first use and
      reused by later calls.

  Returns:
    The first element of the list returned by `detect_faces` (for MTCNN, a
    dictionary with the keys 'box', 'confidence' and 'keypoints'), or an
    empty dictionary if detection raised an error or found no face.

  Raises:
    AssertionError: if `path_to_image` does not exist.
  """
  global _MTCNN_DETECTOR

  # sanity check
  assert os.path.exists(path_to_image)

  pat_img = imread(path_to_image)

  try:
    if detector is None:
      if _MTCNN_DETECTOR is None:
        _MTCNN_DETECTOR = mtcnn.mtcnn.MTCNN()
      detector = _MTCNN_DETECTOR

    # indexing [0] raises IndexError when no face is found, handled below
    return detector.detect_faces(pat_img)[0]
  except Exception:
    # catch Exception (not a bare except) so that interrupting the kernel
    # during a long run is not silently swallowed
    print('ERROR: Processing error for file "%s"'%(path_to_image))
    return dict()

In [None]:
input_base_path = os.path.join(BASE_DATA_PATH, "utk_hi-res_qa")

# keep only names that end in ".jpg" (a substring test would also match names
# such as "x.jpg.bak"), and sort them because os.listdir returns entries in
# arbitrary order, which would make any subset of the list non-reproducible
input_file_list = sorted(f for f in os.listdir(input_base_path) if f.endswith(".jpg"))

# save the results in a folder parsable by the data_viz notebook
BASE_OUTPUT_PATH = BASE_DATA_PATH

In [None]:
# maps each subject ID to its labels, image path and MTCNN detection
face_bbox_dict = dict()

# limit the number of subjects for a faster execution
# if set to -1, run on all the hi-res UTK data (provided)
N_SUBJECTS = -1

# subset into a separate list: `input_file_list` is left untouched, so this
# cell can be re-run with a different N_SUBJECTS and still see every file
selected_file_list = input_file_list[:N_SUBJECTS] if N_SUBJECTS > 0 else input_file_list


for idx, input_image in enumerate(selected_file_list):

  # file names follow "<age>_<gender>_<race>_<subj_id>.jpg": split once, then
  # drop the file extension from the subject ID
  name_fields = input_image.split("_")
  subj_id = name_fields[3].split(".")[0]
  subj_age, subj_gender, subj_race = name_fields[:3]

  print('(%g/%g) Running the face localization pipeline for "%s"'%(idx + 1,
                                                                   len(selected_file_list),
                                                                   subj_id))

  # the next print replaces this progress line instead of piling up below it
  clear_output(wait = True)

  path_to_image = os.path.join(input_base_path, input_image)

  face_bbox_dict[subj_id] = {
    "age": subj_age,
    "gender": subj_gender,
    "race": subj_race,
    "path_to_image": path_to_image,
    "mtcnn_output_dict": get_face_bbox_from_image(path_to_image),
  }

(2547/2547) Running the face localization pipeline for "20170117152131106"


In [None]:
# example of the output dictionary
# (`subj_id` still holds the ID of the last image processed by the loop above)
face_bbox_dict[subj_id]

{'age': '35',
 'gender': '1',
 'mtcnn_output_dict': {'box': [246, 0, 584, 601],
  'confidence': 0.9992105960845947,
  'keypoints': {'left_eye': (439, 165),
   'mouth_left': (447, 403),
   'mouth_right': (726, 391),
   'nose': (604, 328),
   'right_eye': (714, 155)}},
 'path_to_image': '/content/gdrive/MyDrive/AIM/Projects/FaceAge/data/utk_hi-res_qa/35_1_0_20170117152131106.jpg',
 'race': '0'}

---

# FaceAge Estimation

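For each subject, the face is cropped from the image using the MTCNN bounding box, resized to 160×160 pixels, standardized per image (mean subtracted, divided by the standard deviation), and passed to the FaceAge model to predict an age. The predictions are collected together with the labels parsed from the file names and saved to a CSV file.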

In [None]:
def get_model_prediction(model, path_to_image, mtcnn_output_dict):
  """Predict the age for the face localized in one image.

  Args:
    model: object exposing `predict`; called with an array of shape
      (1, 160, 160, 3).
    path_to_image: path to the image file; must exist.
    mtcnn_output_dict: detection dictionary holding a 'box' entry
      (x, y, width, height), as produced by the face localization step.
      May be empty when localization failed for this image.

  Returns:
    The squeezed output of `model.predict` for the cropped face, or `np.nan`
    when `mtcnn_output_dict` has no 'box' entry.

  Raises:
    AssertionError: if `path_to_image` does not exist.
  """
  # sanity check
  assert os.path.exists(path_to_image)

  # face localization stores an empty dictionary when it fails: report a
  # missing prediction instead of raising KeyError and aborting the whole run
  if not mtcnn_output_dict or 'box' not in mtcnn_output_dict:
    print('WARNING: No face bounding box for file "%s", skipping prediction'%(path_to_image))
    return np.nan

  pat_img = imread(path_to_image)

  # extract the bounding box from the first face
  x1, y1, width, height = mtcnn_output_dict['box']
  # the detector can return slightly negative coordinates; abs() keeps them
  # usable as array indices (kept as-is so existing predictions do not change)
  x1, y1 = abs(x1), abs(y1)
  x2, y2 = x1 + width, y1 + height

  # crop the face
  pat_face = pat_img[y1:y2, x1:x2]

  # resize cropped image to the model input size
  pat_face_pil = PIL.Image.fromarray(np.uint8(pat_face)).convert('RGB')
  pat_face = np.asarray(pat_face_pil.resize((160, 160)))

  # prep image for TF processing: per-image standardization
  mean, std = pat_face.mean(), pat_face.std()
  # a constant crop has zero std; avoid dividing by zero (input becomes all zeros)
  if std == 0:
    std = 1.0
  pat_face = (pat_face - mean) / std
  pat_face_input = pat_face.reshape(1, 160, 160, 3)

  return np.squeeze(model.predict(pat_face_input))

In [None]:
# load the saved FaceAge Keras model (`faceage_model.h5`) from the models folder
model_path = os.path.join(BASE_MODEL_PATH, "faceage_model.h5")
model = keras.models.load_model(model_path)



In [None]:
# for every localized subject, store the model's age estimate next to the
# labels carried over from the face localization step
age_pred_dict = dict()

for subj_id, bbox_entry in face_bbox_dict.items():
  path_to_image = bbox_entry["path_to_image"]
  mtcnn_output_dict = bbox_entry["mtcnn_output_dict"]

  # register the (still empty) record first, then fill it in
  subj_record = dict()
  age_pred_dict[subj_id] = subj_record

  subj_record["faceage"] = get_model_prediction(model, path_to_image, mtcnn_output_dict)
  subj_record.update({label: bbox_entry[label] for label in ("age", "gender", "race")})

In [None]:
# example of the final dictionary to be converted into a DataFrame
# (`subj_id` still holds the last key visited by the loop above)
age_pred_dict[subj_id]

{'age': '35',
 'faceage': array(29.636372, dtype=float32),
 'gender': '1',
 'race': '0'}

In [None]:
# one row per subject: the dictionary keys become the index, which is then
# moved into a regular column named "subj_id"
age_pred_df = (
  pd.DataFrame.from_dict(age_pred_dict, orient = 'index')
  .reset_index(level = 0)
  .rename(columns = {"index": "subj_id"})
)

In [None]:
# display the results table (one row per subject)
age_pred_df

Unnamed: 0,subj_id,faceage,age,gender,race
0,20170110153238490,75.026596,74,1,0
1,20170109213056053,28.754328,21,1,2
2,20170117012906285,48.943573,38,0,1
3,20170117151304315,30.146166,30,1,0
4,20170116200714834,45.974422,26,0,1
...,...,...,...,...,...
2542,20170117165048545,48.71622,55,0,1
2543,20170117182534670,60.842632,35,1,0
2544,20170117194914689,68.6251,63,0,0
2545,20170117175135184,25.377224,26,1,2


In [None]:
# write the results table to a CSV file in the output folder;
# index = False leaves the DataFrame's integer index out of the file
outfile_name = 'utk_hi-res_qa_res.csv'
outfile_path = os.path.join(BASE_OUTPUT_PATH, outfile_name) 
age_pred_df.to_csv(outfile_path, index = False)