# Age Classification Using Facial Images

#### Import Statements

In [1]:
import numpy as np 
import plotly.express as px 
import pandas as pd 
import cv2 
import os 
from glob import glob
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Activation, Dropout, Flatten, Dense, Dropout, LayerNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import matplotlib.pyplot as plt
import pickle 

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

from tensorflow.keras.optimizers import Adam
#added import for sgd
from tensorflow.keras.optimizers import SGD

# import dlib
import dlib

import dlib

from imutils.face_utils import FaceAligner
from imutils.face_utils import rect_to_bb

import imutils

from tensorflow.keras.metrics import Precision, Recall



## Data Retrieval and Cleaning

#### Loading the cleaned CSV metadata into a dataframe

In [2]:
# Load the cleaned IMDB-WIKI metadata: age, gender, relative image path and age bucket.
df = pd.read_csv('cleaned_IMDB_WIKI_non_gray.csv')
# Bare expression so the notebook renders the dataframe as the cell output.
df

Unnamed: 0,age,gender,path,AgeRange
0,30,male,imdb_crop/99/nm0625099_rm316640512_1974-11-2_2...,Youth
1,27,male,imdb_crop/90/nm0004790_rm365789952_1976-8-23_2...,Youth
2,23,male,imdb_crop/68/nm0356468_rm3801258496_1975-8-4_1...,Youth
3,29,male,imdb_crop/58/nm1864458_rm1663605504_1975-9-17_...,Youth
4,24,male,wiki_crop/56/29835356_1989-07-25_2014.jpg,Youth
...,...,...,...,...
211096,48,male,imdb_crop/81/nm0000381_rm1343402752_1962-7-19_...,Adult
211097,42,male,imdb_crop/62/nm1101562_rm723357184_1959-10-7_2...,Adult
211098,29,female,wiki_crop/22/3571322_1977-01-31_2007.jpg,Youth
211099,57,male,imdb_crop/16/nm0000616_rm2154482176_1956-4-18_...,Old


#### Dropping null values and shuffling the rows

In [3]:
# Discard rows with any missing field; every sample needs both a path and a label.
df = df.dropna()
# Shuffle the rows. The seed fixes the row order: the later train/test split
# selects rows by position with random_state=42, so an unseeded shuffle here
# would make that split differ from run to run.
df = df.sample(frac=1, random_state=42)

In [4]:
# (rows, columns) remaining after the null rows were dropped.
df.shape

(211101, 4)

#### Using the details of dataframe to get the image path

In [5]:
# Prefix every relative crop path with the dataset root directory so the
# resulting 'image_path' column can be opened directly.
DATA_ROOT = '/age and gender prediction/project/data/'
df['image_path'] = DATA_ROOT + df['path']

In [6]:
# Number of samples per age bucket; the output shows how imbalanced the classes are.
df['AgeRange'].value_counts()

Youth    104604
Adult     83935
Old       18921
Kid        3641
Name: AgeRange, dtype: int64

In [7]:
# Keep only the columns used downstream. `.copy()` makes new_df an independent
# frame, so the later in-place re-encoding of 'AgeRange' writes to real data
# instead of raising pandas' SettingWithCopyWarning on a slice of df.
new_df = df[['AgeRange', 'age', 'gender', 'image_path']].copy()

In [8]:
# Drop the reference to the full dataframe; only new_df is used from here on.
del df

#### Using LabelEncoder to obtain targets in integers

In [9]:
from sklearn.preprocessing import LabelEncoder 
# Map the string age buckets to integer class ids. LabelEncoder numbers the
# classes in sorted label order; the fitted `le` keeps the id <-> label mapping.
le = LabelEncoder()
new_df['AgeRange'] = le.fit_transform(new_df['AgeRange'])

#### Dumping the same for future usage

In [10]:
# Persist the fitted encoder to disk for later reuse.
with open('imdbwiki_age_encoder.pkl','wb') as pkl_file:
    pickle.dump(le, pkl_file)

## Image Preprocessing

#### Preparing to split for train and test set

In [11]:
# Double brackets select a one-column DataFrame, so both arrays are 2-D with
# shape (n_samples, 1): each row of X holds one path, each row of y one class id.
X = new_df[['image_path']].values 
y = new_df[['AgeRange']].values 

In [12]:
# Distinct encoded class ids present in the targets.
set(y.flatten().tolist())

{0, 1, 2, 3}

#### Train Test Split

In [13]:
from sklearn.model_selection import train_test_split 
# Hold out 20% of the samples for testing; random_state fixes which positions are held out.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced -- consider stratify=y so every bucket keeps its share in both sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Decoding the JPEG images and resizing them to 227x227

In [14]:
def preprocess_image(individual_path):
    """Load one JPEG from disk and resize it to the 227x227 network input size.

    Args:
        individual_path: A path, or any (possibly nested) sequence/array whose
            first flattened element is the path (str or bytes) of the image.

    Returns:
        A float32 tensor of shape (227, 227, 3). Pixel values are not
        rescaled, so they stay in the 0-255 range of the decoded image.
    """
    path = np.array(individual_path).ravel()[0]
    raw = tf.io.read_file(path)
    # Force three channels. With the default (channels=0) the decoder keeps the
    # file's own channel count, so a grayscale JPEG would come back as
    # (227, 227, 1) and break the (227, 227, 3) shape the dataset declares.
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.image.resize(img, [227, 227])
    return img

## Face Detection and Landmark Detection

In [15]:
def yield_training_values(X_train,y_train):
    """Yield (image, label) pairs after face alignment and cropping.

    For every path/label pair the image is read with OpenCV and denoised, and
    a grayscale copy of the denoised image is used for dlib face detection.
    The last detected face is aligned with ``FaceAligner`` and a Haar cascade
    is run on the aligned face. Each Haar detection is cropped and written
    back to the *same* path, so the source file on disk is overwritten in
    place. The file is then re-read and resized by ``preprocess_image``. If
    the Haar cascade finds nothing, the file is left untouched and the
    original image is yielded.

    A sample is skipped (nothing is yielded for it) when the file cannot be
    read, when dlib finds no face, or when alignment / cropping raises.

    Args:
        X_train: Iterable of rows whose first element is the image path as
            UTF-8 encoded bytes (the form in which
            ``tf.data.Dataset.from_generator`` passes string ``args``).
        y_train: Iterable of labels, parallel to ``X_train``.

    Yields:
        ``(image, value)`` where ``image`` is the tensor returned by
        ``preprocess_image`` and ``value`` is the unchanged label entry.
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    fa = FaceAligner(predictor, desiredFaceWidth=256)
    # Loop-invariant: load the Haar cascade once instead of once per image.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    for image_path, value in zip(X_train, y_train):
        imageP = image_path[0].decode("utf-8")
        img = cv2.imread(imageP, 1)
        if img is None:
            # cv2.imread signals a missing/undecodable file by returning None;
            # skip it instead of letting the denoiser crash the generator.
            continue
        denoised_image = cv2.fastNlMeansDenoisingColored(img, None, 5, 6, 7, 21)

        gray = cv2.cvtColor(denoised_image, cv2.COLOR_BGR2GRAY)
        # Detect the face(s); the second argument upsamples the image once.
        rects = detector(gray, 1)

        # Reset per image. Without this, an image with no detection would
        # silently reuse the aligned face of a previous image and could
        # overwrite the current file with somebody else's crop.
        face_aligned = None
        try:
            for rect in rects:
                face_aligned = fa.align(img, gray, rect)
            if face_aligned is None:
                # No face found by dlib: skip the sample.
                continue

            gray_aligned = cv2.cvtColor(face_aligned, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray_aligned, 1.3, 5)

            try:
                for (x, y, w, h) in faces:
                    roi_color = face_aligned[y:y+h, x:x+w]
                    # WARNING: overwrites the original image file in place.
                    cv2.imwrite(imageP, roi_color)
            except Exception:
                # Best effort: a failed crop/write skips the sample.
                continue
        except Exception:
            # Best effort: a failure while aligning or detecting skips the
            # sample. Unlike a bare ``except:``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            continue

        image = preprocess_image([bytes(imageP, 'utf-8')])
        yield image, value

#### Building tf.data datasets from the train and test splits

In [16]:
def _make_dataset(paths, labels):
    """Wrap ``yield_training_values`` for one split in a ``tf.data.Dataset``.

    Args:
        paths: Array of shape (n, 1) holding the image paths of the split.
        labels: Array of shape (n, 1) holding the matching class ids.

    Returns:
        A dataset of ``(image, label)`` pairs: float32 images of shape
        (227, 227, 3) and float32 labels of shape (1,).
    """
    # output_signature replaces the deprecated output_types/output_shapes pair
    # and declares the same dtypes and shapes.
    return tf.data.Dataset.from_generator(
        yield_training_values,
        args=[paths, labels],
        output_signature=(
            tf.TensorSpec(shape=(227, 227, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(1,), dtype=tf.float32),
        ),
    )


ds_train = _make_dataset(X_train, y_train)
ds_test = _make_dataset(X_test, y_test)

### Shuffling the data

In [17]:
AUTOTUNE = tf.data.AUTOTUNE
# Order matters: cache() sits directly after the generator, so the face
# detection / cropping runs once per sample and later epochs replay the cached
# tensors; shuffle() comes after cache() so each epoch is reshuffled.
# NOTE(review): cache() without a filename keeps everything in memory. One
# 227x227x3 float32 image is ~0.6 MB, so the training split can need on the
# order of 100 GB -- consider cache(filename) or a pre-processing pass.
ds_train = ds_train.cache().shuffle(buffer_size=1000).batch(32).prefetch(buffer_size=AUTOTUNE)
# NOTE(review): shuffling the evaluation set does not change its metrics; the
# shuffle buffer here only costs memory.
ds_test = ds_test.cache().shuffle(buffer_size=1000).batch(32).prefetch(buffer_size=AUTOTUNE)

## Model Implementation

### Data Augmentation

In [18]:
# Random augmentation layers; Keras applies them during training only and
# passes inputs through unchanged at inference.
# NOTE(review): vertical flips and rotations this large work against the face
# alignment done earlier -- consider "horizontal" flips and a smaller factor.
data_augmentation = tf.keras.Sequential([
  # Random left-right and up-down flips.
  tf.keras.layers.RandomFlip("horizontal_and_vertical"),
  # Random rotation; the factor is a fraction of 2*pi, i.e. up to about +/-72 degrees.
  tf.keras.layers.RandomRotation(0.2),
  # Random zoom of up to +/-20% on height and width (separate factors).
  tf.keras.layers.RandomZoom(0.2,0.2),
])

### Model Building

In [19]:
# CNN for 4-way age-bucket classification on 227x227 RGB inputs: four
# conv / batch-norm / max-pool stages followed by two dropout-regularised
# dense layers and a softmax output.
model = keras.models.Sequential([
    # Declare the input shape on the model itself. The original passed
    # input_shape to the first Conv2D, but that layer is not the first layer
    # of the Sequential (data_augmentation is), so the model stayed unbuilt
    # until the first fit() call.
    keras.Input(shape=(227, 227, 3)),
    data_augmentation,
    keras.layers.Conv2D(filters=96, kernel_size=(7,7), strides=(4,4), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.5),
    # The L2 factor is passed positionally: the keyword is `l` in old Keras
    # releases and `l2` in newer ones, so a positional argument works in both.
    keras.layers.Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
    keras.layers.Dropout(0.5),
    # One output unit per encoded age bucket (class ids 0-3).
    keras.layers.Dense(4, activation='softmax')
])

In [20]:
# Location of the weight checkpoint written during training.
checkpoint_path = "model_checkpoints_weights/imdbwiki/age_checkpoint_18jan.ckpt"

# Create a ModelCheckpoint callback that saves the model's weights only.
# No `monitor` is given, so Keras' default monitored quantity (val_loss)
# decides which epoch counts as "best".
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                         save_weights_only=True, # set to False to save the entire model
                                                         save_best_only=True, # only overwrite the checkpoint when the monitored value improves
                                                         save_freq="epoch", # check for a save at the end of every epoch
                                                         verbose=1)

In [21]:
# Disabled alternative optimizer, kept for reference:
#adam = tf.keras.optimizers.Adam(learning_rate=0.001) 
# Plain SGD at a fixed learning rate (no momentum argument is given).
sgd = SGD(learning_rate=0.001)
# Sparse categorical cross-entropy consumes the integer class ids directly, so
# the labels are not one-hot encoded.
model.compile(optimizer=sgd, loss = tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])

### Training the Model

In [None]:
# Train for 40 epochs, validating on ds_test after each epoch; the checkpoint
# callback writes the weights whenever the monitored value improves.
history = model.fit(ds_train, validation_data=ds_test, epochs=40, callbacks = [checkpoint_callback])

Epoch 1/40
   1513/Unknown - 35220s 23s/step - loss: 6.2200 - accuracy: 0.4459

In [None]:
# Plot the validation and training data separately
def plot_loss_curves(history):
    """
    Draw training-vs-validation curves for loss and for accuracy.

    The loss curves go onto the current matplotlib figure; the accuracy
    curves go onto a newly created one. Nothing is returned.

    history: object whose ``history`` dict holds the per-epoch lists
        'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    records = history.history
    # Read every series up front so a missing key fails before anything is drawn.
    train_loss, valid_loss = records['loss'], records['val_loss']
    train_acc, valid_acc = records['accuracy'], records['val_accuracy']
    epoch_axis = range(len(train_loss))

    panels = (
        ('Loss', train_loss, 'training_loss', valid_loss, 'val_loss'),
        ('Accuracy', train_acc, 'training_accuracy', valid_acc, 'val_accuracy'),
    )
    for position, (title, train_series, train_label, valid_series, valid_label) in enumerate(panels):
        if position:
            # Every panel after the first gets its own figure.
            plt.figure()
        plt.plot(epoch_axis, train_series, label=train_label)
        plt.plot(epoch_axis, valid_series, label=valid_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.legend()

In [None]:
# Save the whole trained model to a single HDF5 (.h5) file.
model.save("models/imdbwiki/age_26nov.h5") 

In [None]:
# Draw the loss and accuracy curves recorded during training.
plot_loss_curves(history)

In [None]:
# Compute the loss and accuracy of the trained model on the test dataset.
model.evaluate(ds_test)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()