# Gender Classification using Facial Images

#### Import Statements

In [1]:
import numpy as np 
import plotly.express as px 
import pandas as pd 
import cv2 
import os 
from glob import glob
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Activation, Dropout, Flatten, Dense, Dropout, LayerNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import matplotlib.pyplot as plt
import pickle 

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

#added import for sgd
from tensorflow.keras.optimizers import SGD

# #for Landmark detection
# import dlib
import dlib

import dlib

from imutils.face_utils import FaceAligner
from imutils.face_utils import rect_to_bb

import imutils

from tensorflow.keras.metrics import Precision, Recall



## Data Retrieval and Cleaning

In [2]:
# Load the image metadata CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('sample_IMDB_WIKI_non_gray.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,age,gender,path,AgeRange
0,24,male,wiki_crop/09/43981209_1990-07-17_2015.jpg,Youth
1,41,male,imdb_crop/08/nm0651008_rm1017367040_1970-10-15...,Adult
2,33,female,imdb_crop/01/nm0000701_rm1272548096_1975-10-5_...,Youth
3,30,female,imdb_crop/35/nm0001435_rm233299200_1963-7-30_1...,Youth
4,33,male,imdb_crop/29/nm0005129_rm2932918528_1976-4-20_...,Youth
...,...,...,...,...
2106,17,male,imdb_crop/07/nm0430107_rm3292896000_1987-2-9_2...,Youth
2107,38,male,imdb_crop/17/nm0000417_rm3158022912_1964-4-20_...,Adult
2108,12,female,wiki_crop/30/24972730_2000-04-09_2013.jpg,Kid
2109,56,male,imdb_crop/47/nm0000547_rm3455170816_1953-5-24_...,Adult


#### Dropping null values and shuffling the rows

In [3]:
# Drop rows with any missing value, then shuffle the row order.
# The fixed random_state makes the shuffle deterministic; without it the
# seeded train/test split further down still produced a different partition
# on every fresh run, because its input order changed each time.
df = df.dropna().sample(frac=1, random_state=42)

In [4]:
# Row/column count after dropping rows with missing values.
df.shape

(2111, 4)

#### Building the full image path from the dataframe's `path` column

In [5]:
# Root directory that the CSV's relative `path` values are resolved against.
# Defaults to the original location; set the GENDER_DATA_DIR environment
# variable to point the notebook at a dataset stored elsewhere.
DATA_DIR = os.environ.get('GENDER_DATA_DIR', '/age and gender prediction/project/data/')

# os.path.join only inserts a separator when DATA_DIR lacks a trailing one,
# so the default yields exactly the same strings as plain concatenation.
df['image_path'] = df['path'].map(lambda rel_path: os.path.join(DATA_DIR, rel_path))

In [6]:
# Display the frame to check the newly added image_path column.
df

Unnamed: 0,age,gender,path,AgeRange,image_path
1325,20,male,imdb_crop/56/nm1409956_rm1012765184_1986-1-29_...,Youth,/age and gender prediction/project/data/imdb_c...
1542,53,male,imdb_crop/35/nm0866835_rm3490183936_1917-5-7_1...,Adult,/age and gender prediction/project/data/imdb_c...
945,44,male,imdb_crop/32/nm0000332_rm3246296832_1964-11-29...,Adult,/age and gender prediction/project/data/imdb_c...
994,35,male,imdb_crop/16/nm0358316_rm224965632_1971-3-10_2...,Adult,/age and gender prediction/project/data/imdb_c...
764,25,female,imdb_crop/33/nm2079733_rm539592960_1983-12-2_2...,Youth,/age and gender prediction/project/data/imdb_c...
...,...,...,...,...,...
626,22,male,wiki_crop/42/32809942_1990-05-09_2013.jpg,Youth,/age and gender prediction/project/data/wiki_c...
130,28,female,imdb_crop/57/nm0005057_rm3466959616_1970-10-6_...,Youth,/age and gender prediction/project/data/imdb_c...
1445,45,male,imdb_crop/05/nm0121605_rm2110238208_1966-11-25...,Adult,/age and gender prediction/project/data/imdb_c...
281,47,female,imdb_crop/61/nm0000161_rm3077108224_1966-9-2_2...,Adult,/age and gender prediction/project/data/imdb_c...


In [7]:
# Work on a copy so that `df` keeps the original string gender labels.
new_df = df.copy()

#### Mapping target to float value

In [8]:
# Encode the target as float32: 1.0 for 'male', 0.0 for every other value.
# The labels are read from `df` (which still holds the original strings)
# rather than from `new_df`, so the cell is idempotent: re-running it does
# not compare already-encoded floats against 'male' and turn every label
# into 0. The vectorised comparison gives the same result as a per-row apply.
new_df['gender'] = (df['gender'] == 'male').astype(np.float32)

In [9]:
# Class balance check: number of samples per encoded gender value.
new_df.gender.value_counts()

1.0    1201
0.0     910
Name: gender, dtype: int64

## Image Preprocessing

#### Preparing to split for train and test set

In [10]:
# Double-bracket selection keeps both arrays two-dimensional (one column each):
# image paths are the features, the encoded gender is the label.
X = new_df[['image_path']].to_numpy()
y = new_df[['gender']].to_numpy()

#### Train Test Split

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state fixes the partition
# for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Decoding JPEG images and resizing them to a uniform 227×227 shape

In [12]:
def preprocess_image(individual_path):
    """Read one JPEG file and resize it to 227x227.

    Args:
        individual_path: The image path, either as a scalar or wrapped in a
            (possibly nested) sequence/array; only the first flattened
            element is used.

    Returns:
        The resized image tensor with three colour channels
        (shape ``(227, 227, 3)``).
    """
    path = np.array(individual_path).ravel()[0]
    raw_bytes = tf.io.read_file(path)
    # Force three channels: without `channels=3` a grayscale JPEG decodes to a
    # single channel and no longer matches the (227, 227, 3) element shape the
    # datasets and the network expect.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, [227, 227])

In [13]:
# Inspect the training features: one single-element row (the image path) per sample.
X_train

array([['/age and gender prediction/project/data/wiki_crop/66/6902966_1970-08-19_2011.jpg'],
       ['/age and gender prediction/project/data/wiki_crop/76/98676_1936-10-16_1990.jpg'],
       ['/age and gender prediction/project/data/imdb_crop/58/nm0001458_rm3537619200_1926-4-30_1975.jpg'],
       ...,
       ['/age and gender prediction/project/data/imdb_crop/68/nm1310368_rm1903199232_1980-6-24_2010.jpg'],
       ['/age and gender prediction/project/data/imdb_crop/25/nm3232025_rm2186722560_1986-2-15_2009.jpg'],
       ['/age and gender prediction/project/data/imdb_crop/74/nm3158974_rm160494336_1988-6-12_2015.jpg']],
      dtype=object)

In [14]:
# Spot-check the path stored for a single training sample.
X_train[100]

array(['/age and gender prediction/project/data/imdb_crop/06/nm0308606_rm125104128_1964-2-20_2014.jpg'],
      dtype=object)

In [15]:
# img = cv2.imread(X_train[100][0], 1)
# cv2.imshow('img', img)
# cv2.waitKey(0)

## Face Detection, Face Alignment, Landmark Detection, and Image Denoising

In [16]:
def _to_model_input(bgr_image):
    """Convert an OpenCV BGR image into a 227x227 RGB image tensor."""
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    return tf.image.resize(rgb_image, [227, 227])


def yield_training_values(X_train,y_train):
    """Yield ``(image, label)`` pairs with the face aligned and cropped.

    For every path/label pair the image is read with OpenCV, a denoised
    grayscale copy is searched for faces with dlib, the (last) detected face
    is aligned with ``FaceAligner`` and then cropped to the (last) Haar
    cascade detection inside the aligned image.

    * Images that cannot be read, contain no dlib face, or fail during
      alignment/detection are skipped.
    * If the Haar cascade finds nothing in the aligned image, the original
      full image is yielded.

    The crop is produced in memory; the source files on disk are never
    modified, so the generator gives the same result every time it runs.

    Args:
        X_train: Iterable of single-element rows holding the image path
            (``bytes``, as delivered by ``tf.data.Dataset.from_generator``,
            or ``str``).
        y_train: Iterable of labels, aligned with ``X_train``.

    Yields:
        Tuples ``(image, label)`` where ``image`` is a 227x227 RGB tensor and
        ``label`` is the matching element of ``y_train``.
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    fa = FaceAligner(predictor, desiredFaceWidth=227)
    # Loop-invariant: load the cascade once instead of once per image.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    for image_path, value in zip(X_train, y_train):
        raw_path = image_path[0]
        imageP = raw_path.decode("utf-8") if isinstance(raw_path, bytes) else str(raw_path)

        img = cv2.imread(imageP, 1)
        if img is None:
            # cv2.imread returns None for a missing/corrupt file; skip the
            # sample instead of crashing in the denoising call below.
            continue

        denoised_image = cv2.fastNlMeansDenoisingColored(img, None, 5, 6, 7, 21)
        gray = cv2.cvtColor(denoised_image, cv2.COLOR_BGR2GRAY)

        # Detect the face
        rects = list(detector(gray, 1))
        if not rects:
            # No face in THIS image: skip it. Never fall through with the
            # aligned face left over from a previous iteration.
            continue

        try:
            # NOTE(review): the aligned face is taken from the original `img`;
            # the denoised copy is only used for detection - confirm intended.
            faceAligned = fa.align(img, gray, rects[-1])
            gray1 = cv2.cvtColor(faceAligned, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray1, 1.3, 5)
        except Exception:
            # Best effort: a sample that cannot be aligned is skipped.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            continue

        if len(faces) > 0:
            # When several boxes are returned the last one is used.
            x, y, w, h = faces[-1]
            image = _to_model_input(faceAligned[y:y+h, x:x+w])
        else:
            image = _to_model_input(img)
        yield image, value

#### Building `tf.data` datasets from the train and test splits

In [17]:
# Element spec shared by both datasets: a 227x227 RGB float image and a
# one-element float label. `output_signature` replaces the deprecated
# `output_types` / `output_shapes` pair.
sample_signature = (
    tf.TensorSpec(shape=(227, 227, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(1,), dtype=tf.float32),
)

# The paths and labels are handed to the generator through `args`.
ds_train = tf.data.Dataset.from_generator(yield_training_values,
                                          args=[X_train, y_train],
                                          output_signature=sample_signature)


ds_test = tf.data.Dataset.from_generator(yield_training_values,
                                         args=[X_test, y_test],
                                         output_signature=sample_signature)

### Caching, shuffling, and batching the data

In [18]:
AUTOTUNE = tf.data.AUTOTUNE

# Same pipeline for both splits: cache the generated samples, shuffle with a
# 1000-element buffer, group into batches of 32, and prefetch.
ds_train = (
    ds_train
    .cache()
    .shuffle(buffer_size=1000)
    .batch(32)
    .prefetch(buffer_size=AUTOTUNE)
)
ds_test = (
    ds_test
    .cache()
    .shuffle(buffer_size=1000)
    .batch(32)
    .prefetch(buffer_size=AUTOTUNE)
)

## Model Implementation

### Data Augmentation

In [19]:
# Augmentation stack applied in order: random flips along both axes,
# random rotation (factor 0.2) and random zoom (factor 0.2 in height and width).
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip(mode="horizontal_and_vertical"))
data_augmentation.add(tf.keras.layers.RandomRotation(factor=0.2))
data_augmentation.add(tf.keras.layers.RandomZoom(height_factor=0.2, width_factor=0.2))

### Model Building

In [20]:
# Convolutional binary classifier: augmentation, four Conv/BatchNorm/MaxPool
# stages, two 512-unit dense layers with dropout, and a single sigmoid output.
model = keras.models.Sequential([
    # Declare the input shape on an explicit Input. Previously `input_shape`
    # was passed to the first Conv2D, but that layer is not the first layer of
    # the model (the augmentation block is), so the argument had no effect and
    # the model stayed unbuilt until the first call.
    keras.Input(shape=(227, 227, 3)),
    data_augmentation,
    # NOTE(review): inputs are not rescaled before this layer - confirm whether
    # pixel normalisation is wanted.
    keras.layers.Conv2D(filters=96, kernel_size=(7,7), strides=(4,4), activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(5,5), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    # Dense width changed from 4096 to 512
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.5),
    # Dense width changed from 4096 to 512
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.5),
    # Output changed from a 2-unit softmax to a single sigmoid unit,
    # matching the binary cross-entropy loss used at compile time.
    keras.layers.Dense(1, activation='sigmoid')
])

In [21]:
# Destination of the weights-only checkpoint.
checkpoint_path = "model_checkpoints_weights/imdbwiki/gender_sample_checkpoint_9mar.ckpt" # note: remember saving directly to Colab is temporary

# Checkpoint callback: checked once per epoch, writes weights only, and keeps
# just the best result seen so far instead of one file per epoch.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_freq="epoch",  # evaluate the save condition at the end of every epoch
    save_best_only=True,  # overwrite only when the monitored metric improves
    save_weights_only=True,  # set to False to save the entire model
)

In [22]:
learning_rate = 0.01
epochs = 80
decay_rate = learning_rate / epochs
momentum = 0.9

# Inverse-time decay, lr / (1 + decay_rate * step), applied per optimizer step.
# This is the same schedule the legacy `decay=` argument of SGD produced, but
# expressed as a schedule object so it also works with Keras optimizers that
# no longer accept `decay`.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=learning_rate,
    decay_steps=1,
    decay_rate=decay_rate,
)
sgd = SGD(learning_rate=lr_schedule, momentum=momentum, nesterov=False)

# Loss and optimiser changed from sparse categorical cross-entropy and Adam:
# binary cross-entropy matches the single sigmoid output unit.
model.compile(optimizer=sgd, loss = tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy', Precision() , Recall()])

### Training the Model

In [23]:
# Train for `epochs` epochs, validating on the test dataset after each one;
# the checkpoint callback stores the best weights along the way.
history = model.fit(
    ds_train,
    epochs=epochs,
    validation_data=ds_test,
    callbacks=[checkpoint_callback],
)

Epoch 1/80
     53/Unknown - 986s 8s/step - loss: 1.4814 - accuracy: 0.5101 - precision: 0.5678 - recall: 0.5838
Epoch 1: val_loss improved from inf to 18.34063, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 2/80
Epoch 2: val_loss improved from 18.34063 to 0.71126, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 3/80
Epoch 3: val_loss did not improve from 0.71126
Epoch 4/80


Epoch 4: val_loss improved from 0.71126 to 0.68265, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 5/80
Epoch 5: val_loss improved from 0.68265 to 0.67533, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 6/80
Epoch 6: val_loss did not improve from 0.67533
Epoch 7/80
Epoch 7: val_loss improved from 0.67533 to 0.67030, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 8/80
Epoch 8: val_loss did not improve from 0.67030
Epoch 9/80
Epoch 9: val_loss did not improve from 0.67030
Epoch 10/80
Epoch 10: val_loss did not improve from 0.67030
Epoch 11/80
Epoch 11: val_loss did not improve from 0.67030
Epoch 12/80
Epoch 12: val_loss improved from 0.67030 to 0.66623, saving model to model_checkpoints_weights/imdbwiki\gender_sample_checkpoint_9mar.ckpt
Epoch 13/80
Epoch 13: val_loss did not improve from 0.66623
Epoch 14/80
Epoch 14: val_loss did not improve from 0.66623

KeyboardInterrupt: 

In [None]:
# Plot the validation and training data separately
def plot_loss_curves(history):
    """
    Plot loss and accuracy curves, each on its own figure.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``). It must
            contain ``'loss'`` and ``'accuracy'``; ``'val_loss'`` and
            ``'val_accuracy'`` are drawn when present.

    Returns:
        None. The figures are created as a side effect.
    """
    metrics = history.history
    epochs = range(len(metrics['loss']))

    # (history key, figure title, legend label of the training curve)
    panels = (
        ('loss', 'Loss', 'training_loss'),
        ('accuracy', 'Accuracy', 'training_accuracy'),
    )
    for key, title, train_label in panels:
        # A fresh figure per metric, so nothing is drawn onto a figure that
        # happens to be open already.
        _, ax = plt.subplots()
        ax.plot(epochs, metrics[key], label=train_label)

        # Validation metrics are absent when fit() ran without validation data.
        val_key = 'val_' + key
        if val_key in metrics:
            ax.plot(epochs, metrics[val_key], label=val_key)

        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.legend()

## Saving the Model

In [None]:
# Save the trained model to a single .h5 file.
model_save_path = "models/imdbwiki/gender_mar9_sample.h5"
model.save(model_save_path)

In [None]:
# Needs `history` from the training cell; it is only defined once model.fit
# has returned (it is not set if training is interrupted part-way).
plot_loss_curves(history)

In [None]:
# Compute the loss and the compiled metrics on the test dataset.
model.evaluate(ds_test)

In [None]:
# Print the layer-by-layer architecture summary.
model.summary()