# Gender and Age Prediction

This notebook uses Kaggle's **AGE, GENDER AND ETHNICITY (FACE DATA) CSV** dataset, which can be found [here](https://www.kaggle.com/nipunarora8/age-gender-and-ethnicity-face-data-csv).

In [1]:
# Required downloads

!pip install -q kaggle

In [2]:
# Required modules

import os
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from PIL import Image
from zipfile import ZipFile
from matplotlib import pyplot as plt

In [3]:
# Some configuration

%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 7)

In [4]:
# Copy the Kaggle credentials file (kaggle.json in the working directory)
# into ~/.kaggle/, creating that directory if needed, and restrict the copy
# to owner read/write (mode 600).

!mkdir -p ~/.kaggle/
!cp ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
# Download the dataset archive with the Kaggle CLI
# (no target path is passed on the command line)

!kaggle datasets download -d nipunarora8/age-gender-and-ethnicity-face-data-csv

age-gender-and-ethnicity-face-data-csv.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
# Extract the dataset

# Open the archive relative to the working directory, like every other path in
# this notebook, instead of through a Colab-specific absolute '/content/...' path.
with ZipFile('age-gender-and-ethnicity-face-data-csv.zip', 'r') as zf:
    zf.extractall('./')

In [7]:
# Read the extracted CSV into a DataFrame and preview its first rows

csv_path = 'age_gender.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,129 128 128 126 127 130 133 135 139 142 145 14...
1,1,2,0,20161219222752047.jpg.chip.jpg,164 74 111 168 169 171 175 182 184 188 193 199...
2,1,2,0,20161219222832191.jpg.chip.jpg,67 70 71 70 69 67 70 79 90 103 116 132 145 155...
3,1,2,0,20161220144911423.jpg.chip.jpg,193 197 198 200 199 200 202 203 204 205 208 21...
4,1,2,0,20161220144914327.jpg.chip.jpg,202 205 209 210 209 209 210 211 212 214 218 21...


In [8]:
# Inspecting the data: info() prints column dtypes and non-null counts,
# describe() is displayed as the cell result with summary statistics

data.info()
data.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23705 entries, 0 to 23704
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   age        23705 non-null  int64 
 1   ethnicity  23705 non-null  int64 
 2   gender     23705 non-null  int64 
 3   img_name   23705 non-null  object
 4   pixels     23705 non-null  object
dtypes: int64(3), object(2)
memory usage: 926.1+ KB


Unnamed: 0,age,ethnicity,gender
count,23705.0,23705.0,23705.0
mean,33.300907,1.269226,0.477283
std,19.885708,1.345638,0.499494
min,1.0,0.0,0.0
25%,23.0,0.0,0.0
50%,29.0,1.0,0.0
75%,45.0,2.0,1.0
max,116.0,4.0,1.0


In [9]:
# Normalise image names: keep the text before the first '.' and append '.jpg'

def _to_jpg_name(original_name):
    """Return the part of ``original_name`` before its first dot, plus '.jpg'."""
    stem, _sep, _rest = original_name.partition('.')
    return stem + '.jpg'

data['img_name'] = data['img_name'].map(_to_jpg_name)

In [10]:
# Important directory paths

# Directory the decoded face images are written to and later read back from
main_dir = './img_data/'

In [11]:
# Required functions

def row2img(row, save_dir, shape=(48, 48)):
    """Decode one record's pixel string and save it as a grayscale JPEG.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'pixels' entry
            holding whitespace-separated integer intensities and an
            'img_name' entry holding the output file name.
        save_dir: Directory the image is written to. A trailing path
            separator is optional.
        shape: (height, width) the flat pixel list is reshaped to.

    Raises:
        ValueError: If the number of pixel values does not match ``shape``
            (raised by the reshape).
    """
    pixel_values = [int(value) for value in row['pixels'].split()]
    img_data = np.array(pixel_values, dtype='uint8').reshape(shape)

    img = Image.fromarray(img_data)
    # os.path.join instead of string concatenation, so a save_dir without a
    # trailing slash no longer produces a mangled file name.
    img.save(os.path.join(save_dir, row['img_name']), format='jpeg')

In [12]:
# Convert pixels to images

# exist_ok=True makes the cell safe to re-run and also creates any missing
# parent directories, replacing the separate isdir check + mkdir.
os.makedirs(main_dir, exist_ok=True)

# Plain loop: the work is a side effect (writing files), so there is no result
# to keep, and `_` (IPython's last-output variable) is not overwritten.
for _row_index, row in data.iterrows():
    row2img(row, main_dir)

In [13]:
# Keep only the two target columns and the image file name

keep_cols = ['age', 'gender', 'img_name']
actual_data = data.loc[:, keep_cols].copy()

In [14]:
# Getting the data

img_h = 48
img_w = 48

# Keras picks the validation subset by row position (it does not shuffle before
# splitting). The CSV rows appear to be ordered by age (head() shows only
# age 1), so shuffle once with a fixed seed to make both subsets representative.
split_seed = 42
shuffled_data = actual_data.sample(frac=1, random_state=split_seed).reset_index(drop=True)

# Training generator: rescale to [0, 1] plus random flips.
# NOTE(review): vertical_flip produces upside-down faces - confirm this is intended.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2
)

# Validation generator: rescale only, so validation loss is measured on
# un-augmented images. The same validation_split keeps the subsets disjoint.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.,
    validation_split=0.2
)

# class_mode="raw" yields the targets as one (batch, 2) array, which matches
# the model's single 2-unit output; "multi_output" would yield a list of two
# separate arrays instead.
train_gen = datagen.flow_from_dataframe(
    dataframe=shuffled_data,
    directory=main_dir,
    subset='training',
    target_size=(img_h, img_w),
    x_col='img_name',
    y_col=['age', 'gender'],
    class_mode="raw"
)

test_gen = val_datagen.flow_from_dataframe(
    dataframe=shuffled_data,
    directory=main_dir,
    subset='validation',
    target_size=(img_h, img_w),
    x_col='img_name',
    y_col=['age', 'gender'],
    class_mode="raw",
    shuffle=False
)

Found 18964 validated image filenames.
Found 4741 validated image filenames.


In [15]:
# Getting the model

# Keras image tensors are (height, width, channels), the same order used for
# target_size=(img_h, img_w) when the generators are built.
input_layer = tf.keras.layers.Input(shape=(img_h, img_w, 3))

# VGG16 convolutional base with ImageNet weights and without its classifier head
pretrained = tf.keras.applications.vgg16.VGG16(include_top=False, weights='imagenet', input_tensor=input_layer)

# Fine-tune the whole backbone: every VGG16 layer stays trainable
for layer in pretrained.layers:
    layer.trainable = True

# Head on top of the flattened features: 128 -> 64 -> 2 units, the last layer
# linear with one unit per target column (age, gender)
flatten = tf.keras.layers.Flatten()(pretrained.output)
dense = tf.keras.layers.Dense(128, activation='relu')(flatten)
dense = tf.keras.layers.Dense(64, activation='relu')(dense)
dense = tf.keras.layers.Dense(2)(dense)

model = tf.keras.models.Model(inputs=pretrained.input, outputs=dense)

In [16]:
# Model Summary: print each layer with its output shape and parameter count

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 48, 48, 3)]       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 48, 48, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 48, 48, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 24, 24, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 24, 24, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0     

In [17]:
# Model Hyperparameters

# Number of training epochs
epochs = 30
# NOTE(review): the data generators are created in an earlier cell without
# this value - confirm it matches the batch size they actually use.
batch_size = 32

In [18]:
# Compiling the model

# Mean squared error is the only loss, applied to the model's 2-unit output.
# NOTE(review): age and the 0/1 gender label share this one loss although
# their scales differ a lot - confirm this is intended.
loss = ['mse']
# No metrics are passed to compile(); an earlier draft sketched an accuracy metric here.
optim = tf.keras.optimizers.Adam(learning_rate=0.0001)

model.compile(optimizer=optim, loss=loss)

In [19]:
# Training callbacks

# Save weights to disk only when val_loss reaches a new minimum
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'model_weights.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by sqrt(0.1) after 5 epochs without val_loss improvement
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=np.sqrt(0.1),
    patience=5,
)

callbacks = [model_checkpoint, reduce_lr]

In [20]:
# Fitting the model

# The generators already yield batches, so batch_size is not passed here
# (Keras documents that it must not be specified for generator/Sequence input).
# The returned History is kept so the loss curves can be inspected afterwards.
history = model.fit(train_gen, validation_data=test_gen, epochs=epochs, callbacks=callbacks, shuffle=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f74c01a09d0>