## Age and Gender Detection System.

### In this project, we perform classification to predict gender and regression to predict age.

In [40]:
# Import necessary modules first.
import tensorflow as tf
from tensorflow.keras.utils import load_img
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input
import numpy as np
import random
import matplotlib.pyplot as plt
import os
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

## Load the dataset

In [41]:
# Mount Google Drive so the dataset folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Make the dataset folder the working directory (the same path is assigned to BASE_DIR in a later cell).
%cd '/content/drive/MyDrive/datasett/perempuan'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/datasett/perempuan


In [42]:
# Folder that holds the image files.
BASE_DIR = '/content/drive/MyDrive/datasett/perempuan'

# Three separate, initially empty lists; a later cell appends to all of them
# together, one entry per image.
image_paths, age_labels, gender_labels = [], [], []

In [43]:
# Quick check of the working directory: list its files and print its path.
!ls
!pwd

 100.jpg    168.jpg	    251.jpg	 332.jpg     41.webp	   504.PNG
 101.jpg    169.jpg	    252.jpg	 333.jpg     420.jpg	   505.jpeg
 102.jpg    16.jpg	    253.jpg	 334.jpg     421.jpg	   506.jpeg
 102.webp   16.webp	    254.jpg	 335.jpg     422.jpg	   507.jpeg
 103.jpg    170.jpg	    254.PNG	 336.jpg     423.jpg	   508.jpeg
 103.webp   171.jpg	    255.JPG	 337.jpg     424.jpg	   509.PNG
 104.jpg    172.png	    256.jpg	 338.jpg     425.jpg	   50.jpg
 104.webp   173.JPG	    257.jpg	 339.jpg     426.jpg	   510.jpeg
 105.jpg    174.PNG	    258.jpg	 33.JPG      427.PNG	   511.jpg
 105.webp   175.jpg	    259.jpg	 33.webp     428.jpeg	   512.jpg
 106.jpg    176.PNG	    25.jpg	 341.jpg     428.PNG	   51.jpg
 106.webp   177.jpg	    25.JPG	 345.jpeg    429.PNG	   52.jpg
 107.jpg    178.jpg	    260.PNG	 345.jpg     42.jpg	   53.jpg
 107.webp   179.jpg	    261.jpg	 346.jpg     42.webp	   53.PNG
 108.jpg    17.jpg	    261.png	 347.jpg     430.jpeg	   54.jpg
 108.webp   17.webp	    262.PNG	 348.jpg    

In [44]:
# Build the three parallel lists from file names of the form
# "<age>_<gender>_<anything>", e.g. "25_1_....jpg".
#
# The lists are emptied first so that re-running this cell does not append
# every image a second time.
image_paths.clear()
age_labels.clear()
gender_labels.clear()
skipped_files = []

for filename in tqdm(os.listdir(BASE_DIR)):
    parts = filename.split('_')
    try:
        age = int(parts[0])
        gender = int(parts[1])
    except (IndexError, ValueError):
        # The name does not start with "<int>_<int>" (for example "100.jpg"),
        # so no labels can be read from it.
        skipped_files.append(filename)
        continue

    # Append to all three lists together so index i always refers to one image.
    image_paths.append(os.path.join(BASE_DIR, filename))
    age_labels.append(age)
    gender_labels.append(gender)

if skipped_files:
    print(f'Skipped {len(skipped_files)} file(s) whose names do not match '
          f'"<age>_<gender>_...": {skipped_files[:5]} ...')

if not image_paths:
    # Fail here with a clear message instead of in a later cell with an
    # unrelated-looking error.
    raise ValueError(
        f'No labelled images found in {BASE_DIR!r}: file names must look like '
        f'"<age>_<gender>_...".'
    )

  0%|          | 0/580 [00:00<?, ?it/s]

ValueError: ignored

In [45]:
# Report how many entries each of the three lists currently holds.
print(
    f'Number of age_labels: {len(age_labels)}, '
    f'Number of gender_labels: {len(gender_labels)}, '
    f'Number of image_paths: {len(image_paths)}'
)

Number of age_labels: 0, Number of gender_labels: 0, Number of image_paths: 0


In [46]:
# Integer gender code -> display name used in plot titles.
gender_mapping = dict([(1, 'Female'), (0, 'Male')])

In [None]:
import pandas as pd

# One row per image: the file path and its gender code.
df = pd.DataFrame({'image_path': image_paths, 'gender': gender_labels})
df.head()

## Exploratory Data Analysis

In [None]:
from PIL import Image

# Show one randomly chosen image together with its gender label.
# random.randrange(n) yields 0..n-1; random.randint(0, n) may also return n,
# which is one past the last row.
rand_index = random.randrange(len(df))
gender = df['gender'].iloc[rand_index]
IMG = Image.open(df['image_path'].iloc[rand_index])
plt.title(f'Gender: {gender_mapping[gender]}')
plt.axis('off')
plt.imshow(IMG);

In [None]:
 # Age distribution
 # `df` is built without an 'age' column, so the ages are read from the
 # age_labels list. histplot(kde=True, stat='density') draws the same kind of
 # histogram-plus-density figure as the deprecated distplot.
 ax = sns.histplot(np.asarray(age_labels), kde=True, stat='density')
 ax.set_xlabel('age');

The distribution roughly follows a normal distribution that is slightly skewed to the right with a median of around 27 years. The range is from 0 to 120 years. There are some outliers at the higher end of the distribution.

In [None]:
# Pass the column as `x=`: current seaborn binds the first positional argument
# to `data`, not to the variable whose values are counted.
sns.countplot(x=df['gender']);

The number of samples for females is slightly higher than that for males. However, the dataset is not highly imbalanced.

In [None]:
# Preview the first 16 images in a 4x4 grid, each titled with its gender label.
plt.figure(figsize=(20, 20))
samples = df.iloc[0:16]

# Read the two needed columns by name and number the panels with enumerate, so
# the loop does not depend on how many columns `df` has or on its index labels.
for position, (sample, gender) in enumerate(
        zip(samples['image_path'], samples['gender']), start=1):
    plt.subplot(4, 4, position)
    img = np.array(load_img(sample))
    plt.axis('off')
    plt.title(f'Gender: {gender_mapping[gender]}')
    plt.imshow(img)

## Feature Extraction

In [None]:
def extract_image_features(images, target_size=(128, 128)):
    """Load image files as grayscale arrays of one common size.

    Args:
        images: Sequence of image file paths.
        target_size: ``(height, width)`` each image is resized to.

    Returns:
        numpy.ndarray of shape ``(number_of_images, height, width, 1)`` with the
        pixel values as loaded (not scaled).
    """
    height, width = target_size
    features = []

    for image in tqdm(images):
        # `color_mode='grayscale'` is the supported spelling; the old
        # `grayscale=True` flag is deprecated and missing from newer Keras.
        img = load_img(image, color_mode='grayscale')
        # Image.LANCZOS is the filter previously reachable as Image.ANTIALIAS,
        # an alias removed in Pillow 10. PIL expects the size as (width, height).
        img = img.resize((width, height), Image.LANCZOS)
        features.append(np.array(img))

    # Add the trailing channel axis expected by Conv2D layers.
    return np.array(features).reshape(len(features), height, width, 1)

In [None]:
# Load every image listed in df['image_path'] into one feature array.
X = extract_image_features(df['image_path'])

In [None]:
# Display the feature array's dimensions; extract_image_features reshapes to (n_images, 128, 128, 1).
X.shape

In [None]:
# Scale pixel values from 0..255 to 0..1.
# Only integer-typed data is divided, so running this cell a second time leaves
# the already-scaled (float) array unchanged instead of dividing it again.
# NOTE(review): assumes the raw features are integer-typed (np.array of an
# 8-bit grayscale PIL image) - confirm if the loader changes.
if np.issubdtype(X.dtype, np.integer):
    X = X / 255.0

In [None]:
# Training targets, one entry per image.
y_gender = np.array(df['gender'])
# `df` is built without an 'age' column; the ages are in the age_labels list,
# which is appended to together with the image paths that populate `df`.
y_age = np.array(age_labels)

In [None]:
# Shape of one model input: 128 x 128 with a single channel, matching the reshape done when the features are extracted.
input_shape = (128, 128, 1)

In [None]:
# Shared convolutional trunk: four Conv2D + MaxPooling2D stages with
# 32 -> 64 -> 128 -> 256 filters.
inputs = Input((input_shape))
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
max_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(max_1)
max_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu')(max_2)
max_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu')(max_3)
max_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)

flatten = Flatten()(max_4)

# fully connected layers: one independent branch per task, both fed by `flatten`
dense_1 = Dense(256, activation='relu')(flatten)
dense_2 = Dense(256, activation='relu')(flatten)

dropout_1 = Dropout(0.3)(dense_1)
dropout_2 = Dropout(0.3)(dense_2)

# Gender head: a single sigmoid unit (binary classification).
output_1 = Dense(1, activation='sigmoid', name='gender_out')(dropout_1)
# Age head: a single ReLU unit (regression).
# NOTE(review): ReLU passes no gradient while its input is negative; consider a
# linear output if the age loss stops improving.
output_2 = Dense(1, activation='relu', name='age_out')(dropout_2)

model = Model(inputs=[inputs], outputs=[output_1, output_2])

# Losses and metrics are keyed by output-layer name so each head gets what fits
# it: accuracy only for the classifier, MAE for the regressor. A flat
# `metrics=['accuracy']` list would also attach accuracy to the age head, where
# it is meaningless, and is rejected for multi-output models by Keras 3.
model.compile(
    loss={'gender_out': 'binary_crossentropy', 'age_out': 'mae'},
    optimizer='adam',
    metrics={'gender_out': 'accuracy', 'age_out': 'mae'},
)

In [None]:
# plot the model
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
# Train both heads jointly; the targets are listed in the same order as the
# model outputs (gender first, then age). The returned History object is kept
# for the plots below.
history = model.fit(
    X,
    [y_gender, y_age],
    validation_split=0.2,
    epochs=50,
    batch_size=32,
)

## Plot Results

In [None]:
# Gender head: training vs. validation curves, accuracy first and loss second.
acc, val_acc = (history.history[key]
                for key in ('gender_out_accuracy', 'val_gender_out_accuracy'))
epochs = range(len(acc))

# First figure: accuracy per epoch.
for series, colour, legend_text in ((acc, 'b', 'Training Accuracy'),
                                    (val_acc, 'r', 'Validation Accuracy')):
    plt.plot(epochs, series, colour, label=legend_text)
plt.title('Accuracy Graph')
plt.legend()

# Open a new figure so the loss curves are not drawn over the accuracy curves.
plt.figure()

loss, val_loss = (history.history[key]
                  for key in ('gender_out_loss', 'val_gender_out_loss'))

# Second figure: loss per epoch.
for series, colour, legend_text in ((loss, 'b', 'Training Loss'),
                                    (val_loss, 'r', 'Validation Loss')):
    plt.plot(epochs, series, colour, label=legend_text)
plt.title('Loss Graph')
plt.legend()
plt.show()

In [None]:
# Age head: training vs. validation loss per epoch.
loss, val_loss = (history.history[key]
                  for key in ('age_out_loss', 'val_age_out_loss'))
epochs = range(len(loss))

for series, colour, legend_text in ((loss, 'b', 'Training Loss'),
                                    (val_loss, 'r', 'Validation Loss')):
    plt.plot(epochs, series, colour, label=legend_text)
plt.title('Loss Graph')
plt.legend()
plt.show()

## Predicting Test Data

In [None]:
def get_image_features(image, target_size=(128, 128)):
    """Load one image file as a scaled, single-sample model input.

    Args:
        image: Path of the image file.
        target_size: ``(height, width)`` the image is resized to.

    Returns:
        numpy.ndarray of shape ``(1, height, width, 1)`` with the pixel values
        divided by 255.
    """
    height, width = target_size
    # `color_mode='grayscale'` is the supported spelling; the old
    # `grayscale=True` flag is deprecated and missing from newer Keras.
    img = load_img(image, color_mode='grayscale')
    # Image.LANCZOS is the filter previously reachable as Image.ANTIALIAS,
    # an alias removed in Pillow 10. PIL expects the size as (width, height).
    img = img.resize((width, height), Image.LANCZOS)
    # Add the batch axis in front and the channel axis at the end.
    img = np.array(img).reshape(1, height, width, 1)
    return img / 255.0

In [None]:
# Run the trained model on a single test picture and show the picture with the
# predicted labels in its title.
img_to_test = '/content/drive/MyDrive/Projects/CodeClause Projects/Age and Gender Detection System/test images/1.png'
features = get_image_features(img_to_test)
pred = model.predict(features)

# `pred` is indexed as [output][sample][unit]; output 0 feeds the gender label
# and output 1 the age.
gender_score = pred[0][0][0]
age_estimate = pred[1][0][0]
gender = gender_mapping[round(gender_score)]
age = round(age_estimate)

test_image = np.array(load_img(img_to_test))
plt.title(f'Predicted Age: {age} Predicted Gender: {gender}')
plt.axis('off')
plt.imshow(test_image)