## Import Modules

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input

## Load the Dataset

In [2]:
# Folder that holds the image files. The label-extraction cell below lists it
# with os.listdir and joins each filename onto this path.
BASE_DIR = '../input/utkface-new/UTKFace/'

### Extracting image paths, age labels, gender labels and race labels from file names

In [3]:
# Parallel lists, one entry per file: full path plus the age / gender / race
# fields taken from the '_'-separated filename.
image_paths, age_labels, gender_labels, race_labels = [], [], [], []

for fname in tqdm(os.listdir(BASE_DIR)):
    parts = fname.split('_')
    # age and gender are parsed to int right away; race is kept as the raw
    # string token and converted later, after cleaning
    age, gender, race = int(parts[0]), int(parts[1]), parts[2]
    image_paths.append(os.path.join(BASE_DIR, fname))
    age_labels.append(age)
    gender_labels.append(gender)
    race_labels.append(race)

In [4]:
# first five age labels
age_labels[:5]

In [5]:
# first five gender labels
gender_labels[:5]

In [6]:
# first five race labels (still raw strings at this point)
race_labels[:5]

### Converting image paths, age, gender and race to a DataFrame

In [7]:
# Gather the four parallel lists into a single DataFrame, one column each.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_labels,
    'race': race_labels,
})
df.head()

### Checking for inconsistencies in the race labels, since they have not yet been converted to integers

In [8]:

# list the distinct raw race tokens to spot values that are not integer codes
df['race'].unique()

There are some values such as '20170109142408075.jpg.chip.jpg',
'20170109150557335.jpg.chip.jpg' and '20170116174525125.jpg.chip.jpg',
which prevent the race column from being converted to integers.

In [9]:
# data type of the race column (before it is converted to integers)
df['race'].dtype

The data type of the race feature is `object`, which shows that it is non-numerical, so race must be converted to a numerical type.

### replacing abnormal values with null values

In [10]:
# Turn every race token into a number; anything that cannot be parsed as one
# (for example the '...jpg.chip.jpg' leftovers seen above) becomes NaN so that
# the dropna() step removes it. Coercing instead of replacing a hard-coded list
# of three known-bad strings also covers any other malformed filename, and the
# cell stays safe to re-run.
df['race'] = pd.to_numeric(df['race'], errors='coerce')

### removing null values

In [11]:
# Drop the rows flagged as NaN and renumber the index 0..n-1. Without the
# reset the index keeps gaps where rows were removed, and later cells use the
# index as a position (df['image'][0], subplot slot = index + 1).
df = df.dropna().reset_index(drop=True)

In [12]:
# re-check the distinct race values after the abnormal rows were dropped
df['race'].unique()

Now race can be converted to a numerical type, because the abnormal values have been removed.

### converting race into numerical feature

In [13]:
# element-wise int() over the race column
df['race'] = df['race'].map(int)

### map labels for gender and race

In [14]:

# Integer code -> human-readable label, used for plot titles and predictions.
gender_dict = dict(enumerate(['Male', 'Female']))
race_dict = dict(enumerate([
    'White',
    'Black',
    'Asian',
    'Indian',
    'Others (like Hispanic, Latino, Middle Eastern)',
]))

## Exploratory Data Analysis

In [15]:
# image in the first row
from PIL import Image  # Image is also used by extract_features further down
# .iloc[0] selects by position; df['image'][0] is a label lookup and raises
# KeyError if the row labelled 0 was removed by dropna()
img = Image.open(df['image'].iloc[0])
plt.axis('off')
plt.imshow(img);

### histogram to show distribution of age feature

In [16]:
# distribution plot of the age column
# NOTE(review): distplot is deprecated in recent seaborn releases (histplot /
# displot are the suggested replacements) — confirm against the installed version.
sns.distplot(df['age'])

### Barchart to show value counts in gender feature

In [17]:
# Pass the column as `x` by keyword: newer seaborn releases treat the first
# positional argument as `data`, which no longer gives one bar per gender code.
sns.countplot(x=df['gender'])

### Barchart to show value counts in race feature

In [18]:
# Pass the column as `x` by keyword: newer seaborn releases treat the first
# positional argument as `data`, which no longer gives one bar per race code.
sns.countplot(x=df['race'])

### Showing the first 25 images with age, gender and race

In [19]:
# to display a 5x5 grid of the first 25 images with their labels
plt.figure(figsize=(20, 20))
files = df.iloc[0:25]

# The subplot slot comes from enumerate (1..25), not from the DataFrame index:
# after dropna() the index labels can have gaps, so label + 1 may exceed 25
# and make plt.subplot raise.
for position, (file, age, gender, race) in enumerate(files.itertuples(index=False), start=1):
    plt.subplot(5, 5, position)
    img = load_img(file)
    img = np.array(img)
    plt.imshow(img)
    plt.title(f"Age: {age} Gender: {gender_dict[gender]} Race:{race_dict[race]}")
    plt.axis('off')

## Feature Extraction

In [20]:
# this function is used to resize images and convert images to arrays
def extract_features(images, target_size=(128, 128)):
    """Load images as grayscale, resize them and stack them into one array.

    Parameters
    ----------
    images : iterable of str
        Paths of the image files to load.
    target_size : tuple of int, optional
        (width, height) every image is resized to. Defaults to (128, 128),
        the size the model's input layer expects.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(images), height, width, 1).
    """
    width, height = target_size
    features = []
    for image in tqdm(images):
        # color_mode='grayscale' replaces the deprecated grayscale=True flag
        img = load_img(image, color_mode='grayscale')
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        img = img.resize((width, height), Image.LANCZOS)
        img = np.array(img)
        features.append(img)

    features = np.array(features)
    # add the trailing single-channel axis; ignore this step if using RGB
    features = features.reshape(len(features), height, width, 1)
    return features

In [None]:
# run extract_features over every path in df['image']; the result is the model input
X = extract_features(df['image'])

X is the input to the CNN model; it contains all the image arrays.

In [None]:
# (number of images, 128, 128, 1) as produced by the reshape in extract_features
X.shape

There are 23705 images. Every image has a shape of (128, 128, 1).

#### normalizing the image arrays

In [None]:
# Scale pixel values by 1/255. Casting to float32 first keeps the result in
# float32 instead of float64, halving the memory of the full image array.
# NOTE: this cell is not idempotent — re-running it without re-creating X
# scales the data a second time.
X = X.astype(np.float32) / 255.0

#### Converting gender, race and age labels into one-dimensional arrays

In [None]:
# One NumPy target array per model output, taken from the DataFrame columns.
y_gender, y_race, y_age = (np.array(df[column]) for column in ('gender', 'race', 'age'))

## Model Creation

This section builds the convolutional neural network (CNN) model. <br>
The CNN model contains convolutional, max-pooling, flatten, dense, dropout and output layers. <br>
Sigmoid, ReLU and softmax are the activation functions used.

In [None]:
# Multi-output CNN: one shared convolutional trunk, then three parallel
# dense heads (gender, race, age) that all read the same flattened features.
input_shape = (128, 128, 1)
inputs = Input((input_shape))
# convolutional layers: four Conv2D(3x3, relu) + MaxPooling2D(2x2) stages,
# with the filter count doubling each stage (32 -> 64 -> 128 -> 256)
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu') (inputs)
maxp_1 = MaxPooling2D(pool_size=(2, 2)) (conv_1)
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu') (maxp_1)
maxp_2 = MaxPooling2D(pool_size=(2, 2)) (conv_2)
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu') (maxp_2)
maxp_3 = MaxPooling2D(pool_size=(2, 2)) (conv_3)
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu') (maxp_3)
maxp_4 = MaxPooling2D(pool_size=(2, 2)) (conv_4)

# flatten the last pooled feature map so it can feed the dense layers
flatten = Flatten() (maxp_4)

# fully connected layers: one independent 256-unit branch per task,
# each connected directly to the shared flattened features
dense_1 = Dense(256, activation='relu') (flatten)
dense_2 = Dense(256, activation='relu') (flatten)
dense_3 = Dense(256, activation='relu') (flatten)

# dropout (rate 0.3) on every branch
dropout_1 = Dropout(0.3) (dense_1)
dropout_2 = Dropout(0.3) (dense_2)
dropout_3 = Dropout(0.3) (dense_3)

# output heads; the layer names become the prefixes of the history keys
# read by the plotting cells (e.g. 'gender_out_accuracy')
output_1 = Dense(1, activation='sigmoid', name='gender_out') (dropout_1)  # single sigmoid unit
output_2 = Dense(5, activation='softmax', name='race_out') (dropout_2)  # one unit per entry in race_dict
output_3 = Dense(1, activation='relu', name='age_out') (dropout_3)  # relu keeps the predicted age >= 0

# output order (gender, race, age) must match the loss list and the y list used later
model = Model(inputs=[inputs], outputs=[output_1,output_2,output_3])



### compilation of model

In [None]:
# One loss per output, in the order the outputs were passed to Model(...):
# gender_out -> binary cross-entropy, race_out -> sparse categorical
# cross-entropy, age_out -> mean absolute error.
# Metrics are listed per output too: accuracy is only meaningful for the two
# classification heads, so the age regression head reports MAE instead of an
# accuracy computed on a continuous target. The history keys
# 'gender_out_accuracy' and 'race_out_accuracy' used by the plots are unchanged.
model.compile(
    loss=['binary_crossentropy', 'sparse_categorical_crossentropy', 'mae'],
    optimizer='adam',
    metrics=[['accuracy'], ['accuracy'], ['mae']],
)

### Plotting the model

In [None]:
# plot the model: draws the layer graph of `model` (shared trunk and three output heads)
from tensorflow.keras.utils import plot_model
plot_model(model)

### train model across inputs and outputs

In [None]:

# Train all three heads together. The targets are listed in the same order as
# the model outputs (gender, race, age); 20% of the data is held out for validation.
# NOTE(review): batch_size=3 is unusually small — confirm it is intended.
history = model.fit(
    x=X,
    y=[y_gender, y_race, y_age],
    batch_size=3,
    epochs=10,
    validation_split=0.2,
)

### saving model

In [None]:
# Write the trained model to an .h5 file, then confirm on stdout.
model_file = "cnn_model_for_gender_race_and_age.h5"
model.save(model_file)
print("Saved model to disk")

## Plot the Results

##### plot results for gender

In [None]:

# Training vs. validation curves for the gender head, read from the fit history.
acc = history.history['gender_out_accuracy']
val_acc = history.history['val_gender_out_accuracy']
epochs = range(len(acc))

# first figure: accuracy per epoch
for values, fmt, label in ((acc, 'b', 'Training Accuracy'),
                           (val_acc, 'r', 'Validation Accuracy')):
    plt.plot(epochs, values, fmt, label=label)
plt.title('Accuracy Graph')
plt.legend()
plt.figure()  # open a new figure so the loss curves are drawn separately

loss = history.history['gender_out_loss']
val_loss = history.history['val_gender_out_loss']

# second figure: loss per epoch
for values, fmt, label in ((loss, 'b', 'Training Loss'),
                           (val_loss, 'r', 'Validation Loss')):
    plt.plot(epochs, values, fmt, label=label)
plt.title('Loss Graph')
plt.legend()

##### plot results for race

In [None]:
# plot results for race
acc = history.history['race_out_accuracy']
val_acc = history.history['val_race_out_accuracy']
epochs = range(len(acc))

plt.plot(epochs, acc, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Accuracy Graph')
plt.legend()
plt.figure()  # open a new figure so the loss curves are drawn separately

loss = history.history['race_out_loss']
# validation keys are 'val_' + output name + '_loss'; the previous key
# 'race_gender_out_loss' does not exist in the history and raised KeyError
val_loss = history.history['val_race_out_loss']

plt.plot(epochs, loss, 'b', label='Training Loss')
plt.plot(epochs, val_loss, 'r', label='Validation Loss')
plt.title('Loss Graph')
plt.legend()

##### plot results for age

In [None]:
# Loss curves for the age head (training in blue, validation in red).
loss = history.history['age_out_loss']
val_loss = history.history['val_age_out_loss']
epochs = range(len(loss))

for values, fmt, label in ((loss, 'b', 'Training Loss'),
                           (val_loss, 'r', 'Validation Loss')):
    plt.plot(epochs, values, fmt, label=label)
plt.title('Loss Graph')
plt.legend()
plt.show()

# Prediction with Test Data

In [None]:
# Compare the stored labels of one image with the model's predictions.
image_index = 100
print("Original Gender:", gender_dict[y_gender[image_index]], "Original Race:", race_dict[y_race[image_index]], "Original Age:", y_age[image_index])
# predict from model: pred holds one array per output, in the order
# gender_out, race_out, age_out, each with a leading batch axis of size 1
pred = model.predict(X[image_index].reshape(1, 128, 128, 1))
# gender_out is one sigmoid value -> round it to the 0/1 class code
pred_gender = gender_dict[int(round(float(pred[0][0][0])))]
# race_out is a 5-way softmax -> pick the index of the largest probability
# (rounding only the first probability could never yield classes 2-4)
pred_race = race_dict[int(np.argmax(pred[1][0]))]
pred_age = int(round(float(pred[2][0][0])))
print("Predicted Gender:", pred_gender, "Predicted Race:", pred_race, "Predicted Age:", pred_age)
plt.axis('off')
plt.imshow(X[image_index].reshape(128, 128), cmap='gray');