## Import Modules

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

import tensorflow as tf
from keras.preprocessing.image import load_img
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input

## Load the Dataset

In [4]:
#BASE_DIR = '../input/utkface-new/UTKFace/'
BASE_DIR = 'D:/step to ML/DataSets/UTKFace/UTKFace'

In [6]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

Collecting ipywidgets
  Obtaining dependency information for ipywidgets from https://files.pythonhosted.org/packages/b8/d4/ce436660098b2f456e2b8fdf76d4f33cbc3766c874c4aa2f772c7a5e943f/ipywidgets-8.1.0-py3-none-any.whl.metadata
  Downloading ipywidgets-8.1.0-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.7 (from ipywidgets)
  Obtaining dependency information for widgetsnbextension~=4.0.7 from https://files.pythonhosted.org/packages/8e/d4/d31b12ac0b87e8cc9fdb6ea1eb6596de405eaaa2f25606aaa755d0eebbc0/widgetsnbextension-4.0.8-py3-none-any.whl.metadata
  Downloading widgetsnbextension-4.0.8-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.7 (from ipywidgets)
  Obtaining dependency information for jupyterlab-widgets~=3.0.7 from https://files.pythonhosted.org/packages/74/5e/2475ac62faf2e342b2bf20b8d8e375f49400ecb38f52e4e0a7557eb1cedb/jupyterlab_widgets-3.0.8-py3-none-any.whl.metadata
  Downloading jupyterlab_widgets-3.0.8-py3-none-any.whl.metadata (4

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert run server troubleshoot trust

Jupyter command `jupyter-nbextension` not found.


### extracting image paths,age labels,Gender labels and race labels from file names¶

In [7]:
# labels - age, gender, ethnicity
image_paths = []
age_labels = []
gender_labels = []
race_labels=[]

for filename in tqdm(os.listdir(BASE_DIR)):
    image_path = os.path.join(BASE_DIR, filename)
    temp = filename.split('_')
    age = int(temp[0])
    gender = int(temp[1])
    race= temp[2]
    image_paths.append(image_path)
    age_labels.append(age)
    gender_labels.append(gender)
    race_labels.append(race)

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [None]:
# first five lables of age
age_labels[:5]

In [None]:
# first five lables of gender
gender_labels[:5]

In [None]:
# first five lables of race
race_labels[:5]

In [None]:
# convert to dataframe
df = pd.DataFrame()
df['image'], df['age'], df['gender'],df['race']= image_paths, age_labels, gender_labels,race_labels
df.head()

In [None]:
df['race'].unique()

In [None]:
df['race'].dtype

In [None]:
df['race']=df['race'].replace(['20170109142408075.jpg.chip.jpg',
       '20170109150557335.jpg.chip.jpg', '20170116174525125.jpg.chip.jpg'],[np.nan,np.nan,np.nan])

In [None]:
df=df.dropna()

In [None]:
df['race']=df['race'].apply(lambda x:int(x))

In [None]:
# map labels for gender
gender_dict = {0:'Male', 1:'Female'}
race_dict={0:'White', 1:'Black',2:'Asian',3:'Indian',4:'Others (like Hispanic, Latino, Middle Eastern)'}

## Exploratory Data Analysis

In [None]:
from PIL import Image
img = Image.open(df['image'][0])
plt.axis('off')
plt.imshow(img);

In [None]:
sns.distplot(df['age'])

In [None]:
sns.countplot(df['gender'])

In [None]:
sns.countplot(df['race'])

In [None]:
# to display grid of images
plt.figure(figsize=(20, 20))
files = df.iloc[0:25]

for index, file, age, gender,race in files.itertuples():
    plt.subplot(5, 5, index+1)
    img = load_img(file)
    img = np.array(img)
    plt.imshow(img)
    plt.title(f"Age: {age} Gender: {gender_dict[gender]} Race:{race_dict[race]}")
    plt.axis('off')

## Feature Extraction

In [None]:
def extract_features(images):
    features = []
    for image in tqdm(images):
        img = load_img(image, grayscale=True)
        img = img.resize((128, 128), Image.ANTIALIAS)
        img = np.array(img)
        features.append(img)
        
    features = np.array(features)
    # ignore this step if using RGB
    features = features.reshape(len(features), 128, 128, 1)
    return features

In [None]:
X = extract_features(df['image'])

In [None]:
X.shape

In [None]:
# normalize the images
X = X/255.0

In [None]:
Y=df[['gender','race','age']]
Y.head()

In [None]:
from sklearn.model_selection import train_test_split 
X_train, X_validation, y_train, y_validation = train_test_split(X, Y, test_size= 0.2, random_state=42)

In [None]:
y_train_gender = np.array(y_train['gender'])
y_train_age = np.array(y_train['age'])
y_train_race=np.array(y_train['race'])

In [None]:
y_validation_gender = np.array(y_validation['gender'])
y_validation_age = np.array(y_validation['age'])
y_validation_race=np.array(y_validation['race'])

In [None]:
input_shape = (128, 128, 1)

## Model Creation

### Building a CNN model for race classification

In [None]:
#Building CNN model
model = Sequential([
                         Conv2D(filters=32, kernel_size=3, strides=(1,1), padding='valid',activation= 'relu', input_shape=(128, 128, 1)),
                         MaxPooling2D(pool_size=(2,2)),
                         Flatten(),
                         Dense(units=128, activation='relu'),
                         Dense(units=5, activation='softmax')
])

In [None]:
model.summary()

In [None]:
model.compile(loss="sparse_categorical_crossentropy", 
              optimizer="adam",
              metrics=["accuracy"])

In [None]:
# plot the model
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
model.fit(X_train,y_train_race,epochs=30)

In [None]:
model.save("cnn_model_for_race.h5")
print("Saved model to disk")

In [None]:
model.evaluate(X_validation,y_validation_race)

In [None]:
def test_prediction_race(image_index):
    pred =model.predict(np.expand_dims(X_validation[image_index],0))
    pred=pred.round(2)
    pred=np.argmax(pred)
    print(f'predicted race:{race_dict[pred]}')
    print(f'Actual race:{race_dict[y_validation_race[image_index]]}')
    plt.matshow(X_validation[image_index])
    plt.show()

In [None]:
test_prediction_race(1)

### Building a CNN model for gender classification

In [None]:
# #Building CNN model
model_2 = Sequential([
                         Conv2D(filters=32, kernel_size=3, strides=(1,1), padding='valid',activation= 'relu', input_shape=(128, 128, 1)),
                         MaxPooling2D(pool_size=(2,2)),
                         Flatten(),
                         Dense(units=128, activation='relu'),
                         Dense(units=1, activation='sigmoid')
])

In [None]:
model_2.compile(loss="binary_crossentropy", 
              optimizer="adam",
              metrics=["accuracy"])

In [None]:
# plot the model
from tensorflow.keras.utils import plot_model
plot_model(model_2)

In [None]:
model_2.fit(X_train,y_train_gender,epochs=3)

In [None]:
model.save("cnn_model_for_gender.h5")
print("Saved model to disk")

In [None]:
model_2.evaluate(X_validation,y_validation_gender)

In [None]:
def test_prediction_race(image_index):
    pred =model.predict(np.expand_dims(X_validation[image_index],0))
    pred=pred.round(2)
    pred=np.argmax(pred)
    print(f'predicted gender:{gender_dict[pred]}')
    print(f'Actual gender:{gender_dict[y_validation_gender[image_index]]}')
    plt.matshow(X_validation[image_index])
    plt.show()

In [None]:
test_prediction_race(6)

### Building A CNN model for age

In [None]:
#Building CNN model
model_3 = Sequential([
                         Conv2D(filters=32, kernel_size=3, strides=(1,1), padding='valid',activation= 'relu', input_shape=(128, 128, 1)),
                         MaxPooling2D(pool_size=(2,2)),
                         Flatten(),
                         Dense(units=128, activation='relu'),
                         Dense(units=1, activation='relu')
])

In [None]:
model_3.compile(loss=["mae"], optimizer="Adam",
	metrics=["accuracy"])

In [None]:
from tensorflow.keras.utils import plot_model
plot_model(model)

In [None]:
model_3.fit(X_train,y_train_age,epochs=30)

In [None]:
model.save("cnn_model_for_age.h5")
print("Saved model to disk")

In [None]:
def test_prediction_race(image_index):
    pred =model.predict(np.expand_dims(X_validation[image_index],0))
    pred=pred.round(2)
    pred=np.argmax(pred)
    print(f'predicted Age:{[pred]}')
    print(f'Actual Age:{[y_validation_age[image_index]]}')
    plt.matshow(X_validation[image_index])
    plt.show()