In [2]:
# import all the necessary libraries
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Activation,Conv2D,MaxPooling2D,Flatten,Dense,Dropout,Input,AveragePooling2D,GlobalAveragePooling2D, GlobalMaxPooling2D,BatchNormalization,Activation
from tensorflow.keras.optimizers import Adam,RMSprop,Adagrad,Nadam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import regularizers

In [None]:

!pip install keras-tuner

In [None]:
# Collect the age and gender labels encoded in the UTKFace file names.
# The first '_'-separated field is read as the age, the second as the gender code.
utk_face_dir='/kaggle/input/utkface-new/UTKFace'
ages=[]
genders=[]
for filename in os.listdir(utk_face_dir):
    if not filename.endswith('.jpg'):
        continue
    # Split once; both leading fields must be numeric to be usable as labels.
    parts = filename.split('_')
    if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit():
        ages.append(int(parts[0]))
        genders.append(int(parts[1]))


In [None]:
# Assemble the parsed labels into a DataFrame and preview its first rows
df_utk = pd.DataFrame({"Age": ages, "Gender": genders})
df_utk.head()



In [None]:
# Keep only the rows whose gender code is 0 or 1
df_utk = df_utk.loc[df_utk["Gender"].isin([0, 1])]

In [None]:
# Histogram of ages (30 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(5, 5))
sns.histplot(df_utk["Age"], bins=30, kde=True, ax=ax)
ax.set_title("Age Distribution")
ax.set_xlabel("Age")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the label columns
df_utk.describe()


In [None]:
# Box plot of the Age column to show its spread and outliers
fig, ax = plt.subplots(figsize=(5, 5))
sns.boxplot(df_utk["Age"], ax=ax)
plt.show()

In [None]:
# Bar chart with the number of samples per gender code
fig, ax = plt.subplots(figsize=(5, 5))
sns.countplot(data=df_utk, x="Gender", ax=ax)
ax.set_title("Gender Distribution")
plt.show()

In [None]:
# Number of samples per gender code, most frequent first
print(df_utk["Gender"].value_counts())

In [None]:
# Share of each gender as a pie chart.
# value_counts() orders its rows by frequency, so a positional labels=[...]
# list is only right when code 0 happens to be the majority. Name the slices
# from the gender code itself instead (UTKFace: 0 = male, 1 = female).
gender_labels = {0: "Male", 1: "Female"}
df_utk["Gender"].value_counts().rename(index=gender_labels).plot(
    kind="pie",
    autopct="%1.1f%%"
)

In [None]:
# Most frequent age within each gender (the smallest one when several ages tie)
(
    df_utk
    .groupby("Gender")["Age"]
    .apply(lambda group_ages: group_ages.mode().iloc[0])
)

In [None]:
# Keep only the images whose age label is at most 80
valid_filenames = []
# os.listdir returns entries in arbitrary order; sorting makes the file list,
# and therefore the seeded shuffle/split built from it, reproducible.
for filename in sorted(os.listdir(utk_face_dir)):
    if not filename.endswith('.jpg'):
        continue
    parts = filename.split('_')
    # Same validity rule as the label scan: two leading numeric fields
    if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit():
        if int(parts[0]) <= 80:
            valid_filenames.append(os.path.join(utk_face_dir, filename))

def parse_image(filename):
    """Load one image and derive its labels from the file name.

    Args:
        filename: string tensor with the '/'-separated path of a JPEG file
            whose base name starts with "<age>_<gender>_".

    Returns:
        A pair (image, labels). image is a float32 tensor resized to
        128x128 with 3 channels and divided by 255; labels is a dict with
        int32 entries 'age' and 'gender'.
    """
    # Labels are the first two '_'-separated fields of the base name
    basename = tf.strings.split(filename, '/')[-1]
    name_fields = tf.strings.split(basename, '_')
    labels = {
        'age': tf.strings.to_number(name_fields[0], tf.int32),
        'gender': tf.strings.to_number(name_fields[1], tf.int32),
    }

    # Read bytes -> decode as RGB -> resize -> scale to [0, 1]
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.image.resize(pixels, [128, 128])
    pixels = tf.cast(pixels, tf.float32) / 255.0

    return pixels, labels



In [None]:
# Stratified 80/20 train/validation split: each gender is split separately so
# both subsets keep the same male/female proportion.
import random

# UTKFace stores the gender code in the second file-name field:
# 0 = male, 1 = female.
male_files   = [f for f in valid_filenames if os.path.basename(f).split('_')[1] == '0']
female_files = [f for f in valid_filenames if os.path.basename(f).split('_')[1] == '1']

# Seeded shuffles; the code-0 list is shuffled first, then the code-1 list.
random.seed(42)
random.shuffle(male_files)
random.shuffle(female_files)

# Index of the 80% cut for each gender
m_split = int(0.8 * len(male_files))
f_split = int(0.8 * len(female_files))

male_train,   male_val   = male_files[:m_split],   male_files[m_split:]
female_train, female_val = female_files[:f_split], female_files[f_split:]

train_filenames = male_train + female_train
val_filenames   = male_val   + female_val

# Mix the two genders inside each subset
random.shuffle(train_filenames)
random.shuffle(val_filenames)

In [None]:
def make_dataset(filenames, shuffle=False, batch_size=32, seed=42):
    """Build a batched, prefetched tf.data pipeline from image paths.

    Args:
        filenames: list of image paths, each parseable by parse_image.
        shuffle: when True, reshuffle the paths on every pass over the data.
        batch_size: number of samples per batch.
        seed: seed passed to the shuffle.

    Returns:
        A tf.data.Dataset yielding (image_batch, {'age': ..., 'gender': ...}).
    """
    ds = tf.data.Dataset.from_tensor_slices(filenames)
    if shuffle:
        # Shuffle the path strings before decoding: a buffer covering the whole
        # list gives a uniform shuffle without holding decoded images in memory.
        ds = ds.shuffle(max(len(filenames), 1), seed=seed,
                        reshuffle_each_iteration=True)
    ds = ds.map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)


train_ds = make_dataset(train_filenames, shuffle=True)
val_ds = make_dataset(val_filenames)

In [None]:
# Transfer learning: frozen ResNet152V2 backbone with a shared dense head and
# two outputs ('age' regression, 'gender' binary classification).
from tensorflow.keras.applications import ResNet152V2
base_model=ResNet152V2(input_shape=(128,128,3),
                      include_top=False,
                      weights='imagenet')
base_model.trainable=False

inputs = Input(shape=(128,128,3))
# parse_image delivers pixels in [0, 1], while the ResNetV2 ImageNet weights
# were trained on inputs scaled to [-1, 1]; map one range onto the other.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
# training=False keeps the backbone's BatchNormalization layers in inference mode
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu', kernel_initializer='he_normal',
          kernel_regularizer=regularizers.l2(0.001))(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(2048, activation='relu', kernel_initializer='he_normal',
          kernel_regularizer=regularizers.l2(0.001))(x)
age_output = Dense(1, name='age')(x)                            # Regression
gender_output = Dense(1, activation='sigmoid', name='gender')(x)  # Binary classification

model = Model(inputs=inputs, outputs=[age_output, gender_output])
# Loss and metric dict keys must match the output layer names above
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss={'age': 'mse', 'gender': 'binary_crossentropy'},
    metrics={'age': 'mae', 'gender': 'accuracy'}
)

In [None]:
# Callbacks for the transfer-learning run; all three follow the validation
# gender accuracy, which should be maximised.
callbacks=[EarlyStopping(
        monitor='val_gender_accuracy',
        mode='max',
        patience=3,
        restore_best_weights=True,
        verbose=1
    ),
    ModelCheckpoint(
        filepath='/kaggle/working/keras_tuner/age_gender_transfer_learning.h5',
        monitor='val_gender_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_gender_accuracy',
        mode='max',
        factor=0.2,
        # Must stay below the EarlyStopping patience; with equal values the
        # learning rate is first lowered in the very epoch training stops.
        patience=2,
        min_lr=1e-6,
        verbose=1
    )
 ]

In [None]:
# Fit the transfer-learning model for at most 50 epochs, validating on val_ds
# after every epoch and applying the callbacks defined earlier
history = model.fit(train_ds, validation_data=val_ds, epochs=50, callbacks=callbacks)

In [None]:
# HyperParameter Tuning 
import keras_tuner as kt
def build_model(hp):
    """Build and compile the tunable two-output CNN for KerasTuner.

    The network has six conv blocks (Conv2D -> pooling -> BatchNormalization
    -> Dropout), followed by two dense layers and two heads.

    Tuned hyperparameters:
        filters_{i}: number of filters of conv block i.
        padding_{i}: padding of conv block i (only "same" is offered).
        Pooling_{i}: "max" or "avg" pooling after conv block i.
        dropout1: dropout rate; the name is the same in every block, so one
            value is shared by all six blocks.

    Args:
        hp: the keras_tuner hyperparameter container passed in by the tuner.

    Returns:
        A compiled Model with outputs 'age' (linear, MSE loss) and
        'gender' (sigmoid, binary cross-entropy loss).
    """
    # (min_value, max_value, step) of the filter range for each conv block;
    # blocks not listed use default_filter_range.
    filter_ranges = {
        0: (32, 64, 32),
        1: (64, 128, 64),
        2: (128, 512, 128),
    }
    default_filter_range = (256, 512, 128)

    input_layer = Input(shape=(128, 128, 3))
    x = input_layer

    # Fixed depth of six conv blocks; only their contents are tuned
    for i in range(6):
        low, high, step = filter_ranges.get(i, default_filter_range)
        filters = hp.Int(f"filters_{i}", min_value=low, max_value=high, step=step)
        padding = hp.Choice(f"padding_{i}", ["same"])
        x = Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            padding=padding,
            activation='relu',
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(0.0005)
        )(x)
        # Each block halves the spatial size with either max or average pooling
        if hp.Choice(f"Pooling_{i}", ["max", "avg"]) == "max":
            x = MaxPooling2D(pool_size=(2, 2))(x)
        else:
            x = AveragePooling2D(pool_size=(2, 2))(x)
        x = BatchNormalization()(x)
        x = Dropout(hp.Float("dropout1", 0.2, 0.5, step=0.1))(x)

    # Shared dense head
    x = Flatten()(x)
    x = Dense(1024, activation='relu', kernel_initializer='he_normal',
              kernel_regularizer=regularizers.l2(0.0005))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.25)(x)
    x = Dense(2048, activation='relu', kernel_initializer='he_normal',
              kernel_regularizer=regularizers.l2(0.0005))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.24)(x)

    # Output heads; the layer names are the keys used for losses and metrics
    age_output = Dense(1, name='age',
                       kernel_regularizer=regularizers.l2(0.0005),
                       kernel_initializer="glorot_uniform")(x)      # Regression
    gender_output = Dense(1, activation='sigmoid', name='gender',
                          kernel_regularizer=regularizers.l2(0.0005),
                          kernel_initializer="glorot_uniform")(x)   # Binary classification

    # Final model
    model = Model(inputs=input_layer, outputs=[age_output, gender_output])
    model.compile(
        optimizer=Nadam(learning_rate=1e-3),
        loss={'age': 'mse', 'gender': 'binary_crossentropy'},
        metrics={'age': 'mae', 'gender': 'accuracy'}
    )

    return model

In [None]:
# Callbacks used during the hyperparameter search; all three follow the
# validation gender accuracy, which should be maximised.
callbacks=[EarlyStopping(
        monitor='val_gender_accuracy',
        mode='max',
        patience=3,
        restore_best_weights=True,
        verbose=1
    ),
    # NOTE(review): the same file path is used by every trial, and KerasTuner
    # presumably copies the callbacks per trial, so this file likely ends up
    # holding the best epoch of the last trial rather than of the whole
    # search - confirm before relying on it.
    ModelCheckpoint(
        filepath='/kaggle/working/age_gender_detection.h5',
        save_weights_only=False,
        monitor='val_gender_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_gender_accuracy',
        mode='max',
        factor=0.2,
        # Must stay below the EarlyStopping patience; with equal values the
        # learning rate is first lowered in the very epoch training stops.
        patience=2,
        min_lr=1e-6,
        verbose=1
    )
]

In [None]:
from keras_tuner import RandomSearch,Objective

# Random search over the hyperparameters declared in build_model, ranking
# trials by validation gender accuracy (higher is better).
tuner=kt.RandomSearch(
    build_model,
    objective=Objective("val_gender_accuracy",direction='max'),
    max_trials=10,
    # Fixed seed so the sampled configurations are repeatable, in line with
    # the seed of 42 used for the data split
    seed=42,
    directory='/kaggle/working/keras_tuner',
    project_name='age_gender',
    # False: reuse results already stored under directory/project_name
    overwrite=False
)

In [None]:
# Run the search: every trial trains on train_ds for at most 25 epochs and is
# evaluated on val_ds, with the callbacks defined earlier applied
tuner.search(train_ds, validation_data=val_ds, epochs=25, callbacks=callbacks, verbose=1)

In [None]:
# Fetch the top-ranked model of the search and save it as an HDF5 file
best_model_path = '/kaggle/working/keras_tuner/project/age_gender_best_model.h5'
best_model = tuner.get_best_models(num_models=1)[0]
best_model.save(best_model_path)