In [None]:
import kagglehub

# Download the latest version of the "jangedoo/utkface-new" Kaggle dataset.
# `path` holds whatever local directory kagglehub reports for the files.
path = kagglehub.dataset_download("jangedoo/utkface-new")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'utkface-new' dataset.
Path to dataset files: /kaggle/input/utkface-new


In [None]:
# Derive the image directory from the location kagglehub reported instead of
# hard-coding the Colab/Kaggle mount point, so the notebook also runs when the
# dataset is cached somewhere else. Kept as a plain string because later cells
# build file paths with string concatenation.
folder_path = f"{path}/UTKFace"

In [None]:
# Show the resolved image directory as a quick sanity check.
folder_path

'/kaggle/input/utkface-new/UTKFace'

In [None]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
import os

# Labels are encoded in the file names: the first two '_'-separated fields are
# read as age and gender. The three lists are kept index-aligned.
age = []
gender = []
img_path = []
# os.listdir returns entries in arbitrary, filesystem-dependent order; sorting
# makes the row order (and therefore the seeded shuffle/split done later)
# reproducible across machines.
for file in sorted(os.listdir(folder_path)):
    if not (os.path.isfile(os.path.join(folder_path, file)) and file[0].isdigit()):
        print(f"Skipping non-image file or directory: {file}")
        continue

    parts = file.split('_')
    try:
        # Parse both fields before appending anything so a malformed name can
        # neither abort the scan nor leave the lists misaligned.
        file_age, file_gender = int(parts[0]), int(parts[1])
    except (IndexError, ValueError):
        print(f"Skipping file with unexpected format: {file}")
        continue

    age.append(file_age)
    gender.append(file_gender)
    img_path.append(file)

In [None]:
# Number of files whose name was parsed into an (age, gender) pair.
len(age)

23708

In [None]:
# One row per image: the parsed labels plus the file name inside folder_path.
df = pd.DataFrame({
    'age': age,
    'gender': gender,
    'img': img_path,
})

In [None]:
# (rows, columns) of the label table.
df.shape

(23708, 3)

In [None]:
# Preview the first rows to confirm the age/gender/img columns look right.
df.head()

Unnamed: 0,age,gender,img
0,26,0,26_0_2_20170104023102422.jpg.chip.jpg
1,22,1,22_1_1_20170112233644761.jpg.chip.jpg
2,21,1,21_1_3_20170105003215901.jpg.chip.jpg
3,28,0,28_0_0_20170117180555824.jpg.chip.jpg
4,17,1,17_1_4_20170103222931966.jpg.chip.jpg


In [None]:
# Shuffle once with a fixed seed, then carve the same permutation into the
# first 20000 rows for training and the remainder for testing.
shuffled_df = df.sample(frac=1, random_state=0)
train_df = shuffled_df.iloc[:20000]
test_df = shuffled_df.iloc[20000:]

In [None]:
# Size of the training split.
train_df.shape

(20000, 3)

In [None]:
# Size of the held-out split.
test_df.shape

(3708, 3)

In [None]:
# Random augmentations used for the training generator only.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both generators rescale pixel values by 1/255; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Settings shared by both generators: file names come from the 'img' column
# (resolved against folder_path), labels from 'age' and 'gender', and images
# are resized to 200x200 with one target array per label column.
flow_options = dict(
    directory=folder_path,
    x_col='img',
    y_col=['age','gender'],
    target_size=(200,200),
    class_mode='multi_output',
)

train_generator = train_datagen.flow_from_dataframe(train_df, **flow_options)

test_generator = test_datagen.flow_from_dataframe(test_df, **flow_options)

Found 20000 validated image filenames.
Found 3708 validated image filenames.


In [None]:
from keras.applications.resnet50 import ResNet50
from keras.layers import *
from keras.models import Model

In [None]:
# Build the ResNet50 convolutional base (no classification head) for 200x200 RGB input.
# NOTE(review): the next cell constructs ResNet50 again and rebinds `resnet`,
# so this instance appears to be redundant — confirm and remove this cell.
resnet = ResNet50(include_top=False, input_shape=(200,200,3))

In [None]:
# Two-headed network on top of a frozen ResNet50 backbone.
# NOTE(review): no `weights` argument is passed, so Keras' documented default
# (ImageNet weights) presumably applies; such weights expect inputs that went
# through resnet50.preprocess_input — confirm the input pipeline matches.
resnet = ResNet50(include_top=False, input_shape=(200,200,3))

# Freeze the backbone so only the layers added below are trained.
resnet.trainable=False

# Output of the backbone's last layer feeds both heads.
output = resnet.layers[-1].output

flatten = Flatten()(output)

# First hidden layer of each head: dense1 -> age branch, dense2 -> gender branch.
dense1 = Dense(512, activation='relu')(flatten)
dense2 = Dense(512,activation='relu')(flatten)

# Second hidden layer of each head.
dense3 = Dense(512,activation='relu')(dense1)
dense4 = Dense(512,activation='relu')(dense2)

# Age is a single linear unit (regression); gender is a single sigmoid unit
# (binary probability). The layer names are the keys used by model.compile.
output1 = Dense(1,activation='linear',name='age')(dense3)
output2 = Dense(1,activation='sigmoid',name='gender')(dense4)

In [None]:
# Assemble the network: one image input, outputs ordered as [age, gender].
model = Model(
    inputs=resnet.input,
    outputs=[output1, output2],
)

In [None]:
# Per-output losses and metrics are keyed by the output layer names.
# The gender loss is weighted 99x relative to the age loss when the two are
# summed into the total training loss.
model.compile(
    optimizer='adam',
    loss={'age': 'mae', 'gender': 'binary_crossentropy'},
    metrics={'age': 'mae', 'gender': 'accuracy'},
    loss_weights={'age': 1, 'gender': 99},
)

In [None]:
import tensorflow as tf

# Function to load and preprocess images
def load_image_and_labels(image_path, age, gender):
    """Load one JPEG and pair it with its labels for the two-output model.

    Args:
        image_path: Scalar string tensor with the path of the image file.
        age: Scalar age label.
        gender: Scalar gender label.

    Returns:
        A tuple ``(img, (age, gender))`` where ``img`` is the 200x200x3 image
        preprocessed for ResNet50 and both labels are cast to ``tf.float32``.
    """
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    # resize returns float32 while keeping the original 0-255 value range.
    img = tf.image.resize(img, [200, 200])
    # The ImageNet-pretrained ResNet50 backbone expects Keras' ResNet
    # preprocessing (RGB->BGR plus per-channel mean subtraction on 0-255
    # pixels). Scaling to [0, 1] instead hands the frozen backbone inputs far
    # outside the range it was trained on.
    img = tf.keras.applications.resnet50.preprocess_input(img)
    return img, (tf.cast(age, tf.float32), tf.cast(gender, tf.float32)) # Return labels as a tuple of floats

# Create datasets from dataframes: each element is (full image path, age, gender).
train_ds = tf.data.Dataset.from_tensor_slices((folder_path + '/' + train_df['img'], train_df['age'], train_df['gender']))
test_ds = tf.data.Dataset.from_tensor_slices((folder_path + '/' + test_df['img'], test_df['age'], test_df['gender']))

# Shuffle the lightweight (path, age, gender) records *before* decoding, so the
# buffer can span the whole training set without holding decoded images in memory.
train_ds = train_ds.shuffle(buffer_size=len(train_df))

# Map the loading and preprocessing function to the datasets, decoding several
# images in parallel; element order is still deterministic by default.
train_ds = train_ds.map(load_image_and_labels, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(load_image_and_labels, num_parallel_calls=tf.data.AUTOTUNE)

# Batch the datasets and overlap input preparation with training.
train_ds = train_ds.batch(32).prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.batch(32).prefetch(buffer_size=tf.data.AUTOTUNE)

Now you can train your model using the `tf.data.Dataset` objects:

In [None]:
# Train for 10 epochs, evaluating on the held-out split after each epoch.
model.fit(
    x=train_ds,
    epochs=10,
    validation_data=test_ds,
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 111ms/step - age_loss: 10.9776 - age_mae: 10.9776 - gender_accuracy: 0.5241 - gender_loss: 0.6922 - loss: 79.5009 - val_age_loss: 11.6922 - val_age_mae: 11.6928 - val_gender_accuracy: 0.5272 - val_gender_loss: 0.6920 - val_loss: 80.1999
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 107ms/step - age_loss: 11.0251 - age_mae: 11.0251 - gender_accuracy: 0.5235 - gender_loss: 0.6922 - loss: 79.5510 - val_age_loss: 10.7227 - val_age_mae: 10.7231 - val_gender_accuracy: 0.5272 - val_gender_loss: 0.6918 - val_loss: 79.2106
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 110ms/step - age_loss: 10.8148 - age_mae: 10.8148 - gender_accuracy: 0.5208 - gender_loss: 0.6923 - loss: 79.3565 - val_age_loss: 10.9986 - val_age_mae: 10.9991 - val_gender_accuracy: 0.5272 - val_gender_loss: 0.6918 - val_loss: 79.4924
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x7cc3ac0fdf70>