In [1]:
#import kagglehub
#path = kagglehub.dataset_download("aibloy/fairface")
#print("Path to dataset files:", path)

In [2]:
import pandas as pd
import os
from tensorflow.keras.utils import to_categorical
import cv2
import numpy as np

In [3]:
df_train = pd.read_csv("train_labels.csv")
df_val = pd.read_csv("val_labels.csv")

In [4]:
df_train.head()

Unnamed: 0,file,age,gender,race,service_test
0,train/1.jpg,50-59,Male,East Asian,True
1,train/2.jpg,30-39,Female,Indian,False
2,train/3.jpg,3-9,Female,Black,False
3,train/4.jpg,20-29,Female,Indian,True
4,train/5.jpg,20-29,Female,Indian,True


In [5]:
df_train['gender'].value_counts()

gender
Male      45986
Female    40758
Name: count, dtype: int64

In [6]:
df_val['gender'].value_counts()

gender
Male      5792
Female    5162
Name: count, dtype: int64

In [7]:
image_dir = "Fairface/"

In [8]:
X_train, y_train = [], []

for idx, row in df_train.iterrows():
    img_path_train = os.path.join(image_dir, row["file"])
    
    img = cv2.imread(img_path_train)
    if img is None:
        continue

    img = cv2.resize(img, (64, 64)).astype(np.float16) / 255.0  
    
    X_train.append(img)
    
    gender = 0 if row["gender"] == "Male" else (1 if row["gender"] == "Female" else 2)
    y_train.append(gender)

X_train = np.array(X_train, dtype=np.float16)
y_train = to_categorical(y_train, num_classes=3)

print("Dataset Loaded: ", X_train.shape, y_train.shape)

Dataset Loaded:  (86744, 64, 64, 3) (86744, 3)


In [9]:
X_val, y_val = [], []

for idx, row in df_val.iterrows():
    img_path_val = os.path.join(image_dir, row["file"])
    
    img = cv2.imread(img_path_val)
    if img is None:
        continue

    img = cv2.resize(img, (64, 64)).astype(np.float16) / 255.0  
    
    X_val.append(img)
    
    gender = 0 if row["gender"] == "Male" else (1 if row["gender"] == "Female" else 2)
    y_val.append(gender)

X_val = np.array(X_val, dtype=np.float16)
y_val = to_categorical(y_val, num_classes=3)

print("Dataset Loaded: ", X_val.shape, y_val.shape)

Dataset Loaded:  (10954, 64, 64, 3) (10954, 3)


In [10]:
import h5py

with h5py.File("dataset.h5", "w") as f:
    f.create_dataset("X_train", data=X_train)
    f.create_dataset("y_train", data=y_train)
    f.create_dataset("X_val", data=X_val)
    f.create_dataset("y_val", data=y_val)

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import SGD

In [16]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='sigmoid')
])

In [17]:
model.compile(optimizer=SGD(learning_rate=0.001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

In [18]:
model.summary()

In [19]:
def data_generator(batch_size=32):
    with h5py.File("dataset.h5", "r") as f:
        X_train, y_train = f["X_train"], f["y_train"]
        while True:
            for i in range(0, len(X_train), batch_size):
                yield X_train[i : i + batch_size], y_train[i : i + batch_size]

train_gen = data_generator(batch_size=32)
val_gen = data_generator(batch_size=32)

history = model.fit(
    train_gen,
    epochs=40,
    steps_per_epoch=len(X_train) // 32,
    validation_data=val_gen,
    validation_steps=len(X_val) // 32
)

Epoch 1/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 154ms/step - accuracy: 0.5386 - loss: 0.8139 - val_accuracy: 0.6817 - val_loss: 0.6078
Epoch 2/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 156ms/step - accuracy: 0.6667 - loss: 0.6111 - val_accuracy: 0.7539 - val_loss: 0.4849
Epoch 3/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 146ms/step - accuracy: 0.7354 - loss: 0.5261 - val_accuracy: 0.7833 - val_loss: 0.4433
Epoch 4/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m391s[0m 144ms/step - accuracy: 0.7679 - loss: 0.4775 - val_accuracy: 0.8064 - val_loss: 0.3991
Epoch 5/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 145ms/step - accuracy: 0.7908 - loss: 0.4431 - val_accuracy: 0.8225 - val_loss: 0.3724
Epoch 6/40
[1m2710/2710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m391s[0m 144ms/step - accuracy: 0.8054 - loss: 0.4170 - val_accuracy: 0.8265 - val_loss:

In [20]:
test_loss, test_acc = model.evaluate(val_gen, steps=len(X_val) // 32)
print(f"Validation Accuracy: {test_acc:.4f}")

[1m342/342[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 31ms/step - accuracy: 0.9474 - loss: 0.1273
Validation Accuracy: 0.9486


In [21]:
model.save("gender_classification_model.h5")

