In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from keras.models import Sequential, load_model, Model
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, BatchNormalization, Flatten, Input
from sklearn.model_selection import train_test_split

# Path to the UTKFace dataset
path = "UTKFace/UTKFace"

# Initialize lists to store images, ages, and genders
images = []
ages = []
genders = []

# Load images and extract age and gender labels from filenames
for img_name in os.listdir(path):
    age = img_name.split("_")[0]
    gender = img_name.split("_")[1]

    # Read the image
    img = cv2.imread(os.path.join(path, img_name))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Append the data to respective lists
    images.append(img)
    ages.append(int(age))  # Convert age to integer
    genders.append(int(gender))  # Convert gender to integer

# Convert lists to numpy arrays
ages = np.array(ages, dtype=np.int64)
genders = np.array(genders, dtype=np.uint64)
images = np.array(images)

# Resize images to 224x224 (ResNet input size)
images_resized = np.array([cv2.resize(img, (224, 224)) for img in images])

# Normalize images (ResNet expects images normalized to [-1, 1])
images_normalized = images_resized / 127.5 - 1.0

# Split the dataset for age prediction
x_train_age, x_test_age, y_train_age, y_test_age = train_test_split(images_normalized, ages, random_state=42)

# Split the dataset for gender prediction
x_train_gender, x_test_gender, y_train_gender, y_test_gender = train_test_split(images_normalized, genders, random_state=42)

print("Data preprocessing completed successfully.")


Summary of Changes:
Variable Names:

Changed age to ages and gender to genders to avoid conflicts with loop variables.
Resizing and Normalization:

Added resizing of images to 224x224.
Added normalization of images to the range [-1, 1].
Updated Data Splits:

The train_test_split function now operates on the resized and normalized images (images_normalized).