In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
drive.mount('/content/drive')

# Assuming your zip file is named "archive.zip" in your Google Drive
# Adjust the path to match where your file is located
zip_path = "/content/drive/MyDrive/datasets/archive.zip"  # Change this to your actual file path

# Create a directory to extract to
!mkdir -p "/content/UTKFace_extracted"

# Extract the zip file
!unzip -q "{zip_path}" -d "/content/UTKFace_extracted"

# Check what was extracted
!ls "/content/UTKFace_extracted"

In [None]:
# Update this to point to the correct folder containing the images
# The exact path depends on how the files are organized in the zip
dataset_path = "/content/UTKFace_extracted"  # You might need to add subdirectories

# If the images are in a subdirectory, check the structure:
# print the top-level entries of the extraction directory.
# NOTE: `os` is already imported in the first cell; this repeated import is redundant but harmless.
import os
print(os.listdir(dataset_path))

# Update path if needed, e.g., if images are in a subfolder
# dataset_path = "/content/UTKFace_extracted/UTKFace"

In [None]:
# Notebook-wide configuration.
# dataset_path is absolute and matches the directory the archive was extracted
# to, so the image lookup does not depend on the kernel's working directory.
dataset_path = "/content/UTKFace_extracted"  # Update if necessary
image_size = 64  # images are resized to image_size x image_size pixels
batch_size = 32  # number of samples per training batch

In [None]:
def find_image_directory(base_path):
    """Locate the first directory under ``base_path`` that holds image files.

    The tree is walked with ``os.walk`` and the first directory containing at
    least one ``.jpg`` or ``.png`` file (extension compared case-insensitively)
    is reported and returned.

    Args:
        base_path: Directory at which the search starts.

    Returns:
        The path of the first directory with images, or ``base_path`` itself
        when no directory in the tree contains any.
    """
    image_suffixes = ('.jpg', '.png')
    for folder, _subdirs, names in os.walk(base_path):
        # str.endswith accepts a tuple, so both extensions are counted in one pass
        n_images = sum(1 for name in names if name.lower().endswith(image_suffixes))
        if n_images:
            print(f"Found {n_images} images in {folder}")
            return folder
    return base_path

# Try to automatically find the directory with images.
# dataset_path is overwritten with whatever find_image_directory returns;
# that function hands back the path it was given when it finds no images.
dataset_path = find_image_directory(dataset_path)
print(f"Using dataset path: {dataset_path}")

In [None]:
# Function to load and preprocess images
def load_dataset(dataset_path):
    """Load every labelled image in ``dataset_path`` into NumPy arrays.

    Labels are parsed from the file name, which is expected to follow the
    UTKFace convention ``[age]_[gender]_[race]_[date&time].jpg``. Each image is
    converted from BGR to RGB, resized to ``image_size`` x ``image_size`` (a
    module-level setting) and scaled to the range [0, 1].

    Files are visited in sorted order so the returned arrays, and therefore any
    seeded split made from them, do not depend on the file system's listing
    order. Files that cannot be parsed or read are counted as errors and
    skipped; only the first five problems are printed.

    Args:
        dataset_path: Directory that directly contains the image files.

    Returns:
        Tuple ``(images, ages, genders)`` of NumPy arrays with one entry per
        successfully loaded image. ``images`` holds float32 RGB pixels.

    Raises:
        OSError: If ``dataset_path`` cannot be listed.
    """
    images = []
    ages = []
    genders = []

    print(f"Looking for images in: {dataset_path}")
    file_count = 0
    error_count = 0

    # For each entry in the dataset directory (sorted for a reproducible order)
    for filename in sorted(os.listdir(dataset_path)):
        file_count += 1
        # Case-insensitive match, consistent with find_image_directory
        if not filename.lower().endswith(('.jpg', '.png')):
            continue

        try:
            # Extract age and gender from filename
            # UTKFace filename format: [age]_[gender]_[race]_[date&time].jpg
            parts = filename.split('_')
            age = int(parts[0])
            gender = int(parts[1])  # 0 for male, 1 for female
            # The gender head is trained with binary cross-entropy, so any
            # other label value would silently corrupt the targets
            if gender not in (0, 1):
                raise ValueError(f"unexpected gender label {gender}")

            # Load and preprocess image
            img_path = os.path.join(dataset_path, filename)
            img = cv2.imread(img_path)
            if img is None:
                error_count += 1
                if error_count <= 5:
                    print(f"Could not read image: {img_path}")
                continue

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
            img = cv2.resize(img, (image_size, image_size))  # Resize
            # float32 halves the memory of the default float64 result
            img = img.astype(np.float32) / 255.0  # Normalize

            # Append to lists
            images.append(img)
            ages.append(age)
            genders.append(gender)

        except Exception as e:
            # Best effort: a single bad file must not abort the whole load
            error_count += 1
            if error_count <= 5:  # Only print first few errors
                print(f"Error processing {filename}: {e}")
            continue

    print(f"Processed {file_count} files, encountered {error_count} errors, loaded {len(images)} valid images")
    return np.array(images), np.array(ages), np.array(genders)

# Load the dataset: all readable images plus their age and gender labels
# are returned by load_dataset as three NumPy arrays
print("Loading dataset...")
images, ages, genders = load_dataset(dataset_path)
print(f"Dataset loaded: {len(images)} images")

# Verify we have at least one image; stop here with a clear message otherwise,
# since the cells below split and train on these arrays
if len(images) == 0:
    raise ValueError("No valid images were loaded. Please check the dataset path and file format.")

In [None]:
# Quick visualization of some sample images
def visualize_samples(images, ages, genders, num_samples=5):
    """Show a row of randomly chosen images titled with their labels.

    Args:
        images: Indexable collection of images accepted by ``plt.imshow``.
        ages: Age label for each image, aligned with ``images``.
        genders: Gender label for each image (1 is shown as "Female",
            anything else as "Male"), aligned with ``images``.
        num_samples: Maximum number of images to display. If fewer images are
            available, all of them are shown.

    Returns:
        None. Nothing is drawn when there are no images to show.
    """
    # Sampling without replacement raises ValueError when asked for more
    # items than exist, so cap the request at the population size
    num_samples = min(num_samples, len(images))
    if num_samples <= 0:
        return

    plt.figure(figsize=(15, 3))
    indices = np.random.choice(len(images), num_samples, replace=False)

    for i, idx in enumerate(indices):
        plt.subplot(1, num_samples, i+1)
        plt.imshow(images[idx])
        gender_label = "Female" if genders[idx] == 1 else "Male"
        plt.title(f"Age: {ages[idx]}, Gender: {gender_label}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Visualize a few samples using visualize_samples' default sample count;
# skipped entirely when no images were loaded
if len(images) > 0:
    visualize_samples(images, ages, genders)

In [None]:
# Split the dataset into training and testing sets.
# Images, ages and genders are passed together so the three arrays are split
# with the same shuffle and stay aligned; 20% of the samples are held out and
# random_state=42 fixes the shuffle for a given input order.
X_train, X_test, age_train, age_test, gender_train, gender_test = train_test_split(
    images, ages, genders, test_size=0.2, random_state=42
)

# Report the size of each split
print(f"Training set: {len(X_train)} images")
print(f"Testing set: {len(X_test)} images")

In [None]:
# Data augmentation for training set: configures random rotations, shifts,
# shear, zoom and horizontal flips.
# NOTE(review): `datagen` is only configured here; no other cell visible in
# this part of the notebook passes images through it — confirm whether
# augmentation is meant to be applied during training.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'  # how pixels exposed by a transform are filled in
)

In [None]:
# Build the CNN model
def build_model(input_shape):
    """Build and compile the two-headed age/gender CNN.

    The network stacks four convolution + max-pooling stages (32, 64, 128 and
    256 filters), flattens the result, and feeds it through two dense layers
    (512 and 256 units), each followed by 50% dropout. Two single-unit heads
    share that trunk: a linear ``age_output`` and a sigmoid ``gender_output``.

    Args:
        input_shape: Shape of one input image, passed to ``Input``.

    Returns:
        A compiled ``Model`` using the Adam optimizer, with mean squared error
        and MAE for the age head, and binary cross-entropy and accuracy for
        the gender head.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor: the same conv/pool pair per filter count
    features = inputs
    for n_filters in (32, 64, 128, 256):
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Flatten, then the fully connected trunk shared by both heads
    features = Flatten()(features)
    for n_units in (512, 256):
        features = Dense(n_units, activation='relu')(features)
        features = Dropout(0.5)(features)

    # Output heads
    age_output = Dense(1, name='age_output')(features)  # Regression task
    gender_output = Dense(1, activation='sigmoid', name='gender_output')(features)  # Binary classification

    model = Model(inputs=inputs, outputs=[age_output, gender_output])

    # One loss and one metric per head, keyed by the head's layer name
    losses = {'age_output': 'mse', 'gender_output': 'binary_crossentropy'}
    metrics = {'age_output': 'mae', 'gender_output': 'accuracy'}
    model.compile(optimizer='adam', loss=losses, metrics=metrics)

    return model

# Create the model for image_size x image_size inputs with 3 channels and
# print its layer summary (training itself happens in a later cell)
model = build_model((image_size, image_size, 3))
model.summary()

In [None]:
def multi_output_generator(X, y_age, y_gender, batch_size=32, augmenter=None):
    """Yield endless random training batches for the two-output model.

    Each batch is drawn independently with ``np.random.randint``, i.e. sampled
    with replacement, so a sample may appear more than once in a batch.

    Args:
        X: Array of images; the first axis indexes samples.
        y_age: Age labels aligned with ``X``.
        y_gender: Gender labels aligned with ``X``.
        batch_size: Number of samples per yielded batch.
        augmenter: Optional object exposing ``random_transform(image)`` (for
            example a Keras ``ImageDataGenerator``). When given, every image in
            a batch is passed through it. ``None`` (the default) yields the
            images unchanged.

    Yields:
        Tuple ``(batch_X, targets)`` where ``targets`` is a dict with the keys
        ``'age_output'`` and ``'gender_output'``.

    Raises:
        ValueError: On first iteration, if ``X`` contains no samples.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("Cannot generate batches from an empty dataset.")

    while True:
        # Get random batch indices
        idx = np.random.randint(0, n_samples, batch_size)

        # Get batch data (integer-array indexing copies, so X is never modified)
        batch_X = X[idx]
        batch_age = y_age[idx]
        batch_gender = y_gender[idx]

        # Apply augmentation to images (optional); labels are unaffected
        if augmenter is not None:
            batch_X = np.stack([augmenter.random_transform(img) for img in batch_X])

        yield batch_X, {'age_output': batch_age, 'gender_output': batch_gender}

# Then use it in your model.fit:
# the generator never ends, so steps_per_epoch defines how many batches make
# up one epoch. Note that the held-out test split doubles as validation data.
history = model.fit(
    multi_output_generator(X_train, age_train, gender_train, batch_size),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=50,
    validation_data=(X_test, {'age_output': age_test, 'gender_output': gender_test}),
)

In [None]:
# Evaluate the model
# With two outputs, the positional result of evaluate() also contains the
# per-output losses, so fixed indices are easy to mislabel. Requesting a dict
# lets each metric be looked up by name instead.
evaluation_by_name = model.evaluate(
    X_test,
    {'age_output': age_test, 'gender_output': gender_test},
    return_dict=True,
)
# Keep `evaluation` as a plain list of the metric values, in reported order
evaluation = list(evaluation_by_name.values())
print(f"Test Loss: {evaluation_by_name['loss']}")
print(f"Age MAE: {evaluation_by_name['age_output_mae']}")
print(f"Gender Accuracy: {evaluation_by_name['gender_output_accuracy']}")


In [None]:
# Function to make predictions on new images
def predict_age_gender(image_path, model):
    """Predict age and gender for a single image file.

    The image is preprocessed the same way as the training data: converted
    from BGR to RGB, resized to ``image_size`` x ``image_size`` (a module-level
    setting) and scaled to [0, 1].

    Args:
        image_path: Path of the image file to read.
        model: Model whose ``predict`` returns ``(age, gender)`` predictions
            for a batch of images.

    Returns:
        Tuple ``(age, gender, image)``: the predicted age rounded to the
        nearest whole year and never below 0, ``"Female"`` when the gender
        score exceeds 0.5 and ``"Male"`` otherwise, and the preprocessed image
        for display.

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    # Load and preprocess the image
    img = cv2.imread(image_path)
    if img is None:
        # imread signals a missing or undecodable file by returning None;
        # fail here with a clear message instead of inside cvtColor
        raise ValueError(f"Could not read image: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (image_size, image_size))
    img = img / 255.0
    img = np.expand_dims(img, axis=0)  # add the batch axis expected by predict

    # Make prediction
    age_pred, gender_pred = model.predict(img)

    # Process predictions
    # Round rather than truncate, and clamp: the regression head is unbounded
    # and can produce negative values
    age = max(0, int(round(float(age_pred[0][0]))))
    gender = "Female" if gender_pred[0][0] > 0.5 else "Male"

    return age, gender, img[0]  # Return preprocessed image for display
