## Data Organization

### Create one subdirectory per gender inside both the training and validation directories, then move each image into the matching one using the ground-truth labels from the CSV files

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import shutil

In [4]:
# Locations of the image folders and label files, relative to the
# notebook's working directory.

train_dir, val_dir = "train", "val"
train_csv = "fairface_label_train_18th.csv"
val_csv = "fairface_label_val_18th.csv"

In [5]:
# Make sure every split directory contains one folder per gender class
subdirs = ['Male', 'Female']
for split_dir in (train_dir, val_dir):
    for subdir in subdirs:
        # exist_ok=True makes this cell safe to re-run
        os.makedirs(os.path.join(split_dir, subdir), exist_ok=True)

In [6]:
# Function to move images to respective subdirectories
def organize_images(df, data_dir):
    """Move every image listed in ``df`` into ``data_dir/<gender>/``.

    Only the base name of each ``file`` entry is used (e.g. ``"train/1.jpg"``
    is looked up as ``data_dir/1.jpg``) and the file is moved to
    ``data_dir/<gender>/1.jpg``.

    The function is safe to call repeatedly: a file that already sits in its
    destination folder is skipped silently, so re-running the notebook does
    not flood the output with warnings.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``file`` column (image path or name) and a ``gender``
        column whose values are ``'Male'`` or ``'Female'``.
    data_dir : str
        Directory that currently holds the images and that will hold the
        per-gender subdirectories.

    Raises
    ------
    KeyError
        If ``gender`` contains a label other than ``'Male'`` / ``'Female'``.
        The check runs before any file is touched, so a bad label never
        leaves the directory half-organized.
    """
    if df.empty:
        return

    valid_genders = {'Male', 'Female'}
    unexpected = set(df['gender']) - valid_genders
    if unexpected:
        raise KeyError(
            f"Unexpected gender label(s): {sorted(map(str, unexpected))}"
        )

    # Iterating the two columns directly avoids building a Series per row,
    # which is what iterrows() does.
    for file_path, gender in zip(df['file'], df['gender']):
        filename = os.path.basename(file_path)  # e.g., "1.jpg"
        src_path = os.path.join(data_dir, filename)
        dst_dir = os.path.join(data_dir, gender)
        dst_path = os.path.join(dst_dir, filename)

        if os.path.exists(src_path):
            # Do not depend on another cell having created the class folder
            os.makedirs(dst_dir, exist_ok=True)
            shutil.move(src_path, dst_path)
        elif os.path.exists(dst_path):
            # Already organized by a previous run: nothing to do
            continue
        else:
            print(f"Warning: {src_path} does not exist and cannot be moved.")

In [7]:
# Read the label table of each split into its own DataFrame
train_df, val_df = (pd.read_csv(csv_path) for csv_path in (train_csv, val_csv))

In [8]:
# Sort the images of each split into their gender folders
for labels_df, split_dir in ((train_df, train_dir), (val_df, val_dir)):
    organize_images(labels_df, split_dir)

print("Images have been successfully organized into separate folders.")

Images have been successfully organized into separate folders.


In [9]:
# Image Data Generators without Augmentation
# The only preprocessing is multiplying every pixel value by 1/255.
IMAGE_SIZE = (128, 128)  # every image is resized to this (height, width)
BATCH_SIZE = 32

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Class labels come from the subdirectory names; per the Keras docs the
# indices follow alphanumeric order -- check train_generator.class_indices.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# shuffle=False keeps validation batches in file order, so predictions made
# on this generator line up with val_generator.classes / .filenames when
# building a confusion matrix or classification report. Aggregate validation
# metrics during training are unaffected by the ordering.
val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 86744 images belonging to 2 classes.
Found 10954 images belonging to 2 classes.
