In [1]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.models import resnet50
from torch.optim import Adam
import torch.nn as nn
import pandas as pd
import os
from PIL import Image
import numpy as np

In [2]:
# Load the CSV file into a DataFrame
csv_file = './partitioned.csv'  # Update this to the correct path
attributes_df = pd.read_csv(csv_file)
attributes_df

Unnamed: 0,image_id,Gender,partition
0,039088.jpg,Female,0
1,030894.jpg,Male,0
2,045279.jpg,Female,0
3,016399.jpg,Female,0
4,013654.jpg,Male,0
...,...,...,...
49995,035413.jpg,Male,2
49996,013543.jpg,Female,2
49997,010990.jpg,Female,2
49998,027439.jpg,Female,2


In [3]:
# Map filenames to labels (assuming gender is the attribute for classification)
# Let's say 'Male' is represented as 1 and 'Female' as -1 in the CSV
filename_to_label = {row['image_id']: 1 if row['Gender'] == "Male" else 0 for index, row in attributes_df.iterrows()}
filename_to_label

{'039088.jpg': 0,
 '030894.jpg': 1,
 '045279.jpg': 0,
 '016399.jpg': 0,
 '013654.jpg': 1,
 '013749.jpg': 0,
 '023966.jpg': 0,
 '045553.jpg': 0,
 '030220.jpg': 0,
 '024080.jpg': 1,
 '012182.jpg': 0,
 '021849.jpg': 0,
 '014291.jpg': 1,
 '024756.jpg': 0,
 '035014.jpg': 0,
 '015362.jpg': 1,
 '048039.jpg': 1,
 '036312.jpg': 0,
 '030240.jpg': 1,
 '007196.jpg': 0,
 '003580.jpg': 1,
 '036148.jpg': 1,
 '034096.jpg': 0,
 '010442.jpg': 0,
 '016308.jpg': 0,
 '004205.jpg': 0,
 '006958.jpg': 1,
 '009164.jpg': 0,
 '020628.jpg': 1,
 '017568.jpg': 0,
 '014833.jpg': 1,
 '027030.jpg': 0,
 '044198.jpg': 0,
 '000002.jpg': 0,
 '035450.jpg': 0,
 '030904.jpg': 0,
 '007649.jpg': 1,
 '004218.jpg': 0,
 '010475.jpg': 1,
 '001186.jpg': 0,
 '046702.jpg': 0,
 '026827.jpg': 1,
 '012091.jpg': 1,
 '005387.jpg': 1,
 '017352.jpg': 1,
 '018845.jpg': 1,
 '028817.jpg': 0,
 '043053.jpg': 0,
 '011348.jpg': 0,
 '042803.jpg': 0,
 '003980.jpg': 1,
 '007091.jpg': 1,
 '021600.jpg': 1,
 '034265.jpg': 0,
 '017931.jpg': 0,
 '041959.j

In [4]:

def getImagePath(image_id):
    return os.path.join('img_align_celeba',image_id)

df = pd.read_csv("partitioned_multi_attr.csv")
train_df = df[df['partition'] == 0]
val_df = df[df['partition'] == 1]
test_df = df[df['partition'] == 2]

df_labels = df.set_index('image_id')
file_paths = df['image_id'].apply(getImagePath).tolist()


# Define the transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [5]:
class CelebADataset(Dataset):
    def __init__(self, file_paths, file_to_label, transform=None):
        self.file_paths = file_paths
        self.file_to_label = file_to_label
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_name = self.file_paths[idx]
        image = Image.open(img_name).convert('RGB')
        label = self.file_to_label[os.path.basename(img_name)]

        if self.transform:
            image = self.transform(image)

        return image, label

In [6]:
# # Load the CelebA dataset from a single directory
# image_directory = './processed_img'  # Replace with your dataset path
# image_paths = [os.path.join(image_directory, img) for img in os.listdir(image_directory)]

In [7]:
# # Create the dataset
# celeba_dataset = CelebADataset(image_paths, filename_to_label, transform=transform)

# # Now you can create DataLoaders for training and validation
# train_size = int(0.8 * len(celeba_dataset))
# val_size = len(celeba_dataset) - train_size
# train_dataset, val_dataset = torch.utils.data.random_split(celeba_dataset, [train_size, val_size])

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=32)

In [8]:
# Assuming the images are in a directory named 'images' in the current working directory
# Create separate file path and label mappings for each dataset partition
train_file_paths = train_df['image_id'].apply(getImagePath).tolist()
val_file_paths = val_df['image_id'].apply(getImagePath).tolist()
test_file_paths = test_df['image_id'].apply(getImagePath).tolist()

train_filename_to_label = {filename: labels.values for filename, labels in train_df.set_index('image_id').iterrows()}
val_filename_to_label = {filename: labels.values for filename, labels in val_df.set_index('image_id').iterrows()}
test_filename_to_label = {filename: labels.values for filename, labels in test_df.set_index('image_id').iterrows()}

# Initialize the datasets for each partition
train_dataset = CelebADataset(train_file_paths, train_filename_to_label, transform=transform)
val_dataset = CelebADataset(val_file_paths, val_filename_to_label, transform=transform)
test_dataset = CelebADataset(test_file_paths, test_filename_to_label, transform=transform)

# Create data loaders for each dataset partition
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [13]:
# Load a pre-trained ResNet model
model = resnet50(pretrained=True)

# Modify the model for binary classification
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)  # Output one value for binary classification

In [14]:
# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
device

device(type='cuda', index=0)

In [15]:
# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss() 
optimizer = Adam(model.parameters(), lr=0.001)

In [16]:
# Early stopping parameters
patience = 5  # How many epochs to wait after last time validation loss improved.
best_loss = np.Inf
epochs_no_improve = 0
early_stop = False

num_epochs = 100  # You can adjust the number of epochs
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        outputs = outputs.view(-1)  # Flatten the output to match the labels' shape
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        # Print stats after each batch
        if (batch_idx + 1) % 100 == 0:  # Adjust the modulo number based on your batch size
            print(f'Epoch: {epoch+1}, Batch: {batch_idx+1}, Loss: {running_loss / (batch_idx+1):.4f}', flush=True)

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            outputs = outputs.view(-1)  # Flatten the output
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    # Calculate average losses
    train_loss = running_loss / len(train_loader)
    val_loss = val_loss / len(val_loader)
    
    # Print training/validation statistics 
    print(f'Epoch: {epoch+1} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {val_loss:.6f}')
    print(f'Validation Accuracy: {100 * correct / total}%')

    # Save model if validation loss has decreased
    if val_loss < best_loss:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        best_loss,
        val_loss))
        torch.save(model.state_dict(), 'gender_classification_model.pth')
        best_loss = val_loss
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping')
            early_stop = True
            break

    if early_stop:
        print("Stopped early due to no improvement in validation loss")
        break

print('Training complete')

ValueError: Target size (torch.Size([32, 23])) must be the same as input size (torch.Size([32]))