# Fairness and Performance Evaluation of Multiple Models

In [None]:

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
    

In [None]:

# Define the transformations for the training and testing sets
# Every image is resized to 256x256, converted to a float tensor in [0, 1], and then
# standardized per channel.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # ImageNet channel statistics: the networks fine-tuned in this notebook start from
    # ImageNet-pretrained weights, which were fitted to inputs standardized this way.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load and preprocess the datasets
# ImageFolder takes the class label of each image from the name of its sub-directory.
train_dataset = torchvision.datasets.ImageFolder(root='path_to_train_dataset', transform=transform)
test_dataset = torchvision.datasets.ImageFolder(root='path_to_test_dataset', transform=transform)

# Training batches are reshuffled every epoch; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
    

In [None]:

# Define the models
# Each architecture is instantiated with pretrained weights, in the order listed.
models = [
    build(pretrained=True)
    for build in (
        torchvision.models.resnet18,
        torchvision.models.vgg16,
        torchvision.models.densenet121,
        torchvision.models.mobilenet_v2,
        torchvision.models.alexnet,
    )
]

# Modify the final layer to match the number of classes
# The output layer is found by attribute: `fc` if present, otherwise `classifier`,
# which is either an nn.Sequential (its last entry is replaced) or a single layer.
# Models with neither attribute are left untouched.
num_classes = len(train_dataset.classes)
for model in models:
    if hasattr(model, 'fc'):
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        continue
    if not hasattr(model, 'classifier'):
        continue
    if isinstance(model.classifier, nn.Sequential):
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, num_classes)
    else:
        model.classifier = nn.Linear(model.classifier.in_features, num_classes)
    

In [None]:

# Define the training function
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train ``model`` on ``dataloaders['train']`` and return it.

    Args:
        model: Network to optimize; it is moved to the first CUDA device when one
            is available, otherwise to the CPU.
        dataloaders: Mapping whose ``'train'`` entry yields ``(inputs, labels)``
            batches and exposes a ``dataset`` attribute supporting ``len()``.
        criterion: Callable taking ``(outputs, labels)`` and returning the loss.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over the training loader.

    Returns:
        The same ``model`` object after training.
    """
    target = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = torch.device(target)
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        loader = dataloaders['train']
        total_loss = 0.0
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            # Weight the batch loss by the batch size so that the epoch figure
            # below is a per-sample average.
            total_loss += batch_loss.item() * batch_inputs.size(0)

        epoch_loss = total_loss / len(loader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')

    return model
    

In [None]:

# Train all models
# Each network is fine-tuned with its own SGD optimizer and its weights are written to
# 'model_<name>.pth' in the working directory.
model_names = ['ResNet18', 'VGG16', 'DenseNet121', 'MobileNetV2', 'AlexNet']
trained_models = []
for model_name, model in zip(model_names, models):
    print(f"Training {model_name}...")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # The 'val' entry is passed along, but train_model as defined in this file only
    # iterates over 'train'.
    dataloaders = {'train': train_loader, 'val': test_loader}
    trained_model = train_model(
        model,
        dataloaders,
        criterion,
        optimizer,
        num_epochs=25,
    )
    trained_models.append(trained_model)
    checkpoint_weights = trained_model.state_dict()
    torch.save(checkpoint_weights, f'model_{model_name}.pth')
    

In [None]:

# Define the evaluation function
def _batch_protected_values(batch, protected_attr_name):
    """Return the protected-attribute values carried by ``batch`` as a flat list, or None."""
    if len(batch) < 3:
        return None
    values = batch[2]
    if isinstance(values, dict):
        values = values[protected_attr_name]
    if torch.is_tensor(values):
        return values.detach().cpu().reshape(-1).tolist()
    return np.asarray(values).reshape(-1).tolist()


def _conditional_rate(hits, condition):
    """Return the fraction of ``hits`` among samples where ``condition`` holds (NaN if none)."""
    if not condition.any():
        return float('nan')
    return float(hits[condition].mean())


def _group_fairness(preds, labels, groups, positive_label):
    """Return per-group positive-prediction rates and per-group TPR/FPR dictionaries."""
    predicted_positive = preds == positive_label
    actual_positive = labels == positive_label
    dp = {}
    eo = {}
    for group in np.unique(groups).tolist():
        in_group = groups == group
        dp[group] = _conditional_rate(predicted_positive, in_group)
        eo[group] = {
            'TPR': _conditional_rate(predicted_positive, in_group & actual_positive),
            'FPR': _conditional_rate(predicted_positive, in_group & ~actual_positive),
        }
    return dp, eo


def evaluate_model(model, dataloader, protected_attr_name='Age_Category',
                   protected_attrs=None, positive_label=1):
    """Compute accuracy, weighted F1 and per-group fairness metrics for ``model``.

    The protected attribute cannot be recovered from the image tensor, so it must be
    supplied explicitly, either inside each batch or through ``protected_attrs``.

    Args:
        model: Classifier returning one score per class for every input row.
        dataloader: Iterable of batches shaped ``(inputs, labels)`` or
            ``(inputs, labels, protected)``. ``protected`` is either the per-sample
            attribute values or a dict of such values keyed by attribute name.
        protected_attr_name: Key looked up when ``protected`` is a dict.
        protected_attrs: Optional sequence with one protected-attribute value per
            sample, in the order the dataloader yields the samples (so the loader
            must not shuffle). Takes precedence over values found in the batches.
        positive_label: Class treated as the positive outcome for the fairness rates.

    Returns:
        Tuple ``(dp, eo, accuracy, f1)``:
            dp: ``{group: P(pred == positive_label | group)}``.
            eo: ``{group: {'TPR': ..., 'FPR': ...}}`` where TPR is the positive
                prediction rate among the group's samples whose label equals
                ``positive_label`` and FPR the same rate among its other samples.
                A rate is NaN when the group has no such samples.
            accuracy: Overall accuracy.
            f1: Support-weighted F1 score.

    Raises:
        ValueError: If no protected attribute is available for a batch, or the
            number of protected values does not match the number of samples.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)
    all_preds = []
    all_labels = []
    all_protected_attrs = []
    with torch.no_grad():
        for batch in dataloader:
            inputs, labels = batch[0], batch[1]
            if protected_attrs is None:
                # Validate before running the model so a misconfigured loader fails fast.
                batch_attrs = _batch_protected_values(batch, protected_attr_name)
                if batch_attrs is None:
                    raise ValueError(
                        f"No values for protected attribute '{protected_attr_name}': "
                        "yield (inputs, labels, protected) batches or pass protected_attrs."
                    )
                if len(batch_attrs) != len(labels):
                    raise ValueError(
                        f"Batch has {len(labels)} labels but {len(batch_attrs)} "
                        f"values for protected attribute '{protected_attr_name}'."
                    )
                all_protected_attrs.extend(batch_attrs)
            outputs = model(inputs.to(device))
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    if protected_attrs is None:
        all_protected_attrs = np.array(all_protected_attrs)
    else:
        all_protected_attrs = np.asarray(protected_attrs).reshape(-1)
        if len(all_protected_attrs) != len(all_preds):
            raise ValueError(
                f"protected_attrs has {len(all_protected_attrs)} values but the "
                f"dataloader produced {len(all_preds)} samples."
            )
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    dp, eo = _group_fairness(all_preds, all_labels, all_protected_attrs, positive_label)
    return dp, eo, accuracy, f1
    

In [None]:

# Evaluate fairness and performance metrics for each trained model
# NOTE(review): test_loader is built from an ImageFolder, whose batches carry only
# (image, label) -- confirm where the 'Age_Category' values are expected to come from.
for model, model_name in zip(trained_models, model_names):
    print(f"Evaluating fairness and performance metrics for {model_name}...")
    metrics = evaluate_model(model, test_loader, protected_attr_name='Age_Category')
    dp, eo, accuracy, f1 = metrics
    report = [
        f"Demographic Parity for {model_name}: {dp}",
        f"Equalized Odds for {model_name}: {eo}",
        f"Accuracy for {model_name}: {accuracy}",
        f"F1 Score for {model_name}: {f1}",
    ]
    print("\n".join(report))
    # Blank line separating the reports of consecutive models.
    print()
    