# Lab 2: Fairness Attacks

## Objectives
- Understand fairness vulnerabilities
- Implement label poisoning to inject bias against a sensitive group
- Test demographic parity attacks
- Evaluate fairness metrics

In [1]:
import torch
import torch.nn as nn
import numpy as np

# Choose the compute backend in priority order: CUDA, then Apple Silicon MPS
# (only when this torch build exposes `torch.backends.mps`), otherwise CPU.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

## Part 1: Create Biased Dataset

In [2]:
# Simulate dataset with sensitive attribute
# Seed the global RNG first so that this cell regenerates exactly the same
# features, group assignments and labels every time it is (re-)run.
SEED = 42
torch.manual_seed(SEED)

X = torch.randn(1000, 10)                      # 1000 samples, 10 random features
sensitive_attr = torch.randint(0, 2, (1000,))  # Binary sensitive attribute
y = torch.randint(0, 2, (1000,))               # Random binary labels before poisoning

# Poison to create bias
poison_mask = (sensitive_attr == 1)
y[poison_mask] = 0  # Force negative outcomes for group 1

# Report the share of positive labels in each group after poisoning.
print(f'Group 0 positive rate: {y[sensitive_attr==0].float().mean():.2%}')
print(f'Group 1 positive rate: {y[sensitive_attr==1].float().mean():.2%}')

Group 0 positive rate: 47.02%
Group 1 positive rate: 0.00%


## Part 2: Train and Evaluate

In [3]:
# Small MLP classifier: 10 input features -> 32 hidden units -> 2 class logits.
model = nn.Sequential(
    nn.Linear(10, 32),
    nn.ReLU(),
    nn.Linear(32, 2)
).to(device)

optimizer = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

# Transfer the data to the target device once. The tensors do not change
# between epochs, so repeating the transfer inside the loop is wasted work.
X_dev = X.to(device)
y_dev = y.to(device)

# Full-batch training on the poisoned labels for 20 epochs.
for epoch in range(20):
    optimizer.zero_grad()
    outputs = model(X_dev)
    loss = criterion(outputs, y_dev)
    loss.backward()
    optimizer.step()

# Evaluate fairness
# Build the group masks on the same device as the predictions so that every
# indexing operation and comparison below is device-consistent.
group_0 = (sensitive_attr == 0).to(device)
group_1 = (sensitive_attr == 1).to(device)

with torch.no_grad():
    preds = model(X_dev).argmax(1)
    # NOTE: accuracy is measured on the training data against the poisoned
    # labels `y`, so a high group-1 accuracy means the model reproduces the
    # injected labels for that group, not that it treats the group fairly.
    acc_0 = (preds[group_0] == y_dev[group_0]).float().mean()
    acc_1 = (preds[group_1] == y_dev[group_1]).float().mean()

print(f'Group 0 accuracy: {acc_0:.2%}')
print(f'Group 1 accuracy: {acc_1:.2%}')
# The reported gap is the absolute difference in per-group accuracy.
print(f'Fairness gap: {abs(acc_0-acc_1):.2%}')

Group 0 accuracy: 52.98%
Group 1 accuracy: 100.00%
Fairness gap: 47.02%
