In [None]:
import time
import random
import IPython
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.optim as optim

from models import Classifier
from dataloader import FairnessDataset
from algorithm import train_fair_classifier

In [None]:
# Experiment configuration: every knob a reader is expected to tune lives in this cell.

##### Which dataset to test #####
dataset_name = 'COMPAS' # ['Moon', 'Lawschool', 'AdultCensus', 'CreditDefault', 'COMPAS']

##### Which fairness notion to consider (Demographic Parity / Equalized Odds) #####
fairness = 'DP' # ['DP', 'EO']

##### Model specifications #####
n_layers = 2 # [positive integers]
n_hidden_units = 16 # [positive integers]

##### Our algorithm hyperparameters #####
h = 0.1 # Bandwidth hyperparameter in KDE [positive real numbers]
delta = 1.0 # Delta parameter in Huber loss [positive real numbers]
# Regularization factor of DDP/DEO; real number in [0.0, 1.0].
# NOTE(review): the training cell sweeps its own grid of lambda values, so this
# single value is not what the sweep trains with -- confirm whether it is still needed.
lambda_ = 0.05

##### Other training hyperparameters #####
batch_size = 2048
lr = 2e-4
lr_decay = 1.0 # Exponential decay factor of LR scheduler (1.0 keeps the LR constant)
# Number of random seeds to try.
# NOTE(review): the training cell currently fixes seed = 0 and does not read this value.
n_seeds = 5
n_epochs = 200

##### Whether to enable GPU training or not
# Use the GPU when one is visible to PyTorch and fall back to the CPU otherwise,
# so the notebook still runs end-to-end on machines without CUDA.
# To force a device, assign torch.device('cuda') or torch.device('cpu') directly.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Sweep the fairness regularization strength: train one fresh classifier per
# lambda value and gather the per-run results into a single `result` frame.
sweep_lambdas = np.logspace(-2, 2, 50)  # 50 log-spaced values from 1e-2 to 1e2
run_frames = []  # one entry per run; concatenated once after the sweep
starting_time = time.time()

seed = 0
IPython.display.clear_output()
print('Currently working on - seed: {}'.format(seed))

# Set a seed for random number generation
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Import dataset (shared by every run of the sweep)
dataset = FairnessDataset(dataset=dataset_name, device=device)
dataset.normalize()
input_dim = dataset.XZ_train.shape[1]

# A dedicated loop name keeps the `lambda_` value from the config cell intact.
for sweep_lambda in sweep_lambdas:
    # Re-seed before building the model so every run starts from the same
    # initial weights and the runs differ only in the regularization strength.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Create a classifier model. It is rebuilt for every lambda so that a run
    # does not continue from the weights trained under the previous lambda.
    net = Classifier(n_layers=n_layers, n_inputs=input_dim, n_hidden_units=n_hidden_units)
    net = net.to(device)

    # Set an optimizer (fresh optimizer/scheduler state for the fresh model)
    optimizer = optim.Adam(net.parameters(), lr=lr)
    lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay) # None

    # Fair classifier training
    # NOTE(review): assumes train_fair_classifier returns a pandas DataFrame;
    # the column assignments and pd.concat below rely on that -- confirm.
    temp = train_fair_classifier(dataset=dataset,
                                 net=net,
                                 optimizer=optimizer, lr_scheduler=lr_scheduler,
                                 fairness=fairness, lambda_=sweep_lambda, h=h, delta=delta,
                                 device=device, n_epochs=n_epochs, batch_size=batch_size, seed=seed)
    temp['seed'] = seed
    temp['lambda_'] = sweep_lambda  # record which sweep value produced these rows
    run_frames.append(temp)

# DataFrame.append was removed in pandas 2.0; a single concat also avoids
# re-copying the accumulated frame on every iteration.
result = pd.concat(run_frames)

# Average over the number of training runs actually executed.
print('Average running time: {:.3f}s'.format((time.time() - starting_time) / len(sweep_lambdas)))

In [None]:
# Column-wise mean over every row collected in `result`; shown as the cell output.
result.mean(axis=0)