In [1]:
import sys
sys.path.append("..")

In [2]:
import torch
import numpy as np
import random
import torch.optim as optim

from algorithms.algorithms import *
from utils.models import *
from utils.dataloaders import *

from tabulate import tabulate

In [3]:
device = 'cpu'
np.random.seed(0)

dataset = CompasDataset(device=device, sensitive_feature_labels=["sex", "age"])

In [4]:
def lr_kde_model_runner(dataset, hp, seeds):
    test = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    train = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    val = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}

    def append_res(l,acc,ei,dp,eo,eodd):
        l['accuracy'].append(acc)
        l['ei_disparity'].append(ei)
        l['dp_disparity'].append(dp)
        l['eo_disparity'].append(eo)
        l['eodd_disparity'].append(eodd)

    for i in range(len(seeds)):
        print('training seed', seeds[i] ,'started')
        random.seed(seeds[i])
        np.random.seed(seeds[i])
        torch.manual_seed(seeds[i]) 

        model = logReg(num_features=dataset.XZ_train.shape[1])
        model = model.to(device)
        
        lr = hp['learning_rate']
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
        
        results = trainer_kde_fair(
            model,
            dataset,
            optimizer,
            device,
            n_epochs=hp['n_epochs'],
            batch_size=hp['batch_size'], 
            z_blind=False,
            fairness=hp['fairness'], 
            lambda_=hp['lambda_'], 
            h=hp['h'], 
            delta_huber=hp['delta_huber'], 
            optimal_effort=True, 
            delta_effort=hp['delta_effort']
            )
        
        append_res(train,results.train_acc_hist[-1],results.train_ei_hist[-1],results.train_dp_hist[-1],results.train_eo_hist[-1],results.train_eodd_hist[-1])
        append_res(val,results.val_acc,results.val_ei,results.val_dp,results.val_eo,results.val_eodd)
        append_res(test,results.test_acc,results.test_ei,results.test_dp,results.test_eo,results.test_eodd)

    def get_res(l):
        res = {}
        res['accuracy_mean'] = np.mean(l['accuracy'])
        res['accuracy_var'] = np.std(l['accuracy'])
        res['accuracy_list'] = l['accuracy']
        res['ei_mean'] = np.mean(l['ei_disparity'])
        res['ei_var'] = np.std(l['ei_disparity'])
        res['ei_list'] = l['ei_disparity']
        res['dp_mean'] = np.mean(l['dp_disparity'])
        res['dp_var'] = np.std(l['dp_disparity'])
        res['dp_list'] = l['dp_disparity']
        res['eo_mean'] = np.mean(l['eo_disparity'])
        res['eo_var'] = np.std(l['eo_disparity'])
        res['eo_list'] = l['eo_disparity']
        res['eodd_mean'] = np.mean(l['eodd_disparity'])
        res['eodd_var'] = np.std(l['eodd_disparity'])
        res['eodd_list'] = l['eodd_disparity']
        return res

    res_train = get_res(train)
    res_val = get_res(val)
    res_test = get_res(test)
    print('Training finished for all seeds.')
    
    return res_train, res_val, res_test

def lr_fb_model_runner(dataset, hp, seeds):
    test = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    train = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    val = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}

    def append_res(l,acc,ei,dp,eo,eodd):
        l['accuracy'].append(acc)
        l['ei_disparity'].append(ei)
        l['dp_disparity'].append(dp)
        l['eo_disparity'].append(eo)
        l['eodd_disparity'].append(eodd)

    for i in range(len(seeds)):
        print('training seed', seeds[i] ,'started')
        random.seed(seeds[i])
        np.random.seed(seeds[i])
        torch.manual_seed(seeds[i]) 

        model = logReg(num_features=dataset.XZ_train.shape[1])
        model = model.to(device)
        
        lr = hp['learning_rate']
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
        
        results = trainer_fb_fair(
            model,
            dataset,
            optimizer,
            device,
            n_epochs=hp['n_epochs'],
            batch_size=hp['batch_size'], 
            z_blind=False,
            fairness=hp['fairness'], 
            lambda_=hp['lambda_'],
            optimal_effort=True, 
            delta_effort=hp['delta_effort']
            )
        
        append_res(train,results.train_acc_hist[-1],results.train_ei_hist[-1],results.train_dp_hist[-1],results.train_eo_hist[-1],results.train_eodd_hist[-1])
        append_res(val,results.val_acc,results.val_ei,results.val_dp,results.val_eo,results.val_eodd)
        append_res(test,results.test_acc,results.test_ei,results.test_dp,results.test_eo,results.test_eodd)

    def get_res(l):
        res = {}
        res['accuracy_mean'] = np.mean(l['accuracy'])
        res['accuracy_var'] = np.std(l['accuracy'])
        res['accuracy_list'] = l['accuracy']
        res['ei_mean'] = np.mean(l['ei_disparity'])
        res['ei_var'] = np.std(l['ei_disparity'])
        res['ei_list'] = l['ei_disparity']
        res['dp_mean'] = np.mean(l['dp_disparity'])
        res['dp_var'] = np.std(l['dp_disparity'])
        res['dp_list'] = l['dp_disparity']
        res['eo_mean'] = np.mean(l['eo_disparity'])
        res['eo_var'] = np.std(l['eo_disparity'])
        res['eo_list'] = l['eo_disparity']
        res['eodd_mean'] = np.mean(l['eodd_disparity'])
        res['eodd_var'] = np.std(l['eodd_disparity'])
        res['eodd_list'] = l['eodd_disparity']
        return res

    res_train = get_res(train)
    res_val = get_res(val)
    res_test = get_res(test)
    print('Training finished for all seeds.')
    
    return res_train, res_val, res_test

def lr_fc_model_runner(dataset, hp, seeds):
    test = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    train = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}
    
    val = {'accuracy':[],
            'ei_disparity':[],
            'dp_disparity':[],
            'eo_disparity':[],
            'eodd_disparity':[]}

    def append_res(l,acc,ei,dp,eo,eodd):
        l['accuracy'].append(acc)
        l['ei_disparity'].append(ei)
        l['dp_disparity'].append(dp)
        l['eo_disparity'].append(eo)
        l['eodd_disparity'].append(eodd)

    for i in range(len(seeds)):
        print('training seed', seeds[i] ,'started')
        random.seed(seeds[i])
        np.random.seed(seeds[i])
        torch.manual_seed(seeds[i]) 

        model = logReg(num_features=dataset.XZ_train.shape[1])
        model = model.to(device)
        
        lr = hp['learning_rate']
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
        
        results = trainer_fc_fair(
            model,
            dataset,
            optimizer,
            device,
            n_epochs=hp['n_epochs'],
            batch_size=hp['batch_size'], 
            z_blind=False,
            fairness=hp['fairness'], 
            lambda_=hp['lambda_'],
            optimal_effort=True, 
            delta_effort=hp['delta_effort']
            )
        
        append_res(train,results.train_acc_hist[-1],results.train_ei_hist[-1],results.train_dp_hist[-1],results.train_eo_hist[-1],results.train_eodd_hist[-1])
        append_res(val,results.val_acc,results.val_ei,results.val_dp,results.val_eo,results.val_eodd)
        append_res(test,results.test_acc,results.test_ei,results.test_dp,results.test_eo,results.test_eodd)

    def get_res(l):
        res = {}
        res['accuracy_mean'] = np.mean(l['accuracy'])
        res['accuracy_var'] = np.std(l['accuracy'])
        res['accuracy_list'] = l['accuracy']
        res['ei_mean'] = np.mean(l['ei_disparity'])
        res['ei_var'] = np.std(l['ei_disparity'])
        res['ei_list'] = l['ei_disparity']
        res['dp_mean'] = np.mean(l['dp_disparity'])
        res['dp_var'] = np.std(l['dp_disparity'])
        res['dp_list'] = l['dp_disparity']
        res['eo_mean'] = np.mean(l['eo_disparity'])
        res['eo_var'] = np.std(l['eo_disparity'])
        res['eo_list'] = l['eo_disparity']
        res['eodd_mean'] = np.mean(l['eodd_disparity'])
        res['eodd_var'] = np.std(l['eodd_disparity'])
        res['eodd_list'] = l['eodd_disparity']
        return res

    res_train = get_res(train)
    res_val = get_res(val)
    res_test = get_res(test)
    print('Training finished for all seeds.')
    
    return res_train, res_val, res_test

def experiment_runner(dataset, SGD_hp, EI_hp_fc, EI_hp_kde, EI_hp_fb, seeds):
    
    _, _, SGD = lr_kde_model_runner(dataset, SGD_hp, seeds)
    _, _, EI_fc = lr_fc_model_runner(dataset, EI_hp_fc, seeds)
    _, _, EI_kde = lr_kde_model_runner(dataset, EI_hp_kde, seeds)
    _, _, EI_fb = lr_fb_model_runner(dataset, EI_hp_fb, seeds)
    
    return SGD, EI_fc, EI_kde, EI_fb

def fb_hyperparameter_test(dataset, hp_test, seed=0):
    hp = hp_test.copy()
    result = []
    for i in hp_test['learning_rate']:
        for k in hp_test['lambda_']:
            c = []
            hp['learning_rate'] = i
            hp['lambda_'] = k
            train, val, _ = lr_fb_model_runner(dataset, hp, seeds=[seed])
            c.append(hp['learning_rate'])
            c.append(hp['lambda_'])
            c.append(train['accuracy_mean'])
            c.append(val['accuracy_mean'])
            c.append(val['ei_mean'])
            c.append(val['dp_mean'])
            c.append(val['eo_mean'])
            c.append(val['eodd_mean'])
            result.append(c)
    print(tabulate(result, headers=['learning_rate', 'lambda_', 'accuracy_train', 'accuracy_val','ei', 'dp', 'eo', 'eodd']))

def kde_hyperparameter_test(dataset, hp_test, seed=0):
    hp = hp_test.copy()
    result = []
    for i in hp_test['learning_rate']:
        for k in hp_test['lambda_']:
            c = []
            hp['learning_rate'] = i
            hp['lambda_'] = k
            train, val, _ = lr_kde_model_runner(dataset, hp, seeds=[seed])
            c.append(hp['learning_rate'])
            c.append(hp['lambda_'])
            c.append(train['accuracy_mean'])
            c.append(val['accuracy_mean'])
            c.append(val['ei_mean'])
            c.append(val['dp_mean'])
            c.append(val['eo_mean'])
            c.append(val['eodd_mean'])
            result.append(c)
    print(tabulate(result, headers=['learning_rate', 'lambda_', 'accuracy_train', 'accuracy_val','ei', 'dp', 'eo', 'eodd']))

def fc_hyperparameter_test(dataset, hp_test, seed=0):
    hp = hp_test.copy()
    result = []
    for i in hp_test['learning_rate']:
        for k in hp_test['lambda_']:
            c = []
            hp['learning_rate'] = i
            hp['lambda_'] = k
            train, val, _ = lr_fc_model_runner(dataset, hp, seeds=[seed])
            c.append(hp['learning_rate'])
            c.append(hp['lambda_'])
            c.append(train['accuracy_mean'])
            c.append(val['accuracy_mean'])
            c.append(val['ei_mean'])
            c.append(val['dp_mean'])
            c.append(val['eo_mean'])
            c.append(val['eodd_mean'])
            result.append(c)
    print(tabulate(result, headers=['learning_rate', 'lambda_', 'accuracy_train', 'accuracy_val','ei', 'dp', 'eo', 'eodd']))

In [5]:
SGD_hp_test = {}
SGD_hp_test['learning_rate'] = [0.0001, 0.001, 0.01, 0.1]
SGD_hp_test['lambda_'] = [0]
SGD_hp_test['n_epochs'] = 100
SGD_hp_test['batch_size'] = 1024 
SGD_hp_test['fairness'] = ''
SGD_hp_test['h'] = 0.01
SGD_hp_test['delta_huber'] = 0.5
SGD_hp_test['delta_effort'] = 3.1

In [6]:
kde_hyperparameter_test(dataset, SGD_hp_test, seed=0)

training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 22.23epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.01epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.12epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.17epochs/s]

Training finished for all seeds.
  learning_rate    lambda_    accuracy_train    accuracy_val         ei         dp         eo       eodd
---------------  ---------  ----------------  --------------  ---------  ---------  ---------  ---------
         0.0001          0          0.553293        0.564502  0.0281503  0.0419637  0.0723883  0.0723883
         0.001           0          0.67266         0.678788  0.0353051  0.0302846  0.0588049  0.0588049
         0.01            0          0.680676        0.677922  0          0.214921   0.263757   0.263757
         0.1             0          0.678726        0.664935  0          0.194474   0.279935   0.279935





In [7]:
SGD_hp = SGD_hp_test.copy()
SGD_hp['learning_rate'] = 0.001
SGD_hp['lambda_'] = 0

In [8]:
EI_hp_test = SGD_hp_test.copy()
EI_hp_test['learning_rate'] = [0.001]
EI_hp_test['lambda_'] = [0, 0.2, 0.6, 0.8, 0.9]
EI_hp_test['fairness'] = 'EI'

In [9]:
kde_hyperparameter_test(dataset, EI_hp_test, seed=0)

training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.05epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.82epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 18.35epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.77epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 18.14epochs/s]

Training finished for all seeds.
  learning_rate    lambda_    accuracy_train    accuracy_val          ei         dp         eo       eodd
---------------  ---------  ----------------  --------------  ----------  ---------  ---------  ---------
          0.001        0            0.67266         0.678788  0.0353051   0.0302846  0.0588049  0.0588049
          0.001        0.2          0.672444        0.678788  0.0041841   0.0302846  0.0588049  0.0588049
          0.001        0.6          0.671577        0.677056  0.0041841   0.0260293  0.0568855  0.0568855
          0.001        0.8          0.67136         0.677056  0.0041958   0.0235056  0.0588049  0.0588049
          0.001        0.9          0.673527        0.673593  0.00142248  0.0296399  0.0664825  0.0664825





In [10]:
EI_hp_kde = EI_hp_test.copy()
EI_hp_kde['learning_rate'] = 0.001
EI_hp_kde['lambda_'] = 0.9

In [11]:
EI_hp_test = SGD_hp_test.copy()
EI_hp_test['learning_rate'] = [0.001]
EI_hp_test['lambda_'] = [0, 0.6, 0.8, 0.9, 0.95]
EI_hp_test['fairness'] = 'EI'

In [12]:
fc_hyperparameter_test(dataset, EI_hp_test, seed=0)

training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.73epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.75epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.72epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.97epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.92epochs/s]

Training finished for all seeds.
  learning_rate    lambda_    accuracy_train    accuracy_val         ei         dp         eo       eodd
---------------  ---------  ----------------  --------------  ---------  ---------  ---------  ---------
          0.001       0             0.67266         0.678788  0.0353051  0.0302846  0.0588049  0.0588049
          0.001       0.6           0.67266         0.678788  0.0301638  0.0302846  0.0588049  0.0588049
          0.001       0.8           0.672877        0.678788  0.0302847  0.0320162  0.0607243  0.0607243
          0.001       0.9           0.67331         0.681385  0.0304304  0.0371373  0.0645631  0.0645631
          0.001       0.95          0.674177        0.683117  0.0210536  0.0406005  0.0703212  0.0703212





In [13]:
EI_hp_fc = EI_hp_test.copy()
EI_hp_fc['learning_rate'] = 0.001
EI_hp_fc['lambda_'] = 0.95

In [14]:
EI_hp_test = SGD_hp_test.copy()
EI_hp_test['learning_rate'] = [0.001]
EI_hp_test['lambda_'] = [0, 0.2, 0.4, 0.6, 0.8]
EI_hp_test['fairness'] = 'EI'

In [15]:
fb_hyperparameter_test(dataset, EI_hp_test, seed=0)

training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.60epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 18.72epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.97epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.30epochs/s]


Training finished for all seeds.
training seed 0 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.00epochs/s]

Training finished for all seeds.
  learning_rate    lambda_    accuracy_train    accuracy_val         ei         dp         eo       eodd
---------------  ---------  ----------------  --------------  ---------  ---------  ---------  ---------
          0.001        0            0.67266         0.678788  0.0353051  0.0302846  0.0588049  0.0588049
          0.001        0.2          0.681326        0.677922  0          0.127586   0.163991   0.163991
          0.001        0.4          0.67851         0.67619   0          0.147923   0.20027    0.20027
          0.001        0.6          0.681976        0.677056  0          0.164078   0.222248   0.222248
          0.001        0.8          0.683492        0.668398  0          0.15052    0.194512   0.194512





In [16]:
EI_hp_fb = EI_hp_test.copy()
EI_hp_fb['learning_rate'] = 0.001
EI_hp_fb['lambda_'] = 0.6

In [17]:
seeds = np.arange(1,6)

SGD, EI_fc, EI_kde, EI_fb = experiment_runner(dataset, SGD_hp, EI_hp_fc, EI_hp_kde, EI_hp_fb, seeds)

since Python 3.9 and will be removed in a subsequent version. The only 
supported seed types are: None, int, float, str, bytes, and bytearray.
  random.seed(seeds[i])


training seed 1 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.44epochs/s]


training seed 2 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.42epochs/s]


training seed 3 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.48epochs/s]


training seed 4 started


Training: 100%|██████████| 100/100 [00:04<00:00, 22.44epochs/s]


training seed 5 started


Training: 100%|██████████| 100/100 [00:04<00:00, 23.17epochs/s]
since Python 3.9 and will be removed in a subsequent version. The only 
supported seed types are: None, int, float, str, bytes, and bytearray.
  random.seed(seeds[i])


Training finished for all seeds.
training seed 1 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.98epochs/s]


training seed 2 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.11epochs/s]


training seed 3 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.52epochs/s]


training seed 4 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.68epochs/s]


training seed 5 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.78epochs/s]


Training finished for all seeds.
training seed 1 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.44epochs/s]


training seed 2 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.81epochs/s]


training seed 3 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.66epochs/s]


training seed 4 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.96epochs/s]


training seed 5 started


Training: 100%|██████████| 100/100 [00:05<00:00, 17.82epochs/s]
since Python 3.9 and will be removed in a subsequent version. The only 
supported seed types are: None, int, float, str, bytes, and bytearray.
  random.seed(seeds[i])


Training finished for all seeds.
training seed 1 started


Training: 100%|██████████| 100/100 [00:05<00:00, 19.43epochs/s]


training seed 2 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.39epochs/s]


training seed 3 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.43epochs/s]


training seed 4 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.73epochs/s]


training seed 5 started


Training: 100%|██████████| 100/100 [00:04<00:00, 20.43epochs/s]

Training finished for all seeds.





In [18]:
result = []
models = ["SGD", "EI FC", "EI KDE", "EI FB"]
sol = [SGD, EI_fc, EI_kde, EI_fb]
for i in range(len(models)):
    c = []
    c.append(models[i])
    res = sol[i]
    c.append(res['accuracy_mean'])
    c.append(res['accuracy_var'])
    c.append(res['ei_mean'])
    c.append(res['ei_var'])
    c.append(res['dp_mean'])
    c.append(res['dp_var'])
    c.append(res['eo_mean'])
    c.append(res['eo_var'])
    c.append(res['eodd_mean'])
    c.append(res['eodd_var'])
    result.append(c)

print(tabulate(result, headers=["model","accuracy_mean","accuracy_var","ei_mean","ei_var", "dp_mean", "dp_var","eo_mean","eo_var","eodd_mean","eodd_var"]))

model      accuracy_mean    accuracy_var      ei_mean       ei_var    dp_mean     dp_var    eo_mean     eo_var    eodd_mean    eodd_var
-------  ---------------  --------------  -----------  -----------  ---------  ---------  ---------  ---------  -----------  ----------
SGD             0.646015       0.0205736  0.00296756   0.00375783   0.0887482  0.0774021  0.100277   0.100458      0.131355   0.0847939
EI FC           0.646015       0.0205736  0.00143457   0.00208606   0.087159   0.076738   0.0984699  0.0994703     0.128585   0.0856497
EI KDE          0.645738       0.0238271  0.000401098  0.000802196  0.0760159  0.0859186  0.090494   0.115127      0.114208   0.100208
EI FB           0.642412       0.033238   0            0            0.102079   0.068147   0.126783   0.0877617     0.126783   0.0877617
