In [1]:
# uncomment and install dependencies before continuing
# %pip install --upgrade inFairness requests tqdm

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

from inFairness.fairalgo import SenSeI
from inFairness import distances
from inFairness.auditor import SenSRAuditor, SenSeIAuditor

%load_ext autoreload
%autoreload 2

import data
import metrics

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class AdultDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        data: indexable collection of feature rows.
        labels: indexable collection of targets; its length defines the
            size of the dataset.
    """

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        # The dataset size is taken from the labels, not from the features.
        return len(self.labels)

    def __getitem__(self, idx):
        # Features first, then the label, returned as an (x, y) pair.
        return self.data[idx], self.labels[idx]

In [4]:
# Load both splits; each one unpacks into a (features, labels) pair.
train_df, test_df = data.load_data()
X_train_df, Y_train_df = train_df
X_test_df, Y_test_df = test_df

# Counterfactual copy of the test features in which every value v of the
# `relationship_Wife` column is replaced by 1 - v (the original frame is
# left untouched because we work on a copy).
X_test_df_spouse_flipped = X_test_df.copy()
X_test_df_spouse_flipped["relationship_Wife"] = (
    1 - X_test_df_spouse_flipped["relationship_Wife"]
)

# Preview the first rows of the training features.
X_train_df.head()

Unnamed: 0,age,capital-gain,capital-loss,education-num,hours-per-week,marital-status_Divorced,marital-status_Married-AF-spouse,marital-status_Married-civ-spouse,marital-status_Married-spouse-absent,marital-status_Never-married,...,relationship_Unmarried,relationship_Wife,sex_Male,workclass_Federal-gov,workclass_Local-gov,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,workclass_State-gov,workclass_Without-pay
0,0.409331,-0.14652,-0.218253,-1.613806,-0.49677,0,0,0,0,1,...,1,0,0,0,0,1,0,0,0,0
1,-1.104187,-0.14652,-0.218253,-0.050064,-1.741764,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
2,1.393118,-0.14652,-0.218253,-0.440999,2.574214,0,0,1,0,0,...,0,0,1,0,1,0,0,0,0,0
3,-0.423104,-0.14652,-0.218253,-0.440999,1.163221,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
4,-0.877159,-0.14652,-0.218253,1.122743,0.748224,0,0,1,0,0,...,0,0,1,0,0,0,0,1,0,0


In [5]:
# Seed PyTorch's global RNG before anything stochastic happens, so that
# weight initialisation and the shuffled training batches are repeatable
# under "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

device = torch.device('cpu')

# Positional indices of the protected attribute columns in the feature frame.
protected_vars = ['race_White', 'sex_Male']
protected_idxs = [X_train_df.columns.get_loc(var) for var in protected_vars]

# Convert each (features, labels) frame pair to tensors. The flipped test set
# reuses the original test labels.
X_train, y_train = data.convert_df_to_tensor(X_train_df, Y_train_df)
X_test, y_test = data.convert_df_to_tensor(X_test_df, Y_test_df)
X_test_flip, y_test_flip = data.convert_df_to_tensor(X_test_df_spouse_flipped, Y_test_df)

# Create the training and testing dataset
train_ds = AdultDataset(X_train, y_train)
test_ds = AdultDataset(X_test, y_test)
test_ds_flip = AdultDataset(X_test_flip, y_test_flip)

# Create train and test dataloaders. Only the training loader shuffles; the
# two test loaders keep the same order so their batches line up row by row.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=1000, shuffle=False)
test_dl_flip = DataLoader(test_ds_flip, batch_size=1000, shuffle=False)

In [6]:
# Create a fully connected neural network

class Model(nn.Module):
    """Fully connected network with two 100-unit ReLU hidden layers.

    Args:
        input_size: number of input features per sample.
        output_size: number of output units per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        hidden_units = 100
        # Layers are registered in forward order: fc1 -> fc2 -> fcout.
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fcout = nn.Linear(hidden_units, output_size)

    def forward(self, x):
        """Return the output of the final linear layer (no activation applied)."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fcout(hidden)

### Standard training

In [7]:
# Network dimensions: one input per feature column, two output units.
input_size, output_size = X_train.shape[1], 2

# Baseline network, optimised with Adam on a cross-entropy loss.
network_standard = Model(input_size, output_size).to(device)
optimizer = torch.optim.Adam(
    network_standard.parameters(),
    lr=1e-3,
)
loss_fn = F.cross_entropy

# Number of passes over the training data (reused by both training loops).
EPOCHS = 10

In [8]:
# Put the baseline network in training mode before optimising it.
network_standard.train()

for epoch in tqdm(range(EPOCHS)):

    for x, y in train_dl:

        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # Keep the logits as (batch, classes). A blanket .squeeze() would
        # collapse a final batch of size 1 to shape (classes,), which
        # cross_entropy cannot pair with a length-1 target tensor.
        y_pred = network_standard(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()

100%|██████████| 10/10 [00:08<00:00,  1.20it/s]


In [9]:
# Score the baseline network on the test loader. The spouse-consistency
# metric additionally receives the loader built from the flipped test set
# (presumably to compare predictions on the two versions -- see metrics.py).
accuracy = metrics.accuracy(network_standard, test_dl, device)
balanced_acc = metrics.balanced_accuracy(network_standard, test_dl, device)
spouse_consistency = metrics.spouse_consistency(
    network_standard, test_dl, test_dl_flip, device
)

# One metric per output line.
print(
    f'Accuracy: {accuracy}',
    f'Balanced accuracy: {balanced_acc}',
    f'Spouse consistency: {spouse_consistency}',
    sep='\n',
)

Accuracy: 0.855373740196228
Balanced accuracy: 0.7758295636205936
Spouse consistency: 0.9483635559486953


### Individually fair training

In [10]:
# Fresh network for the fair training run. Note that `optimizer` is rebound
# here and from now on updates `network_fair`, not `network_standard`.
network_fair = Model(input_size, output_size).to(device)
optimizer = torch.optim.Adam(network_fair.parameters(), lr=1e-3)
lossfn = F.cross_entropy

# Distance on the input space, fitted on the training features with the
# protected column indices.
distance_x = distances.LogisticRegSensitiveSubspace()
distance_x.fit(X_train, protected_idxs=protected_idxs)
distance_x.to(device)

# Distance on the output space, sized to the number of network outputs.
distance_y = distances.SquaredEuclideanDistance()
distance_y.fit(num_dims=output_size)
distance_y.to(device)

In [11]:
# SenSeI hyperparameters (see the inFairness SenSeI documentation for the
# exact role of each value).
rho, eps = 5.0, 0.1
auditor_nsteps, auditor_lr = 100, 1e-3

# Wrap the fair network together with both distances and the loss.
fairalgo = SenSeI(
    network_fair,
    distance_x,
    distance_y,
    lossfn,
    rho,
    eps,
    auditor_nsteps,
    auditor_lr,
)

In [12]:
# Switch the SenSeI wrapper to training mode.
fairalgo.train()

for epoch in tqdm(range(EPOCHS)):
    for x, y in train_dl:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        # The wrapper's response exposes the quantity to minimise as `.loss`.
        result = fairalgo(x, y)
        result.loss.backward()
        optimizer.step()

100%|██████████| 10/10 [08:18<00:00, 49.90s/it]


In [13]:
# Score the fairly-trained network with the same three metrics used for the
# baseline. These rebind `accuracy`, `balanced_acc` and `spouse_consistency`.
accuracy = metrics.accuracy(network_fair, test_dl, device)
balanced_acc = metrics.balanced_accuracy(network_fair, test_dl, device)
spouse_consistency = metrics.spouse_consistency(
    network_fair, test_dl, test_dl_flip, device
)

# One metric per output line.
print(
    f'Accuracy: {accuracy}',
    f'Balanced accuracy: {balanced_acc}',
    f'Spouse consistency: {spouse_consistency}',
    sep='\n',
)

Accuracy: 0.8373507261276245
Balanced accuracy: 0.7335829237002883
Spouse consistency: 0.9997788589119858


#### Let's now audit the two models and check their individual fairness compliance

In [14]:
# Audit both trained networks with the SenSR auditor.

audit_nsteps = 1000
audit_lr = 0.1

auditor = SenSRAuditor(
    loss_fn=loss_fn,
    distance_x=distance_x,
    num_steps=audit_nsteps,
    lr=audit_lr,
    max_noise=0.5,
    min_noise=-0.5,
)

# Same test data, lambda and threshold for both models so the results are comparable.
audit_result_stdmodel = auditor.audit(
    network_standard, X_test, y_test, lambda_param=10.0, audit_threshold=1.15
)
audit_result_fairmodel = auditor.audit(
    network_fair, X_test, y_test, lambda_param=10.0, audit_threshold=1.15
)

# Report each audit's `lower_bound` and `is_model_fair` fields.
# NOTE(review): the concluding message is printed unconditionally; it is not
# derived from the two audit results.
print("="*100)
print(f"Loss ratio (Standard model) : {audit_result_stdmodel.lower_bound}. Is model fair: {audit_result_stdmodel.is_model_fair}")
print(f"Loss ratio (fair model) : {audit_result_fairmodel.lower_bound}. Is model fair: {audit_result_fairmodel.is_model_fair}")
print("-"*100)
print("\t As signified by these numbers, the fair model is fairer than the standard model")
print("="*100)

  loss_ratio = np.divide(loss_vals_adversarial, loss_vals_original)


Loss ratio (Standard model) : 2.930340963480738. Is model fair: False
Loss ratio (fair model) : 1.045007604621082. Is model fair: True
----------------------------------------------------------------------------------------------------
	 As signified by these numbers, the fair model is fairer than the standard model


In [15]:
# Audit both trained networks with the SenSeI auditor.
# This rebinds `auditor`, `audit_nsteps`, `audit_lr` and both audit results.

audit_nsteps = 500
audit_lr = 0.001

auditor = SenSeIAuditor(
    distance_x=distance_x,
    distance_y=distance_y,
    num_steps=audit_nsteps,
    lr=audit_lr,
    max_noise=0.5,
    min_noise=-0.5,
)

# Same test data, loss, threshold and lambda for both models so the results are comparable.
audit_result_stdmodel = auditor.audit(
    network_standard, X_test, y_test, loss_fn, audit_threshold=1.15, lambda_param=50.0
)
audit_result_fairmodel = auditor.audit(
    network_fair, X_test, y_test, loss_fn, audit_threshold=1.15, lambda_param=50.0
)

# Report each audit's `lower_bound` and `is_model_fair` fields.
# NOTE(review): the concluding message is printed unconditionally; it is not
# derived from the two audit results.
print("="*100)
print(f"Loss ratio (Standard model) : {audit_result_stdmodel.lower_bound}. Is model fair: {audit_result_stdmodel.is_model_fair}")
print(f"Loss ratio (fair model) : {audit_result_fairmodel.lower_bound}. Is model fair: {audit_result_fairmodel.is_model_fair}")
print("-"*100)
print("\t As signified by these numbers, the fair model is fairer than the standard model")
print("="*100)

Loss ratio (Standard model) : 238.7647221454322. Is model fair: False
Loss ratio (fair model) : 1.0002661858231792. Is model fair: True
----------------------------------------------------------------------------------------------------
	 As signified by these numbers, the fair model is fairer than the standard model
