In [None]:
import math
import random
import logging
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split,KFold
from ray import tune
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable,Function
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.utils.data.dataset import random_split
from tqdm import trange
from pycm import ConfusionMatrix
from secml.array.c_array import CArray

# Font description dict (family / weight / size); presumably passed to
# matplotlib text calls elsewhere in the notebook -- not used in this part.
myfont = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 20,
         }

# Module-level logger used by the helper functions defined below.
logger = logging.getLogger(__name__)

%matplotlib inline

In [None]:
def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    A one-vs-all RBF-kernel SVM is fitted on a 66% split of ``(X, Y)`` and is
    used both as the target and as the surrogate classifier of a PGD-LS
    evasion attack. Each attack starts from a randomly drawn sample of the
    held-out split.

    :param X: Original inputs on which the model is trained (2-D: samples x features)
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
        (not used by this function)
    :param nb_attack: Number of attack runs, i.e. number of rows in the result
    :param dmax: Maximum perturbation size handed to the attack
    :return: Tuple ``(result_pts, result_class, labels)``:
        ``result_pts`` is a ``(nb_attack, n_features)`` array of adversarial
        points, ``result_class`` a ``(nb_attack, 1)`` array with the class
        predicted for each of them, and ``labels`` the labels of the first
        ``nb_attack`` held-out samples. Rows whose attack raised ``ValueError``
        are filled with NaN.
    """
    import random

    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    from secml.array import CArray
    from secml.data import CDataset
    from secml.data.splitter import CDataSplitterKFold, CTrainTestSplit
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    from secml.ml.peval.metrics import CMetricAccuracy

    data_set_encoded_secML = CArray(X, dtype=float, copy=True)
    data_set_encoded_secML = CDataset(data_set_encoded_secML, Y)

    # 66% / 34% train / test split.
    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr

    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Hyper-parameter grid explored by 3-fold cross-validation.
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1], 'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}
    random_state = 999
    xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )
    # Use a %s placeholder: passing an extra positional argument without one
    # makes the logging module report a formatting error instead of the value.
    logger.debug("The best training parameters are: %s", best_params)

    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set
    y_pred = clf.predict(ts_set_secML.X)

    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)
    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration
    noise_type = 'l2'   # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1       # Bounds of the attack space. Can be set to `None` for unbounded
    y_target = None     # None if `error-generic` or a class label for `error-specific`

    solver_params = {
        'eta': 0.1,         # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,    # None should be ok
        'max_iter': 1000,
        'eps': 1e-2         # Tolerance on the stopping crit.
    }

    # Run attack
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]

    # NaN-initialised so that a failed attack leaves a recognisable row
    # instead of uninitialised memory.
    result_pts = np.full([nb_attack, nb_feat], np.nan)
    result_class = np.full([nb_attack, 1], np.nan)

    # take a point at random being the starting point of the attack and run the attack
    for nb_iter in range(0, nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        x0, y0 = ts_set_secML[rn, :].X, ts_set_secML[rn, :].Y

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            adv_pt = adv_ds_pgdls.X.get_data()
            result_pts[nb_iter] = adv_pt
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            logger.warning("value error on {}".format(nb_iter))

    # NOTE(review): the labels returned here belong to the FIRST nb_attack
    # held-out samples, not to the randomly drawn starting points above --
    # confirm that callers do not pair them row by row with result_pts.
    return result_pts, result_class, ts_set_secML[:nb_attack, :].Y

In [None]:
class bm:
    """
    Tiny helper to estimate conditional probabilities on a DataFrame.

    Usage: ``bm(df).P(col=value_or_predicate).given(other=value_or_predicate)``
    returns the fraction of rows satisfying the ``P`` constraints among the
    rows satisfying the ``given`` constraints.
    """

    def __init__(self, df):
        self._df = df
        # Initialised here so that given() may be called without a prior P().
        self._variables = {}
        self._given = {}

    def P(self, **kwargs):
        """
        Declares the random variables from the set `kwargs`.

        Each keyword is a column name; its value is either a literal compared
        with ``==`` or a callable applied element-wise that returns a boolean.
        Returns ``self`` so that ``given`` can be chained.
        """
        self._variables = kwargs
        return self

    def given(self, **kwargs):
        """
        Calculates the probability on a finite set of samples with `kwargs` in the
        conditioning set.

        With an empty conditioning set the marginal probability over all rows
        is returned. If no row satisfies the condition the result is the 0/0
        division of two numpy integers (NaN).
        """
        self._given = kwargs

        condition = self._mask(self._given)
        event = self._mask(self._variables) & condition
        return event.sum() / condition.sum()

    def _mask(self, constraints):
        """Boolean row mask that is True where every constraint holds."""
        # Start from an all-True Series (not a bare ``True``) so that the
        # result always supports ``.sum()``, even with no constraints.
        mask = pd.Series(True, index=self._df.index, dtype=bool)
        for column, spec in constraints.items():
            if callable(spec):
                mask = mask & self._df[column].apply(spec)
            else:
                mask = mask & (self._df[column] == spec)
        return mask

In [None]:
def transform_dataset(df):
    """
    Encode the COMPAS-style dataframe into a numeric feature matrix.

    Only rows whose ``race`` is "Caucasian" or "African-American" are kept.
    After removing the label/auxiliary columns, the remaining columns are
    encoded by POSITION: columns 0, 1, 10 and 11 are one-hot encoded,
    column 2 is turned into a one-hot "is Caucasian" indicator and columns
    3..9 are min-max scaled to [0, 1].

    :param df: input dataframe; must contain the columns ``race``,
        ``c_jail_in``, ``c_jail_out``, ``decile_score``, ``two_year_recid``
        and ``score_text``
    :return: Tuple ``(df_binary_encoded, Y, S, Y_true)`` where ``Y`` is the
        ``decile_score`` column, ``S`` the ``race`` column and ``Y_true`` the
        ``two_year_recid`` column (all three keep the filtered index, while
        ``df_binary_encoded`` has a fresh 0..n-1 index)
    """
    keep_rows = (df["race"] == "Caucasian") | (df["race"] == "African-American")
    # Explicit copy: the columns are removed from our own frame, never from `df`.
    df_binary = df[keep_rows].copy()

    # separate the targets from the rest of the features
    Y = df_binary.pop('decile_score')
    Y_true = df_binary.pop('two_year_recid')
    df_binary = df_binary.drop(columns=['c_jail_in', 'c_jail_out', 'score_text'])

    # `race` stays in the feature frame; it is encoded below (column 2).
    S = df_binary['race']

    print(df_binary.shape)

    # Dense output keeps all encoded pieces homogeneous. The keyword was
    # renamed from `sparse` to `sparse_output` in scikit-learn 1.2 and the old
    # name was later removed, so try the new spelling first.
    try:
        encod = preprocessing.OneHotEncoder(sparse_output=False)
    except TypeError:
        encod = preprocessing.OneHotEncoder(sparse=False)

    data_to_encode = df_binary.to_numpy()

    def one_hot(values):
        """One-hot encode a 1-D array and wrap the result in a DataFrame."""
        return pd.DataFrame(encod.fit_transform(values.reshape(-1, 1)))

    pieces = [
        one_hot(data_to_encode[:, 0]),
        one_hot(data_to_encode[:, 1]),
        one_hot(data_to_encode[:, 2] == "Caucasian"),
    ]

    # columns 3..9 are scaled to lie between 0 and 1
    for i in range(3, 10):
        column = data_to_encode[:, i]
        ma = np.amax(column)
        mi = np.amin(column)
        pieces.append(pd.DataFrame((column - mi) / (ma - mi)))

    pieces.append(one_hot(data_to_encode[:, 10]))
    pieces.append(one_hot(data_to_encode[:, 11]))

    # Every piece carries the default 0..n-1 index, so one concat is
    # equivalent to appending them one at a time.
    df_binary_encoded = pd.concat(pieces, axis=1)

    return df_binary_encoded, Y, S, Y_true

In [None]:
def transform_dataset_census(df):
    """
    Encode the "census income" dataframe into a numeric feature matrix.

    Rows containing the missing-value marker "?" are dropped. The last column
    is the label; the columns named ``sex`` and ``fnlwgt`` are removed from
    the features. The remaining columns are encoded by POSITION: column 0
    (age) and columns 8-9 are min-max scaled, columns 1-7 and 10-11 are
    one-hot encoded with ``pd.get_dummies``.

    :param df: the dataset "census income" with reduced features,
        heterogeneous types and missing values; it must expose columns named
        ``sex`` and ``fnlwgt`` and carry the label in its last column
    :return: Tuple ``(df_binary_encoded, Y, S, Y_true)`` where ``Y`` is the
        integer-encoded label array, ``S`` the ``sex`` column (protected
        attribute, original index kept) and ``Y_true`` an empty list (no
        separate ground truth for this dataset)
    """
    # remove examples with missing values (on a new frame, `df` is untouched)
    df_replace = df.replace(to_replace="?", value=np.nan).dropna(axis=0)

    # Only a label encoder is needed. The unused OneHotEncoder(sparse=False)
    # that used to be built here raises TypeError on recent scikit-learn.
    label_encoder = preprocessing.LabelEncoder()

    # labels (initially strings) become integer class ids
    label_column = df_replace.columns[-1]
    Y = label_encoder.fit_transform(df_replace.iloc[:, -1])
    df_replace = df_replace.drop(columns=[label_column])

    # Y_true is the true outcome, in this case we're not using a future predictor (vs. compas)
    Y_true = []

    # S is the protected attribute
    S = df_replace["sex"]
    df_replace = df_replace.drop(columns=["sex", "fnlwgt"])

    print(df_replace.shape)

    # feature 0 (age) is min-max normalised
    age = df_replace.to_numpy()[:, 0]
    mi = np.amin(age)
    ma = np.amax(age)
    df_binary_encoded = pd.DataFrame((age - mi) / (ma - mi))
    print(df_binary_encoded.shape)

    def positional(frame_or_series):
        """Wrap in a DataFrame and discard the index so pieces align by position."""
        return pd.DataFrame(frame_or_series).reset_index(drop=True)

    pieces = [df_binary_encoded]

    # features 1 to 7 (after removal) are categorical
    for i in range(1, 8):
        pieces.append(positional(pd.get_dummies(df_replace.iloc[:, i])))

    # features 8 and 9 are numerical
    for i in range(8, 10):
        column = df_replace.iloc[:, i]
        mi = np.amin(column)
        ma = np.amax(column)
        pieces.append(positional((column - mi) / (ma - mi)))

    # features 10 and 11 are categorical
    for i in range(10, 12):
        pieces.append(positional(pd.get_dummies(df_replace.iloc[:, i])))

    df_binary_encoded = pd.concat(pieces, axis=1)

    return df_binary_encoded, Y, S, Y_true

In [None]:
def transform_dataset_credit(df):
    """
    Encode a credit dataframe (label in the last column) into numeric features.

    Columns are addressed by POSITION. The label is ``True`` where the last
    column equals 2; the protected attribute is ``column 12 > 25``.
    Categorical columns are one-hot encoded with ``pd.get_dummies`` and
    numerical columns are min-max scaled.

    The input frame is NOT modified (earlier versions deleted its last column
    in place, so calling the function twice on the same frame silently
    dropped a feature column).

    :param df: dataframe with at least 20 feature columns followed by the label column
    :return: Tuple ``(df_binary_encoded, Y, S, Y_true)`` where ``Y`` is a
        boolean numpy array, ``S`` a boolean Series and ``Y_true`` an empty
        list (no separate ground truth for this dataset)
    """
    Y = np.array(df.iloc[:, -1] == 2)

    # Work on the feature columns only, without touching the caller's frame.
    features = df.iloc[:, :-1]

    # Y_true is the true outcome, in this case we're not using a future predictor (vs. compas)
    Y_true = []

    # S is the protected attribute
    S = features.iloc[:, 12] > 25

    # NOTE(review): axis=1 drops COLUMNS containing "?" (not rows), which
    # would shift the positional indices used below -- presumably the data
    # has no missing values; confirm.
    df_replace = features.replace(to_replace="?", value=np.nan).dropna(axis=1)

    print(df_replace.shape)

    # The first output column is column 1 min-max normalised.
    # NOTE(review): column 1 is encoded again by the numerical loop below.
    first = df_replace.to_numpy()[:, 1]
    mi = np.amin(first)
    ma = np.amax(first)

    # df_binary_encoded is the data frame containing encoded features
    df_binary_encoded = pd.DataFrame((first - mi) / (ma - mi))

    # categorical attributes
    for i in [0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19]:
        dummies = pd.get_dummies(df_replace.iloc[:, i])
        df_binary_encoded = pd.concat([df_binary_encoded, pd.DataFrame(dummies)], axis=1)

    # Numerical attributes
    for i in [1, 7, 10, 15, 17]:
        column = df_replace.iloc[:, i]
        mi = np.amin(column)
        ma = np.amax(column)
        scaled = (column - mi) / (ma - mi)
        df_binary_encoded = pd.concat([df_binary_encoded, pd.DataFrame(scaled)], axis=1)

    print(S)

    return df_binary_encoded, Y, S, Y_true


In [None]:
class GradientReversalFunction(Function):
    """Autograd function: identity going forward, ``-lambda_ * grad`` going backward."""

    @staticmethod
    def forward(ctx, x, lambda_):
        # Remember the scaling factor for the backward pass.
        ctx.lambda_ = lambda_
        passthrough = x.clone()
        return passthrough

    @staticmethod
    def backward(ctx, grads):
        # Build the factor as a tensor with the dtype/device of the incoming gradient.
        scale = grads.new_tensor(ctx.lambda_)
        reversed_grads = -scale * grads
        # Second slot: no gradient with respect to lambda_.
        return reversed_grads, None

class GradientReversal(torch.nn.Module):
    """Module wrapper that applies ``GradientReversalFunction`` with a fixed ``lambda_``."""

    def __init__(self, lambda_=1):
        super().__init__()
        # Factor handed to GradientReversalFunction on every forward call.
        self.lambda_ = lambda_

    def forward(self, x):
        reverse = GradientReversalFunction.apply
        return reverse(x, self.lambda_)
    



In [None]:
class Net(nn.Module):
    """
    Three hidden layers of 32 units with a single-output prediction head and,
    optionally, a 2-logit head behind a gradient-reversal layer.

    The second head exists only when ``grl_lambda`` is neither ``None`` nor 0,
    which is the same test the training loops use to decide whether the model
    returns one or two outputs.
    """

    def __init__(self, input_shape, grl_lambda=100):
        """
        :param input_shape: number of input features
        :param grl_lambda: gradient-reversal factor; ``None`` or 0 disables the second head
        """
        super(Net, self).__init__()
        # an affine operation: y = Wx + b
        self._grl_lambda = grl_lambda
        self.fc1 = nn.Linear(input_shape, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 1)
        if self._grl_enabled():
            self.grl = GradientReversal(grl_lambda)
            self.fc5 = nn.Linear(32, 2)

    def _grl_enabled(self):
        """True when the gradient-reversal head is active."""
        return self._grl_lambda is not None and self._grl_lambda != 0

    def forward(self, x):
        """
        :return: ``y`` alone when the gradient-reversal head is disabled,
            otherwise the tuple ``(y, s)`` where ``s`` holds the 2 logits of
            the second head
        """
        hidden = self.fc1(x)
        hidden = F.relu(hidden)
        # Pass the module's mode explicitly: F.dropout defaults to
        # training=True and would otherwise keep dropping units in eval mode.
        hidden = F.dropout(hidden, 0.1, training=self.training)
        hidden = self.fc2(hidden)
        hidden = F.relu(hidden)
        hidden = self.fc3(hidden)
        hidden = F.relu(hidden)

        y = self.fc4(hidden)

        if self._grl_enabled():
            s = self.grl(hidden)
            s = self.fc5(s)
            return y, s
        else:
            return y

In [None]:
class Net_nodrop(nn.Module):
    """
    Same architecture as ``Net`` (three hidden layers of 32 units, one
    prediction output, optional gradient-reversal head) but without dropout.

    The second head exists only when ``grl_lambda`` is neither ``None`` nor 0,
    which is the same test the training loops use to decide whether the model
    returns one or two outputs.
    """

    def __init__(self, input_shape, grl_lambda=100):
        """
        :param input_shape: number of input features
        :param grl_lambda: gradient-reversal factor; ``None`` or 0 disables the second head
        """
        super(Net_nodrop, self).__init__()
        # an affine operation: y = Wx + b
        self._grl_lambda = grl_lambda
        self.fc1 = nn.Linear(input_shape, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 1)
        if self._grl_enabled():
            self.grl = GradientReversal(grl_lambda)
            self.fc5 = nn.Linear(32, 2)

    def _grl_enabled(self):
        """True when the gradient-reversal head is active."""
        return self._grl_lambda is not None and self._grl_lambda != 0

    def forward(self, x):
        """
        :return: ``y`` alone when the gradient-reversal head is disabled,
            otherwise the tuple ``(y, s)`` where ``s`` holds the 2 logits of
            the second head
        """
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        hidden = F.relu(self.fc3(hidden))

        y = self.fc4(hidden)

        if self._grl_enabled():
            s = self.grl(hidden)
            s = self.fc5(s)
            return y, s
        else:
            return y

In [None]:
class Net_CENSUS(nn.Module):
    """
    Wider variant of ``Net``: three hidden layers of 128 units, one
    prediction output and an optional 2-logit gradient-reversal head.

    The second head exists only when ``grl_lambda`` is neither ``None`` nor 0,
    which is the same test the training loops use to decide whether the model
    returns one or two outputs.
    """

    def __init__(self, input_shape, grl_lambda=100):
        """
        :param input_shape: number of input features
        :param grl_lambda: gradient-reversal factor; ``None`` or 0 disables the second head
        """
        super(Net_CENSUS, self).__init__()
        # an affine operation: y = Wx + b
        self._grl_lambda = grl_lambda
        self.fc1 = nn.Linear(input_shape, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, 1)
        if self._grl_enabled():
            self.grl = GradientReversal(grl_lambda)
            self.fc5 = nn.Linear(128, 2)

    def _grl_enabled(self):
        """True when the gradient-reversal head is active."""
        return self._grl_lambda is not None and self._grl_lambda != 0

    def forward(self, x):
        """
        :return: ``y`` alone when the gradient-reversal head is disabled,
            otherwise the tuple ``(y, s)`` where ``s`` holds the 2 logits of
            the second head
        """
        hidden = self.fc1(x)
        hidden = F.relu(hidden)
        # Dropout follows the module's train/eval mode.
        hidden = F.dropout(hidden, 0.1, training=self.training)
        hidden = self.fc2(hidden)
        hidden = F.relu(hidden)
        hidden = self.fc3(hidden)
        hidden = F.relu(hidden)

        y = self.fc4(hidden)

        if self._grl_enabled():
            s = self.grl(hidden)
            s = self.fc5(s)
            return y, s
        else:
            return y

In [None]:
def get_paras(net):
    """Return the parameters of ``net`` as a list, in ``named_parameters()`` order."""
    return [tensor for _name, tensor in net.named_parameters()]

def get_active_neurons4(net, sample):
    """
    Run ``sample`` through ``net`` and record the outputs of ``fc1``..``fc4``.

    :param net: module exposing ``fc1``, ``fc2``, ``fc3`` and ``fc4`` sub-modules
        and whose ``forward`` accepts the input as keyword ``x``
    :param sample: a single input (tensor or array-like), converted to float32
    :return: list with the detached output of each hooked layer, in the order
        in which the layers were executed during the forward pass
    """
    neurons = []

    def hook(module, input, output):
        neurons.append(output.detach())

    handles = [layer.register_forward_hook(hook)
               for layer in (net.fc1, net.fc2, net.fc3, net.fc4)]
    try:
        # as_tensor avoids the copy-construct warning when `sample` is already
        # a tensor; no_grad because only the activations are needed.
        with torch.no_grad():
            net(x=torch.as_tensor(sample, dtype=torch.float32))
    finally:
        # Always detach the hooks, even if the forward pass raised, so they do
        # not pile up on the module.
        for handle in handles:
            handle.remove()
    return neurons

def get_contrib4(paras, neurons):
    """
    For each of the first three recorded layers, multiply its activations by
    the parameter tensor at index ``2 * layer + 2`` (element-wise, with
    broadcasting) and return the three products as a list.
    """
    return [neurons[layer] * paras[2 * layer + 2] for layer in range(3)]

def get_path_set4(net, sample, GAMMA=0.9):
    """
    Extract the set of connections that account for the network's output on ``sample``.

    Starting from the arg-max unit of the last hooked layer, the function
    walks backwards through the layers. For every active unit ``j`` of layer
    ``L`` it takes the incoming contributions in decreasing order until their
    running total reaches ``GAMMA`` times the unit's recorded output, adds the
    corresponding ``(L, source_index, j)`` triples to the result and marks the
    source units as active in layer ``L - 1``.

    :param net: model accepted by ``get_active_neurons4``
    :param sample: a single input sample
    :param GAMMA: fraction of the unit's output that the selected contributions must reach
    :return: set of ``(layer, source_index, target_index)`` tuples
    """
    neurons = get_active_neurons4(net, sample)
    paras = get_paras(net)
    contrib_list = get_contrib4(paras, neurons)

    path_set = set()
    active_neuron_indice = [[], [], [], [torch.argmax(neurons[3]).item()]]

    for L in (3, 2, 1):
        # A source unit reached from several targets is queued only once:
        # re-processing it would add exactly the same triples again.
        already_queued = set()
        for j in active_neuron_indice[L]:
            incoming = contrib_list[L - 1][j]
            ranked = torch.sort(incoming, descending=True)
            target = GAMMA * neurons[L][j].item()
            running_total = 0
            for k in range(len(incoming)):
                running_total += ranked.values[k].item()
                source = ranked.indices[k].item()
                if source not in already_queued:
                    already_queued.add(source)
                    active_neuron_indice[L - 1].append(source)
                path_set.add((L, source, j))
                if running_total >= target:
                    break
    return path_set


In [None]:
def get_metrics(results, threshold, fraction,dataset='compas'):
    """
    Build the fairness / accuracy metrics dictionary from a results dataframe.

    :param results: dataframe with the columns ``pred``, ``true``, ``compas`` and ``race``
    :param threshold: a prediction counts as positive when ``pred > threshold``
    :param fraction: value stored unchanged under ``"adversarial_fraction"``
    :param dataset: when equal to ``'compas'`` the high-risk metrics are added
    :return: dict of metrics
    """

    def positive_rate(**conditions):
        # Share of rows with pred > threshold among the rows matching `conditions`.
        return bm(results).P(pred=lambda x: x > threshold).given(**conditions)

    # Calculate biases after training
    dem_parity = abs(positive_rate(race=0) - positive_rate(race=1))
    eq_op = abs(positive_rate(race=0, compas=True) - positive_rate(race=1, compas=True))
    dem_parity_ratio = abs(positive_rate(race=0) / positive_rate(race=1))

    actual = (results['true'] == True).values
    predicted = (results['pred'] > threshold).values
    cm = ConfusionMatrix(actual_vector=actual, predict_vector=predicted)

    result = {"DP": dem_parity,
              "EO": eq_op,
              "DP ratio": dem_parity_ratio,
              "acc": cm.Overall_ACC,
              "acc_ci_min": cm.CI95[0],
              "acc_ci_max": cm.CI95[1],
              "f1": cm.F1_Macro}

    if dataset == 'compas':
        # Same metrics restricted to the "high risk" cut (score above 8).
        cm_high_risk = ConfusionMatrix(actual_vector=(results['compas'] > 8).values,
                                       predict_vector=(results['pred'] > 8).values)
        result["acc_high_risk"] = cm_high_risk.Overall_ACC
        result["acc_ci_min_high_risk"] = cm_high_risk.CI95[0]
        result["acc_ci_max_high_risk"] = cm_high_risk.CI95[1]
        result["f1_high_risk"] = cm_high_risk.F1_Macro

    # Kept last so the key order matches in both variants.
    result["adversarial_fraction"] = fraction

    return result

In [None]:
def train_and_evaluate(train_loader: DataLoader,
                       val_loader: DataLoader,
                       test_loader: DataLoader,
                       device,
                       input_shape,
                       grl_lambda=None,
                       model=None,
                       dataset='compas'):
    """
    Train a model for 50 epochs and evaluate it on the test loader.

    Every loader must yield 4-tuples ``(x, y, y_true, s)``; ``s`` is indexed
    as a 2-D tensor (``argmax(dim=1)`` gives the class used by the bias loss).

    :param train_loader: Pytorch-like DataLoader with training data.
    :param val_loader: Pytorch-like DataLoader with validation data.
    :param test_loader: Pytorch-like DataLoader with testing data.
    :param device: The target device for the training.
    :param input_shape: Number of input features (used when a model is created).
    :param grl_lambda: Gradient-reversal factor. ``None`` selects the default
        of 50; 0 trains without the second (bias) head.
    :param model: Optional pre-built model; it must return ``(y, s)`` when
        ``grl_lambda`` is non-zero and ``y`` alone otherwise.
    :param dataset: ``'census'`` -> ``Net_CENSUS``, ``'compas_nodrop'`` ->
        ``Net_nodrop``, anything else -> ``Net`` (only used when ``model`` is None).
    :return: A tuple: (trained Pytorch-like model, dataframe with results on test set).
        The dataframe has the columns ``pred``, ``true``, ``compas``, ``race``
        and, with the bias head, ``race_hat`` (first logit of that head).
    """

    torch.manual_seed(0)

    # The argument used to be overwritten with 50 unconditionally; 50 is now
    # only the default, so an explicit value (including 0) is honoured.
    if grl_lambda is None:
        grl_lambda = 50
    epochs = 50
    use_grl = grl_lambda != 0

    if model is None:
        if dataset == 'census':
            model = Net_CENSUS(input_shape=input_shape, grl_lambda=grl_lambda)
        elif dataset == 'compas_nodrop':
            model = Net_nodrop(input_shape=input_shape, grl_lambda=grl_lambda)
        else:
            model = Net(input_shape=input_shape, grl_lambda=grl_lambda)

    model = model.to(device)
    criterion = nn.MSELoss().to(device)
    criterion_bias = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, threshold=0.3, cooldown=5)

    def forward_loss(x, y, s):
        """Return ``(loss, y_hat, s_hat)`` for one batch (``s_hat`` is None without GRL)."""
        if use_grl:
            y_hat, s_hat = model(x)
            # CrossEntropyLoss expects (logits, class indices), in that order.
            loss = criterion(y_hat, y) + criterion_bias(s_hat, s.argmax(dim=1))
            return loss, y_hat, s_hat
        y_hat = model(x)
        return criterion(y_hat, y), y_hat, None

    training_losses = []
    validation_losses = []

    t_prog = trange(epochs, desc='Training neural network', leave=False, position=1, mininterval=5)

    for epoch in t_prog:
        model.train()

        batch_losses = []
        for x_batch, y_batch, _, s_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            s_batch = s_batch.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            loss, _, _ = forward_loss(x_batch, y_batch, s_batch)
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        training_loss = np.mean(batch_losses)
        training_losses.append(training_loss)

        model.eval()
        val_losses = []
        with torch.no_grad():
            for x_val, y_val, _, s_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                s_val = s_val.to(device)
                val_loss, _, _ = forward_loss(x_val, y_val, s_val)
                val_losses.append(val_loss.item())
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

        # Drive the scheduler with the epoch mean, not the last batch's loss.
        scheduler.step(validation_loss)

        t_prog.set_postfix({"epoch": epoch, "training_loss": training_loss,
                            "validation_loss": validation_loss}, refresh=False)  # print last metrics

    model.eval()
    test_losses = []
    preds, outcomes, scores, protected_true, protected_hat = [], [], [], [], []
    with torch.no_grad():
        for x_test, y_test, ytrue, s_true in test_loader:
            x_test = x_test.to(device)
            y_test = y_test.to(device)
            s_true = s_true.to(device)
            test_loss, yhat, s_hat = forward_loss(x_test, y_test, s_true)
            test_losses.append(test_loss.item())
            preds.append(yhat)
            outcomes.append(ytrue)
            scores.append(y_test)
            protected_true.append(s_true)
            if use_grl:
                protected_hat.append(s_hat)

    # Concatenate the per-batch tensors into one result table.
    df = pd.DataFrame(data=torch.cat(preds).cpu().numpy(), columns=['pred'])

    df['true'] = torch.cat(outcomes).cpu().numpy()
    df['compas'] = torch.cat(scores).cpu().numpy()
    df['race'] = torch.cat(protected_true).cpu().numpy()[:, 0]
    if use_grl:
        df['race_hat'] = torch.cat(protected_hat).cpu().numpy()[:, 0]

    return model, df

In [None]:
def train_and_evaluate_drop(adv_loader: DataLoader,
                            benign_loader: DataLoader,
                            val_loader: DataLoader,
                            test_loader: DataLoader,
                            device,
                            input_shape,
                            grl_lambda=None,
                            model=None,
                            dataset='compas'):
    """
    Train a model for 50 epochs on two loaders and evaluate it on the test loader.

    In every epoch the model is first updated on ``adv_loader`` in train mode
    and then on ``benign_loader`` in eval mode (gradient updates still happen;
    only mode-dependent layers such as dropout behave differently).

    Every loader must yield 4-tuples ``(x, y, y_true, s)``; ``s`` is indexed
    as a 2-D tensor (``argmax(dim=1)`` gives the class used by the bias loss).

    :param adv_loader: Pytorch-like DataLoader with the samples trained in train mode.
    :param benign_loader: Pytorch-like DataLoader with the samples trained in eval mode.
    :param val_loader: Pytorch-like DataLoader with validation data.
    :param test_loader: Pytorch-like DataLoader with testing data.
    :param device: The target device for the training.
    :param input_shape: Number of input features (used when a model is created).
    :param grl_lambda: Gradient-reversal factor; ``None`` or 0 trains without
        the second (bias) head.
    :param model: Optional pre-built model; it must return ``(y, s)`` when the
        bias head is enabled and ``y`` alone otherwise.
    :param dataset: ``'census'`` (any letter case) -> ``Net_CENSUS``, anything
        else -> ``Net`` (only used when ``model`` is None).
    :return: A tuple: (trained Pytorch-like model, dataframe with results on test set).
        The dataframe has the columns ``pred``, ``true``, ``compas``, ``race``
        and, with the bias head, ``race_hat`` (first logit of that head).
    """

    epochs = 50
    use_grl = grl_lambda is not None and grl_lambda != 0

    if model is None:
        # Build the network with 0 when the head is disabled so that its
        # output arity matches what the loops below expect.
        model_lambda = grl_lambda if use_grl else 0
        # 'CENSUS' is still accepted; 'census' matches train_and_evaluate.
        if isinstance(dataset, str) and dataset.lower() == 'census':
            model = Net_CENSUS(input_shape=input_shape, grl_lambda=model_lambda)
        else:
            model = Net(input_shape=input_shape, grl_lambda=model_lambda)
    model = model.to(device)
    criterion = nn.MSELoss().to(device)
    criterion_bias = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, threshold=0.3, cooldown=5)

    def forward_loss(x, y, s):
        """Return ``(loss, y_hat, s_hat)`` for one batch (``s_hat`` is None without GRL)."""
        if use_grl:
            y_hat, s_hat = model(x)
            # CrossEntropyLoss expects (logits, class indices), in that order.
            loss = criterion(y_hat, y) + criterion_bias(s_hat, s.argmax(dim=1))
            return loss, y_hat, s_hat
        y_hat = model(x)
        return criterion(y_hat, y), y_hat, None

    def optimise_over(loader, losses):
        """One optimisation pass over ``loader``; batch losses are appended to ``losses``."""
        for x_batch, y_batch, _, s_batch in loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            s_batch = s_batch.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            loss, _, _ = forward_loss(x_batch, y_batch, s_batch)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

    training_losses = []
    validation_losses = []

    t_prog = trange(epochs, desc='Training neural network', leave=False, position=1, mininterval=5)

    for epoch in t_prog:
        batch_losses = []

        model.train()
        optimise_over(adv_loader, batch_losses)

        # Benign samples are still used for weight updates, but in eval mode.
        model.eval()
        optimise_over(benign_loader, batch_losses)

        training_loss = np.mean(batch_losses)
        training_losses.append(training_loss)

        model.eval()
        val_losses = []
        with torch.no_grad():
            for x_val, y_val, _, s_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                s_val = s_val.to(device)
                val_loss, _, _ = forward_loss(x_val, y_val, s_val)
                val_losses.append(val_loss.item())
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

        # Drive the scheduler with the epoch mean, not the last batch's loss.
        scheduler.step(validation_loss)

        t_prog.set_postfix({"epoch": epoch, "training_loss": training_loss,
                            "validation_loss": validation_loss}, refresh=False)  # print last metrics

    model.eval()
    test_losses = []
    preds, outcomes, scores, protected_true, protected_hat = [], [], [], [], []
    with torch.no_grad():
        for x_test, y_test, ytrue, s_true in test_loader:
            x_test = x_test.to(device)
            y_test = y_test.to(device)
            s_true = s_true.to(device)
            test_loss, yhat, s_hat = forward_loss(x_test, y_test, s_true)
            test_losses.append(test_loss.item())
            preds.append(yhat)
            outcomes.append(ytrue)
            scores.append(y_test)
            protected_true.append(s_true)
            if use_grl:
                protected_hat.append(s_hat)

    # Concatenate the per-batch tensors into one result table.
    df = pd.DataFrame(data=torch.cat(preds).cpu().numpy(), columns=['pred'])

    df['true'] = torch.cat(outcomes).cpu().numpy()
    df['compas'] = torch.cat(scores).cpu().numpy()
    df['race'] = torch.cat(protected_true).cpu().numpy()[:, 0]
    if use_grl:
        df['race_hat'] = torch.cat(protected_hat).cpu().numpy()[:, 0]

    return model, df

In [None]:
def _find_rare_path_samples(net, train_dataset, THETA, GAMMA):
    """Shared implementation behind sample_sort and sample_sort_test.

    Computes one path set per sample with get_path_set4, counts how often each
    distinct path set occurs, and selects the samples whose path set is rare.

    :param net: model handed to get_path_set4 (moved to CPU first via ``net.cpu()``)
    :param train_dataset: indexable dataset; element ``[i][0]`` is the model input
    :param THETA: a path set is "rare" when its count is <= max(top_count * THETA, 1)
    :param GAMMA: forwarded unchanged to get_path_set4
    :return: (list of selected sample indices, path-set frequency table with
             columns ``pathset`` and ``counts``)
    """
    net = net.cpu()
    path_set_list = [get_path_set4(net, train_dataset[i][0], GAMMA=GAMMA)
                     for i in range(len(train_dataset))]
    if not path_set_list:
        # An empty dataset used to crash on v.counts[0] and on the division below.
        print("frac:{}".format(0.0))
        return [], pd.DataFrame(columns=['pathset', 'counts'])

    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    v = pd.Series(path_set_list).value_counts().rename_axis('pathset').reset_index(name='counts')
    # value_counts sorts by descending frequency, so row 0 is the most common path set.
    limit = max(v['counts'].iloc[0] * THETA, 1)
    # Path sets are hashable (value_counts already requires it), so a set gives
    # constant-time membership instead of scanning a tuple for every sample.
    rare_path_sets = set(v.loc[v['counts'] <= limit, 'pathset'])
    adv_data_idx = [i for i, path_set in enumerate(path_set_list) if path_set in rare_path_sets]
    print("frac:{}".format(len(adv_data_idx) / len(train_dataset)))
    return adv_data_idx, v


def sample_sort(net, train_dataset, THETA=1e-3, GAMMA=0.9):
    """Return the indices of the samples that activate a rarely used path set.

    Prints the selected fraction as ``frac:<value>``.

    :param net: model to analyse (moved to CPU)
    :param train_dataset: indexable dataset; element ``[i][0]`` is the model input
    :param THETA: rarity threshold relative to the most frequent path set
    :param GAMMA: forwarded to get_path_set4
    :return: list of integer indices into train_dataset
    """
    adv_data_idx, _ = _find_rare_path_samples(net, train_dataset, THETA, GAMMA)
    return adv_data_idx

v_list=[]
def sample_sort_test(net, train_dataset, THETA=1e-3, GAMMA=0.9):
    """Same selection as sample_sort, but also records the path-set frequency table.

    The frequency table (columns ``pathset`` / ``counts``) is appended to the
    module-level ``v_list`` so it can be inspected or plotted afterwards.

    :return: list of integer indices into train_dataset
    """
    adv_data_idx, v = _find_rare_path_samples(net, train_dataset, THETA, GAMMA)
    v_list.append(v)
    return adv_data_idx

def _build_split_loaders(train_dataset, adv_data_idx, batch_size=None):
    """Partition train_dataset by index membership and wrap both parts in shuffled DataLoaders.

    Samples keep their original (ascending index) order inside each TensorDataset.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE  # notebook-level default
    # A set makes the membership test O(1); the previous list lookup made the split quadratic.
    adv_set = {int(i) for i in adv_data_idx}
    adv_rows, benign_rows = [], []
    for i in range(len(train_dataset)):
        (adv_rows if i in adv_set else benign_rows).append(train_dataset[i])
    if not adv_rows or not benign_rows:
        # Previously this surfaced as an AttributeError from TensorDataset(None, ...).
        raise ValueError(
            "cannot split dataset: {} selected and {} remaining samples (both parts must be non-empty)".format(
                len(adv_rows), len(benign_rows)))

    def _to_dataset(rows):
        # Stack each field once instead of growing tensors with repeated torch.cat.
        return TensorDataset(*(torch.stack(column) for column in zip(*rows)))

    adv_loader = DataLoader(dataset=_to_dataset(adv_rows), batch_size=batch_size, shuffle=True)
    benign_loader = DataLoader(dataset=_to_dataset(benign_rows), batch_size=batch_size, shuffle=True)
    return adv_loader, benign_loader


def get_adv(train_dataset,adv_data_idx,batch_size=None):
    """Split train_dataset into the selected ("adv") samples and the remaining ("benign") ones.

    :param train_dataset: indexable dataset whose items are tuples of tensors
    :param adv_data_idx: indices of the samples that go into the "adv" part
    :param batch_size: loader batch size; defaults to the notebook-level BATCH_SIZE
    :return: (adv_loader, benign_loader), both shuffled DataLoaders over TensorDatasets
    :raises ValueError: if either part would be empty
    """
    return _build_split_loaders(train_dataset, adv_data_idx, batch_size)

def get_adv_rand(train_dataset,adv_data_idx,batch_size=None):
    """Random control for get_adv: same split, but with randomly drawn indices.

    Only the *number* of entries in adv_data_idx is used. Indices are drawn without
    replacement so the random "adv" part has exactly that many samples (the previous
    random.choices call could draw duplicates and silently shrink it).

    :param train_dataset: indexable dataset whose items are tuples of tensors
    :param adv_data_idx: reference selection; only its length matters
    :param batch_size: loader batch size; defaults to the notebook-level BATCH_SIZE
    :return: (adv_loader, benign_loader), both shuffled DataLoaders over TensorDatasets
    :raises ValueError: if either part would be empty
    """
    total = len(train_dataset)
    random_idx = random.sample(range(total), min(len(adv_data_idx), total))
    return _build_split_loaders(train_dataset, random_idx, batch_size)

In [None]:
def EA(net, attack_size, iter_num, dataset='compas'):
    """Alternate between retraining the model and augmenting the training data with attack points.

    Each of the `iter_num` rounds:
      1. retrains `model` on the current global `train_loader` and records its metrics
         in `global_results`;
      2. calls attack_keras_model on the current training tensors to obtain new points;
      3. appends those points (rounded to 3 decimals, clamped to [0, 1], NaNs zeroed) to
         the global training tensors, giving them random one-hot protected attributes;
      4. rebuilds the global `train_loader` from the enlarged tensors.

    Side effects: rebinds the notebook globals `train_loader`, `x_train_tensor`,
    `y_train_tensor`, `l_train_tensor` and `s_train_tensor`; appends to `global_results`.
    The global `train_dataset` is NOT rebound (the enlarged dataset is local to this function).

    :param net: model to start from
    :param attack_size: used only to compute the `fraction` passed to get_metrics
    :param iter_num: number of retrain/attack rounds
    :param dataset: dataset name forwarded to get_metrics
    """
    global train_loader, x_train_tensor, y_train_tensor, l_train_tensor, s_train_tensor
    model=net
    EA_start=time.time()
    # One tick per round; the bar used to be fixed at 10 ticks and was never advanced.
    t_main=trange(iter_num,desc="Attack", leave=False, position=0)
    for i in t_main:

        # NOTE(review): `dataset` is not forwarded to train_and_evaluate here, although the
        # function accepts a `dataset` keyword elsewhere in this notebook - confirm intent.
        model, results = train_and_evaluate(train_loader, val_loader, test_loader, device,
                                            input_shape=x_tensor.shape[1], model=model)
        print(i)
        result = get_metrics(results, threshold, fraction=(attack_size)/(base_size * 7), dataset=dataset)
        t_main.set_postfix(result)
        global_results.append(result)
        result_pts, result_class, labels = attack_keras_model(
                CArray(x_train_tensor),
                Y=CArray((y_train_tensor[:, 0] > threshold).int()),
                S=s_train_tensor,
                nb_attack=10)
        print('attack_done!')
        result_pts = torch.tensor(np.around(result_pts.astype(np.float32), decimals=3)).clamp(0.0, 1.0)
        # x != x is only true for NaN: replace NaNs produced by the attack with 0.
        result_pts[result_pts != result_pts] = 0.0
        result_class[result_class != result_class] = 0.0

        x_train_tensor = torch.cat((x_train_tensor, result_pts))
        y_train_tensor = torch.cat(
            (y_train_tensor, torch.tensor(result_class.reshape(-1, 1).astype(np.float32)).clamp(0, 10)))
        l_train_tensor = torch.cat((l_train_tensor, torch.tensor(labels.tondarray().reshape(-1, 1).astype(np.float32))))
        # The new points get a random protected attribute, one-hot encoded as [s, 1 - s].
        s = np.random.randint(2, size=len(result_class))
        s_train_tensor = torch.cat((s_train_tensor, torch.tensor(np.array([s, 1 - s]).T.astype(np.float64))))

        train_dataset = TensorDataset(x_train_tensor, y_train_tensor, l_train_tensor, s_train_tensor)
        train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        # Report the number of samples; len(train_loader) would be the number of batches.
        logging.debug("New training dataset has size {} (original {}).".format(len(train_dataset), base_size * 7))
        EA_mid=time.time()
        cost_time=EA_mid-EA_start
        print('time costs:{} s'.format(cost_time))

    EA_end=time.time()
    cost_time=EA_end-EA_start
    print('time costs:{} s'.format(cost_time))


In [None]:
def Fixate(THETA=1e-3,GAMMA=0.9,epoch=10,dataset='compas'):
    """Run the rare-path repair `epoch` times with fixed THETA/GAMMA and record the metrics.

    Each round selects rare-path samples from the global `train_dataset` with sample_sort,
    splits the data with get_adv, trains with train_and_evaluate_drop and appends the
    get_metrics result to the global `global_results`. The total wall time is printed.

    :param THETA: rarity threshold forwarded to sample_sort
    :param GAMMA: forwarded to sample_sort
    :param epoch: number of independent repair rounds
    :param dataset: dataset name, forwarded to both train_and_evaluate_drop and get_metrics
    """
    our_start=time.time()
    for _ in range(epoch):
        adv_data_idx = sample_sort(net,train_dataset,THETA,GAMMA)
        adv_loader, benign_loader = get_adv(train_dataset,adv_data_idx)
        # `dataset` is forwarded to training as well (it used to reach get_metrics only),
        # matching what Fixate_with_val and training_function do.
        net_drop, results = train_and_evaluate_drop(adv_loader, benign_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                                grl_lambda=0,dataset=dataset)

        result = get_metrics(results, threshold, 0, dataset=dataset)
        global_results.append(result)

    our_end=time.time()
    cost_time=our_end-our_start
    print('time costs:{} s'.format(cost_time))

In [None]:
def training_function(config):
    """Ray Tune trial: run one rare-path repair with the trial's THETA/GAMMA and report its score.

    Expected config keys: 'THETA', 'GAMMA', 'train_dataset_s', 'val', 'test',
    'x_tensor' and 'dataset'. The reported ``mean_loss`` is
    DP + EO + (1 - DP ratio) - 0.01 * acc, so lower is better.
    The metrics dict is also appended to `global_results`.
    """
    theta = config['THETA']
    gamma = config['GAMMA']
    tune_train = config['train_dataset_s']
    tune_val_loader = config['val']
    tune_test_loader = config['test']
    features = config['x_tensor']

    rare_idx = sample_sort(net, tune_train, theta, gamma)
    loaders = get_adv(tune_train, rare_idx)
    adv_loader, benign_loader = loaders
    net_drop, results = train_and_evaluate_drop(
        adv_loader, benign_loader, tune_val_loader, tune_test_loader, device,
        input_shape=features.shape[1],
        grl_lambda=0,
        dataset=config['dataset'],
    )
    result = get_metrics(results, threshold, 0, dataset=config['dataset'])

    # Unfairness terms first, then a small reward for accuracy.
    unfairness = result['DP'] + result['EO'] + (1 - result['DP ratio'])
    complex_score = unfairness - 0.01 * result['acc']
    tune.report(mean_loss=complex_score)

    global_results.append(result)

In [None]:
def training_function_rand(config):
    """Ray Tune trial for the random-selection control (uses get_adv_rand instead of get_adv).

    Expected config keys: 'THETA', 'GAMMA', 'train_dataset_s', 'val', 'test',
    'x_tensor' and 'dataset'. The reported ``mean_loss`` is
    DP + EO + (1 - DP ratio) - 0.01 * acc, so lower is better.
    The metrics dict is also appended to `global_results`.
    """
    theta = config['THETA']
    gamma = config['GAMMA']
    tune_train = config['train_dataset_s']
    tune_val_loader = config['val']
    tune_test_loader = config['test']
    features = config['x_tensor']

    # sample_sort still runs: get_adv_rand receives its result as the reference selection.
    rare_idx = sample_sort(net, tune_train, theta, gamma)
    loaders = get_adv_rand(tune_train, rare_idx)
    adv_loader, benign_loader = loaders
    net_drop, results = train_and_evaluate_drop(
        adv_loader, benign_loader, tune_val_loader, tune_test_loader, device,
        input_shape=features.shape[1],
        grl_lambda=0,
        dataset=config['dataset'],
    )
    result = get_metrics(results, threshold, 0, dataset=config['dataset'])

    # Unfairness terms first, then a small reward for accuracy.
    unfairness = result['DP'] + result['EO'] + (1 - result['DP ratio'])
    complex_score = unfairness - 0.01 * result['acc']
    tune.report(mean_loss=complex_score)

    global_results.append(result)

In [None]:
def Fixate_with_val(epoch=10,dataset='compas'):
    """Pick THETA/GAMMA by grid search on the validation data, then run the repair `epoch` times.

    The notebook-level `val_dataset` is split roughly 80/10/10 and handed to
    `training_function` through Ray Tune. The configuration with the smallest reported
    ``mean_loss`` is then used for `epoch` rounds of sample_sort -> get_adv ->
    train_and_evaluate_drop on the global `train_dataset`. Each round's metrics are
    appended to `global_results`, and a per-phase timing summary is printed at the end.

    :param epoch: number of repair rounds run with the selected parameters
    :param dataset: dataset name forwarded to the trials, train_and_evaluate_drop and get_metrics
    """
    our_start=time.time()
    base_size = len(val_dataset) // 10
    split = [8 * base_size, 1 * base_size, len(val_dataset) - 9 * base_size]  # Train, validation, test
    train_dataset_s, val_dataset_s, test_dataset_s = random_split(val_dataset, split)

    val_loader_s = DataLoader(dataset=val_dataset_s, batch_size=BATCH_SIZE)
    test_loader_s = DataLoader(dataset=test_dataset_s, batch_size=BATCH_SIZE)

    # Only the feature dimension (shape[1]) of this tensor is used by the trials.
    x_train_tensor_s = val_dataset[:][0]


    analysis = tune.run(
        training_function,
        config={
            'THETA': tune.grid_search([0.1, 0.01, 3e-3, 1e-3, 3e-4, 1e-4]),
            'GAMMA': tune.grid_search([0.95, 0.9, 0.85, 0.8, 0.7, 0.6]),
            'dataset':dataset,
            'train_dataset_s':train_dataset_s,
            'val':val_loader_s,
            'test':test_loader_s,
            'x_tensor':x_train_tensor_s
        },
        resources_per_trial={
            "cpu": 16,
            "gpu": 2,
        }
    )
    best_config=analysis.get_best_config(metric="mean_loss", mode="min")
    print("Best config: ",best_config)
    THETA = best_config['THETA']
    GAMMA = best_config['GAMMA']
    val_end=time.time()

    # Phase timings are accumulated over all rounds. Previously only the timestamps of the
    # last round were used, so "path analysis" silently included every earlier round and
    # epoch=0 raised a NameError.
    PA_time=0.0
    SS_time=0.0
    Dropout_time=0.0
    for _ in range(epoch):
        round_start=time.time()
        adv_data_idx = sample_sort(net,train_dataset,THETA,GAMMA)
        PA_end=time.time()
        adv_loader, benign_loader = get_adv(train_dataset,adv_data_idx)
        SS_end=time.time()
        net_drop, results = train_and_evaluate_drop(adv_loader, benign_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                                grl_lambda=0,dataset=dataset)
        Dropout_end=time.time()
        result = get_metrics(results, threshold, 0, dataset=dataset)
        global_results.append(result)

        PA_time+=PA_end-round_start
        SS_time+=SS_end-PA_end
        Dropout_time+=Dropout_end-SS_end

    our_end=time.time()
    cost_time=our_end-our_start
    val_time=val_end-our_start
    print('param selection costs:{} s'.format(val_time))
    print('path analysis costs:{} s'.format(PA_time))
    print('sample separation costs:{} s'.format(SS_time))
    print('partial dropout training costs:{} s'.format(Dropout_time))
    print('total time costs:{} s'.format(cost_time))

In [None]:
def Fixate_with_val_rand(epoch=10,dataset='compas'):
    """Random-selection control of Fixate_with_val.

    Grid-searches THETA/GAMMA on a split of the notebook-level `val_dataset` using
    `training_function_rand`, then runs `epoch` rounds of
    sample_sort -> get_adv_rand -> train_and_evaluate_drop on the global `train_dataset`,
    appending each round's metrics to `global_results`. Prints the total wall time.

    :param epoch: number of rounds run with the selected parameters
    :param dataset: dataset name forwarded to the trials, train_and_evaluate_drop and get_metrics
    """
    started = time.time()

    # Carve the validation data into train / validation / test parts (8 : 1 : remainder).
    tenth = len(val_dataset) // 10
    part_sizes = [8 * tenth, 1 * tenth, len(val_dataset) - 9 * tenth]
    tune_train, tune_val, tune_test = random_split(val_dataset, part_sizes)

    tune_val_loader = DataLoader(dataset=tune_val, batch_size=BATCH_SIZE)
    tune_test_loader = DataLoader(dataset=tune_test, batch_size=BATCH_SIZE)
    features = val_dataset[:][0]

    search_space = {
        'THETA': tune.grid_search([0.1, 0.01, 3e-3, 1e-3, 3e-4, 1e-4]),
        'GAMMA': tune.grid_search([0.95, 0.9, 0.85, 0.8, 0.7, 0.6]),
        'dataset': dataset,
        'train_dataset_s': tune_train,
        'val': tune_val_loader,
        'test': tune_test_loader,
        'x_tensor': features,
    }
    trial_resources = {"cpu": 8, "gpu": 2}
    analysis = tune.run(
        training_function_rand,
        config=search_space,
        resources_per_trial=trial_resources,
    )

    best_config = analysis.get_best_config(metric="mean_loss", mode="min")
    print("Best config: ", best_config)
    best_theta, best_gamma = best_config['THETA'], best_config['GAMMA']

    for _ in range(epoch):
        reference_idx = sample_sort(net, train_dataset, best_theta, best_gamma)
        adv_loader, benign_loader = get_adv_rand(train_dataset, reference_idx)
        net_drop, results = train_and_evaluate_drop(
            adv_loader, benign_loader, val_loader, test_loader, device,
            input_shape=x_tensor.shape[1],
            grl_lambda=0,
            dataset=dataset,
        )
        global_results.append(get_metrics(results, threshold, 0, dataset=dataset))

    elapsed = time.time() - started
    print('time costs:{} s'.format(elapsed))

In [None]:
# COMPAS experiment: baseline model followed by a THETA sweep of Fixate.
# Everything assigned here is a notebook global; the helper functions above
# (EA, Fixate, get_adv, ...) read several of these names directly.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/COMPAS/compas_recidive_two_years_sanitize_age_category_jail_time_decile_score.csv')
df_binary, Y, S, Y_true = transform_dataset(df)
Y = Y.to_numpy()
print(np.mean(Y))

# Tensors: l = Y_true, x = features, y = Y, s = one-hot encoded S.
l_tensor = torch.tensor(Y_true.to_numpy().reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
# NOTE(review): no random seed is set in this cell, so the split presumably differs between runs.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# get the classification threshold, we use the same scale for compas so 4 instead of 0.5
ori_start=time.time()
threshold = 4

# Baseline model; `net` is the model later analysed by sample_sort inside Fixate.
net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0)
ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)

# EA
# EA(net,attack_size=10, iter_num=50)

# THETA sweep: 50 log-spaced values, 3 Fixate rounds each (150 rows appended to global_results).
for THETA in list(np.logspace(-0.01,-5,50)):
    Fixate(THETA=THETA,GAMMA=0.95,epoch=3)

In [None]:
# Tabulate the collected metrics, one row per run.
# Note: this rebinds `df`, which previously held the raw CSV frame.
df = pd.DataFrame(global_results)

df

In [None]:
# COMPAS experiment: baseline model followed by a THETA sweep and a GAMMA sweep of Fixate.
# The commented-out lines below are leftovers of an earlier manual grid search.
# from itertools import product

# param = dict(
#     THETA = [0.1, 0.01, 3e-3, 1e-3, 3e-4, 1e-4],
#     GAMMA = [0.95, 0.9, 0.85, 0.8, 0.75, 0.7]
#     )

# param_values = [v for v in param.values()]

device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/COMPAS/compas_recidive_two_years_sanitize_age_category_jail_time_decile_score.csv')
df_binary, Y, S, Y_true = transform_dataset(df)
Y = Y.to_numpy()
print(np.mean(Y))

# Tensors: l = Y_true, x = features, y = Y, s = one-hot encoded S.
l_tensor = torch.tensor(Y_true.to_numpy().reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# get the classification threshold, we use the same scale for compas so 4 instead of 0.5
ori_start=time.time()
threshold = 4

# Baseline model; `net` is the model later analysed by sample_sort inside Fixate.
net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0)
ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)

# EA
# EA(net,attack_size=10, iter_num=50)

# Row layout of global_results after this cell: 0 = baseline,
# 1..150 = THETA sweep (50 values x 3 rounds), 151..300 = GAMMA sweep (50 values x 3 rounds).
for THETA in list(np.logspace(-0.01,-5,50)):
    Fixate(THETA=THETA,GAMMA=0.95,epoch=3)
for GAMMA in list(np.linspace(1,0.5,50)):
    Fixate(THETA=0.01,GAMMA=GAMMA,epoch=3)

In [None]:
# Tabulate the collected metrics, one row per run; the ablation cells below slice this frame by row.
# Note: this rebinds `df`, which previously held the raw CSV frame.
df = pd.DataFrame(global_results)

df

In [None]:
# Hard-coded reference metrics drawn as the 'Ethical Adversaries' line in the ablation plots.
# NOTE(review): presumably copied from an earlier EA run - the values are not derived in this notebook.
EA_ablation={'acc':0.759,
            'DP':0.095,
            'EO':0.095,
            'DP ratio':1.203}

In [None]:
# Average the metrics of every GAMMA setting over its `epoch` repeated runs and save the table.
# Rows are collected in a list and turned into a DataFrame once: DataFrame.append was removed
# in pandas 2.0 and re-copied the whole frame on every iteration.
# NOTE(review): rows are read from offset 1 and labelled Theta=0.0003, whereas the sweep cell
# above runs the THETA sweep first (rows 1..150) and the GAMMA sweep second with THETA=0.01 -
# confirm which run of `df` this cell is meant to summarise.
epoch=3
records=[]
for i, GAMMA in enumerate(np.linspace(1,0.5,50)):
    start=1+i*epoch  # row 0 of df is the baseline model
    end=start+epoch
    runs=df.iloc[start:end]
    records.append({'Theta':0.0003,
                    'Gamma':GAMMA,
                    'acc':runs['acc'].mean(),
                    'DP':runs['DP'].mean(),
                    'EO':runs['EO'].mean(),
                    'DP ratio':runs['DP ratio'].mean()})
ablation_df=pd.DataFrame(records, columns=['Theta','Gamma','acc','DP','EO','DP ratio'])
ablation_df.to_csv('data/results/ablation_df_gamma')
ablation_df

In [None]:
# Plot one metric of the GAMMA ablation against the baseline and the EA reference values.
ablation_df=pd.read_csv('data/results/ablation_df_gamma')
metric='EO'
plt.figure(figsize=(7, 5))
plt.tick_params(labelsize=14)
# for GAMMA in list(np.linspace(1,0.5,50)):
plt.plot((np.linspace(1,0.5,50)),(df.iloc[0][metric])*np.ones(50),'r-.',label='Naive baseline')
plt.plot((np.linspace(1,0.5,50)),(EA_ablation[metric])*np.ones(50),'g-+',label='Ethical Adversaries')
plt.plot((np.linspace(1,0.5,50)),ablation_df[metric],'bo-',label='FairNeuron')
# Row 27 is the hand-picked "best" GAMMA setting.
plt.plot((np.linspace(1,0.5,50)),ablation_df[metric][27]*np.ones(50),'b-.',label='best param')

plt.xlabel('gamma',myfont)
plt.ylabel(metric,myfont)
# plt.ylim((0,1))
plt.grid(linestyle='-.')
plt.savefig('data/results/Figures/ablation_gamma_{}.pdf'.format(metric))
# `legend` used to be referenced without ever being assigned (its creation was commented out),
# which raises NameError on a fresh kernel. It is created after savefig so the saved PDF stays
# legend-free, as before; the handle is kept for export_legend in the next cell.
legend=plt.legend()
print(legend.figure)
plt.show()

In [None]:
def export_legend(legend, filename="data/results/Figures/legend.pdf", expand=[-5,-5,5,5]):
    """Save only the legend of a figure to `filename`.

    The figure is drawn once so the legend has a window extent, that extent is
    grown by `expand` (added element-wise to the extent's four coordinates),
    converted with the figure's inverted dpi-scale transform, and used as the
    crop box for ``savefig``.

    :param legend: legend handle exposing ``figure`` and ``get_window_extent()``
    :param filename: output path handed to ``figure.savefig``
    :param expand: four offsets added to the legend extent before cropping
    """
    figure = legend.figure
    figure.canvas.draw()

    extent = legend.get_window_extent()
    padded_coords = extent.extents + np.array(expand)
    padded = extent.from_extents(*padded_coords)
    crop_box = padded.transformed(figure.dpi_scale_trans.inverted())

    figure.savefig(filename, dpi="figure", bbox_inches=crop_box)
# Write the legend to its own PDF (default path of export_legend).
# `legend` must already exist as a notebook global holding the legend handle of the plot to export.
export_legend(legend)
plt.show()

In [None]:
# Average the metrics of every THETA setting over its `epoch` repeated runs and save the table.
# Rows are collected in a list and turned into a DataFrame once: DataFrame.append was removed
# in pandas 2.0 and re-copied the whole frame on every iteration.
# NOTE(review): rows are read from offset 151, whereas the sweep cell above runs the THETA
# sweep first (rows 1..150) and the GAMMA sweep second (rows 151..300) - confirm which run
# of `df` this cell is meant to summarise.
epoch=3
records=[]
for i, THETA in enumerate(np.logspace(-0.01,-5,50)):
    start=151+i*epoch
    end=start+epoch
    runs=df.iloc[start:end]
    records.append({'Theta':THETA,
                    'Gamma':0.95,
                    'acc':runs['acc'].mean(),
                    'DP':runs['DP'].mean(),
                    'EO':runs['EO'].mean(),
                    'DP ratio':runs['DP ratio'].mean()})
ablation_df=pd.DataFrame(records, columns=['Theta','Gamma','acc','DP','EO','DP ratio'])
ablation_df.to_csv('data/results/ablation_df_theta')
ablation_df

In [None]:
# Reload the saved THETA ablation table so the plot below can run without redoing the sweep.
ablation_df=pd.read_csv('data/results/ablation_df_theta')

In [None]:
# Plot one metric of the THETA ablation (x axis: log10 of THETA) against the
# baseline and the EA reference values, then save the figure as a PDF.
metric='acc'
plt.figure(figsize=(7, 5))
plt.tick_params(labelsize=14)

lg_theta=np.log10(np.logspace(-0.01,-5,50))
curves=[
    (df.iloc[0][metric]*np.ones(50),'r-.','Naive baseline'),
    (EA_ablation[metric]*np.ones(50),'g-+','Ethical Adversaries'),
    (ablation_df[metric],'bo-','FairNeuron'),
    # Row 38 is the hand-picked "best" THETA setting.
    (ablation_df[metric][38]*np.ones(50),'b-.','best param'),
]
for curve_values,curve_fmt,curve_label in curves:
    plt.plot(lg_theta,curve_values,curve_fmt,label=curve_label)

plt.xlabel('lg theta',myfont)
plt.ylabel(metric,myfont)
plt.grid(linestyle='-.')
plt.savefig('data/results/Figures/ablation_theta_{}.pdf'.format(metric))
plt.show()

In [None]:
# COMPAS experiment: baseline model, a second 'compas_nodrop' model, then Fixate_with_val.
# Everything assigned here is a notebook global read by the helper functions above.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/COMPAS/compas_recidive_two_years_sanitize_age_category_jail_time_decile_score.csv')
df_binary, Y, S, Y_true = transform_dataset(df)
Y = Y.to_numpy()
print(np.mean(Y))

# Tensors: l = Y_true, x = features, y = Y, s = one-hot encoded S.
l_tensor = torch.tensor(Y_true.to_numpy().reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# get the classification threshold, we use the same scale for compas so 4 instead of 0.5
ori_start=time.time()
threshold = 4

# Baseline model; `net` is the model later analysed by sample_sort inside Fixate_with_val.
net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0)


ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)
# Second reference run with dataset='compas_nodrop'; its metrics become row 1 of global_results.
net_nodrop, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0,dataset='compas_nodrop')
result = get_metrics(results, threshold, 0)
global_results.append(result)

# EA
# EA(net,attack_size=10, iter_num=50)

# Parameter selection on the validation split followed by 10 repair rounds.
Fixate_with_val(10)

In [None]:
# Reset the collector, then run the path analysis once (THETA=0.01, GAMMA=0.6);
# sample_sort_test appends the path-set frequency table to v_list.
v_list=[]

sample_sort_test(net,train_dataset,0.01,0.6)
v_list

In [None]:
# Cumulative share of samples covered by path sets that occur at most i times, for i = 1..max count.
# path_stat is computed here as well: it used to be defined only in the plotting cell below,
# so this cell failed with NameError when the notebook was run top to bottom.
path_stat=np.array(v_list[0].counts)
total=path_stat.sum()
cum=[]
s=0
for i in range(1,path_stat[0]+1):
    # (path_stat==i).sum() path sets occur exactly i times, i.e. they cover i times that many samples.
    s=s+i*(path_stat==i).sum()
    cum.append(s.copy())
# Normalise by the final running total so the last entry is 1.
cum=np.array(cum)/s
cum

In [None]:
# Plot the distribution of path-set occurrence counts together with the cumulative
# sample ratio `cum` computed in the previous cell, and save it as detection.pdf.
path_stat=np.array(v_list[0].counts)
plt.figure(figsize=(7, 5))
plt.tick_params(labelsize=14)
# for GAMMA in list(np.linspace(1,0.5,50)):
# plt.plot(range(0,len(path_stat)),path_stat,'r-.')
# plt.plot(range(0,len(path_stat)),(path_stat[0]*0.01)*np.ones(len(path_stat)),'g-.')
plt.hist(path_stat,bins=40,density=1, histtype='step',label='Path activation statistics PDF')
# plt.hist(path_stat,bins=40,density=1,cumulative=1, histtype='step',label='Cumulative Distribution Function')
plt.plot(range(0,len(cum)),cum,'ro-',label='Sample cumulative ratio')
# NOTE(review): 47*0.03 looks like (largest path count) * THETA for one particular run - the
# constants are hard-coded and do not follow path_stat or the THETA used above; confirm.
plt.vlines(47*0.03, 0, 2, colors='g', linestyles='dashed')
plt.xlabel('path activation statistics',myfont)
plt.ylabel('ratio',myfont)
plt.ylim((0,1.01))
plt.legend(loc='lower right')
plt.grid(linestyle='-.')
plt.savefig('data/results/Figures/detection.pdf')
plt.show()

In [None]:
# Tabulate the collected metrics, one row per run.
# Note: this rebinds `df`, which previously held the raw CSV frame.
df=pd.DataFrame(global_results)

df

In [None]:
# census
# Census (adult) experiment: baseline model, then EA, then Fixate_with_val.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/Census/adult')
df_binary, Y, S, Y_true = transform_dataset_census(df)
print(np.mean(Y))

# Both l_tensor and y_tensor are built from Y here; Y_true is not used in this cell.
l_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# classification threshold: 0.5 for this dataset (the COMPAS cells use 4 on the COMPAS score scale)
ori_start=time.time()
threshold = 0.5

net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0, dataset='census')
ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)


# NOTE(review): get_metrics above and the two calls below do not pass dataset='census', so EA
# and Fixate_with_val fall back to their 'compas' default; later cells call both with
# dataset='census' - confirm which is intended.
EA(net,attack_size=10, iter_num=50)

Fixate_with_val(10)

In [None]:
# census (this cell was labelled "credit" but loads the Census/adult data and trains with
# dataset='census'; it repeats the census experiment of the previous cell)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/Census/adult')
df_binary, Y, S, Y_true = transform_dataset_census(df)
print(np.mean(Y))

# Both l_tensor and y_tensor are built from Y here; Y_true is not used in this cell.
l_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# classification threshold: 0.5 for this dataset (the COMPAS cells use 4 on the COMPAS score scale)
ori_start=time.time()
threshold = 0.5

net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0, dataset='census')
ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)


# NOTE(review): neither call passes dataset='census', so both fall back to their 'compas' default - confirm.
EA(net,attack_size=10, iter_num=50)

Fixate_with_val(10)

In [None]:
# credit
# German credit experiment: baseline model, then EA, then Fixate_with_val.

device = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE=128

df=pd.read_csv('data/Credit/german_credit',sep=' ')
df_binary, Y, S, Y_true = transform_dataset_credit(df)
print(np.mean(Y))

# Both l_tensor and y_tensor are built from Y here; Y_true is not used in this cell.
l_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
x_tensor = torch.tensor(df_binary.to_numpy().astype(np.float32))
y_tensor = torch.tensor(Y.reshape(-1, 1).astype(np.float32))
s_tensor = torch.tensor(preprocessing.OneHotEncoder().fit_transform(np.array(S).reshape(-1, 1)).toarray())

dataset = TensorDataset(x_tensor, y_tensor, l_tensor, s_tensor)  # dataset = CustomDataset(x_tensor, y_tensor)

# 7/10 train, 1/10 validation, the remainder test.
base_size = len(dataset) // 10
split = [7 * base_size, 1 * base_size, len(dataset) - 8 * base_size]  # Train, validation, test

train_dataset, val_dataset, test_dataset = random_split(dataset, split)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

# Training split materialised as plain tensors (EA appends attack points to these).
x_train_tensor = train_dataset[:][0]
y_train_tensor = train_dataset[:][1]
l_train_tensor = train_dataset[:][2]
s_train_tensor = train_dataset[:][3]

# Metrics of every run in this cell are collected here; index 0 is the baseline below.
global_results = []

# classification threshold: 0.5 for this dataset (the COMPAS cells use 4 on the COMPAS score scale)
ori_start=time.time()
threshold = 0.5

net, results = train_and_evaluate(train_loader, val_loader, test_loader, device, input_shape=x_tensor.shape[1],
                                    grl_lambda=0,dataset='credit')
ori_end=time.time()
ori_cost_time=ori_end-ori_start
print('time costs:{} s'.format(ori_cost_time))

result = get_metrics(results, threshold, 0)
global_results.append(result)


# NOTE(review): neither call passes dataset='credit', so both fall back to their 'compas' default - confirm.
EA(net,attack_size=10, iter_num=50)

Fixate_with_val(10)

In [None]:
# Tabulate the collected metrics, one row per run.
# Note: this rebinds `df`, which previously held the raw CSV frame.
df=pd.DataFrame(global_results)

df

In [None]:
# Re-run EA with dataset='census'; uses whatever net / loaders / tensors are currently defined as notebook globals.
EA(net,attack_size=10, iter_num=50,dataset='census')

In [None]:
# Re-run Fixate_with_val (10 rounds) with dataset='census'; uses the currently defined notebook globals.
Fixate_with_val(10,dataset='census')