# FairGAN 

Learning data representations with high utility and high fairness using an adverserial approach.

- Utility : w.r.t. classification goals
- Fairness : w.r.t difference in likelyhood of outcomes for protected groups (i.e. gender =  Male | Female)

## Structure of FairGAN
<img src="FairGANModel.png" width="400">

### Implemented in tensorflow
### source : https://arxiv.org/pdf/1805.11202.pdf


In [1]:
import numpy as np
import pandas as pd
from preprocess_adult import *
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, LeakyReLU
import tensorflow as tf
from tensorflow.keras import backend as K

In [2]:
@tf.function
def split_tensor(tensor, col_name, n=1):
    '''takes Xn (2D feature tensor) and returns 2 tensors(sensitive features and normal features)'''
    col = Xys_cols[col_name]
    dim = tensor.shape[-1]
    pre, sens, post =  tf.split(tensor, (col, n, (dim-(col+n))), axis=1)
    return sens, tf.concat([pre, post], axis=1)


### Encoder Layer

In [3]:
class Encoder(keras.layers.Layer):
    def __init__(self, input_dim, latent_dim, act):
        super(Encoder, self).__init__()
        self.dense_1 = Dense(128, activation=act, input_shape=(input_dim,))
        self.latent_output =  Dense(latent_dim, name='latent_output')

    def call(self, x):        
        x = self.dense_1(x)
        return self.latent_output(x)

### Decoder Layer

In [4]:
class Decoder(keras.layers.Layer):
    def __init__(self, latent_dim, act):
        super(Decoder, self).__init__()
        self.dense_1 = Dense(128, activation=act, input_shape=(latent_dim,))
        self.output_layer = Dense(input_dim)
        
    def call(self, x):
        x = self.dense_1(x)
        return self.output_layer(x)

### Autoencoder Model

In [5]:
class AutoEncoder(keras.models.Model):
    def __init__(self, encoder, decoder):
        super(AutoEncoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def call(self, x):
        x = self.encoder(x)
        return self.decoder(x)

### Dense Layer Block
- Dense
- LeakyReLU
- BatchNormalization

In [6]:
class DenseBlock(keras.layers.Layer):
    def __init__(self, n_dense,
                       act,
                       input_dim= False):

        super(DenseBlock, self).__init__()
        self.act = act
        self.dense = Dense(n_dense, input_dim=input_dim) \
                     if input_dim else Dense(n_dense)
        self.batchnorm = BatchNormalization()

    def call(self, x):
        x = self.dense(x)
        x = self.act()(x)
        return self.batchnorm(x)


### Discriminator Layer
- 3 `DenseBlock` layers
- Output Layer

In [7]:
class Discriminator(keras.layers.Layer):
    def __init__(self, input_dim, act):
        super(Discriminator, self).__init__()
        self.layer_1 = DenseBlock(256, act, input_dim=input_dim)
        self.layer_2 = DenseBlock(128, act)
        self.output_layer = Dense(1, activation='sigmoid')
    
    def call(self, x):        
        x = self.layer_1(x)
        x = self.layer_2(x)
        return self.output_layer(x)

## Implementing the FairGAN Algorithm

<img src="FairGAN.png" width="280">

In [8]:
class FairGAN(keras.Model):
    def __init__(self,
                 generator,
                 D1,
                 D2,
                 generator_opt,
                 D1_opt,
                 D2_opt,
                 **kwargs):
        
        super(FairGAN, self).__init__(**kwargs)
        self.sens_var = 'Sex_Male'
                                        
        self.generator = generator      # Models
        self.D1 = D1
        self.D2 = D2
                                        
        self.gen_opt = generator_opt    # Optimizers
        self.D1_opt = D1_opt
        self.D2_opt = D2_opt
                                        # Loss Trackers
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.gen_loss_tracker = keras.metrics.Mean(name="gen_loss")
        self.D1_loss_tracker = keras.metrics.Mean(name="disc_loss")
        self.D2_loss_tracker = keras.metrics.Mean(name="s_disc_loss")


    @property
    def metrics(self):
        return [
        self.total_loss_tracker,
        self.gen_loss_tracker,
        self.D1_loss_tracker,
        self.D2_loss_tracker
        ]


    def noise(self, shape):
        return tf.random.normal(shape=shape)


    def call(self, inputs):
        X = inputs
        sens, Xy = split_tensor(X, self.sens_var)                                             
        sens_dim = (K.shape(X)[0]//2, 1)                                         #shape = (batch_size/2, 1)
        noise_dim = (K.shape(X)[0], latent_dim-1)                                #shape = (batch_size/2, 49)
        s_labels = tf.concat([tf.zeros(sens_dim), tf.ones(sens_dim)], axis=0)    #shape = (batch_size, 50)

        ''' Gdec input of noise P(z) and sensitive condition P(s)          '''
        Gdec_input = tf.concat([self.noise(noise_dim), s_labels], axis=1)
        Gdec_output =  self.generator(Gdec_input)                                #shape = (batch_size, input_dim)

        ''' Conditionally generated P( x', y' | s ) for D2         '''
        _, Xy_gen =  split_tensor(Gdec_output, self.sens_var)                                #shapes = (batch_size, 1), (batch_size, input_dim-1)

        ''' D1 outputs for Real P( x, y, s) and Generated P( x', y', s')   '''
        disc_1_output_real = self.D1(X)                                          #shape = (batch_size, 1)
        disc_1_output_generated = self.D1(Gdec_output)                           #shape = (batch_size, 1)

        ''' D2 predicting sensitive attribute on generated  P( x', y' | s ) '''
        disc_2_output = self.D2(Xy_gen)                                          #shape = (batch_size, 1)

        return Gdec_output, disc_1_output_real, disc_1_output_generated, disc_2_output, s_labels
    


    @tf.function
    def adverserial_step_D1(self, data):
        with tf.GradientTape() as gen_tape, tf.GradientTape() as D1_tape:
            _, D1_out_real, D1_out_gen, _, _ = self.call(data)                        # Get Outputs
            
            D1_loss = discriminator_loss(D1_out_real, D1_out_gen, model_type='D1')    # Compute D1 loss
            gen_loss_D1 = generator_loss(tf.ones_like(D1_out_gen),
                                                      D1_out_gen, adversary='D1')     # Compute Generator loss

        gen_grads = gen_tape.gradient(gen_loss_D1, self.generator.trainable_weights)  # Get generator gradients 
        D1_grads = D1_tape.gradient(D1_loss, self.D1.trainable_weights)               # Apply grads with optimizer

        self.gen_opt.apply_gradients(zip(gen_grads, self.generator.trainable_weights)) # Get D1 gradients
        self.D1_opt.apply_gradients(zip(D1_grads, self.D1.trainable_weights))          # Apply D1 gradients with optimizer
        return D1_loss, gen_loss_D1
        


    @tf.function
    def adverserial_step_D2(self, data):
        with tf.GradientTape() as gen_tape, tf.GradientTape() as D2_tape:
            _, _, _, D2_out, s_labels = self.call(data)

            D2_loss = discriminator_loss(s_labels, D2_out, model_type='D2')
            gen_loss_D2 = generator_loss(s_labels, D2_out, adversary='D2')

        gen_grads = gen_tape.gradient(gen_loss_D2, self.generator.trainable_weights) 
        D2_grads = D2_tape.gradient(D2_loss, self.D2.trainable_weights)

        self.gen_opt.apply_gradients(zip(gen_grads, self.generator.trainable_weights))
        self.D2_opt.apply_gradients(zip(D2_grads, self.D2.trainable_weights))
        return D2_loss, gen_loss_D2



    @tf.function
    def train_step(self, data):
        Xys, _ = data
        sens, Xy = split_tensor(Xys, self.sens_var)
        D1_loss, gen_loss_D1 = self.adverserial_step_D1( Xys)
        D2_loss, gen_loss_D2 = self.adverserial_step_D2( Xys)

        gen_loss = gen_loss_D1 + gen_loss_D2
        total_loss = gen_loss + D1_loss + D2_loss 

        self.total_loss_tracker.update_state(total_loss)
        self.gen_loss_tracker.update_state(gen_loss)
        self.D1_loss_tracker.update_state(D1_loss)
        self.D2_loss_tracker.update_state(D2_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "gen_loss": self.gen_loss_tracker.result(),
            "D1_loss": self.D1_loss_tracker.result(),
            "D2_loss": self.D2_loss_tracker.result(),
        }


### Model Loss functions
- Generator Loss (one per adversary)
- Discriminator Loss
- Sensitive Discriminator Loss 

In [9]:
cross_entropy = tf.keras.losses.BinaryCrossentropy( from_logits=True)


def generator_loss(true, pred, adversary):
    '''
       D1 adv loss =   cross_entropy( tf.ones_like(D1_preds), D1_preds )
       D2 adv loss = - cross_entropy( sensitive_label, D2_preds )
    '''
    assert (adversary =='D1') or (adversary=='D2'), \
     "adversary argument must be (str) with value 'D1' or 'D2'"
    if adversary == 'D1':
        adverserial_loss = cross_entropy(true, pred)
    elif adversary == 'D2':
        adverserial_loss = -cross_entropy(true, pred)
    return adverserial_loss



def discriminator_loss(true, pred, model_type):
    assert (model_type =='D1') or (model_type=='D2'), \
     "model_type argument must be (str) with value 'D1' or 'D2'"
    if model_type == 'D1':
        real_loss = cross_entropy(tf.ones_like(true), true)
        fake_loss = cross_entropy(tf.zeros_like(pred), pred)
        loss = real_loss + fake_loss
    elif model_type == 'D2':
        loss = cross_entropy(true, pred)
    return loss



In [10]:
'''
Possible alternatives to fooling D2

- Below are some alternatives to  "- crossentropy(sens, pred_sens)"
'''
@tf.function
def invert_sensitive(pred):
    '''
    Idea: - Inverts the predictions of D2 on sensitive var
    '''
    sb = tf.cast(pred, dtype=tf.bool)
    inv_sb = tf.math.logical_not(sb)
    inv_sb = tf.cast(inv_sb, dtype=tf.float32)
    return inv_sb

@tf.function
def randomize_sensitive(pred):
    '''
    Idea: - returns tensor with random vars between {0,1},
            to be compared with D2 predictions on sensitive var
    '''
    return tf.random.uniform(shape=K.shape(pred), maxval=1) 

### Load Adult Dataset 
- source:  UCI ML repo

In [11]:
data = load_adult(binarize=True)

X_train, X_test, y_train, y_test = [i.astype(np.float32) for i in data]
Xy_train = np.hstack([X_train, y_train.reshape(-1,1)])
Xy_test = np.hstack([X_test, y_test.reshape(-1,1)])
X_train.head()

Raw Dataset size :  48842
Size after dropping null values:  45222
Removed  3620  observations

Categorical  set size: (45222, 8)
Continuous  set size: (45222, 6)
Risk Difference M/F:
0.17403033921542613


Unnamed: 0,Age,fnlwgt,Education-Num,Capital Gain,Capital Loss,Hours per week,Workclass_Local-gov,Workclass_Private,Workclass_Self-emp-inc,Workclass_Self-emp-not-inc,...,Relationship_Not-in-family,Relationship_Other-relative,Relationship_Own-child,Relationship_Unmarried,Relationship_Wife,Race_Asian-Pac-Islander,Race_Black,Race_Other,Race_White,Sex_Male
27314,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
31381,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
30713,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
40890,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
12278,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


In [12]:
cn = list(X_train.columns)+['Target']
Xys_cols = dict([(j,i) for i,j in enumerate(cn)])


def risk_difference(df, sens_attr):
    s_col = Xys_cols[sens_attr]
    n = df.shape[0]
    y = df[:,-1]
    s = df[:,s_col] 
    P_y1_s0 = df[(y == 1) & (s == 0)].shape[0] / n
    P_y1_s1 = df[(y == 1) & (s == 1)].shape[0] / n
    return P_y1_s1 - P_y1_s0
risk_difference(Xy_train, 'Sex_Male')

0.17583320170589165

In [13]:
input_dim = Xy_train.shape[-1]
batch_size = Xy_train.shape[0]

latent_dim = 50
noise_dim = latent_dim

noise = tf.random.normal(shape=(batch_size, noise_dim))
noise.shape

TensorShape([31655, 50])

### Define all models

In [14]:
encoder = Encoder(input_dim, latent_dim, 'relu')
decoder = Decoder(latent_dim, 'relu')

autoencoder = AutoEncoder(encoder, decoder)

generator = autoencoder.decoder

D1 = Discriminator(input_dim, LeakyReLU)
D2 = Discriminator(input_dim-1, LeakyReLU)


### Check Model Outputs
#### AutoEncoder Output

In [15]:
ae_output = autoencoder(Xy_train)
ae_output.shape

TensorShape([31655, 102])

#### Generator Output

In [16]:
g_outputs = generator(noise)
g_outputs.shape

TensorShape([31655, 102])

#### Discriminator output

In [17]:
d_outputs = D1(g_outputs)
d_outputs.shape

TensorShape([31655, 1])

#### Sensitive Discriminator output

In [18]:
sens, gen_xy = split_tensor(g_outputs, 'Sex_Male', 1)
s_d_outputs = D2(gen_xy)
s_d_outputs.shape

TensorShape([31655, 1])

### Check Model Losses
#### Discriminator loss

In [19]:
discriminator_loss(X_train.values, g_outputs, model_type='D1')

<tf.Tensor: shape=(), dtype=float32, numpy=1.4107502>

#### Generator loss

In [20]:
generator_loss(sens, d_outputs, adversary='D1')

<tf.Tensor: shape=(), dtype=float32, numpy=0.61064565>

#### Sensitive Discriminator loss

In [21]:
sensitive = X_train.values[:,0].reshape(-1,1)
discriminator_loss(sensitive, s_d_outputs, model_type='D2')

<tf.Tensor: shape=(), dtype=float32, numpy=0.6978872>

In [22]:
generator_loss(tf.ones_like(s_d_outputs),s_d_outputs, adversary='D2')

<tf.Tensor: shape=(), dtype=float32, numpy=-0.81881565>

## Pre-train Auto-Encoder and evaluate

In [23]:



autoencoder.compile(loss='mse', optimizer='rmsprop')

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=2,
                                                    restore_best_weights=True)


In [24]:
autoencoder.fit(Xy_train, Xy_train,
                                validation_data=(Xy_test, Xy_test),
                                 batch_size=128,
                                  epochs=200,
                                   callbacks=[early_stopping])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200


<tensorflow.python.keras.callbacks.History at 0x1dd3a625880>

In [25]:
def evaluate_ae(test_set):    
    preds = autoencoder.predict(test_set)
    preds_b = np.where(preds>0.5,1,0).astype(float)
    return round(tf.keras.losses.mean_squared_error(test_set, preds_b).numpy().mean(), 5)


print("MSE :",evaluate_ae(Xy_test) )

MSE : 0.00043


### Define optimizers

In [26]:
gen_opt = tf.keras.optimizers.Adam(1e-3)
disc_opt = tf.keras.optimizers.Adam(1e-3)
s_disc_opt = tf.keras.optimizers.Adam(1e-3)

In [27]:
FGan = FairGAN(autoencoder.decoder,
             D1, D2,
            gen_opt, disc_opt, s_disc_opt)

In [28]:
FGan.compile()
FGan.fit(Xy_train[:-1,:], Xy_train[:-1,:], batch_size=256, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1dd6470b1c0>

In [29]:
true = Xy_test[:-1,:]
Xy_gen,_,_,_,_ = FGan(true)

## Fairness of generated data
- Metric:  Risk difference

In [30]:
def risk_difference(df, sens_attr):
    s_col = Xys_cols[sens_attr]
    n = df.shape[0]
    P_y1_s0 = df[(df[:,-1] == 1) & (df[:,s_col] == 0)].shape[0] / n
    P_y1_s1 = df[(df[:,-1] == 1) & (df[:,s_col] == 1)].shape[0] / n
    return P_y1_s1 - P_y1_s0
    

In [31]:
# Risk Diff in original dataset
risk_difference(Xy_train, 'Sex_Male')

0.17583320170589165

In [32]:
# Risk difference in generated data

Xy_gen = Xy_gen.numpy()
Xy_gen[:,-2] = np.where(Xy_gen[:,-2]>0.5,1,0)
Xy_gen[:,-1] = np.where(Xy_gen[:,-1]>0.5,1,0)
risk_difference(Xy_gen, 'Sex_Male')

-0.002948547840188706

In [33]:
Xy_gen[:,-2].sum(), true[:,-2].sum()

(11079.0, 9139.0)

## Utility of generated data: 
Comparing classifier performance in predicting income from real and generated data
- Real2Real classification
- Synth2Real classification

In [34]:
# MSE compared to original
round(tf.keras.losses.mse(true, Xy_gen).numpy().mean(),5)

0.10783

In [35]:
1 - (Xy_test[:,-1].sum()/len(Xy_test))

0.7521928208152133

In [36]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix

split_target = lambda x : (x[:, :-1], x[:, -1])

X_test, y_test = split_target(Xy_test)

for dset in [Xy_train, Xy_gen]:
    X_train, y_train = split_target(dset)
    clf = LinearSVC(random_state=0, tol=1e-5).fit(X_train, y_train)
    pred = clf.predict(X_test)
    print('RD:', round(risk_difference(np.hstack([X_test, pred.reshape(-1,1)]), 'Sex_Male'),2))
    print('ACC:',round(clf.score(X_test, y_test), 2))
    print()
    print(pd.DataFrame(confusion_matrix(y_test, pred)))
    print('-'*15, '\n')


RD: 0.15
ACC: 0.84

      0     1
0  9460   745
1  1455  1907
--------------- 

RD: 0.03
ACC: 0.76

       0    1
0  10055  150
1   3060  302
--------------- 

