In [None]:
import numpy as np 
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt 

from group4_banker import Group4Banker

Let 

$$
\pi_w(x) = \frac{\exp(w^\top x)}{\exp(w^\top x) + 1}
$$

be a policy parametrized by $w \in \mathbb{R}^n$, where $\pi_w(x)$ is the probability of granting a loan given a feature vector $x \in \mathbb{R}^n$. We remark that $\pi_w$ is differentiable with gradient $\nabla_w \pi_w = x\exp(w^\top x) / (\exp(w^\top x)  + 1)^2$, making gradient-based optimization a suitable approach to estimate $w$. To maximise revenue $U$ and at the same time account for fairness $F$, we seek the parameters $w$ of $\pi_w$ that solve

$$
\max_{\pi_w} \int_{\Theta} V_\theta(\pi_w) d\beta(\theta); \quad V_\theta(\pi_w) = (1 - \lambda)\mathbb{E}_{\pi_w}(U \mid x)  - \lambda \mathbb{E}_{\pi_w}(F \mid x, z).
$$

Here, $\lambda$ is a regularisation parameter balancing fairness and utility, and $\Theta$ is the set of possible inputs $\theta \sim \Theta$ to a Random Forest (RF) model $\beta$ that predicts the probability $P_\theta$ of a particular outcome $y$, indicating whether a loan is repaid or not. To quantify model performance, we optimize with respect to $\mathbb{E}_{\pi_w}(U \mid x) = \exp(-(y - \pi_w(x))^2)$. Writing $a_{\pi_w}$ for the action taken under $\pi_w$, this measure is maximized for $y = a_{\pi_w}$, in which case $\mathbb{E}_{\pi_w}(U \mid x)  = 1$, and the worst outcome is $y = 0$ and $a_{\pi_w} = 1$ (or vice versa), for which $\mathbb{E}_{\pi_w}(U \mid x)  = 1 / e$. Moreover, we define the expected fairness $\mathbb{E}_{\pi_w}(F \mid x, z)$ as

$$
\mathbb{E}_{\pi_w}(F \mid x, z) = \sum_{y, a, z} \pi_w(x) \left ( P_\theta (y \mid x, z) - P_\theta (y \mid x) \right )^2
$$

where $z \subset x$ denotes the sensitive variables among the features. To optimize our objective, we approximate

$$
\int_{\Theta} V_\theta(\pi_w) d\beta(\theta) \approx \frac{1}{n} \sum_{i=1}^n V_{\theta^{(i)}}(\pi_w),
$$

using Stochastic Gradient Descent (SGD) with automatic differentiation to obtain $\nabla_{w} V_\theta(\pi_w)$. In each iteration of SGD we collect bootstrap samples $\theta^{(i)}$ from the validation/test set $\Theta$ and predict $P_{\theta^{(i)}}$ using the RF model $\beta(\theta^{(i)})$. Note that to distinguish between $P_\theta (y \mid x, z)$ and $P_\theta (y \mid x)$ we manage two RF models: one trained on all variables in a bootstrap sample $\theta$, including the sensitive variables $z$, and another trained on the remaining variables only. That is, we exclude the sensitive variables $z$ when training the latter RF model.

In [None]:
# Floating-point dtype shared by every TensorFlow tensor built in this notebook.
# (`tf.float23` does not exist; the 32-bit float type is `tf.float32`.)
DTYPE = tf.float32

In [None]:
def sgd_step(x, y, w, delta_P, optimizer, lmbda=0.01) -> tuple:
    """Take one gradient step on the negated objective for a single sample.

    The policy is the logistic function ``pi_w(x) = sigmoid(w . x)``. The loss
    minimised here is

        -V = (lmbda - 1) * exp(-(y - pi_w)^2) + lmbda * sum(pi_w * delta_P)

    so that minimising it maximises ``V``.

    Args:
        x: Feature vector of the sample.
        y: Ground-truth outcome of the sample.
        w: Policy parameters. Must be a ``tf.Variable``; it is updated in
            place by ``optimizer``.
        delta_P: Fairness penalty terms; multiplied by ``pi_w`` and summed.
        optimizer: Optimizer object exposing ``apply_gradients``.
        lmbda: Weight of the fairness penalty; utility is weighted ``1 - lmbda``.

    Returns:
        Tuple ``(w, V)`` holding the updated parameters as a numpy array and
        the objective value ``V`` evaluated before the update.

    Raises:
        TypeError: If ``w`` is not a ``tf.Variable`` (a plain tensor cannot be
            updated by the optimizer).
    """

    if not isinstance(w, tf.Variable):
        raise TypeError("w must be a tf.Variable so the optimizer can update it in place")

    # NOTE: TF is sensitive about dtypes, so every operand (including y) is cast.
    x = tf.cast(x, dtype=DTYPE)
    y = tf.cast(y, dtype=DTYPE)
    lmbda = tf.cast(lmbda, dtype=DTYPE)
    delta_P = tf.cast(delta_P, dtype=DTYPE)

    with tf.GradientTape() as tape:

        # Just in case (covers variables created with trainable=False).
        tape.watch(w)

        # Cast inside the tape so the gradient still flows back to the variable.
        w_cast = tf.cast(w, dtype=DTYPE)

        # Parametrized policy. tensordot handles rank-1 vectors (matmul does
        # not), and sigmoid(z) == exp(z) / (exp(z) + 1) without overflowing.
        pi_w = tf.sigmoid(tf.tensordot(w_cast, x, axes=1))

        # NOTE: Maximize V <=> minimize -1 * V.
        neg_V = (lmbda - 1) * tf.exp(-tf.square(y - pi_w)) + lmbda * tf.reduce_sum(pi_w * delta_P)

    # Explicit gradient + apply works irrespective of whether the optimizer's
    # `minimize` accepts a tensor loss.
    grads = tape.gradient(neg_V, [w])
    optimizer.apply_gradients(zip(grads, [w]))

    # Cast to numpy so can be mixed with Python objects.
    return w.numpy(), -1.0 * neg_V.numpy()

In [None]:
def experiment(X, y, seed=42, lmbda=1e-3):
    """Estimate the policy parameters with SGD on randomly drawn samples.

    NOTE(review): this function reads `rf_x`, `rf_xz` and `num_epochs` from
    the enclosing module scope; none of them is defined inside the function,
    so they must exist before it is called. It also relies on `sgd_step` and
    `DTYPE`.

    Args:
        X: Feature matrix of N samples x P features.
        y: Ground truths.
        seed: Seed for numpy's global random generator (initial weights and
            sample selection).
        lmbda: Fairness/utility trade-off forwarded to `sgd_step`.

    Returns:
        Tuple `(w, V_est)`: the parameter estimate returned by the last SGD
        step and the sum of the objective values returned by all steps.
    """

    np.random.seed(seed)

    # Plain float arrays so that `[i]` below always selects the i-th row;
    # on a DataFrame / Series `[i]` is label-based and would pick a column
    # or fail.
    X_rows = np.asarray(X, dtype=float)
    y_rows = np.asarray(y, dtype=float)

    # Initial parameter estimate and optimization object.
    w_i = np.random.random(X_rows.shape[1])
    w_i_tf = tf.Variable(w_i, dtype=DTYPE)

    # Train each of the RF models.
    rf_x.train()
    rf_xz.train()

    # Deviation from the RF model being independent on sensitive variable z.
    # The *difference* is squared; without the parentheses only the second
    # prediction was squared.
    delta_p = (rf_x.predict(X) - rf_xz.predict(X)) ** 2

    optimizer = tf.keras.optimizers.SGD(learning_rate=1e-4)

    V_est = 0
    for _ in range(num_epochs):

        i = np.random.choice(np.arange(X_rows.shape[0]))
        # Forward `lmbda`; previously the argument was accepted but ignored,
        # so `sgd_step` always ran with its own default.
        w_i, V = sgd_step(
            x=X_rows[i],
            y=y_rows[i],
            w=w_i_tf,
            delta_P=delta_p,
            optimizer=optimizer,
            lmbda=lmbda,
        )

        V_est += V

    return w_i, V_est
        

# FIXME(review): `experiment` declares `X` and `y` as required positional
# arguments, so this argument-less call raises a TypeError as written.
# Pass the feature matrix and ground truths explicitly.
experiment()

In [None]:
def prep_data(data_dir="../../data/credit"):
    """Load the credit data and one-hot encode its categorical columns.

    The category levels are taken from the training file and re-used for the
    test file. Encoding each file independently would let `drop_first` drop a
    different baseline level per file and could leave the test frame without
    some of the columns listed in `encoded_features`.

    Args:
        data_dir: Directory containing `D_train.csv` and `D_test.csv`
            (space-separated, no header row).

    Returns:
        Tuple `(D_train, D_test, encoded_features, target)`:
        the encoded train and test frames (identical columns), the list of
        encoded feature column names, and the name of the target column.
        Test values of a categorical column that never occur in the training
        file get all-zero dummies, i.e. they are encoded like the dropped
        baseline level.
    """
    from pathlib import Path  # local import: the notebook's import cell lacks it

    features = ['checking account balance', 'duration', 'credit history',
                'purpose', 'amount', 'savings', 'employment', 'installment',
                'marital status', 'other debtors', 'residence time',
                'property', 'age', 'other installments', 'housing', 'credits',
                'job', 'persons', 'phone', 'foreign']

    target = 'repaid'

    numerical_features = ['duration', 'age', 'residence time', 'installment', 'amount', 'persons', 'credits']
    categorical_features = [name for name in features if name not in numerical_features]

    data_dir = Path(data_dir)
    column_names = features + [target]
    df_train = pd.read_csv(data_dir / "D_train.csv", sep=' ', names=column_names)
    df_test = pd.read_csv(data_dir / "D_test.csv", sep=' ', names=column_names)

    # Pin both frames to the levels observed in the training data so that
    # get_dummies emits the same columns (and drops the same baseline) twice.
    for name in categorical_features:
        levels = pd.Categorical(df_train[name]).categories
        df_train[name] = pd.Categorical(df_train[name], categories=levels)
        df_test[name] = pd.Categorical(df_test[name], categories=levels)

    D_train = pd.get_dummies(df_train, columns=categorical_features, drop_first=True)
    D_test = pd.get_dummies(df_test, columns=categorical_features, drop_first=True)
    encoded_features = [name for name in D_train.columns if name != target]

    return D_train, D_test, encoded_features, target

In [None]:
# Encoded train/test frames plus the names of the feature and target columns.
D_train, D_test, encoded_features, target = prep_data()

# Training design matrix and labels.
X_train, y_train = D_train[encoded_features], D_train[target]

# Banker model with a 5 % interest rate, fitted on the training split.
model = Group4Banker(optimize=False, random_state=42)
model.set_interest_rate(0.05)
model.fit(X_train, y_train)

# Held-out design matrix and labels.
X_test, y_test = D_test[encoded_features], D_test[target]