### Main notebook


In [1]:
import numpy as np
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing


from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import metrics
from matplotlib.colors import ListedColormap
from scipy.interpolate import interp1d

from fairness_measure import discrimination, all_fairness, statistical_parity, temporal_bias
from modeling import datasets, get_results, weight_estimation, reweighing
from pre_process import pre_process_raw_data, preparing_dataframe
import modeling

%config Completer.use_jedi = False

import warnings


# Silence all warnings for the remainder of the session.
warnings.simplefilter('ignore')

# Default figure size used by plots that do not pass their own `figsize`.
plt.rcParams['figure.figsize'] = (8, 5)
# plt.style.use('fivethirtyeight')

## Dataset Reader
- datasets:
    - jigsaw
    - adult
    - funding

In [2]:
# Dataset used by every cell below; the section above lists 'jigsaw', 'adult' and 'funding'.
name = 'adult'

In [3]:
# Load the selected dataset through the project's `datasets` helper.
df = datasets(name)

In [4]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(636625, 12)

In [5]:
# List the distinct values of the 'date' column in ascending order.
sorted(df['date'].unique())

[2015, 2016, 2017, 2018]

## Baselines
- (0)(vanilla) Train once + test on entire future (sequentially) (average) (no fairness)
- (1)(static) Train once + test on entire future (sequentially) (average)
- (2)(dynamic) Train on entire past + test on immediate future (+ average)

- Ours: (2) + forecasting 

#### (0) (vanilla)

In [6]:
# Run the vanilla baseline. Three values come back; judging by how they are used
# further down they are (summary metrics, per-batch results, batch labels) --
# confirm against modeling.baseline_0.
baseline_0_results, full_results_0, batches_0 = modeling.baseline_0(df)

Train on 2015
Test on 2016
Test on 2017
Test on 2018


In [7]:
# Display the summary metrics returned by the vanilla baseline.
baseline_0_results

auc    0.823921
S.P    0.107324
TPR    0.095297
FPR    0.037701
GAP    0.066499
dtype: float64

In [8]:
# np.asarray keeps this cell idempotent: it converts the pandas result on the first
# run and hands an existing array back unchanged on a re-run, where calling
# .to_numpy() on the already-converted array would raise AttributeError.
baseline_0_results = np.asarray(baseline_0_results)

#### (1) (static)

In [9]:
# Run the static baseline; same three-value return layout as the vanilla run above
# (summary metrics, per-batch results, batch labels -- confirm against modeling.baseline_1).
baseline_1_results, full_results_1, batches_1 = modeling.baseline_1(df)

Train on 2015
Test on 2016
Test on 2017
Test on 2018


In [10]:
# Display the summary metrics returned by the static baseline.
baseline_1_results

auc    0.817730
S.P    0.078786
TPR    0.081577
FPR    0.008248
GAP    0.044912
dtype: float64

In [11]:
# np.asarray keeps this cell idempotent: it converts the pandas result on the first
# run and hands an existing array back unchanged on a re-run, where calling
# .to_numpy() on the already-converted array would raise AttributeError.
baseline_1_results = np.asarray(baseline_1_results)

#### (2) (dynamic)

In [12]:
# Run the dynamic baseline (per the run log: train on all past batches, test on the next one).
# NOTE(review): this section is labelled baseline (2) and the results are stored
# under *_2 names, yet the call is modeling.baseline_3 -- confirm this is the intended function.
baseline_2_results, full_results_2, batches_2 = modeling.baseline_3(df)

[2015]
2016
[2015, 2016]
2017
[2015, 2016, 2017]
2018


In [13]:
# Display the summary metrics returned by the dynamic baseline.
baseline_2_results

auc    0.822943
S.P    0.074481
TPR    0.076409
FPR    0.006019
GAP    0.041214
dtype: float64

In [14]:
# np.asarray keeps this cell idempotent: it converts the pandas result on the first
# run and hands an existing array back unchanged on a re-run, where calling
# .to_numpy() on the already-converted array would raise AttributeError.
baseline_2_results = np.asarray(baseline_2_results)

## Results

### Baselines

In [15]:
# One row per baseline, in the order vanilla (0), static (1), dynamic (2).
columns = ['AUC', 'S.P', 'TPR', 'FPR', 'GAP']
baseline_rows = [baseline_0_results, baseline_1_results, baseline_2_results]
pd.DataFrame(np.array(baseline_rows), columns=columns)

Unnamed: 0,AUC,S.P,TPR,FPR,GAP
0,0.823921,0.107324,0.095297,0.037701,0.066499
1,0.81773,0.078786,0.081577,0.008248,0.044912
2,0.822943,0.074481,0.076409,0.006019,0.041214


### Ours 4 (anticipated)

### Search Space 

In [16]:
# Search grid for alpha: 11 evenly spaced values from 0 to 1 inclusive.
alphas = np.linspace(0, 1, 11)

In [17]:
# Round to two decimals to strip floating-point noise from the grid values.
alphas = np.round(alphas, 2)

In [18]:
# Show the final alpha grid.
print(alphas)

[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]


In [19]:
# Sweep the alpha grid with our method. Each summary metric is collected into its
# own list (positions 0..4 of the returned summary), and the per-batch results of
# every run are kept in `full_results`, one entry per alpha.
aucs, sps, tprs, fprs, eods = [], [], [], [], []
full_results = []

for alpha in alphas:
    print(alpha)
    results, full_result, _ = modeling.ours_cumulative(name, df, alpha)
    for position, collected in enumerate((aucs, sps, tprs, fprs, eods)):
        collected.append(results[position])
    full_results.append(full_result)

0.0
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.1
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.2
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.3
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.4
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.5
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.6
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation from 2017
2017
[2015, 2016, 2017]
Use estimation from 2018
2018
0.7
[2015]
Use estimation from 2016
2016
[2015, 2016]
Use estimation 

In [20]:
# Tabulate the sweep: one row per alpha, one column per collected metric.
pd.DataFrame(dict(zip(('AUC', 'S.P', 'TPR', 'FPR'), (aucs, sps, tprs, fprs))))

Unnamed: 0,AUC,S.P,TPR,FPR
0,0.820562,0.088,0.088541,0.012071
1,0.823406,0.094905,0.095042,0.018894
2,0.820851,0.0874,0.083725,0.013829
3,0.817653,0.078584,0.080284,0.008061
4,0.822484,0.089933,0.090114,0.013893
5,0.821793,0.093321,0.09011,0.023093
6,0.825779,0.101061,0.098438,0.026066
7,0.822758,0.073414,0.071449,0.008823
8,0.822728,0.092163,0.092876,0.015839
9,0.825642,0.058467,0.059905,0.023562


## Temporal Visualization

In [None]:
def temporal_visualization(name):
    """Plot per-batch AUC and fairness gaps for the three baselines and our method.

    Draws a 2x2 grid (AUC, S.P, TPR and FPR against 'Time') with one line per
    method and saves it to ``./results/{name}_temporal.png``.

    Reads the notebook-level ``full_results_0/1/2``, ``batches_0/1/2`` and
    ``full_results``. As a side effect, a 'Time' column is written into the three
    baseline result frames.

    Parameters
    ----------
    name : str
        Dataset name: 'jigsaw', 'adult' or 'funding'. It selects which entry of
        ``full_results`` (one per alpha) is plotted as "Ours".

    Raises
    ------
    ValueError
        If ``name`` is not one of the known datasets.
    """
    import os

    # Position in `full_results` of the alpha run shown as "Ours" for each dataset
    # (same choices as the static-visualization and temporal-bias cells).
    ours_index = {'jigsaw': 0, 'adult': 9, 'funding': 9}
    if name not in ours_index:
        raise ValueError(
            f"unknown dataset {name!r}; expected one of {sorted(ours_index)}"
        )
    d = ours_index[name]

    # Results are indexed by the batches after the first one -- presumably because
    # the first batch is only ever trained on (see the run logs); confirm.
    full_results_0['Time'] = batches_0[1:]
    full_results_1['Time'] = batches_1[1:]
    full_results_2['Time'] = batches_2[1:]
    ours = full_results[d].copy()
    ours['Time'] = batches_0[1:]

    fig, axs = plt.subplots(2, 2, figsize=(18, 10))

    # (results frame, legend label, extra lineplot kwargs), drawn in this order.
    methods = (
        (full_results_0, 'Vanilla', {}),
        (full_results_1, 'Static', {}),
        (full_results_2, 'Dynamic', {}),
        (ours, 'Ours', {'color': 'black'}),
    )
    # (axes, results column, y-axis label) for each panel.
    panels = (
        (axs[0, 0], 'auc', 'AUC'),
        (axs[0, 1], 'S.P', 'Δ S.P'),
        (axs[1, 0], 'TPR', 'Δ TPR'),
        (axs[1, 1], 'FPR', 'Δ FPR'),
    )
    new_ticks = full_results_0['Time'].values

    for ax, column, ylabel in panels:
        for frame, label, extra in methods:
            sns.lineplot(ax=ax, data=frame, x='Time', y=column, label=label, **extra)
        ax.set_xlabel('Time', fontsize=15)
        ax.set_ylabel(ylabel, fontsize=15)
        # One tick per evaluated batch instead of matplotlib's automatic ticks.
        ax.set_xticks(new_ticks)

    plt.tight_layout()
    # savefig does not create missing directories.
    os.makedirs('./results', exist_ok=True)
    plt.savefig(f'./results/{name}_temporal.png')

In [None]:
# Draw and save the 2x2 temporal figure for the configured dataset.
temporal_visualization(name)

In [None]:
def just_auc_sp(name):
    """Plot per-batch AUC and statistical-parity gap for all four methods.

    Draws two side-by-side panels (AUC and S.P against 'Time') with one line per
    method and saves the figure to ``./results/{name}_AUC_SP.png``.

    Reads the notebook-level ``full_results_0/1/2``, ``batches_0/1/2`` and
    ``full_results``. As a side effect, a 'Time' column is written into the three
    baseline result frames.

    Parameters
    ----------
    name : str
        Dataset name: 'jigsaw', 'adult' or 'funding'. It selects which entry of
        ``full_results`` (one per alpha) is plotted as "Ours".

    Raises
    ------
    ValueError
        If ``name`` is not one of the known datasets.
    """
    import os

    # Position in `full_results` of the alpha run shown as "Ours" for each dataset
    # (same choices as the static-visualization and temporal-bias cells).
    ours_index = {'jigsaw': 0, 'adult': 9, 'funding': 9}
    if name not in ours_index:
        raise ValueError(
            f"unknown dataset {name!r}; expected one of {sorted(ours_index)}"
        )
    d = ours_index[name]

    # Prepare the frames for every dataset, not only 'jigsaw'; previously any
    # other name reached the plotting code with `ours` unbound.
    full_results_0['Time'] = batches_0[1:]
    full_results_1['Time'] = batches_1[1:]
    full_results_2['Time'] = batches_2[1:]
    ours = full_results[d].copy()
    ours['Time'] = batches_0[1:]

    fig, axs = plt.subplots(1, 2, figsize=(20, 6))

    # (results frame, legend label, extra lineplot kwargs), drawn in this order.
    methods = (
        (full_results_0, 'Vanilla', {}),
        (full_results_1, 'Static', {}),
        (full_results_2, 'Dynamic', {}),
        (ours, 'Ours', {'color': 'black'}),
    )
    new_ticks = full_results_0['Time'].values

    for ax, column, ylabel in ((axs[0], 'auc', 'AUC'), (axs[1], 'S.P', 'Δ S.P')):
        for frame, label, extra in methods:
            sns.lineplot(ax=ax, data=frame, x='Time', y=column, label=label, **extra)
        ax.set_xlabel('Time', fontsize=15)
        ax.set_ylabel(ylabel, fontsize=15)
        # One tick per evaluated batch instead of matplotlib's automatic ticks.
        ax.set_xticks(new_ticks)

    plt.tight_layout()
    # savefig does not create missing directories.
    os.makedirs('./results', exist_ok=True)
    plt.savefig(f'./results/{name}_AUC_SP.png')

In [None]:
# Draw and save the two-panel AUC / S.P figure for the configured dataset.
just_auc_sp(name)

## Static Visualization 

In [None]:
# Pick which alpha run (position in `full_results`) represents "Ours" in the
# static figure. The jigsaw branch additionally attaches the 'Time' column to the
# baseline frames and builds a time-indexed copy of the chosen run.
if name in ('funding', 'adult'):
    d = 9
elif name == 'jigsaw':
    d = 0
    full_results_0['Time'] = batches_0[1:]
    full_results_1['Time'] = batches_1[1:]
    full_results_2['Time'] = batches_2[1:]
    ours = full_results[d].copy()
    ours['Time'] = batches_0[1:]

In [None]:
# Accuracy/fairness trade-off: one panel per fairness gap, one marker per method.
# Each marker sits at (mean gap, mean AUC) over the test batches, with the
# standard deviations drawn as error bars.
fig, axs = plt.subplots(1, 3, figsize=(20,5))

# (per-batch results, marker, legend label, extra errorbar kwargs)
methods = (
    (full_results_0, '+', 'Vanilla', {}),
    (full_results_1, 'd', 'Static', {}),
    (full_results_2, 'x', 'Dynamic', {}),
    (full_results[d], 's', 'Ours', {'color': 'black'}),
)

for panel, gap in zip(axs, ('S.P', 'TPR', 'FPR')):
    for frame, marker, label, extra in methods:
        panel.errorbar(
            frame[gap].mean(axis=0),
            frame['auc'].mean(axis=0),
            xerr=frame[gap].std(axis=0),
            yerr=frame['auc'].std(axis=0),
            marker=marker,
            label=label,
            **extra,
        )
    panel.legend()

# Only the left-most panel carries the shared y-axis label.
axs[0].set_ylabel('AUC', fontsize = 15)

# Fixed per-panel x ranges and a common y range.
panel_axes = (
    ('∆ S.P', (0.01, 0.12)),
    ('∆ TPR', (0.01, 0.13)),
    ('∆ FPR', (-0.02, 0.07)),
)
for panel, (xlabel, xlim) in zip(axs, panel_axes):
    panel.set_xlabel(xlabel, fontsize = 15)
    panel.set_xlim(*xlim)
    panel.set_ylim(0.81, 0.845)

plt.tight_layout()
plt.savefig(f'./results/{name}.png')

### Temporal Bias (metrics)

In [21]:
# Position in `full_results` of the alpha run reported as "Ours" for each dataset.
if name in ('adult', 'funding'):
    d = 9
elif name == 'jigsaw':
    d = 0
    
def calculate_temporal_Bais(metric='S.P'):
    """Tabulate temporal-bias statistics of one metric for every method.

    Applies ``temporal_bias`` to the chosen column of the per-batch results of
    the three baselines and of the selected alpha run (``full_results[d]``),
    stacks the four outputs into one table and returns a subset of its columns.

    The metric is a parameter rather than a notebook-level ``metrics`` variable,
    because that name would overwrite the ``sklearn.metrics`` module imported at
    the top of the notebook.

    Parameters
    ----------
    metric : str, default 'S.P'
        Column of the per-batch result frames to analyse.

    Returns
    -------
    pandas.DataFrame
        Columns 'MAX-MIN', 'TS' and 'MB'; rows in the order vanilla, static,
        dynamic, ours.
    """
    per_method = [
        temporal_bias(frame[metric])
        for frame in (full_results_0, full_results_1, full_results_2, full_results[d])
    ]

    # np.vstack replaces np.row_stack, which is a deprecated alias of it.
    temporal_bias_results = np.vstack(per_method)

    # The labels assume temporal_bias returns its ten statistics in exactly this
    # order -- confirm against fairness_measure.temporal_bias.
    results_table = pd.DataFrame(
        temporal_bias_results,
        columns=['ROC', 'RMSB', 'SD', 'MAX-MIN', 'TS', 'MB', 'MABD', 'AADM', 'MEAN_SUM', 'CUMSUM-Plain'],
    )

    return results_table[['MAX-MIN', 'TS', 'MB']]

In [22]:
# Rows of the resulting table: 0 = vanilla, 1 = static, 2 = dynamic, 3 = ours.
calculate_temporal_Bais()

Unnamed: 0,MAX-MIN,TS,MB
0,0.110612,0.004766,0.009202
1,0.085729,0.010416,0.013614
2,0.078511,0.004237,0.006855
3,0.079797,0.058615,0.060407


### Plot Alphas

In [None]:
# Join the alpha grid with the metrics collected during the sweep, one row per alpha.
alpha_columns = ['⍺', 'AUC', 'S.P', 'TPR', 'FPR', 'EoD']
alpha_values = [alphas, aucs, sps, tprs, fprs, eods]
alpha_results = pd.DataFrame(dict(zip(alpha_columns, alpha_values)))

In [None]:
# Display the per-alpha results table.
alpha_results

In [None]:
# Export kept disabled. The "Temp" section at the end of the notebook reads this
# CSV back, so re-enable the line when the sweep was run with name == 'funding'.
# alpha_results.to_csv('./results/funding_alpha_results.csv', index = False)

In [None]:
# Statistical-parity gap as a function of alpha for the current dataset.
alpha_axis = alpha_results['⍺']
sp_gap = alpha_results['S.P']
plt.plot(alpha_axis, sp_gap, color="red", marker="D")
plt.xlabel('⍺')
plt.ylabel('Δ S.P')
# plt.savefig(f'./results/{name}_alpha_behavior.png')

## Temp -- To plot Funding dataset 

In [None]:
# Load a previously exported alpha sweep; the file must already exist on disk
# (it is the target of the commented-out to_csv cell in the "Plot Alphas" section).
fundind_alphas = pd.read_csv('./results/funding_alpha_results.csv')

In [None]:
# Display the loaded funding sweep.
fundind_alphas

In [None]:
# Use a dedicated variable for the output file name. Rebinding the notebook-wide
# `name` here would silently switch every earlier cell to 'funding' on a re-run.
funding_name = 'funding'
plt.plot(fundind_alphas['⍺'], fundind_alphas['S.P'], color="blue", marker="D")
plt.xlabel('⍺')
plt.ylabel('Δ S.P')
plt.savefig(f'./results/{funding_name}_alpha_behavior.png')