In [1]:
import pyro
import pyro.distributions as dist
from pyro.infer import Importance, EmpiricalMarginal
import matplotlib.pyplot as plt
import torch
import numpy as np
import pandas as pd
import random



def model():
    #EXO
    Nr = pyro.sample("Nr",dist.Categorical(torch.tensor([0.75,0.25]))) #Bias here
    Ns = pyro.sample("Ns",dist.Categorical(torch.tensor([0.6,0.4])))
    Na = pyro.sample("Na",dist.Normal(torch.tensor(0.),torch.tensor(1.))) #Standard Normal Dist.
    #ENDO
    Gval  = Na + 2.1* Nr + 3.3 * Ns + 0.5 * pyro.sample("g",dist.Normal(torch.tensor(0.),torch.tensor(1.)))
    G     = pyro.sample("G",dist.Delta(Gval))
    
    Lval  = Na + 5.8 * Nr + 0.7 * Ns + 0.1 * pyro.sample("x",dist.Normal(torch.tensor(0.),torch.tensor(1.)))
    L     = pyro.sample("L",dist.Delta(Lval))
    
    Fval  = Na + 2.3 * Nr + 1.0 * Ns + 0.3 * pyro.sample("z",dist.Normal(torch.tensor(0.),torch.tensor(1.)))
    F     = pyro.sample("F",dist.Delta(Lval))
    
    
    

   


In [2]:
#Simulate the endo and exogenous variables
posterior = Importance(model, num_samples=1000).run()
trace = pyro.poutine.trace(model)
#Directly generating samples of Transport from conditioned model
Nr = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]
Ns = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]
Na = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]
G = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]
L = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]
F = [(trace.get_trace().nodes['Nr']['value']).item() for _ in range(1000)]

In [3]:
#Preping the data
Nr=np.array(Nr).reshape(1000,1)
Ns=np.array(Ns).reshape(1000,1)
Na=np.array(Na).reshape(1000,1)

G=np.array(Nr).reshape(1000,1)
L=np.array(Nr).reshape(1000,1)
F=np.array(Nr).reshape(1000,1)


In [5]:
#Setting up the policies
#For better understanding check this repo
# https://github.com/mkusner/counterfactual-fairness/blob/master/law_school_classifiers.R
# Original:
# https://github.com/apedawi-cs/Causal-inference-discussion/blob/master/law_school.ipynb
# paper:
#https://arxiv.org/abs/1703.06856
# To understand the list index thing:
# https://www.kite.com/python/answers/how-to-use-numpy-argsort-in-descending-order-in-python

from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np


class NaivePolicy:
    def __init__(self):
        pass
    def evaluate(self, G, L, nb_seats=None):
        assert G.shape == L.shape
        nb_obs = G.shape[0]
        if nb_seats is None:
            nb_seats = nb_obs
        else:
            assert isinstance(nb_seats, int) and (nb_seats > 0)
            nb_seats = min(nb_obs, nb_seats)
        ind = (normalize(G) + normalize(L)).argsort(axis=0)[-nb_seats:][::-1]
        P = np.zeros([nb_obs, 1]).astype(bool)
        P[ind] = True
        return P

class UnawarePolicy:
    #Unaware = ZFYA ~ LSAT + UGPA 
    def __init__(self):
        pass
    def train(self, G, L, F):
        X_train, X_test, y_train, y_test = train_test_split(np.hstack([G, L]), F, test_size=0.33)
        self.F_reg = LinearRegression().fit(X_train,y_train)
        y_pred    = self.F_reg.predict(X_test)
        #print(np.sqrt(metrics.mean_squared_error(y_test,y_pred)))
        
        
    def evaluate(self, G, L, nb_seats=None):
        assert G.shape == L.shape
        nb_obs = G.shape[0]
        if nb_seats is None:
            nb_seats = nb_obs
        else:
            assert isinstance(nb_seats, int) and (nb_seats > 0)
            nb_seats = min(nb_obs, nb_seats)
        F_hat = self.F_reg.predict(np.hstack([G, L]))
        ind = F_hat.argsort(axis=0)[-nb_seats:][::-1] #get the indexes in sorted ascending order using ranking
        P = np.zeros([nb_obs, 1]).astype(bool)
        P[ind] = True
        return P

class FairPolicy:
    #model-ugpa =
    #model-lsat =
    
    def __init__(self):
        pass
    def train(self, R, S, G, L):
        self.G_reg = LinearRegression().fit(np.hstack([R, S]), G)
        
        X_train, X_test, y_train, y_test = train_test_split(np.hstack([R, S]), G, test_size=0.33)
        self.G_reg = LinearRegression().fit(X_train,y_train)
        y_pred    = self.G_reg.predict(X_test)
        #print(np.sqrt(metrics.mean_squared_error(y_test,y_pred)))
        
        self.L_reg = LinearRegression().fit(np.hstack([R, S]), L)
        
        X_train, X_test, y_train, y_test = train_test_split(np.hstack([R, S]), L, test_size=0.33)
        self.L_reg = LinearRegression().fit(X_train,y_train)
        y_pred    = self.L_reg.predict(X_test)
        #print(np.sqrt(metrics.mean_squared_error(y_test,y_pred)))
        
        G_err = G - self.G_reg.predict(np.hstack([R, S]))
        L_err = L - self.L_reg.predict(np.hstack([R, S]))
        self.A_reg = PCA(whiten=True, n_components=1).fit(np.hstack([G_err, L_err]))
        self.sgn = np.sign(np.corrcoef(self.A_reg.transform(np.hstack([G_err, L_err])).T, G.T)[0, 1])
    def evaluate(self, R, S, G, L, nb_seats=None):
        assert R.shape == S.shape == G.shape == L.shape
        nb_obs = R.shape[0]
        if nb_seats is None:
            nb_seats = nb_obs
        else:
            assert isinstance(nb_seats, int) and (nb_seats > 0)
            nb_seats = min(nb_obs, nb_seats)
        G_err = G - self.G_reg.predict(np.hstack([R, S]))
        L_err = L - self.L_reg.predict(np.hstack([R, S]))
        A_hat = self.sgn * self.A_reg.transform(np.hstack([G_err, L_err]))
        ind = A_hat.argsort(axis=0)[-nb_seats:][::-1]
        P = np.zeros([nb_obs, 1]).astype(bool)
        P[ind] = True
        return P

In [10]:
#Utility Functions
def normalize(x):
    return (x - np.mean(x)) / np.std(x)

def minmax_normalizer(df):
    return (df - df.min()) / (df.max() - df.min())
def build_plot(P, A, R, S, G, L, F, colors=None, pc_samps=1000, figscale=6, fontsize=20):
    if colors is None:
        colors = {
            (0, 0): [(0.882, 0.529, 0.000, 1.000), (1.000, 0.647, 0.000, 0.500)],
            (1, 0): [(0.882, 0.000, 0.000, 1.000), (1.000, 0.000, 0.000, 0.500)],
            (0, 1): [(0.000, 0.882, 0.000, 1.000), (0.000, 1.000, 0.000, 0.500)],
            (1, 1): [(0.000, 0.000, 0.882, 1.000), (0.000, 0.000, 1.000, 0.500)]
        }
    gs = GridSpec(3, 4)
    gs.update(wspace=0, hspace=0)
    kwargs_hist = dict(bins=25, histtype='stepfilled', stacked=True)
    kwargs_text = dict(horizontalalignment='left', verticalalignment='top', fontsize=fontsize)
    fig = plt.figure(figsize=(4 * figscale, 3 * figscale))
    ax_dict = dict()
    for i, tup in enumerate(itertools.product([0, 1], [0, 1])):
        j, k = tup
        ind = (R == j) & (S == k)
        ax = fig.add_subplot(gs[j, k]) 
        ax.hist([A[ind & P], A[ind & ~P]], color=colors[tup], **kwargs_hist)
        ax.axvline(x=0, ls='dotted', color='black')
        ax.text(0.02, 0.98, 'R={0:}, S={1:}'.format(j, k), transform=ax.transAxes, **kwargs_text)
        ax.set_yticks([])
        ax.set_xlim([-5, 5])
        ax.set_xticks([])
        ax_dict[i] = ax
    ylim = [0, 1.05 * max([ax.get_ylim()[1] for ax in ax_dict.values()])]
    [ax.set_ylim(ylim) for ax in ax_dict.values()];
    ax = fig.add_subplot(gs[0:2, 2:])
    ax.hist([A[P], A[~P]], color=['darkgray', 'lightgray'], **kwargs_hist)
    ax.axvline(x=0, ls='dotted', color='black')
    ax.text(0.01, 0.99, 'All', transform=ax.transAxes, **kwargs_text)
    ax.set_yticks([])
    ax.set_xlim([-5, 5])
    ax.set_xticks([])
    ax = fig.add_subplot(gs[2:, 0:])
    z = ['A', 'G', 'L', 'F']
    x = range(len(z))
    df = pd.DataFrame({'A': A.flat, 'G': G.flat, 'L': L.flat, 'F': F.flat}, columns=z)
    df = minmax_normalizer(df)
    idx = np.random.choice(range(len(df)), pc_samps)
    colors = pd.DataFrame({'R': R.flat, 'S': S.flat}, columns=['R', 'S'])\
      .apply(tuple, axis=1).apply(lambda i: colors[i])
    for i in df.index[idx]:
        color = colors[i][0] if P[i] else colors[i][1]
        alpha = 0.500 if P[i] else 0.025
        ax.plot(x, df.loc[i], ls='solid', color=color, alpha=alpha)
    ax.set_ylim([0, 1])
    ax.set_xlim([x[0], x[-1]])
    ax.set_xticks(x)
    ax.set_xticklabels(z)
    [ax.axvline(x=_x, lw=1, ls='dotted', color='black') for _x in x];
    for tick in ax.yaxis.get_major_ticks():
        tick.label1On = False
        tick.tick1On = False
    for tick in ax.xaxis.get_major_ticks():
        tick.label.set_fontsize(fontsize) 
    return fig

In [12]:

import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
nb_seats = 20000
#Main
# set up naive policy
naivePolicy = NaivePolicy()

# set up and train unaware policy
unawarePolicy = UnawarePolicy()
unawarePolicy.train(G, L, F)

# set up and train fair policy
fairPolicy = FairPolicy()
fairPolicy.train(Nr, Ns, G, L)

array([[1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
    

In [11]:
P = {
    'naive': naivePolicy.evaluate(G, L, nb_seats),
    'unaware': unawarePolicy.evaluate(G, L, nb_seats),
    'fair': fairPolicy.evaluate(R, S, G, L, nb_seats)
}
#build_plot(P['naive'], A, R, S, G, L, F);
#build_plot(P['unaware'], A, R, S, G, L, F);
#build_plot(P['fair'], A, R, S, G, L, F);

[[1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.

NameError: name 'R' is not defined