In [23]:
import os
import csv
import math
import itertools
import pickle

import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler

from fairness_checker import *

In [24]:
# Restore the pickled synthetic-data generator; the sampling cells below call
# model.synthetic_data(...) on it.
# NOTE: unpickling can execute arbitrary code, so only load trusted files.
synth_model_path = './model/adult_synth.pkl'
with open(synth_model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Random Forest

In [25]:
# Restore the pickled classifier for this section (file name: adult_RF.pkl).
rf_classifier_path = './model/adult_RF.pkl'
with open(rf_classifier_path, 'rb') as classifier_file:
    clf = pickle.load(classifier_file)

In [26]:
class model_wrapper:
    """Adapter exposing ``predict(file)`` for a model that expects a feature array.

    ``predict`` reads a CSV file, removes the columns listed in
    ``_DROPPED_COLUMNS`` and passes the remaining columns to the wrapped
    model as a NumPy array.
    """

    # Columns removed before prediction (includes the 'income' label column).
    _DROPPED_COLUMNS = [
        "workclass", "education", "occupation", "relationship", "race",
        "native.country", "income",
    ]

    def __init__(self, model):
        """Store the wrapped model; it must provide ``predict(array)``."""
        self.model = model

    def predict(self, file):
        """Read the CSV at ``file`` and return the wrapped model's predictions."""
        features = self.preprocessing(pd.read_csv(file))
        return self.model.predict(features.to_numpy())

    def preprocessing(self, df):
        """Return a copy of ``df`` without the dropped columns.

        The input frame is left untouched (the previous version dropped the
        columns in place, which silently modified the caller's frame).

        Raises:
            KeyError: if any of the dropped columns is missing from ``df``.
        """
        return df.drop(columns=self._DROPPED_COLUMNS)

# Wrap the classifier loaded from adult_RF.pkl; the metric calls below receive
# `trained` as their model argument.
trained = model_wrapper(clf)

In [27]:
# Score `trained` on freshly sampled synthetic data, once per round.
# NOTE: long-running cell (the original runtime note was left unfinished).
n_rounds = 100            # previously named `iter`, which shadowed the built-in iter()
rows_per_sample = 30000
synth_csv_path = './tmp/synth.csv'

# One list per metric, one entry per round; the reporting cell reads these by name.
disparate_impact = []
demographic_parity = []
equalized_odds_1, equalized_odds_2 = [], []
equal_opportunity = []
accuracy_eqaulity = []
predictive_parity = []
equal_calibration = []                 # never filled (n/a)
conditional_statistical_parity = []    # not filled here: the call is disabled below
predictive_equality = []
conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_2 = [], []
positive_balance = []                  # never filled (n/a)
negative_balance = []                  # never filled (n/a)
mean_difference = []

# The predicates are identical for every round, so build them once up front.
# Row values are compared as strings ('0' / '1') -- presumably the checker
# reads the CSV without type conversion; TODO confirm.
privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

# to_csv does not create missing directories, so make sure ./tmp exists.
os.makedirs(os.path.dirname(synth_csv_path), exist_ok=True)

for _ in range(n_rounds):
    # The checker is given a file path, so each sample goes through a scratch CSV.
    synth = model.synthetic_data(rows=rows_per_sample)
    synth.df.to_csv(synth_csv_path, index=False)
    c = fairness_model_checker(synth_csv_path, verbose=False)

    disparate_impact.append(c.disparate_impact(0.8, *group_args, value=True))
    demographic_parity.append(c.demographic_parity(0.2, *group_args, value=True))

    odds_1, odds_2 = c.equalized_odds(0.2, *outcome_args, value=True)
    equalized_odds_1.append(odds_1)
    equalized_odds_2.append(odds_2)

    equal_opportunity.append(c.equal_opportunity(0.2, *outcome_args, value=True))
    accuracy_eqaulity.append(c.accuracy_eqaulity(0.2, *outcome_args, value=True))
    predictive_parity.append(c.predictive_parity(0.2, *outcome_args, value=True))
    # equal_calibration (n/a)
    # conditional_statistical_parity .append( c.conditional_statistical_parity(0.2, trained, privileged_predicate, positive_predicate, lambda x: (lambda row: row['race'] == x), ('4',), value=True) )
    predictive_equality.append(c.predictive_equality(0.2, *outcome_args, value=True))

    cuae_1, cuae_2 = c.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
    conditional_use_accuracy_equality_1.append(cuae_1)
    conditional_use_accuracy_equality_2.append(cuae_2)
    # positive_balance (n/a)
    # negative_balance (n/a)
    mean_difference.append(c.mean_difference(0.2, *group_args, value=True))

In [28]:
# Baseline ("og") values: the same metrics computed once on the original dataset.
# Every name starts at 0 so the metrics that are not computed below stay defined.
og_disparate_impact = og_demographic_parity = og_mean_difference = 0
og_equalized_odds_1 = og_equalized_odds_2 = 0
og_equal_opportunity = og_accuracy_eqaulity = og_predictive_parity = 0
og_equal_calibration = og_conditional_statistical_parity = og_predictive_equality = 0
og_conditional_use_accuracy_equality_1 = og_conditional_use_accuracy_equality_2 = 0
og_positive_balance = og_negative_balance = 0

cog = fairness_model_checker("./data/adult_processed.csv", verbose=False)

privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

og_disparate_impact = cog.disparate_impact(0.8, *group_args, value=True)
og_demographic_parity = cog.demographic_parity(0.2, *group_args, value=True)
og_equalized_odds_1, og_equalized_odds_2 = cog.equalized_odds(0.2, *outcome_args, value=True)
og_equal_opportunity = cog.equal_opportunity(0.2, *outcome_args, value=True)
og_accuracy_eqaulity = cog.accuracy_eqaulity(0.2, *outcome_args, value=True)
og_predictive_parity = cog.predictive_parity(0.2, *outcome_args, value=True)
# equal_calibration (n/a)
# og_conditional_statistical_parity = cog.conditional_statistical_parity(0.2, trained, privileged_predicate, positive_predicate, lambda x: (lambda row: row['race'] == x), ('4',), value=True)
og_predictive_equality = cog.predictive_equality(0.2, *outcome_args, value=True)
(
    og_conditional_use_accuracy_equality_1,
    og_conditional_use_accuracy_equality_2,
) = cog.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
# positive_balance (n/a)
# negative_balance (n/a)
og_mean_difference = cog.mean_difference(0.2, *group_args, value=True)

In [29]:
# Compare each baseline ("og") metric with the mean of its synthetic-data runs.
# Each row: printed label, baseline value, list of per-round synthetic values.
metric_rows = [
    ("demographic_parity ", og_demographic_parity, demographic_parity),
    ("accuracy_eqaulity  ", og_accuracy_eqaulity, accuracy_eqaulity),
    ("equalized_odds_1   ", og_equalized_odds_1, equalized_odds_1),
    ("equalized_odds_2   ", og_equalized_odds_2, equalized_odds_2),
    ("accuracy_equality_1", og_conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_1),
    ("accuracy_equality_2", og_conditional_use_accuracy_equality_2, conditional_use_accuracy_equality_2),
    ("mean_difference    ", og_mean_difference, mean_difference),
]

print("name               ", "og          ", "synth       ", "diff        ", "std         ", "ratio")

abs_diffs, ratios = [], []
for label, og_value, synth_values in metric_rows:
    synth_values = np.array(synth_values)
    synth_mean = np.mean(synth_values)       # computed once per metric
    abs_diff = abs(og_value - synth_mean)
    ratio = og_value / synth_mean
    print(label, f"{og_value:.10f}", f"{synth_mean:.10f}", f"{abs_diff:.10f}", f"{np.std(synth_values):.10f}", f"{ratio:.10f}")
    abs_diffs.append(abs_diff)
    ratios.append(ratio)

print("sum of diff:", sum(abs_diffs))
# Divide by the number of rows instead of a hard-coded 7 so the average stays
# correct if a metric is added to or removed from the table.
print("avg of ratio:", sum(ratios) / len(metric_rows))

name                og           synth        diff         std          ratio
demographic_parity  0.1933447448 0.1079564137 0.0853883310 0.0035124766 1.7909519043
accuracy_eqaulity   0.0250249201 0.1399381429 0.1149132228 0.0038388767 0.1788284423
equalized_odds_1    0.0175621188 0.0870836472 0.0695215284 0.0037179021 0.2016695368
equalized_odds_2    0.0114542011 0.0622998348 0.0508456337 0.0105702962 0.1838560436
accuracy_equality_1 0.1941115666 0.1466371807 0.0474743859 0.0199363592 1.3237540827
accuracy_equality_2 0.1102330179 0.1450339719 0.0348009540 0.0075094620 0.7600496381
mean_difference     0.1801306340 0.1044604625 0.0756701715 0.0027826786 1.7243905460
sum of diff: 0.4786142272968424
avg of ratio: 0.8805000276791448


# Without heuristic

In [30]:
# Replace `model` with the generator pickled as adult_synth_mst.pkl; the
# sampling cells below call model.synthetic_data(...) on it.
mst_model_path = './model/adult_synth_mst.pkl'
with open(mst_model_path, 'rb') as model_file:
    model = pickle.load(model_file)

In [31]:
# Score `trained` on freshly sampled synthetic data, once per round.
# NOTE: long-running cell (the original runtime note was left unfinished).
n_rounds = 100            # previously named `iter`, which shadowed the built-in iter()
rows_per_sample = 30000
synth_csv_path = './tmp/synth.csv'

# One list per metric, one entry per round; the reporting cell reads these by name.
disparate_impact = []
demographic_parity = []
equalized_odds_1, equalized_odds_2 = [], []
equal_opportunity = []
accuracy_eqaulity = []
predictive_parity = []
equal_calibration = []                 # never filled (n/a)
conditional_statistical_parity = []    # not filled here: the call is disabled below
predictive_equality = []
conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_2 = [], []
positive_balance = []                  # never filled (n/a)
negative_balance = []                  # never filled (n/a)
mean_difference = []

# The predicates are identical for every round, so build them once up front.
# Row values are compared as strings ('0' / '1') -- presumably the checker
# reads the CSV without type conversion; TODO confirm.
privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

# to_csv does not create missing directories, so make sure ./tmp exists.
os.makedirs(os.path.dirname(synth_csv_path), exist_ok=True)

for _ in range(n_rounds):
    # The checker is given a file path, so each sample goes through a scratch CSV.
    synth = model.synthetic_data(rows=rows_per_sample)
    synth.df.to_csv(synth_csv_path, index=False)
    c = fairness_model_checker(synth_csv_path, verbose=False)

    disparate_impact.append(c.disparate_impact(0.8, *group_args, value=True))
    demographic_parity.append(c.demographic_parity(0.2, *group_args, value=True))

    odds_1, odds_2 = c.equalized_odds(0.2, *outcome_args, value=True)
    equalized_odds_1.append(odds_1)
    equalized_odds_2.append(odds_2)

    equal_opportunity.append(c.equal_opportunity(0.2, *outcome_args, value=True))
    accuracy_eqaulity.append(c.accuracy_eqaulity(0.2, *outcome_args, value=True))
    predictive_parity.append(c.predictive_parity(0.2, *outcome_args, value=True))
    # equal_calibration (n/a)
    # conditional_statistical_parity .append( c.conditional_statistical_parity(0.2, trained, privileged_predicate, positive_predicate, lambda x: (lambda row: row['race'] == x), ('4',), value=True) )
    predictive_equality.append(c.predictive_equality(0.2, *outcome_args, value=True))

    cuae_1, cuae_2 = c.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
    conditional_use_accuracy_equality_1.append(cuae_1)
    conditional_use_accuracy_equality_2.append(cuae_2)
    # positive_balance (n/a)
    # negative_balance (n/a)
    mean_difference.append(c.mean_difference(0.2, *group_args, value=True))

In [32]:
# Compare each baseline ("og") metric with the mean of its synthetic-data runs.
# NOTE(review): this section has no baseline cell of its own, so the og_* values
# are whatever the most recently run baseline cell produced -- confirm.
# Each row: printed label, baseline value, list of per-round synthetic values.
metric_rows = [
    ("demographic_parity ", og_demographic_parity, demographic_parity),
    ("accuracy_eqaulity  ", og_accuracy_eqaulity, accuracy_eqaulity),
    ("equalized_odds_1   ", og_equalized_odds_1, equalized_odds_1),
    ("equalized_odds_2   ", og_equalized_odds_2, equalized_odds_2),
    ("accuracy_equality_1", og_conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_1),
    ("accuracy_equality_2", og_conditional_use_accuracy_equality_2, conditional_use_accuracy_equality_2),
    ("mean_difference    ", og_mean_difference, mean_difference),
]

print("name               ", "og          ", "synth       ", "diff        ", "std         ", "ratio")

abs_diffs, ratios = [], []
for label, og_value, synth_values in metric_rows:
    synth_values = np.array(synth_values)
    synth_mean = np.mean(synth_values)       # computed once per metric
    abs_diff = abs(og_value - synth_mean)
    ratio = og_value / synth_mean
    print(label, f"{og_value:.10f}", f"{synth_mean:.10f}", f"{abs_diff:.10f}", f"{np.std(synth_values):.10f}", f"{ratio:.10f}")
    abs_diffs.append(abs_diff)
    ratios.append(ratio)

print("sum of diff:", sum(abs_diffs))
# Divide by the number of rows instead of a hard-coded 7 so the average stays
# correct if a metric is added to or removed from the table.
print("avg of ratio:", sum(ratios) / len(metric_rows))

name                og           synth        diff         std          ratio
demographic_parity  0.1933447448 0.0114808703 0.1818638745 0.0036530356 16.8406000332
accuracy_eqaulity   0.0250249201 0.0169731298 0.0080517903 0.0040903773 1.4743845380
equalized_odds_1    0.0175621188 0.0060451024 0.0115170164 0.0038910847 2.9051813704
equalized_odds_2    0.0114542011 0.0303776604 0.0189234593 0.0075380601 0.3770600168
accuracy_equality_1 0.1941115666 0.0315097164 0.1626018502 0.0180440635 6.1603717426
accuracy_equality_2 0.1102330179 0.0299032760 0.0803297419 0.0084697145 3.6863191181
mean_difference     0.1801306340 0.0645038782 0.1156267558 0.0028122831 2.7925550996
sum of diff: 0.5789144883690542
avg of ratio: 4.8909245598302515


# Logistic Regression

In [None]:
# Restore the pickled classifier for this section (file name: adult_LR.pkl).
lr_classifier_path = './model/adult_LR.pkl'
with open(lr_classifier_path, 'rb') as classifier_file:
    clf = pickle.load(classifier_file)

In [None]:
class model_wrapper:
    """Adapter exposing ``predict(file)`` for a model that expects scaled features.

    ``predict`` reads a CSV file, removes the columns listed in
    ``_DROPPED_COLUMNS``, standardizes the remaining columns and passes the
    resulting DataFrame to the wrapped model.
    """

    # Columns removed before scaling (includes the 'income' label column).
    _DROPPED_COLUMNS = ['income', 'native.country', 'hours.per.week']

    def __init__(self, model):
        """Store the wrapped model; it must provide ``predict(frame)``."""
        self.model = model

    def predict(self, file):
        """Read the CSV at ``file`` and return the wrapped model's predictions."""
        return self.model.predict(self.preprocessing(pd.read_csv(file)))

    def preprocessing(self, df):
        """Return a standardized copy of ``df`` without the dropped columns.

        The input frame is left untouched (the previous version dropped the
        columns in place, which silently modified the caller's frame).

        Raises:
            KeyError: if any of the dropped columns is missing from ``df``.
        """
        features = df.drop(columns=self._DROPPED_COLUMNS)
        # NOTE(review): the scaler is fitted on the data being scored, not on
        # the data the classifier was trained on -- confirm this matches the
        # scaling used at training time.
        scaler = StandardScaler()
        return pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Wrap the classifier loaded from adult_LR.pkl; this rebinds `trained`, which
# the metric calls below receive as their model argument.
trained = model_wrapper(clf)

In [None]:
# Score `trained` on freshly sampled synthetic data, once per round.
# takes 10 minutes
# NOTE(review): `model` is not reloaded in this section; run top to bottom, it
# is the generator most recently unpickled in the cells visible above -- confirm
# that this is the intended generator.
n_rounds = 10             # previously named `iter`, which shadowed the built-in iter()
rows_per_sample = 30000
synth_csv_path = './tmp/synth.csv'

# One list per metric, one entry per round; the reporting cell reads these by name.
disparate_impact = []
demographic_parity = []
equalized_odds_1, equalized_odds_2 = [], []
equal_opportunity = []
accuracy_eqaulity = []
predictive_parity = []
equal_calibration = []                 # never filled (n/a)
conditional_statistical_parity = []
predictive_equality = []
conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_2 = [], []
positive_balance = []                  # never filled (n/a)
negative_balance = []                  # never filled (n/a)
mean_difference = []

# The predicates are identical for every round, so build them once up front.
# Row values are compared as strings ('0' / '1') -- presumably the checker
# reads the CSV without type conversion; TODO confirm.
privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

# to_csv does not create missing directories, so make sure ./tmp exists.
os.makedirs(os.path.dirname(synth_csv_path), exist_ok=True)

for _ in range(n_rounds):
    # The checker is given a file path, so each sample goes through a scratch CSV.
    synth = model.synthetic_data(rows=rows_per_sample)
    synth.df.to_csv(synth_csv_path, index=False)
    c = fairness_model_checker(synth_csv_path, verbose=False)

    disparate_impact.append(c.disparate_impact(0.8, *group_args, value=True))
    demographic_parity.append(c.demographic_parity(0.2, *group_args, value=True))

    odds_1, odds_2 = c.equalized_odds(0.2, *outcome_args, value=True)
    equalized_odds_1.append(odds_1)
    equalized_odds_2.append(odds_2)

    equal_opportunity.append(c.equal_opportunity(0.2, *outcome_args, value=True))
    accuracy_eqaulity.append(c.accuracy_eqaulity(0.2, *outcome_args, value=True))
    predictive_parity.append(c.predictive_parity(0.2, *outcome_args, value=True))
    # equal_calibration (n/a)
    conditional_statistical_parity.append(
        c.conditional_statistical_parity(
            0.2, *group_args,
            lambda x: (lambda row: row['race'] == x), ('4',),
            value=True,
        )
    )
    predictive_equality.append(c.predictive_equality(0.2, *outcome_args, value=True))

    cuae_1, cuae_2 = c.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
    conditional_use_accuracy_equality_1.append(cuae_1)
    conditional_use_accuracy_equality_2.append(cuae_2)
    # positive_balance (n/a)
    # negative_balance (n/a)
    mean_difference.append(c.mean_difference(0.2, *group_args, value=True))

In [None]:
# Baseline ("og") values: the same metrics computed once on the original dataset.
# Every name starts at 0 so the metrics that are not computed below stay defined.
og_disparate_impact = og_demographic_parity = og_mean_difference = 0
og_equalized_odds_1 = og_equalized_odds_2 = 0
og_equal_opportunity = og_accuracy_eqaulity = og_predictive_parity = 0
og_equal_calibration = og_conditional_statistical_parity = og_predictive_equality = 0
og_conditional_use_accuracy_equality_1 = og_conditional_use_accuracy_equality_2 = 0
og_positive_balance = og_negative_balance = 0

cog = fairness_model_checker("./data/adult_processed.csv", verbose=False)

privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

og_disparate_impact = cog.disparate_impact(0.8, *group_args, value=True)
og_demographic_parity = cog.demographic_parity(0.2, *group_args, value=True)
og_equalized_odds_1, og_equalized_odds_2 = cog.equalized_odds(0.2, *outcome_args, value=True)
og_equal_opportunity = cog.equal_opportunity(0.2, *outcome_args, value=True)
og_accuracy_eqaulity = cog.accuracy_eqaulity(0.2, *outcome_args, value=True)
og_predictive_parity = cog.predictive_parity(0.2, *outcome_args, value=True)
# equal_calibration (n/a)
og_conditional_statistical_parity = cog.conditional_statistical_parity(
    0.2, *group_args,
    lambda x: (lambda row: row['race'] == x), ('4',),
    value=True,
)
og_predictive_equality = cog.predictive_equality(0.2, *outcome_args, value=True)
(
    og_conditional_use_accuracy_equality_1,
    og_conditional_use_accuracy_equality_2,
) = cog.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
# positive_balance (n/a)
# negative_balance (n/a)
og_mean_difference = cog.mean_difference(0.2, *group_args, value=True)

In [None]:
# Compare each baseline ("og") metric with the mean of its synthetic-data runs.
# Each row: printed label, baseline value, list of per-round synthetic values.
metric_rows = [
    ("demographic_parity ", og_demographic_parity, demographic_parity),
    ("accuracy_eqaulity  ", og_accuracy_eqaulity, accuracy_eqaulity),
    ("equalized_odds_1   ", og_equalized_odds_1, equalized_odds_1),
    ("equalized_odds_2   ", og_equalized_odds_2, equalized_odds_2),
    ("accuracy_equality_1", og_conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_1),
    ("accuracy_equality_2", og_conditional_use_accuracy_equality_2, conditional_use_accuracy_equality_2),
    ("mean_difference    ", og_mean_difference, mean_difference),
]

print("name               ", "og          ", "synth       ", "diff        ", "std         ", "ratio")

abs_diffs, ratios = [], []
for label, og_value, synth_values in metric_rows:
    synth_values = np.array(synth_values)
    synth_mean = np.mean(synth_values)       # computed once per metric
    abs_diff = abs(og_value - synth_mean)
    ratio = og_value / synth_mean
    print(label, f"{og_value:.10f}", f"{synth_mean:.10f}", f"{abs_diff:.10f}", f"{np.std(synth_values):.10f}", f"{ratio:.10f}")
    abs_diffs.append(abs_diff)
    ratios.append(ratio)

print("sum of diff:", sum(abs_diffs))
# Divide by the number of rows instead of a hard-coded 7 so the average stays
# correct if a metric is added to or removed from the table.
print("avg of ratio:", sum(ratios) / len(metric_rows))

name                og           synth        diff         std          ratio
demographic_parity  0.0139228408 0.0290808216 0.0151579808 0.0020812768 0.4787636686
accuracy_eqaulity   0.0992683727 0.1102998148 0.0110314422 0.0043228626 0.8999867571
equalized_odds_1    0.0017247318 0.0128049053 0.0110801734 0.0023301829 0.1346930565
equalized_odds_2    0.0080650603 0.0240439068 0.0159788465 0.0073211533 0.3354305262
accuracy_equality_1 0.3459565225 0.1521847909 0.1937717316 0.0168056408 2.2732660770
accuracy_equality_2 0.1120956978 0.1432408776 0.0311451798 0.0074096416 0.7825677956
mean_difference     0.0678595096 0.0364194480 0.0314400615 0.0025102616 1.8632767169
sum of diff: 0.30960541579755535
avg of ratio: 0.9668549425429421


# KNN

In [None]:
# Restore the pickled classifier for this section (file name: adult_KNN.pkl).
knn_classifier_path = './model/adult_KNN.pkl'
with open(knn_classifier_path, 'rb') as classifier_file:
    clf = pickle.load(classifier_file)

In [None]:
class model_wrapper:
    """Adapter exposing ``predict(file)`` for a model that expects scaled features.

    ``predict`` reads a CSV file, keeps a fixed list of feature columns,
    standardizes them and passes the resulting DataFrame to the wrapped model.
    """

    def __init__(self, model):
        """Store the wrapped model; it must provide ``predict(frame)``."""
        self.model = model

    def predict(self, file):
        """Read the CSV at ``file`` and return the wrapped model's predictions."""
        raw = pd.read_csv(file)
        return self.model.predict(self.preprocessing(raw))

    def preprocessing(self, df):
        """Select the feature columns from ``df`` and return them standardized.

        A new scaler is fitted on the selected columns on every call.
        """
        feature_columns = [
            'age', 'workclass', 'education', 'marital.status', 'occupation',
            'relationship', 'race', 'sex', 'capital.gain', 'capital.loss',
            'hours.per.week', 'native.country',
        ]
        features = df[feature_columns]
        scaled = StandardScaler().fit_transform(features)
        return pd.DataFrame(scaled, columns=features.columns)

# Wrap the classifier loaded from adult_KNN.pkl; this rebinds `trained`, which
# the metric calls below receive as their model argument.
trained = model_wrapper(clf)

In [None]:
# Score `trained` on freshly sampled synthetic data, once per round.
# takes 20 mins!
# NOTE(review): `model` is not reloaded in this section; run top to bottom, it
# is the generator most recently unpickled in the cells visible above -- confirm
# that this is the intended generator.
n_rounds = 10             # previously named `iter`, which shadowed the built-in iter()
rows_per_sample = 30000
synth_csv_path = './tmp/synth.csv'

# One list per metric, one entry per round; the reporting cell reads these by name.
disparate_impact = []
demographic_parity = []
equalized_odds_1, equalized_odds_2 = [], []
equal_opportunity = []
accuracy_eqaulity = []
predictive_parity = []
equal_calibration = []                 # never filled (n/a)
conditional_statistical_parity = []
predictive_equality = []
conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_2 = [], []
positive_balance = []                  # never filled (n/a)
negative_balance = []                  # never filled (n/a)
mean_difference = []

# The predicates are identical for every round, so build them once up front.
# Row values are compared as strings ('0' / '1') -- presumably the checker
# reads the CSV without type conversion; TODO confirm.
privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

# to_csv does not create missing directories, so make sure ./tmp exists.
os.makedirs(os.path.dirname(synth_csv_path), exist_ok=True)

for _ in range(n_rounds):
    # The checker is given a file path, so each sample goes through a scratch CSV.
    synth = model.synthetic_data(rows=rows_per_sample)
    synth.df.to_csv(synth_csv_path, index=False)
    c = fairness_model_checker(synth_csv_path, verbose=False)

    disparate_impact.append(c.disparate_impact(0.8, *group_args, value=True))
    demographic_parity.append(c.demographic_parity(0.2, *group_args, value=True))

    odds_1, odds_2 = c.equalized_odds(0.2, *outcome_args, value=True)
    equalized_odds_1.append(odds_1)
    equalized_odds_2.append(odds_2)

    equal_opportunity.append(c.equal_opportunity(0.2, *outcome_args, value=True))
    accuracy_eqaulity.append(c.accuracy_eqaulity(0.2, *outcome_args, value=True))
    predictive_parity.append(c.predictive_parity(0.2, *outcome_args, value=True))
    # equal_calibration (n/a)
    conditional_statistical_parity.append(
        c.conditional_statistical_parity(
            0.2, *group_args,
            lambda x: (lambda row: row['race'] == x), ('4',),
            value=True,
        )
    )
    predictive_equality.append(c.predictive_equality(0.2, *outcome_args, value=True))

    cuae_1, cuae_2 = c.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
    conditional_use_accuracy_equality_1.append(cuae_1)
    conditional_use_accuracy_equality_2.append(cuae_2)
    # positive_balance (n/a)
    # negative_balance (n/a)
    mean_difference.append(c.mean_difference(0.2, *group_args, value=True))

In [None]:
# Baseline ("og") values: the same metrics computed once on the original dataset.
# Every name starts at 0 so the metrics that are not computed below stay defined.
og_disparate_impact = og_demographic_parity = og_mean_difference = 0
og_equalized_odds_1 = og_equalized_odds_2 = 0
og_equal_opportunity = og_accuracy_eqaulity = og_predictive_parity = 0
og_equal_calibration = og_conditional_statistical_parity = og_predictive_equality = 0
og_conditional_use_accuracy_equality_1 = og_conditional_use_accuracy_equality_2 = 0
og_positive_balance = og_negative_balance = 0

cog = fairness_model_checker("./data/adult_processed.csv", verbose=False)

privileged_predicate = lambda row: row['sex'] != '0'
positive_predicate = lambda Y: Y == 1
truth_predicate = lambda row: row['income'] == '1'

# Positional arguments shared by the metric calls.
group_args = (trained, privileged_predicate, positive_predicate)
outcome_args = group_args + (truth_predicate,)

og_disparate_impact = cog.disparate_impact(0.8, *group_args, value=True)
og_demographic_parity = cog.demographic_parity(0.2, *group_args, value=True)
og_equalized_odds_1, og_equalized_odds_2 = cog.equalized_odds(0.2, *outcome_args, value=True)
og_equal_opportunity = cog.equal_opportunity(0.2, *outcome_args, value=True)
og_accuracy_eqaulity = cog.accuracy_eqaulity(0.2, *outcome_args, value=True)
og_predictive_parity = cog.predictive_parity(0.2, *outcome_args, value=True)
# equal_calibration (n/a)
og_conditional_statistical_parity = cog.conditional_statistical_parity(
    0.2, *group_args,
    lambda x: (lambda row: row['race'] == x), ('4',),
    value=True,
)
og_predictive_equality = cog.predictive_equality(0.2, *outcome_args, value=True)
(
    og_conditional_use_accuracy_equality_1,
    og_conditional_use_accuracy_equality_2,
) = cog.conditional_use_accuracy_equality(0.2, *outcome_args, value=True)
# positive_balance (n/a)
# negative_balance (n/a)
og_mean_difference = cog.mean_difference(0.2, *group_args, value=True)

In [None]:
# Compare each baseline ("og") metric with the mean of its synthetic-data runs.
# Each row: printed label, baseline value, list of per-round synthetic values.
metric_rows = [
    ("demographic_parity ", og_demographic_parity, demographic_parity),
    ("accuracy_eqaulity  ", og_accuracy_eqaulity, accuracy_eqaulity),
    ("equalized_odds_1   ", og_equalized_odds_1, equalized_odds_1),
    ("equalized_odds_2   ", og_equalized_odds_2, equalized_odds_2),
    ("accuracy_equality_1", og_conditional_use_accuracy_equality_1, conditional_use_accuracy_equality_1),
    ("accuracy_equality_2", og_conditional_use_accuracy_equality_2, conditional_use_accuracy_equality_2),
    ("mean_difference    ", og_mean_difference, mean_difference),
]

print("name               ", "og          ", "synth       ", "diff        ", "std         ", "ratio")

abs_diffs, ratios = [], []
for label, og_value, synth_values in metric_rows:
    synth_values = np.array(synth_values)
    synth_mean = np.mean(synth_values)       # computed once per metric
    abs_diff = abs(og_value - synth_mean)
    ratio = og_value / synth_mean
    print(label, f"{og_value:.10f}", f"{synth_mean:.10f}", f"{abs_diff:.10f}", f"{np.std(synth_values):.10f}", f"{ratio:.10f}")
    abs_diffs.append(abs_diff)
    ratios.append(ratio)

print("sum of diff:", sum(abs_diffs))
# Divide by the number of rows instead of a hard-coded 7 so the average stays
# correct if a metric is added to or removed from the table.
print("avg of ratio:", sum(ratios) / len(metric_rows))

name                og           synth        diff         std          ratio
demographic_parity  0.1721440158 0.1042331497 0.0679108661 0.0036381468 1.6515284854
accuracy_eqaulity   0.0473392149 0.1171415301 0.0698023152 0.0048074738 0.4041198272
equalized_odds_1    0.0571962732 0.0790041813 0.0218079081 0.0044851970 0.7239651399
equalized_odds_2    0.1667534959 0.1221507312 0.0446027647 0.0089484660 1.3651452947
accuracy_equality_1 0.1009553523 0.1328899026 0.0319345504 0.0213990924 0.7596916716
accuracy_equality_2 0.1193507001 0.1465997151 0.0272490150 0.0075578972 0.8141264125
mean_difference     0.1654413724 0.1060179881 0.0594233844 0.0032167290 1.5605028491
sum of diff: 0.32273080391905606
avg of ratio: 1.0398685257764473
