In [1]:
import mst as mst
import mst_fair_greedy as fairMST
import mst_fair_optimal as fairMSTOpt
import numpy as np
from mbi import FactoredInference, Dataset, Domain
import scipy
import scipy.stats as stats
from disjoint_set import DisjointSet
import networkx as nx
import itertools
from cdp2adp import cdp_rho
from scipy.special import logsumexp
import argparse
import heapq
import seaborn as sns
import matplotlib.ticker as mtick
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
import pandas as pd
import copy
import warnings
warnings.filterwarnings('ignore')
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

In [2]:
dataset = "data/cleaned_KDD.csv"
domain = "data/cleanded_KDD-domain.json"
admissible = ['age','class_worker','det_ind_code','det_occ_code','education','wage_per_hour','hs_college','marital_stat','major_ind_code','major_occ_code','hisp_origin','union_member','unemp_reason','full_or_part_emp', 'capital_gains','capital_losses','stock_dividends','tax_filer_stat','region_prev_res','state_prev_res','det_hh_fam_stat','det_hh_summ','unknown','mig_chg_msa','mig_chg_reg','mig_move_reg','mig_same','mig_prev_sunbelt','num_emp','fam_under_18','country_father','country_mother','country_self','citizenship','own_or_self','vet_question','vet_benefits','weeks_worked','year']
protected = ['race','sex']
outcome = ['income_50k']
original_dataset = data = Dataset.load(dataset, domain)

In [3]:
def normalize(vec):
    if(sum(vec) == 0):
        return vec
    normalized_vec = vec / sum(vec)
    #normalized_vec = vec / np.sqrt(np.sum(vec**2))
    return normalized_vec

In [4]:
def div(varlist,const):
    newList = [x / const for x in varlist]
    return newList

# Utility Metrics

## 1 Way Marginal Total Variation Distance

### $\epsilon$ = 0.1

In [6]:
workload = list(itertools.combinations(data.domain, 1))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_1_way_var_distance_01 = []
mst_1_way_var_distance_01 = []
fair_opt_mst_1_way_var_distance_01= []
privbayes_1_way_var_distance_01 = []
fair_privbayes_1_way_var_distance_01 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=0.1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=0.1/results_original_{i}.csv', domain)

    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_1_way_var_distance_01.append(fair_greedy_mst_variation)
    mst_1_way_var_distance_01.append(mst_variation)
    
print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_1_way_var_distance_01)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_1_way_var_distance_01)))


Average Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.1121489229275765
Average Total Variation Distance over 10 Runs MST : 0.12021301016748842


### $\epsilon$ = 1

In [7]:
workload = list(itertools.combinations(data.domain, 1))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_1_way_var_distance_1 = []
mst_1_way_var_distance_1 = []
fair_opt_mst_1_way_var_distance_01= []
privbayes_1_way_var_distance_01 = []
fair_privbayes_1_way_var_distance_01 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=1/results_original_{i}.csv', domain)

    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_1_way_var_distance_1.append(fair_greedy_mst_variation)
    mst_1_way_var_distance_1.append(mst_variation)
    
print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_1_way_var_distance_1)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_1_way_var_distance_1)))


Average Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.12085496240692795
Average Total Variation Distance over 10 Runs MST : 0.11919031915603957


In [60]:
print(np.mean(fair_greedy_mst_1_way_var_distance_1)/np.mean(mst_1_way_var_distance_1))

1.0139662622155503


### $\epsilon$ = 10

In [8]:
workload = list(itertools.combinations(data.domain, 1))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_1_way_var_distance_10 = []
mst_1_way_var_distance_10 = []
fair_opt_mst_1_way_var_distance_01= []
privbayes_1_way_var_distance_01 = []
fair_privbayes_1_way_var_distance_01 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=10/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=10/results_original_{i}.csv', domain)

    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_1_way_var_distance_10.append(fair_greedy_mst_variation)
    mst_1_way_var_distance_10.append(mst_variation)
    
print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_1_way_var_distance_10)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_1_way_var_distance_10)))


Average Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.0979701056315345
Average Total Variation Distance over 10 Runs MST : 0.09834819125407598


## 2 Way Marginal Total Variation Distance

### $\epsilon$ = 0.1

In [9]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_2_way_var_distance_01 = []
mst_2_way_var_distance_01 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=0.1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=0.1/results_original_{i}.csv', domain)
    
    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_2_way_var_distance_01.append(fair_greedy_mst_variation)
    mst_2_way_var_distance_01.append(mst_variation)
    
    

print("Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_2_way_var_distance_01)))
print("Average 2-way  Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_2_way_var_distance_01)))


Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.22128619392184784
Average 2-way  Total Variation Distance over 10 Runs MST : 0.23099221394162509


### $\epsilon$ = 1

In [10]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_2_way_var_distance_1 = []
mst_2_way_var_distance_1 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=1/results_original_{i}.csv', domain)
    
    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_2_way_var_distance_1.append(fair_greedy_mst_variation)
    mst_2_way_var_distance_1.append(mst_variation)
    
    

print("Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_2_way_var_distance_1)))
print("Average 2-way  Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_2_way_var_distance_1)))


Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.23488054932969815
Average 2-way  Total Variation Distance over 10 Runs MST : 0.2329102849509138


In [61]:
print(np.mean(fair_greedy_mst_2_way_var_distance_1)/np.mean(mst_2_way_var_distance_1))

1.0084593275011433


### $\epsilon$ = 10

In [11]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload ]
data = Dataset.load(dataset, domain)

fair_greedy_mst_2_way_var_distance_10 = []
mst_2_way_var_distance_10 = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=10/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=10/results_original_{i}.csv', domain)
    
    fair_greedy_mst_variation =0 
    mst_variation = 0
    
    for proj in workload:
        data_vec = data.project(proj).datavector()
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).datavector()
        mst_vec = mst_data.project(proj).datavector()
        
        data_vec = normalize(data_vec)
        fair_greedy_mst_vec = normalize(fair_greedy_mst_vec)
        mst_vec = normalize(mst_vec)
        
        fair_greedy_mst_variation += sum(abs(data_vec - fair_greedy_mst_vec)/2)
        mst_variation += sum(abs(data_vec - mst_vec)/2)
        
    fair_greedy_mst_variation = fair_greedy_mst_variation/len(workload)
    mst_variation = mst_variation/len(workload)
    
    fair_greedy_mst_2_way_var_distance_01.append(fair_greedy_mst_variation)
    mst_2_way_var_distance_01.append(mst_variation)
    
    

print("Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_2_way_var_distance_10)))
print("Average 2-way  Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_2_way_var_distance_10)))


Average 2-way Total Variation Distance over 10 Runs Fair_Greedy_MST : nan
Average 2-way  Total Variation Distance over 10 Runs MST : nan


## Pairwise Cramer's V

In [62]:
def CramerV(dataframe):
    
    vals = dataframe.to_numpy()
    vals[vals == 0] = 1
    col = 2
    rows = min(vals.shape)
    chi2 = stats.chi2_contingency(vals, correction=False)[0]
    sample_size = np.sum(vals)
    k = col - (((col-1)**2)/(sample_size -1))
    r= rows - (((rows-1)**2)/(sample_size -1))

    phi2 = max(0,(chi2 / sample_size) - ((col-1)*(rows-1))/(sample_size -1) )
    cramer_v = np.sqrt(phi2/min(k-1,r-1) )

    return cramer_v

### $\epsilon$ = 0.1

In [12]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload]
data = Dataset.load(dataset, domain)

fair_greedy_mst_corr_distance_01 = []
mst_corr_distance_01  = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=0.1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=0.1/results_original_{i}.csv', domain)
    
    fair_greedy_mst_distance =0 
    mst_distance = 0
    
    for proj in workload:
        data_vec = data.project(proj).df
        data_vec = CramerV(data_vec)
        
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).df
        fair_greedy_mst_vec= CramerV(fair_greedy_mst_vec)
        
        mst_vec = mst_data.project(proj).df
        mst_vec= CramerV(mst_vec)
        
        fair_greedy_mst_distance += abs(data_vec-fair_greedy_mst_vec)/2
        mst_distance += abs(data_vec-mst_vec)/2
        
    fair_greedy_mst_distance = fair_greedy_mst_distance/len(workload)
    mst_distance = mst_distance/len(workload)
    
    fair_greedy_mst_corr_distance_01.append(fair_greedy_mst_distance)
    mst_corr_distance_01.append(mst_distance)
    
    

print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_corr_distance_01)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_corr_distance_01)))


NameError: name 'CramerV' is not defined

### $\epsilon$ = 1

In [63]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload ]
data = Dataset.load(dataset, domain)

fair_greedy_mst_corr_distance_1 = []
mst_corr_distance_1  = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=1/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=1/results_original_{i}.csv', domain)
    
    fair_greedy_mst_distance =0 
    mst_distance = 0
    
    for proj in workload:
        data_vec = data.project(proj).df
        data_vec = CramerV(data_vec)
        
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).df
        fair_greedy_mst_vec= CramerV(fair_greedy_mst_vec)
        
        mst_vec = mst_data.project(proj).df
        mst_vec= CramerV(mst_vec)
        
        fair_greedy_mst_distance += abs(data_vec-fair_greedy_mst_vec)/2
        mst_distance += abs(data_vec-mst_vec)/2
        
    fair_greedy_mst_distance = fair_greedy_mst_distance/len(workload)
    mst_distance = mst_distance/len(workload)
    
    fair_greedy_mst_corr_distance_1.append(fair_greedy_mst_distance)
    mst_corr_distance_1.append(mst_distance)
    
    

print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_corr_distance_1)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_corr_distance_1)))


Average Total Variation Distance over 10 Runs Fair_Greedy_MST : 0.03277384791587584
Average Total Variation Distance over 10 Runs MST : 0.03141989877806668


In [64]:
print(np.mean(fair_greedy_mst_corr_distance_1)/np.mean(mst_corr_distance_1))

1.043092091014447


### $\epsilon$ = 10

In [None]:
workload = list(itertools.combinations(data.domain, 2))
workload = [cl for cl in workload ]
data = Dataset.load(dataset, domain)

fair_greedy_mst_corr_distance_10 = []
mst_corr_distance_10  = []
for i in range(10):
    
    fair_greedy_mst_data = Dataset.load(f'data/fair_greedy/KDD/eps=10/results_greedy_{i}.csv', domain)
    mst_data = Dataset.load(f'data/original/KDD/eps=10/results_original_{i}.csv', domain)
    
    fair_greedy_mst_distance =0 
    mst_distance = 0
    
    for proj in workload:
        data_vec = data.project(proj).df
        data_vec = CramerV(data_vec)
        
        fair_greedy_mst_vec = fair_greedy_mst_data.project(proj).df
        fair_greedy_mst_vec= CramerV(fair_greedy_mst_vec)
        
        mst_vec = mst_data.project(proj).df
        mst_vec= CramerV(mst_vec)
        
        fair_greedy_mst_distance += abs(data_vec-fair_greedy_mst_vec)/2
        mst_distance += abs(data_vec-mst_vec)/2
        
    fair_greedy_mst_distance = fair_greedy_mst_distance/len(workload)
    mst_distance = mst_distance/len(workload)
    
    fair_greedy_mst_corr_distance_10.append(fair_greedy_mst_distance)
    mst_corr_distance_10.append(mst_distance)
    
    

print("Average Total Variation Distance over 10 Runs Fair_Greedy_MST : " + str(np.mean(fair_greedy_mst_corr_distance_10)))
print("Average Total Variation Distance over 10 Runs MST : " + str(np.mean(mst_corr_distance_10)))


## Visualizations

In [None]:

artifactspath = "vis/KDD/"
plt.rcParams.update({'font.size': 18})

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
plt.yscale("log")

xvals = [np.mean(div(mst_1_way_var_distance_01,workload_len)), np.mean(div(mst_1_way_var_distance_1,workload_len)), np.mean(div(mst_1_way_var_distance_10,workload_len))]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(div(privbayes_1_way_var_distance_01,workload_len)), np.mean(div(privbayes_1_way_var_distance_1,workload_len)), np.mean(div(privbayes_1_way_var_distance_10,workload_len))]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(div(fair_greedy_mst_1_way_var_distance_01,workload_len)), np.mean(div(fair_greedy_mst_1_way_var_distance_1,workload_len)), np.mean(div(fair_greedy_mst_1_way_var_distance_10,workload_len))]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(div(fair_opt_mst_1_way_var_distance_01,workload_len)), np.mean(div(fair_opt_mst_1_way_var_distance_1,workload_len)), np.mean(div(fair_opt_mst_1_way_var_distance_10,workload_len))]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(div(fair_privbayes_1_way_var_distance_01,workload_len)), np.mean(div(fair_privbayes_1_way_var_distance_1,workload_len)), np.mean(div(fair_privbayes_1_way_var_distance_10,workload_len))]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Average Total Variation Distance')
plt.title("1-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + '1wayAVG.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
plt.yscale("log")

xvals = [np.mean(mst_1_way_var_distance_01), np.mean(mst_1_way_var_distance_1), np.mean(mst_1_way_var_distance_10)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(privbayes_1_way_var_distance_01), np.mean(privbayes_1_way_var_distance_1), np.mean(privbayes_1_way_var_distance_10)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_1_way_var_distance_01), np.mean(fair_greedy_mst_1_way_var_distance_1), np.mean(fair_greedy_mst_1_way_var_distance_10)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_1_way_var_distance_01), np.mean(fair_opt_mst_1_way_var_distance_1), np.mean(fair_opt_mst_1_way_var_distance_10)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_1_way_var_distance_01), np.mean(fair_privbayes_1_way_var_distance_1), np.mean(fair_privbayes_1_way_var_distance_10)]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Total Variation Distance')
plt.title("1-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + '1way.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")

xvals = [np.mean(mst_1_way_var_distance_01), np.mean(mst_1_way_var_distance_1), np.mean(mst_1_way_var_distance_10)]
bar1 = plt.bar(ind, xvals, width)

  
zvals = [np.mean(fair_greedy_mst_1_way_var_distance_01), np.mean(fair_greedy_mst_1_way_var_distance_1), np.mean(fair_greedy_mst_1_way_var_distance_10)]
bar3 = plt.bar(ind+width*1, zvals, width)


  
plt.xlabel("Privacy Budgets")
plt.ylabel('Average Variation Distance')
plt.title("1-Way Marginals")

plt.ylim(0,0.16)
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1,  bar3), ('MST', 'Greedy-PreFair'), fontsize = 15 )
plt.savefig(artifactspath + '1wayMST.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
plt.yscale("log")

xvals = [np.mean(div(mst_2_way_var_distance_01,workload_len)), np.mean(div(mst_2_way_var_distance_1,workload_len)), np.mean(div(mst_2_way_var_distance_10,workload_len))]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(div(privbayes_2_way_var_distance_01,workload_len)), np.mean(div(privbayes_2_way_var_distance_1,workload_len)), np.mean(div(privbayes_2_way_var_distance_10,workload_len))]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(div(fair_greedy_mst_2_way_var_distance_01,workload_len)), np.mean(div(fair_greedy_mst_2_way_var_distance_1,workload_len)), np.mean(div(fair_greedy_mst_2_way_var_distance_10,workload_len))]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(div(fair_opt_mst_2_way_var_distance_01,workload_len)), np.mean(div(fair_opt_mst_2_way_var_distance_1,workload_len)), np.mean(div(fair_opt_mst_2_way_var_distance_10,workload_len))]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(div(fair_privbayes_2_way_var_distance_01,workload_len)), np.mean(div(fair_privbayes_2_way_var_distance_1,workload_len)), np.mean(div(fair_privbayes_2_way_var_distance_10,workload_len))]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Average Total Variation Distance')
plt.title("2-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + '2wayAVG.pdf', bbox_inches='tight')

In [None]:
print(np.mean(fair_greedy_mst_corr_distance_01)/np.mean(mst_corr_distance_01))
print(np.mean(fair_greedy_mst_corr_distance_1)/np.mean(mst_corr_distance_1))
print(np.mean(fair_greedy_mst_corr_distance_10)/np.mean(mst_corr_distance_10))

In [None]:
print(np.mean(fair_opt_mst_corr_distance_01)/np.mean(mst_corr_distance_01))
print(np.mean(fair_opt_mst_corr_distance_1)/np.mean(mst_corr_distance_1))
print(np.mean(fair_opt_mst_corr_distance_10)/np.mean(mst_corr_distance_10))

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
plt.yscale("log")

xvals = [np.mean(mst_2_way_var_distance_01), np.mean(mst_2_way_var_distance_1), np.mean(mst_2_way_var_distance_10)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(privbayes_2_way_var_distance_01), np.mean(privbayes_2_way_var_distance_1), np.mean(privbayes_2_way_var_distance_10)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_2_way_var_distance_01), np.mean(fair_greedy_mst_2_way_var_distance_1), np.mean(fair_greedy_mst_2_way_var_distance_10)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_2_way_var_distance_01), np.mean(fair_opt_mst_2_way_var_distance_1), np.mean(fair_opt_mst_2_way_var_distance_10)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_2_way_var_distance_01), np.mean(fair_privbayes_2_way_var_distance_1), np.mean(fair_privbayes_2_way_var_distance_10)]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Total Variation Distance')
plt.title("2-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + '2way.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")

xvals = [np.mean(mst_2_way_var_distance_01), np.mean(mst_2_way_var_distance_1), np.mean(mst_2_way_var_distance_10)]
bar1 = plt.bar(ind, xvals, width)

zvals = [np.mean(fair_greedy_mst_2_way_var_distance_01), np.mean(fair_greedy_mst_2_way_var_distance_1), np.mean(fair_greedy_mst_2_way_var_distance_10)]
bar3 = plt.bar(ind+width*1, zvals, width)

  
plt.xlabel("Privacy Budgets")
plt.ylabel('Average Variation Distance')
plt.title("2-Way Marginals")
plt.ylim(0,0.16)
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1,  bar3, bar4), ('MST', 'GreedyMST', 'ExpMST') )
plt.savefig(artifactspath + '2wayMST.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
plt.yscale("log")

xvals = [np.mean(mst_2_way_var_distance_01), np.mean(mst_2_way_var_distance_1), np.mean(mst_2_way_var_distance_10)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(privbayes_2_way_var_distance_01), np.mean(privbayes_2_way_var_distance_1), np.mean(privbayes_2_way_var_distance_10)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_2_way_var_distance_01), np.mean(fair_greedy_mst_2_way_var_distance_1), np.mean(fair_greedy_mst_2_way_var_distance_10)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_2_way_var_distance_01), np.mean(fair_opt_mst_2_way_var_distance_1), np.mean(fair_opt_mst_2_way_var_distance_10)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_2_way_var_distance_01), np.mean(fair_privbayes_2_way_var_distance_1), np.mean(fair_privbayes_2_way_var_distance_10)]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Total Variation Distance')
plt.title("2-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + '2way.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")

xvals = [np.mean(mst_corr_distance_01), np.mean(mst_corr_distance_1), np.mean(mst_corr_distance_10)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(privbayes_corr_distance_01), np.mean(privbayes_corr_distance_1), np.mean(privbayes_corr_distance_10)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_corr_distance_01), np.mean(fair_greedy_mst_corr_distance_1), np.mean(fair_greedy_mst_corr_distance_10)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_corr_distance_01), np.mean(fair_opt_mst_corr_distance_1), np.mean(fair_opt_mst_corr_distance_10)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_corr_distance_01), np.mean(fair_privbayes_corr_distance_1), np.mean(fair_privbayes_corr_distance_10)]
bar5 = plt.bar(ind+width*4, xyvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Total Variation Distance')
plt.title("Cramers V")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3, bar4, bar5), ('MST', 'Privbayes', 'GreedyMST', 'ExpMST', "GreedyPrivbayes") )
plt.savefig(artifactspath + 'corr.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")

xvals = [np.mean(mst_corr_distance_01), np.mean(mst_corr_distance_1), np.mean(mst_corr_distance_10)]
bar1 = plt.bar(ind, xvals, width)

zvals = [np.mean(fair_greedy_mst_corr_distance_01), np.mean(fair_greedy_mst_corr_distance_1), np.mean(fair_greedy_mst_corr_distance_10)]
bar3 = plt.bar(ind+width*1, zvals, width)

xxvals = [np.mean(fair_opt_mst_corr_distance_01), np.mean(fair_opt_mst_corr_distance_1), np.mean(fair_opt_mst_corr_distance_10)]
bar4 = plt.bar(ind+width*2, xxvals, width)

  
plt.xlabel("Privacy Budgets")
plt.ylabel('Average Difference')
plt.title("Cramers V")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1, bar3, bar4), ('MST', 'GreedyMST', 'ExpMST') )
plt.savefig(artifactspath + 'corrMST.pdf', bbox_inches='tight')

In [None]:
N = 5
ind = np.arange(N) 
width = 0.25
plt.yscale("log")
xvals = [np.mean(mst_2_way_var_distance_01), np.mean(mst_2_way_var_distance_1), np.mean(mst_2_way_var_distance_10),np.mean(mst_2_way_var_distance_100),np.mean(mst_2_way_var_distance_1000)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(fair_greedy_mst_2_way_var_distance_01), np.mean(fair_greedy_mst_2_way_var_distance_1), np.mean(fair_greedy_mst_2_way_var_distance_10),np.mean(fair_greedy_mst_2_way_var_distance_100),np.mean(fair_greedy_mst_2_way_var_distance_1000)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_opt_mst_2_way_var_distance_01), np.mean(fair_opt_mst_2_way_var_distance_1), np.mean(fair_opt_mst_2_way_var_distance_10),np.mean(fair_opt_mst_2_way_var_distance_100),np.mean(fair_opt_mst_2_way_var_distance_1000)]
bar3 = plt.bar(ind+width*2, zvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Variation Distance')
plt.title("2-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10','ε=100','ε=1000'])
plt.legend( (bar1, bar2, bar3), ('MST', 'GreedyMST', 'ExpMST') )
plt.savefig(artifactspath + '2wayall5.pdf', bbox_inches='tight')

In [None]:
N = 5
ind = np.arange(N) 
width = 0.25
#plt.yscale("log")
xvals = [np.mean(mst_corr_distance_01), np.mean(mst_corr_distance_1), np.mean(mst_corr_distance_10),np.mean(mst_corr_distance_100),np.mean(mst_corr_distance_1000)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(fair_greedy_mst_corr_distance_01), np.mean(fair_greedy_mst_corr_distance_1), np.mean(fair_greedy_mst_corr_distance_10),np.mean(fair_greedy_mst_corr_distance_100),np.mean(fair_greedy_mst_corr_distance_1000)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_opt_mst_corr_distance_01), np.mean(fair_opt_mst_corr_distance_1), np.mean(fair_opt_mst_corr_distance_10),np.mean(fair_opt_mst_corr_distance_100),np.mean(fair_opt_mst_corr_distance_1000)]
bar3 = plt.bar(ind+width*2, zvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Variation Distance')
plt.title("Cramers V")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10','ε=100','ε=1000'])
plt.legend( (bar1, bar2, bar3), ('MST', 'GreedyMST', 'ExpMST') )
plt.savefig(artifactspath + 'corrall5.pdf', bbox_inches='tight')

## 1-Way Marginals

In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_1_way_var_distance_01,privbayes_1_way_var_distance_01,fair_greedy_mst_1_way_var_distance_01,fair_opt_mst_1_way_var_distance_01,fair_privbayes_1_way_var_distance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, 1 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '1_way_01.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","EMST"]

for i in range(len(mst_1_way_var_distance_01)):
    fair_greedy_mst_1_way_var_distance_01[i]= fair_greedy_mst_1_way_var_distance_01[i]/mst_1_way_var_distance_01[i]
    fair_opt_mst_1_way_var_distance_01[i]= fair_opt_mst_1_way_var_distance_01[i]/mst_1_way_var_distance_01[i]
    mst_1_way_var_distance_01[i]= mst_1_way_var_distance_01[i]/mst_1_way_var_distance_01[i]
    
    
values = [mst_1_way_var_distance_01,fair_greedy_mst_1_way_var_distance_01,fair_opt_mst_1_way_var_distance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.8, 1.2])

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, 1 way marginals")
plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '1_way_01_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_1_way_var_distance_1,privbayes_1_way_var_distance_1,fair_greedy_mst_1_way_var_distance_1,fair_opt_mst_1_way_var_distance_1,fair_privbayes_1_way_var_distance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, 1 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '1_way_1.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","EMST"]
values = [mst_1_way_var_distance_1,fair_greedy_mst_1_way_var_distance_1,fair_opt_mst_1_way_var_distance_1]

for i in range(len(mst_1_way_var_distance_1)):
    fair_greedy_mst_1_way_var_distance_1[i]= fair_greedy_mst_1_way_var_distance_1[i]/mst_1_way_var_distance_1[i]
    fair_opt_mst_1_way_var_distance_1[i]= fair_opt_mst_1_way_var_distance_1[i]/mst_1_way_var_distance_1[i]
    mst_1_way_var_distance_1[i]= mst_1_way_var_distance_1[i]/mst_1_way_var_distance_1[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.8, 1.2])

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, 1 way marginals")
#plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '1_way_1_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,]
x_axis = ["PB", "GPB"]
values = [privbayes_1_way_var_distance_1,fair_privbayes_1_way_var_distance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, 1 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '1_way_1_PB.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_1_way_var_distance_10,privbayes_1_way_var_distance_10,fair_greedy_mst_1_way_var_distance_10,fair_opt_mst_1_way_var_distance_10,fair_privbayes_1_way_var_distance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, 1 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '1_way_10.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","EMST"]
values = [mst_1_way_var_distance_10,fair_greedy_mst_1_way_var_distance_10,fair_opt_mst_1_way_var_distance_10]


for i in range(len(mst_1_way_var_distance_10)):
    fair_greedy_mst_1_way_var_distance_10[i]= fair_greedy_mst_1_way_var_distance_10[i]/mst_1_way_var_distance_10[i]
    fair_opt_mst_1_way_var_distance_10[i]= fair_opt_mst_1_way_var_distance_10[i]/mst_1_way_var_distance_10[i]
    mst_1_way_var_distance_10[i]= mst_1_way_var_distance_10[i]/mst_1_way_var_distance_10[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.8, 1.2])

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, 1 way marginals")
#plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '1_way_10_MST.pdf', bbox_inches='tight')


## 2-way Marginals

In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_2_way_var_distance_01,privbayes_2_way_var_distance_01,fair_greedy_mst_2_way_var_distance_01,fair_opt_mst_2_way_var_distance_01,fair_privbayes_2_way_var_distance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, 2 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '2_way_01.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST","GMST","EMST"]
values = [mst_2_way_var_distance_01,fair_greedy_mst_2_way_var_distance_01,fair_opt_mst_2_way_var_distance_01]
for i in range(len(mst_2_way_var_distance_01)):
    fair_greedy_mst_2_way_var_distance_01[i]= fair_greedy_mst_2_way_var_distance_01[i]/mst_2_way_var_distance_01[i]
    fair_opt_mst_2_way_var_distance_01[i]= fair_opt_mst_2_way_var_distance_01[i]/mst_2_way_var_distance_01[i]
    mst_2_way_var_distance_01[i]= mst_2_way_var_distance_01[i]/mst_2_way_var_distance_01[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])


plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, 2 way marginals")
plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '2_way_01_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","GMST", "GMST","OMST", "GPB"]
values = [mst_2_way_var_distance_1,privbayes_2_way_var_distance_1,fair_greedy_mst_2_way_var_distance_1,fair_opt_mst_2_way_var_distance_1,fair_privbayes_2_way_var_distance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, 2 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '2_way_1.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST","GMST","EMST"]
values = [mst_2_way_var_distance_1,fair_greedy_mst_2_way_var_distance_1,fair_opt_mst_2_way_var_distance_1]
for i in range(len(mst_2_way_var_distance_1)):
    fair_greedy_mst_2_way_var_distance_1[i]= fair_greedy_mst_2_way_var_distance_1[i]/mst_2_way_var_distance_1[i]
    fair_opt_mst_2_way_var_distance_1[i]= fair_opt_mst_2_way_var_distance_1[i]/mst_2_way_var_distance_1[i]
    mst_2_way_var_distance_1[i]= mst_2_way_var_distance_1[i]/mst_2_way_var_distance_1[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, 2 way marginals")
#plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '2_way_1_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_2_way_var_distance_10,privbayes_2_way_var_distance_10,fair_greedy_mst_2_way_var_distance_10,fair_opt_mst_2_way_var_distance_10,fair_privbayes_2_way_var_distance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, 2 way marginals")
plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + '2_way_10.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST","GMST","EMST"]
values = [mst_2_way_var_distance_10,fair_greedy_mst_2_way_var_distance_10,fair_opt_mst_2_way_var_distance_10]
for i in range(len(mst_2_way_var_distance_10)):
    fair_greedy_mst_2_way_var_distance_10[i]= fair_greedy_mst_2_way_var_distance_10[i]/mst_2_way_var_distance_10[i]
    fair_opt_mst_2_way_var_distance_10[i]= fair_opt_mst_2_way_var_distance_10[i]/mst_2_way_var_distance_10[i]
    mst_2_way_var_distance_10[i]= mst_2_way_var_distance_10[i]/mst_2_way_var_distance_10[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, 2 way marginals")
#plt.ylabel("Relative Variation Distance")
plt.savefig(artifactspath + '2_way_10_MST.pdf', bbox_inches='tight')


## Correlation Error

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_corr_distance_01,privbayes_corr_distance_01,fair_greedy_mst_corr_distance_01,fair_opt_mst_corr_distance_01,fair_privbayes_corr_distance_01]



#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Correlation")
plt.ylabel("Cramers V difference")
plt.savefig(artifactspath + 'corr_01.pdf', bbox_inches='tight')


In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","EMST",]
values = [mst_corr_distance_01,fair_greedy_mst_corr_distance_01,fair_opt_mst_corr_distance_01]

for i in range(len(mst_corr_distance_01)):
    fair_greedy_mst_corr_distance_01[i]= fair_greedy_mst_corr_distance_01[i]/mst_corr_distance_01[i]
    fair_opt_mst_corr_distance_01[i]= fair_opt_mst_corr_distance_01[i]/mst_corr_distance_01[i]
    mst_corr_distance_01[i]= mst_corr_distance_01[i]/mst_corr_distance_01[i]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Correlation")
plt.ylabel("Relative Cramers V")
plt.savefig(artifactspath + 'corr_01_MST.pdf', bbox_inches='tight')


In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_corr_distance_1,privbayes_corr_distance_1,fair_greedy_mst_corr_distance_1,fair_opt_mst_corr_distance_1,fair_privbayes_corr_distance_1]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Correlation")
plt.ylabel("Cramers V difference")
plt.savefig(artifactspath + 'corr_1.pdf', bbox_inches='tight')


In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","EMST",]
values = [mst_corr_distance_1,fair_greedy_mst_corr_distance_1,fair_opt_mst_corr_distance_1]
for i in range(len(mst_corr_distance_1)):
    fair_greedy_mst_corr_distance_1[i]= fair_greedy_mst_corr_distance_1[i]/mst_corr_distance_1[i]
    fair_opt_mst_corr_distance_1[i]= fair_opt_mst_corr_distance_1[i]/mst_corr_distance_1[i]
    mst_corr_distance_1[i]= mst_corr_distance_1[i]/mst_corr_distance_1[i]
#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Correlation")
#plt.ylabel("Relative Cramers V")
plt.savefig(artifactspath + 'corr_1_MST.pdf', bbox_inches='tight')


In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_corr_distance_10,privbayes_corr_distance_10,fair_greedy_mst_corr_distance_10,fair_opt_mst_corr_distance_10,fair_privbayes_corr_distance_10]

#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Correlation")
plt.ylabel("Cramers V difference")
plt.savefig(artifactspath + 'corr_10.pdf', bbox_inches='tight')


In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","EMST",]
values = [mst_corr_distance_10,fair_greedy_mst_corr_distance_10,fair_opt_mst_corr_distance_10]

for i in range(len(mst_corr_distance_10)):
    fair_greedy_mst_corr_distance_10[i]= fair_greedy_mst_corr_distance_10[i]/mst_corr_distance_10[i]
    fair_opt_mst_corr_distance_10[i]= fair_opt_mst_corr_distance_10[i]/mst_corr_distance_10[i]
    mst_corr_distance_10[i]= mst_corr_distance_10[i]/mst_corr_distance_10[i]
#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.4, 1.6])

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Correlation")
#plt.ylabel("Relative Cramers V")
plt.savefig(artifactspath + 'corr_10_MST.pdf', bbox_inches='tight')


# Fairness Metrics 

In [53]:

SHUFFLE_BUFFER = 500
BATCH_SIZE = 50


In [54]:
def mlp_test(path):
    #eps = 0.1 MST
    accuracy_vals = []
    accuracy_parity_vals = []
    demographic_parity_vals = []
    TPRBalance_vals = []
    TNRBalance_vals =[]
    conditional_demographic_parity_vals = []
    conditional_TPRBalance_vals = []
    conditional_TNRBalance_vals =[]

    for i in range(10):
        # training models on greedy_fair_mst
        testpath = "data/cleaned_KDD.csv"
        test = pd.read_csv(testpath)
        test_sex0 = copy.copy(test).loc[test['sex'] == 0]
        test_sex1 = copy.copy(test).loc[test['sex'] == 1]
        test_lables = test.pop('income_50k')
        test_sex0_lables = test_sex0.pop('income_50k')
        test_sex1_lables = test_sex1.pop('income_50k')
        #tf.convert_to_tensor(test)


        trainpath = f'{path}{i}.csv'
        train = pd.read_csv(trainpath)
        train_lables = train.pop('income_50k')
        tf.convert_to_tensor(train)

        model = Sequential()
        model.add(Dense(20, activation='relu', kernel_initializer='he_normal'))
        model.add(Dense(20, activation='relu', kernel_initializer='he_normal'))
        model.add(Dense(10, activation='relu', kernel_initializer='he_normal'))
        model.add(Dense(5, activation='relu', kernel_initializer='he_normal'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

        model.fit(train, train_lables, epochs=20, batch_size=BATCH_SIZE, verbose=0)
        #model.fit(train, train_lables, epochs=10, batch_size=BATCH_SIZE)
        predictions = model.predict(test)
        loss = model.evaluate(test, test_lables, verbose=0)
        accuracy = loss[1]
        accuracy_vals.append(accuracy)
        sex0_accuracy = model.evaluate(test_sex0, test_sex0_lables, verbose=0)[1]
        sex1_accuracy = model.evaluate(test_sex1, test_sex1_lables, verbose=0)[1]
        accuracy_parity_vals.append(abs(sex1_accuracy-sex0_accuracy))

        test['Income'] = test_lables
        test['Predicted'] = predictions
        test = test.round({'Predicted': 0})
        test["Predicted"]=test["Predicted"].astype(int)

        sex1 = test.loc[test['sex'] == 1]
        sex0 = test.loc[test['sex'] == 0]
        sex1_count = len(sex1.index)
        sex0_count = len(sex0.index)
        demographic_parity = abs((sex1['Predicted'].sum()/sex1_count) -(sex0['Predicted'].sum()/sex0_count))
        demographic_parity_vals.append(demographic_parity)

        sex1TPR = len(sex1.loc[(sex1['Income'] == 1) & (sex1['Predicted'] == 1)].index)/len(sex1.loc[(sex1['Income'] == 1)].index)
        sex0TPR = len(sex0.loc[(sex0['Income'] == 1) & (sex0['Predicted'] == 1)].index)/len(sex0.loc[(sex0['Income'] == 1)].index) 
        TPRBalance = abs(sex1TPR - sex0TPR)
        TPRBalance_vals.append(TPRBalance)

        sex1TNR = len(sex1.loc[(sex1['Income'] == 0) & (sex1['Predicted'] == 0)].index)/len(sex1.loc[(sex1['Income'] == 0)].index)
        sex0TNR = len(sex0.loc[(sex0['Income'] == 0) & (sex0['Predicted'] == 0)].index)/len(sex0.loc[(sex0['Income'] == 0)].index) 
        TNRBalance = abs(sex1TNR - sex0TNR)
        TNRBalance_vals.append(TNRBalance)


        admis_group = test.groupby(admissible, sort = False)
        test['S0TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 0) 
        test['S0TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0Pos'] = (test['Income'] == 1)  & (test['sex'] == 0) 
        test['S0Neg'] = (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 0) 
        test['S0count'] = (test['sex'] == 0)
        test['S1TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 1) 
        test['S1TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1Pos'] = (test['Income'] == 1)  & (test['sex'] == 1) 
        test['S1Neg'] = (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 1) 
        test['S1count'] = (test['sex'] == 1)


        tp0 = admis_group['S0TP'].sum()
        pos0 = admis_group['S0Pos'].sum()
        tn0 = admis_group['S0TN'].sum()
        neg0 = admis_group['S0Neg'].sum()
        pred_pos0 = admis_group['S0pred_pos'].sum()
        counts0 = admis_group['S0count'].sum()

        sex0_group_tpr = (tp0/pos0)
        sex0_group_tpr = sex0_group_tpr.values
        sex0_group_tpr = sex0_group_tpr[~np.isnan(sex0_group_tpr)]

        sex0_group_tnr = (tn0/neg0)
        sex0_group_tnr =sex0_group_tnr.values
        sex0_group_tnr = sex0_group_tnr[~np.isnan(sex0_group_tnr)]

        sex0_group_dp = (pred_pos0/counts0)
        sex0_group_dp =sex0_group_dp.values
        sex0_group_dp = sex0_group_dp[~np.isnan(sex0_group_dp)]


        tp1 = admis_group['S1TP'].sum()
        pos1 = admis_group['S1Pos'].sum()
        tn1 = admis_group['S1TN'].sum()
        neg1 = admis_group['S1Neg'].sum()
        pred_pos1 = admis_group['S1pred_pos'].sum()
        counts1 = admis_group['S1count'].sum()

        sex1_group_tpr = (tp1/pos1)
        sex1_group_tpr = sex1_group_tpr.values
        sex1_group_tpr = sex1_group_tpr[~np.isnan(sex1_group_tpr)]

        sex1_group_tnr = (tn1/neg1)
        sex1_group_tnr =sex1_group_tnr.values
        sex1_group_tnr = sex1_group_tnr[~np.isnan(sex1_group_tnr)]

        sex1_group_dp = (pred_pos1/counts1)
        sex1_group_dp =sex1_group_dp.values
        sex1_group_dp = sex1_group_dp[~np.isnan(sex1_group_dp)]


        conditional_demographic_parity_vals.append(abs(np.mean(sex1_group_dp)-np.mean(sex0_group_dp)))
        conditional_TPRBalance_vals.append(abs(np.mean(sex1_group_tpr)-np.mean(sex0_group_tpr)))
        conditional_TNRBalance_vals.append(abs(np.mean(sex1_group_tnr)-np.mean(sex0_group_tnr)))


    print("Average Accuracy: " + str(np.mean(accuracy_vals)))
    print("Average Accuracy Parity: " + str(np.mean(accuracy_parity_vals)))
    print("Average Demographic Parity: " + str(np.mean(demographic_parity_vals)))
    print("Average TPR Balance: " + str(np.mean(TPRBalance_vals)))
    print("Average TNR Balance: " + str(np.mean(TNRBalance_vals)))

    print("Average Conditional Demographic Parity: " + str(np.mean(conditional_demographic_parity_vals)))
    print("Average Conditional TPR Balance: " + str(np.mean(conditional_TPRBalance_vals)))
    print("Average Conditional TNR Balance: " + str(np.mean(conditional_TNRBalance_vals)))

    return((accuracy_vals,accuracy_parity_vals,demographic_parity_vals,TPRBalance_vals,TNRBalance_vals,conditional_demographic_parity_vals,conditional_TPRBalance_vals,conditional_TNRBalance_vals))

In [55]:
#eps = 0.1 MST

path = 'data/original/KDD/eps=0.1/results_original_'
vals = mlp_test(path)
mst_accuracy_01 = vals[0]
mst_accuracy_parity_01 = vals[1]
mst_demographic_parity_01 = vals[2]
mst_TPRBalance_01 = vals[3]
mst_TNRBalance_01 =vals[4]
mst_conditional_demographic_parity_01 = vals[5]
mst_conditional_TPRBalance_01 = vals[6]
mst_conditional_TNRBalance_01 =vals[7]

#eps = 1 MST
path = 'data/original/KDD/eps=1/results_original_'
vals = mlp_test(path)
mst_accuracy_1 = vals[0]
mst_accuracy_parity_1 = vals[1]
mst_demographic_parity_1 = vals[2]
mst_TPRBalance_1 = vals[3]
mst_TNRBalance_1 =vals[4]
mst_conditional_demographic_parity_1 = vals[5]
mst_conditional_TPRBalance_1 = vals[6]
mst_conditional_TNRBalance_1 =vals[7]

#eps = 10 MST
path = 'data/original/KDD/eps=10/results_original_'
vals = mlp_test(path)
mst_accuracy_10 = vals[0]
mst_accuracy_parity_10 = vals[1]
mst_demographic_parity_10 = vals[2]
mst_TPRBalance_10 = vals[3]
mst_TNRBalance_10 =vals[4]

mst_conditional_demographic_parity_10 = vals[5]
mst_conditional_TPRBalance_10 = vals[6]
mst_conditional_TNRBalance_10 =vals[7]



Average Accuracy: 0.934273236989975
Average Accuracy Parity: 0.07542330622673035
Average Demographic Parity: 0.003997644980670493
Average TPR Balance: 0.008555335455785608
Average TNR Balance: 0.0022397751201846173
Average Conditional Demographic Parity: 0.00407796081311154
Average Conditional TPR Balance: 0.008555335455785608
Average Conditional TNR Balance: 0.0023004261715863115
Average Accuracy: 0.936753648519516
Average Accuracy Parity: 0.0754007339477539
Average Demographic Parity: 0.003941881079778909
Average TPR Balance: 0.0132408203372445
Average TNR Balance: 0.002446037736628459
Average Conditional Demographic Parity: 0.004015015395516185
Average Conditional TPR Balance: 0.0132408203372445
Average Conditional TNR Balance: 0.0024990767129010718
Average Accuracy: 0.93561292886734
Average Accuracy Parity: 0.07477244734764099
Average Demographic Parity: 0.011778104722190862
Average TPR Balance: 0.027498668267386017
Average TNR Balance: 0.006175880358520347
Average Conditional Demo

In [56]:
#eps = 0.1 fair_greedy_MST
path = 'data/fair_greedy/KDD/eps=0.1/results_greedy_'
vals = mlp_test(path)
fair_greedy_mst_accuracy_01 = vals[0]
fair_greedy_mst_accuracy_parity_01 = vals[1]
fair_greedy_mst_demographic_parity_01 = vals[2]
fair_greedy_mst_TPRBalance_01 = vals[3]
fair_greedy_mst_TNRBalance_01 =vals[4]

fair_greedy_mst_conditional_demographic_parity_01 = vals[5]
fair_greedy_mst_conditional_TPRBalance_01 = vals[6]
fair_greedy_mst_conditional_TNRBalance_01 =vals[7]

#eps = 1 fair_greedy_MST
path = 'data/fair_greedy/KDD/eps=1/results_greedy_'
vals = mlp_test(path)
fair_greedy_mst_accuracy_1 = vals[0]
fair_greedy_mst_accuracy_parity_1 = vals[1]
fair_greedy_mst_demographic_parity_1 = vals[2]
fair_greedy_mst_TPRBalance_1 = vals[3]
fair_greedy_mst_TNRBalance_1 =vals[4]

fair_greedy_mst_conditional_demographic_parity_1 = vals[5]
fair_greedy_mst_conditional_TPRBalance_1 = vals[6]
fair_greedy_mst_conditional_TNRBalance_1 =vals[7]

#eps = 10 fair_greedy_MST
path = 'data/fair_greedy/KDD/eps=10/results_greedy_'
vals = mlp_test(path)
fair_greedy_mst_accuracy_10 = vals[0]
fair_greedy_mst_accuracy_parity_10 = vals[1]
fair_greedy_mst_demographic_parity_10 = vals[2]
fair_greedy_mst_TPRBalance_10 = vals[3]
fair_greedy_mst_TNRBalance_10 =vals[4]

fair_greedy_mst_conditional_demographic_parity_10 = vals[5]
fair_greedy_mst_conditional_TPRBalance_10 = vals[6]
fair_greedy_mst_conditional_TNRBalance_10 =vals[7]



Average Accuracy: 0.9275557160377502
Average Accuracy Parity: 0.06989051103591919
Average Demographic Parity: 0.01744430380439268
Average TPR Balance: 0.02878017233568573
Average TNR Balance: 0.008445890955035385
Average Conditional Demographic Parity: 0.017780043610690625
Average Conditional TPR Balance: 0.02878017233568573
Average Conditional TNR Balance: 0.008657888916228419
Average Accuracy: 0.9330703616142273
Average Accuracy Parity: 0.07479828000068664
Average Demographic Parity: 0.017450309505292368
Average TPR Balance: 0.04158671666699444
Average TNR Balance: 0.010020799429699112
Average Conditional Demographic Parity: 0.017774459228633233
Average Conditional TPR Balance: 0.04158671666699444
Average Conditional TNR Balance: 0.01024155103690202
Average Accuracy: 0.9367466330528259
Average Accuracy Parity: 0.07459798455238342
Average Demographic Parity: 0.007458319011632064
Average TPR Balance: 0.015521942011762213
Average TNR Balance: 0.003571502288835804
Average Conditional Dem

In [75]:
print(np.mean(fair_greedy_mst_accuracy_1)/np.mean(mst_accuracy_1))
print(np.mean(fair_greedy_mst_demographic_parity_1)/np.mean(mst_demographic_parity_1))
print(np.mean(fair_greedy_mst_TPRBalance_1)/np.mean(mst_TPRBalance_1))
print(np.mean(fair_greedy_mst_TNRBalance_1)/np.mean(mst_TNRBalance_1))
print(np.mean(fair_greedy_mst_conditional_demographic_parity_1)/np.mean(mst_conditional_demographic_parity_1))
print(np.mean(fair_greedy_mst_conditional_TPRBalance_1)/np.mean(mst_conditional_TPRBalance_1))
print(np.mean(fair_greedy_mst_conditional_TNRBalance_1)/np.mean(mst_conditional_TNRBalance_1))

0.9960680303609065
4.4268990241255874
3.1407960842136844
4.09674768285115
4.426996531192151
3.1407960842136844
4.0981339164306965


In [None]:
#eps = 0.1 privbayes
path = 'data/Bayes_original/eps=0.1/sythetic_data_'
vals = mlp_test(path)

privbayes_accuracy_01 = vals[0]
privbayes_accuracy_parity_01 = vals[1]
privbayes_demographic_parity_01 = vals[2]
privbayes_TPRBalance_01 = vals[3]
privbayes_TNRBalance_01 =vals[4]
privbayes_conditional_demographic_parity_01 = vals[5]
privbayes_conditional_TPRBalance_01 = vals[6]
privbayes_conditional_TNRBalance_01 =vals[7]

#eps = 1 privbayes
path = 'data/Bayes_original/eps=1/sythetic_data_'
vals = mlp_test(path)

privbayes_accuracy_1 = vals[0]
privbayes_accuracy_parity_1 = vals[1]
privbayes_demographic_parity_1 = vals[2]
privbayes_TPRBalance_1 = vals[3]
privbayes_TNRBalance_1 =vals[4]
privbayes_conditional_demographic_parity_1 = vals[5]
privbayes_conditional_TPRBalance_1 = vals[6]
privbayes_conditional_TNRBalance_1 =vals[7]

#eps = 10 privbayes
path = 'data/Bayes_original/eps=10/sythetic_data_'
vals = mlp_test(path)

privbayes_accuracy_10 = vals[0]
privbayes_accuracy_parity_10 = vals[1]
privbayes_demographic_parity_10 = vals[2]
privbayes_TPRBalance_10 = vals[3]
privbayes_TNRBalance_10 =vals[4]
privbayes_conditional_demographic_parity_10 = vals[5]
privbayes_conditional_TPRBalance_10 = vals[6]
privbayes_conditional_TNRBalance_10 =vals[7]

#eps = 100 privbayes
path = 'data/Bayes_original/eps=100/sythetic_data_'
vals = mlp_test(path)

privbayes_accuracy_100 = vals[0]
privbayes_accuracy_parity_100 = vals[1]
privbayes_demographic_parity_100 = vals[2]
privbayes_TPRBalance_100 = vals[3]
privbayes_TNRBalance_100 =vals[4]
privbayes_conditional_demographic_parity_100 = vals[5]
privbayes_conditional_TPRBalance_100 = vals[6]
privbayes_conditional_TNRBalance_100 =vals[7]

#eps = 1000 privbayes
path = 'data/Bayes_original/eps=1000/sythetic_data_'
vals = mlp_test(path)

privbayes_accuracy_1000 = vals[0]
privbayes_accuracy_parity_1000 = vals[1]
privbayes_demographic_parity_1000 = vals[2]
privbayes_TPRBalance_1000 = vals[3]
privbayes_TNRBalance_1000 =vals[4]
privbayes_conditional_demographic_parity_1000 = vals[5]
privbayes_conditional_TPRBalance_1000 = vals[6]
privbayes_conditional_TNRBalance_1000 =vals[7]

In [None]:
path = 'data/Bayes_fair/eps=0.1/sythetic_data_'
vals = mlp_test(path)

fair_privbayes_accuracy_01 = vals[0]
fair_privbayes_accuracy_parity_01 = vals[1]
fair_privbayes_demographic_parity_01 = vals[2]
fair_privbayes_TPRBalance_01 = vals[3]
fair_privbayes_TNRBalance_01 =vals[4]
fair_privbayes_conditional_demographic_parity_01 = vals[5]
fair_privbayes_conditional_TPRBalance_01 = vals[6]
fair_privbayes_conditional_TNRBalance_01 =vals[7]


path = 'data/Bayes_fair/eps=1/sythetic_data_'
vals = mlp_test(path)

fair_privbayes_accuracy_1 = vals[0]
fair_privbayes_accuracy_parity_1 = vals[1]
fair_privbayes_demographic_parity_1 = vals[2]
fair_privbayes_TPRBalance_1 = vals[3]
fair_privbayes_TNRBalance_1 =vals[4]
fair_privbayes_conditional_demographic_parity_1 = vals[5]
fair_privbayes_conditional_TPRBalance_1 = vals[6]
fair_privbayes_conditional_TNRBalance_1 =vals[7]


path = 'data/Bayes_fair/eps=10/sythetic_data_'
vals = mlp_test(path)

fair_privbayes_accuracy_10 = vals[0]
fair_privbayes_accuracy_parity_10 = vals[1]
fair_privbayes_demographic_parity_10 = vals[2]
fair_privbayes_TPRBalance_10 = vals[3]
fair_privbayes_TNRBalance_10 =vals[4]
fair_privbayes_conditional_demographic_parity_10 = vals[5]
fair_privbayes_conditional_TPRBalance_10 = vals[6]
fair_privbayes_conditional_TNRBalance_10 =vals[7]

path = 'data/Bayes_fair/eps=100/sythetic_data_'
vals = mlp_test(path)

fair_privbayes_accuracy_100 = vals[0]
fair_privbayes_accuracy_parity_100 = vals[1]
fair_privbayes_demographic_parity_100 = vals[2]
fair_privbayes_TPRBalance_100 = vals[3]
fair_privbayes_TNRBalance_100 =vals[4]
fair_privbayes_conditional_demographic_parity_100 = vals[5]
fair_privbayes_conditional_TPRBalance_100 = vals[6]
fair_privbayes_conditional_TNRBalance_100 =vals[7]

path = 'data/Bayes_fair/eps=1000/sythetic_data_'
vals = mlp_test(path)

fair_privbayes_accuracy_1000 = vals[0]
fair_privbayes_accuracy_parity_1000 = vals[1]
fair_privbayes_demographic_parity_1000 = vals[2]
fair_privbayes_TPRBalance_1000 = vals[3]
fair_privbayes_TNRBalance_1000 =vals[4]
fair_privbayes_conditional_demographic_parity_1000 = vals[5]
fair_privbayes_conditional_TPRBalance_1000 = vals[6]
fair_privbayes_conditional_TNRBalance_1000 =vals[7]

In [None]:
path = 'data/fair_opt/eps=0.1/results_opt_'
vals = mlp_test(path)


fair_opt_mst_accuracy_01 = vals[0]
fair_opt_mst_accuracy_parity_01 = vals[1]
fair_opt_mst_demographic_parity_01 = vals[2]
fair_opt_mst_TPRBalance_01 = vals[3]
fair_opt_mst_TNRBalance_01 =vals[4]
fair_opt_mst_conditional_demographic_parity_01 = vals[5]
fair_opt_mst_conditional_TPRBalance_01 = vals[6]
fair_opt_mst_conditional_TNRBalance_01 =vals[7]

path = 'data/fair_opt/eps=1/results_opt_'
vals = mlp_test(path)


fair_opt_mst_accuracy_1 = vals[0]
fair_opt_mst_accuracy_parity_1 = vals[1]
fair_opt_mst_demographic_parity_1 = vals[2]
fair_opt_mst_TPRBalance_1 = vals[3]
fair_opt_mst_TNRBalance_1 =vals[4]
fair_opt_mst_conditional_demographic_parity_1 = vals[5]
fair_opt_mst_conditional_TPRBalance_1 = vals[6]
fair_opt_mst_conditional_TNRBalance_1 =vals[7]

path = 'data/fair_opt/eps=10/results_opt_'
vals = mlp_test(path)


fair_opt_mst_accuracy_10 = vals[0]
fair_opt_mst_accuracy_parity_10 = vals[1]
fair_opt_mst_demographic_parity_10 = vals[2]
fair_opt_mst_TPRBalance_10 = vals[3]
fair_opt_mst_TNRBalance_10 =vals[4]
fair_opt_mst_conditional_demographic_parity_10 = vals[5]
fair_opt_mst_conditional_TPRBalance_10 = vals[6]
fair_opt_mst_conditional_TNRBalance_10=vals[7]

path = 'data/fair_opt/eps=100/results_opt_'
vals = mlp_test(path)


fair_opt_mst_accuracy_100 = vals[0]
fair_opt_mst_accuracy_parity_100 = vals[1]
fair_opt_mst_demographic_parity_100 = vals[2]
fair_opt_mst_TPRBalance_100 = vals[3]
fair_opt_mst_TNRBalance_100 =vals[4]
fair_opt_mst_conditional_demographic_parity_100 = vals[5]
fair_opt_mst_conditional_TPRBalance_100 = vals[6]
fair_opt_mst_conditional_TNRBalance_100=vals[7]

path = 'data/fair_opt/eps=1000/results_opt_'
vals = mlp_test(path)


fair_opt_mst_accuracy_1000 = vals[0]
fair_opt_mst_accuracy_parity_1000 = vals[1]
fair_opt_mst_demographic_parity_1000 = vals[2]
fair_opt_mst_TPRBalance_1000 = vals[3]
fair_opt_mst_TNRBalance_1000 =vals[4]
fair_opt_mst_conditional_demographic_parity_1000 = vals[5]
fair_opt_mst_conditional_TPRBalance_1000 = vals[6]
fair_opt_mst_conditional_TNRBalance_1000=vals[7]



## Classification accuracy score

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01), np.mean(mst_accuracy_1), np.mean(mst_accuracy_10)]
bar1 = plt.bar(ind, xvals, width)

yvals = [np.mean(privbayes_accuracy_01), np.mean(privbayes_accuracy_1), np.mean(privbayes_accuracy_10)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_accuracy_01), np.mean(fair_greedy_mst_accuracy_1), np.mean(fair_greedy_mst_accuracy_10)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01), np.mean(fair_opt_mst_accuracy_1), np.mean(fair_opt_mst_accuracy_10)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_accuracy_01), np.mean(fair_privbayes_accuracy_1), np.mean(fair_privbayes_accuracy_10)]
bar5 = plt.bar(ind+width*4, xyvals, width)


plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("MLP Accuracy")


plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
plt.legend( (bar1, bar2, bar3,bar4,bar5), ('MST','Privbayes', 'GreedyMST', 'ExpMST','GreedyPrivbayes'), fontsize = 10)
plt.savefig(artifactspath + 'ACCMLP.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01), np.mean(mst_accuracy_1), np.mean(mst_accuracy_10)]
bar1 = plt.bar(ind, xvals, width)

  
zvals = [np.mean(fair_greedy_mst_accuracy_01), np.mean(fair_greedy_mst_accuracy_1), np.mean(fair_greedy_mst_accuracy_10)]
bar3 = plt.bar(ind+width*1, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01), np.mean(fair_opt_mst_accuracy_1), np.mean(fair_opt_mst_accuracy_10)]
bar4 = plt.bar(ind+width*2, xxvals, width)


plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("MLP Accuracy")

plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1,  bar3,bar4), ('MST', 'GreedyMST', 'ExpMST'), fontsize = 10 )
plt.savefig(artifactspath + 'ACCMLPMST.pdf', bbox_inches='tight')

In [None]:
N = 5
ind = np.arange(N) 
width = 0.25
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01), np.mean(mst_accuracy_1), np.mean(mst_accuracy_10),np.mean(mst_accuracy_100),np.mean(mst_accuracy_1000)]
bar1 = plt.bar(ind, xvals, width)


yvals = [np.mean(fair_greedy_mst_accuracy_01), np.mean(fair_greedy_mst_accuracy_1), np.mean(fair_greedy_mst_accuracy_10),np.mean(fair_greedy_mst_accuracy_100),np.mean(fair_greedy_mst_accuracy_1000)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_opt_mst_accuracy_01), np.mean(fair_opt_mst_accuracy_1), np.mean(fair_opt_mst_accuracy_10),np.mean(fair_opt_mst_accuracy_100),np.mean(fair_opt_mst_accuracy_1000)]
bar3 = plt.bar(ind+width*2, zvals, width)
  
plt.xlabel("Privacy Budgets")
plt.ylabel('Variation Distance')
plt.title("1-Way Marginals")
  
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10','ε=100','ε=1000'])
plt.legend( (bar1, bar2, bar3), ('MST', 'GreedyMST', 'ExpMST') )
plt.savefig(artifactspath + 'ACCall5.pdf', bbox_inches='tight')

In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_01,privbayes_accuracy_01,fair_greedy_mst_accuracy_01,fair_opt_mst_accuracy_01,fair_privbayes_accuracy_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.5, 1])

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Classification Accuracy")
plt.ylabel("Accuracy")
plt.savefig(artifactspath + 'accuracy_01.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
print(type(mst_accuracy_01))
values = [mst_accuracy_01,fair_greedy_mst_accuracy_01,fair_opt_mst_accuracy_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.5, 1])
plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Classification Accuracy")
plt.ylabel("Accuracy")
plt.savefig(artifactspath + 'accuracy_01_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_1,privbayes_accuracy_1,fair_greedy_mst_accuracy_1,fair_opt_mst_accuracy_1,fair_privbayes_accuracy_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Classification Accuracy")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_1.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST",]
values = [mst_accuracy_1,fair_greedy_mst_accuracy_1,fair_opt_mst_accuracy_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.5, 1])

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Classification Accuracy")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_1_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_10,privbayes_accuracy_10,fair_greedy_mst_accuracy_10,fair_opt_mst_accuracy_10,fair_privbayes_accuracy_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Classification Accuracy")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_10.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST","GMST","OMST"]
values = [mst_accuracy_10,fair_greedy_mst_accuracy_10,fair_opt_mst_accuracy_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)
ax.set_ylim([0.5, 1])

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Classification Accuracy")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_10_MST.pdf', bbox_inches='tight')


## Accuracy Pairity (F1) Score

In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_parity_01,privbayes_accuracy_parity_01,fair_greedy_mst_accuracy_parity_01,fair_opt_mst_accuracy_parity_01,fair_privbayes_accuracy_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_01.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_accuracy_parity_01,fair_greedy_mst_accuracy_parity_01,fair_opt_mst_accuracy_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_01_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_parity_1,privbayes_accuracy_parity_1,fair_greedy_mst_accuracy_parity_1,fair_opt_mst_accuracy_parity_1,fair_privbayes_accuracy_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_1.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_accuracy_parity_1,fair_greedy_mst_accuracy_parity_1,fair_opt_mst_accuracy_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_1_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_parity_10,privbayes_accuracy_parity_10,fair_greedy_mst_accuracy_parity_10,fair_opt_mst_accuracy_parity_10,fair_privbayes_accuracy_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_10.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_accuracy_parity_10,fair_greedy_mst_accuracy_parity_10,fair_opt_mst_accuracy_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Classification Disparity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'accuracy_parity_10_MST.pdf', bbox_inches='tight')


## Demographic Parity 

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_demographic_parity_01,privbayes_demographic_parity_01,fair_greedy_mst_demographic_parity_01,fair_opt_mst_demographic_parity_01,fair_privbayes_demographic_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_demographic_parity_01,fair_greedy_mst_demographic_parity_01,fair_opt_mst_demographic_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_01_MST.pdf', bbox_inches='tight')

In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_parity_1,privbayes_accuracy_parity_1,fair_greedy_mst_accuracy_parity_1,fair_opt_mst_accuracy_parity_1,fair_privbayes_accuracy_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_1.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST", "OMST"]
values = [mst_accuracy_parity_1,fair_greedy_mst_accuracy_parity_1,fair_opt_mst_accuracy_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_1_MST.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_accuracy_parity_10,privbayes_accuracy_parity_10,fair_greedy_mst_accuracy_parity_10,fair_opt_mst_accuracy_parity_10,fair_privbayes_accuracy_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_10.pdf', bbox_inches='tight')


In [None]:

x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_accuracy_parity_10,fair_greedy_mst_accuracy_parity_10,fair_opt_mst_accuracy_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'dem_parity_10_MST.pdf', bbox_inches='tight')


## True Positive Rate Balance

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TPRBalance_01,privbayes_TPRBalance_01,fair_greedy_mst_TPRBalance_01,fair_opt_mst_TPRBalance_01,fair_privbayes_TPRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST","PB","OMST", ]
values = [mst_TPRBalance_01,fair_greedy_mst_TPRBalance_01,fair_opt_mst_TPRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_01_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TPRBalance_1,privbayes_TPRBalance_1,fair_greedy_mst_TPRBalance_1,fair_opt_mst_TPRBalance_1,fair_privbayes_TPRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_TPRBalance_1,fair_greedy_mst_TPRBalance_1,fair_opt_mst_TPRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TPRBalance_10,privbayes_TPRBalance_10,fair_greedy_mst_TPRBalance_10,fair_opt_mst_TPRBalance_10,fair_privbayes_TPRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_10.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_TPRBalance_10,fair_greedy_mst_TPRBalance_10,fair_opt_mst_TPRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TPR_Balance_10_MST.pdf', bbox_inches='tight')

## True Negative Rate Balance

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TNRBalance_01,privbayes_TNRBalance_01,fair_greedy_mst_TNRBalance_01,fair_opt_mst_TNRBalance_01,fair_privbayes_TNRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_TNRBalance_01,fair_greedy_mst_TNRBalance_01,fair_opt_mst_TNRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_01_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TNRBalance_1,privbayes_TNRBalance_1,fair_greedy_mst_TNRBalance_1,fair_opt_mst_TNRBalance_1,fair_privbayes_TNRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_1.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_TNRBalance_1,fair_greedy_mst_TNRBalance_1,fair_opt_mst_TNRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_TNRBalance_10,privbayes_TNRBalance_10,fair_greedy_mst_TNRBalance_10,fair_opt_mst_TNRBalance_10,fair_privbayes_TNRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_10.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_TNRBalance_10,fair_greedy_mst_TNRBalance_10,fair_opt_mst_TNRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'TNR_Balance_10._MST.pdf', bbox_inches='tight')

## Conditioned Demographic Parity

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_demographic_parity_01,privbayes_conditional_demographic_parity_01,fair_greedy_mst_conditional_demographic_parity_01,fair_opt_mst_conditional_demographic_parity_01,fair_privbayes_conditional_demographic_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_demographic_parity_01,fair_greedy_mst_conditional_demographic_parity_01,fair_opt_mst_conditional_demographic_parity_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_01_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_demographic_parity_1,privbayes_conditional_demographic_parity_1,fair_greedy_mst_conditional_demographic_parity_1,fair_opt_mst_conditional_demographic_parity_1,fair_privbayes_conditional_demographic_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_1.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_demographic_parity_1,fair_greedy_mst_conditional_demographic_parity_1,fair_opt_mst_conditional_demographic_parity_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_demographic_parity_10,privbayes_conditional_demographic_parity_10,fair_greedy_mst_conditional_demographic_parity_10,fair_opt_mst_conditional_demographic_parity_10,fair_privbayes_conditional_demographic_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_10.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_demographic_parity_10,fair_greedy_mst_conditional_demographic_parity_10,fair_opt_mst_conditional_demographic_parity_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Conditional Demographic Parity")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_dem_parity_10_MST.pdf', bbox_inches='tight')

## Conditioned True Positive Rate

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TPRBalance_01,privbayes_conditional_TPRBalance_01,fair_greedy_mst_conditional_TPRBalance_01,fair_opt_mst_conditional_TPRBalance_01,fair_privbayes_conditional_TPRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST","PB","OMST", ]
values = [mst_conditional_TPRBalance_01,fair_greedy_mst_conditional_TPRBalance_01,fair_opt_mst_conditional_TPRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_01_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TPRBalance_1,privbayes_conditional_TPRBalance_1,fair_greedy_mst_conditional_TPRBalance_1,fair_opt_mst_conditional_TPRBalance_1,fair_privbayes_conditional_TPRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_1.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST","GMST","OMST"]
values = [mst_conditional_TPRBalance_1,fair_greedy_mst_conditional_TPRBalance_1,fair_opt_mst_conditional_TPRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1,Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TPRBalance_10,privbayes_conditional_TPRBalance_10,fair_greedy_mst_conditional_TPRBalance_10,fair_opt_mst_conditional_TPRBalance_10,fair_privbayes_conditional_TPRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10,Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_10.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST","GMST","OMST", ]
values = [mst_conditional_TPRBalance_10,fair_greedy_mst_conditional_TPRBalance_10,fair_opt_mst_conditional_TPRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Conditional TPR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TPR_Balance_10_MST.pdf', bbox_inches='tight')

## Conditioned True Negative Balance 

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TNRBalance_01,privbayes_conditional_TNRBalance_01,fair_greedy_mst_conditional_TNRBalance_01,fair_opt_mst_conditional_TNRBalance_01,fair_privbayes_conditional_TNRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1, Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_01.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_TNRBalance_01,fair_greedy_mst_conditional_TNRBalance_01,fair_opt_mst_conditional_TNRBalance_01]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 0.1,Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_01_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TNRBalance_1,privbayes_conditional_TNRBalance_1,fair_greedy_mst_conditional_TNRBalance_1,fair_opt_mst_conditional_TNRBalance_1,fair_privbayes_conditional_TNRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1,Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_1.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_TNRBalance_1,fair_greedy_mst_conditional_TNRBalance_1,fair_opt_mst_conditional_TNRBalance_1]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 1, Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_1_MST.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2,3,4]
x_axis = ["MST","PB", "GMST","OMST", "GPB"]
values = [mst_conditional_TNRBalance_10,privbayes_conditional_TNRBalance_10,fair_greedy_mst_conditional_TNRBalance_10,fair_opt_mst_conditional_TNRBalance_10,fair_privbayes_conditional_TNRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_10.pdf', bbox_inches='tight')

In [None]:
x=[0,1,2]
x_axis = ["MST", "GMST","OMST"]
values = [mst_conditional_TNRBalance_10,fair_greedy_mst_conditional_TNRBalance_10,fair_opt_mst_conditional_TNRBalance_10]


#ax = sns.boxplot(x = x_axis, data = np.array(values))

ax = sns.boxplot(data = values)

plt.xticks(x,x_axis)
plt.xlabel("ε = 10, Conditional TNR Balance")
#plt.ylabel("Total Variation Distance")
plt.savefig(artifactspath + 'con_TNR_Balance_10_MST.pdf', bbox_inches='tight')

# Linear Regression

In [None]:
def LR_test(path):
    accuracy_vals= []
    accuracy_parity_vals = []
    demographic_parity_vals = []
    TPRBalance_vals = []
    TNRBalance_vals =[]
    conditional_demographic_parity_vals = []
    conditional_TPRBalance_vals = []
    conditional_TNRBalance_vals =[]

    for i in range(10):
        # training models on greedy_fair_mst
        testpath = "data/adult.csv"
        test = pd.read_csv(testpath)
        test_lables = test.pop('income>50K')
        #tf.convert_to_tensor(test)


        trainpath = f'{path}{i}.csv'
        train = pd.read_csv(trainpath)
        train_lables = train.pop('income>50K')

        regr = LinearRegression()
        regr.fit(train, train_lables)
        predictions = regr.predict(test)
        #model.fit(train, train_lables, epochs=10, batch_size=BATCH_SIZE)

        test['Income'] = test_lables
        test['Predicted'] = predictions

        test = test.round({'Predicted': 0})
        test["Predicted"]=test["Predicted"].astype(int)

        test['ACC'] = ((test['Predicted'] == 1) & (test['Income'] == 1)) | ((test['Predicted'] == 0) & (test['Income'] == 0))
        test['S0TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 0) 
        test['S0TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0Pos'] = (test['Income'] == 1)  & (test['sex'] == 0) 
        test['S0Neg'] = (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 0) 
        test['S0count'] = (test['sex'] == 0)
        test['S1TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 1) 
        test['S1TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1Pos'] = (test['Income'] == 1)  & (test['sex'] == 1) 
        test['S1Neg'] = (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 1) 
        test['S1count'] = (test['sex'] == 1)
        test['S0ACC'] = (test['ACC'] == 1)  & (test['sex'] == 1) 
        test['S1ACC'] =(test['ACC'] == 1)  & (test['sex'] == 0) 

        accuracy = test['ACC'].sum()/test.shape[0]

        accuracy_vals.append(accuracy)


        sex0_accuracy = test['S0ACC'].sum()/test['S0count'].sum()
        sex1_accuracy = test['S1ACC'].sum()/test['S1count'].sum()
        accuracy_parity_vals.append(abs(sex1_accuracy-sex0_accuracy))


        sex1 = test.loc[test['sex'] == 1]
        sex0 = test.loc[test['sex'] == 0]
        sex1_count = len(sex1.index)
        sex0_count = len(sex0.index)
        demographic_parity = abs((sex1['Predicted'].sum()/sex1_count) -(sex0['Predicted'].sum()/sex0_count))
        demographic_parity_vals.append(demographic_parity)

        sex1TPR = len(sex1.loc[(sex1['Income'] == 1) & (sex1['Predicted'] == 1)].index)/len(sex1.loc[(sex1['Income'] == 1)].index)
        sex0TPR = len(sex0.loc[(sex0['Income'] == 1) & (sex0['Predicted'] == 1)].index)/len(sex0.loc[(sex0['Income'] == 1)].index) 
        TPRBalance = abs(sex1TPR - sex0TPR)
        TPRBalance_vals.append(TPRBalance)

        sex1TNR = len(sex1.loc[(sex1['Income'] == 0) & (sex1['Predicted'] == 0)].index)/len(sex1.loc[(sex1['Income'] == 0)].index)
        sex0TNR = len(sex0.loc[(sex0['Income'] == 0) & (sex0['Predicted'] == 0)].index)/len(sex0.loc[(sex0['Income'] == 0)].index) 
        TNRBalance = abs(sex1TNR - sex0TNR)
        TNRBalance_vals.append(TNRBalance)


        admis_group = test.groupby(admissible, sort = False)


        tp0 = admis_group['S0TP'].sum()
        pos0 = admis_group['S0Pos'].sum()
        tn0 = admis_group['S0TN'].sum()
        neg0 = admis_group['S0Neg'].sum()
        pred_pos0 = admis_group['S0pred_pos'].sum()
        counts0 = admis_group['S0count'].sum()

        sex0_group_tpr = (tp0/pos0)
        sex0_group_tpr = sex0_group_tpr.values
        sex0_group_tpr = sex0_group_tpr[~np.isnan(sex0_group_tpr)]

        sex0_group_tnr = (tn0/neg0)
        sex0_group_tnr =sex0_group_tnr.values
        sex0_group_tnr = sex0_group_tnr[~np.isnan(sex0_group_tnr)]

        sex0_group_dp = (pred_pos0/counts0)
        sex0_group_dp =sex0_group_dp.values
        sex0_group_dp = sex0_group_dp[~np.isnan(sex0_group_dp)]


        tp1 = admis_group['S1TP'].sum()
        pos1 = admis_group['S1Pos'].sum()
        tn1 = admis_group['S1TN'].sum()
        neg1 = admis_group['S1Neg'].sum()
        pred_pos1 = admis_group['S1pred_pos'].sum()
        counts1 = admis_group['S1count'].sum()

        sex1_group_tpr = (tp1/pos1)
        sex1_group_tpr = sex1_group_tpr.values
        sex1_group_tpr = sex1_group_tpr[~np.isnan(sex1_group_tpr)]

        sex1_group_tnr = (tn1/neg1)
        sex1_group_tnr =sex1_group_tnr.values
        sex1_group_tnr = sex1_group_tnr[~np.isnan(sex1_group_tnr)]

        sex1_group_dp = (pred_pos1/counts1)
        sex1_group_dp =sex1_group_dp.values
        sex1_group_dp = sex1_group_dp[~np.isnan(sex1_group_dp)]

        conditional_demographic_parity_vals.append(abs(np.mean(sex1_group_dp)-np.mean(sex0_group_dp)))
        conditional_TPRBalance_vals.append(abs(np.mean(sex1_group_tpr)-np.mean(sex0_group_tpr)))
        conditional_TNRBalance_vals.append(abs(np.mean(sex1_group_tnr)-np.mean(sex0_group_tnr)))

    print("eps = 0.1")
    print("Average Accuracy: " + str(np.mean(accuracy_vals)))
    print("Average Accuracy Parity: " + str(np.mean(accuracy_parity_vals)))
    print("Average Demographic Parity: " + str(np.mean(demographic_parity_vals)))
    print("Average TPR Balance: " + str(np.mean(TPRBalance_vals)))
    print("Average TNR Balance: " + str(np.mean(TNRBalance_vals)))

    print("Average Conditional Demographic Parity: " + str(np.mean(conditional_demographic_parity_vals)))
    print("Average Conditional TPR Balance: " + str(np.mean(conditional_TPRBalance_vals)))
    print("Average Conditional TNR Balance: " + str(np.mean(conditional_TNRBalance_vals)))

    return((accuracy_vals,accuracy_parity_vals,demographic_parity_vals,TPRBalance_vals,TNRBalance_vals,conditional_demographic_parity_vals,conditional_TPRBalance_vals,conditional_TNRBalance_vals))

In [None]:
#eps = 0.1 MST

path = 'data/original/eps=0.1/results_original_'
print("eps=0.1")
vals = LR_test(path)
mst_accuracy_01LR = vals[0]
mst_accuracy_parity_01LR = vals[1]
mst_demographic_parity_01LR = vals[2]
mst_TPRBalance_01LR = vals[3]
mst_TNRBalance_01LR =vals[4]
mst_conditional_demographic_parity_01LR = vals[5]
mst_conditional_TPRBalance_01LR = vals[6]
mst_conditional_TNRBalance_01LR =vals[7]

#eps = 1 MST
path = 'data/original/eps=1/results_original_'
print("eps=1")
vals = LR_test(path)
mst_accuracy_1LR = vals[0]
mst_accuracy_parity_1LR = vals[1]
mst_demographic_parity_1LR = vals[2]
mst_TPRBalance_1LR = vals[3]
mst_TNRBalance_1LR =vals[4]
mst_conditional_demographic_parity_1LR = vals[5]
mst_conditional_TPRBalance_1LR = vals[6]
mst_conditional_TNRBalance_1LR =vals[7]

#eps = 10 MST
path = 'data/original/eps=10/results_original_'
print("eps=10")
vals = LR_test(path)
mst_accuracy_10LR = vals[0]
mst_accuracy_parity_10LR = vals[1]
mst_demographic_parity_10LR = vals[2]
mst_TPRBalance_10LR = vals[3]
mst_TNRBalance_10LR =vals[4]

mst_conditional_demographic_parity_10LR = vals[5]
mst_conditional_TPRBalance_10LR = vals[6]
mst_conditional_TNRBalance_10LR =vals[7]

In [None]:
#eps = 0.1 fair_greedy_mst
path = 'data/fair_greedy/eps=0.1/results_greedy_'
print("eps=0.1")
vals = LR_test(path)

fair_greedy_mst_accuracy_01LR= vals[0]
fair_greedy_mst_accuracy_parity_01LR = vals[1]
fair_greedy_mst_demographic_parity_01LR = vals[2]
fair_greedy_mst_TPRBalance_01LR = vals[3]
fair_greedy_mst_TNRBalance_01LR =vals[4]
fair_greedy_mst_conditional_demographic_parity_01LR = vals[5]
fair_greedy_mst_conditional_TPRBalance_01LR = vals[6]
fair_greedy_mst_conditional_TNRBalance_01LR =vals[7]


path = 'data/fair_greedy/eps=1/results_greedy_'
print("eps=1")
vals = LR_test(path)

fair_greedy_mst_accuracy_1LR= vals[0]
fair_greedy_mst_accuracy_parity_1LR = vals[1]
fair_greedy_mst_demographic_parity_1LR = vals[2]
fair_greedy_mst_TPRBalance_1LR = vals[3]
fair_greedy_mst_TNRBalance_1LR =vals[4]
fair_greedy_mst_conditional_demographic_parity_1LR = vals[5]
fair_greedy_mst_conditional_TPRBalance_1LR = vals[6]
fair_greedy_mst_conditional_TNRBalance_1LR =vals[7]



path = 'data/fair_greedy/eps=10/results_greedy_'
print("eps=10")
vals = LR_test(path)

fair_greedy_mst_accuracy_10LR= vals[0]
fair_greedy_mst_accuracy_parity_10LR = vals[1]
fair_greedy_mst_demographic_parity_10LR = vals[2]
fair_greedy_mst_TPRBalance_10LR = vals[3]
fair_greedy_mst_TNRBalance_10LR =vals[4]
fair_greedy_mst_conditional_demographic_parity_10LR = vals[5]
fair_greedy_mst_conditional_TPRBalance_10LR = vals[6]
fair_greedy_mst_conditional_TNRBalance_10LR =vals[7]


In [None]:
#eps = 0.1 fair_opt_mst
path = 'data/fair_opt/eps=0.1/results_opt_'
print("eps=0.1")
vals = LR_test(path)
fair_opt_mst_accuracy_01LR= vals[0]
fair_opt_mst_accuracy_parity_01LR = vals[1]
fair_opt_mst_demographic_parity_01LR = vals[2]
fair_opt_mst_TPRBalance_01LR = vals[3]
fair_opt_mst_TNRBalance_01LR =vals[4]
fair_opt_mst_conditional_demographic_parity_01LR = vals[5]
fair_opt_mst_conditional_TPRBalance_01LR = vals[6]
fair_opt_mst_conditional_TNRBalance_01LR =vals[7]

path = 'data/fair_opt/eps=1/results_opt_'
print("eps=1")
vals = LR_test(path)
fair_opt_mst_accuracy_1LR= vals[0]
fair_opt_mst_accuracy_parity_1LR = vals[1]
fair_opt_mst_demographic_parity_1LR = vals[2]
fair_opt_mst_TPRBalance_1LR = vals[3]
fair_opt_mst_TNRBalance_1LR =vals[4]
fair_opt_mst_conditional_demographic_parity_1LR = vals[5]
fair_opt_mst_conditional_TPRBalance_1LR = vals[6]
fair_opt_mst_conditional_TNRBalance_1LR =vals[7]


path = 'data/fair_opt/eps=10/results_opt_'
print("eps=10")
vals = LR_test(path)
fair_opt_mst_accuracy_10LR= vals[0]
fair_opt_mst_accuracy_parity_10LR = vals[1]
fair_opt_mst_demographic_parity_10LR = vals[2]
fair_opt_mst_TPRBalance_10LR = vals[3]
fair_opt_mst_TNRBalance_10LR =vals[4]
fair_opt_mst_conditional_demographic_parity_10LR = vals[5]
fair_opt_mst_conditional_TPRBalance_10LR = vals[6]
fair_opt_mst_conditional_TNRBalance_10LR =vals[7]



In [None]:
#eps = 0.1 privbayes
path = 'data/Bayes_original/eps=0.1/sythetic_data_'
print('eps=0.1')

values = LR_test(path)
privbayes_accuracy_01LR= vals[0]
privbayes_accuracy_parity_01LR = vals[1]
privbayes_demographic_parity_01LR = vals[2]
privbayes_TPRBalance_01LR = vals[3]
privbayes_TNRBalance_01LR =vals[4]
privbayes_conditional_demographic_parity_01LR = vals[5]
privbayes_conditional_TPRBalance_01LR = vals[6]
privbayes_conditional_TNRBalance_01LR =vals[7]

path = 'data/Bayes_original/eps=1/sythetic_data_'
print('eps=1')

values = LR_test(path)
privbayes_accuracy_1LR= vals[0]
privbayes_accuracy_parity_1LR = vals[1]
privbayes_demographic_parity_1LR = vals[2]
privbayes_TPRBalance_1LR = vals[3]
privbayes_TNRBalance_1LR =vals[4]
privbayes_conditional_demographic_parity_1LR = vals[5]
privbayes_conditional_TPRBalance_1LR = vals[6]
privbayes_conditional_TNRBalance_1LR =vals[7]


path = 'data/Bayes_original/eps=10/sythetic_data_'
print('eps=10')

values = LR_test(path)
privbayes_accuracy_10LR= vals[0]
privbayes_accuracy_parity_10LR = vals[1]
privbayes_demographic_parity_10LR = vals[2]
privbayes_TPRBalance_10LR = vals[3]
privbayes_TNRBalance_10LR =vals[4]
privbayes_conditional_demographic_parity_10LR = vals[5]
privbayes_conditional_TPRBalance_10LR = vals[6]
privbayes_conditional_TNRBalance_10LR =vals[7]


In [None]:
#eps = 0.1 fair privbayes
path = 'data/Bayes_fair/eps=0.1/sythetic_data_'
print('eps=0.1')

values = LR_test(path)
fair_privbayes_accuracy_01LR= vals[0]
fair_privbayes_accuracy_parity_01LR = vals[1]
fair_privbayes_demographic_parity_01LR = vals[2]
fair_privbayes_TPRBalance_01LR = vals[3]
fair_privbayes_TNRBalance_01LR =vals[4]
fair_privbayes_conditional_demographic_parity_01LR = vals[5]
fair_privbayes_conditional_TPRBalance_01LR = vals[6]
fair_privbayes_conditional_TNRBalance_01LR =vals[7]

path = 'data/Bayes_fair/eps=1/sythetic_data_'
print('eps=1')

values = LR_test(path)
fair_privbayes_accuracy_1LR= vals[0]
fair_privbayes_accuracy_parity_1LR = vals[1]
fair_privbayes_demographic_parity_1LR = vals[2]
fair_privbayes_TPRBalance_1LR = vals[3]
fair_privbayes_TNRBalance_1LR =vals[4]
fair_privbayes_conditional_demographic_parity_1LR = vals[5]
fair_privbayes_conditional_TPRBalance_1LR = vals[6]
fair_privbayes_conditional_TNRBalance_1LR =vals[7]


path = 'data/Bayes_fair/eps=10/sythetic_data_'
print('eps=10')

values = LR_test(path)
fair_privbayes_accuracy_10LR= vals[0]
fair_privbayes_accuracy_parity_10LR = vals[1]
fair_privbayes_demographic_parity_10LR = vals[2]
fair_privbayes_TPRBalance_10LR = vals[3]
fair_privbayes_TNRBalance_10LR =vals[4]
fair_privbayes_conditional_demographic_parity_10LR = vals[5]
fair_privbayes_conditional_TPRBalance_10LR = vals[6]
fair_privbayes_conditional_TNRBalance_10LR =vals[7]


# Random Forest

In [None]:
def RF_test(path):
    accuracy_vals= []
    accuracy_parity_vals = []
    demographic_parity_vals = []
    TPRBalance_vals = []
    TNRBalance_vals =[]
    conditional_demographic_parity_vals = []
    conditional_TPRBalance_vals = []
    conditional_TNRBalance_vals =[]

    for i in range(10):
        # training models on greedy_fair_mst
        testpath = "data/adult.csv"
        test = pd.read_csv(testpath)
        test_lables = test.pop('income>50K')
        #tf.convert_to_tensor(test)


        trainpath = f'{path}{i}.csv'
        train = pd.read_csv(trainpath)
        train_lables = train.pop('income>50K')

        rf = RandomForestRegressor(n_estimators = 200)
        rf.fit(train, train_lables)
        predictions = rf.predict(test)
        #model.fit(train, train_lables, epochs=10, batch_size=BATCH_SIZE)

        test['Income'] = test_lables
        test['Predicted'] = predictions

        test = test.round({'Predicted': 0})
        test["Predicted"]=test["Predicted"].astype(int)

        test['ACC'] = ((test['Predicted'] == 1) & (test['Income'] == 1)) | ((test['Predicted'] == 0) & (test['Income'] == 0))
        test['S0TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 0) 
        test['S0TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0Pos'] = (test['Income'] == 1)  & (test['sex'] == 0) 
        test['S0Neg'] = (test['Income'] == 0)  & (test['sex'] == 0) 
        test['S0pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 0) 
        test['S0count'] = (test['sex'] == 0)
        test['S1TP'] = (test['Predicted'] == 1) & (test['Income'] == 1) & (test['sex'] == 1) 
        test['S1TN'] = (test['Predicted'] == 0) & (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1Pos'] = (test['Income'] == 1)  & (test['sex'] == 1) 
        test['S1Neg'] = (test['Income'] == 0)  & (test['sex'] == 1) 
        test['S1pred_pos'] = (test['Predicted'] == 1) & (test['sex'] == 1) 
        test['S1count'] = (test['sex'] == 1)
        test['S0ACC'] = (test['ACC'] == 1)  & (test['sex'] == 1) 
        test['S1ACC'] =(test['ACC'] == 1)  & (test['sex'] == 0) 

        accuracy = test['ACC'].sum()/test.shape[0]

        accuracy_vals.append(accuracy)


        sex0_accuracy = test['S0ACC'].sum()/test['S0count'].sum()
        sex1_accuracy = test['S1ACC'].sum()/test['S1count'].sum()
        accuracy_parity_vals.append(abs(sex1_accuracy-sex0_accuracy))


        sex1 = test.loc[test['sex'] == 1]
        sex0 = test.loc[test['sex'] == 0]
        sex1_count = len(sex1.index)
        sex0_count = len(sex0.index)
        demographic_parity = abs((sex1['Predicted'].sum()/sex1_count) -(sex0['Predicted'].sum()/sex0_count))
        demographic_parity_vals.append(demographic_parity)

        sex1TPR = len(sex1.loc[(sex1['Income'] == 1) & (sex1['Predicted'] == 1)].index)/len(sex1.loc[(sex1['Income'] == 1)].index)
        sex0TPR = len(sex0.loc[(sex0['Income'] == 1) & (sex0['Predicted'] == 1)].index)/len(sex0.loc[(sex0['Income'] == 1)].index) 
        TPRBalance = abs(sex1TPR - sex0TPR)
        TPRBalance_vals.append(TPRBalance)

        sex1TNR = len(sex1.loc[(sex1['Income'] == 0) & (sex1['Predicted'] == 0)].index)/len(sex1.loc[(sex1['Income'] == 0)].index)
        sex0TNR = len(sex0.loc[(sex0['Income'] == 0) & (sex0['Predicted'] == 0)].index)/len(sex0.loc[(sex0['Income'] == 0)].index) 
        TNRBalance = abs(sex1TNR - sex0TNR)
        TNRBalance_vals.append(TNRBalance)


        admis_group = test.groupby(admissible, sort = False)


        tp0 = admis_group['S0TP'].sum()
        pos0 = admis_group['S0Pos'].sum()
        tn0 = admis_group['S0TN'].sum()
        neg0 = admis_group['S0Neg'].sum()
        pred_pos0 = admis_group['S0pred_pos'].sum()
        counts0 = admis_group['S0count'].sum()

        sex0_group_tpr = (tp0/pos0)
        sex0_group_tpr = sex0_group_tpr.values
        sex0_group_tpr = sex0_group_tpr[~np.isnan(sex0_group_tpr)]

        sex0_group_tnr = (tn0/neg0)
        sex0_group_tnr =sex0_group_tnr.values
        sex0_group_tnr = sex0_group_tnr[~np.isnan(sex0_group_tnr)]

        sex0_group_dp = (pred_pos0/counts0)
        sex0_group_dp =sex0_group_dp.values
        sex0_group_dp = sex0_group_dp[~np.isnan(sex0_group_dp)]


        tp1 = admis_group['S1TP'].sum()
        pos1 = admis_group['S1Pos'].sum()
        tn1 = admis_group['S1TN'].sum()
        neg1 = admis_group['S1Neg'].sum()
        pred_pos1 = admis_group['S1pred_pos'].sum()
        counts1 = admis_group['S1count'].sum()

        sex1_group_tpr = (tp1/pos1)
        sex1_group_tpr = sex1_group_tpr.values
        sex1_group_tpr = sex1_group_tpr[~np.isnan(sex1_group_tpr)]

        sex1_group_tnr = (tn1/neg1)
        sex1_group_tnr =sex1_group_tnr.values
        sex1_group_tnr = sex1_group_tnr[~np.isnan(sex1_group_tnr)]

        sex1_group_dp = (pred_pos1/counts1)
        sex1_group_dp =sex1_group_dp.values
        sex1_group_dp = sex1_group_dp[~np.isnan(sex1_group_dp)]

        conditional_demographic_parity_vals.append(abs(np.mean(sex1_group_dp)-np.mean(sex0_group_dp)))
        conditional_TPRBalance_vals.append(abs(np.mean(sex1_group_tpr)-np.mean(sex0_group_tpr)))
        conditional_TNRBalance_vals.append(abs(np.mean(sex1_group_tnr)-np.mean(sex0_group_tnr)))

    print("Average Accuracy: " + str(np.mean(accuracy_vals)))
    print("Average Accuracy Parity: " + str(np.mean(accuracy_parity_vals)))
    print("Average Demographic Parity: " + str(np.mean(demographic_parity_vals)))
    print("Average TPR Balance: " + str(np.mean(TPRBalance_vals)))
    print("Average TNR Balance: " + str(np.mean(TNRBalance_vals)))

    print("Average Conditional Demographic Parity: " + str(np.mean(conditional_demographic_parity_vals)))
    print("Average Conditional TPR Balance: " + str(np.mean(conditional_TPRBalance_vals)))
    print("Average Conditional TNR Balance: " + str(np.mean(conditional_TNRBalance_vals)))

    return((accuracy_vals,accuracy_parity_vals,demographic_parity_vals,TPRBalance_vals,TNRBalance_vals,conditional_demographic_parity_vals,conditional_TPRBalance_vals,conditional_TNRBalance_vals))

In [None]:
#eps = 0.1 MST
path = 'data/original/eps=0.1/results_original_'
print("eps=0.1")
vals = RF_test(path)
mst_accuracy_01RF= vals[0]
mst_accuracy_parity_01RF = vals[1]
mst_demographic_parity_01RF = vals[2]
mst_TPRBalance_01RF = vals[3]
mst_TNRBalance_01RF =vals[4]
mst_conditional_demographic_parity_01RF = vals[5]
mst_conditional_TPRBalance_01RF = vals[6]
mst_conditional_TNRBalance_01RF =vals[7]

path = 'data/original/eps=1/results_original_'
print("eps=1")
vals = RF_test(path)
mst_accuracy_1RF= vals[0]
mst_accuracy_parity_1RF = vals[1]
mst_demographic_parity_1RF = vals[2]
mst_TPRBalance_1RF = vals[3]
mst_TNRBalance_1RF =vals[4]
mst_conditional_demographic_parity_1RF = vals[5]
mst_conditional_TPRBalance_1RF = vals[6]
mst_conditional_TNRBalance_1RF =vals[7]

path = 'data/original/eps=10/results_original_'
print("eps=10")
vals = RF_test(path)
mst_accuracy_10RF= vals[0]
mst_accuracy_parity_10RF = vals[1]
mst_demographic_parity_10RF = vals[2]
mst_TPRBalance_10RF = vals[3]
mst_TNRBalance_10RF =vals[4]
mst_conditional_demographic_parity_10RF = vals[5]
mst_conditional_TPRBalance_10RF = vals[6]
mst_conditional_TNRBalance_10RF =vals[7]


In [None]:
#eps = 0.1 fair_greedy_mst
path = 'data/fair_greedy/eps=0.1/results_greedy_'
print("eps=0.1")
vals = RF_test(path)
fair_greedy_mst_accuracy_01RF= vals[0]
fair_greedy_mst_accuracy_parity_01RF = vals[1]
fair_greedy_mst_demographic_parity_01RF = vals[2]
fair_greedy_mst_TPRBalance_01RF = vals[3]
fair_greedy_mst_TNRBalance_01RF =vals[4]
fair_greedy_mst_conditional_demographic_parity_01RF = vals[5]
fair_greedy_mst_conditional_TPRBalance_01RF = vals[6]
fair_greedy_mst_conditional_TNRBalance_01RF =vals[7]

path = 'data/fair_greedy/eps=1/results_greedy_'
print("eps=1")
vals = RF_test(path)
fair_greedy_mst_accuracy_1RF= vals[0]
fair_greedy_mst_accuracy_parity_1RF = vals[1]
fair_greedy_mst_demographic_parity_1RF = vals[2]
fair_greedy_mst_TPRBalance_1RF = vals[3]
fair_greedy_mst_TNRBalance_1RF =vals[4]
fair_greedy_mst_conditional_demographic_parity_1RF = vals[5]
fair_greedy_mst_conditional_TPRBalance_1RF = vals[6]
fair_greedy_mst_conditional_TNRBalance_1RF =vals[7]

path = 'data/fair_greedy/eps=10/results_greedy_'
print("eps=10")
vals = RF_test(path)
fair_greedy_mst_accuracy_10RF= vals[0]
fair_greedy_mst_accuracy_parity_10RF = vals[1]
fair_greedy_mst_demographic_parity_10RF = vals[2]
fair_greedy_mst_TPRBalance_10RF = vals[3]
fair_greedy_mst_TNRBalance_10RF =vals[4]
fair_greedy_mst_conditional_demographic_parity_10RF = vals[5]
fair_greedy_mst_conditional_TPRBalance_10RF = vals[6]
fair_greedy_mst_conditional_TNRBalance_10RF =vals[7]



In [None]:
#eps = 0.1 privbayes
path = 'data/Bayes_original/eps=0.1/sythetic_data_'
print("eps=0.1")
vals = RF_test(path)
privbayes_accuracy_01RF= vals[0]
privbayes_accuracy_parity_01RF = vals[1]
privbayes_demographic_parity_01RF = vals[2]
privbayes_TPRBalance_01RF = vals[3]
privbayes_TNRBalance_01RF =vals[4]
privbayes_conditional_demographic_parity_01RF = vals[5]
privbayes_conditional_TPRBalance_01RF = vals[6]
privbayes_conditional_TNRBalance_01RF =vals[7]

path = 'data/Bayes_original/eps=1/sythetic_data_'
print("eps=1")
vals = RF_test(path)
privbayes_accuracy_1RF= vals[0]
privbayes_accuracy_parity_1RF = vals[1]
privbayes_demographic_parity_1RF = vals[2]
privbayes_TPRBalance_1RF = vals[3]
privbayes_TNRBalance_1RF =vals[4]
privbayes_conditional_demographic_parity_1RF = vals[5]
privbayes_conditional_TPRBalance_1RF = vals[6]
privbayes_conditional_TNRBalance_1RF =vals[7]

path = 'data/Bayes_original/eps=10/sythetic_data_'
print("eps=10")
vals = RF_test(path)
privbayes_accuracy_10RF= vals[0]
privbayes_accuracy_parity_10RF = vals[1]
privbayes_demographic_parity_10RF = vals[2]
privbayes_TPRBalance_10RF = vals[3]
privbayes_TNRBalance_10RF =vals[4]
privbayes_conditional_demographic_parity_10RF = vals[5]
privbayes_conditional_TPRBalance_10RF = vals[6]
privbayes_conditional_TNRBalance_10RF =vals[7]

In [None]:
#eps = 0.1 fair_privbayes
path = 'data/Bayes_fair/eps=0.1/sythetic_data_'
print("eps=0.1")
vals = RF_test(path)
fair_privbayes_accuracy_01RF= vals[0]
fair_privbayes_accuracy_parity_01RF = vals[1]
fair_privbayes_demographic_parity_01RF = vals[2]
fair_privbayes_TPRBalance_01RF = vals[3]
fair_privbayes_TNRBalance_01RF =vals[4]
fair_privbayes_conditional_demographic_parity_01RF = vals[5]
fair_privbayes_conditional_TPRBalance_01RF = vals[6]
fair_privbayes_conditional_TNRBalance_01RF =vals[7]


path = 'data/Bayes_fair/eps=1/sythetic_data_'
print("eps=1")
vals = RF_test(path)
fair_privbayes_accuracy_1RF= vals[0]
fair_privbayes_accuracy_parity_1RF = vals[1]
fair_privbayes_demographic_parity_1RF = vals[2]
fair_privbayes_TPRBalance_1RF = vals[3]
fair_privbayes_TNRBalance_1RF =vals[4]
fair_privbayes_conditional_demographic_parity_1RF = vals[5]
fair_privbayes_conditional_TPRBalance_1RF = vals[6]
fair_privbayes_conditional_TNRBalance_1RF =vals[7]


path = 'data/Bayes_fair/eps=10/sythetic_data_'
print("eps=10")
vals = RF_test(path)
fair_privbayes_accuracy_10RF= vals[0]
fair_privbayes_accuracy_parity_10RF = vals[1]
fair_privbayes_demographic_parity_10RF = vals[2]
fair_privbayes_TPRBalance_10RF = vals[3]
fair_privbayes_TNRBalance_10RF =vals[4]
fair_privbayes_conditional_demographic_parity_10RF = vals[5]
fair_privbayes_conditional_TPRBalance_10RF = vals[6]
fair_privbayes_conditional_TNRBalance_10RF =vals[7]


In [None]:
#eps = 0.1 fair_opt_mst
path = 'data/fair_opt/eps=0.1/results_opt_'
print("eps=0.1")
vals = RF_test(path)

fair_opt_mst_accuracy_01RF= vals[0]
fair_opt_mst_accuracy_parity_01RF = vals[1]
fair_opt_mst_demographic_parity_01RF = vals[2]
fair_opt_mst_TPRBalance_01RF = vals[3]
fair_opt_mst_TNRBalance_01RF =vals[4]
fair_opt_mst_conditional_demographic_parity_01RF = vals[5]
fair_opt_mst_conditional_TPRBalance_01RF = vals[6]
fair_opt_mst_conditional_TNRBalance_01RF =vals[7]

path = 'data/fair_opt/eps=1/results_opt_'
print("eps=1")
vals = RF_test(path)

fair_opt_mst_accuracy_1RF= vals[0]
fair_opt_mst_accuracy_parity_1RF = vals[1]
fair_opt_mst_demographic_parity_1RF = vals[2]
fair_opt_mst_TPRBalance_1RF = vals[3]
fair_opt_mst_TNRBalance_1RF =vals[4]
fair_opt_mst_conditional_demographic_parity_1RF = vals[5]
fair_opt_mst_conditional_TPRBalance_1RF = vals[6]
fair_opt_mst_conditional_TNRBalance_1RF =vals[7]

path = 'data/fair_opt/eps=10/results_opt_'
print("eps=10")
vals = RF_test(path)

fair_opt_mst_accuracy_10RF= vals[0]
fair_opt_mst_accuracy_parity_10RF = vals[1]
fair_opt_mst_demographic_parity_10RF = vals[2]
fair_opt_mst_TPRBalance_10RF = vals[3]
fair_opt_mst_TNRBalance_10RF =vals[4]
fair_opt_mst_conditional_demographic_parity_10RF = vals[5]
fair_opt_mst_conditional_TPRBalance_10RF = vals[6]
fair_opt_mst_conditional_TNRBalance_10RF =vals[7]


## Vizualizations 

In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_10), np.mean(privbayes_demographic_parity_10), np.mean(fair_greedy_mst_demographic_parity_10), np.mean(fair_opt_mst_demographic_parity_10), np.mean(fair_privbayes_demographic_parity_10) ],
                   [np.mean(mst_TPRBalance_10), np.mean(privbayes_TPRBalance_10), np.mean(fair_greedy_mst_TPRBalance_10), np.mean(fair_opt_mst_TPRBalance_10), np.mean(fair_privbayes_TPRBalance_10) ],
                    [np.mean(mst_TNRBalance_10), np.mean(privbayes_TNRBalance_10), np.mean(fair_greedy_mst_TNRBalance_10), np.mean(fair_opt_mst_TNRBalance_10), np.mean(fair_privbayes_TNRBalance_10) ],
                    [np.mean(mst_conditional_demographic_parity_10), np.mean(privbayes_conditional_demographic_parity_10), np.mean(fair_greedy_mst_conditional_demographic_parity_10), np.mean(fair_opt_mst_conditional_demographic_parity_10), np.mean(fair_privbayes_conditional_demographic_parity_10) ],
                   [np.mean(mst_conditional_TPRBalance_10), np.mean(privbayes_conditional_TPRBalance_10), np.mean(fair_greedy_mst_conditional_TPRBalance_10), np.mean(fair_opt_mst_conditional_TPRBalance_10), np.mean(fair_privbayes_conditional_TPRBalance_10) ],
                    [np.mean(mst_conditional_TNRBalance_10), np.mean(privbayes_conditional_TNRBalance_10), np.mean(fair_greedy_mst_conditional_TNRBalance_10), np.mean(fair_opt_mst_conditional_TNRBalance_10), np.mean(fair_privbayes_conditional_TNRBalance_10) ]])


plt.figure(figsize=(9, 9))
palette = sns.color_palette("Blues_d",n_colors=15)
palette.reverse()
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 10, MLP")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'MLP10.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_10LR), np.mean(privbayes_demographic_parity_10LR), np.mean(fair_greedy_mst_demographic_parity_10LR), np.mean(fair_opt_mst_demographic_parity_10LR), np.mean(fair_privbayes_demographic_parity_10LR) ],
                   [np.mean(mst_TPRBalance_10LR), np.mean(privbayes_TPRBalance_10LR), np.mean(fair_greedy_mst_TPRBalance_10LR), np.mean(fair_opt_mst_TPRBalance_10LR), np.mean(fair_privbayes_TPRBalance_10LR) ],
                    [np.mean(mst_TNRBalance_10LR), np.mean(privbayes_TNRBalance_10LR), np.mean(fair_greedy_mst_TNRBalance_10LR), np.mean(fair_opt_mst_TNRBalance_10LR), np.mean(fair_privbayes_TNRBalance_10LR) ],
                    [np.mean(mst_conditional_demographic_parity_10LR), np.mean(privbayes_conditional_demographic_parity_10LR), np.mean(fair_greedy_mst_conditional_demographic_parity_10LR), np.mean(fair_opt_mst_conditional_demographic_parity_10LR), np.mean(fair_privbayes_conditional_demographic_parity_10LR) ],
                   [np.mean(mst_conditional_TPRBalance_10LR), np.mean(privbayes_conditional_TPRBalance_10LR), np.mean(fair_greedy_mst_conditional_TPRBalance_10LR), np.mean(fair_opt_mst_conditional_TPRBalance_10LR), np.mean(fair_privbayes_conditional_TPRBalance_10LR) ],
                    [np.mean(mst_conditional_TNRBalance_10LR), np.mean(privbayes_conditional_TNRBalance_10LR), np.mean(fair_greedy_mst_conditional_TNRBalance_10LR), np.mean(fair_opt_mst_conditional_TNRBalance_10LR), np.mean(fair_privbayes_conditional_TNRBalance_10LR) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 10, LR")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'LR10.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_10RF), np.mean(privbayes_demographic_parity_10RF), np.mean(fair_greedy_mst_demographic_parity_10RF), np.mean(fair_opt_mst_demographic_parity_10RF), np.mean(fair_privbayes_demographic_parity_10RF) ],
                   [np.mean(mst_TPRBalance_10RF), np.mean(privbayes_TPRBalance_10RF), np.mean(fair_greedy_mst_TPRBalance_10RF), np.mean(fair_opt_mst_TPRBalance_10RF), np.mean(fair_privbayes_TPRBalance_10RF) ],
                    [np.mean(mst_TNRBalance_10RF), np.mean(privbayes_TNRBalance_10RF), np.mean(fair_greedy_mst_TNRBalance_10RF), np.mean(fair_opt_mst_TNRBalance_10RF), np.mean(fair_privbayes_TNRBalance_10RF) ],
                    [np.mean(mst_conditional_demographic_parity_10RF), np.mean(privbayes_conditional_demographic_parity_10RF), np.mean(fair_greedy_mst_conditional_demographic_parity_10RF), np.mean(fair_opt_mst_conditional_demographic_parity_10RF), np.mean(fair_privbayes_conditional_demographic_parity_10RF) ],
                   [np.mean(mst_conditional_TPRBalance_10RF), np.mean(privbayes_conditional_TPRBalance_10RF), np.mean(fair_greedy_mst_conditional_TPRBalance_10RF), np.mean(fair_opt_mst_conditional_TPRBalance_10RF), np.mean(fair_privbayes_conditional_TPRBalance_10RF) ],
                    [np.mean(mst_conditional_TNRBalance_10RF), np.mean(privbayes_conditional_TNRBalance_10RF), np.mean(fair_greedy_mst_conditional_TNRBalance_10RF), np.mean(fair_opt_mst_conditional_TNRBalance_10RF), np.mean(fair_privbayes_conditional_TNRBalance_10RF) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
ax.set_xlabel("ε = 10, RF")
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'RF10.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_1), np.mean(privbayes_demographic_parity_1), np.mean(fair_greedy_mst_demographic_parity_1), np.mean(fair_opt_mst_demographic_parity_1), np.mean(fair_privbayes_demographic_parity_1) ],
                   [np.mean(mst_TPRBalance_1), np.mean(privbayes_TPRBalance_1), np.mean(fair_greedy_mst_TPRBalance_1), np.mean(fair_opt_mst_TPRBalance_1), np.mean(fair_privbayes_TPRBalance_1) ],
                    [np.mean(mst_TNRBalance_1), np.mean(privbayes_TNRBalance_1), np.mean(fair_greedy_mst_TNRBalance_1), np.mean(fair_opt_mst_TNRBalance_1), np.mean(fair_privbayes_TNRBalance_1) ],
                    [np.mean(mst_conditional_demographic_parity_1), np.mean(privbayes_conditional_demographic_parity_1), np.mean(fair_greedy_mst_conditional_demographic_parity_1), np.mean(fair_opt_mst_conditional_demographic_parity_1), np.mean(fair_privbayes_conditional_demographic_parity_1) ],
                   [np.mean(mst_conditional_TPRBalance_1), np.mean(privbayes_conditional_TPRBalance_1), np.mean(fair_greedy_mst_conditional_TPRBalance_1), np.mean(fair_opt_mst_conditional_TPRBalance_1), np.mean(fair_privbayes_conditional_TPRBalance_1) ],
                    [np.mean(mst_conditional_TNRBalance_1), np.mean(privbayes_conditional_TNRBalance_1), np.mean(fair_greedy_mst_conditional_TNRBalance_1), np.mean(fair_opt_mst_conditional_TNRBalance_1), np.mean(fair_privbayes_conditional_TNRBalance_1) ]])


plt.figure(figsize=(9, 9))
palette = sns.color_palette("Blues_d",n_colors=15)
palette.reverse()
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 1, MLP")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'MLP1.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_1LR), np.mean(privbayes_demographic_parity_1LR), np.mean(fair_greedy_mst_demographic_parity_1LR), np.mean(fair_opt_mst_demographic_parity_1LR), np.mean(fair_privbayes_demographic_parity_1LR) ],
                   [np.mean(mst_TPRBalance_1LR), np.mean(privbayes_TPRBalance_1LR), np.mean(fair_greedy_mst_TPRBalance_1LR), np.mean(fair_opt_mst_TPRBalance_1LR), np.mean(fair_privbayes_TPRBalance_1LR) ],
                    [np.mean(mst_TNRBalance_1LR), np.mean(privbayes_TNRBalance_1LR), np.mean(fair_greedy_mst_TNRBalance_1LR), np.mean(fair_opt_mst_TNRBalance_1LR), np.mean(fair_privbayes_TNRBalance_1LR) ],
                    [np.mean(mst_conditional_demographic_parity_1LR), np.mean(privbayes_conditional_demographic_parity_1LR), np.mean(fair_greedy_mst_conditional_demographic_parity_1LR), np.mean(fair_opt_mst_conditional_demographic_parity_1LR), np.mean(fair_privbayes_conditional_demographic_parity_1LR) ],
                   [np.mean(mst_conditional_TPRBalance_1LR), np.mean(privbayes_conditional_TPRBalance_1LR), np.mean(fair_greedy_mst_conditional_TPRBalance_1LR), np.mean(fair_opt_mst_conditional_TPRBalance_1LR), np.mean(fair_privbayes_conditional_TPRBalance_1LR) ],
                    [np.mean(mst_conditional_TNRBalance_1LR), np.mean(privbayes_conditional_TNRBalance_1LR), np.mean(fair_greedy_mst_conditional_TNRBalance_1LR), np.mean(fair_opt_mst_conditional_TNRBalance_1LR), np.mean(fair_privbayes_conditional_TNRBalance_1LR) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 1, LR")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'LR1.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_1RF), np.mean(privbayes_demographic_parity_1RF), np.mean(fair_greedy_mst_demographic_parity_1RF), np.mean(fair_opt_mst_demographic_parity_1RF), np.mean(fair_privbayes_demographic_parity_1RF) ],
                   [np.mean(mst_TPRBalance_1RF), np.mean(privbayes_TPRBalance_1RF), np.mean(fair_greedy_mst_TPRBalance_1RF), np.mean(fair_opt_mst_TPRBalance_1RF), np.mean(fair_privbayes_TPRBalance_1RF) ],
                    [np.mean(mst_TNRBalance_1RF), np.mean(privbayes_TNRBalance_1RF), np.mean(fair_greedy_mst_TNRBalance_1RF), np.mean(fair_opt_mst_TNRBalance_1RF), np.mean(fair_privbayes_TNRBalance_1RF) ],
                    [np.mean(mst_conditional_demographic_parity_1RF), np.mean(privbayes_conditional_demographic_parity_1RF), np.mean(fair_greedy_mst_conditional_demographic_parity_1RF), np.mean(fair_opt_mst_conditional_demographic_parity_1RF), np.mean(fair_privbayes_conditional_demographic_parity_1RF) ],
                   [np.mean(mst_conditional_TPRBalance_1RF), np.mean(privbayes_conditional_TPRBalance_1RF), np.mean(fair_greedy_mst_conditional_TPRBalance_1RF), np.mean(fair_opt_mst_conditional_TPRBalance_1RF), np.mean(fair_privbayes_conditional_TPRBalance_1RF) ],
                    [np.mean(mst_conditional_TNRBalance_1RF), np.mean(privbayes_conditional_TNRBalance_1RF), np.mean(fair_greedy_mst_conditional_TNRBalance_1RF), np.mean(fair_opt_mst_conditional_TNRBalance_1RF), np.mean(fair_privbayes_conditional_TNRBalance_1RF) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
ax.set_xlabel("ε = 1, RF")
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'RF1.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_01), np.mean(privbayes_demographic_parity_01), np.mean(fair_greedy_mst_demographic_parity_01), np.mean(fair_opt_mst_demographic_parity_01), np.mean(fair_privbayes_demographic_parity_01) ],
                   [np.mean(mst_TPRBalance_01), np.mean(privbayes_TPRBalance_01), np.mean(fair_greedy_mst_TPRBalance_01), np.mean(fair_opt_mst_TPRBalance_01), np.mean(fair_privbayes_TPRBalance_01) ],
                    [np.mean(mst_TNRBalance_01), np.mean(privbayes_TNRBalance_01), np.mean(fair_greedy_mst_TNRBalance_01), np.mean(fair_opt_mst_TNRBalance_01), np.mean(fair_privbayes_TNRBalance_01) ],
                    [np.mean(mst_conditional_demographic_parity_01), np.mean(privbayes_conditional_demographic_parity_01), np.mean(fair_greedy_mst_conditional_demographic_parity_01), np.mean(fair_opt_mst_conditional_demographic_parity_01), np.mean(fair_privbayes_conditional_demographic_parity_01) ],
                   [np.mean(mst_conditional_TPRBalance_01), np.mean(privbayes_conditional_TPRBalance_01), np.mean(fair_greedy_mst_conditional_TPRBalance_01), np.mean(fair_opt_mst_conditional_TPRBalance_01), np.mean(fair_privbayes_conditional_TPRBalance_01) ],
                    [np.mean(mst_conditional_TNRBalance_01), np.mean(privbayes_conditional_TNRBalance_01), np.mean(fair_greedy_mst_conditional_TNRBalance_01), np.mean(fair_opt_mst_conditional_TNRBalance_01), np.mean(fair_privbayes_conditional_TNRBalance_01) ]])


plt.figure(figsize=(9, 9))
palette = sns.color_palette("Blues_d",n_colors=15)
palette.reverse()
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 0.1, MLP")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'MLP01.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_01LR), np.mean(privbayes_demographic_parity_01LR), np.mean(fair_greedy_mst_demographic_parity_01LR), np.mean(fair_opt_mst_demographic_parity_01LR), np.mean(fair_privbayes_demographic_parity_01LR) ],
                   [np.mean(mst_TPRBalance_01LR), np.mean(privbayes_TPRBalance_01LR), np.mean(fair_greedy_mst_TPRBalance_01LR), np.mean(fair_opt_mst_TPRBalance_01LR), np.mean(fair_privbayes_TPRBalance_01LR) ],
                    [np.mean(mst_TNRBalance_01LR), np.mean(privbayes_TNRBalance_01LR), np.mean(fair_greedy_mst_TNRBalance_01LR), np.mean(fair_opt_mst_TNRBalance_01LR), np.mean(fair_privbayes_TNRBalance_01LR) ],
                    [np.mean(mst_conditional_demographic_parity_01LR), np.mean(privbayes_conditional_demographic_parity_01LR), np.mean(fair_greedy_mst_conditional_demographic_parity_01LR), np.mean(fair_opt_mst_conditional_demographic_parity_01LR), np.mean(fair_privbayes_conditional_demographic_parity_01LR) ],
                   [np.mean(mst_conditional_TPRBalance_01LR), np.mean(privbayes_conditional_TPRBalance_01LR), np.mean(fair_greedy_mst_conditional_TPRBalance_01LR), np.mean(fair_opt_mst_conditional_TPRBalance_01LR), np.mean(fair_privbayes_conditional_TPRBalance_01LR) ],
                    [np.mean(mst_conditional_TNRBalance_01LR), np.mean(privbayes_conditional_TNRBalance_01LR), np.mean(fair_greedy_mst_conditional_TNRBalance_01LR), np.mean(fair_opt_mst_conditional_TNRBalance_01LR), np.mean(fair_privbayes_conditional_TNRBalance_01LR) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
ax.set_xlabel("ε = 0.1, LR")
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'LR01.pdf', bbox_inches='tight')
plt.show()





In [None]:
measures = ["DP", "TPR", "TNR", "CDP",
              "CTPR", "CTNR"]
mechanisms = ["MST", "PrivBayes.", "GreedyMST",
           "ExpMST", "GreedyPrivBayes"]

values = np.array([[np.mean(mst_demographic_parity_01RF), np.mean(privbayes_demographic_parity_01RF), np.mean(fair_greedy_mst_demographic_parity_01RF), np.mean(fair_opt_mst_demographic_parity_01RF), np.mean(fair_privbayes_demographic_parity_01RF) ],
                   [np.mean(mst_TPRBalance_01RF), np.mean(privbayes_TPRBalance_01RF), np.mean(fair_greedy_mst_TPRBalance_01RF), np.mean(fair_opt_mst_TPRBalance_01RF), np.mean(fair_privbayes_TPRBalance_01RF) ],
                    [np.mean(mst_TNRBalance_01RF), np.mean(privbayes_TNRBalance_01RF), np.mean(fair_greedy_mst_TNRBalance_01RF), np.mean(fair_opt_mst_TNRBalance_01RF), np.mean(fair_privbayes_TNRBalance_01RF) ],
                    [np.mean(mst_conditional_demographic_parity_01RF), np.mean(privbayes_conditional_demographic_parity_01RF), np.mean(fair_greedy_mst_conditional_demographic_parity_01RF), np.mean(fair_opt_mst_conditional_demographic_parity_01RF), np.mean(fair_privbayes_conditional_demographic_parity_01RF) ],
                   [np.mean(mst_conditional_TPRBalance_01RF), np.mean(privbayes_conditional_TPRBalance_01RF), np.mean(fair_greedy_mst_conditional_TPRBalance_01RF), np.mean(fair_opt_mst_conditional_TPRBalance_01RF), np.mean(fair_privbayes_conditional_TPRBalance_01RF) ],
                    [np.mean(mst_conditional_TNRBalance_01RF), np.mean(privbayes_conditional_TNRBalance_01RF), np.mean(fair_greedy_mst_conditional_TNRBalance_01RF), np.mean(fair_opt_mst_conditional_TNRBalance_01RF), np.mean(fair_privbayes_conditional_TNRBalance_01RF) ]])


plt.figure(figsize=(9, 9))
ax = sns.heatmap(values,annot=True, linewidth=0.5, cmap = sns.cm.rocket_r)
#ax.set_xticks(range(len(mechanisms)))
ax.set_xticklabels(mechanisms,rotation = 90)
ax.set_xlabel("ε = 0.1, RF")
#ax.set_yticks(range(len(measures)))
ax.set_yticklabels(measures,rotation = 0)
plt.savefig(artifactspath + 'RF01.pdf', bbox_inches='tight')
plt.show()





In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01LR), np.mean(mst_accuracy_1LR), np.mean(mst_accuracy_10LR)]
bar1 = plt.bar(ind, xvals, width)

yvals = [np.mean(privbayes_accuracy_01LR), np.mean(privbayes_accuracy_1LR), np.mean(privbayes_accuracy_10LR)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_accuracy_01LR), np.mean(fair_greedy_mst_accuracy_1LR), np.mean(fair_greedy_mst_accuracy_10LR)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01LR), np.mean(fair_opt_mst_accuracy_1LR), np.mean(fair_opt_mst_accuracy_10LR)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_accuracy_01LR), np.mean(fair_privbayes_accuracy_1LR), np.mean(fair_privbayes_accuracy_10LR)]
bar5 = plt.bar(ind+width*4, xyvals, width)


plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("LR Accuracy")

plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1, bar2, bar3,bar4,bar5), ('MST','Privbayes', 'GreedyMST', 'ExpMST','GreedyPrivbayes'), fontsize = 10 )
plt.savefig(artifactspath + 'ACCLR.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01LR), np.mean(mst_accuracy_1LR), np.mean(mst_accuracy_10LR)]
bar1 = plt.bar(ind, xvals, width)

  
zvals = [np.mean(fair_greedy_mst_accuracy_01LR), np.mean(fair_greedy_mst_accuracy_1LR), np.mean(fair_greedy_mst_accuracy_10LR)]
bar3 = plt.bar(ind+width*1, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01LR), np.mean(fair_opt_mst_accuracy_1LR), np.mean(fair_opt_mst_accuracy_10LR)]
bar4 = plt.bar(ind+width*2, xxvals, width)



plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("LR Accuracy")

plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1, bar2, bar3,bar4,bar5), ('MST','Privbayes', 'GreedyMST', 'ExpMST','GreedyPrivbayes'), fontsize = 10 )
plt.savefig(artifactspath + 'ACCLRMST.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01RF), np.mean(mst_accuracy_1RF), np.mean(mst_accuracy_10RF)]
bar1 = plt.bar(ind, xvals, width)

yvals = [np.mean(privbayes_accuracy_01RF), np.mean(privbayes_accuracy_1RF), np.mean(privbayes_accuracy_10RF)]
bar2 = plt.bar(ind+width, yvals, width)
  
zvals = [np.mean(fair_greedy_mst_accuracy_01RF), np.mean(fair_greedy_mst_accuracy_1RF), np.mean(fair_greedy_mst_accuracy_10RF)]
bar3 = plt.bar(ind+width*2, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01RF), np.mean(fair_opt_mst_accuracy_1RF), np.mean(fair_opt_mst_accuracy_10RF)]
bar4 = plt.bar(ind+width*3, xxvals, width)

xyvals = [np.mean(fair_privbayes_accuracy_01RF), np.mean(fair_privbayes_accuracy_1RF), np.mean(fair_privbayes_accuracy_10RF)]
bar5 = plt.bar(ind+width*4, xyvals, width)


plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("RF Accuracy")

plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1, bar2, bar3,bar4,bar5), ('MST','Privbayes', 'GreedyMST', 'ExpMST','GreedyPrivbayes'), fontsize = 10 )
plt.savefig(artifactspath + 'ACCRF.pdf', bbox_inches='tight')

In [None]:
N = 3
workload_len = len(list(itertools.combinations(data.domain, 1)))
ind = np.arange(N) 
width = 0.15
#plt.yscale("log")
xvals = [np.mean(mst_accuracy_01RF), np.mean(mst_accuracy_1RF), np.mean(mst_accuracy_10RF)]
bar1 = plt.bar(ind, xvals, width)

zvals = [np.mean(fair_greedy_mst_accuracy_01RF), np.mean(fair_greedy_mst_accuracy_1RF), np.mean(fair_greedy_mst_accuracy_10RF)]
bar3 = plt.bar(ind+width*1, zvals, width)

xxvals = [np.mean(fair_opt_mst_accuracy_01RF), np.mean(fair_opt_mst_accuracy_1RF), np.mean(fair_opt_mst_accuracy_10RF)]
bar4 = plt.bar(ind+width*2, xxvals, width)


plt.xlabel("Privacy Budgets")
plt.ylabel('Accuracy')
plt.title("RF Accuracy")

plt.ylim([0, 1])
plt.xticks(ind+width,['ε=0.1','ε=1','ε=10'])
#plt.legend( (bar1, bar2, bar3,bar4,bar5), ('MST','Privbayes', 'GreedyMST', 'ExpMST','GreedyPrivbayes'), fontsize = 10 )
plt.savefig(artifactspath + 'ACCRFMST.pdf', bbox_inches='tight')

In [None]:
print(np.mean(fair_privbayes_accuracy_01RF)-np.mean(privbayes_accuracy_01RF))
print(np.mean(fair_privbayes_accuracy_1RF)-np.mean(privbayes_accuracy_1RF))
print(np.mean(fair_privbayes_accuracy_10RF)-np.mean(privbayes_accuracy_10RF))

In [None]:
print(np.mean(fair_privbayes_accuracy_01)-np.mean(privbayes_accuracy_01))
print(np.mean(fair_privbayes_accuracy_1)-np.mean(privbayes_accuracy_1))
print(np.mean(fair_privbayes_accuracy_10)-np.mean(privbayes_accuracy_10))

In [None]:
print(np.mean(fair_privbayes_accuracy_01)-np.mean(privbayes_accuracy_01))
print(np.mean(fair_privbayes_accuracy_1)-np.mean(privbayes_accuracy_1))
print(np.mean(fair_privbayes_accuracy_10)-np.mean(privbayes_accuracy_10))