In [None]:
import json, os, pickle
from os.path import join as ojoin

In [None]:
# Mapping that is later indexed as attribute_values[attribute] to iterate over
# the values of that attribute.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default encoding.
with open('attribute_values.json', encoding='utf-8') as f:
    attribute_values = json.load(f)

In [None]:
# Root of the simulation outputs, relative to the working directory.
sim_dir = ojoin('..', 'simulation_results')

# (approach, attribute) -> <sim_dir>/<approach>/dc/<attribute>
directories = {
    (approach, attribute): ojoin(sim_dir, approach, 'dc', attribute)
    for approach in ('mip', 'fairim')
    for attribute in ('age', 'gender', 'ethnicity')
}

In [None]:
# Directory of the pickled input graphs, resolved relative to the working
# directory: four levels up, then code/fairim/networks.
graph_dir = ojoin(*(['..'] * 4), 'code', 'fairim', 'networks')

In [None]:
# Graphs already unpickled in this kernel session, keyed by file path.
# Re-running this cell resets the cache.
_loaded_graphs = {}


def _load_graph(graph_number):
    """Return graph ``graph_spa_500_<graph_number>`` from ``graph_dir``, unpickling it at most once per path."""
    graph_name = 'graph_spa_500_%d'%graph_number
    graph_fpath = ojoin(graph_dir, '%s.pickle'%graph_name)
    if graph_fpath not in _loaded_graphs:
        # SECURITY: pickle.load can execute arbitrary code; only point
        # graph_dir at files from a trusted source.
        with open(graph_fpath, 'rb') as f:
            _loaded_graphs[graph_fpath] = pickle.load(f)
    return _loaded_graphs[graph_fpath]


def get_group_coverage(approach, attribute, value, graph_number):
    """Sum the per-node coverage over the nodes whose ``attribute`` equals ``value``.

    The graph is read from ``<graph_dir>/graph_spa_500_<graph_number>.pickle``
    (cached after the first load, because the same graph is requested once per
    approach and per attribute value). The coverage values are read from
    ``<sim_dir>/<approach>/dc/<attribute>/output_<graph_number>.txt``, one
    float per non-blank line.

    Parameters
    ----------
    approach
        Name of the results sub-directory under ``sim_dir``.
    attribute
        Node-attribute key used to select the group.
    value
        Attribute value a node must have to belong to the group.
    graph_number : int
        Index used in both the graph file name and the coverage file name.

    Returns
    -------
    The sum of the coverage entries of the selected nodes (``0`` when no
    node matches).

    Raises
    ------
    AssertionError
        If the coverage file does not contain exactly 500 values.
    KeyError
        If a node has no ``attribute`` entry.
    """
    graph = _load_graph(graph_number)

    # Node ids are used directly as list indices into the coverage list below.
    subgraph_nodes = [node_id for node_id, attributes in graph.nodes(data=True)
                      if attributes[attribute] == value]

    coverage_fpath = ojoin(sim_dir, approach, 'dc', attribute, 'output_%d.txt'%graph_number)

    with open(coverage_fpath) as f:
        coverage_list = [float(line) for line in f if line.strip()]
    assert len(coverage_list) == 500, (
        'expected 500 coverage values in %s, found %d' % (coverage_fpath, len(coverage_list)))

    return sum(coverage_list[i] for i in subgraph_nodes)

In [None]:
# Records that are read below for their 'graph_name', 'attribute', 'value'
# and 'DC_RHS' fields.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default encoding.
with open('dc_rhs.json', encoding='utf-8') as f:
    dc_rhs = json.load(f)

# Index the right-hand-side values by (attribute, value, graph number); the
# graph number is the integer after the last underscore of the graph name.
rhs_dict = dict()
for d in dc_rhs:
    graph_number = int(d['graph_name'].split('_')[-1])
    rhs_dict[(d['attribute'], d['value'], graph_number)] = d['DC_RHS']

In [None]:
# One record per (attribute, value, graph): the threshold (rhs), the group
# coverage reached by each approach, and how far that coverage falls short of
# the threshold (0 when the coverage exceeds it).
stats = []
for attribute in ('age', 'gender', 'ethnicity'):
    for value in attribute_values[attribute]:
        for graph_number in range(20):
            info = {
                'attribute': attribute,
                'value': value,
                'graph_name': 'graph_spa_500_%d'%graph_number,
            }
            rhs = rhs_dict[(attribute, value, graph_number)]
            info['rhs'] = rhs
            for approach in ('mip', 'fairim'):
                group_coverage = get_group_coverage(approach, attribute, value, graph_number)
                if group_coverage > rhs:
                    violation = 0
                else:
                    violation = rhs - group_coverage
                info['%s_group_coverage'%approach] = group_coverage
                info['%s_violation'%approach] = violation
            stats.append(info)

# Persist all records as a single JSON array.
with open('dc_violations.json', 'w') as f:
    json.dump(stats, f)