In [None]:
import os, json, pickle
from os.path import join as ojoin

In [None]:
# Folder with the simulation outputs, one level above the current working directory.
sim_dir = os.path.join('..', 'simulation_results')

# Folder with the pickled input graphs: four levels up, then code/fairim/networks.
graph_dir = os.path.join(*(['..'] * 4 + ['code', 'fairim', 'networks']))


In [None]:
def extract_dispersion(fpath, gpath, attribute, values, expected_count=500):
    """Compute the L1 dispersion of the per-group mean coverage for one graph.

    One float is read from every non-blank line of ``fpath``. The graph stored
    in ``gpath`` is then used to split the nodes into groups by ``attribute``;
    the coverages of each group are averaged, and the function returns the sum
    of absolute deviations of those group averages from their unweighted mean.

    Parameters
    ----------
    fpath : str
        Text file with one coverage value per line (blank lines are skipped).
        The value at position ``i`` is used for the node whose id is ``i``,
        so node ids must be valid integer indices into that list.
    gpath : str
        Pickle file containing a graph object whose ``nodes(data=True)``
        yields ``(node_id, attr_dict)`` pairs (presumably a networkx graph;
        verify against the code that writes these files).
    attribute : str
        Key looked up in every node's attribute dict.
    values : sequence
        The group values of ``attribute``; one group average is computed per
        entry.
    expected_count : int or None, optional
        Number of coverage values the file must contain. Defaults to 500.
        Pass ``None`` to skip the check.

    Returns
    -------
    float
        ``sum(|group_mean - mean(group_means)|)`` over all entries of
        ``values``.

    Raises
    ------
    ValueError
        If the number of coverage values differs from ``expected_count``, if
        ``values`` is empty, or if some group value matches no node.
    """
    with open(fpath) as f:
        stripped_lines = (line.strip() for line in f)
        coverages = [float(text) for text in stripped_lines if text]

    if expected_count is not None and len(coverages) != expected_count:
        raise ValueError(
            'expected %d coverage values in %r, found %d'
            % (expected_count, fpath, len(coverages)))

    if len(values) == 0:
        raise ValueError('no group values given for attribute %r' % (attribute,))

    # NOTE: unpickling can execute arbitrary code; only load graph files that
    # come from a trusted source.
    with open(gpath, 'rb') as f:
        graph = pickle.load(f)

    # Read the node list once instead of once per group value.
    nodes_with_attrs = list(graph.nodes(data=True))

    relative_group_coverages = []
    for value in values:
        subgroup_indices = [nodeid for nodeid, attr_dict in nodes_with_attrs
                            if attr_dict[attribute] == value]
        if not subgroup_indices:
            # Without this guard the average below fails with a bare
            # ZeroDivisionError that does not say which group is missing.
            raise ValueError(
                'no node in %r has %s == %r' % (gpath, attribute, value))
        rgc = sum(coverages[i] for i in subgroup_indices) / len(subgroup_indices)
        relative_group_coverages.append(rgc)

    mean_rgc = sum(relative_group_coverages) / len(relative_group_coverages)
    return sum(abs(rgc - mean_rgc) for rgc in relative_group_coverages)

In [None]:
def get_dispersion_dict():
    """Gather the per-graph dispersions for every (method, attribute) pair.

    For the 'tim' results and for each of the four 'mip' problems, the
    dispersion of graphs 0..19 is computed with ``extract_dispersion`` for the
    attributes 'age', 'gender' and 'ethnicity'. The group values of each
    attribute are read from the module-level ``attribute_values`` mapping,
    which must be defined before this function is called.

    Returns
    -------
    dict
        Maps ``(method, attribute)`` tuples to a list of 20 dispersions, one
        per graph number, where ``method`` is 'tim' or a 'mip' problem name.
    """
    attributes = ('age', 'gender', 'ethnicity')
    graph_numbers = range(20)

    def dispersions_in(result_dir, attribute):
        # One dispersion per graph, read from `result_dir` for `attribute`.
        groups = attribute_values[attribute]
        return [
            extract_dispersion(
                ojoin(result_dir, 'output_%d.txt' % number),
                ojoin(graph_dir, 'graph_spa_500_%d.pickle' % number),
                attribute,
                groups,
            )
            for number in graph_numbers
        ]

    collected = {}

    # The 'tim' outputs live in a single folder shared by all attributes.
    tim_dir = ojoin(sim_dir, 'tim')
    for attribute in attributes:
        collected['tim', attribute] = dispersions_in(tim_dir, attribute)

    # The 'mip' outputs are stored per problem and per attribute.
    for problem in ('group_maximin', 'dc', 'equality', 'equity'):
        for attribute in attributes:
            collected[problem, attribute] = dispersions_in(
                ojoin(sim_dir, 'mip', problem, attribute), attribute)

    return collected

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np



def draw(values, labels, colors, title, ylabel, fpath, ylim=None, figsize=None):
    """Draw a bar chart of the mean of each series and save it to ``fpath``.

    Parameters
    ----------
    values : sequence of array-like
        One series per bar; each bar's height is ``np.mean`` of its series.
    labels : sequence of str
        Tick labels; their count determines the bar positions.
    colors : sequence
        ``colors[i]`` is applied to the i-th bar.
    title : str
        Figure title.
    ylabel : str
        Label of the y axis.
    fpath : str or file-like
        Destination handed to ``plt.savefig``. When it is a path, its parent
        directory is created if it does not exist yet.
    ylim : optional
        Passed to ``plt.ylim`` when truthy; otherwise the limits are left
        untouched.
    figsize : tuple, optional
        Figure size; defaults to ``(3.5, 2)``.

    Returns
    -------
    None
    """
    if figsize is None:
        figsize = (3.5, 2)

    # savefig does not create missing folders, so make sure the target
    # directory exists (e.g. on a fresh checkout without the image folders).
    try:
        out_dir = os.path.dirname(fpath)
    except (TypeError, AttributeError):
        # fpath is not a path (e.g. a file-like object): nothing to create.
        out_dir = None
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    plt.figure(figsize=figsize)
    means = [np.mean(v) for v in values]
    xpos = range(len(labels))

    # NOTE(review): no yerr/xerr is passed, so `capsize` probably has no
    # visible effect - confirm whether error bars were intended.
    bars = plt.bar(xpos, means, width=0.5, align='center', capsize=4)
    for i in xpos:
        bars[i].set_color(colors[i])

    plt.xticks(xpos, labels, rotation=0, fontsize=10)
    if ylim:
        plt.ylim(ylim)
    plt.ylabel(ylabel, fontsize=10)
    plt.title(title, fontsize=12)
    plt.savefig(fpath, bbox_inches='tight')
    

In [None]:
# Read the group values of every attribute, then compute all dispersions.
with open('attribute_values.json') as f:
    attribute_values = json.loads(f.read())
dispersion_dict = get_dispersion_dict()

# Tick labels, in bar order.
labels = ['tim', 'equality', 'equity', 'maximin', 'diversity']
# One colour for the first bar ('tim'), a second colour shared by the other four.
colors = ['#ff85d0', '#8fffa0', '#8fffa0', '#8fffa0', '#8fffa0']

# NOTE: the tick labels and the result-directory names do not match. The i-th
# label is drawn for the i-th method in the tuple below, so the 'equality' bar
# is read from the 'equity' results and the 'equity' bar from the 'equality'
# results ('maximin' -> 'group_maximin', 'diversity' -> 'dc').
# NOTE(review): the original author flagged this mismatch; confirm the swap is
# intended before changing either sequence.
for attribute in ('age', 'gender', 'ethnicity'):
    values = list(
        np.array(dispersion_dict[method, attribute])
        for method in ('tim', 'equity', 'equality', 'group_maximin', 'dc')
    )
    title = attribute[:1].upper() + attribute[1:]
    fpath = 'images/dispersion/%s.pdf' % attribute
    draw(values, labels, colors, title, 'L1 Dispersion', fpath, figsize=(3.5, 2.5))