In [1]:
import os
import json
import math

import numpy as np
import networkx as nx
import community as community_louvain
import plotly.graph_objects as go
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
import matplotlib as mpl
import matplotlib.font_manager as font_manager
from matplotlib import rcParams

font_path = '/home/andyclee/.fonts/LinLibertine_R.ttf'  # Your font path goes here
font_manager.fontManager.addfont(font_path)
prop = font_manager.FontProperties(fname=font_path)

mpl.rcParams['font.family'] = 'Linux Libertine'

In [3]:
# Metrics
def triangle_count(adj_mat):
    return np.trace(np.linalg.matrix_power(adj_mat, 3)) / 6

def assortativity_coeff(adj_mat, types):
    n = len(adj_mat)
    
    # 2 times the edge count, based on paper normalization
    m = np.sum(adj_mat)
    e = np.zeros((2, 2))
    for i in range(n):
        for j in range(n):
            if adj_mat[i][j] != 1:
                continue
            
            if types[i] == types[j] and types[i] == -1:
                e[0][0] += 1
            elif types[i] != types[j] and types[i] == -1:
                e[0][1] += 1
            elif types[i] != types[j] and types[i] == 1:
                e[1][0] += 1
            elif types[i] == types[j] and types[i] == 1:
                e[1][1] += 1
    
    e = e / m
    e_square = np.square(e)
    es_sum = np.sum(e_square)
    e_trace = np.trace(e)
    return (e_trace - es_sum) / (1 - es_sum)

In [4]:
# b (beta) is sc
# a (alpha) is ho

def read_stats(data_dir, a, b, cn=None, ck=None):
    stats = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')

        if sc != str(b):
            continue
        if ho != str(a):
            continue
        if cn is not None and int(n) != cn:
            continue
        if ck is not None and int(k) != ck:
            continue

        if ftype == 'stats':

            # Get ablation stats
            sdata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                sdata = json.loads(sf.read())
            stats = sdata
    return stats

def read_ntwks(data_dir, a, b, cn=None, ck=None):
    ntwks = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')
        
        if sc != str(b):
            continue
        if ho != str(a):
            continue
        if cn is not None and int(n) != cn:
            continue
        if ck is not None and int(k) != ck:
            continue

        if ftype == 'networks':

            # Get ablation stats
            ndata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                ndata = json.loads(sf.read())
            ntwks['networks'] = ndata
        elif ftype == 'types':
            tdata = {}
            with open(os.path.join(data_dir, fn), 'r') as tf:
                tdata = json.loads(tf.read())
            ntwks['types'] = tdata
    return ntwks

def ntwk_data(data, model_key):
    ntwks = None
    if model_key == 'standard':
        ntwks = data['networks'][model_key]
    elif type(model_key) == tuple:
        ntwks = data['networks'][model_key[0]][model_key[1]]
    types_dict = data['types']
    n = len(types_dict)
    types_arr = [ types_dict[str(i)]['init_attrs'] for i in range(n) ]

    tcounts = []
    acoeffs = []
    for adjm in ntwks:
        acf = assortativity_coeff(adjm, types_arr)
        tct = triangle_count(adjm)
        tcounts.append(tct)
        acoeffs.append(acf)
    triangles = np.mean(tcounts)
    assort = np.mean(acoeffs)
                
    return triangles, assort

def stat_data(data, model_key):
    metrics = None
    if model_key == 'standard':
        metrics = list(data[model_key].keys())
    elif type(model_key) == tuple:
        metrics = list(data[model_key[0]][model_key[1]].keys())

    metric_data = { m : 0 for m in metrics }
    
    for m in metrics:
        stat = None
        if model_key == 'standard':
            stat = data[model_key][m]
        elif type(model_key) == tuple:
            stat = data[model_key[0]][model_key[1]][m]

        metric_data[m] = stat
                
    return metric_data

In [5]:
#abl_metrics = ['util_dist', 'modularity', 'stable_triad_count', 'num_comm']
abl_metrics = ['util_dist', 'stable_triad_count']
alpha_betas = [(0.5, 0.5)]

In [6]:
std_var_dir = 'data/standard_var/'
abl_data_dir = 'data/comparison_proposal/'
fig_dir = 'figures/ablation'

for a, b in alpha_betas:
    sv_ntwks = read_ntwks(std_var_dir, a, b, ck=10)
    sv_tri, sv_assort = ntwk_data(sv_ntwks, 'standard')

    sv_stats = read_stats(std_var_dir, a, b, ck=10)
    sv_stat_data = stat_data(sv_stats, 'standard')
    
    abl_ntwks = read_ntwks(abl_data_dir, a, b, ck=10)
    
    var_deltas = { 'nl' : {}, 'budget' : {} }
    
    nl_dists = list(abl_ntwks['networks']['nonlocal'])
    budgets = list(abl_ntwks['networks']['budget'])
    
    for nld in nl_dists:
        tri, assort = ntwk_data(abl_ntwks, ('nonlocal', nld))

        var_deltas['nl'][nld] = {}
        var_deltas['nl'][nld]['assort'] = (assort - sv_assort) / sv_assort
        var_deltas['nl'][nld]['tri'] = (tri - sv_tri) / sv_tri

        abl_stats = read_stats(abl_data_dir, a, b, ck=10)
        abl_stat_data = stat_data(abl_stats, ('nonlocal', nld))

        for m in abl_metrics:
            var_deltas['nl'][nld][m] = (abl_stat_data[m] - sv_stat_data[m]) / sv_stat_data[m]

    for bdgt in budgets:
        tri, assort = ntwk_data(abl_ntwks, ('budget', bdgt))

        var_deltas['budget'][bdgt] = {}

        var_deltas['budget'][bdgt]['assort'] = (assort - sv_assort) / sv_assort
        var_deltas['budget'][bdgt]['tri'] = (tri - sv_tri) / sv_tri

        abl_stats = read_stats(abl_data_dir, a, b, ck=10)
        abl_stat_data = stat_data(abl_stats, ('budget', bdgt))

        for m in abl_metrics:
            var_deltas['budget'][bdgt][m] = (abl_stat_data[m] - sv_stat_data[m]) / sv_stat_data[m]

    def mlabel(m):
        m_label = ''
        if m == 'assort':
            m_label = 'Assortativity Coefficient'
        elif m == 'tri':
            m_label = 'Triangle Count'
        elif m == 'util_dist':
            m_label = 'Average Utility'
        elif m == 'modularity':
            m_label = 'Modularity'
        elif m == 'stable_triad_count':
            m_label = 'Stable Triad Count'
        elif m == 'num_comm':
            m_label = 'Detected Community Count'
        return m_label

    bdgt_x_vals = [ int(bdgt) for bdgt in budgets ] + [10]
    nld_x_vals = [ int(nld) for nld in nl_dists ] + [2]
    bdgt_x_vals.sort()
    nld_x_vals.sort()
    all_metrics = abl_metrics + ['assort', 'tri']
    
    var_deltas['budget']['10'] = {}
    var_deltas['nl']['2'] = {}
    for m in all_metrics:
        var_deltas['budget']['10'][m] = 0.0
        var_deltas['nl']['2'][m] = 0.0
        
    print(var_deltas)

    fig_nl = plt.figure(figsize=(10, 10), frameon=False)
    
    plt.xlabel('Maximum proposal distance', fontsize=30)
    plt.ylabel('Relative metric difference between ablation model \n and proposals at dist. 2', fontsize=30)
    #plt.title('Metric relative differences (nonlocal)')

    for m in all_metrics:
        nld_y_vals = [ var_deltas['nl'][str(nld)][m] for nld in nld_x_vals ]
        plt.plot(nld_x_vals, nld_y_vals, marker='o', label=mlabel(m), linewidth=2.5)

    plt.xticks(nld_x_vals, fontsize=25)
    plt.yticks(fontsize=25)
    plt.box(False)
    fig_nl.savefig(os.path.join(fig_dir, '{a}_{b}_rel_diff_nonlocal.pdf'.format(a=a, b=b)), bbox_inches='tight')
    plt.close('all')

    fig_bdgt = plt.figure(figsize=(10, 10), frameon=False)
    plt.xlabel('Maximum connections possible per agent', fontsize=30)
    plt.ylabel('Relative metric difference between ablation model \n and maximum of 10 connections', fontsize=30)
    #plt.title('Metric relative differences (budgets)')

    for m in all_metrics:
        bdgt_y_vals = [ var_deltas['budget'][str(bdgt)][m] for bdgt in bdgt_x_vals ]
        plt.plot(bdgt_x_vals, bdgt_y_vals, marker='o', label=mlabel(m), linewidth=2.5)

    plt.legend(loc='upper left', prop={'size' : 25})
    plt.xticks(bdgt_x_vals, fontsize=25)
    plt.yticks(fontsize=25)
    plt.box(False)
    fig_bdgt.savefig(os.path.join(fig_dir, '{a}_{b}_rel_diff_budgets.pdf'.format(a=a, b=b)), bbox_inches='tight')
    plt.close('all')

{'nl': {'3': {'assort': 0.2710476145223598, 'tri': -0.25455315544260904, 'util_dist': -0.33358158339207117, 'stable_triad_count': -0.223956136651202}, '4': {'assort': 0.426785129156208, 'tri': -0.25455315544260904, 'util_dist': -0.42022528692830463, 'stable_triad_count': -0.3673555461830451}, '5': {'assort': 0.3823901669559932, 'tri': -0.3434985175772977, 'util_dist': -0.45187716941736616, 'stable_triad_count': -0.44749051033319276}, '6': {'assort': 0.4320610641033203, 'tri': -0.3858534519271495, 'util_dist': -0.5415796683201894, 'stable_triad_count': -0.6625896246309574}, '7': {'assort': 0.39150201868630663, 'tri': -0.21643371452774246, 'util_dist': -0.4938358623808067, 'stable_triad_count': -0.4137494727962885}, '2': {'util_dist': 0.0, 'stable_triad_count': 0.0, 'assort': 0.0, 'tri': 0.0}}, 'budget': {'6': {'assort': -0.08823969920819018, 'tri': -0.8517577297755189, 'util_dist': -0.08854547630763485, 'stable_triad_count': -0.8861239983129481}, '75': {'assort': 0.2378772171382972, 'tr