In [1]:
import os
import json

import numpy as np
import networkx as nx
import community as community_louvain
import plotly.graph_objects as go
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Vis code
def plot_heat_map(data, title, vmin, sc, ho, save_dir='', save=False):
    
    # sc, ho are arrays
    #
    x_tick_labels = sc
    y_tick_labels = ho[::-1]
    ax = sns.heatmap(data, vmin = vmin,  cmap = 'YlOrBr', xticklabels = x_tick_labels, yticklabels = y_tick_labels, linewidth=0.5)
    plt.title(title)
    ax.set_xlabel('Homophily Prop')
    ax.set_ylabel('Social Capital Prop')
    if save:
        title_save = os.path.join(save_dir, title + '.png')
        plt.savefig(title_save, dpi = 300)
        plt.close('all')
    else:
        plt.show()

In [3]:
# Metrics
def triangle_count(adj_mat):
    return np.trace(np.linalg.matrix_power(adj_mat, 3)) / 6

def assortativity_coeff(adj_mat, types):
    n = len(adj_mat)
    
    # 2 times the edge count, based on paper normalization
    m = np.sum(adj_mat)
    e = np.zeros((2, 2))
    for i in range(n):
        for j in range(n):
            if adj_mat[i][j] != 1:
                continue
            
            if types[i] == types[j] and types[i] == -1:
                e[0][0] += 1
            elif types[i] != types[j] and types[i] == -1:
                e[0][1] += 1
            elif types[i] != types[j] and types[i] == 1:
                e[1][0] += 1
            elif types[i] == types[j] and types[i] == 1:
                e[1][1] += 1
    
    e = e / m
    e_square = np.square(e)
    es_sum = np.sum(e_square)
    e_trace = np.trace(e)
    return (e_trace - es_sum) / (1 - es_sum)

In [4]:
def read_stats(data_dir, cn=None, ck=None):
    stats = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')

        if cn is not None and int(n) != cn:
            continue
        if ck is not None and int(k) != ck:
            continue
            
        if sc not in stats:
            stats[sc] = {}
        if ho not in stats[sc]:
            stats[sc][ho] = {}

        if ftype == 'stats':

            # Get ablation stats
            sdata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                sdata = json.loads(sf.read())
            stats[sc][ho] = sdata
    return stats

def read_ntwks(data_dir, cn=None, ck=None):
    ntwks = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')
        
        if cn is not None and int(n) != cn:
            continue
        if ck is not None and int(k) != ck:
            continue

        if sc not in ntwks:
            ntwks[sc] = {}
        if ho not in ntwks[sc]:
            ntwks[sc][ho] = {}

        if ftype == 'networks':

            # Get ablation stats
            ndata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                ndata = json.loads(sf.read())
            ntwks[sc][ho]['networks'] = ndata
        elif ftype == 'types':
            tdata = {}
            with open(os.path.join(data_dir, fn), 'r') as tf:
                tdata = json.loads(tf.read())
            ntwks[sc][ho]['types'] = tdata
    return ntwks

def ntwk_heatmap_data(data, model_key):
    
    # In paper alpha is homophily prop, beta is sc prop
    beta_values = list(data.keys())
    alpha_values = list(data[beta_values[0]].keys())
    alpha_values.sort()
    beta_values.sort()

    triangles = np.zeros((len(beta_values), len(alpha_values)))
    assort = np.zeros((len(beta_values), len(alpha_values)))

    bsize = len(beta_values)
    
    for b_i, b in enumerate(beta_values):
        for a_i, a in enumerate(alpha_values):
            ntwks = None
            if model_key == 'standard':
                ntwks = data[b][a]['networks'][model_key]
            elif type(model_key) == tuple:
                ntwks = data[b][a]['networks'][model_key[0]][model_key[1]]
            types_dict = data[b][a]['types']
            n = len(types_dict)
            types_arr = [ types_dict[str(i)]['init_attrs'] for i in range(n) ]
            
            tcounts = []
            acoeffs = []
            for adjm in ntwks:
                acf = assortativity_coeff(adjm, types_arr)
                tct = triangle_count(adjm)
                tcounts.append(tct)
                acoeffs.append(acf)
            triangles[bsize - b_i - 1][a_i] = np.mean(tcounts)
            assort[bsize - b_i - 1][a_i] = np.mean(acoeffs)
                
    return triangles, assort, alpha_values, beta_values

def stat_heatmap_data(data, model_key):
    
    # In paper alpha is homophily prop, beta is sc prop
    beta_values = list(data.keys())
    alpha_values = list(data[beta_values[0]].keys())
    alpha_values.sort()
    beta_values.sort()
    
    metrics = None
    if model_key == 'standard':
        metrics = list(data[beta_values[0]][alpha_values[0]][model_key].keys())
    elif type(model_key) == tuple:
        metrics = list(data[beta_values[0]][alpha_values[0]][model_key[0]][model_key[1]].keys())

    metric_data = { m : np.zeros((len(beta_values), len(alpha_values))) for m in metrics }

    bsize = len(beta_values)
    
    for b_i, b in enumerate(beta_values):
        for a_i, a in enumerate(alpha_values):
            for m in metrics:
                stat = None
                if model_key == 'standard':
                    stat = data[b][a][model_key][m]
                elif type(model_key) == tuple:
                    stat = data[b][a][model_key[0]][model_key[1]][m]

                metric_data[m][bsize - b_i - 1][a_i] = stat
                
    return metric_data, alpha_values, beta_values

In [5]:
abl_metrics = ['util_dist', 'modularity', 'stable_triad_count', 'num_comm']

In [6]:
# Read in standard data
std_var_dir = 'data/standard_var/'

# k = 5 ablation
sv_ntwks = read_ntwks(std_var_dir, ck=5)
sv_tri, sv_assort, sv_alpha, sv_beta = ntwk_heatmap_data(sv_ntwks, 'standard')

sv_stats = read_stats(std_var_dir, ck=5)
sv_stat_data, sv_alpha, sv_beta = stat_heatmap_data(sv_stats, 'standard')

In [7]:
# Read in fixed ablation data
abl_data_dir = 'data/comparison_proposal/'

abl_ntwks = read_ntwks(abl_data_dir, ck=5)

beta_vals = list(abl_ntwks.keys())
alpha_vals = list(abl_ntwks[beta_vals[0]].keys())

budgets = list(abl_ntwks[beta_vals[0]][alpha_vals[0]]['networks']['budget'].keys())
nl_dists = list(abl_ntwks[beta_vals[0]][alpha_vals[0]]['networks']['nonlocal'].keys())
budgets.sort()
nl_dists.sort()

var_deltas = { 'nl' : {}, 'budget' : {} }
for nld in nl_dists:
    tri, assort, alpha, beta = ntwk_heatmap_data(abl_ntwks, ('nonlocal', nld))
    
    var_deltas['nl'][nld] = {}
    var_deltas['nl'][nld]['assort'] = (assort - sv_assort) / sv_assort
    var_deltas['nl'][nld]['tri'] = (tri - sv_tri) / sv_tri
    
    abl_stats = read_stats(abl_data_dir, ck=5)
    abl_stat_data, abl_alpha, abl_beta = stat_heatmap_data(abl_stats, ('nonlocal', nld))
    
    for m in abl_metrics:
        var_deltas['nl'][nld][m] = (sv_stat_data[m] - abl_stat_data[m]) / sv_stat_data[m]
    
for bdgt in budgets:
    tri, assort, alpha, beta = ntwk_heatmap_data(abl_ntwks, ('budget', bdgt))
    
    var_deltas['budget'][bdgt] = {}
    
    var_deltas['budget'][bdgt]['assort'] = sv_assort - assort
    var_deltas['budget'][bdgt]['tri'] = sv_tri - tri
    
    abl_stats = read_stats(abl_data_dir, ck=5)
    abl_stat_data, abl_alpha, abl_beta = stat_heatmap_data(abl_stats, ('budget', bdgt))
    
    for m in abl_metrics:
        var_deltas['budget'][bdgt][m] = abl_stat_data[m] - sv_stat_data[m]

In [8]:
print(var_deltas)

fig_dir = 'figures/ablation'

bdgt_x_vals = [ int(bdgt) for bdgt in budgets ]
nld_x_vals = [ int(nld) for nld in nl_dists ]
bdgt_x_vals.sort()
nld_x_vals.sort()
all_metrics = abl_metrics + ['assort', 'tri']

fig_nl = plt.figure(figsize=(10, 10))
plt.xlabel('Nonlocal distance')
plt.ylabel('Difference')
plt.title('Metric relative differences (nonlocal)')

for m in all_metrics:
    nld_y_vals = [ var_deltas['nl'][str(nld)][m][0] for nld in nl_dists ]
    plt.plot(nld_x_vals, nld_y_vals, marker='o', label=m)

plt.legend()
fig_nl.savefig(os.path.join(fig_dir, 'rel_diff_nonlocal.pdf'))
plt.close('all')

fig_bdgt = plt.figure(figsize=(10, 10))
plt.xlabel('Budget')
plt.ylabel('Difference')
plt.title('Metric relative differences (budgets)')
        
for m in all_metrics:
    bdgt_y_vals = [ var_deltas['budget'][str(bdgt)][m][0] for bdgt in budgets ]
    plt.plot(bdgt_x_vals, bdgt_y_vals, marker='o', label=m)

plt.legend()
fig_bdgt.savefig(os.path.join(fig_dir, 'rel_diff_budgets.pdf'))
plt.close('all')

{'nl': {'3': {'assort': array([[-4.11501294]]), 'tri': array([[-1.]]), 'util_dist': array([[0.15890449]]), 'modularity': array([[-0.44125075]]), 'stable_triad_count': array([[0.76190476]]), 'num_comm': array([[-0.21212121]])}, '4': {'assort': array([[0.00233849]]), 'tri': array([[-0.13621262]]), 'util_dist': array([[0.27220909]]), 'modularity': array([[-0.38122431]]), 'stable_triad_count': array([[0.61904762]]), 'num_comm': array([[-0.01010101]])}, '5': {'assort': array([[-2.15501323]]), 'tri': array([[-0.90033223]]), 'util_dist': array([[0.22146531]]), 'modularity': array([[-0.39902798]]), 'stable_triad_count': array([[0.66666667]]), 'num_comm': array([[-0.31313131]])}, '6': {'assort': array([[1.9764776]]), 'tri': array([[-0.53488372]]), 'util_dist': array([[0.33198943]]), 'modularity': array([[-0.37165851]]), 'stable_triad_count': array([[0.66666667]]), 'num_comm': array([[-0.01010101]])}, '7': {'assort': array([[1.59442052]]), 'tri': array([[-0.97009967]]), 'util_dist': array([[0.28