In [1]:
import os
import json

import numpy as np
import networkx as nx
import community as community_louvain
import plotly.graph_objects as go
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Vis code
def plot_heat_map(data, title, vmin, sc, ho, save_dir='', save=False):
    
    # sc, ho are arrays
    #
    x_tick_labels = sc
    y_tick_labels = ho[::-1]
    ax = sns.heatmap(data, vmin = vmin,  cmap = 'YlOrBr', xticklabels = x_tick_labels, yticklabels = y_tick_labels, linewidth=0.5)
    plt.title(title)
    ax.set_xlabel('Homophily Prop')
    ax.set_ylabel('Social Capital Prop')
    if save:
        title_save = os.path.join(save_dir, title + '.png')
        plt.savefig(title_save, dpi = 300)
        plt.close('all')
    else:
        plt.show()

In [3]:
# Metrics
def triangle_count(adj_mat):
    return np.trace(np.linalg.matrix_power(adj_mat, 3)) / 6

def assortativity_coeff(adj_mat, types):
    n = len(adj_mat)
    
    # 2 times the edge count, based on paper normalization
    m = np.sum(adj_mat)
    e = np.zeros((2, 2))
    for i in range(n):
        for j in range(n):
            if adj_mat[i][j] != 1:
                continue
            
            if types[i] == types[j] and types[i] == -1:
                e[0][0] += 1
            elif types[i] != types[j] and types[i] == -1:
                e[0][1] += 1
            elif types[i] != types[j] and types[i] == 1:
                e[1][0] += 1
            elif types[i] == types[j] and types[i] == 1:
                e[1][1] += 1
    
    e = e / m
    e_square = np.square(e)
    es_sum = np.sum(e_square)
    e_trace = np.trace(e)
    return (e_trace - es_sum) / (1 - es_sum)

In [4]:
def read_stats(data_dir):
    stats = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')

        if sc not in stats:
            stats[sc] = {}
        if ho not in stats[sc]:
            stats[sc][ho] = {}

        if ftype == 'stats':

            # Get ablation stats
            sdata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                sdata = json.loads(sf.read())
            stats[sc][ho] = sdata
    return stats

def read_ntwks(data_dir):
    ntwks = {}
    for fn in os.listdir(data_dir):
        n, k, sc, ho, ftype_ext = fn.split('_')
        ftype, ext = ftype_ext.split('.')

        if sc not in ntwks:
            ntwks[sc] = {}
        if ho not in ntwks[sc]:
            ntwks[sc][ho] = {}

        if ftype == 'networks':

            # Get ablation stats
            ndata = {}
            with open(os.path.join(data_dir, fn), 'r') as sf:
                ndata = json.loads(sf.read())
            ntwks[sc][ho]['networks'] = ndata
        elif ftype == 'types':
            tdata = {}
            with open(os.path.join(data_dir, fn), 'r') as tf:
                tdata = json.loads(tf.read())
            ntwks[sc][ho]['types'] = tdata
    return ntwks

def ntwk_heatmap_data(data, model_key):
    
    # In paper alpha is homophily prop, beta is sc prop
    beta_values = list(data.keys())
    alpha_values = list(data[beta_values[0]].keys())
    alpha_values.sort()
    beta_values.sort()

    triangles = np.zeros((len(beta_values), len(alpha_values)))
    assort = np.zeros((len(beta_values), len(alpha_values)))

    bsize = len(beta_values)
    
    for b_i, b in enumerate(beta_values):
        for a_i, a in enumerate(alpha_values):
            ntwks = None
            if model_key == 'standard':
                ntwks = data[b][a]['networks'][model_key]
            elif type(model_key) == tuple:
                ntwks = data[b][a]['networks'][model_key[0]][model_key[1]]
            types_dict = data[b][a]['types']
            n = len(types_dict)
            types_arr = [ types_dict[str(i)]['init_attrs'] for i in range(n) ]
            
            tcounts = []
            acoeffs = []
            for adjm in ntwks:
                acf = assortativity_coeff(adjm, types_arr)
                tct = triangle_count(adjm)
                tcounts.append(tct)
                acoeffs.append(acf)
            triangles[bsize - b_i - 1][a_i] = np.mean(tcounts)
            assort[bsize - b_i - 1][a_i] = np.mean(acoeffs)
                
    return triangles, assort, alpha_values, beta_values

def stat_heatmap_data(data, model_key):
    
    # In paper alpha is homophily prop, beta is sc prop
    beta_values = list(data.keys())
    alpha_values = list(data[beta_values[0]].keys())
    alpha_values.sort()
    beta_values.sort()
    
    metrics = None
    if model_key == 'standard':
        metrics = list(data[beta_values[0]][alpha_values[0]][model_key].keys())
    elif type(model_key) == tuple:
        metrics = list(data[beta_values[0]][alpha_values[0]][model_key[0]][model_key[1]].keys())

    metric_data = { m : np.zeros((len(beta_values), len(alpha_values))) for m in metrics }

    bsize = len(beta_values)
    
    for b_i, b in enumerate(beta_values):
        for a_i, a in enumerate(alpha_values):
            for m in metrics:
                stat = None
                if model_key == 'standard':
                    stat = data[b][a][model_key][m]
                elif type(model_key) == tuple:
                    stat = data[b][a][model_key[0]][model_key[1]][m]

                metric_data[m][bsize - b_i - 1][a_i] = stat
                
    return metric_data, alpha_values, beta_values

In [5]:
# Read in standard data
fixed_std_dir = 'data/standard/'

fs_ntwks = read_ntwks(fixed_std_dir)

fs_tri, fs_assort, fs_alpha, fs_beta = ntwk_heatmap_data(fs_ntwks, 'standard')

plot_heat_map(fs_assort,
              'Assortativity Coefficient, Standard Model (k log k iterations)',
              fs_assort.min(),
              fs_beta,
              fs_alpha,
              'figures/standard',
              True)

plot_heat_map(fs_tri,
              'Triangle Count, Standard Model (k log k iterations)',
              fs_tri.min(),
              fs_beta,
              fs_alpha,
              'figures/standard',
              True)

fs_stats = read_stats(fixed_std_dir)

fs_stat_data, fs_alpha, fs_beta = stat_heatmap_data(fs_stats, 'standard')

for m, mdata in fs_stat_data.items():
    plot_heat_map(mdata,
                  '{m}, Standard Model (k log k iterations)'.format(m=m),
                  mdata.min(),
                  fs_alpha,
                  fs_beta,
                  'figures/standard',
                  True)

In [6]:
# Read in standard data
fixed_std_var_dir = 'data/standard_var/'

fsv_ntwks = read_ntwks(fixed_std_var_dir)

fsv_tri, fsv_assort, fsv_alpha, fsv_beta = ntwk_heatmap_data(fsv_ntwks, 'standard')

plot_heat_map(fsv_assort,
              'Assortativity Coefficient, Standard Model (Stable Triad Stopping)',
              fsv_assort.min(),
              fsv_beta,
              fsv_alpha,
              'figures/standard_var',
              True)

plot_heat_map(fsv_tri,
              'Triangle Count, Standard Model (Stable Triad Stopping)',
              fsv_tri.min(),
              fsv_beta,
              fsv_alpha,
              'figures/standard_var',
              True)

fsv_stats = read_stats(fixed_std_var_dir)

fsv_stat_data, fsv_alpha, fsv_beta = stat_heatmap_data(fsv_stats, 'standard')

for m, mdata in fsv_stat_data.items():
    plot_heat_map(mdata,
                  '{m}, Standard Model (Stable Triad Stopping)'.format(m=m),
                  mdata.min(),
                  fsv_alpha,
                  fsv_beta,
                  'figures/standard_var',
                  True)

In [7]:
# Look at deltas of variable iteration count and fixed iteration count

tri_delta = fs_tri - fsv_tri
assort_delta = fs_assort - fsv_assort

plot_heat_map(assort_delta,
              'Assortativity Coefficient, Standard Model (Variable and Fixed Iteration Difference)',
              assort_delta.min(),
              fsv_beta,
              fsv_alpha,
              'figures/standard_delta',
              True)

plot_heat_map(tri_delta,
              'Triangle Count, Standard Model (Variable and Fixed Iteration Difference)',
              tri_delta.min(),
              fsv_beta,
              fsv_alpha,
              'figures/standard_delta',
              True)

delta_metrics = ['exit_iter', 'stable_triad_count', 'num_comm']
for m in delta_metrics:
    mdelta = fs_stat_data[m] - fsv_stat_data[m]
    plot_heat_map(mdelta,
              '{m}, Standard Model (Variable and Fixed Iteration Difference)'.format(m=m),
              mdelta.min(),
              fsv_alpha,
              fsv_beta,
              'figures/standard_delta',
              True)

In [8]:
# Read in fixed ablation data
fixed_data_dir = 'data/comparison_proposal_fixed/'

f_abl_ntwks = read_ntwks(fixed_data_dir)

beta_vals = list(f_abl_ntwks.keys())
alpha_vals = list(f_abl_ntwks[beta_vals[0]].keys())

budgets = list(f_abl_ntwks[beta_vals[0]][alpha_vals[0]]['networks']['budget'].keys())
nl_dists = list(f_abl_ntwks[beta_vals[0]][alpha_vals[0]]['networks']['nonlocal'].keys())
budgets.sort()
nl_dists.sort()

fixed_deltas = { 'nl' : {}, 'budget' : {} }
for nld in nl_dists:
    tri, assort, alpha, beta = ntwk_heatmap_data(f_abl_ntwks, ('nonlocal', nld))
    
    fixed_deltas['nl'][nld] = { 'assort' : None, 'tri' : None }
    
    plot_heat_map(assort,
              'Assortativity Coefficient, Nonlocal Ball {nld} Model (k log k iterations)'.format(nld=nld),
              assort.min(),
              beta,
              alpha,
              'figures/nonlocal',
              save=True)

    plot_heat_map(tri,
              'Triangle Count, Nonlocal Ball {nld} Model (k log k iterations)'.format(nld=nld),
              tri.min(),
              beta,
              alpha,
              'figures/nonlocal',
              save=True)
    
    fixed_deltas['nl'][nld]['assort'] = fs_assort - assort
    fixed_deltas['nl'][nld]['tri'] = fs_tri - tri
    
for bdgt in budgets:
    tri, assort, alpha, beta = ntwk_heatmap_data(f_abl_ntwks, ('budget', bdgt))
    
    fixed_deltas['budget'][bdgt] = { 'assort' : None, 'tri' : None }
    
    plot_heat_map(assort,
              'Assortativity Coefficient, Budget k={bdgt} Model (k log k iterations)'.format(bdgt=bdgt),
              assort.min(),
              beta,
              alpha,
              'figures/budgets',
              save=True)

    plot_heat_map(tri,
              'Triangle Count, Budget k={bdgt} Model (k log k iterations)'.format(bdgt=bdgt),
              tri.min(),
              beta,
              alpha,
              'figures/budgets',
              save=True)
    
    fixed_deltas['budget'][bdgt]['assort'] = fs_assort - assort
    fixed_deltas['budget'][bdgt]['tri'] = fs_tri - tri

In [9]:
print('Fixed iteration deltas')
print('Nonlocal models:')
for nld in nl_dists:
    print('Ball', nld, 'distance')
    print(fixed_deltas['nl'][nld])
for bdgt in budgets:
    print('Budget', bdgt)
    print(fixed_deltas['budget'][bdgt])

Fixed iteration deltas
Nonlocal models:
Ball 3 distance
{'assort': array([[ 0.42342033,  0.15132745,  0.06702999, -0.06134887, -0.13592071,
        -0.19433137, -0.20348675, -0.23807897, -0.22864108],
       [ 0.32567911,  0.17900821, -0.06749271, -0.07459825, -0.1422546 ,
        -0.17981819, -0.19882687, -0.17914566, -0.21395342],
       [ 0.26311097,  0.14912008, -0.03649128, -0.07015131, -0.16612329,
        -0.18875539, -0.17767823, -0.12120666, -0.21586452],
       [ 0.20766351,  0.10682774, -0.06318408, -0.05607784, -0.1751706 ,
        -0.12037648, -0.13203477, -0.15533593, -0.12830875],
       [ 0.05285564,  0.16641661,  0.00551552, -0.05143585, -0.0796437 ,
        -0.09140947, -0.11461463, -0.10291379, -0.07004273],
       [ 0.04754194, -0.00673971,  0.01044502, -0.08635698, -0.05034942,
        -0.0860412 , -0.06717004, -0.07106556, -0.04877013],
       [ 0.11566924, -0.03592984, -0.01662806, -0.05086125,  0.04549699,
        -0.04807917,  0.01680282, -0.03817936, -0.040853