In [4]:
import construction as cs
import matplotlib.pyplot as plt

### read font
from matplotlib import font_manager

# Register the font files found under the local 'Barlow/' directory with
# matplotlib's font manager so the family can be selected by name below.
# NOTE(review): the path is relative to the notebook's working directory.
font_dirs = ['Barlow/']
font_files = font_manager.findSystemFonts(fontpaths=font_dirs)

for font_file in font_files:
    font_manager.fontManager.addfont(font_file)

# Use Barlow as the default font family for every figure in this notebook.
plt.rcParams['font.family'] = 'Barlow'

import networkx as nx

import networkx as nx
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

from topological_metrics import *

import os

# Distance measures used below: KL/JS divergences are implemented in this notebook; earth mover's distance and the KS test come from scipy
from math import log2
from scipy.stats import wasserstein_distance as em
from scipy.stats import ks_2samp

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Different distances

In [5]:
def compute_probs(data, n=10):
    """Histogram `data` and return the bin edges with per-bin relative frequencies.

    Parameters
    ----------
    data : array-like
        Sample values; converted with ``np.array``.
    n : int or sequence, default 10
        Forwarded to ``np.histogram`` as its ``bins`` argument, so it may be a
        bin count or an explicit sequence of bin edges.

    Returns
    -------
    (edges, probs)
        ``edges`` are the histogram bin edges and ``probs`` the bin counts
        divided by the length of the first axis of the sample array. When
        explicit edges are given and some values fall outside them, ``probs``
        sums to less than 1.
    """
    samples = np.array(data)
    counts, edges = np.histogram(samples, bins=n)
    return edges, counts / samples.shape[0]

def support_intersection(p, q):
    """Pair up `p` and `q` element-wise and keep the pairs where both are non-zero.

    Returns a list of ``(p_i, q_i)`` tuples, in the original order, restricted
    to the positions where neither entry equals 0.
    """
    return [pair for pair in zip(p, q) if (pair[0] != 0) & (pair[1] != 0)]

def get_probs(list_of_tuples):
    """Split a list of ``(p_i, q_i)`` pairs back into two numpy arrays.

    Returns ``(p, q)`` where ``p`` holds the first element of every pair and
    ``q`` the second, in the same order as the input.
    """
    firsts, seconds = [], []
    for pair in list_of_tuples:
        firsts.append(pair[0])
        seconds.append(pair[1])
    return np.array(firsts), np.array(seconds)

def kl_divergence(p, q):
    """Kullback-Leibler divergence sum(p * ln(p / q)), using the natural logarithm.

    `p` and `q` are expected to support element-wise division (e.g. numpy
    arrays of equal length); no zero handling is done here.
    """
    ratio = p / q
    return np.sum(p * np.log(ratio))

def js_divergence(p, q):
    """Jensen-Shannon divergence between `p` and `q`.

    Computed as the average of the two KL divergences to the midpoint
    distribution ``m = (p + q) / 2``.
    """
    midpoint = 0.5 * (p + q)
    half_kl_p = 0.5 * kl_divergence(p, midpoint)
    half_kl_q = 0.5 * kl_divergence(q, midpoint)
    return half_kl_p + half_kl_q

def compute_kl_divergence(train_sample, test_sample, n_bins=10,js=False, eps=1e-10):
    """Histogram-based KL (or JS) divergence between two samples.

    Both samples are binned on the edges derived from `train_sample`, so values
    of `test_sample` outside the training range are not counted. The bin
    frequencies are smoothed by adding `eps`, renormalised to sum to 1, and
    restricted to the bins where both distributions are non-zero.

    With the default positive `eps` every bin is non-zero after smoothing, so
    the support-intersection step keeps all bins. Pass ``eps=0`` to compare the
    two distributions on their common support only.

    Parameters
    ----------
    train_sample, test_sample : array-like
        Reference sample (defines the bins) and the sample compared against it.
    n_bins : int or sequence, default 10
        Bin specification forwarded to ``compute_probs`` for the train sample.
    js : bool, default False
        If True return the Jensen-Shannon divergence, otherwise KL(train || test).
    eps : float, default 1e-10
        Additive smoothing applied to every bin before renormalising.

    Returns
    -------
    float
        The requested divergence (natural-log units).
    """
    edges, p = compute_probs(train_sample, n=n_bins)
    # Reuse the training edges so both histograms share the same bins.
    _, q = compute_probs(test_sample, n=edges)

    p = np.array(p) + eps
    q = np.array(q) + eps

    p = p/sum(p)
    q = q/sum(q)

    p, q = get_probs(support_intersection(p, q))

    if js:
        return js_divergence(p, q)
    return kl_divergence(p, q)
def comp_stat(ori,competitor,dist,names):
    """Summarise the distance between original and generated metric distributions.

    Parameters
    ----------
    ori : sequence
        One entry per metric holding the original distribution. For "em" and
        "ks" the reference sample is ``ori[c][0]``; for "js" and "kl" the whole
        ``ori[c]`` is passed on to ``compute_kl_divergence``.
        NOTE(review): this difference is kept from the original code; confirm
        it matches the layout of the stored arrays.
    competitor : sequence
        One entry per metric, each an iterable of generated samples (e.g. one
        per run) to compare with the original.
    dist : {"js", "kl", "em", "ks"}
        Distance to use: Jensen-Shannon, Kullback-Leibler (both on 50-bin
        histograms), earth mover's distance, or the KS statistic.
    names : sequence of str
        Metric labels, indexed in parallel with `competitor`.

    Returns
    -------
    dict
        ``{names[c]: (mean, std)}`` of the distances over the samples of metric c.

    Raises
    ------
    ValueError
        If `dist` is not one of the supported distance names. Previously an
        unknown name surfaced as a NameError, or silently reused a stale value.
    """
    if dist not in ("js", "kl", "em", "ks"):
        raise ValueError(
            "unknown dist {!r}; expected one of 'js', 'kl', 'em', 'ks'".format(dist)
        )

    res = dict()
    for c, met in enumerate(competitor):
        distances = []
        for comp in met:
            if dist == "js":
                val = compute_kl_divergence(ori[c], comp, n_bins=50,js=True)
            elif dist == "kl":
                val = compute_kl_divergence(ori[c], comp, n_bins=50,js=False)
            elif dist == "em":
                val = em(ori[c][0],comp)
            else:  # "ks": keep only the test statistic, not the p-value
                val = ks_2samp(ori[c][0],comp)[0]
            distances.append(val)

        res[names[c]] = (np.mean(distances),np.std(distances))

    return res




def load_comp_metric(file_name,dist="ks"):
    """Load one dataset and compare every generator against the original.

    Reads the original distributions and those of the four generators
    ("etngen", "taggen", "dymgen", "stmgen") for `file_name`, then computes the
    per-metric distance statistics with ``comp_stat``. Metric labels come from
    the module-level ``names`` list.

    Returns
    -------
    tuple of 4 numpy arrays
        In the order etngen, stmgen, taggen, dymgen. Each array is built from
        the ``(mean, std)`` pairs returned by ``comp_stat``, one row per metric.
    """
    original = load_topo_original(file_name)
    generated = {
        generator: load_topo_distributions(generator, file_name)
        for generator in ("etngen", "taggen", "dymgen", "stmgen")
    }

    stats_by_generator = {
        generator: comp_stat(original, generated[generator], dist=dist, names=names)
        for generator in ("etngen", "dymgen", "stmgen", "taggen")
    }

    return tuple(
        np.array(list(stats_by_generator[generator].values()))
        for generator in ("etngen", "stmgen", "taggen", "dymgen")
    )


In [6]:
def load_topo_distributions(generator,file_name):
    """Load the stored metric distributions of one generator for one dataset.

    Files are read from
    ``topology_results/<generator>/Multiple_run/distributions/<file_name>/``
    and, for the hour-level closeness/betweenness/modularity and connected
    component metrics, from the same sub-path under
    ``topology_results/topology_results_giulia/``.

    The s_metric, asp, nb_interactions and streng files are not loaded.

    Returns
    -------
    tuple of 13 arrays
        density, interacting_indiv, new_con, dur, clust, ass, conncomp,
        hclose, hbet, whbet, hmod, hs_metric, hasp (in that order).
    """
    run_subpath = generator+"/Multiple_run/distributions/"+file_name+"/"
    main_dir = "topology_results/"+run_subpath
    giulia_dir = "topology_results/topology_results_giulia/"+run_subpath

    # (directory, file stem) in the order the files are read.
    load_plan = (
        (main_dir, "density"),
        (main_dir, "interacting_indiv"),
        (main_dir, "new_con"),
        (main_dir, "dur"),
        (main_dir, "clust"),
        (main_dir, "ass"),
        (giulia_dir, "hclose"),
        (giulia_dir, "hbet"),
        (giulia_dir, "whbet"),
        (giulia_dir, "conncomp"),
        (giulia_dir, "hmod"),
        (main_dir, "hs_metric"),
        (main_dir, "hasp"),
    )

    # NOTE: allow_pickle=True can execute arbitrary code while loading;
    # only use it with result files you trust.
    loaded = {
        stem: np.load(folder+stem+".npy",allow_pickle=True)
        for folder, stem in load_plan
    }

    return_order = (
        "density", "interacting_indiv", "new_con", "dur", "clust", "ass",
        "conncomp", "hclose", "hbet", "whbet", "hmod", "hs_metric", "hasp",
    )
    return tuple(loaded[stem] for stem in return_order)

def load_topo_original(file_name):
    """Load the stored metric distributions of the original dataset.

    Files are read from ``topology_results/original_distributions/<file_name>/``
    and, for the hour-level closeness/betweenness/modularity and connected
    component metrics, from
    ``topology_results/topology_results_giulia/original_distributions/<file_name>/``.

    The s_metric, asp, streng and nb_interactions files are not loaded.

    Returns
    -------
    tuple of 13 items
        density, interacting_indiv, new_con, dur, clust, ass are returned as
        loaded; conncomp, hclose, hbet, whbet, hmod, hs_metric, hasp are each
        wrapped in a one-element list (in that order).
    """
    main_dir = "topology_results/original_distributions/"+file_name+"/"
    giulia_dir = "topology_results/topology_results_giulia/original_distributions/"+file_name+"/"

    # (directory, file stem) in the order the files are read.
    load_plan = (
        (main_dir, "density"),
        (main_dir, "interacting_indiv"),
        (main_dir, "new_con"),
        (main_dir, "dur"),
        (main_dir, "clust"),
        (main_dir, "ass"),
        (giulia_dir, "hclose"),
        (giulia_dir, "hbet"),
        (giulia_dir, "whbet"),
        (giulia_dir, "conncomp"),
        (giulia_dir, "hmod"),
        (main_dir, "hs_metric"),
        (main_dir, "hasp"),
    )

    # NOTE: allow_pickle=True can execute arbitrary code while loading;
    # only use it with result files you trust.
    loaded = {
        stem: np.load(folder+stem+".npy",allow_pickle=True)
        for folder, stem in load_plan
    }

    returned_as_loaded = ("density", "interacting_indiv", "new_con", "dur", "clust", "ass")
    returned_in_list = ("conncomp", "hclose", "hbet", "whbet", "hmod", "hs_metric", "hasp")

    bare_part = tuple(loaded[stem] for stem in returned_as_loaded)
    wrapped_part = tuple([loaded[stem]] for stem in returned_in_list)
    return bare_part + wrapped_part



In [27]:
# Metric labels, in the same order as the tuples returned by
# load_topo_original / load_topo_distributions (comp_stat indexes them by position).
names = [
    "Density",
    "Interacting  individuals",
    "New conversations",
    "Duration of contacts",
    "Global clustering coefficient",
    "Assortativity",
    "Nb connected components",
    "Hour closeness",
    "Hour betweenness",
    "Weighted hour betweenness",
    "Hour modularity",
    "Hour S-metric",
    "Hour average shortestpath length",
]

# load_comp_metric returns its four arrays in this generator order: etngen, stmgen, taggen, dymgen
#x1w,x2w,x3w,x4w = load_comp_metric("InVS13",dist=dist)
#x1s,x2s,x3s,x4s = load_comp_metric("High_School11",dist=dist)

In [65]:
file_name  = "High_School11"

# One (distance, number format) pair per group of four table columns, in the
# order the columns are emitted: KS, JS and KL with two decimals, earth
# mover's distance with five.
distance_formats = [("ks", "{:.2f}"), ("js", "{:.2f}"), ("kl", "{:.2f}"), ("em", "{:.5f}")]

# tmp[i] collects the formatted cells of table row i (one row per metric).
tmp = []
for dist_name, cell_format in distance_formats:
    # Arrays come back in the order etngen, stmgen, taggen, dymgen; every row
    # is a (mean, std) pair and only the mean goes into the table.
    x1h,x2h,x3h,x4h = load_comp_metric(file_name,dist=dist_name)
    for i in range(len(x1h)):
        if i == len(tmp):
            # The first distance creates the rows; later ones extend them.
            tmp.append([])
        tmp[i].extend(
            cell_format.format(per_generator[i][0])
            for per_generator in (x1h,x2h,x3h,x4h)
        )

In [66]:
# Abbreviated metric labels used as the first column of the LaTeX table,
# in the same order as `names`.
names2 = [
    "Density ",
    "Int. ind.",
    "New conv.",
    "Dur.",
    "GCC",
    "Ass.",
    "Con. com.",
    "H. clos.",
    "H betw.",
    "W. h. betw.",
    "H. modu.",
    "H. S-met.",
    "H. aspl",
]

In [67]:
# Print one LaTeX table row per metric: short label, then the formatted cells
# separated by " & ", terminated by a LaTeX line break (\\).
for row_index, cells in enumerate(tmp):
    joined_cells = "".join(cell + " & " for cell in cells)
    row_text = names2[row_index] + "&" + joined_cells
    # Drop the trailing "& " left by the last cell before closing the row.
    print(row_text[:-2] + "\\\\")

Density &0.09 & 0.55 & 0.28 & 0.30 & 0.03 & 0.18 & 0.10 & 0.16 & 0.56 & 8.03 & 0.84 & 3.26 & 0.00043 & 0.00112 & 0.00503 & 0.00117 \\
Int. ind.&0.09 & 0.56 & 0.28 & 0.27 & 0.03 & 0.19 & 0.10 & 0.08 & 0.56 & 3.33 & 0.84 & 1.66 & 3.40660 & 7.84999 & 39.64996 & 6.18392 \\
New conv.&0.16 & 0.27 & 0.66 & 0.18 & 0.07 & 0.09 & 0.31 & 0.07 & 1.67 & 3.35 & 4.87 & 2.55 & 5.56380 & 8.31258 & 74.41332 & 6.98200 \\
Dur.&0.24 & 0.38 & 0.20 & 0.37 & 0.06 & 0.14 & 0.04 & 0.14 & 0.34 & 2.94 & 0.28 & 4.02 & 0.53687 & 0.38945 & 0.23613 & 0.38596 \\
GCC&0.14 & 0.08 & 0.18 & 0.13 & 0.07 & 0.05 & 0.13 & 0.05 & 1.59 & 1.25 & 1.90 & 0.78 & 0.05790 & 0.04507 & 0.05844 & 0.09096 \\
Ass.&0.33 & 0.70 & 0.54 & 0.32 & 0.13 & 0.45 & 0.26 & 0.18 & 2.25 & 8.70 & 5.98 & 1.43 & 0.24615 & 0.41422 & 0.36310 & 0.23898 \\
Con. com.&0.10 & 1.00 & 0.28 & 0.99 & 0.04 & 0.34 & 0.10 & 0.61 & 0.53 & 1.93 & 1.24 & 16.83 & 2.28525 & 247.55778 & 15.54240 & 63.70242 \\
H. clos.&0.22 & 0.49 & 0.40 & 0.58 & 0.14 & 0.43 & 0.27 & 0.30 & 

In [50]:
# List the full metric labels, one per line.
for metric_label in names:
    print(metric_label)

Density
Interacting  individuals
New conversations
Duration of contacts
Global clustering coefficient
Assortativity
Nb connected components
Hour closeness
Hour betweenness
Weighted hour betweenness
Hour modularity
Hour S-metric
Hour average shortestpath length


In [59]:
0.08 & 0.07 & 0.05 & 0.38 \\
0.09 & 0.07 & 0.06 & 0.37 \\