In [1]:
import glob
import networkx as nx
from tqdm.notebook import tqdm
import numpy as np

In [2]:
def read_network(filename):
    """Load a GML graph and strip its self-loops and isolated nodes.

    filename: path handed to ``nx.read_gml``.

    Returns the loaded graph after, in this order, every self-loop edge has
    been removed and every node that is then isolated has been removed.
    """
    graph = nx.read_gml(filename)

    # Snapshot into lists before mutating the graph.
    loop_edges = list(nx.selfloop_edges(graph))
    graph.remove_edges_from(loop_edges)

    # Isolates are collected only after the self-loops are gone.
    lonely_nodes = list(nx.isolates(graph))
    graph.remove_nodes_from(lonely_nodes)

    return graph

In [3]:
# orig_filepath = 'Processed/years/average/months/network-1.gml'
orig_filepath = 'Processed/years/average/temp/network-temp-4.gml'

G = read_network(orig_filepath)

In [4]:
# Derive the glob pattern for the null-model files that belong to `orig_filepath`.
# NOTE(review): str.replace rewrites *every* 'temp' occurrence, so for a path such as
# '.../temp/network-temp-4.gml' both the directory and the file stem become
# 'temp_null' — confirm this matches how the null-model files are actually named.
null_models_pattern = orig_filepath.replace('temp', 'temp_null').replace('.gml', "-*.gml")

null_models_paths = glob.glob(null_models_pattern)

In [6]:
# null_models = list(map(read_network, null_models_paths))
import multiprocessing as mp

# Parse the null-model files in a process pool; chunksize=1 hands each worker
# one file at a time.
with mp.Pool() as pool:
    null_models = pool.map(read_network, null_models_paths, chunksize=1)

In [7]:
def arr_to_dict(arr, G):
    """Pair each node of ``G`` with the item of ``arr`` at the same position.

    Nodes are taken in ``G.nodes()`` order. As with ``zip``, pairing stops at
    the shorter of the two sequences.
    """
    node_labels = list(G.nodes())
    return {label: value for label, value in zip(node_labels, arr)}

In [8]:
from scipy.stats import norm

def get_pvalue(null_dist, obs_value):
    """Two-sided p-value of ``obs_value`` under a normal fit of ``null_dist``.

    The null sample is summarised by its mean and its sample standard
    deviation (``ddof=1``); the result is ``2 * norm.sf(|z|)`` for the
    resulting z-score.

    When the null sample has zero spread no z-score exists, so the function
    returns ``1`` if the observation equals the null mean and ``0`` otherwise.
    """
    center = np.mean(null_dist)
    spread = np.std(null_dist, ddof=1)

    # Degenerate null distribution: avoid dividing by zero.
    if spread == 0:
        return 1 if center == obs_value else 0

    z_score = (obs_value - center) / spread
    return 2 * norm.sf(np.abs(z_score))

## (global) efficiency

In [7]:
# https://github.com/aestrivex/bctpy/blob/1b40e281eda081060707e30b68106ac1ebf54130/bct/algorithms/distance.py#L107

# https://github.com/aestrivex/bctpy/blob/master/bct/algorithms/efficiency.py

In [183]:
from bct.algorithms.efficiency import efficiency_wei
from bct.utils.other import weight_conversion

In [184]:
A = nx.to_numpy_array(G, weight='duration_weights')

In [185]:
An = weight_conversion(A, 'normalize') # An = A / A.max()

In [186]:
orig_result = efficiency_wei(An, 'global')

In [None]:
def do_efficiencies(G):
    """Return ``efficiency_wei(..., 'global')`` for ``G``, or ``None`` on any error.

    The adjacency matrix is built from the ``'duration_weights'`` edge
    attribute and passed through ``weight_conversion(..., 'normalize')``.
    """
    try:
        weights = nx.to_numpy_array(G, weight='duration_weights')
        normalized = weight_conversion(weights, 'normalize')
        efficiency = efficiency_wei(normalized, 'global')
    except Exception:
        # Best-effort: any failure is reported as None instead of propagating.
        return None
    return efficiency


# Evaluate every null model in a process pool; a graph for which
# do_efficiencies failed shows up as None in `results`.
with mp.Pool() as pool:
    results = pool.map(do_efficiencies, null_models, chunksize=1)

In [8]:
from bct.algorithms.efficiency import efficiency_wei
from bct.utils.other import weight_conversion
import multiprocessing as mp

def do_global_efficiency(G):
    """Return ``efficiency_wei(..., 'global')`` for ``G``, or ``None`` on any error.

    Uses the ``'duration_weights'`` edge attribute as adjacency weights and
    normalizes them with ``weight_conversion(..., 'normalize')`` first.
    """
    try:
        adjacency = nx.to_numpy_array(G, weight='duration_weights')
        return efficiency_wei(weight_conversion(adjacency, 'normalize'), 'global')
    except Exception:
        # Deliberately swallow every failure and signal it with None.
        return None

def get_global_efficiency(G, G_nulls):
    """Global efficiency of ``G`` and of every graph in ``G_nulls``.

    G: the observed network, passed to ``do_global_efficiency``.
    G_nulls: iterable of null-model graphs, evaluated in a process pool.

    Returns ``{'orig': <value for G>, 'nulls': [<value per null model>, ...]}``.
    An entry is ``None`` wherever ``do_global_efficiency`` failed.
    """
    orig_result = do_global_efficiency(G)

    with mp.Pool() as pool:
        # Use the argument rather than the notebook-global ``null_models``:
        # the previous body ignored ``G_nulls`` and silently evaluated
        # whatever null models happened to be loaded in the kernel.
        results = pool.map(do_global_efficiency, G_nulls, chunksize=1)

    return {'orig': orig_result, 'nulls': results}

In [9]:
temp = get_global_efficiency(G, null_models)

## local efficiency

In [57]:
from bct.algorithms.efficiency import efficiency_wei
from bct.utils.other import weight_conversion

In [58]:
A = nx.to_numpy_array(G, weight='duration_weights')

In [59]:
An = weight_conversion(A, 'normalize') # An = A / A.max()

In [60]:
orig_result = efficiency_wei(An, 'local')

In [12]:
import multiprocessing as mp
from bct.algorithms.efficiency import efficiency_wei
from bct.utils.other import weight_conversion

def do_local_efficiency(G):
    """Return ``efficiency_wei(..., 'local')`` for ``G``, or ``None`` on any error.

    The adjacency matrix comes from the ``'duration_weights'`` edge attribute
    and is normalized with ``weight_conversion(..., 'normalize')``
    (described in the original comment as ``An = A / A.max()``).
    """
    try:
        adjacency = nx.to_numpy_array(G, weight='duration_weights')
        normalized = weight_conversion(adjacency, 'normalize')
        return efficiency_wei(normalized, 'local')
    except Exception:
        # Any failure is reported as None rather than raised.
        return None

def get_local_efficiency(G, G_nulls):
    """Local efficiency of ``G`` and of its null models, keyed by node of ``G``.

    Returns ``{'orig': {node: value}, 'nulls': {node: values across nulls}}``.
    Null results are stacked one row per null model and transposed before
    being paired with ``G``'s nodes.
    NOTE(review): assumes every null model yields a vector in the same node
    order as ``G`` — confirm against how the null models are generated.
    """
    observed = do_local_efficiency(G)

    with mp.Pool() as workers:
        per_null = workers.map(do_local_efficiency, G_nulls, chunksize=1)

    # rows = null models -> transpose so each row lines up with one node
    null_matrix = np.vstack(per_null).T

    summary = {}
    summary['orig'] = arr_to_dict(observed, G)
    summary['nulls'] = arr_to_dict(null_matrix, G)
    return summary


In [13]:
temp = do_local_efficiency(G)

In [14]:
temp.shape

(565,)

## motifs

In [9]:
# https://github.com/aestrivex/bctpy/blob/1b40e281eda081060707e30b68106ac1ebf54130/bct/algorithms/motifs.py#L405

In [9]:
from bct.algorithms.motifs import motif3struct_wei, make_motif34lib, find_motif34
from bct.utils.other import weight_conversion

In [10]:
make_motif34lib()

motif34lib already exists


In [11]:
A = nx.to_numpy_array(G, weight='trip_count_weights')

In [12]:
An = weight_conversion(A, 'normalize') # An = A / A.max()

In [13]:
I, Q, F = motif3struct_wei(An)

In [56]:
def do_motif_analysis(G):
    """Run ``motif3struct_wei`` on ``G`` and return its three outputs transposed.

    The adjacency matrix is built from the ``'trip_count_weights'`` edge
    attribute and normalized with ``weight_conversion(..., 'normalize')``.
    Unlike the other ``do_*`` helpers, errors are not caught here.
    """
    adjacency = nx.to_numpy_array(G, weight='trip_count_weights')
    normalized = weight_conversion(adjacency, 'normalize')

    motif_I, motif_Q, motif_F = motif3struct_wei(normalized)

    # Transposed, presumably so the first axis indexes nodes — TODO confirm.
    return motif_I.T, motif_Q.T, motif_F.T


def get_motif_analysis(G, G_nulls):
    """Motif statistics (I, Q, F) of ``G`` and of every graph in ``G_nulls``.

    G: the observed network.
    G_nulls: iterable of null-model graphs, evaluated in a process pool.

    Returns a dict with two entries, ``'orig'`` and ``'nulls'``; each maps
    ``'I'``, ``'Q'`` and ``'F'`` to a ``{node of G: values}`` dict. For the
    nulls, the per-model arrays are stacked along a new last axis first.
    """
    orig_result_I, orig_result_Q, orig_result_F = do_motif_analysis(G)

    with mp.Pool() as pool:
        # Use the argument rather than the notebook-global ``null_models``,
        # which the previous body read while ignoring ``G_nulls``.
        results = pool.map(do_motif_analysis, G_nulls, chunksize=1)

    results_I, results_Q, results_F = zip(*results)

    results_stacked_I = np.stack(results_I, axis=-1)
    results_stacked_Q = np.stack(results_Q, axis=-1)
    results_stacked_F = np.stack(results_F, axis=-1)

    return {
        'orig': {
            'I': arr_to_dict(orig_result_I, G),
            'Q': arr_to_dict(orig_result_Q, G),
            'F': arr_to_dict(orig_result_F, G),
        },
        'nulls': {
            'I': arr_to_dict(results_stacked_I, G),
            'Q': arr_to_dict(results_stacked_Q, G),
            'F': arr_to_dict(results_stacked_F, G),
        },
    }


## strength centrality

In [11]:
def do_strength_centrality(G):
    """Return ``(column sums, row sums)`` of ``G``'s weighted adjacency matrix.

    Weights come from the ``'tpd_weights'`` edge attribute. The original
    names the two vectors ``istr`` and ``ostr`` (presumably in- and
    out-strength). Returns ``None`` if anything raises.
    """
    try:
        weights = nx.to_numpy_array(G, weight='tpd_weights')
        column_totals = np.sum(weights, axis=0)
        row_totals = np.sum(weights, axis=1)
    except Exception:
        # Any failure is reported as None rather than raised.
        return None
    return (column_totals, row_totals)

def get_strength_centrality(G, G_nulls):
    """Strength centrality of ``G`` and of every graph in ``G_nulls``.

    G: the observed network.
    G_nulls: iterable of null-model graphs, evaluated in a process pool.

    Returns ``{'orig': {'in': {...}, 'out': {...}},
    'nulls': {'in': {...}, 'out': {...}}}``, every inner dict keyed by the
    nodes of ``G``. Null results are stacked one row per model and transposed
    before being paired with the nodes.

    Raises TypeError if ``do_strength_centrality`` returned ``None`` for ``G``
    or for any null model, because the result cannot be unpacked.
    """
    orig_result_in, orig_result_out = do_strength_centrality(G)

    with mp.Pool() as pool:
        # Use the argument rather than the notebook-global ``null_models``,
        # which the previous body read while ignoring ``G_nulls``.
        results = pool.map(do_strength_centrality, G_nulls, chunksize=1)

    results_in, results_out = zip(*results)

    results_stacked_in = np.vstack(results_in).T
    results_stacked_out = np.vstack(results_out).T

    return {
        'orig': {
            'in': arr_to_dict(orig_result_in, G),
            'out': arr_to_dict(orig_result_out, G),
        },
        'nulls': {
            'in': arr_to_dict(results_stacked_in, G),
            'out': arr_to_dict(results_stacked_out, G),
        },
    }


In [12]:
temp = get_strength_centrality(G, null_models)

In [164]:
# NOTE(review): hidden state — inside get_strength_centrality `results` is a local,
# so the previous cell does not define it at notebook level. Reading top to bottom,
# the only notebook-level `results` is the global-efficiency list, which presumably
# cannot be unzipped into (in, out) pairs. Expose the pool output (or reuse `temp`)
# before relying on this cell after a kernel restart.
null_result_in, null_result_out = zip(*results)

In [169]:
null_strength_in, null_strength_out = np.vstack(null_result_in).T, np.vstack(null_result_out).T

In [172]:
p_values_strength_in = [get_pvalue(null_dist, obs_value) for null_dist, obs_value in list(zip(null_strength_in, orig_result_in))]
p_values_strength_out = [get_pvalue(null_dist, obs_value) for null_dist, obs_value in list(zip(null_strength_out, orig_result_out))]

In [176]:
dict(sorted(list(zip(list(G.nodes()), p_values_strength_in)), key=lambda item: item[1])[:5])

{'7004': 0.2991853933763128,
 '7245': 0.3550656950544403,
 '7253': 0.38378785701356843,
 '7928': 0.41139309416458625,
 '7045': 0.41931197670767617}

## eigenvector centrality

In [10]:
# https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html

In [21]:
import multiprocessing as mp

def do_eigenvector_centrality(G):
    """Eigenvector centrality of ``G`` and of ``G.reverse()``.

    Both runs use the ``'tpd_weights'`` edge attribute. Returns the pair
    ``(centrality on G, centrality on the reversed graph)``, which the
    original names ``in_eig`` and ``out_eig``. Returns ``None`` if either
    computation raises.
    """
    try:
        forward = nx.eigenvector_centrality(G, weight='tpd_weights')
        flipped = G.reverse()
        backward = nx.eigenvector_centrality(flipped, weight='tpd_weights')
    except Exception:
        # Any failure is reported as None rather than raised.
        return None
    return forward, backward

def get_eigenvector_centrality(G, G_nulls):
    """Eigenvector centrality of ``G`` and of every graph in ``G_nulls``.

    G: the observed network.
    G_nulls: iterable of null-model graphs, evaluated in a process pool.

    Returns ``{'orig': {'in': ..., 'out': ...}, 'nulls': {'in': ..., 'out': ...}}``.
    The ``'orig'`` values are the dicts returned by
    ``do_eigenvector_centrality``; each ``'nulls'`` value maps every node of
    ``G`` to the list of that node's centralities across the null models.

    Raises TypeError if ``do_eigenvector_centrality`` returned ``None`` for
    ``G`` or for any null model, and KeyError if a null model lacks a node
    of ``G``.
    """
    orig_result_in, orig_result_out = do_eigenvector_centrality(G)

    with mp.Pool() as pool:
        # Use the argument rather than the notebook-global ``null_models``,
        # which the previous body read while ignoring ``G_nulls``.
        results = pool.map(do_eigenvector_centrality, G_nulls, chunksize=1)

    null_result_in, null_result_out = zip(*results)

    nodes = list(G.nodes())
    null_result_in_agg = {k: [d[k] for d in null_result_in] for k in nodes}
    null_result_out_agg = {k: [d[k] for d in null_result_out] for k in nodes}

    return {
        'orig': {'in': orig_result_in, 'out': orig_result_out},
        'nulls': {'in': null_result_in_agg, 'out': null_result_out_agg},
    }


In [22]:
temp = get_eigenvector_centrality(G, null_models)

In [19]:
orig_result_in = nx.eigenvector_centrality(G, weight='tpd_weights')
orig_result_out = nx.eigenvector_centrality(G.reverse(), weight='tpd_weights')

In [25]:
null_result_in, null_result_out = zip(*results)

In [33]:
null_result_in_agg = {}
for k in list(G.nodes()):
  null_result_in_agg[k] = list(d[k] for d in null_result_in)

null_result_out_agg = {}
for k in list(G.nodes()):
  null_result_out_agg[k] = list(d[k] for d in null_result_out)

In [138]:
pvalue_eig_in = dict([(node, get_pvalue(null_result_in_agg[node], orig_result_in[node])) for node in list(G.nodes())])
pvalue_eig_out = dict([(node, get_pvalue(null_result_out_agg[node], orig_result_out[node])) for node in list(G.nodes())])

In [139]:
dict(sorted(pvalue_eig_in.items(), key=lambda item: item[1])[:5])

{'7033': 3.443399557134471e-09,
 '7408': 5.114139007848561e-09,
 '7017': 6.745208009582761e-09,
 '7202': 4.931274533195474e-07,
 '7012': 5.026564061613139e-07}

In [140]:
dict(sorted(pvalue_eig_out.items(), key=lambda item: item[1])[:5])

{'7408': 1.1623695027589606e-07,
 '7161': 1.452443961699128e-07,
 '7250': 1.3262246807918202e-06,
 '7020': 2.4139085705415033e-06,
 '7012': 3.514325117420618e-06}

## betweenness centrality

In [99]:
from bct.algorithms.centrality import betweenness_wei
from bct.utils.other import weight_conversion

In [100]:
A = nx.to_numpy_array(G, weight='duration_weights')

In [101]:
L = weight_conversion(A, 'lengths')

In [102]:
BC = betweenness_wei(L)

In [29]:
import multiprocessing as mp
from bct.algorithms.centrality import betweenness_wei
from bct.utils.other import weight_conversion

def do_betweenness_centrality(G):
    """Return ``betweenness_wei`` of ``G``, or ``None`` on any error.

    The adjacency matrix is built from the ``'duration_weights'`` edge
    attribute and converted with ``weight_conversion(..., 'lengths')``
    before being handed to ``betweenness_wei``.
    """
    try:
        adjacency = nx.to_numpy_array(G, weight='duration_weights')
        lengths = weight_conversion(adjacency, 'lengths')
        return betweenness_wei(lengths)
    except Exception:
        # Any failure is reported as None rather than raised.
        return None

def get_betweenness_centrality(G, G_nulls):
    """Betweenness centrality of ``G`` and of every graph in ``G_nulls``.

    G: the observed network.
    G_nulls: iterable of null-model graphs, evaluated in a process pool.

    Returns ``{'orig': {node: value}, 'nulls': {node: values across nulls}}``,
    both keyed by the nodes of ``G``. Null results are stacked one row per
    model and transposed before being paired with the nodes.
    NOTE(review): a ``None`` from ``do_betweenness_centrality`` is not
    filtered out here.
    """
    orig_results = do_betweenness_centrality(G)

    with mp.Pool() as pool:
        # Use the argument rather than the notebook-global ``null_models``,
        # which the previous body read while ignoring ``G_nulls``.
        results = pool.map(do_betweenness_centrality, G_nulls, chunksize=1)

    results_stacked = np.vstack(results).T

    return {'orig': arr_to_dict(orig_results, G), 'nulls': arr_to_dict(results_stacked, G)}


In [30]:
temp = get_betweenness_centrality(G, null_models)

In [120]:
# NOTE(review): hidden state — inside get_betweenness_centrality `results` is a local,
# so the previous cell does not define it at notebook level. Reading top to bottom,
# the only notebook-level `results` is the global-efficiency list, so after a kernel
# restart this would stack the wrong data. Expose the pool output (or reuse `temp`)
# before relying on this cell.
null_betweenness = np.vstack(results).T

In [141]:
p_values_betweenness = [get_pvalue(null_dist, obs_value) for null_dist, obs_value in list(zip(null_betweenness, BC))]

In [158]:
dict(sorted(list(zip(list(G.nodes()), p_values_betweenness)), key=lambda item: item[1])[:5])

{'7171': 0.0, '7146': 0.0, '7133': 0.0, '7095': 0.0, '7180': 0.0}

## all at the same time

In [3]:
import glob
import networkx as nx
from tqdm.notebook import tqdm
import numpy as np
import multiprocessing as mp
from bct.algorithms.centrality import betweenness_wei
from bct.algorithms.efficiency import efficiency_wei
from bct.utils.other import weight_conversion
import sys
import pickle
import os

def read_network(filename):
    """Read the GML file at ``filename`` and return the cleaned graph.

    Cleaning happens in two steps, in this order: drop all self-loop edges,
    then drop every node that has no edges left.
    """
    network = nx.read_gml(filename)

    # Collect first, then mutate.
    self_loops = list(nx.selfloop_edges(network))
    network.remove_edges_from(self_loops)

    isolated = list(nx.isolates(network))
    network.remove_nodes_from(isolated)

    return network

def arr_to_dict(arr, G):
    """Map the nodes of ``G`` (in ``G.nodes()`` order) to the items of ``arr``.

    Items are matched by position; surplus nodes or items are ignored, as
    with ``zip``.
    """
    mapping = {}
    for node, item in zip(list(G.nodes()), arr):
        mapping[node] = item
    return mapping

## global efficiency

def do_global_efficiency(G):
    """Global ``efficiency_wei`` of ``G``; ``None`` if any step raises.

    Edge weights are read from ``'duration_weights'`` and normalized with
    ``weight_conversion(..., 'normalize')`` before the efficiency call.
    """
    try:
        weights = nx.to_numpy_array(G, weight='duration_weights')
        normalized = weight_conversion(weights, 'normalize')
        value = efficiency_wei(normalized, 'global')
    except Exception:
        # Best-effort: failures become None.
        return None
    return value

def get_global_efficiency(G, G_nulls):
    """Global efficiency of ``G`` and of each null model in ``G_nulls``.

    Returns ``{'orig': <value for G>, 'nulls': [<value per null model>]}``;
    the null models are evaluated in a process pool, one graph per task.
    """
    observed = do_global_efficiency(G)

    with mp.Pool() as workers:
        null_values = workers.map(do_global_efficiency, G_nulls, chunksize=1)

    return dict(orig=observed, nulls=null_values)

## strength centrality

def do_strength_centrality(G):
    """Column and row sums of ``G``'s ``'tpd_weights'`` adjacency matrix.

    Returns the tuple ``(sum over axis 0, sum over axis 1)``, which the
    original names ``istr`` and ``ostr`` (presumably in- and out-strength).
    Returns ``None`` if anything raises.
    """
    try:
        adjacency = nx.to_numpy_array(G, weight='tpd_weights')
        return (np.sum(adjacency, axis=0), np.sum(adjacency, axis=1))
    except Exception:
        # Best-effort: failures become None.
        return None

def get_strength_centrality(G, G_nulls):
    """Strength centrality of ``G`` and of each null model in ``G_nulls``.

    Returns ``{'orig': {'in': ..., 'out': ...}, 'nulls': {'in': ..., 'out': ...}}``
    with every inner dict keyed by the nodes of ``G``. Null vectors are
    stacked one row per model and transposed before being paired with nodes.
    """
    observed_in, observed_out = do_strength_centrality(G)

    with mp.Pool() as workers:
        per_null = workers.map(do_strength_centrality, G_nulls, chunksize=1)

    # Split the (in, out) pairs into two parallel tuples.
    null_in, null_out = zip(*per_null)

    null_in_matrix = np.vstack(null_in).T
    null_out_matrix = np.vstack(null_out).T

    observed = {
        'in': arr_to_dict(observed_in, G),
        'out': arr_to_dict(observed_out, G),
    }
    nulls = {
        'in': arr_to_dict(null_in_matrix, G),
        'out': arr_to_dict(null_out_matrix, G),
    }
    return {'orig': observed, 'nulls': nulls}

## eigenvector centrality

def do_eigenvector_centrality(G):
    """Weighted eigenvector centrality on ``G`` and on ``G.reverse()``.

    Both calls use ``weight='tpd_weights'``. Returns the pair
    ``(result on G, result on the reversed graph)``, named ``in_eig`` and
    ``out_eig`` in the original. Returns ``None`` if either call raises.
    """
    try:
        on_graph = nx.eigenvector_centrality(G, weight='tpd_weights')
        on_reversed = nx.eigenvector_centrality(G.reverse(), weight='tpd_weights')
    except Exception:
        # Best-effort: failures become None.
        return None
    return on_graph, on_reversed

def get_eigenvector_centrality(G, G_nulls):
    """Eigenvector centrality of ``G`` and of each null model in ``G_nulls``.

    Returns ``{'orig': {'in': ..., 'out': ...}, 'nulls': {'in': ..., 'out': ...}}``.
    The ``'orig'`` values are the dicts from ``do_eigenvector_centrality``;
    each ``'nulls'`` value maps every node of ``G`` to the list of that
    node's centralities across the null models.
    """
    def _gather_by_node(per_null_dicts):
        # For each node of G, collect its value from every null-model dict.
        return {
            node: [scores[node] for scores in per_null_dicts]
            for node in list(G.nodes())
        }

    observed_in, observed_out = do_eigenvector_centrality(G)

    with mp.Pool() as workers:
        per_null = workers.map(do_eigenvector_centrality, G_nulls, chunksize=1)

    null_in, null_out = zip(*per_null)

    null_in_by_node = _gather_by_node(null_in)
    null_out_by_node = _gather_by_node(null_out)

    return {
        'orig': {'in': observed_in, 'out': observed_out},
        'nulls': {'in': null_in_by_node, 'out': null_out_by_node},
    }


## betweenness centrality

def do_betweenness_centrality(G):
    """``betweenness_wei`` of ``G``; ``None`` if any step raises.

    Edge weights are read from ``'duration_weights'`` and converted with
    ``weight_conversion(..., 'lengths')`` before the betweenness call.
    """
    try:
        weights = nx.to_numpy_array(G, weight='duration_weights')
        lengths = weight_conversion(weights, 'lengths')
        betweenness = betweenness_wei(lengths)
    except Exception:
        # Best-effort: failures become None.
        return None
    return betweenness

def get_betweenness_centrality(G, G_nulls):
    """Betweenness centrality of ``G`` and of each null model in ``G_nulls``.

    Returns ``{'orig': {node: value}, 'nulls': {node: values across nulls}}``,
    both keyed by the nodes of ``G``. Null vectors are stacked one row per
    model and transposed before being paired with nodes.
    """
    observed = do_betweenness_centrality(G)

    with mp.Pool() as workers:
        per_null = workers.map(do_betweenness_centrality, G_nulls, chunksize=1)

    null_matrix = np.vstack(per_null).T

    summary = {}
    summary['orig'] = arr_to_dict(observed, G)
    summary['nulls'] = arr_to_dict(null_matrix, G)
    return summary


In [16]:
import re

pattern = re.compile(r"Processed\/([a-z]+)\/average\/([a-z]+)\/.*\.gml")

## main

# usertype = sys.argv[1]
# nettype = "months"

# orig_filepaths = glob.glob(f'Processed/{usertype}/average/{nettype}/*.gml')
orig_filepaths = ["Processed/years/average/months/network-12.gml", "Processed/members/average/months/network-12.gml"]

# Metrics computed for every network: (sub-directory under results/, function
# returning the {'orig': ..., 'nulls': ...} payload that gets pickled).
metric_jobs = [
    ('efficiency/global', get_global_efficiency),
    ('centrality/strength', get_strength_centrality),
    ('centrality/eigenvector', get_eigenvector_centrality),
    ('centrality/betweenness', get_betweenness_centrality),
]

for orig_filepath in orig_filepaths:

    # Match once and check the result: a path outside the expected
    # 'Processed/<usertype>/average/<nettype>/*.gml' layout used to crash
    # with AttributeError on `None.group(...)`.
    path_match = pattern.match(orig_filepath)
    if path_match is None:
        print(f"skipping {orig_filepath}: path does not match {pattern.pattern}")
        continue

    usertype, nettype = path_match.group(1), path_match.group(2)
    print(usertype, nettype)

    print(f"processing {orig_filepath}...")

    # File stem, e.g. 'network-12' for '.../network-12.gml'.
    filename = orig_filepath.split('/')[-1].split('.')[0]

    G = read_network(orig_filepath)

    # NOTE(review): str.replace rewrites every occurrence of `nettype` in the
    # path, not only the directory component — confirm no file stem contains it.
    null_models_pattern = orig_filepath.replace(f'{nettype}', f'{nettype}_null').replace('.gml', "-*.gml")

    null_models_paths = glob.glob(null_models_pattern)

    with mp.Pool() as pool:
        null_models = pool.map(read_network, null_models_paths, chunksize=1)

    for metric_dir, compute_metric in metric_jobs:
        out_path = os.path.join(f'results/{metric_dir}/{usertype}/{nettype}', f'{filename}.pkl')

        # An existing pickle acts as a cache: skip metrics already computed.
        if os.path.isfile(out_path):
            continue

        metric_result = compute_metric(G, null_models)

        # Create the output directory on demand; previously open() failed with
        # FileNotFoundError when the results tree did not exist yet.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, 'wb') as f:
            pickle.dump(metric_result, f)

years months
processing Processed/years/average/months/network-12.gml...
members months
processing Processed/members/average/months/network-12.gml...
