# Module

In [1]:
# From the repository
from util import *
from read_data import *
data_names = list(name2file_name.keys())
print(data_names)

# Basic modules
import os
import glob
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from graph_tool.all import *

#name = "wiod2016"
name = data_names[3]
print("We are going to use: " + name)
# Enable original_format to use the format as originally provided
data_dict = get_data(name,original_format=False)
data_dict.keys()
#df_nodes = data_dict["df_nodes"]
df_edges = data_dict["df_edges"]

df_edges = df_edges[["source","target"]].drop_duplicates()
cond = df_edges["source"] != df_edges["target"]
df_edges = df_edges.loc[cond].copy()

['blogcatalog', 'homosapiens', 'wikipos', 'enron', 'unvote', 'untrade', 'uslegis_net', 'uslegis_net_small_dyn', 'uslegis_net_dyn', 'uslegis_hyp_dyn', 'contacts', 'dawn_net', 'dawn_hyp', 'ndc_net', 'ndc_hyp', 'coauth_dblp_net', 'coauth_dblp_hyp', 'wiod2016', 'wiod2013', 'wiodlong', 'eth', 'bitcoinalpha', 'bitcoinotc', 'uscourt']
We are going to use: enron


# Create Grah Object

In [2]:
g = Graph()
uni_nodes = list(set(df_edges["source"]))
uni_nodes.extend(list(set(df_edges["target"])))
uni_nodes = list(set(uni_nodes))  

node2index = dict()
for i in range(len(uni_nodes)):
    node2index.update({uni_nodes[i]:i})
    

g.add_vertex(len(uni_nodes))
weight = g.new_edge_property("double")


for i in range(len(df_edges)):
    
    source_index = node2index[df_edges["source"].iloc[i]]
    target_index = node2index[df_edges["target"].iloc[i]]
    e = g.add_edge(g.vertex(source_index),g.vertex(target_index))
    weight[e] = 1

g.edge_properties["weight"] = weight

# Approximation but it works well

Return stochastic block model

In [4]:
%%time
state_ndc = minimize_nested_blockmodel_dl(g, state_args=dict(deg_corr=False))
state_dc  = minimize_nested_blockmodel_dl(g, state_args=dict(deg_corr=True))

print("Non-degree-corrected DL:\t", state_ndc.entropy())
print("Degree-corrected DL:\t", state_dc.entropy())

Non-degree-corrected DL:	 8045.926370061207
Degree-corrected DL:	 7948.366998416335
CPU times: user 1.52 s, sys: 2.33 ms, total: 1.52 s
Wall time: 1.52 s


# If you want to refine your result

In [5]:
%%time
if 1 == 0:
    state_ndc = NestedBlockState(g,state_args=dict(deg_corr=False))
    state_dc = NestedBlockState(g,state_args=dict(deg_corr=True))


    # Now we run 1000 sweeps of the MCMC

    dS, nmoves = 0, 0
    for i in range(100):
        ret = state_ndc.multiflip_mcmc_sweep(niter=10)
        dS += ret[0]
        nmoves += ret[1]
    print("Change in description length:", dS)
    print("Number of accepted vertex moves:", nmoves)


    dS, nmoves = 0, 0
    for i in range(100):
        ret = state_dc.multiflip_mcmc_sweep(niter=10)
        dS += ret[0]
        nmoves += ret[1]
    print("Change in description length:", dS)
    print("Number of accepted vertex moves:", nmoves)


    print("Non-degree-corrected DL:\t", state_ndc.entropy())
    print("Degree-corrected DL:\t", state_dc.entropy())

CPU times: user 121 µs, sys: 0 ns, total: 121 µs
Wall time: 129 µs


# View Result

In [6]:
#state_ndc.print_summary()
levels = state_ndc.get_levels()
num_levels = len(levels)
for s in levels:
    print(s)
    if s.get_N() == 1:
        break

<BlockState object with 182 blocks (24 nonempty), for graph <Graph object, directed, with 182 vertices and 3007 edges, 1 internal edge property, at 0x7f5b2439c820>, at 0x7f5ac437bd00>
<BlockState object with 49 blocks (9 nonempty), for graph <Graph object, directed, with 182 vertices and 389 edges, at 0x7f5ac437bf10>, at 0x7f5ac4388700>
<BlockState object with 10 blocks (3 nonempty), for graph <Graph object, directed, with 49 vertices and 79 edges, at 0x7f5ac437b970>, at 0x7f5ac4388400>
<BlockState object with 3 blocks (1 nonempty), for graph <Graph object, directed, with 10 vertices and 9 edges, at 0x7f5ac4389b10>, at 0x7f5ac43897e0>
<BlockState object with 1 blocks (1 nonempty), for graph <Graph object, directed, with 3 vertices and 1 edge, at 0x7f5ac438a8f0>, at 0x7f5ac438a590>


In [7]:
node_id = 4
r = node_id
for i in range(num_levels):
    r = levels[i].get_blocks()[r]    # group membership of node 46 in level 0
    print("Group membership of node " + str(node_id) + " in level " + str(i+1) + ": " + str(r))

Group membership of node 4 in level 1: 129
Group membership of node 4 in level 2: 17
Group membership of node 4 in level 3: 8
Group membership of node 4 in level 4: 0
Group membership of node 4 in level 5: 0
Group membership of node 4 in level 6: 0
Group membership of node 4 in level 7: 0
Group membership of node 4 in level 8: 0
Group membership of node 4 in level 9: 0


In [8]:
df_nodes = pd.DataFrame(node2index.items())
df_nodes.columns = ["Id","index"]
for choose_level in range(num_levels):
    
    b = levels[choose_level].get_blocks()

    out = []
    for i in range(len(df_nodes)):
        ind = df_nodes["index"].iloc[i]
        out.append(b[ind])

    df_nodes["sbm_" + str(choose_level)] = out

In [9]:
df_nodes

Unnamed: 0,Id,index,sbm_0,sbm_1,sbm_2,sbm_3,sbm_4,sbm_5,sbm_6,sbm_7,sbm_8
0,1,0,24,18,5,0,0,0,0,0,0
1,2,1,170,18,8,0,0,0,0,0,0
2,3,2,181,13,8,0,0,0,0,0,0
3,4,3,101,29,9,0,0,0,0,0,0
4,5,4,129,13,5,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
177,180,177,181,15,0,0,0,0,0,0,0
178,181,178,101,13,0,0,0,0,0,0,0
179,182,179,19,18,0,0,0,0,0,0,0
180,183,180,101,32,0,0,0,0,0,0,0


# Export results

In [10]:
df_nodes.to_csv("./tables/df_nested_sbm.csv",index=False)