# Demo of how to use the Directed CELL model

In [1]:
# Load the Facebook messages graph (datasets/facebook_messages.txt)
import os

import networkx as nx

from directed_cell.utils import graph_summary, load_dataset, to_unweighted

# The path is resolved against the current working directory, so the notebook
# must be started from the directory that contains `datasets/`.
dataset_path = os.path.join(os.getcwd(), 'datasets', 'facebook_messages.txt')

# Arguments request a directed graph from a ','-delimited file whose columns
# are named from / to / weight (presumably one edge per line -- confirm
# against load_dataset).
G_weighted = load_dataset(dataset_path, directed=True, delim=',', format=['from', 'to', 'weight'])
# The rest of the notebook works on the unweighted version of the graph.
G = to_unweighted(G_weighted)
# Print some summarizing statistics
print(graph_summary(G))

{'|N|': 1899, '|E|': 20291, '#Nodes in largest SCC': 1294}


## The Directed CELL model

In [2]:
from scipy import sparse

from directed_cell.cell import CELL
import directed_cell.options as options

# Adjacency matrix of G as a scipy sparse matrix.
# nx.to_scipy_sparse_matrix was deprecated in NetworkX 2.7 and removed in 3.0.
# Use it where it still exists; otherwise build the adjacency with the
# replacement API and wrap it so that A keeps the scipy sparse-*matrix* type
# (the replacement returns a sparse array, whose operators behave differently).
if hasattr(nx, 'to_scipy_sparse_matrix'):
    A = nx.to_scipy_sparse_matrix(G)
else:
    A = sparse.csr_matrix(nx.to_scipy_sparse_array(G))

# Configure the model on the adjacency matrix of the directed graph.
model = CELL(
    A = A,
    H = 25,
    n_edges = len(G.edges),
    loss_fn = options.RegularLossFunction(), # can also be e.g. LazyLossFunction()
    sampling_fn = options.SampleGraphRegular(), # can also be e.g. SampleGraphLazy()
    # NOTE(review): judging by the logged output, the criterion reports the edge
    # overlap periodically and training ends once it exceeds `overlap` -- confirm
    # against EdgeOverlapCriterion.
    criterion = options.EdgeOverlapCriterion(
        A = A,
        interval = 10,
        overlap = 0.52,
        verbose = True,
    ),
    augmentation_denominator=10,
    directed=True
)

# Train for at most 200 steps; progress is reported by the criterion above
# (verbose = True there), not by train() itself.
model.train(
    steps = 200,
    lr = 0.1,
    weight_decay = 1e-7,
    verbose = False
)

  from .autonotebook import tqdm as notebook_tqdm


overlap: 0.0063574983983046675
overlap: 0.23069341087181508
overlap: 0.35271795377260856
overlap: 0.4264452220196146
overlap: 0.4825292001379922
overlap: 0.510571189197181
overlap: 0.530678625991819


In [3]:
# Draw a handful of synthetic graphs from the trained model and compute
# the evaluation statistics over them.
from directed_cell.evaluation import directed_evaluation_pipeline

n_graphs = 10
evaluation_pipeline = directed_evaluation_pipeline(verbose=True)

# One model.sample_graph() call per synthetic graph; each sample is turned
# into a networkx DiGraph as soon as it is drawn.
G_hats = list(map(nx.DiGraph, (model.sample_graph() for _ in range(n_graphs))))
stats = evaluation_pipeline.evaluate_G_hats(G_hats)

evaluating max. in-deg
evaluating min. in-deg
evaluating max. out-deg
evaluating min. out-deg
evaluating power law exp. (in)
evaluating power law exp. (out)
evaluating gini coef. (in)
evaluating gini coef (out)
evaluating assortativity
evaluating avg. loc. clust. coef.
evaluating und. wedge count
evaluating dir. wedge count
evaluating und. triangle count
evaluating dir. triangle count
evaluating und. square count
evaluating dir. square count
evaluating char. path. len.
evaluating diameter
evaluating largest scc


In [4]:
# Run the same evaluation pipeline on the original graph G; its 'value'
# column is used below as the ground truth for the synthetic-graph statistics.
G_stats = evaluation_pipeline.evaluate_G(G)

evaluating max. in-deg
evaluating min. in-deg
evaluating max. out-deg
evaluating min. out-deg
evaluating power law exp. (in)
evaluating power law exp. (out)
evaluating gini coef. (in)
evaluating gini coef (out)
evaluating assortativity
evaluating avg. loc. clust. coef.
evaluating und. wedge count
evaluating dir. wedge count
evaluating und. triangle count
evaluating dir. triangle count
evaluating und. square count
evaluating dir. square count
evaluating char. path. len.
evaluating diameter
evaluating largest scc


In [12]:
import pandas as pd

# Place the original graph's values next to the synthetic-graph statistics,
# rounded to three decimals.
concat_stats = pd.concat(objs=[stats, G_stats['value']], axis='columns').round(decimals=3)
# Displayed with a clearer header; concat_stats itself keeps the 'value' column name.
concat_stats.rename({'value': 'ground truth'}, axis='columns')

Unnamed: 0,metric,ci_0.95l,ci_0.95u,synth. mean,synth. std. err.,ground truth
0,max. in-deg,49.867,53.133,51.5,1.633,237.0
1,min. in-deg,0.539,1.061,0.8,0.261,0.0
2,max. out-deg,295.129,305.071,300.1,4.971,137.0
3,min. out-deg,0.0,0.0,0.0,0.0,0.0
4,power law exp. (in),1.298,1.299,1.299,0.001,1.394
5,power law exp. (out),1.373,1.375,1.374,0.001,1.33
6,gini coef. (in),0.533,0.535,0.534,0.001,0.738
7,gini coef (out),0.718,0.72,0.719,0.001,0.618
8,assortativity,0.125,0.13,0.127,0.003,-0.138
9,avg. loc. clust. coef.,0.076,0.078,0.077,0.001,0.087
