# Direction Classification CELL

This notebook demonstrates the direction classification model, which combines an undirected GGM, node embeddings, and a classification model to create synthetic directed graphs.

In [1]:
# Load the Messages graph
import networkx as nx
import os
from directed_cell.utils import load_dataset, graph_summary, to_unweighted

# The dataset path is resolved relative to the current working directory,
# so the notebook must be started from the directory containing `datasets/`.
messages_path = os.path.join(os.getcwd(), 'datasets', 'facebook_messages.txt')

# Columns are declared as from, to, weight, with ',' as the delimiter.
G_weighted = load_dataset(
    messages_path,
    directed=True,
    delim=',',
    format=['from', 'to', 'weight'],
)

# G (the result of to_unweighted) is the graph used by the later cells.
G = to_unweighted(G_weighted)

# Print some summarizing statistics of G
G_summary = graph_summary(G)
print(G_summary)

{'|N|': 1899, '|E|': 20291, '#Nodes in largest SCC': 1294}


## The DirClassCELL model

In [2]:
from directed_cell.cell import CELL
import directed_cell.options as options

# Step 1: train an undirected CELL model on the undirected view of G.
G_undirected = G.to_undirected()

# Constructor inputs are built one by one, in the same order in which the
# original single call evaluated them.
cell_adjacency = nx.to_scipy_sparse_matrix(G_undirected)
n_undirected_edges = len(G_undirected.edges)
regular_loss = options.RegularLossFunction()
undirected_sampler = options.SampleGraphUndirectedCELL(verbose=True)

# The criterion receives its own, separately converted adjacency matrix.
# NOTE(review): presumably training stops once the edge overlap reaches 0.52,
# checked every 10 steps -- confirm against EdgeOverlapCriterion.
# NOTE(review): the two conversions are kept separate on purpose; confirm
# whether CELL modifies its `A` in place before sharing a single matrix.
overlap_criterion = options.EdgeOverlapCriterion(
    A=nx.to_scipy_sparse_matrix(G_undirected),
    interval=10,
    overlap=0.52,
    verbose=True,
    directed=False,
)

model = CELL(
    A=cell_adjacency,
    H=25,
    n_edges=n_undirected_edges,
    loss_fn=regular_loss,
    sampling_fn=undirected_sampler,
    criterion=overlap_criterion,
    augmentation_denominator=10,
    directed=False,
)

# Fit the model; progress lines in the output come from the verbose
# sampler/criterion objects, not from train() itself (verbose=False here).
model.train(steps=200, lr=0.1, weight_decay=1e-7, verbose=False)

  from .autonotebook import tqdm as notebook_tqdm


performing weak connectivity augmentation...
generated edges: 13833.0 , desired: 13833
overlap: 0.00831345333622497
generated edges: 13833.0 , desired: 13833
overlap: 0.28692257644762525
generated edges: 13833.0 , desired: 13833
overlap: 0.43352851875948817
generated edges: 13833.0 , desired: 13833
overlap: 0.4907829104315767
generated edges: 13833.0 , desired: 13833
overlap: 0.5225909058049591


## Sample an undirected graph and make it directed

In [4]:
from directed_cell.direction_classification_cell import DirectionClassifier
from directed_cell.embeddings import Node2VecEmbedder
from xgboost import XGBClassifier

# Step 2: sample a graph from the trained model, embed it, and use the
# classifier to turn the undirected sample into a directed graph.
undirected_sample = model.sample_graph()
G_hat_undirected = nx.Graph(undirected_sample)

# The direction classifier is configured with the original directed graph G,
# a Node2Vec embedder (4 workers) and a default-configured XGBoost classifier.
node_embedder = Node2VecEmbedder(workers=4)
edge_classifier = XGBClassifier()
direction_classifier = DirectionClassifier(
    G=G,
    embedder=node_embedder,
    classifier=edge_classifier,
)

# Train the classifier and produce the directed sample in one call; the
# result is wrapped in a DiGraph for the evaluation cell.
directed_sample = direction_classifier.train_classifier_and_sample_graph(
    G_hat_undirected,
    verbose=True,
)
G_hat = nx.DiGraph(directed_sample)

generated edges: 13833.0 , desired: 13833
Sampling undirected graph
Embedding input and output graph


Computing transition probabilities: 100%|██████████| 3798/3798 [00:17<00:00, 220.74it/s]


Creating features from embeddings
Training classifier
              precision    recall  f1-score   support

           0       0.75      0.38      0.50      7375
           1       0.75      0.38      0.50      7375
           2       0.56      0.88      0.68     12916

    accuracy                           0.61     27666
   macro avg       0.68      0.54      0.56     27666
weighted avg       0.66      0.61      0.59     27666

(1899, 1899)


In [5]:
# Evaluate the sampled graph and the original graph with the same pipeline
from directed_cell.evaluation import directed_evaluation_pipeline
import pandas as pd

evaluation_pipeline = directed_evaluation_pipeline(verbose=True)

# Synthetic statistics first, then the ground-truth statistics of G.
stats = evaluation_pipeline.evaluate_G_hats([G_hat])
G_stats = evaluation_pipeline.evaluate_G(G)

# Place the ground-truth 'value' column next to the synthetic statistics and
# round everything to three decimals.
ground_truth_column = G_stats['value']
concat_stats = pd.concat([stats, ground_truth_column], axis='columns').round(3)

# Displayed with the appended column relabelled; `concat_stats` itself keeps
# the original 'value' column name because the rename is not assigned back.
concat_stats.rename(columns={'value': 'ground truth'})

evaluating max. in-deg
evaluating min. in-deg
evaluating max. out-deg
evaluating min. out-deg
evaluating power law exp. (in)
evaluating power law exp. (out)
evaluating gini coef. (in)
evaluating gini coef (out)
evaluating assortativity
evaluating avg. loc. clust. coef.
evaluating und. wedge count
evaluating dir. wedge count
evaluating und. triangle count
evaluating dir. triangle count
evaluating und. square count
evaluating dir. square count
evaluating char. path. len.
evaluating diameter
evaluating largest scc
evaluating max. in-deg
evaluating min. in-deg
evaluating max. out-deg
evaluating min. out-deg
evaluating power law exp. (in)
evaluating power law exp. (out)
evaluating gini coef. (in)
evaluating gini coef (out)
evaluating assortativity
evaluating avg. loc. clust. coef.
evaluating und. wedge count
evaluating dir. wedge count
evaluating und. triangle count
evaluating dir. triangle count
evaluating und. square count
evaluating dir. square count
evaluating char. path. len.
evaluatin

Unnamed: 0,metric,ci_0.95l,ci_0.95u,synth. mean,synth. std. err.,ground truth
0,max. in-deg,,,391.0,,237.0
1,min. in-deg,,,0.0,,0.0
2,max. out-deg,,,329.0,,137.0
3,min. out-deg,,,0.0,,0.0
4,power law exp. (in),,,1.328,,1.394
5,power law exp. (out),,,1.29,,1.33
6,gini coef. (in),,,0.675,,0.738
7,gini coef (out),,,0.574,,0.618
8,assortativity,,,-0.241,,-0.138
9,avg. loc. clust. coef.,,,0.068,,0.087
