# Weighted CELL

This notebook shows an example that combines DirectedCELL with node embeddings and a regression model to predict edge weights in a synthetic graph.

In [1]:
# Load the Messages graph
import networkx as nx
import os
from directed_cell.utils import load_dataset, graph_summary, to_unweighted
# Path to the edge list, resolved against the current working directory.
messages_path = os.path.join(os.getcwd(), 'datasets', 'facebook_messages.txt')

# Read the file as a directed graph. The format list presumably names the
# comma-separated columns in order — confirm against load_dataset.
G_weighted = load_dataset(
    messages_path,
    directed=True,
    delim=',',
    format=['from', 'to', 'weight'],
)

# Derive the unweighted counterpart, which the CELL model is trained on later.
G_unweighted = to_unweighted(G_weighted)

# Print summary statistics of the unweighted graph.
print(graph_summary(G_unweighted))

{'|N|': 1899, '|E|': 20291, '#Nodes in largest SCC': 1294}


## Train a DirectedCELL model on an unweighted version of the graph

In [2]:
from directed_cell.cell import CELL
import directed_cell.options as options

# Adjacency matrix of the unweighted graph in SciPy sparse form.
# NOTE(review): nx.to_scipy_sparse_matrix is deprecated in newer NetworkX
# releases — confirm the pinned NetworkX version still provides it.
A_unweighted = nx.to_scipy_sparse_matrix(G_unweighted)

# Configure the directed CELL model. The criterion receives the same adjacency
# matrix; presumably it checks the edge overlap every 10 steps and ends
# training once the overlap reaches 0.52 — confirm against EdgeOverlapCriterion.
model = CELL(
    A=A_unweighted,
    H=25,
    loss_fn=options.RegularLossFunction(),
    sampling_fn=options.SampleGraphRegular(),
    criterion=options.EdgeOverlapCriterion(
        A=A_unweighted, interval=10, overlap=0.52, verbose=True, directed=True
    ),
    augmentation_denominator=10,
    directed=True,
)

# Train with the settings below; verbose=False is passed to train itself, while
# the criterion above was created with verbose=True.
model.train(steps=200, lr=0.1, weight_decay=1e-7, verbose=False)

  from .autonotebook import tqdm as notebook_tqdm


performing strong connectivity augmentation...
overlap: 0.0055689714651816075
overlap: 0.2243851954068306
overlap: 0.3602089596372776
overlap: 0.42782514415257994
overlap: 0.47937509240549997
overlap: 0.5078113449312504
overlap: 0.5326992262579469


## Sample an unweighted graph and make it weighted

In [10]:
from directed_cell.weight_regression_cell import WeightRegressor
from directed_cell.embeddings import Node2VecEmbedder
from xgboost import XGBRegressor

# Set up the weight regressor from the original weighted graph, an XGBoost
# regressor with default settings, and a Node2Vec embedder created with workers=5.
weight_regressor = WeightRegressor(
    G=G_weighted,
    regression_model=XGBRegressor(),
    embedder=Node2VecEmbedder(workers=5),
)

# Draw one sample from the trained CELL model and wrap it in a directed graph.
G_hat_unweighted = nx.DiGraph(model.sample_graph())

# Fit the regressor and obtain G_hat; presumably G_hat is the sampled graph with
# predicted edge weights attached — confirm against WeightRegressor.
G_hat = weight_regressor.train_regression_model_and_sample_graph(
    G_hat_unweighted,
    verbose=True,
)

Embedding input and output graph


Computing transition probabilities: 100%|██████████| 3798/3798 [00:06<00:00, 607.10it/s] 


Creating features from embeddings
Training regressor
Metrics for regression:
      mean absolute error:               71.782
    median absolute error:               52.817
       mean squared error:            15468.193
                max error:             2949.147
                 r2 score:                0.245
 explained variance score:                0.245

Percentiles:
                        5:              -88.536
                       25:              -60.795
                       50:              -33.757
                       75:               18.368
                       95:              197.354
predictions during training: 31.688597 1342.3553
predictions on test set 29.377153 1342.3553


## Compare evaluation metrics between synthetic and original weighted graph

In [None]:
# Evaluate the generated weighted graph and the original graph with the same pipeline
from directed_cell.evaluation import weighted_evaluation_pipeline
import pandas as pd
# One pipeline instance is used for both the generated graph and the original.
evaluation_pipeline = weighted_evaluation_pipeline(verbose=True)

# Statistics for the list of generated graphs (here just G_hat) ...
stats = evaluation_pipeline.evaluate_G_hats([G_hat])
# ... and for the original weighted graph.
G_stats = evaluation_pipeline.evaluate_G(G_weighted)

# Place the original graph's 'value' column next to the generated-graph
# statistics and round everything to three decimals.
concat_stats = pd.concat(
    [stats, G_stats['value']],
    axis=1,
).round(3)

# Display the table with the 'value' column relabelled; rename returns a new
# frame, so concat_stats itself keeps the original column name.
concat_stats.rename(columns={'value': 'ground truth'})