# CELL

In [3]:
# Show the current working directory; the relative './data/...' paths loaded later resolve against it.
pwd

'/home/ssikdar/tmp/Attributed-VRG-tmp/other_models/CELL'

In [6]:
import warnings
warnings.filterwarnings('ignore')

import pickle
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import sys; sys.path.extend(['../', '../../', '../../../', '../../../../'])

import torch

In [16]:
from other_models.CELL.src.cell.utils import link_prediction_performance
from other_models.CELL.src.cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from other_models.CELL.src.cell.graph_statistics import compute_graph_statistics

# Load graph and validation/test edges (same split as in the paper)

In [17]:
# Training graph, read from scipy's sparse .npz format.
train_graph = load_npz('./data/CORA-ML_train.npz')

# Held-out link-prediction split: the pickle holds a 4-tuple of validation and
# test edge sets (presumably "ones" are existing edges and "zeros" are
# non-edges -- confirm against the file that produced the split).
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open('./data/link_prediction.p', 'rb') as split_file:
    link_prediction_split = pickle.load(split_file)
val_ones, val_zeros, test_ones, test_zeros = link_prediction_split

## Edge overlap criterion

In [18]:
# Build the CELL model on the training graph with the edge-overlap (EO)
# criterion. The callback is invoked every 10 steps with a limit of 0.5;
# judging by the training log further down, training ends once the reported
# edge overlap exceeds that limit.
# NOTE(review): H presumably sets the rank of the model -- confirm in cell.py.
eo_criterion = EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5)
model = Cell(A=train_graph, H=9, callbacks=[eo_criterion])

In [19]:
# Print the model's string form; as the output shows, this is only the default object repr.
print(model)

<other_models.CELL.src.cell.cell.Cell object at 0x7f9015ad3850>


In [20]:
# Fit the model with Adam for at most 200 steps; the callback attached to the
# model can end training earlier (the log below stops at step 40).
adam_settings = {'lr': 0.1, 'weight_decay': 1e-7}
model.train(steps=200, optimizer_fn=torch.optim.Adam, optimizer_args=adam_settings)

Step:  10/200 Loss: 6.01374 Edge-Overlap: 0.033 Total-Time: 11
Step:  20/200 Loss: 4.10873 Edge-Overlap: 0.281 Total-Time: 23
Step:  30/200 Loss: 3.34057 Edge-Overlap: 0.456 Total-Time: 34
Step:  40/200 Loss: 2.97447 Edge-Overlap: 0.548 Total-Time: 46


In [21]:
# Draw one synthetic graph from the trained model. Per the method's docstring, edges are
# sampled independently from the score matrix and the result is a scipy sparse CSR matrix.
generated_graph = model.sample_graph()

In [28]:
# IPython help lookup: displays the signature and docstring of sample_graph.
# NOTE(review): leftover interactive introspection, executed out of order (In [28]); consider removing before sharing.
model.sample_graph?

Signature: model.sample_graph()
Docstring:
Samples a graph from the learned parameters W.

Edges are sampled independently from the score maxtrix.

Returns:
    sampled_graph(sp.csr.csr_matrix): A synthetic graph generated by the model.
File:      ~/tmp/Attributed-VRG-tmp/other_models/CELL/src/cell/cell.py
Type:      method


In [22]:
# Summary statistics of the sampled graph, to set against the training graph's statistics.
compute_graph_statistics(generated_graph)

{'d_max': 216.0,
 'd_min': 1.0,
 'd': 4.8277580071174375,
 'LCC': 2806,
 'wedge_count': 85342.0,
 'claw_count': 2175766.0,
 'triangle_count': 1395,
 'square_count': 6535.0,
 'power_law_exp': 1.8239736190078895,
 'gini': 0.4533849276740103,
 'rel_edge_distr_entropy': 0.949336856274682,
 'assortativity': -0.07386853615348378,
 'clustering_coefficient': 0.049037988329310306,
 'cpl': 5.176381816717263}

In [23]:
# The same statistics for the training graph, as the reference for the sampled graph's values.
compute_graph_statistics(train_graph)

{'d_max': 238.0,
 'd_min': 1.0,
 'd': 4.8277580071174375,
 'LCC': 2810,
 'wedge_count': 101747.0,
 'claw_count': 3033514.0,
 'triangle_count': 2802,
 'square_count': 14268.0,
 'power_law_exp': 1.8550648593086239,
 'gini': 0.4825742921255409,
 'rel_edge_distr_entropy': 0.9406652031225722,
 'assortativity': -0.07626405450439543,
 'clustering_coefficient': 0.08261668648707088,
 'cpl': 5.630006245811316}

## Validation criterion

In [24]:
# Fresh CELL model monitored by the link-prediction (LP) criterion on the
# validation edges; the callback is invoked every 2 steps with max_patience=3.
# NOTE(review): presumably training stops after 3 checks without improvement
# -- confirm in cell.py.
# NOTE: this rebinds `model`, so the edge-overlap model trained earlier is
# no longer reachable under that name.
lp_criterion = LinkPredictionCriterion(
    invoke_every=2,
    val_ones=val_ones,
    val_zeros=val_zeros,
    max_patience=3,
)
model = Cell(A=train_graph, H=9, callbacks=[lp_criterion])

In [25]:
# Fit the model with Adam for at most 200 steps; the callback attached to the
# model reports ROC-AUC and average precision in the training log.
adam_settings = {'lr': 0.1, 'weight_decay': 1e-6}
model.train(steps=200, optimizer_fn=torch.optim.Adam, optimizer_args=adam_settings)

Step:   2/200 Loss: 7.92668 ROC-AUC Score: 0.588 Average Precision: 0.568 Total-Time: 2
Step:   4/200 Loss: 7.72150 ROC-AUC Score: 0.658 Average Precision: 0.653 Total-Time: 5
Step:   6/200 Loss: 7.29833 ROC-AUC Score: 0.707 Average Precision: 0.707 Total-Time: 7
Step:   8/200 Loss: 6.68779 ROC-AUC Score: 0.749 Average Precision: 0.749 Total-Time: 10
Step:  10/200 Loss: 6.01435 ROC-AUC Score: 0.798 Average Precision: 0.806 Total-Time: 12
Step:  12/200 Loss: 5.41172 ROC-AUC Score: 0.842 Average Precision: 0.857 Total-Time: 15
Step:  14/200 Loss: 4.96227 ROC-AUC Score: 0.872 Average Precision: 0.887 Total-Time: 17
Step:  16/200 Loss: 4.63122 ROC-AUC Score: 0.889 Average Precision: 0.903 Total-Time: 20
Step:  18/200 Loss: 4.34932 ROC-AUC Score: 0.898 Average Precision: 0.911 Total-Time: 22
Step:  20/200 Loss: 4.10049 ROC-AUC Score: 0.905 Average Precision: 0.918 Total-Time: 25
Step:  22/200 Loss: 3.90924 ROC-AUC Score: 0.913 Average Precision: 0.926 Total-Time: 27
Step:  24/200 Loss: 3.74

#### Link prediction performance on the test edges: ROC-AUC score and average precision

In [26]:
# Evaluate the model's scores matrix on the held-out test edges. The helper's
# parameters are named val_*, but the test split is what gets passed here.
# NOTE(review): reads the private attribute model._scores_matrix.
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9436187500000001, 0.9462628874999618)