In [11]:
import os

import numpy as np
import sktensor as skt
import yaml

# Jupyter Tutorial for Probabilistic Generative Models for Network Analysis (PGM)

Welcome to this tutorial on using the _Probabilistic Generative Models for Network Analysis_ (`pgm`) package. In this tutorial, we'll walk through the process of running the _CRep0_ algorithm on a sample dataset.


## Configuration
Let's start by configuring the algorithm. We'll use the _CRep0_ algorithm, and we need to set the input folder and adjacency file.

In [2]:
# Run settings: which algorithm to use and where the input network lives.
algorithm = 'CRep0'
adj = 'syn111.dat'  # adjacency file name
in_folder = '../pgm/config/data/input/'  # folder that contains the adjacency file
# File name of the settings file for the chosen algorithm.
config_path = f'setting_{algorithm}.yaml'

Next, we load the algorithm's configuration file, which is shipped as a data file inside the `pgm` package:

In [6]:
import importlib.resources as res

# Open the settings file stored as a resource of 'pgm.data.model' and parse
# its YAML content into `conf`.
# NOTE(review): yaml.Loader is the full (unsafe) loader. That is acceptable for
# trusted package data; prefer yaml.safe_load if this file could ever come from
# an untrusted source.
with res.open_binary('pgm.data.model', config_path) as config_stream:
    conf = yaml.load(config_stream, Loader=yaml.Loader)

In [9]:
# Show the loaded settings as YAML text
config_yaml = yaml.dump(conf)
print(config_yaml)

N_real: 5
assortative: true
constrained: true
decision: 10
end_file: _CRep0
err: 0.1
err_max: 1.0e-12
eta0: 0
files: ../data/input/synthetic/theta.npz
fix_eta: true
inf: 10000000000.0
initialization: 0
max_iter: 1000
out_folder: ./CRep0_output/
out_inference: true
rseed: 0
tolerance: 0.0001
undirected: false
verbose: false



Now, let's set the path to the output folder, create the folder if it does not exist yet, and save a copy of the configuration there:

In [7]:
# Point the output at a folder named after the algorithm
conf['out_folder'] = f'./{algorithm}_output/'

In [8]:
# Ensure the output folder exists.
# exist_ok=True makes this cell safe to re-run and removes the check-then-create
# race of a separate os.path.exists() test. If the path exists but is not a
# directory, this raises FileExistsError instead of silently continuing.
os.makedirs(conf['out_folder'], exist_ok=True)

In [15]:
# Save a copy of the configuration inside the output folder.
# os.path.join avoids the doubled separator ('.../CRep0_output//setting_...')
# that concatenating '/setting_' onto an out_folder ending in '/' produces.
output_config_path = os.path.join(conf['out_folder'], f'setting_{algorithm}.yaml')
with open(output_config_path, 'w') as f:
    yaml.dump(conf, f)

## Importing Data
Now, let's import the data using the `pgm` package:

In [16]:
from pgm.input.loader import import_data

# Arguments forwarded to import_data.
# NOTE(review): ego/alter presumably name the source/target columns of the
# input file; confirm against import_data's documentation.
ego = 'source'
alter = 'target'
force_dense = False

# Complete path of the network file. os.path.join keeps the path correct
# whether or not in_folder ends with a path separator.
network = os.path.join(in_folder, adj)
# TODO: the following function probably reads local data. Should we use the data in the package?
A, B, B_T, data_T_vals = import_data(network,
                                     ego=ego,
                                     alter=alter,
                                     force_dense=force_dense,
                                     header=0)
# Node view of the first graph in A; passed to model.fit() later on.
nodes = A[0].nodes()

../pgm/config/data/input/syn111.dat shape: (5512, 3)
Removing self loops
Number of nodes = 600
Number of layers = 1
Number of edges and average degree in each layer:
E[0] = 5512 - <k> = 18.373
M[0] = 5899 - <k_weighted> = 19.663
Reciprocity (networkX) = 0.423
Reciprocity (intended as the proportion of bi-directional edges over the unordered pairs) = 0.268
Reciprocity (considering the weights of the edges) = 0.515


In [None]:
# Sanity check: B must be a NumPy array or one of the sktensor tensor types.
valid_types = [np.ndarray, skt.dtensor, skt.sptensor]
assert isinstance(B, tuple(valid_types))

## Running the Model
Finally, let's run the _CRep0_ model:

In [None]:
from pgm.model import CRep as CREP
import time

print(f'\n### Run {algorithm} ###')

# NOTE(review): K is presumably the number of communities and flag_conv the
# convergence criterion; confirm against the CRep documentation.
K = 3
flag_conv = 'log'

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time(), it is unaffected by system clock adjustments.
time_start = time.perf_counter()
# The model is sized from the first graph (number of nodes) and the number of
# graphs in A (layers); every entry of `conf` is forwarded as a keyword argument.
model = CREP.CRep(N=A[0].number_of_nodes(), L=len(A), K=K, **conf)
# The return value of fit() is not used in this tutorial.
_ = model.fit(data=B,
              data_T=B_T,
              data_T_vals=data_T_vals,
              flag_conv=flag_conv,
              nodes=nodes)

print(f'\nTime elapsed: {np.round(time.perf_counter() - time_start, 2)} seconds.')