## Graph Dynamical Systems

This notebook contains the experiments to evaluate graph edit networks on simple graph dynamical systems, namely the edit cycles, degree rules, and game of life datasets.

### Hyperparameter setup

In [1]:
import time
import numpy as np
import torch
from torch_geometric.utils import dense_to_sparse

import pytorch_graph_edit_networks as gen
import baseline_models
import os
import time
import hep_th

# --- model architecture ---
# depth and hidden width handed to every model constructor in this notebook
num_layers, dim_hid = 2, 64

# --- optimisation ---
learning_rate = 1e-3    # step size of the Adam optimizer
weight_decay = 1e-5     # weight decay of the Adam optimizer
loss_threshold = 1e-3   # training stops once the moving loss average falls below this
max_epochs = 50000      # hard cap on the number of training epochs per repeat
print_step = 1000       # the moving loss average is printed every print_step epochs

# --- experimental protocol ---
R = 5         # number of repetitions for each experiment
N_test = 10   # number of test time series used to evaluate learning afterwards

## Model setup

In [2]:
# SETUP FUNCTIONS
def setup_vgae(dim_in, nonlin):
    """Construct the VGAE baseline model.

    The layer count and hidden width come from the notebook-level
    ``num_layers`` and ``dim_hid`` settings; ``beta`` and ``sigma_scaling``
    are fixed to 1E-3 for every experiment.

    Parameters
    ----------
    dim_in : forwarded unchanged as the ``dim_in`` constructor argument.
    nonlin : forwarded unchanged as the ``nonlin`` constructor argument.

    Returns
    -------
    The freshly constructed ``baseline_models.VGAE`` object.
    """
    config = dict(
        num_layers=num_layers,
        dim_in=dim_in,
        dim_hid=dim_hid,
        beta=1E-3,
        sigma_scaling=1E-3,
        nonlin=nonlin,
    )
    return baseline_models.VGAE(**config)


def setup_vgrnn(dim_in, nonlin):
    """Construct the VGRNN baseline model.

    Parameters
    ----------
    dim_in : forwarded unchanged as the ``dim_in`` constructor argument.
    nonlin : accepted only so that all setup functions share one signature;
        it is NOT passed on to the VGRNN constructor.

    Returns
    -------
    The freshly constructed ``baseline_models.VGRNN`` object.
    """
    config = dict(num_layers=num_layers, dim_in=dim_in, dim_hid=dim_hid)
    return baseline_models.VGRNN(**config)


def setup_gen(dim_in, nonlin):
    """Construct a graph edit network (GEN).

    Parameters
    ----------
    dim_in : forwarded unchanged as the ``dim_in`` constructor argument.
    nonlin : forwarded unchanged as the ``nonlin`` constructor argument.

    Returns
    -------
    The freshly constructed ``gen.GEN`` object, sized by the notebook-level
    ``num_layers`` and ``dim_hid`` settings.
    """
    config = dict(num_layers=num_layers, dim_in=dim_in, dim_hid=dim_hid, nonlin=nonlin)
    return gen.GEN(**config)

In [3]:
# LOSS FUNCTIONS
# Module-level loss objects, created once and reused for every call of the
# GEN loss wrappers in this cell: loss_fun is used by gen_loss and
# crossent_loss_fun by gen_loss_crossent.
loss_fun = gen.GEN_loss()
crossent_loss_fun = gen.GEN_loss_crossent()
def vgae_loss(model, A, X, delta, Epsilon, state=None):
    """Compute the VGAE loss for a single time step.

    The target adjacency matrix is ``A + Epsilon`` with every row and column
    of a deleted node (``delta`` entry below -0.5) set to zero. The inputs
    themselves are not modified because the sum is a new array.

    Parameters
    ----------
    model : object providing ``compute_loss(A, B, X)``.
    A : current adjacency matrix (array-like, converted to a float tensor).
    X : current node features (array-like, converted to a float tensor).
    delta : per-node edit scores; entries below -0.5 mark deleted nodes.
    Epsilon : edge edits that are added to ``A`` to obtain the target.
    state : passed through untouched so that all loss functions share one
        signature.

    Returns
    -------
    (loss, state) where loss is whatever ``model.compute_loss`` returns.
    """
    next_adjacency = A + Epsilon
    # delete all outgoing and incoming edges of deleted nodes
    removed = delta < -0.5
    next_adjacency[removed, :] = 0
    next_adjacency[:, removed] = 0

    adjacency_t = torch.tensor(A, dtype=torch.float)
    target_t = torch.tensor(next_adjacency, dtype=torch.float)
    features_t = torch.tensor(X, dtype=torch.float)

    return model.compute_loss(adjacency_t, target_t, features_t), state


def vgrnn_loss(model, A, X, delta, Epsilon, state=None):
    """Compute the VGRNN loss for a single time step.

    The model is called on the node features and the sparse edge index of
    ``A`` together with the recurrent state of the previous step. The last
    element of the model output is treated as the new recurrent state; all
    other elements are forwarded to ``model.compute_loss`` followed by the
    target adjacency matrix.

    The target is ``A + Epsilon`` with every row and column of a deleted
    node (``delta`` entry below -0.5) set to zero. ``Epsilon`` and the
    deletion mask are converted to tensors explicitly so that the target is
    always a float32 tensor, like the inputs, instead of relying on implicit
    tensor/NumPy interoperability.

    Parameters
    ----------
    model : callable as ``model(X, edge_index, hidden_in=state)`` and
        providing ``compute_loss(*outputs, target)``.
    A : current adjacency matrix (array-like).
    X : current node features (array-like).
    delta : per-node edit scores; entries below -0.5 mark deleted nodes.
    Epsilon : edge edits that are added to ``A`` to obtain the target.
    state : recurrent state from the previous time step (None at the start).

    Returns
    -------
    (loss, state) with the loss returned by ``model.compute_loss`` and the
    new recurrent state.
    """
    A = torch.tensor(A, dtype=torch.float)
    X = torch.tensor(X, dtype=torch.float)
    edge_index, _ = dense_to_sparse(A)

    predicted = model(X, edge_index, hidden_in=state)
    # the last output is the recurrent state, everything before it is the prediction
    predicted, state = predicted[:-1], predicted[-1]

    # the sum is a new tensor, so the in-place masking below never touches A
    target = A + torch.as_tensor(Epsilon, dtype=torch.float)
    # delete all outgoing and incoming edges of deleted nodes
    removed = torch.as_tensor(delta, dtype=torch.float) < -0.5
    target[removed, :] = 0
    target[:, removed] = 0

    return model.compute_loss(*predicted, target), state


def gen_loss_crossent(model, A, X, delta, Epsilon, state=None):
    """Compute the crossentropy GEN loss for a single time step.

    The model is called on the adjacency matrix and the node features; its
    two outputs (node edit scores, edge edit scores) are compared against
    ``delta`` and ``Epsilon`` by the module-level ``crossent_loss_fun``.

    Parameters
    ----------
    model : callable as ``model(A, X)`` returning a pair of predictions.
    A, X, delta, Epsilon : array-likes; each is converted to a float tensor.
    state : passed through untouched so that all loss functions share one
        signature.

    Returns
    -------
    (loss, state)
    """
    def as_float(values):
        return torch.tensor(values, dtype=torch.float)

    node_scores, edge_scores = model(as_float(A), as_float(X))
    loss = crossent_loss_fun(node_scores, edge_scores,
                             as_float(delta), as_float(Epsilon), as_float(A))
    return loss, state


def gen_loss(model, A, X, delta, Epsilon, state=None):
    """Compute the default GEN loss for a single time step.

    The model is called on the adjacency matrix and the node features; its
    two outputs (node edit scores, edge edit scores) are compared against
    ``delta`` and ``Epsilon`` by the module-level ``loss_fun``.

    Parameters
    ----------
    model : callable as ``model(A, X)`` returning a pair of predictions.
    A, X, delta, Epsilon : array-likes; each is converted to a float tensor.
    state : passed through untouched so that all loss functions share one
        signature.

    Returns
    -------
    (loss, state)
    """
    def as_float(values):
        return torch.tensor(values, dtype=torch.float)

    node_scores, edge_scores = model(as_float(A), as_float(X))
    loss = loss_fun(node_scores, edge_scores,
                    as_float(delta), as_float(Epsilon), as_float(A))
    return loss, state

In [4]:
# PREDICTION FUNCTIONS
def vgae_pred(model, A, X, state=None):
    """Turn the VGAE output into node and edge edits.

    The model output is read as the predicted next adjacency matrix. A node
    whose predicted row sums to less than 0.5 is marked as deleted (-1 in
    ``delta``); this function never predicts node insertions. The edge edits
    are the difference between prediction and ``A``, with all rows and
    columns of deleted nodes zeroed.

    Parameters
    ----------
    model : callable as ``model(A, X)`` returning a tensor.
    A : current adjacency matrix as a NumPy array.
    X : current node features (array-like).
    state : passed through untouched so that all prediction functions share
        one signature.

    Returns
    -------
    (delta, Epsilon, state)
    """
    adjacency_t = torch.tensor(A, dtype=torch.float)
    features_t = torch.tensor(X, dtype=torch.float)
    predicted = model(adjacency_t, features_t).detach().numpy()

    # nodes without (sufficient) predicted outgoing edge mass count as deleted
    gone = np.sum(predicted, 1) < 0.5
    delta = np.where(gone, -1., 0.)

    Epsilon = predicted - A
    Epsilon[gone, :] = 0.
    Epsilon[:, gone] = 0.
    return delta, Epsilon, state


def vgrnn_pred(model, A, X, state=None):
    """Turn the VGRNN output into node and edge edits.

    The first element of the model output is read as the predicted next
    adjacency matrix and the last element as the new recurrent state. The
    prediction is cropped to the leading ``n x n`` block, where ``n`` is the
    number of rows of ``A``. A node whose predicted row sums to less than
    0.5 is marked as deleted (-1 in ``delta``); node insertions are never
    predicted. Edge edits are prediction minus ``A`` with the rows and
    columns of deleted nodes zeroed.

    Parameters
    ----------
    model : callable as ``model(X, edge_index, hidden_in=state)``.
    A : current adjacency matrix (array-like).
    X : current node features (array-like).
    state : recurrent state from the previous time step (None at the start).

    Returns
    -------
    (delta, Epsilon, state) with the new recurrent state.
    """
    adjacency_t = torch.tensor(A, dtype=torch.float)
    features_t = torch.tensor(X, dtype=torch.float)
    edge_index, _ = dense_to_sparse(adjacency_t)

    outputs = model(features_t, edge_index, hidden_in=state)
    state = outputs[-1]

    # crop the prediction to the nodes that actually exist in A
    n = adjacency_t.shape[0]
    next_adjacency = outputs[0][:n, :n].detach().numpy()

    gone = np.sum(next_adjacency, 1) < 0.5
    delta = np.where(gone, -1., 0.)

    Epsilon = next_adjacency - adjacency_t.numpy()
    Epsilon[gone, :] = 0.
    Epsilon[:, gone] = 0.

    return delta, Epsilon, state


def gen_pred(model, A, X, state=None):
    """Turn the raw GEN scores into discrete node and edge edits.

    Node scores above 0.5 become insertions (+1), node scores below -0.5
    become deletions (-1), everything else is left at 0. An edge deletion
    (-1) is only emitted where an edge currently exists (``A > 0.5``) and the
    edge score is below -0.5; an edge insertion (+1) only where no edge
    exists (``A < 0.5``) and the edge score is above 0.5.

    Parameters
    ----------
    model : callable as ``model(A, X)`` returning (node scores, edge scores).
    A : current adjacency matrix as a NumPy array.
    X : current node features (array-like).
    state : passed through untouched so that all prediction functions share
        one signature.

    Returns
    -------
    (delta, Epsilon, state)
    """
    node_scores, edge_scores = model(torch.tensor(A, dtype=torch.float),
                                     torch.tensor(X, dtype=torch.float))
    node_scores = node_scores.detach().numpy()
    edge_scores = edge_scores.detach().numpy()

    # discretize the node edits
    delta = np.zeros(A.shape[0])
    for sign, selected in ((1., node_scores > 0.5), (-1., node_scores < -0.5)):
        delta[selected] = sign

    # discretize the edge edits; only edits that are possible given A count
    edge_present = A > 0.5
    edge_absent = A < 0.5
    Epsilon = np.zeros(A.shape)
    Epsilon[edge_present & (edge_scores < -0.5)] = -1.
    Epsilon[edge_absent & (edge_scores > +0.5)] = +1.
    return delta, Epsilon, state


In [5]:
# util function for VGRNN
def pad_xs(xs):
    """Stack node feature matrices of different sizes into one zero-padded array.

    Parameters
    ----------
    xs : non-empty sequence of arrays. The result takes its row count from
        the largest second-to-last dimension in ``xs`` and its column count
        and dtype from ``xs[0]``.

    Returns
    -------
    Array of shape ``(len(xs), max_rows, num_columns)`` in which entry ``i``
    holds ``xs[i]`` in its leading block and zeros everywhere else.
    """
    num_steps = len(xs)
    widest = max(x.shape[-2] for x in xs)
    padded = np.zeros((num_steps, widest, xs[0].shape[-1]), dtype=xs[0].dtype)
    for step, x in enumerate(xs):
        # one slice per axis of x, so x lands in the leading corner of its slot
        padded[(step, *map(slice, x.shape))] = x

    return padded

In [6]:
# EVALUATION FUNCTIONS
# names of the eight evaluation columns: for nodes and then edges, for
# insertions and then deletions, first recall and then precision
eval_criteria = ['%s_%s_%s' % (element, edit, metric)
                 for element in ('node', 'edge')
                 for edit in ('ins', 'del')
                 for metric in ('recall', 'precision')]
# set up a function to compute precision and recall
def prec_rec(X, Y):
    """Compute recall and precision of predicted insertions and deletions.

    Entries above 0.5 count as insertions and entries below -0.5 as
    deletions. Whenever a score has nothing to be measured on (no target
    edits for recall, no predicted edits for precision) it is defined as 1.

    Parameters
    ----------
    X : array with the predicted edits.
    Y : array with the target edits, same shape as ``X``.

    Returns
    -------
    (ins_rec, ins_prec, del_rec, del_prec)
    """
    def is_insertion(values):
        return values > 0.5

    def is_deletion(values):
        return values < -0.5

    def hit_rate(reference, other, is_edit):
        # fraction of the edits found in `reference` that are also edits in `other`
        selected = is_edit(reference)
        if np.sum(selected) < 0.5:
            return 1.
        return np.mean(is_edit(other[selected]))

    # recall selects by the target, precision selects by the prediction
    ins_rec = hit_rate(Y, X, is_insertion)
    ins_prec = hit_rate(X, Y, is_insertion)
    del_rec = hit_rate(Y, X, is_deletion)
    del_prec = hit_rate(X, Y, is_deletion)
    return ins_rec, ins_prec, del_rec, del_prec

## Dataset setup

In [7]:
import graph_edit_cycles
import degree_rules
import game_of_life
import random


# DATASET SETUP
def generate_edit_cycle():
    """Sample one time series from the edit cycles dataset.

    Three random integers are drawn, in this order, from ``range(3)``,
    ``range(12)`` and ``range(4, 12)`` and passed positionally to
    ``graph_edit_cycles.generate_time_series``. The per-step edit tuples it
    returns are split into node edits (first entry) and edge edits (second
    entry).

    Returns
    -------
    (As, Xs, deltas, Epsilons) with one list entry per time step.
    """
    # draw the arguments one by one to keep the order of the random calls explicit
    first = random.randrange(3)
    second = random.randrange(12)
    third = random.randrange(4, 12)
    As, Xs, edits = graph_edit_cycles.generate_time_series(first, second, third)
    deltas = [edit[0] for edit in edits]
    Epsilons = [edit[1] for edit in edits]
    return As, Xs, deltas, Epsilons


def generate_degree_rules(n_init=8, n_max=None):
    """Sample one time series from the degree rules dataset.

    Parameters
    ----------
    n_init : the initial number of nodes in each graph (default 8).
    n_max : the maximum number of nodes that can occur in each graph during
        evolution. Defaults to ``4 * n_init``, i.e. 32 for the default
        ``n_init``.
        NOTE(review): the experiment configuration lists an input dimension
        of 32 for this dataset, which presumably has to match ``n_max`` -
        confirm before changing either value.

    Returns
    -------
    Whatever ``degree_rules.generate_time_series_from_random_matrix``
    returns; the training loop unpacks it as (As, Xs, deltas, Epsilons).
    """
    if n_max is None:
        n_max = n_init * 4
    return degree_rules.generate_time_series_from_random_matrix(n_init, n_max=n_max)


def generate_game_of_life(grid_size=10, num_shapes=1, p=0.1, T_max=10):
    """Sample one time series from the game of life dataset.

    The four parameters are the hyper-parameters of the game of life random
    grid generation and are passed positionally, in this order, to
    ``game_of_life.generate_random_time_series``. The defaults are the
    values used for the experiments in this notebook.

    The generator returns a single adjacency matrix for the whole series, so
    the same matrix is repeated for every time step and the edge edits are
    all-zero matrices. Note that the returned lists repeat ONE array object
    each; do not modify their entries in place.

    Parameters
    ----------
    grid_size : first generator argument (default 10).
    num_shapes : second generator argument (default 1).
    p : third generator argument (default 0.1).
    T_max : fourth generator argument (default 10).

    Returns
    -------
    (As, Xs, deltas, Epsilons) with ``len(Xs)`` entries in ``As`` and
    ``Epsilons``.
    """
    A, Xs, deltas = game_of_life.generate_random_time_series(grid_size, num_shapes, p, T_max)
    # the graph structure never changes: same adjacency, no edge edits
    As = [A] * len(Xs)
    Epsilons = [np.zeros_like(A)] * len(Xs)
    return As, Xs, deltas, Epsilons

In [8]:
# CONFIG FOR EXPERIMENTS

# Models
# one row per model: (name, setup function, loss function, prediction function);
# both GEN variants share the setup and prediction function and differ only in the loss
_model_table = [
    ('VGAE', setup_vgae, vgae_loss, vgae_pred),
    ('VGRNN', setup_vgrnn, vgrnn_loss, vgrnn_pred),
    ('GEN_crossent', setup_gen, gen_loss_crossent, gen_pred),
    ('GEN', setup_gen, gen_loss, gen_pred),
]
models, setup_funs, loss_funs, pred_funs = (list(column) for column in zip(*_model_table))

# Datasets
# one row per dataset: (name, input dimension handed to the model setup, generator function)
_dataset_table = [
    ('edit_cycles', 4, generate_edit_cycle),
    ('degree_rules', 32, generate_degree_rules),
    ('game_of_life', 1, generate_game_of_life),
]
datasets, dim_ins, generator_funs = (list(column) for column in zip(*_dataset_table))

### Actual Experiment

In [9]:
# Train and evaluate every model on every dataset, R times each.
# Results, runtimes and learning curves are written to the 'results' directory
# after every finished repeat; repeats that are already stored there are skipped,
# so an interrupted run can be resumed by simply executing this cell again.

# make sure the output directory exists up front; otherwise the first
# np.savetxt call would fail only after a complete training run
os.makedirs('results', exist_ok=True)
for d in range(len(datasets)):
    print('\n--- data set %s ---\n' % datasets[d])
    # load partial runtime results if possible
    runtimes_file = 'results/%s_runtimes.csv' % datasets[d]
    if os.path.exists(runtimes_file):
        runtimes = np.loadtxt(runtimes_file, skiprows=1, delimiter='\t')
    else:
        runtimes = np.full((R, len(models)), np.nan)
    # iterate over all models
    for k in range(len(models)):
        print('--- model %s ---' % models[k])
        # load partial results if possible
        results_file = 'results/%s_%s_results.csv' % (datasets[d], models[k])
        curves_file = 'results/%s_%s_learning_curves.csv' % (datasets[d], models[k])
        if os.path.exists(results_file):
            results = np.loadtxt(results_file, skiprows=1, delimiter='\t')
            learning_curves = np.loadtxt(curves_file, delimiter='\t')
        else:
            results = np.full((R, len(eval_criteria)), np.nan)
            learning_curves = np.full((max_epochs, R), np.nan)
        # iterate over experimental repeats
        for r in range(R):
            # check if this repeat is already evaluated; if so, skip it
            # (a repeat counts as done once its first learning curve entry is set)
            if not np.isnan(learning_curves[0, r]):
                continue
            print('-- repeat %d of %d --' % (r + 1, R))
            start_time = time.time()
            # set up model
            if datasets[d] == 'game_of_life':
                nonlin = torch.nn.Sigmoid()
            else:
                nonlin = torch.nn.ReLU()
            model = setup_funs[k](dim_ins[d], nonlin)
            # set up optimizer
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
            # initialize moving loss average for printing
            loss_avg = None
            # start training
            for epoch in range(max_epochs):
                optimizer.zero_grad()
                # sample a time series from the data set
                As, Xs, deltas, Epsilons = generator_funs[d]()
                # pad node features for VGRNN
                if models[k] == 'VGRNN':
                    Xs = pad_xs(Xs)
                # compute the loss over all time steps
                loss = 0.
                state = None
                for t in range(len(As)):
                    # compute loss
                    loss_obj, state = loss_funs[k](model, As[t], Xs[t], deltas[t], Epsilons[t], state=state)
                    # compute gradient; gradients of all time steps accumulate
                    # until the single optimizer step below
                    loss_obj.backward()
                    # accumulate loss
                    loss += loss_obj.item()
                # perform an optimizer step
                optimizer.step()
                # store the current loss value in the learning curve
                learning_curves[epoch, r] = loss
                # compute a new moving average over the loss
                if loss_avg is None:
                    loss_avg = loss
                else:
                    loss_avg = loss_avg * 0.9 + 0.1 * loss
                # print every print_step steps
                if (epoch + 1) % print_step == 0:
                    print('loss avg after %d epochs: %g' % (epoch + 1, loss_avg))
                # stop early if the moving average is small; the remaining
                # learning curve entries of this repeat stay NaN
                if loss_avg < loss_threshold:
                    break
            # perform evaluation on new time series
            results[r, :] = 0.
            T = 0
            for j in range(N_test):
                # get a random time series from the dataset
                As, Xs, deltas, Epsilons = generator_funs[d]()
                if models[k] == 'VGRNN':
                    Xs = pad_xs(Xs)
                state = None
                for t in range(len(As)):
                    # predict the current time step with the network
                    delta, Epsilon, state = pred_funs[k](model, As[t], Xs[t], state=state)
                    # assess node edit precision and recall
                    results[r, :4] += prec_rec(delta, deltas[t])
                    # assess edge edit precision and recall
                    results[r, 4:] += prec_rec(Epsilon, Epsilons[t])

                T += len(As)
            # average the accumulated scores over all evaluated time steps
            results[r, :] /= T
            # store runtime (training plus evaluation of this repeat)
            runtimes[r, k] = time.time() - start_time
            np.savetxt(runtimes_file, runtimes, delimiter='\t', fmt='%g', header='\t'.join(models), comments='')
            # store results
            np.savetxt(results_file, results, delimiter='\t', fmt='%g', header='\t'.join(eval_criteria), comments='')
            # store learning curves
            np.savetxt(curves_file, learning_curves, delimiter='\t', fmt='%g')
        # print results
        for crit in range(len(eval_criteria)):
            print('%s: %g +- %g' % (eval_criteria[crit], np.mean(results[:, crit]), np.std(results[:, crit])))


--- data set edit_cycles ---

--- model VGAE ---
node_ins_recall: 0.653407 +- 0.0344482
node_ins_precision: 1 +- 0
node_del_recall: 0.983871 +- 0.032258
node_del_precision: 0.803924 +- 0.131027
edge_ins_recall: 1 +- 0
edge_ins_precision: 1 +- 0
edge_del_recall: 1 +- 0
edge_del_precision: 1 +- 0
--- model VGRNN ---
-- repeat 1 of 5 --
loss avg after 1000 epochs: 5.47242
loss avg after 2000 epochs: 5.47023
loss avg after 3000 epochs: 6.245
loss avg after 4000 epochs: 5.98484
loss avg after 5000 epochs: 5.86518
loss avg after 6000 epochs: 5.70864
loss avg after 7000 epochs: 5.67238
loss avg after 8000 epochs: 5.26569
loss avg after 9000 epochs: 5.05257
loss avg after 10000 epochs: 5.41263
loss avg after 11000 epochs: 5.49596
loss avg after 12000 epochs: 6.39
loss avg after 13000 epochs: 5.28252
loss avg after 14000 epochs: 5.07282
loss avg after 15000 epochs: 5.76806
loss avg after 16000 epochs: 5.01504
loss avg after 17000 epochs: 5.38503
loss avg after 18000 epochs: 5.9747
loss avg aft

loss avg after 14000 epochs: 6.045
loss avg after 15000 epochs: 6.02037
loss avg after 16000 epochs: 6.06009
loss avg after 17000 epochs: 5.76344
loss avg after 18000 epochs: 5.43921
loss avg after 19000 epochs: 5.13954
loss avg after 20000 epochs: 5.49525
loss avg after 21000 epochs: 5.94155
loss avg after 22000 epochs: 5.81894
loss avg after 23000 epochs: 5.54514
loss avg after 24000 epochs: 6.69348
loss avg after 25000 epochs: 5.00692
loss avg after 26000 epochs: 6.21436
loss avg after 27000 epochs: 6.13624
loss avg after 28000 epochs: 5.78238
loss avg after 29000 epochs: 5.62429
loss avg after 30000 epochs: 5.45688
loss avg after 31000 epochs: 5.85331
loss avg after 32000 epochs: 6.45893
loss avg after 33000 epochs: 5.7255
loss avg after 34000 epochs: 6.61136
loss avg after 35000 epochs: 6.35065
loss avg after 36000 epochs: 5.99933
loss avg after 37000 epochs: 5.4338
loss avg after 38000 epochs: 5.54456
loss avg after 39000 epochs: 5.86461
loss avg after 40000 epochs: 5.73404
loss 

loss avg after 6000 epochs: 8.61882
loss avg after 7000 epochs: 8.32989
loss avg after 8000 epochs: 8.30106
loss avg after 9000 epochs: 8.04884
loss avg after 10000 epochs: 8.69912
loss avg after 11000 epochs: 8.50798
loss avg after 12000 epochs: 8.21513
loss avg after 13000 epochs: 8.22318
loss avg after 14000 epochs: 7.67616
loss avg after 15000 epochs: 7.76724
loss avg after 16000 epochs: 8.12565
loss avg after 17000 epochs: 8.19194
loss avg after 18000 epochs: 7.22552
loss avg after 19000 epochs: 7.27982
loss avg after 20000 epochs: 8.54416
loss avg after 21000 epochs: 7.43262
loss avg after 22000 epochs: 7.97145
loss avg after 23000 epochs: 7.88548
loss avg after 24000 epochs: 7.63718
loss avg after 25000 epochs: 7.61813
loss avg after 26000 epochs: 7.72526
loss avg after 27000 epochs: 7.76815
loss avg after 28000 epochs: 7.60908
loss avg after 29000 epochs: 7.26438
loss avg after 30000 epochs: 7.59843
loss avg after 31000 epochs: 7.29211
loss avg after 32000 epochs: 7.63401
loss 

loss avg after 2000 epochs: 31950.5
loss avg after 3000 epochs: 26336.8
loss avg after 4000 epochs: 23857.8
loss avg after 5000 epochs: 22095.9
loss avg after 6000 epochs: 21778
loss avg after 7000 epochs: 21281.8
loss avg after 8000 epochs: 20995.5
loss avg after 9000 epochs: 20418.3
loss avg after 10000 epochs: 20751.1
loss avg after 11000 epochs: 20196.4
loss avg after 12000 epochs: 20329.7
loss avg after 13000 epochs: 20606
loss avg after 14000 epochs: 20482.9
loss avg after 15000 epochs: 20759.6
loss avg after 16000 epochs: 20314.1
loss avg after 17000 epochs: 20846.8
loss avg after 18000 epochs: 20483.3
loss avg after 19000 epochs: 20151.7
loss avg after 20000 epochs: 19855.8
loss avg after 21000 epochs: 19927.6
loss avg after 22000 epochs: 20214.1
loss avg after 23000 epochs: 20238.9
loss avg after 24000 epochs: 20140.1
loss avg after 25000 epochs: 19539.5
loss avg after 26000 epochs: 19697.9
loss avg after 27000 epochs: 19445.7
loss avg after 28000 epochs: 19517.8
loss avg afte

KeyboardInterrupt: 

In [None]:
# visualize learning curves
import matplotlib.pyplot as plt
# window length (in epochs) of the moving average applied to each curve
smoothing_steps = 10
# create the figure together with its axes so that figsize applies to the
# figure that is actually drawn; squeeze=False keeps `axes` two-dimensional
# even if there is only a single dataset
fig, axes = plt.subplots(ncols=1, nrows=len(datasets),
                         figsize=(16, 4 * len(datasets)), squeeze=False)
for d in range(len(datasets)):
    ax = axes[d, 0]
    for k in range(len(models)):
        curves_file  = 'results/%s_%s_learning_curves.csv' % (datasets[d], models[k])
        # an interrupted experiment run leaves some models without stored curves
        if not os.path.exists(curves_file):
            print('no learning curves stored for %s on %s; skipping' % (models[k], datasets[d]))
            continue
        learning_curves = np.loadtxt(curves_file, delimiter = '\t')
        # average over repeats (ignoring NaN entries), then take a moving
        # average over smoothing_steps epochs via differences of the cumulative sum
        acum = np.cumsum(np.nanmean(learning_curves, 1))
        # label every curve explicitly so the legend stays correct when a model is skipped
        ax.semilogy((acum[smoothing_steps:] - acum[:-smoothing_steps])/smoothing_steps, label=models[k])
    ax.set_xlabel('epoch')
    ax.set_ylabel('loss')
    ax.set_title(datasets[d])
    ax.legend()
plt.show()