In [1]:
import torch
import torch.nn.functional as F

from random import shuffle
import networkx as nx
from torch.autograd import Variable

import numpy as np 
import os 
from torch.optim.lr_scheduler import MultiStepLR
import time as tm 
import random 
import pickle as pkl 
import scipy.sparse as sp

#import dgl
import torch.nn as nn
# tensorboard --logdir=tensorboard

Implementation from DeepGenerativeModels.GraphRNN_master
[From Github Repo](https://github.com/JiaxuanYou/graph-generation/blob/master/main_DeepGMG.py)

Note: 
**networkx==1.11** was used 
File ~\PycharmProjects\MA\venv\lib\site-packages\networkx\algorithms\dag.py:2
from math import gcd # not fractions
The code-modifications only work with 1.11 as well, which makes them redundant in the first place.  

Methodology: 
**The batches are graphs, but no node features are considered**

# Data (Load and save)

In [2]:
def caveman_special(c=2,k=20,p_path=0.1,p_edge=0.3):
    p = p_path
    path_count = max(int(np.ceil(p * k)),1)
    G = nx.caveman_graph(c, k)
    # remove 50% edges
    p = 1-p_edge
    for (u, v) in list(G.edges()):
        if np.random.rand() < p and ((u < k and v < k) or (u >= k and v >= k)):
            G.remove_edge(u, v)
    # add path_count links
    for i in range(path_count):
        u = np.random.randint(0, k)
        v = np.random.randint(k, k * 2)
        G.add_edge(u, v)
    
    G = max([G.subgraph(c).copy() for c in nx.connected_components(G)], key=len)
    return G

def save_graph_list(G_list, fname):
    with open(fname, "wb") as f:
        pkl.dump(G_list, f)
        
# load ENZYMES and PROTEIN and DD dataset
def Graph_load_batch(min_num_nodes = 20, max_num_nodes = 1000, name = 'ENZYMES',node_attributes = True,graph_labels=True):
    '''
    load many graphs, e.g. enzymes
    :return: a list of graphs
    '''
    print('Loading graph dataset: '+str(name))
    G = nx.Graph()
    # load data
    path = 'dataset/'+name+'/'
    data_adj = np.loadtxt(path+name+'_A.txt', delimiter=',').astype(int)
    if node_attributes:
        data_node_att = np.loadtxt(path+name+'_node_attributes.txt', delimiter=',')
    data_node_label = np.loadtxt(path+name+'_node_labels.txt', delimiter=',').astype(int)
    data_graph_indicator = np.loadtxt(path+name+'_graph_indicator.txt', delimiter=',').astype(int)
    if graph_labels:
        data_graph_labels = np.loadtxt(path+name+'_graph_labels.txt', delimiter=',').astype(int)

def parse_index_file(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index

def Graph_load(dataset = 'cora'):
    '''
    Load a single graph dataset
    :param dataset: dataset name
    :return:
    '''
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        load = pkl.load(open("dataset/ind.{}.{}".format(dataset, names[i]), 'rb'), encoding='latin1')
        # print('loaded')
        objects.append(load)
        # print(load)
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("dataset/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    G = nx.from_dict_of_lists(graph)
    adj = nx.adjacency_matrix(G)
    return adj, features, G


# Helper functions 

In [3]:
def calc_graph_embedding(node_embedding_cat, model):
    """
    
    :param node_embedding_cat: 
    :param model: 
    :return: 
    """
    node_embedding_graph = model.f_m(node_embedding_cat)
    node_embedding_graph_gate = model.f_gate(node_embedding_cat)
    graph_embedding = torch.sum(torch.mul(node_embedding_graph, node_embedding_graph_gate), dim=0, keepdim=True)
    return graph_embedding

def calc_init_embedding(node_embedding_cat, model):
    """
    
    :param node_embedding_cat: 
    :param model: An instantiated nn.module 
    :return: 
    """
    node_embedding_init = model.f_m_init(node_embedding_cat)
    node_embedding_init_gate = model.f_gate_init(node_embedding_cat)
    init_embedding = torch.sum(torch.mul(node_embedding_init, node_embedding_init_gate), dim=0, keepdim=True)
    init_embedding = model.f_init(init_embedding)
    return init_embedding

def message_passing(node_neighbor, node_embedding, model):
    """
    
    :param node_neighbor: degree (freind, of a friend, of a friend -> 3) 
    :param node_embedding: 
    :param model: 
    :return: 
    """
    node_embedding_new = []
    for i in range(len(node_neighbor)):
        neighbor_num = len(node_neighbor[i])
        if neighbor_num > 0:
            node_self = node_embedding[i].expand(neighbor_num, node_embedding[i].size(1))
            node_self_neighbor = torch.cat([node_embedding[j] for j in node_neighbor[i]], dim=0)
            message = torch.sum(model.m_uv_1(torch.cat((node_self, node_self_neighbor), dim=1)), dim=0, keepdim=True)
            node_embedding_new.append(model.f_n_1(message, node_embedding[i]))
        else:
            message_null = Variable(torch.zeros((node_embedding[i].size(0),node_embedding[i].size(1)*2)))
            node_embedding_new.append(model.f_n_1(message_null, node_embedding[i]))
    node_embedding = node_embedding_new
    node_embedding_new = []
    for i in range(len(node_neighbor)):
        neighbor_num = len(node_neighbor[i])
        if neighbor_num > 0:
            node_self = node_embedding[i].expand(neighbor_num, node_embedding[i].size(1))
            node_self_neighbor = torch.cat([node_embedding[j] for j in node_neighbor[i]], dim=0)
            message = torch.sum(model.m_uv_1(torch.cat((node_self, node_self_neighbor), dim=1)), dim=0, keepdim=True)
            node_embedding_new.append(model.f_n_1(message, node_embedding[i]))
        else:
            message_null = Variable(torch.zeros((node_embedding[i].size(0), node_embedding[i].size(1) * 2)))
            node_embedding_new.append(model.f_n_1(message_null, node_embedding[i]))
    return node_embedding_new

def sample_tensor(y,sample=True, thresh=0.5):
    # do sampling
    if sample:
        y_thresh = Variable(torch.rand(y.size()))
        y_result = torch.gt(y,y_thresh).float()
    # do max likelihood based on some threshold
    else:
        y_thresh = Variable(torch.ones(y.size())*thresh)
        y_result = torch.gt(y, y_thresh).float()
    return y_result

def gumbel_softmax(logits, temperature, eps=1e-9):
    '''

    :param logits: shape: N*L
    :param temperature:
    :param eps:
    :return:
    '''
    # get gumbel noise
    noise = torch.rand(logits.size())
    noise.add_(eps).log_().neg_()
    noise.add_(eps).log_().neg_()
    noise = Variable(noise)

    x = (logits + noise) / temperature
    x = F.softmax(x)
    return x


# Model with arguments 

In [4]:
class DGM_graphs(nn.Module):
    def __init__(self,h_size):
        # h_size: node embedding size
        # h_size*2: graph embedding size

        super(DGM_graphs, self).__init__()
        ### all modules used by the model
        ## 1 message passing, 2 times
        self.m_uv_1 = nn.Linear(h_size*2, h_size*2)
        self.f_n_1 = nn.GRUCell(h_size*2, h_size) # input_size, hidden_size

        self.m_uv_2 = nn.Linear(h_size * 2, h_size * 2)
        self.f_n_2 = nn.GRUCell(h_size * 2, h_size)  # input_size, hidden_size

        ## 2 graph embedding and new node embedding
        # for graph embedding
        self.f_m = nn.Linear(h_size, h_size*2)
        self.f_gate = nn.Sequential(
            nn.Linear(h_size,1),
            nn.Sigmoid()
        )
        # for new node embedding
        self.f_m_init = nn.Linear(h_size, h_size*2)
        self.f_gate_init = nn.Sequential(
            nn.Linear(h_size,1),
            nn.Sigmoid()
        )
        self.f_init = nn.Linear(h_size*2, h_size)

        ## 3 f_addnode
        self.f_an = nn.Sequential(
            nn.Linear(h_size*2,1),
            nn.Sigmoid()
        )

        ## 4 f_addedge
        self.f_ae = nn.Sequential(
            nn.Linear(h_size * 2, 1),
            nn.Sigmoid()
        )

        ## 5 f_nodes
        self.f_s = nn.Linear(h_size*2, 1)

class Args_DGMG():
    def __init__(self):
        ### CUDA
        self.cuda = 0

        ### model type
        self.note = 'Baseline_DGMG' # do GCN after adding each edge
        # self.note = 'Baseline_DGMG_fast' # do GCN only after adding each node

        ### data config
        self.graph_type = 'caveman_small'
        # self.graph_type = 'grid_small'
        # self.graph_type = 'ladder_small'
        # self.graph_type = 'enzymes_small'
        # self.graph_type = 'barabasi_small'
        # self.graph_type = 'citeseer_small'

        self.max_num_node = 20

        ### network config
        self.node_embedding_size = 64
        self.test_graph_num = 200


        ### training config
        self.epochs = 2000  # now one epoch means self.batch_ratio x batch_size
        self.load_epoch = 2000
        self.epochs_test_start = 100
        self.epochs_test = 100
        self.epochs_log = 100
        self.epochs_save = 100
        if 'fast' in self.note:
            self.is_fast = True
        else:
            self.is_fast = False

        self.lr = 0.001
        self.milestones = [300, 600, 1000]
        self.lr_rate = 0.3

        ### output config
        self.model_save_path = 'model_save/'
        self.graph_save_path = 'graphs/'
        self.figure_save_path = 'figures/'
        self.timing_save_path = 'timing/'
        self.figure_prediction_save_path = 'figures_prediction/'
        self.nll_save_path = 'nll/'


        self.fname = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size)
        self.fname_pred = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_pred_'
        self.fname_train = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_train_'
        self.fname_test = self.note + '_' + self.graph_type + '_' + str(self.node_embedding_size) + '_test_'

        self.load = False
        self.save = True


# Train and test loops 

In [5]:
def train_DGMG_epoch(epoch, args, model, dataset, optimizer, scheduler, is_fast = False):
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)


    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)


        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size))] 
        # list of torch tensors, each size: 1*hidden


        loss = 0
        while node_count<=graph.number_of_nodes():
            # .adjacency_list()  deprecated now: .adj or list(G.adjacency())
            # adjacency_list = [[n for n in G.neighbors(node)] for node in G.nodes()]
            # The induced subgraph of the graph contains the nodes in nodes and the edges between those nodes.
            
        
            node_neighbor = [[n for n in graph.subgraph(list(range(node_count))).neighbors(node)]
                             for node in graph.subgraph(list(range(node_count))).nodes()]
            # list of lists (first node is zero)
 
            node_neighbor_new = [[n for n in graph.subgraph(list(range(node_count+1))).neighbors(node)]
                                 for node in graph.subgraph(list(range(node_count+1))).nodes()][-1]
            #node_neighbor_new = graph.subgraph(list(range(node_count+1))).adj[-1] # list of new node's neighbors
            print(node_neighbor_new)
            
            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))))
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))))
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break


            edge_count = 0
            while edge_count<=len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)

                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))))
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0), dim=1)
                    # get ground truth
                    a_node = torch.zeros((1,p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0,node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node)
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node,a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    loss_node += loss_node_step.data

                else:
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))))
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1

        # update deterministic and lstm
        loss.backward()
        optimizer.step()
        scheduler.step()

    loss_all = loss_addnode + loss_addedge + loss_node

    if epoch % args.epochs_log==0:
        print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
            epoch, args.epochs,loss_all[0], args.graph_type, args.node_embedding_size))


    # loss_sum += loss.data[0]*x.size(0)
    # return loss_sum


def train_DGMG_forward_epoch(args, model, dataset, is_fast = False):
    model.train()
    graph_num = len(dataset)
    order = list(range(graph_num))
    shuffle(order)


    loss_addnode = 0
    loss_addedge = 0
    loss_node = 0
    for i in order:
        model.zero_grad()

        graph = dataset[i]
        # do random ordering: relabel nodes
        node_order = list(range(graph.number_of_nodes()))
        shuffle(node_order)
        order_mapping = dict(zip(graph.nodes(), node_order))
        graph = nx.relabel_nodes(graph, order_mapping, copy=True)


        # NOTE: when starting loop, we assume a node has already been generated
        node_count = 1
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size))] # list of torch tensors, each size: 1*hidden


        loss = 0
        while node_count<=graph.number_of_nodes():
            
            node_neighbor = [[n for n in graph.subgraph(list(range(node_count))).neighbors(node)]
                             for node in graph.subgraph(list(range(node_count))).nodes()]
            # list of lists (first node is zero)
 
            node_neighbor_new = [[n for n in graph.subgraph(list(range(node_count+1))).neighbors(node)]
                                 for node in graph.subgraph(list(range(node_count+1))).nodes()][-1]
            # list of new node's neighbors
            print(node_neighbor_new)

            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            if node_count < graph.number_of_nodes():
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode,Variable(torch.ones((1,1))))
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
            else:
                # calc loss
                loss_addnode_step = F.binary_cross_entropy(p_addnode, Variable(torch.zeros((1, 1))))
                # loss_addnode_step.backward(retain_graph=True)
                loss += loss_addnode_step
                loss_addnode += loss_addnode_step.data
                break


            edge_count = 0
            while edge_count<=len(node_neighbor_new):
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)

                if edge_count < len(node_neighbor_new):
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.ones((1, 1))))
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data

                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0), dim=1)
                    # get ground truth
                    a_node = torch.zeros((1,p_node.size(1)))
                    # print('node_neighbor_new',node_neighbor_new, edge_count)
                    a_node[0,node_neighbor_new[edge_count]] = 1
                    a_node = Variable(a_node)
                    # add edge
                    node_neighbor[-1].append(node_neighbor_new[edge_count])
                    node_neighbor[node_neighbor_new[edge_count]].append(len(node_neighbor)-1)
                    # calc loss
                    loss_node_step = F.binary_cross_entropy(p_node,a_node)
                    # loss_node_step.backward(retain_graph=True)
                    loss += loss_node_step
                    loss_node += loss_node_step.data*p_node.size(1)

                else:
                    # calc loss
                    loss_addedge_step = F.binary_cross_entropy(p_addedge, Variable(torch.zeros((1, 1))))
                    # loss_addedge_step.backward(retain_graph=True)
                    loss += loss_addedge_step
                    loss_addedge += loss_addedge_step.data
                    break

                edge_count += 1
            node_count += 1


    loss_all = loss_addnode + loss_addedge + loss_node

    # if epoch % args.epochs_log==0:
    #     print('Epoch: {}/{}, train loss: {:.6f}, graph type: {}, hidden: {}'.format(
    #         epoch, args.epochs,loss_all[0], args.graph_type, args.node_embedding_size))


    return loss_all[0]/len(dataset)


def test_DGMG_epoch(args, model, is_fast=False):
    model.eval()
    graph_num = args.test_graph_num

    graphs_generated = []
    for i in range(graph_num):
        # NOTE: when starting loop, we assume a node has already been generated
        node_neighbor = [[]]  # list of lists (first node is zero)
        node_embedding = [Variable(torch.ones(1,args.node_embedding_size))] 
        # list of torch tensors, each size: 1*hidden

        node_count = 1
        while node_count<=args.max_num_node:
            # 1 message passing
            # do 2 times message passing
            node_embedding = message_passing(node_neighbor, node_embedding, model)

            # 2 graph embedding and new node embedding
            node_embedding_cat = torch.cat(node_embedding, dim=0)
            graph_embedding = calc_graph_embedding(node_embedding_cat, model)
            init_embedding = calc_init_embedding(node_embedding_cat, model)

            # 3 f_addnode
            p_addnode = model.f_an(graph_embedding)
            a_addnode = sample_tensor(p_addnode)
            # print(a_addnode.data[0][0])
            if a_addnode.data[0][0]==1:
                # print('add node')
                # add node
                node_neighbor.append([])
                node_embedding.append(init_embedding)
                if is_fast:
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
            else:
                break

            edge_count = 0
            while edge_count<args.max_num_node:
                if not is_fast:
                    node_embedding = message_passing(node_neighbor, node_embedding, model)
                    node_embedding_cat = torch.cat(node_embedding, dim=0)
                    graph_embedding = calc_graph_embedding(node_embedding_cat, model)

                # 4 f_addedge
                p_addedge = model.f_ae(graph_embedding)
                a_addedge = sample_tensor(p_addedge)
                # print(a_addedge.data[0][0])

                if a_addedge.data[0][0]==1:
                    # print('add edge')
                    # 5 f_nodes
                    # excluding the last node (which is the new node)
                    node_new_embedding_cat = node_embedding_cat[-1,:].expand(node_embedding_cat.size(0)-1,node_embedding_cat.size(1))
                    s_node = model.f_s(torch.cat((node_embedding_cat[0:-1,:],node_new_embedding_cat),dim=1))
                    p_node = F.softmax(s_node.permute(1,0), dim=1)
                    a_node = gumbel_softmax(p_node, temperature=0.01)
                    _, a_node_id = a_node.topk(1)
                    a_node_id = int(a_node_id.data[0][0])
                    # add edge
                    node_neighbor[-1].append(a_node_id)
                    node_neighbor[a_node_id].append(len(node_neighbor)-1)
                else:
                    break

                edge_count += 1
            node_count += 1
        # save graph
        node_neighbor_dict = dict(zip(list(range(len(node_neighbor))), node_neighbor))
        graph = nx.from_dict_of_lists(node_neighbor_dict)
        graphs_generated.append(graph)

    return graphs_generated



########### train function for LSTM + VAE
def train_DGMG(args, dataset_train, model):
    # check if load existing model
    if args.load:
        fname = args.model_save_path + args.fname + 'model_' + str(args.load_epoch) + '.dat'
        model.load_state_dict(torch.load(fname))

        args.lr = 0.00001
        epoch = args.load_epoch
        print('model loaded!, lr: {}'.format(args.lr))
    else:
        epoch = 1

    # initialize optimizer
    optimizer = torch.optim.Adam(list(model.parameters()), lr=args.lr)

    scheduler = MultiStepLR(optimizer, milestones=args.milestones, gamma=args.lr_rate)

    # start main loop
    time_all = np.zeros(args.epochs)
    while epoch <= args.epochs:
        time_start = tm.time()
        # train
        train_DGMG_epoch(epoch, args, model, dataset_train, optimizer, scheduler, is_fast=args.is_fast)
        time_end = tm.time()
        time_all[epoch - 1] = time_end - time_start
        # print('time used',time_all[epoch - 1])
        # test
        if epoch % args.epochs_test == 0 and epoch >= args.epochs_test_start:
            graphs = test_DGMG_epoch(args,model, is_fast=args.is_fast)
            fname = args.graph_save_path + args.fname_pred + str(epoch) + '.dat'
            save_graph_list(graphs, fname)
            # print('test done, graphs saved')

        # save model checkpoint
        if args.save:
            if epoch % args.epochs_save == 0:
                fname = args.model_save_path + args.fname + 'model_' + str(epoch) + '.dat'
                torch.save(model.state_dict(), fname)
        epoch += 1
    np.save(args.timing_save_path + args.fname, time_all)


########### train function for LSTM + VAE
def train_DGMG_nll(args, dataset_train,dataset_test, model,max_iter=1000):
    # check if load existing model
    fname = args.model_save_path + args.fname + 'model_' + str(args.load_epoch) + '.dat'
    model.load_state_dict(torch.load(fname))

    fname_output = args.nll_save_path + args.note + '_' + args.graph_type + '.csv'
    with open(fname_output, 'w+') as f:
        f.write('train,test\n')
        # start main loop
        for iter in range(max_iter):
            nll_train = train_DGMG_forward_epoch(args, model, dataset_train, is_fast=args.is_fast)
            nll_test = train_DGMG_forward_epoch(args, model, dataset_test, is_fast=args.is_fast)
            print('train', nll_train, 'test', nll_test)
            f.write(str(nll_train) + ',' + str(nll_test) + '\n')
    

In [7]:
args = Args_DGMG()
# self.graph_type = 'caveman_small' 
# self.node_embedding_size = 64

os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda)
print('CUDA', args.cuda)
print('File name prefix',args.fname)


graphs = []
for i in range(4, 10):
    graphs.append(nx.ladder_graph(i))
    
model = DGM_graphs(h_size = args.node_embedding_size)

if args.graph_type == 'ladder_small':
    graphs = []
    for i in range(2, 11):
        graphs.append(nx.ladder_graph(i))
    args.max_prev_node = 10
 
if args.graph_type=='caveman_small':
    print("!")
    graphs = []
    for i in range(2, 3):
        for j in range(6, 11):
            for k in range(20):
                graphs.append(caveman_special(i, j, p_edge=0.8))
    args.max_prev_node = 20
    
if args.graph_type == 'grid_small':
    graphs = []
    for i in range(2, 5):
        for j in range(2, 6):
            graphs.append(nx.grid_2d_graph(i, j))
    args.max_prev_node = 15
    
if args.graph_type == 'barabasi_small':
    graphs = []
    for i in range(4, 21):
        for j in range(3, 4):
            for k in range(10):
                graphs.append(nx.barabasi_albert_graph(i, j))
    args.max_prev_node = 20

if args.graph_type == 'enzymes_small':
    graphs_raw = Graph_load_batch(min_num_nodes=10, name='ENZYMES')
    graphs = []
    for G in graphs_raw:
        if G.number_of_nodes()<=20:
            graphs.append(G)
    args.max_prev_node = 15

if args.graph_type == 'citeseer_small':
    _, _, G = Graph_load(dataset='citeseer')
    G = G = max([G.subgraph(c).copy() for c in nx.connected_components(G)], key=len)
    G = nx.convert_node_labels_to_integers(G)
    graphs = []
    for i in range(G.number_of_nodes()):
        G_ego = nx.ego_graph(G, i, radius=1)
        if (G_ego.number_of_nodes() >= 4) and (G_ego.number_of_nodes() <= 20):
            graphs.append(G_ego)
    shuffle(graphs)
    graphs = graphs[0:200]
    args.max_prev_node = 15


# remove self loops
for graph in graphs:
    #edges_with_selfloops = graph.selfloop_edges() #1.1
    edges_with_selfloops = list(nx.selfloop_edges(graph)) #3.3
    if len(edges_with_selfloops) > 0:
        graph.remove_edges_from(edges_with_selfloops)

# split datasets
random.seed(123)
shuffle(graphs)
graphs_len = len(graphs)
graphs_test = graphs[int(0.8 * graphs_len):]
graphs_train = graphs[0:int(0.8 * graphs_len)]

args.max_num_node = max([graphs[i].number_of_nodes() for i in range(len(graphs))])
# args.max_num_node = 2000
# show graphs statistics
print('total graph num: {}, training set: {}'.format(len(graphs), len(graphs_train)))
print('max number node: {}'.format(args.max_num_node))
print('max previous node: {}'.format(args.max_prev_node))


### train
train_DGMG(args,graphs,model)

CUDA 0
File name prefix Baseline_DGMG_caveman_small_64
!
total graph num: 100, training set: 80
max number node: 20
max previous node: 20
[]
[0]
[]
[2]
[3]
[0, 2, 4, 1]
[0, 6, 4, 1]
[5, 8]


IndexError: index 8 is out of bounds for dimension 1 with size 8

In [2]:
# Warnings and changes: 
# depreceated: G.selfloop_edges()
# now: list(nx.selfloop_edges(graph))

# adjacency_list deprecated: 
# https://networkx.org/documentation/networkx-1.11/reference/generated/networkx.Graph.adjacency_list.html

G = nx.path_graph(4) 
#nx.draw(G, with_labels=True)

# Access the adjacency list
adjacency_list = [[n for n in G.neighbors(node)] for node in G.nodes()]
print(G.adj)

print(adjacency_list)

{0: {1: {}}, 1: {0: {}, 2: {}}, 2: {1: {}, 3: {}}, 3: {2: {}}}
[[1], [0, 2], [1, 3], [2]]


In [18]:
graphs = []
node_count = 10

for i in range(2, 5):
    for j in range(2, 6):
        graphs.append(nx.grid_2d_graph(i, j))

graph = graphs[3]
# do random ordering: relabel nodes
node_order = list(range(graph.number_of_nodes()))
shuffle(node_order)
order_mapping = dict(zip(graph.nodes(), node_order))
graph = nx.relabel_nodes(graph, order_mapping, copy=True)

In [19]:
m1 = [[n for n in graph.subgraph(list(range(node_count))).neighbors(node)]
                             for node in graph.subgraph(list(range(node_count))).nodes()]

m1n = [[n for n in graph.subgraph(list(range(node_count+1))).neighbors(node)]
                     for node in graph.subgraph(list(range(node_count+1))).nodes()][-1]

m2 = graph.subgraph(list(range(node_count))).adjacency_list() 
m2n = graph.subgraph(list(range(node_count+1))).adjacency_list()[-1]

In [20]:
print(m1)
print(m1n)
print(":"*40)
print(m2)
print(m2n)

[[1, 3, 4], [0, 2, 5], [1, 8], [0, 6, 9], [0, 5, 6], [1, 4, 8], [3, 4, 7], [6, 9], [2, 5], [7, 3]]
[7, 3]
::::::::::::::::::::::::::::::::::::::::
[[1, 3, 4], [0, 2, 5], [1, 8], [0, 6, 9], [0, 5, 6], [1, 4, 8], [3, 4, 7], [6, 9], [2, 5], [7, 3]]
[7, 3]
