In [1]:
import torch
try:
  import torch_geometric
except:
  !pip -q install torch-scatter     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-sparse      -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-cluster     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-geometric
  import torch_geometric
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
from torch_geometric.utils.convert import to_networkx
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from scipy.sparse.csgraph import shortest_path
from scipy.sparse import csr_matrix
from torch_geometric.utils.random import erdos_renyi_graph
import time
import random
from math import floor, ceil
from copy import deepcopy

from os import path
import bz2
import pickle
import _pickle as cPickle
import sys

[K     |████████████████████████████████| 2.6MB 27.1MB/s 
[K     |████████████████████████████████| 1.5MB 30.4MB/s 
[K     |████████████████████████████████| 1.0MB 7.6MB/s 
[K     |████████████████████████████████| 389kB 19.6MB/s 
[K     |████████████████████████████████| 215kB 27.7MB/s 
[K     |████████████████████████████████| 235kB 43.7MB/s 
[K     |████████████████████████████████| 2.2MB 45.2MB/s 
[K     |████████████████████████████████| 51kB 8.5MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
# Colab-only setup: mount Google Drive so the project folder is reachable.
from google.colab import drive
folder = '/content/drive/MyDrive/USI/GDLProject/'
# force_remount=True re-mounts even if the drive is already mounted in this runtime.
drive.mount('/content/drive/', force_remount=True)

# Put the project folder on the import path; the modules imported below are
# presumably the .py files stored in that folder (verify against the Drive contents).
sys.path.append(folder)

from syntheticGraph import syntheticGraph
from syntheticGraphDataset import syntheticGraphDataset
from reducedGraph import reducedGraph
from GINConv import GINConv
from GNN import GNN

# Make torch.FloatTensor the default tensor type for the rest of the session.
torch.set_default_tensor_type(torch.FloatTensor)

Mounted at /content/drive/


In [6]:
# Bookkeeping containers for loss tracking (each one is an independent object).
loss_dict = {}
loss_avg, test_loss, losses = [], [], []

# Central configuration read by the model, optimizer, training loop and dataset cells.
hyperparams = dict(
    loss_epoch=0,         # not read by any cell visible in this notebook
    lr=0.001,             # learning rate handed to the Adam optimizer
    embedding_dim=50,     # first argument of the GNN constructor
    n_layers=3,           # second argument of the GNN constructor
    n_supernodes=100,     # not read by any cell visible in this notebook
    n_eig=40,             # passed to rayleigh_loss and to addGraph
    n_epochs=5,           # optimisation steps performed per training graph
    batch_size=1,         # passed to graphNumberBatches / getNextBatch
    reduction_ratio=0.5,  # selects which exported dataset files are loaded
)

In [7]:
# The exported archives are named after the reduction ratio configured in `hyperparams`.
ratio_tag = str(hyperparams['reduction_ratio'])
training_file = f"{folder}training_set_{ratio_tag}.pbz2"
valid_file = f"{folder}validation_set_{ratio_tag}.pbz2"
test_file = f"{folder}test_set_{ratio_tag}.pbz2"

# NOTE(review): .pbz2 archives are presumably bz2-compressed pickles; only load
# files that were produced by this project.
training_set = syntheticGraphDataset.import_dataset(training_file)
validation_set = syntheticGraphDataset.import_dataset(valid_file)
# Test-set loading is currently disabled, so `test_set` is NOT defined by this cell.
# test_set = syntheticGraphDataset.import_dataset(test_file)

Loading the compressed set...
Dataset loaded in  9.94755220413208 seconds

Loading the compressed set...
Dataset loaded in  20.49639081954956 seconds



In [8]:
# Build the network from the configured embedding size and depth, in float32.
model = GNN(
    hyperparams['embedding_dim'],
    hyperparams['n_layers'],
).float()

# Flip to True to start from previously saved weights (file is read from the
# current working directory).
load = False
if load:
  saved_state = torch.load('state_dict_model.pt')
  model.load_state_dict(saved_state)

optimizer = torch.optim.Adam(model.parameters(), lr=hyperparams['lr'])

# Baseline: evaluate the (untrained or freshly loaded) model on the validation set.
_ = model.evaluate(validation_set, hyperparams, verbose=True)
# print()
# _ = model.evaluate(test_set, hyperparams, verbose = True)

Evaluation started..
Graph  0 --- relative improvement percentage  -202700.72021484375 %
Graph  1 --- relative improvement percentage  -157357.45849609375 %
Graph  2 --- relative improvement percentage  -184975.37841796875 %
Graph  3 --- relative improvement percentage  -194662.48779296875 %
Graph  4 --- relative improvement percentage  -152568.44482421875 %


In [7]:
# _ = model.evaluate(training_set, hyperparams, verbose = True)

In [9]:
# Per-graph training: every training graph gets `n_epochs` optimisation steps in a
# row, followed by an evaluation on the validation set.
initial_start = time.time()
with torch.enable_grad():
  # Start from a clean dispatcher and empty stored predictions.
  training_set.resetDispatcher()
  training_set.reset_w_hat()

  for graph in training_set.batchesIndices:
    print("Graph ", graph)
    # NOTE(review): `losses` is rebuilt for every graph, so only the history of
    # the last graph survives once this cell has finished.
    losses = []
    for epoch in range(hyperparams['n_epochs']):
      print("\tEpoch ", epoch, end=" ")
      start = time.time()

      # Forward pass over all batches of this graph; the outputs are accumulated
      # inside the dataset object and the loss is computed once afterwards.
      batch_size = hyperparams['batch_size']
      n_batches = training_set.graphNumberBatches(graph, batch_size)
      for batch in range(n_batches):
        A, X, E, _, x, y = training_set.getNextBatch(graph, batch_size)
        out = model(A, X, E)
        training_set.store_w_hat(graph, out, x, y)

      # One optimisation step per epoch on the loss of the whole graph.
      loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
      loss_value = loss.item()
      losses.append(loss_value)
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
      # Drop the stored outputs of this graph before the next epoch.
      training_set.reset_w_hat(graph)
      print(" --- completed in ", time.time()-start, "seconds with loss ", loss_value)
    print()
    _ = model.evaluate(validation_set, hyperparams, verbose=True)
    print()
print("Total training completed in ", time.time()-initial_start, "seconds")
print()
# _ = model.evaluate(test_set, hyperparams, verbose = True)

Graph  0
	Epoch  0  --- completed in  7.053288698196411 seconds with loss  11167.8896484375
	Epoch  1  --- completed in  7.379492521286011 seconds with loss  3919.178466796875
	Epoch  2  --- completed in  7.416180372238159 seconds with loss  43.251895904541016
	Epoch  3  --- completed in  7.493308067321777 seconds with loss  73.9354019165039
	Epoch  4  --- completed in  7.3314526081085205 seconds with loss  89.17642211914062

Evaluation started..
Graph  0 --- relative improvement percentage  -1582.5979232788086 %
Graph  1 --- relative improvement percentage  -1239.4184112548828 %
Graph  2 --- relative improvement percentage  -1424.1143226623535 %
Graph  3 --- relative improvement percentage  -1479.2683601379395 %
Graph  4 --- relative improvement percentage  -1161.3075256347656 %

Graph  1
	Epoch  0  --- completed in  10.100066184997559 seconds with loss  102.94280242919922
	Epoch  1  --- completed in  10.114293336868286 seconds with loss  109.97029876708984
	Epoch  2  --- completed in

In [None]:
# Per-graph training: every training graph gets `n_epochs` optimisation steps in a
# row, followed by an evaluation on the validation set. After all graphs have been
# processed the model is evaluated on the test set, if one is available.
initial_start = time.time()
with torch.enable_grad():
  # Start from a clean dispatcher and empty stored predictions.
  training_set.resetDispatcher()
  training_set.reset_w_hat()

  for graph in training_set.batchesIndices:
    print("Graph ", graph)
    # NOTE(review): `losses` is rebuilt for every graph, so only the history of
    # the last graph survives once this cell has finished.
    losses = []
    for epoch in range(hyperparams['n_epochs']):
      print("\tEpoch ", epoch, end=" ")
      start = time.time()

      # Forward pass over all batches of this graph; the outputs are accumulated
      # inside the dataset object and the loss is computed once afterwards.
      batch_size = hyperparams['batch_size']
      n_batches = training_set.graphNumberBatches(graph, batch_size)
      for batch in range(n_batches):
        A, X, E, _, x, y = training_set.getNextBatch(graph, batch_size)
        out = model(A, X, E)
        training_set.store_w_hat(graph, out, x, y)

      # One optimisation step per epoch on the loss of the whole graph.
      loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
      loss_value = loss.item()
      losses.append(loss_value)
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
      # Drop the stored outputs of this graph before the next epoch.
      training_set.reset_w_hat(graph)
      print(" --- completed in ", time.time()-start, "seconds with loss ", loss_value)
    print()
    _ = model.evaluate(validation_set, hyperparams, verbose=True)
    print()
print("Total training completed in ", time.time()-initial_start, "seconds")
print()
# `test_set` only exists if a test set was loaded or generated earlier in this
# session; without the guard the cell dies with a NameError after the whole
# training run has already completed.
if 'test_set' in globals():
  _ = model.evaluate(test_set, hyperparams, verbose=True)
else:
  print("test_set is not loaded; skipping the final test evaluation")

In [None]:
# initial_start = time.time()

# with torch.enable_grad():
#   training_set.resetDispatcher()
#   training_set.reset_w_hat()
#   for e in range(hyperparams['n_epochs']):
#     print("Big epoch ", e)
#     for graph in training_set.batchesIndices: 
#       print("\tGraph ", graph)
#       losses = []
#       for epoch in range(hyperparams['n_epochs'], ):
#         print("\t\tEpoch ", epoch, end=" ")
#         start = time.time()
#         for batch in range(training_set.graphNumberBatches(graph, hyperparams['batch_size'])):
#           A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
#           out = model(A, X, E)
#           training_set.store_w_hat(graph, out, x, y)

#         loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
#         losses.append(loss.item())
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
#         training_set.reset_w_hat(graph)
#         print(" --- completed in ", time.time()-start, "seconds with loss ", loss.item())
#     training_set.shuffle()
# print("Total training completed in ", time.time()-initial_start, "seconds")

In [None]:
# torch.save(model.state_dict(), "state_dict_model2.pt")

In [None]:
# model = GNN(hyperparams['embedding_dim'], hyperparams['n_layers']).float()
# optimizer = torch.optim.Adam(model.parameters(), lr = hyperparams['lr'])

# with torch.enable_grad():
#   training_set.resetDispatcher()
#   training_set.reset_w_hat()
#   for epoch in range(hyperparams['n_epochs']):
#     print("Epoch ", epoch)
#     losses = []
#     for graph in training_set.batchesIndices: 
#       print("\tGraph ", graph, end=" ")
#       start = time.time()
#       for batch in range(training_set.graphNumberBatches(graph, hyperparams['batch_size'])):
#         A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
#         out = model(A, X, E)
#         training_set.store_w_hat(graph, out, x, y)

#       loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
#       losses.append(loss.item())
#       loss.backward()
#       optimizer.step()
#       optimizer.zero_grad()
#       training_set.reset_w_hat(graph)
#       print(" --- completed in ", time.time()-start, "seconds with loss ", loss.item())
#     training_set.shuffle()

In [4]:
# Generate and export the training and validation sets for one reduction ratio.
# NOTE(review): no random seed is set in this cell; if graph generation is random
# (Erdos-Renyi graphs presumably are), every run produces a different dataset.
folder = '/content/drive/MyDrive/USI/GDLProject/'
reduction_ratio = 0.5
rr = str(reduction_ratio)
training_name = f'{folder}training_set_{rr}.pbz2'
valid_name = f'{folder}validation_set_{rr}.pbz2'
test_name = f'{folder}test_set_{rr}.pbz2'


# TRAINING SET: graph sizes 512, 612, ..., 912
start = time.time()
print("Creating the training set...")
training_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(512, 1012, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\t Creating graph ", key, " of size ", s)
  # p * s is 51.2 for every size.
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  training_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Training set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(training_name, training_set)


# VALIDATION SET: graph sizes 1012, 1112, ..., 1412
start = time.time()
print("Creating the validation set...")
validation_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(1012, 1512, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\tCreating graph ", key, " of size ", s)
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  validation_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Validation set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(valid_name, validation_set)


# #TEST SET
# start = time.time()

# print("Creating the test set...")
# test_set = syntheticGraphDataset(entireMatrix=False)
# sizes = [i for i in range(1512, 2913, 100)]
# preprocess = [True]*len(sizes)
# for key, s in enumerate(sizes):
#   print("\tCreating graph ", key, " of size ",s)
#   graph = syntheticGraph(name_graph_class='erdos_renyi_graph', size = s, p = (0.1*512)/s)
#   red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes = int(s*reduction_ratio))
#   test_set.addGraph(graph, red_graph, hyperparams['n_eig'] , preprocessData = preprocess[key])
# print("Dataset created in ", time.time() - start, "seconds")

# syntheticGraphDataset.export_dataset(test_name, test_set)


Creating the training set...
	 Creating graph  0  of size  512
	 Creating graph  1  of size  612
	 Creating graph  2  of size  712
	 Creating graph  3  of size  812
	 Creating graph  4  of size  912
Training set created in  17.795174837112427 seconds

Creating the validation set...
	Creating graph  0  of size  1012
	Creating graph  1  of size  1112
	Creating graph  2  of size  1212
	Creating graph  3  of size  1312
	Creating graph  4  of size  1412
Validation set created in  48.68718719482422 seconds



In [None]:
# Generate and export the training, validation and test sets for reduction ratio 0.5.
# NOTE(review): this cell repeats the same build loop three times and is itself
# duplicated for other ratios in this notebook; a shared helper function would
# remove the copy-paste. No random seed is set here.
folder = '/content/drive/MyDrive/USI/GDLProject/'
reduction_ratio = 0.5
rr = str(reduction_ratio)
training_name = f'{folder}training_set_{rr}.pbz2'
valid_name = f'{folder}validation_set_{rr}.pbz2'
test_name = f'{folder}test_set_{rr}.pbz2'


# TRAINING SET: graph sizes 512, 612, ..., 912
start = time.time()
print("Creating the training set...")
training_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(512, 1012, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\t Creating graph ", key, " of size ", s)
  # p * s is 51.2 for every size.
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  training_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Training set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(training_name, training_set)


# VALIDATION SET: graph sizes 1012, 1112, ..., 1412
start = time.time()
print("Creating the validation set...")
validation_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(1012, 1512, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\tCreating graph ", key, " of size ", s)
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  validation_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Validation set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(valid_name, validation_set)


# TEST SET: graph sizes 1512, 1612, ..., 2912
start = time.time()

print("Creating the test set...")
test_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(1512, 2913, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\tCreating graph ", key, " of size ",s)
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  test_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Dataset created in ", time.time() - start, "seconds")

syntheticGraphDataset.export_dataset(test_name, test_set)


In [None]:
# Generate and export the training, validation and test sets for reduction ratio 0.7.
# NOTE(review): apart from `reduction_ratio`, this cell is a copy of the 0.5
# generation cell in this notebook; a shared helper function would remove the
# copy-paste. No random seed is set here.
folder = '/content/drive/MyDrive/USI/GDLProject/'
reduction_ratio = 0.7
rr = str(reduction_ratio)
training_name = f'{folder}training_set_{rr}.pbz2'
valid_name = f'{folder}validation_set_{rr}.pbz2'
test_name = f'{folder}test_set_{rr}.pbz2'


# TRAINING SET: graph sizes 512, 612, ..., 912
start = time.time()
print("Creating the training set...")
training_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(512, 1012, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\t Creating graph ", key, " of size ", s)
  # p * s is 51.2 for every size.
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  training_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Training set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(training_name, training_set)


# VALIDATION SET: graph sizes 1012, 1112, ..., 1412
start = time.time()
print("Creating the validation set...")
validation_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(1012, 1512, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\tCreating graph ", key, " of size ", s)
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  validation_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Validation set created in ", time.time()-start, "seconds\n")

syntheticGraphDataset.export_dataset(valid_name, validation_set)


# TEST SET: graph sizes 1512, 1612, ..., 2912
start = time.time()

print("Creating the test set...")
test_set = syntheticGraphDataset(entireMatrix=False)
sizes = list(range(1512, 2913, 100))
preprocess = [True] * len(sizes)
for key, (s, do_preprocess) in enumerate(zip(sizes, preprocess)):
  print("\tCreating graph ", key, " of size ",s)
  graph = syntheticGraph(
      name_graph_class='erdos_renyi_graph',
      size=s,
      p=(0.1*512)/s,
  )
  n_super = int(s*reduction_ratio)
  red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=n_super)
  test_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData=do_preprocess)
print("Dataset created in ", time.time() - start, "seconds")

syntheticGraphDataset.export_dataset(test_name, test_set)
