In [1]:
import torch
try:
  import torch_geometric
except:
  !pip -q install torch-scatter     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-sparse      -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-cluster     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-geometric
  import torch_geometric
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
from torch_geometric.utils.convert import to_networkx
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from scipy.sparse.csgraph import shortest_path
from scipy.sparse import csr_matrix
from torch_geometric.utils.random import erdos_renyi_graph
import time
import random
from math import floor, ceil
from copy import deepcopy

from os import path
import bz2
import pickle
import _pickle as cPickle
import sys

[K     |████████████████████████████████| 2.6MB 7.1MB/s 
[K     |████████████████████████████████| 1.5MB 7.2MB/s 
[K     |████████████████████████████████| 1.0MB 7.4MB/s 
[K     |████████████████████████████████| 389kB 7.5MB/s 
[K     |████████████████████████████████| 215kB 7.4MB/s 
[K     |████████████████████████████████| 235kB 11.2MB/s 
[K     |████████████████████████████████| 2.2MB 13.0MB/s 
[K     |████████████████████████████████| 51kB 9.0MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
# Mount Google Drive so the project folder (datasets and local modules) is reachable.
from google.colab import drive
folder = '/content/drive/MyDrive/USI/GDLProject/'
drive.mount('/content/drive/')

# The project modules are imported from `folder`; the guard keeps sys.path from
# accumulating duplicate entries every time this cell is re-run.
if folder not in sys.path:
  sys.path.append(folder)

# Project-local modules: these imports must stay below the sys.path update.
from syntheticGraph import syntheticGraph
from syntheticGraphDataset import syntheticGraphDataset
from reducedGraph import reducedGraph
from GINConv import GINConv
from GNN import GNN

# Make float32 the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type is deprecated; set_default_dtype is the
# documented replacement for selecting the default dtype.
torch.set_default_dtype(torch.float32)

Mounted at /content/drive/


In [6]:
# Containers for loss bookkeeping; each one starts empty and is a distinct object.
loss_dict = {}
loss_avg, test_loss, losses = [], [], []

# Run configuration read by the model-construction, training and evaluation cells.
hyperparams = dict(
    loss_epoch=0,
    lr=0.001,
    embedding_dim=50,
    n_layers=3,
    n_supernodes=100,
    n_eig=40,
    n_epochs=2,
    batch_size=1,
)

In [4]:
# Paths of the three compressed dataset archives inside the project folder.
training_file, valid_file, test_file = [
    folder + archive
    for archive in ('training_set.pbz2', 'validation_set.pbz2', 'test_set.pbz2')
]

# Load the splits one after another, in training -> validation -> test order.
training_set, validation_set, test_set = [
    syntheticGraphDataset.import_dataset(archive_path)
    for archive_path in (training_file, valid_file, test_file)
]

Loading the compressed set...
Dataset loaded in  4.33670711517334 seconds

Loading the compressed set...
Dataset loaded in  7.7959253787994385 seconds

Loading the compressed set...
Dataset loaded in  53.11611270904541 seconds



In [5]:
# Build the network from the configured embedding size and layer count,
# with its parameters cast to float.
model = GNN(
    hyperparams['embedding_dim'],
    hyperparams['n_layers'],
).float()

# Set `load` to True to start from the weights stored in 'state_dict_model.pt'
# (path is relative to the current working directory).
load = False
if load:
  model.load_state_dict(
      torch.load('state_dict_model.pt')
  )

# Adam over all model parameters, using the configured learning rate.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr = hyperparams['lr'],
)

In [7]:
# Graph-major training: for each training graph, run `n_epochs` optimisation
# steps on that graph before moving on to the next one.

# Make sure the model is in training mode even if an earlier cell switched it.
model.train()

with torch.enable_grad():
  training_set.resetDispatcher()
  training_set.reset_w_hat()

  for graph in training_set.batchesIndices:
    print("Graph ", graph)
    losses = []
    # Keep the history of every graph: `losses` is rebound for each graph, so
    # without this only the last graph's values would survive the loop.
    loss_dict[graph] = losses
    for epoch in range(hyperparams['n_epochs']):
      print("\tEpoch ", epoch, end=" ")
      start = time.time()
      # Clear gradients before the forward passes so that nothing left over
      # from a previous (possibly interrupted) run leaks into this step.
      optimizer.zero_grad()
      # Forward every batch of this graph and hand the outputs to the dataset.
      for batch in range(training_set.graphNumberBatches(graph, hyperparams['batch_size'])):
        A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
        out = model(A, X, E)
        training_set.store_w_hat(graph, out, x, y)

      # One loss per graph and epoch, computed by the dataset once all the
      # batches of the graph have been stored.
      loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
      loss_value = loss.item()
      losses.append(loss_value)
      loss.backward()
      optimizer.step()
      # Drop this graph's stored outputs before the next epoch refills them.
      training_set.reset_w_hat(graph)
      print(" --- completed in ", time.time()-start, "seconds with loss ", loss_value)

Graph  0
	Epoch  0  --- completed in  2.475904703140259 seconds with loss  1115213.25
	Epoch  1  --- completed in  2.372847318649292 seconds with loss  793335.9375
Graph  1
	Epoch  0  --- completed in  2.466357946395874 seconds with loss  857614.875
	Epoch  1  --- completed in  2.475942850112915 seconds with loss  540596.125
Graph  2
	Epoch  0  --- completed in  2.8549461364746094 seconds with loss  780649.75
	Epoch  1  --- completed in  2.842714786529541 seconds with loss  413602.09375
Graph  3
	Epoch  0  --- completed in  2.966904878616333 seconds with loss  780929.6875
	Epoch  1  --- completed in  2.997213840484619 seconds with loss  949683.5625
Graph  4
	Epoch  0  --- completed in  3.3611745834350586 seconds with loss  737627.75
	Epoch  1  --- completed in  3.436617612838745 seconds with loss  723390.875


In [None]:
# torch.save(model.state_dict(), "state_dict_model2.pt")

In [8]:
# Evaluate the model on every test graph and report, per graph, the relative
# improvement of the loss over the value in test_set.originalGraphsLoss.

# Switch to evaluation mode for the duration of this cell. This only matters
# if GNN contains mode-dependent layers such as dropout or batch norm.
# The previous mode is restored afterwards so later training cells are unaffected.
was_training = model.training
model.eval()
try:
  with torch.no_grad():
    improvements = []
    test_set.reset_w_hat()
    test_set.resetDispatcher()
    for graph in test_set.batchesIndices:
      print("Graph ", graph, end=" ")
      # Forward every batch of this graph and hand the outputs to the dataset.
      for batch in range(test_set.graphNumberBatches(graph, hyperparams['batch_size'])):
        A, X, E, _, x, y = test_set.getNextBatch(graph, hyperparams['batch_size'])
        out = model(A, X, E)
        test_set.store_w_hat(graph, out, x, y)
      loss = test_set.rayleigh_loss(graph, hyperparams['n_eig'])
      # Relative improvement: (reference - achieved) / reference.
      baseline = test_set.originalGraphsLoss[graph]
      imp = (baseline - loss)/baseline
      improvements.append(imp)
      print("--- relative improvement percentage ", imp.item()*100, "%")
finally:
  model.train(was_training)

Graph  0 --- relative improvement percentage  29.79338765144348 %
Graph  1 --- relative improvement percentage  24.912086129188538 %
Graph  2 --- relative improvement percentage  17.965778708457947 %
Graph  3 --- relative improvement percentage  19.178752601146698 %
Graph  4 --- relative improvement percentage  18.964123725891113 %
Graph  5 --- relative improvement percentage  16.427312791347504 %
Graph  6 --- relative improvement percentage  13.370072841644287 %
Graph  7 --- relative improvement percentage  14.781858026981354 %
Graph  8 --- relative improvement percentage  12.223676592111588 %
Graph  9 --- relative improvement percentage  11.243606358766556 %
Graph  10 --- relative improvement percentage  10.709107667207718 %
Graph  11 --- relative improvement percentage  9.578172117471695 %
Graph  12 --- relative improvement percentage  9.585058689117432 %
Graph  13 --- relative improvement percentage  9.248370677232742 %
Graph  14 --- relative improvement percentage  9.7588464617729

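# OTHER ATTEMPT: epoch-major training (each epoch visits every training graph once, then calls `training_set.shuffle()`)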

In [None]:
# Start from a newly constructed model and optimizer for this experiment.
model = GNN(
    hyperparams['embedding_dim'],
    hyperparams['n_layers'],
).float()
optimizer = torch.optim.Adam(model.parameters(), lr = hyperparams['lr'])

# Epoch-major schedule: one optimisation step per graph inside every epoch,
# followed by a call to training_set.shuffle() before the next epoch.
with torch.enable_grad():
  training_set.resetDispatcher()
  training_set.reset_w_hat()
  for epoch in range(hyperparams['n_epochs']):
    print("Epoch ", epoch)
    losses = []
    for graph in training_set.batchesIndices:
      print("\tGraph ", graph, end=" ")
      start = time.time()

      # Forward all batches of the current graph, storing each output in the dataset.
      n_batches = training_set.graphNumberBatches(graph, hyperparams['batch_size'])
      for batch_idx in range(n_batches):
        A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
        training_set.store_w_hat(graph, model(A, X, E), x, y)

      # Single loss for the graph, then one optimizer update.
      loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
      loss_value = loss.item()
      losses.append(loss_value)
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

      # Clear this graph's stored outputs so the next visit starts clean.
      training_set.reset_w_hat(graph)
      elapsed = time.time()-start
      print(" --- completed in ", elapsed, "seconds with loss ", loss_value)
    training_set.shuffle()

Epoch  0
	Graph  0  --- completed in  2.351430892944336 seconds with loss  187934.640625
	Graph  1  --- completed in  2.4727249145507812 seconds with loss  1060777.25
	Graph  2  --- completed in  2.7637321949005127 seconds with loss  1333261.0
	Graph  3  --- completed in  3.0248165130615234 seconds with loss  965242.0625
	Graph  4  --- completed in  3.376180648803711 seconds with loss  1424756.375
Epoch  1
	Graph  3  --- completed in  2.961644172668457 seconds with loss  509789.15625
	Graph  2  --- completed in  2.876694679260254 seconds with loss  274335.125
	Graph  0  --- completed in  2.3509979248046875 seconds with loss  1571025.0
	Graph  4  --- completed in  3.3066422939300537 seconds with loss  813699.625
	Graph  1  --- completed in  2.5115864276885986 seconds with loss  938868.625
Epoch  2
	Graph  1  --- completed in  2.498171806335449 seconds with loss  815024.1875
	Graph  2  --- completed in  2.826765298843384 seconds with loss  320499.5
	Graph  4  --- completed in  3.44147372

In [9]:
# #TRAINING SET
# start = time.time()
# folder = '/content/drive/MyDrive/USI/GDLProject/'
# training_name = folder+'training_set.pbz2'
# valid_name = folder+'validation_set.pbz2'
# test_name = folder+'test_set.pbz2'

# train = path.isfile(training_name)
# valid = path.isfile(valid_name)
# test = path.isfile(test_name)
# compressed = True

# if (train):
#   print("Loading the compressed training set...")
#   training_set = decompress_pickle(training_name) 
#   print("Compressed training set loaded in ", time.time()-start, "seconds\n")
# else:
#   print("Creating the training set...")
#   training_set = syntheticGraphDataset(entireMatrix=False)
#   sizes = [i for i in range(512, 1012, 100)]
#   preprocess = [True] * len(sizes)
#   for key, s in enumerate(sizes):
#     print("\t Creating graph ", key, " of size ", s)
#     graph = syntheticGraph(name_graph_class='erdos_renyi_graph', size = s,  p = (0.1*512)/s)
#     red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=hyperparams['n_supernodes'])
#     training_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData = preprocess[key])
#   print("Training set created in ", time.time()-start, "seconds\n")

# #VALIDATION SET
# start = time.time()
# if (valid):
#   print("Loading the compressed validation set...")
#   validation_set = decompress_pickle(valid_name)
#   print("Compressed validation set loaded in ", time.time()-start, "seconds\n")
# else:
#   print("Creating the validation set...")
#   validation_set = syntheticGraphDataset(entireMatrix=False)
#   sizes = [i for i in range(1012, 1512, 100)]
#   preprocess = [True]*len(sizes)
#   for key, s in enumerate(sizes):
#     print("\tCreating graph ", key, " of size ", s)
#     graph = syntheticGraph(name_graph_class='erdos_renyi_graph', size = s,  p = (0.1*512)/s)
#     red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes=hyperparams['n_supernodes'])
#     validation_set.addGraph(graph, red_graph, hyperparams['n_eig'], preprocessData = preprocess[key])
#   print("Validation set created in ", time.time()-start, "seconds\n")


# #TEST SET
# start = time.time()
# if (test):
#   print("Loading the compressed test set...")
#   test_set = decompress_pickle(test_name)
#   print("Compressed test set loaded in ", time.time()-start, "seconds\n")
# else:
#   print("Creating the test set...")
#   test_set = syntheticGraphDataset(entireMatrix=False)
#   sizes = [i for i in range(1512, 2913, 100)]
#   preprocess = [True]*len(sizes)
#   for key, s in enumerate(sizes):
#     print("\tCreating graph ", key, " of size ",s)
#     graph = syntheticGraph(name_graph_class='erdos_renyi_graph', size = s, p = (0.1*512)/s)
#     red_graph = reducedGraph(graph, coarse_type='baseline', n_supernodes = hyperparams['n_supernodes'])
#     test_set.addGraph(graph, red_graph, hyperparams['n_eig'] , preprocessData = preprocess[key])
#   print("Dataset created in ", time.time() - start, "seconds")


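# # NOTE: training_name / valid_name / test_name already start with `folder`,
# # so they are passed as-is; prepending `folder` again would double the prefix.
# if not train:
#   compressed_pickle(training_name, training_set)
# if not valid:
#   compressed_pickle(valid_name, validation_set)
# if not test:
#   compressed_pickle(test_name, test_set)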