In [9]:
import torch
try:
  import torch_geometric
except:
  !pip -q install torch-scatter     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-sparse      -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-cluster     -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
  !pip -q install torch-geometric
  import torch_geometric
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
from torch_geometric.utils.convert import to_networkx
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from scipy.sparse.csgraph import shortest_path
from scipy.sparse import csr_matrix
from torch_geometric.utils.random import erdos_renyi_graph
import time
import random
from math import floor, ceil
from copy import deepcopy
import importlib
from os import path, makedirs
import bz2
import pickle
import _pickle as cPickle
import sys
from google.colab import drive
# Google Drive directory added to sys.path below; presumably it holds the
# project modules imported further down (syntheticGraph, GNN, ...) — verify.
folder = '/content/drive/MyDrive/USI/GDLProject/'
# Mount Google Drive at /content/drive/; force_remount=True remounts even if
# it is already mounted.
drive.mount('/content/drive/', force_remount=True)
 
# NOTE: every re-run of this cell appends `folder` to sys.path again.
sys.path.append(folder)
 

# Import the project modules as modules first, so that they can be reloaded.
import syntheticGraph
import syntheticGraphDataset
import reducedGraph
import GINConv
import GNN
import graph

# Re-execute each module so that edits made to the .py files are picked up
# without restarting the kernel.
importlib.reload(syntheticGraph)
importlib.reload(syntheticGraphDataset)
importlib.reload(reducedGraph)
importlib.reload(GINConv)
importlib.reload(GNN)
importlib.reload(graph)

# These from-imports must come after the reloads so the names bind to the
# freshly reloaded objects. Gotcha: from here on `syntheticGraph`,
# `syntheticGraphDataset`, `reducedGraph`, `GINConv` and `GNN` refer to the
# imported objects of the same name, no longer to the modules.
from graph import Graph, Node
from syntheticGraph import syntheticGraph
from syntheticGraphDataset import syntheticGraphDataset
from reducedGraph import reducedGraph
from GINConv import GINConv
from GNN import GNN
 
# Make torch.FloatTensor the default tensor type for newly created tensors.
torch.set_default_tensor_type(torch.FloatTensor)

Mounted at /content/drive/


In [10]:
def exportData(trainings, validations, tests, folder, rr):
  """Dump the three loss collections to ';'-delimited CSV files.

  Each collection is converted with ``np.asarray`` and written with
  ``np.savetxt`` to ``<folder><kind>_losses_<rr>.csv`` where ``<kind>`` is
  ``training``, ``validation`` or ``test`` (written in that order).

  Args:
    trainings: array-like of training losses.
    validations: array-like of validation results.
    tests: array-like of test results.
    folder: string prefix of the output paths. It is concatenated as-is, so
      it must already end with a path separator if it names a directory.
    rr: string inserted in the file names (callers pass the reduction ratio
      converted with ``str``).
  """
  loss_tables = (
      ("training_losses_", trainings),
      ("validation_losses_", validations),
      ("test_losses_", tests),
  )
  for file_prefix, values in loss_tables:
    target = folder + file_prefix + rr + ".csv"
    np.savetxt(target, np.asarray(values), delimiter=";")

In [11]:
def train(model, datasets, hyperparams, verbose=False, exportModel=False, exportEvaluationData = False, folder = ''):
  """Train `model` graph by graph and evaluate it on the validation and test sets.

  For every graph index in ``datasets["training_set"].batchesIndices`` the
  model is run for ``hyperparams['n_epochs']`` epochs. In each epoch all
  batches of that graph are forwarded through the model and the outputs are
  handed to the dataset with ``store_w_hat``; a single loss for the whole
  graph is then obtained from ``rayleigh_loss`` and passed to
  ``model.backpropagate``. After the last epoch of each graph the model is
  evaluated on the validation set; after all graphs it is evaluated once on
  the test set.

  Args:
    model: callable as ``model(A, X, E)`` that also provides
      ``backpropagate(loss)``, ``evaluateRayleighLoss(dataset, hyperparams, verbose=...)``
      and ``export(path)``.
    datasets: dict with the keys ``"training_set"``, ``"validation_set"`` and
      ``"test_set"``.
    hyperparams: dict; the keys ``'n_epochs'``, ``'batch_size'`` and ``'n_eig'``
      are always read, ``'reduction_ratio'`` only when something is exported.
    verbose: when True, print progress and timing information.
    exportModel: when True, call ``model.export`` with
      ``<folder>model_<reduction_ratio>.pt``.
    exportEvaluationData: when True, write the collected losses with ``exportData``.
    folder: string prefix for every exported file (concatenated as-is).

  Returns:
    ``(trainings, validations, tests)``: ``trainings`` holds one list of
    per-epoch loss values per graph, ``validations`` one evaluation result per
    graph and ``tests`` a single evaluation result.
  """
  trainings = []
  validations = []
  tests = []
  training_set = datasets["training_set"]

  if (verbose): initial_start = time.time()
  with torch.enable_grad():
    training_set.resetDispatcher()
    training_set.reset_w_hat()

    # `graph_idx` rather than `graph`: the latter is also the name of a module
    # imported by this notebook.
    for graph_idx in training_set.batchesIndices:
      if (verbose): print("GRAPH ", graph_idx)
      losses = []
      for epoch in range(hyperparams['n_epochs']):
        if (verbose): print("\tEpoch ", epoch, end=" ")
        start = time.time()
        n_batches = training_set.graphNumberBatches(graph_idx, hyperparams['batch_size'])
        for _batch in range(n_batches):
          A, X, E, _, x, y = training_set.getNextBatch(graph_idx, hyperparams['batch_size'])
          out = model(A, X, E)
          training_set.store_w_hat(graph_idx, out, x, y)

        # One loss (and one optimisation step) per graph and epoch, computed
        # from everything stored by the batches above.
        loss = training_set.rayleigh_loss(graph_idx, hyperparams['n_eig'])
        losses.append(loss.item())
        model.backpropagate(loss)
        training_set.reset_w_hat(graph_idx)
        if (verbose): print(" --- completed in ", time.time()-start, "seconds with loss ", loss.item())
      trainings.append(losses)
      validations.append(model.evaluateRayleighLoss(datasets["validation_set"], hyperparams, verbose = verbose))
      # Guarded like every other progress message, so verbose=False is silent.
      if (verbose): print("validations", validations)
  if (verbose): print("Total training completed in ", time.time()-initial_start, "seconds")
  if (exportModel): model.export(folder+"model_"+str(hyperparams['reduction_ratio'])+".pt")
  tests.append(model.evaluateRayleighLoss(datasets["test_set"], hyperparams, verbose = verbose))
  if (exportEvaluationData): exportData(trainings, validations, tests, folder, rr = str(hyperparams['reduction_ratio']))
  return trainings, validations, tests

In [12]:
# Root folder on Google Drive: datasets are read from it and trained models /
# loss files are written below it.
folder = '/content/drive/MyDrive/USI/GDLProject/'
# Keys of the `datasets` dict expected by train().
name_list = ["training_set", "validation_set", "test_set"]

# Experiment grid: one model is trained per (ratio, graph class, reduction type).
# `reduction_ratio_list` has to be defined here: the experiment loop iterates
# over it, and leaving it only in a comment raises NameError on a fresh kernel.
reduction_ratio_list = [0.3, 0.5, 0.7]
name_graph_class_list = ['barabasi_albert_graph' ,'erdos_renyi_graph']
# (sic) the misspelled name is kept because the experiment loop refers to it.
reducution_type_list = ['hem','baseline']
# Single-configuration variant for a quick run:
# reduction_ratio_list = [0.3]
# name_graph_class_list = ['erdos_renyi_graph']
# reducution_type_list = ['baseline']


hyperparams = {
    'loss_epoch' : 0,        # not read by any code visible in this notebook
    'lr' : 0.001,            # passed to the GNN constructor
    'embedding_dim' : 50,    # passed to the GNN constructor
    'n_layers' : 3,          # passed to the GNN constructor
    'n_eig' : 40,            # passed to rayleigh_loss
    'n_epochs' : 1,          # epochs spent on each training graph
    'batch_size': 1,         # passed to graphNumberBatches / getNextBatch
}

In [13]:
# Train and export one model for every (reduction ratio, graph class,
# reduction type) combination of the experiment grid.
datasets = {}

for reduction_ratio in reduction_ratio_list:
  # train() reads the ratio from hyperparams to name the exported files.
  hyperparams['reduction_ratio'] = reduction_ratio
  for name_graph_class in name_graph_class_list:
    for reducution_type in reducution_type_list:

      subfolder = 'trained_models/'+name_graph_class+'/'+reducution_type+'/'
      # The notebook imports `makedirs` directly (`from os import path, makedirs`)
      # and never imports `os` itself, so `os.makedirs` raised NameError whenever
      # the folder was missing. exist_ok=True replaces the separate existence check.
      makedirs(folder+subfolder, exist_ok=True)

      for name in name_list:
        dataset_name = folder+name+'_'+str(reduction_ratio)+"_"+name_graph_class+'_'+reducution_type+'_'+'.pbz2'
        if(name == "training_set"):
          datasets[name] = syntheticGraphDataset.import_dataset(dataset_name, verbose = True)
        else:
          # NOTE(review): the validation and test sets are aliases of the training
          # set, so `dataset_name` is unused for them and the validation/test
          # figures are measured on training data. Load the dedicated files here
          # if they exist — TODO confirm with the author.
          datasets[name]=datasets["training_set"]
      model = GNN(hyperparams['embedding_dim'], hyperparams['n_layers'], hyperparams['lr'], pathname = None)
      trainings, validations, tests = train(model, datasets, hyperparams, verbose=True, exportModel=True, exportEvaluationData = True, folder=folder+subfolder)

Loading the compressed set...
Dataset loaded in  9.761805772781372 seconds

GRAPH  0
	Epoch  0  --- completed in  5.765455722808838 seconds with loss  14.685006141662598
Rayleigh evaluation started..
Graph  0 --- relative improvement percentage  -301.52368545532227 %
Graph  1 --- relative improvement percentage  -153.39410305023193 %
Graph  2 --- relative improvement percentage  -235.7372522354126 %
Graph  3 --- relative improvement percentage  -215.32669067382812 %
Graph  4 --- relative improvement percentage  -164.81797695159912 %
validations [[-3.0152368545532227, -1.5339410305023193, -2.357372522354126, -2.1532669067382812, -1.6481797695159912]]
GRAPH  1
	Epoch  0  --- completed in  8.209980487823486 seconds with loss  20.42694854736328
Rayleigh evaluation started..
Graph  0 --- relative improvement percentage  60.592347383499146 %
Graph  1 --- relative improvement percentage  54.67997193336487 %
Graph  2 --- relative improvement percentage  55.18280863761902 %
Graph  3 --- relativ

In [14]:
# initial_start = time.time()

# with torch.enable_grad():
#   training_set.resetDispatcher()
#   training_set.reset_w_hat()
#   for e in range(3):
#     print("Big epoch ", e)
#     for graph in training_set.batchesIndices: 
#       print("\tGraph ", graph)
#       losses = []
#       for epoch in range(hyperparams['n_epochs'], ):
#         print("\t\tEpoch ", epoch, end=" ")
#         start = time.time()
#         for batch in range(training_set.graphNumberBatches(graph, hyperparams['batch_size'])):
#           A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
#           out = model(A, X, E)
#           training_set.store_w_hat(graph, out, x, y)

#         loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
#         losses.append(loss.item())
#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()
#         training_set.reset_w_hat(graph)
#         print(" --- completed in ", time.time()-start, "seconds with loss ", loss.item())
#     training_set.shuffle()
# print("Total training completed in ", time.time()-initial_start, "seconds")

In [15]:
# model = GNN(hyperparams['embedding_dim'], hyperparams['n_layers']).float()
# optimizer = torch.optim.Adam(model.parameters(), lr = hyperparams['lr'])

# with torch.enable_grad():
#   training_set.resetDispatcher()
#   training_set.reset_w_hat()
#   for epoch in range(hyperparams['n_epochs']):
#     print("Epoch ", epoch)
#     losses = []
#     for graph in training_set.batchesIndices: 
#       print("\tGraph ", graph, end=" ")
#       start = time.time()
#       for batch in range(training_set.graphNumberBatches(graph, hyperparams['batch_size'])):
#         A, X, E, _, x, y = training_set.getNextBatch(graph, hyperparams['batch_size'])
#         out = model(A, X, E)
#         training_set.store_w_hat(graph, out, x, y)

#       loss = training_set.rayleigh_loss(graph, hyperparams['n_eig'])
#       losses.append(loss.item())
#       loss.backward()
#       optimizer.step()
#       optimizer.zero_grad()
#       training_set.reset_w_hat(graph)
#       print(" --- completed in ", time.time()-start, "seconds with loss ", loss.item())
#     training_set.shuffle()