In [1]:
import json
import os
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt

import argparse
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader


from Utilities import score
from Utilities import plot_matrix_runs, plot_results, save_plot_losses
from Utilities import Add_ID_Count_Neighbours, PairData, prepare_dataloader_distance_scale

from training import training_loop

from models import GCN_k_m

In [2]:
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Load the ENZYMES graphs through TUDataset, including node attributes.
# Add_ID_Count_Neighbours is passed as the dataset's pre_transform.
dataset = TUDataset(
    root='/tmp/ENZYMES_transformed',
    name='ENZYMES',
    use_node_attr=True,
    pre_transform=Add_ID_Count_Neighbours(),
)

In [4]:
# Build the path to the homomorphism-count vectors for this dataset and
# stop immediately if that file is not present on disk.
name_dataset = 'ENZYMES'
n_homs = 50
hom_counts_path = f"data/homomorphism_counts/{name_dataset}_{n_homs}.homson"

if not os.path.exists(hom_counts_path):
    raise FileNotFoundError(f"The file '{hom_counts_path}' was not found.")

In [5]:
# Seed the global RNGs before anything stochastic runs so that a fresh
# "Restart & Run All" repeats the same run. The previous (commented-out)
# call referenced `args.seed`, but no `args` object exists in this notebook,
# so nothing was ever seeded.
# NOTE(review): whether prepare_dataloader_distance_scale draws from the
# torch or NumPy global generators is not visible from this notebook — confirm.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# Data-loader configuration.
batch_size = 32
distance = 'cosine'
distance_scaling = 'counts'

# Build the train / validation / test loaders from the dataset and the
# homomorphism-count file located earlier in the notebook.
train_loader, val_loader, test_loader = prepare_dataloader_distance_scale(
    hom_counts_path,
    dataset,
    batch_size=batch_size,
    dist=distance,
    device=device,
    scaling=distance_scaling,
)

In [6]:
# Hyper-parameters for the model and optimiser.
hid_size = 32
emb_size = n_homs  # output embedding size is set to the number of homomorphism counts
lr = 0.01
name = 'GCN_k_m_trial'

# Build the GCN_k_m network and move it to the selected device.
model = GCN_k_m(
    input_features=dataset.num_node_features,
    hidden_channels=hid_size,
    output_embeddings=emb_size,
    n_conv_layers=2,
    n_linear_layers=0,
    p=0.2,
    name=name,
    apply_relu_conv=False,
    dist='cosine',
    mlp_dist=True,
).to(device)

# Mean-squared-error objective, optimised with Adam over all model parameters.
criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [9]:
# Print the model's module layout (its sub-modules and their sizes) as a
# quick check of the configuration chosen when the model was constructed.
print(model)

GCN_k_m(
  (GCN_layers): ModuleList(
    (0): GCNConv(1, 32)
  )
  (Linear_layers): ModuleList()
  (final_GCN): GCNConv(32, 50)
  (dropout): Dropout(p=0.2, inplace=False)
  (relu): ReLU()
  (linear_dist): Linear(in_features=50, out_features=1, bias=True)
  (pdist): CosineSimilarity()
)


In [8]:
# Train for at most 30 epochs with a patience of 10, and keep the train and
# validation losses that training_loop returns when return_losses=True.
train_losses, validation_losses = training_loop(
    model,
    train_loader,
    optimizer,
    criterion,
    val_loader,
    epoch_number=30,
    patience=10,
    return_losses=True,
)

Epoch: 10 | Time for 10 epochs: 4m 46s
	Train Loss: 0.000
	 Val. Loss: 0.000
Early stopping triggered at epoch 12
Best epoch was 1


In [10]:
# Run the trained model over the test loader; the two values returned by
# `score` are kept as `y` and `predictions`.
y, predictions = score(
    model,
    test_loader,
    device,
)

MSE Loss: 0.0004790598468389362
