In [9]:
import os
import torch
import numpy as np
import pytorch_lightning as pl
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from lightning.pytorch import loggers as pl_loggers
import torch_geometric.transforms as T
import torch_geometric.data as geom_data
from torch.utils.tensorboard import SummaryWriter

%reload_ext autoreload
%autoreload 2

#### Configure device

In [10]:
# Pick the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# A torch.device never compares equal to a plain string, so the device type
# has to be inspected to tell whether we are actually running on CPU.
if device.type != "cpu":
    torch.set_float32_matmul_precision('high')

# os.cpu_count() may return None when the count cannot be determined.
num_workers = os.cpu_count() or 1
device, num_workers

(device(type='cuda', index=0), 12)

### Directory configuration and load data

In [11]:
# All paths are derived from the current working directory.
cwd = os.getcwd()
tb_logging_dir = os.path.join(cwd, "lightning_logs")
exp_name = "Cora-GraphSAGE-transductive"
exp_dir = os.path.join(tb_logging_dir, exp_name)
dataset_dir = os.path.join(cwd, "dataset", "Cora")
# exist_ok avoids the check-then-create race and makes the cell idempotent.
os.makedirs(dataset_dir, exist_ok=True)

# Use the directory prepared above as the dataset root so the two cannot
# drift apart (previously a separate hard-coded relative path was passed).
CoraDataset = Planetoid(
    root=dataset_dir, name="Cora", split="full"
)


#### Note: if the number of epochs is large, the kernel may get stuck after training. In that case, load the saved model manually!

In [12]:
from utils.model.RS_GraphSAGE import GraphSage

dataset = CoraDataset

# Hyper-parameters handed to GraphSage.
hparams = {
    "aggregator_type": 'max',
    "HIDDEN_DIM": [112, 16],    # size of the embedding
    "BATCH_SIZE": 64,
    "LEARNING_RATE": 0.001,
    "NUM_NEIGHBORS": [10, 10],  # the number of neighbors in each order of sampling
}

# Early stopping watches 'val_loss' (lower is better) with a patience of 5.
early_stop_callback = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0.00, patience=5, verbose=False
)

# TensorBoard logger rooted at <tb_logging_dir>/<exp_name>.
tb_logger = pl_loggers.TensorBoardLogger(tb_logging_dir, name=exp_name)

# Optional Trainer switches that are currently disabled:
#   accelerator="cpu", num_sanity_val_steps=0
trainer = pl.Trainer(
    max_epochs=100,
    callbacks=[early_stop_callback],
    logger=tb_logger,
    log_every_n_steps=1,
)

# Directory of the run version chosen by the logger; the two extra summary
# writers and the checkpoint folder all live underneath it.
version_dir = os.path.join(exp_dir, f"version_{trainer.logger.version}")
writer_acc = SummaryWriter(log_dir=version_dir)
writer_loss = SummaryWriter(log_dir=version_dir)

checkpoint_dir = os.path.join(version_dir, "checkpoints")
print("Saving checkpoints to", checkpoint_dir)

# Build the model, move it to the selected device and prepare its data.
SAGEmodel = GraphSage(
    dataset=dataset,
    input_dim=dataset.num_features,
    hparams=hparams,
    writer_acc=writer_acc,
    writer_loss=writer_loss,
).to(device)
SAGEmodel.data_processing_transductive()

# Train, then evaluate on the test split; the test metrics are the cell output.
trainer.fit(SAGEmodel)
trainer.test(SAGEmodel)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Saving checkpoints to D:\GitHub\aml-project\GNN\zyq\lightning_logs\Cora-GraphSAGE-transductive\version_14\checkpoints


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params
--------------------------------------------
0 | aggr  | MaxAggregation    | 0     
1 | model | Sequential_24e3a6 | 324 K 
--------------------------------------------
324 K     Trainable params
0         Non-trainable params
324 K     Total params
1.299     Total estimated model params size (MB)


Data Processing Done on: cuda:0


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.5210734009742737, 'test_acc': 0.9663226088156428}]

#### Load the trained model from a checkpoint

In [12]:
from utils.model.RS_GraphSAGE import GraphSage

# Hyper-parameters are repeated here so this cell can be run without
# re-running the training cell.
hparams = {"aggregator_type": 'max',
           "HIDDEN_DIM": [112, 16],  # size of the embedding
           "BATCH_SIZE": 64,
           "LEARNING_RATE": 0.001,
           "NUM_NEIGHBORS": [10, 10]
           }  # The number of neighbors in each order of sampling

dataset = CoraDataset
# Build the path with os.path.join so it resolves on every platform instead
# of only where backslash is the path separator.
# NOTE(review): the folder name says "inductive" while this notebook runs the
# transductive setup - confirm this is the intended checkpoint.
checkpoint_dir = os.path.join(
    "lightning_logs", "RS_Cora_GraphSAGE_inductive", "best", "checkpoints")
# os.listdir order is arbitrary and may include non-checkpoint files, so keep
# only *.ckpt entries and pick deterministically.
checkpoint_names = sorted(
    name for name in os.listdir(checkpoint_dir) if name.endswith(".ckpt"))
if not checkpoint_names:
    raise FileNotFoundError(f"No .ckpt file found in {checkpoint_dir}")
checkpoint_file = os.path.join(checkpoint_dir, checkpoint_names[0])
Loaded_model = GraphSage.load_from_checkpoint(
    checkpoint_file, dataset=dataset, input_dim=dataset.num_features, hparams=hparams
)
Loaded_model.data_processing_transductive()

Data Processing Done on: cpu


#### To see the mini-batch's number of negative edges:

In [7]:
# Pull a single mini-batch from the model's training dataloader and display it.
tmpLoader = Loaded_model.train_dataloader()
sample = next(iter(tmpLoader))
sample

Data(x=[2455, 1433], edge_index=[2, 6280], edge_label=[6280], train_mask=[2455], n_id=[2455], e_id=[6280], input_id=[64], batch_size=64)

#### To see the number of training positive edges:

In [17]:
# Display the object stored under the "train" key of the model's pos_data.
Loaded_model.pos_data["train"]

Data(x=[2708, 1433], edge_index=[2, 4708], edge_label=[4708], train_mask=[2708])

#### Recommendation by the transductive model for a given node

In [13]:
import torch.nn.functional as F
def recommendation_by_transductive_model(A, model, top_k=10):
    """Print the best-scoring link candidates for node ``A``.

    Candidates are the destination nodes of every edge in
    ``model.neg_data["all"]`` whose source node is ``A`` (presumably the
    negative, i.e. not-yet-existing, edges - confirm against the model).
    Each candidate is scored with the sigmoid of the dot product between its
    embedding and the embedding of ``A``; embeddings come from calling
    ``model`` on the features and edges of ``model.pos_data["all"]``.

    Args:
        A: Index of the node to produce recommendations for.
        model: Callable module exposing ``neg_data`` and ``pos_data``.
        top_k: Maximum number of recommendations to print (default 10).

    Returns:
        None. One header line is printed, followed by up to ``top_k`` lines of
        the form ``<node id> <score>`` in descending score order.
    """
    neg_graph = model.neg_data["all"]
    # torch.unique yields sorted, de-duplicated node ids.
    candidate = torch.unique(
        neg_graph.edge_index[1, (neg_graph.edge_index[0] == A)]).cpu()

    if candidate.numel() == 0:
        # Nothing to rank; avoid calling max()/indexing on an empty tensor.
        print("Recommendation for node", A)
        return

    pos_graph = model.pos_data["all"]
    # Inference only: switch to eval mode without tracking gradients, then
    # restore whatever mode the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            embedding = model(pos_graph.x, pos_graph.edge_index)
    finally:
        model.train(was_training)
    # Move to CPU so the conversion to NumPy also works for a CUDA model.
    embedding = embedding.detach().cpu()

    logits = (embedding[candidate] * embedding[A]).sum(dim=1)
    scores = torch.sigmoid(logits).numpy().astype(np.float64)
    node_ids = candidate.numpy()

    # Rank only the real candidates, so no filler entries can be printed and
    # fewer than ``top_k`` candidates no longer runs out of bounds.
    order = np.argsort(scores)[::-1][:max(int(top_k), 0)]
    print("Recommendation for node", A)
    for idx in order:
        print(int(node_ids[idx]), float(scores[idx]))
        
# Print the recommendations for node 1676 using Loaded_model.
recommendation_by_transductive_model(1676, Loaded_model)

Recommendation for node 1676
1013 0.9597976207733154
181 0.9295782446861267
2395 0.9014472961425781
963 0.8971356153488159
1849 0.8931287527084351
1399 0.8930008411407471
1464 0.8886834383010864
1645 0.8838027715682983
2228 0.8822579979896545
1131 0.881641685962677
