In [1]:
import os
import torch
import numpy as np
import pytorch_lightning as pl
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from lightning.pytorch import loggers as pl_loggers
import torch_geometric.transforms as T
import torch_geometric.data as geom_data
from torch.utils.tensorboard import SummaryWriter

%reload_ext autoreload
%autoreload 2

#### Configure device

In [2]:
# Prefer the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Compare the device *type*: a torch.device does not compare equal to the plain
# string "cpu", so `device != "cpu"` cannot serve as a reliable CPU guard.
if device.type != "cpu":
    # Allow reduced-precision (e.g. TF32) float32 matmuls on the accelerator.
    torch.set_float32_matmul_precision('high')

# Number of logical CPUs reported by the OS (may be None on exotic platforms).
num_workers = os.cpu_count()
device, num_workers

(device(type='cuda', index=0), 12)

In [3]:
# (Re)load the TensorBoard notebook extension and open the dashboard on the
# `lightning_logs` directory.
%reload_ext tensorboard
%tensorboard --logdir 'lightning_logs' 

### Directory configuration and load data

In [4]:
# All paths are derived from the current working directory.
cwd = os.getcwd()
tb_logging_dir = os.path.join(cwd, "lightning_logs")
exp_name = "RS-AG-GCN"
exp_dir = os.path.join(tb_logging_dir, exp_name)
dataset_dir = os.path.join(cwd, "dataset", "CUHKSZ_AcademicGraph")
# exist_ok=True makes the creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dataset_dir, exist_ok=True)

# Project-local dataset class, constructed on the directory prepared above.
from utils.dataset.CUHKSZ_AcademicGraph import CUHKSZ_AcademicGraph
AGDataset = CUHKSZ_AcademicGraph(dataset_dir, with_title=True, with_label=True)

dataset = AGDataset

D:\GitHub\GNN-Cora-CUHKSZAG\dataset\CUHKSZ_AcademicGraph\raw\CUHKSZ_AcademicGraph_Rawdata.zip
D:\GitHub\GNN-Cora-CUHKSZAG\dataset\CUHKSZ_AcademicGraph\raw\CUHKSZ_AcademicGraph-rawdata_released


#### Note: if the number of epochs is large, the kernel may get stuck after training. In that case, the saved model needs to be loaded manually!

In [5]:
from utils.model.RS_GCN import GCN

# Seed Python, NumPy and PyTorch (including dataloader workers) so that
# initialisation and sampling are repeatable between runs.
pl.seed_everything(42, workers=True)

dataset = AGDataset

# Stop training once `val_loss` has failed to improve for 10 validation checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    min_delta=0.00,
    patience=10,
    verbose=False,
    mode='min'
)
# NOTE(review): the key spelling "DROUPOUT_RATE" is kept as-is; presumably the
# project GCN class looks it up by this exact name - confirm before renaming.
hparams = {"DROUPOUT_RATE": 0,
           "BATCH_SIZE": 64,
           "LEARNING_RATE": 0.001,
           "NUM_NEIGHBORS": [10, 10]  # The number of neighbors in each order of sampling
           }

tb_logger = pl_loggers.TensorBoardLogger(tb_logging_dir, name=exp_name)

trainer = pl.Trainer(max_epochs=100,
                     callbacks=[early_stop_callback],
                     logger=tb_logger,
                     log_every_n_steps=1,
                     # accelerator="cpu"
                     # num_sanity_val_steps = 0
                     )

# The extra writers share the directory of the logger's current version so that
# their scalars end up next to the Lightning logs for this run.
version_dir = os.path.join(
    exp_dir, "version_"+str(trainer.logger.version))
writer_acc = SummaryWriter(log_dir=version_dir)
writer_loss = SummaryWriter(log_dir=version_dir)

checkpoint_dir = os.path.join(version_dir, "checkpoints")
print("Saving checkpoints to", checkpoint_dir)

GCNmodel = GCN(dataset=dataset, input_dim=dataset.num_features, hparams=hparams,
               writer_acc=writer_acc, writer_loss=writer_loss).to(device)
GCNmodel.data_processing()

try:
    trainer.fit(GCNmodel)
    test_results = trainer.test(GCNmodel)
finally:
    # Flush pending events and release the event-file handles even when
    # training is interrupted. A closed SummaryWriter re-opens its file
    # lazily if something writes to it again.
    writer_acc.close()
    writer_loss.close()

# Keep the test metrics as the cell's displayed result.
test_results

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: D:\GitHub\GNN-Cora-CUHKSZAG\lightning_logs\AG-GCN


Saving checkpoints to D:\GitHub\GNN-Cora-CUHKSZAG\lightning_logs\AG-GCN\version_0\checkpoints


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type    | Params
----------------------------------
0 | conv1 | GCNConv | 86.1 K
1 | conv2 | GCNConv | 1.8 K 
----------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Data Processing Done on: cuda:0


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 1.3938701152801514, 'test_acc': 0.9703965271164857}]

#### Load trained model

In [5]:
from utils.model.RS_GCN import GCN

# Same hyper-parameter values as in the training cell, repeated here so this
# cell can run without re-training.
hparams = {"DROUPOUT_RATE": 0,
           "BATCH_SIZE": 64,
           "LEARNING_RATE": 0.001,
           "NUM_NEIGHBORS": [10, 10]
           }
# Build the path with os.path.join instead of hard-coded "\\" separators so the
# cell is not tied to Windows.
# NOTE(review): this folder is "RS_AG_GCN" (underscores) while exp_name is
# "RS-AG-GCN" (hyphens) - confirm the directory name on disk.
checkpoint_dir = os.path.join("lightning_logs", "RS_AG_GCN", "best", "checkpoints")
# Only consider checkpoint files and pick deterministically; os.listdir() order
# is arbitrary and the folder may contain other files.
checkpoint_names = sorted(
    name for name in os.listdir(checkpoint_dir) if name.endswith(".ckpt"))
if not checkpoint_names:
    raise FileNotFoundError(f"No .ckpt file found in {checkpoint_dir!r}")
checkpoint_file = os.path.join(checkpoint_dir, checkpoint_names[0])
Loaded_model = GCN.load_from_checkpoint(checkpoint_file, dataset=dataset, input_dim=dataset.num_features, hparams=hparams, log_dir="lightning_logs")
Loaded_model.data_processing()

Data Processing Done on: cpu


In [6]:
# Optional shortcut: uncomment to use the model trained in this session
# (GCNmodel) in place of the checkpoint-loaded one.
# Loaded_model = GCNmodel

#### To see the mini-batch's number of negative edges:

In [7]:
# Build the model's training dataloader and pull a single mini-batch from it.
tmpLoader = Loaded_model.train_dataloader()
sample = next(iter(tmpLoader))
# Display the batch object so its tensor shapes can be inspected.
sample

Data(x=[4226, 768], edge_index=[2, 6690], edge_label=[6690], train_mask=[4226], n_id=[4226], e_id=[6690], input_id=[64], batch_size=64)

#### To see the number of training positive edges:

In [8]:
# Display the "train" entry of the model's `pos_data` mapping.
Loaded_model.pos_data["train"]

Data(x=[6614, 768], edge_index=[2, 7397], edge_label=[7397], train_mask=[6614])

#### Recommendation for a given node

In [10]:
import torch.nn.functional as F  # kept so `F` stays available to other cells


def recommendation_by_transductive_model(A, model, top_k=10):
    """Print the highest-scoring link recommendations for node ``A``.

    Candidates are the destination nodes of every edge in
    ``model.neg_data["all"]`` whose source node is ``A``. Each candidate is
    scored with the sigmoid of the dot product between its embedding and the
    embedding of ``A``; embeddings are obtained by calling ``model.forward`` on
    the features and edges of ``model.pos_data["train"]``.

    Args:
        A: Index of the query node.
        model: Model exposing ``neg_data``, ``pos_data`` and ``forward(x,
            edge_index)``; it is expected to be a ``torch.nn.Module``.
        top_k: Maximum number of recommendations to print (default 10).

    Returns:
        None. A header line is printed, followed by one line per
        recommendation containing the node index, its score and its title.
    """
    neg_graph = model.neg_data["all"]
    src, dst = neg_graph.edge_index[0], neg_graph.edge_index[1]
    # torch.unique returns the distinct candidates in sorted order, as int64
    # indices, so duplicated edges cannot yield duplicated recommendations.
    candidates = torch.unique(dst[src == A]).cpu().long()

    pos_graph = model.pos_data["all"]
    if candidates.numel() == 0:
        # Nothing to rank: report it instead of failing on an empty max().
        print("Recommendation for node", A, "with title", pos_graph.title[A])
        print("No candidate nodes found for node", A)
        return

    train_graph = model.pos_data["train"]
    # Run the forward pass wherever the model's parameters live, so both a
    # CPU-loaded checkpoint and a model still on the GPU are supported.
    model_device = next(model.parameters()).device
    was_training = model.training
    model.eval()  # deterministic inference (e.g. dropout disabled)
    try:
        with torch.no_grad():
            embedding = model.forward(
                train_graph.x.to(model_device),
                train_graph.edge_index.to(model_device),
            ).cpu()
    finally:
        # Restore whatever mode the caller had the model in.
        model.train(was_training)

    # Broadcasting embedding[A] against every candidate row gives the per-pair
    # dot product; scores are widened to float64 for printing.
    scores = torch.sigmoid((embedding[A] * embedding[candidates]).sum(dim=1)).double()
    # Rank candidates only, and never read past the number available.
    order = torch.argsort(scores, descending=True)[:top_k]

    print("Recommendation for node", A, "with title", pos_graph.title[A])
    for node, score in zip(candidates[order].tolist(), scores[order].tolist()):
        print(node, score, pos_graph.title[node])
        
# Example: print recommendations for node 1370 using Loaded_model.
recommendation_by_transductive_model(1370, Loaded_model)

Recommendation for node 1370 with title Non-linear matrix completion
2167 0.999921441078186 On the Equivalence of Nonnegative Matrix Factorization and Spectral Clustering
761 0.9999121427536011 Efficient and Robust Feature Selection via Joint ℓ2, 1-Norms Minimization
2994 0.9998934268951416 A min-max cut algorithm for graph partitioning and data clustering
2740 0.9998052716255188 Large-Scale Subspace Clustering via k-Factorization
1145 0.999622106552124 Functional principal components analysis via penalized rank one approximation
6157 0.9995869994163513 Orthogonal nonnegative matrix t-factorizations for clustering
3142 0.9995637536048889 Modular Community Detection in Networks
119 0.9995593428611755 R1-PCA: rotational invariant L1-norm principal component analysis for robust subspace factorization
3587 0.999541163444519 A simple statistical model for depicting the cdc-15 synchronized yeast cell cycle-regulated gene expression data
1067 0.9994924068450928 Revealing network communities w

In [11]:
# Example: print recommendations for node 1676 using Loaded_model.
recommendation_by_transductive_model(1676, Loaded_model)

Recommendation for node 1676 with title A Semismooth Newton Stochastic Proximal Point Algorithm with Variance Reduction
6356 0.9995259046554565 On the convergence of the coordinate descent method for convex differentiable minimization
6603 0.9988107681274414 Error Bound and Convergence Analysis of Matrix Splitting Algorithms for the Affine Variational Inequality Problem
267 0.9986629486083984 On the linear convergence of descent methods for convex essentially smooth minimization
3640 0.9976385831832886 On the linear convergence of the alternating direction method of multipliers
6029 0.9964507818222046 Minimization of agreeably weighted variance in single machine systems
1389 0.9964467883110046 Error bounds and convergence analysis of feasible descent methods: a general approach
6219 0.9963799118995667 A Unified Convergence Analysis of Block Successive Minimization Methods for Nonsmooth Optimization
125 0.995690643787384 Error bounds for analytic systems and their applications
814 0.994