In [13]:
import os
import torch
import numpy as np
import pytorch_lightning as pl
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from lightning.pytorch import loggers as pl_loggers
import torch_geometric.transforms as T
import torch_geometric.data as geom_data
from torch.utils.tensorboard import SummaryWriter

%reload_ext autoreload
%autoreload 2

#### Configure device

In [2]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Compare on `device.type`: a `torch.device` never compares equal to the plain
# string "cpu", so `device != "cpu"` was always true and the precision setting
# was applied on CPU-only machines as well.
if device.type != "cpu":
    torch.set_float32_matmul_precision('high')

# Number of logical CPUs reported by the OS (os.cpu_count() may return None).
num_workers = os.cpu_count()
device, num_workers

(device(type='cuda', index=0), 12)

In [14]:
# (Re)load the TensorBoard notebook extension and start a dashboard on port 6003.
# NOTE(review): --logdir points at '..\\lightning_logs' (the parent directory), whereas
# the directory-configuration cell builds its log path as <cwd>/lightning_logs - confirm
# which location is intended.
# NOTE(review): --host is a hard-coded IP address; it presumably has to match the
# machine running this kernel - confirm before reusing the notebook elsewhere.
%reload_ext tensorboard
%tensorboard --logdir '..\\lightning_logs' --port 6003 --host=10.23.115.20

### Directory configuration and load data

In [3]:
# All paths are resolved relative to the kernel's current working directory.
cwd = os.getcwd()

# TensorBoard / Lightning logs live in <cwd>/lightning_logs/<exp_name>.
tb_logging_dir = os.path.join(cwd, "lightning_logs")
exp_name = "AG-GCN"
exp_dir = os.path.join(tb_logging_dir, exp_name)

# Root directory handed to the dataset class below.
dataset_dir = os.path.join(cwd, "utils/dataset", "CUHKSZ_AcademicGraph")
# exist_ok=True makes the call idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(dataset_dir, exist_ok=True)

from utils.dataset.CUHKSZ_AcademicGraph import CUHKSZ_AcademicGraph
AGDataset = CUHKSZ_AcademicGraph(dataset_dir, with_title=True, with_label=True)

# `dataset` is the name the later cells read.
dataset = AGDataset

D:\GitHub\aml-project\GNN\zyq\utils\dataset\CUHKSZ_AcademicGraph\raw\CUHKSZ_AcademicGraph_Rawdata.zip
D:\GitHub\aml-project\GNN\zyq\utils\dataset\CUHKSZ_AcademicGraph\raw\CUHKSZ_AcademicGraph-rawdata_released


#### Note: if the number of epochs is large, the kernel may get stuck after training. In that case, load the saved model manually!

In [15]:
# Train the GCN model on the dataset with early stopping and TensorBoard logging,
# then evaluate it on the test split.
from utils.model.RS_GCN import GCN

# Re-bind `dataset` so this cell does not depend on the name set in the loading cell.
dataset = AGDataset

# Stop training once 'val_loss' has not decreased for `patience` consecutive checks.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    min_delta=0.00,
    patience=10,
    verbose=False,
    mode='min'
)
# NOTE(review): the key is spelled "DROUPOUT_RATE"; presumably GCN reads this exact
# key, so confirm against utils/model/RS_GCN before renaming it.
hparams = {"DROUPOUT_RATE": 0,  
           "BATCH_SIZE": 64, 
           "LEARNING_RATE": 0.001,
           "NUM_NEIGHBORS": [10, 10]
           }  # The number of neighbors in each order of sampling

# Logger writing under <tb_logging_dir>/<exp_name>.
tb_logger = pl_loggers.TensorBoardLogger(tb_logging_dir, name=exp_name)
 
trainer = pl.Trainer(max_epochs=100,
                     callbacks=[early_stop_callback],
                     logger=tb_logger,
                     log_every_n_steps=1,
                     # accelerator="cpu"
                    # num_sanity_val_steps = 0
                     )

# Rebuild the logger's run directory (<exp_dir>/version_<N>) by hand so the extra
# SummaryWriters below write next to the Lightning logs.
version_dir = os.path.join(
    exp_dir, "version_"+str(trainer.logger.version))
# Both writers point at the same directory; they are passed to the model separately.
writer_acc = SummaryWriter(log_dir=version_dir)
writer_loss = SummaryWriter(log_dir=version_dir)

# Only used for the message below; no checkpoint callback is configured in this cell.
# NOTE(review): presumably Lightning's default checkpointing writes here - confirm.
checkpoint_dir = os.path.join(version_dir, "checkpoints")
print("Saving checkpoints to", checkpoint_dir)

GCNmodel = GCN(dataset=dataset, input_dim=dataset.num_features, hparams=hparams,
                      writer_acc=writer_acc, writer_loss=writer_loss).to(device)
# Must run before fit(); prints "Data Processing Done on: <device>" per the cell output.
GCNmodel.data_processing()

trainer.fit(GCNmodel)
# NOTE(review): this evaluates the in-memory weights left by fit(); presumably those are
# the last-epoch weights rather than the best 'val_loss' checkpoint - confirm if the
# reported test metrics should come from the best checkpoint instead.
trainer.test(GCNmodel)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Saving checkpoints to D:\GitHub\aml-project\GNN\zyq\lightning_logs\AG-GCN\version_7\checkpoints


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type    | Params
----------------------------------
0 | conv1 | GCNConv | 86.1 K
1 | conv2 | GCNConv | 1.8 K 
----------------------------------
87.9 K    Trainable params
0         Non-trainable params
87.9 K    Total params
0.352     Total estimated model params size (MB)


Data Processing Done on: cuda:0


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.5566018223762512, 'test_acc': 0.9754328338637099}]

#### Load the trained model from a checkpoint

In [4]:
# Restore a trained GCN from the "best" checkpoint directory and prepare its data.
from utils.model.RS_GCN import GCN

# Same hyper-parameter values as the training cell.
hparams = {"DROUPOUT_RATE": 0,
           "BATCH_SIZE": 64,
           "LEARNING_RATE": 0.001,
           "NUM_NEIGHBORS": [10, 10]
           }

# Build the path with os.path.join so it is not tied to Windows separators.
checkpoint_dir = os.path.join("lightning_logs", "RS_AG_GCN", "best", "checkpoints")

# os.listdir() returns entries in arbitrary order and may contain non-checkpoint
# files, so keep only *.ckpt files and pick deterministically.
checkpoint_names = sorted(
    name for name in os.listdir(checkpoint_dir) if name.endswith(".ckpt")
)
if not checkpoint_names:
    raise FileNotFoundError(f"No .ckpt file found in {checkpoint_dir!r}")
checkpoint_file = os.path.join(checkpoint_dir, checkpoint_names[0])

Loaded_model = GCN.load_from_checkpoint(
    checkpoint_file,
    dataset=dataset,
    input_dim=dataset.num_features,
    hparams=hparams,
    log_dir="lightning_logs",
)
# Builds the model's data splits; the later cells read pos_data / neg_data from it.
Loaded_model.data_processing()

Data Processing Done on: cpu


In [7]:
# Take the first item of the dataset and show whether node 1370 is flagged in its
# training mask.
data = dataset[0]
data.train_mask[1370]

tensor(False)

#### To see the mini-batch's number of negative edges:

In [8]:
# Draw a single mini-batch from the model's training dataloader and display it.
tmpLoader = Loaded_model.train_dataloader()
sample = next(iter(tmpLoader))
sample

Data(x=[4207, 768], edge_index=[2, 6640], edge_label=[6640], train_mask=[4207], n_id=[4207], e_id=[6640], input_id=[64], batch_size=64)

In [12]:
# data = dataset[0]
# NOTE(review): `sample` is a mini-batch that carries its own `n_id` field, so index
# 1676 here is presumably a position inside the batch rather than the global node id
# 1676 - confirm before using this as evidence about node 1676.
sample.train_mask[1676]
# sample.val_mask[1370]

tensor(True)

#### To see the number of training positive edges:

In [8]:
# Display the model's "train" entry of pos_data; its edge_index size gives the edge count.
Loaded_model.pos_data["train"]

Data(x=[6614, 768], edge_index=[2, 7397], edge_label=[7397], train_mask=[6614])

#### Recommendation for a given node

In [5]:
import torch.nn.functional as F
def recommendation_by_transductive_model(A, model, top_k=10):
    """Print the highest-scoring link recommendations for node ``A``.

    Candidates are the target nodes of every edge in ``model.neg_data["all"]``
    whose source is ``A``. Each candidate is scored with the sigmoid of the dot
    product between its embedding and the embedding of ``A``, where embeddings
    come from a forward pass over ``model.pos_data["train"]``.

    Args:
        A: Integer id of the query node.
        model: Object exposing ``neg_data``, ``pos_data``, ``forward``,
            ``training``, ``eval`` and ``train`` (e.g. the loaded GCN module).
        top_k: Maximum number of recommendations to print (default 10).

    Returns:
        None. The query title and one ``<node id> <score> <title>`` line per
        recommendation are printed.
    """
    neg_graph = model.neg_data["all"]
    pos_graph = model.pos_data["all"]

    # torch.unique returns the ids sorted and de-duplicated.
    candidate = torch.unique(
        neg_graph.edge_index[1, neg_graph.edge_index[0] == A]
    ).cpu()

    print("Recommendation for node", A, "with title", pos_graph.title[A])
    if candidate.numel() == 0:
        # Nothing to rank; avoids calling .max()/indexing on an empty tensor.
        print("No candidate nodes found for node", A)
        return

    train_graph = model.pos_data["train"]

    # Inference only: switch to eval mode and disable autograd, then restore
    # whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            embedding = model.forward(train_graph.x, train_graph.edge_index)
    finally:
        model.train(was_training)

    neighbours = candidate.to(embedding.device)
    logits = (embedding[A] * embedding[neighbours]).sum(dim=1)
    # .cpu() keeps this working when the model lives on a GPU; float64 matches
    # the precision the scores were previously printed with.
    scores = torch.sigmoid(logits).cpu().numpy().astype(np.float64)

    # Rank only the real candidates, so fewer than `top_k` candidates no longer
    # prints unrelated nodes or raises IndexError.
    order = np.argsort(-scores, kind="stable")[:max(int(top_k), 0)]
    for pos in order.tolist():
        node = int(candidate[pos])
        print(node, float(scores[pos]), pos_graph.title[node])
        
# Query node 1370 (shown as train_mask == False in the earlier inspection cell).
recommendation_by_transductive_model(1370, Loaded_model)

Recommendation for node 1370 with title Non-linear matrix completion
119 0.9999980926513672 R1-PCA: rotational invariant L1-norm principal component analysis for robust subspace factorization
2914 0.9999758005142212 Two-dimensional PCA: a new approach to appearance-based face representation and recognition
824 0.999971866607666 Convex and Semi-Nonnegative Matrix Factorizations
761 0.9999206066131592 Efficient and Robust Feature Selection via Joint ℓ2, 1-Norms Minimization
922 0.9998273849487305 Factor Group-Sparse Regularization for Efficient Low-Rank Matrix Recovery
6157 0.9998210072517395 Orthogonal nonnegative matrix t-factorizations for clustering
2167 0.999815046787262 On the Equivalence of Nonnegative Matrix Factorization and Spectral Clustering
3879 0.9996364116668701 Robust nonnegative matrix factorization using L21-norm
417 0.9995802044868469 Matrix Completion via Sparse Factorization Solved by Accelerated Proximal Alternating Linearized Minimization
1886 0.999402642250061 RSP

In [6]:
# Same query for node 1676, using the checkpoint-loaded model.
recommendation_by_transductive_model(1676, Loaded_model)

Recommendation for node 1676 with title A Semismooth Newton Stochastic Proximal Point Algorithm with Variance Reduction
6603 0.9996970891952515 Error Bound and Convergence Analysis of Matrix Splitting Algorithms for the Affine Variational Inequality Problem
6356 0.9996414184570312 On the convergence of the coordinate descent method for convex differentiable minimization
5541 0.9995827078819275 A Proximal Alternating Direction Method of Multiplier for Linearly Constrained Nonconvex Minimization
267 0.9994171857833862 On the linear convergence of descent methods for convex essentially smooth minimization
6519 0.9989262223243713 Quasi-maximum-likelihood multiuser detection using semi-definite relaxation with application to synchronous CDMA
5699 0.9986081719398499 Dynamic Spectrum Management: Complexity and Duality
6330 0.9983186721801758 Approximation Bounds for Quadratic Optimization with Homogeneous Quadratic Constraints
1389 0.9982872605323792 Error bounds and convergence analysis of f

#### Tensorboard

In [None]:
# %reload_ext tensorboard
# %tensorboard --logdir "./lightning_logs" --port 6005 --host=10.23.115.20