In [5]:
import argparse
import logging

import yaml

# Command-line interface for the "GAT" experiment.
parser = argparse.ArgumentParser(description="GAT")

# --task is restricted to the two listed values and defaults to classification.
parser.add_argument(
    "--task", choices=["classification", "clustering"], default="classification", type=str
)

# --dataset is free-form text; load_best_configs uses it as the lookup key
# into the YAML file of tuned settings.
parser.add_argument("--dataset", default="acm", type=str)

# parse_known_args returns (namespace, leftover_args) and does not error on
# options it does not recognise, e.g. whatever else happens to be in sys.argv
# when this runs inside a notebook kernel. The leftovers are discarded.
args, _ = parser.parse_known_args()


def load_best_configs(args, path):
    """Overlay the tuned settings for ``args.dataset`` onto ``args``.

    The YAML file at ``path`` is read as a mapping from dataset name to a
    mapping of ``{option_name: value}``. Every option found for
    ``args.dataset`` is written onto ``args`` with ``setattr``, overwriting
    any attribute of the same name.

    Args:
        args: Object with a ``dataset`` attribute (e.g. an
            ``argparse.Namespace``). It is mutated in place.
        path: Location of the YAML file.

    Returns:
        The same ``args`` object. It is returned unchanged when the file is
        empty or has no entry for ``args.dataset``.

    Raises:
        OSError: If ``path`` cannot be opened.
        ValueError, TypeError: If a value stored under a key containing
            ``"lr"`` or ``"weight_decay"`` cannot be converted by ``float``.
    """
    with open(path, "r") as f:
        # NOTE(security): FullLoader can still build more than plain data
        # types. If this file can ever come from an untrusted source, switch
        # to yaml.safe_load.
        configs = yaml.load(f, yaml.FullLoader)

    # An empty YAML document loads as None; without this guard the membership
    # test below would raise TypeError instead of reporting "not found".
    if not configs or args.dataset not in configs:
        logging.info("Best args not found")
        return args

    # A dataset key with an empty body (e.g. "acm:") also loads as None;
    # treat it as "no overrides" rather than failing on .items().
    dataset_configs = configs[args.dataset] or {}

    for k, v in dataset_configs.items():
        # Substring match: any key containing "lr" or "weight_decay" is
        # coerced to float. Presumably this is because PyYAML loads exponent
        # literals without a decimal point (e.g. 5e-4) as strings - confirm
        # against the config file.
        if "lr" in k or "weight_decay" in k:
            v = float(v)
        setattr(args, k, v)
    print("------ Use best configs ------")
    return args

In [7]:
# Re-create the parser and parse again: this yields a fresh Namespace every
# time the cell runs, before the tuned settings are layered on top of it.
parser = argparse.ArgumentParser(description="GAT")
parser.add_argument(
    "--task", choices=["classification", "clustering"], default="classification", type=str
)
parser.add_argument("--dataset", default="acm", type=str)
args, _ = parser.parse_known_args()

# Apply the per-dataset settings stored in the YAML file, then display the
# resulting Namespace as the cell output.
args = load_best_configs(args, "../myHGMAE/configs.yml")
args

------ Use best configs ------


Namespace(task='classification', dataset='acm', in_dim=1902, hidden_dim=256, category='paper', activation='prelu', feat_drop=0.2, attn_drop=0.5, negative_slope=0.2, n_labels=3, nei_num=2, norm='batchnorm', num_heads=4, num_layers=2, num_out_heads=1, optimizer='adam', patience=10, residual=False, mp_edge_recon_loss_weight=1, mp_edge_mask_rate=0.7, mp_edge_gamma=3, node_mask_rate='0.5,0.005,0.8', attr_restore_loss_weight=1, attr_restore_gamma=1, attr_replace_rate=0.2, attr_unchanged_rate=0.3, mp2vec_negative_size=5, mp2vec_window_size=3, mp2vec_batch_size=128, mp2vec_train_epoch=20, mp2vec_train_lr=0.001, mp2vec_feat_dim=128, mp2vec_feat_pred_loss_weight=0.1, mp2vec_feat_gamma=2, mp2vec_feat_drop=0.2, gpu=0, eva_lr=0.01, eva_wd=0.0005, scheduler=True, scheduler_gamma=0.999, l2_coef=0, lr=0.0008, mps_walk_length=10, mps_walks_per_node=3, mae_epochs=3)

In [10]:
from openhgnn.dataset.NodeClassificationDataset import OHGB_NodeClassification

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Build the "ohgbn-acm" node-classification dataset object.
# NOTE(review): raw_dir is "./dataset", yet the recorded output of this cell
# reports extraction to "./openhgnn/dataset" - confirm which directory the
# loader actually uses.
acm = OHGB_NodeClassification(
    "ohgbn-acm",
    raw_dir="./dataset",
    logger=None,
)

Extracting file to ./openhgnn/dataset\ohgbn-acm
Done saving data into cached files.


In [12]:
# Pull out the two dataset attributes the following cells work with: the graph
# (acm.g) and the dataset's meta-path dictionary (acm.meta_paths_dict).
hg, metapaths_dict = acm.g, acm.meta_paths_dict

In [23]:
import importlib

import dgl
import hgmae
import torch

# Reload the already-imported hgmae module so that edits to its source are
# picked up without restarting the kernel. HGMAE is imported only *after* the
# reload so that the name is bound to the class from the reloaded module.
importlib.reload(hgmae)
from hgmae import HGMAE

# Input features stored on the graph under the "h" key.
h_dict = hg.ndata["h"]

# Start from a shallow copy of the "pap_m2v_emb" mapping so the graph's own
# data is not modified and the cell can be re-run safely, then append the
# "psp_m2v_emb" tensor for each key along dim 1.
# (The stray bare `mp2vec_feat_dict` expression that used to sit here was
# removed: only the last expression of a cell is displayed, so it did nothing.)
mp2vec_feat_dict = hg.ndata["pap_m2v_emb"].copy()
for k, v in hg.ndata["psp_m2v_emb"].items():
    mp2vec_feat_dict[k] = torch.concat([mp2vec_feat_dict[k], v], dim=1)

In [25]:
# Use the GPU when one is available and otherwise fall back to the CPU, so
# building the model does not fail on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = HGMAE.build_model_from_args(args, hg, metapaths_dict).to(device)

In [64]:
# Send the graph to whichever device the model's parameters live on instead of
# hard-coding "cuda", and invoke the module itself rather than .forward() so
# that any registered forward hooks are run.
# NOTE(review): assumes HGMAE is a torch.nn.Module with at least one parameter.
model_device = next(model.parameters()).device
loss = model(hg.to(model_device), h_dict, epoch=3)

tensor(0.1535, device='cuda:0')
tensor(1.0012, device='cuda:0')
tensor(1.1896, device='cuda:0')


In [25]:
import torch.nn as nn

In [104]:
from openhgnn.models import HAN
from openhgnn.utils import extract_metapaths

In [107]:
# HAN.build_model_from_args()

In [121]:
# Node types for which meta-paths are collected.
ntypes = {"paper"}

# Phase 1: for every node type keep the named meta-paths whose first edge
# starts at that node type (meta_path[0][0] is the source type of that edge).
ntype_meta_paths_dict = {}
for ntype in ntypes:
    ntype_meta_paths_dict[ntype] = {
        name: path
        for name, path in metapaths_dict.items()
        if path[0][0] == ntype
    }

# Phase 2: a node type that ended up with no meta-path gets whatever
# extract_metapaths returns for it from the graph's canonical edge types.
for ntype in list(ntype_meta_paths_dict):
    if not ntype_meta_paths_dict[ntype]:
        ntype_meta_paths_dict[ntype] = extract_metapaths(
            ntype, hg.canonical_etypes, self_loop=False
        )

In [122]:
# Display the node-type -> {meta-path name: edge list} mapping built in the
# previous cell.
ntype_meta_paths_dict

{'paper': {'PAP': [('paper', 'paper-author', 'author'),
   ('author', 'author-paper', 'paper')],
  'PSP': [('paper', 'paper-subject', 'subject'),
   ('subject', 'subject-paper', 'paper')]}}

In [123]:
# Layer sizes handed to the HAN constructor below.
# NOTE(review): 1902 and 256 equal the in_dim / hidden_dim values shown in the
# Namespace printed after load_best_configs - consider reading them from args.
in_dim = 1902
hidden_dim = out_dim = 256

In [124]:
# Instantiate HAN on the meta-paths gathered per node type.
# NOTE(review): [4, 4] is presumably the number of attention heads per layer -
# confirm against HAN's constructor signature.
han = HAN(
    ntype_meta_paths_dict,
    in_dim,
    hidden_dim,
    out_dim,
    [4, 4],
    dropout=0.5,
)

In [125]:
# Run HAN on the graph and its "h" features. The module is called directly
# instead of through .forward() so that registered forward hooks, if any, run.
# NOTE(review): assumes HAN is a torch.nn.Module. It was built with
# dropout=0.5 and is never switched to eval mode here, so the output may differ
# between runs - confirm that this is intended.
out = han(hg, hg.ndata["h"])

In [126]:
# Display the HAN output stored under the "paper" key.
out["paper"]

tensor([[-0.0185, -0.0060, -0.1637,  ..., -0.1354,  0.0114, -0.1894],
        [-0.1291,  0.2156, -0.0368,  ..., -0.0423, -0.0341, -0.1384],
        [-0.1062,  0.1021,  0.0197,  ..., -0.0539, -0.0548, -0.1145],
        ...,
        [-0.0623,  0.1575,  0.0028,  ..., -0.0998, -0.0724, -0.1363],
        [-0.2564,  0.0052,  0.0705,  ..., -0.0033, -0.1203, -0.1832],
        [-0.1525,  0.0833, -0.1657,  ..., -0.1223,  0.0076, -0.2801]],
       grad_fn=<AddmmBackward0>)

In [69]:
from dgl.nn import GATConv

# Toy example for GATConv's output shape: a 6-node graph whose edges are given
# as parallel (source, destination) id lists, with self-loops added on top.
g = dgl.add_self_loop(
    dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3]))
)

# One all-ones feature row of width 10 per node.
feat = torch.ones((6, 10))

# 10 input features -> 2 output features per head, with 3 attention heads.
gatconv = GATConv(in_feats=10, out_feats=2, num_heads=3)
res = gatconv(g, feat)

In [128]:
# Check the shape of the GATConv result; the recorded output [6, 3, 2] matches
# (6 nodes, num_heads=3, 2 output features) from the toy example above.
res.shape

torch.Size([6, 3, 2])