In [1]:
import argparse
import logging
import dgl
import yaml

parser = argparse.ArgumentParser(description="GAT")
parser.add_argument(
    "--task",
    type=str,
    default="classification",
    choices=["classification", "clustering"],
)
parser.add_argument("--dataset", type=str, default="acm")
args, _ = parser.parse_known_args()


def load_best_configs(args, path):
    with open(path, "r") as f:
        configs = yaml.load(f, yaml.FullLoader)

    if args.dataset not in configs:
        logging.info("Best args not found")
        return args

    configs = configs[args.dataset]

    for k, v in configs.items():
        if "lr" in k or "weight_decay" in k:
            v = float(v)
        setattr(args, k, v)
    print("------ Use best configs ------")
    return args

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
parser = argparse.ArgumentParser(description="GAT")
parser.add_argument(
    "--task",
    type=str,
    default="classification",
    choices=["classification", "clustering"],
)
parser.add_argument("--dataset", type=str, default="acm")
args, _ = parser.parse_known_args()

args = load_best_configs(args, "../myHGMAE/configs.yml")
args

------ Use best configs ------


Namespace(task='classification', dataset='acm', in_dim=1902, hidden_dim=1024, category='paper', feat_drop=0.2, attn_drop=0.5, negative_slope=0.2, n_labels=3, nei_num=2, norm='batchnorm', num_heads=4, num_layers=2, num_out_heads=1, residual=False, mp_edge_recon_loss_weight=1, mp_edge_mask_rate=0.6, mp_edge_gamma=3, node_mask_rate='0.5,0.005,0.8', attr_restore_loss_weight=1, attr_restore_gamma=1, attr_replace_rate=0.2, attr_unchanged_rate=0.3, mp2vec_negative_size=5, mp2vec_window_size=5, mp2vec_batch_size=256, mp2vec_rw_length=5, mp2vec_walks_per_node=3, mp2vec_train_epoch=2, mp2vec_train_lr=0.01, mp2vec_feat_dim=128, mp2vec_feat_pred_loss_weight=0.1, mp2vec_feat_gamma=1, mp2vec_feat_drop=0.2, optimizer='adam', patience=10, gpu=0, mae_epochs=1000, eva_lr=0.01, eva_wd=0.0005, scheduler=True, scheduler_gamma=0.999, l2_coef=0, lr=0.0008)

In [3]:
from openhgnn.dataset.NodeClassificationDataset import OHGB_NodeClassification

In [4]:
acm = OHGB_NodeClassification("ohgbn-acm", raw_dir="./dataset", logger=None)

Extracting file to ./openhgnn/dataset\ohgbn-acm
Done saving data into cached files.


In [5]:
hg = acm.g
metapaths_dict = acm.meta_paths_dict

In [6]:
from torch.utils.data import DataLoader

In [14]:
import importlib

import dgl
import hgmae
import torch

importlib.reload(hgmae)
from hgmae import HGMAE

h_dict = hg.ndata["h"]
mp2vec_feat_dict = hg.ndata["pap_m2v_emb"].copy()
mp2vec_feat_dict
for k, v in hg.ndata["psp_m2v_emb"].items():
    mp2vec_feat_dict[k] = torch.concat([mp2vec_feat_dict[k], v], dim=1)

In [15]:
model = HGMAE.build_model_from_args(args, hg, metapaths_dict).to("cuda")

In [16]:
loss=model.forward(hg.to("cuda"), h_dict, epoch=3)

Training MetaPath2Vec feat by given metapaths_dict 
Metapath for training mp2vec models: ['paper', 'author', 'paper', 'subject', 'paper']


100%|██████████| 3025/3025 [00:02<00:00, 1511.60it/s]
100%|██████████| 2/2 [00:01<00:00,  1.57it/s]


<class 'torch.Tensor'>
tensor(indices=tensor([[   0,    0,    0,  ..., 3023, 3023, 3024],
                       [   0,    8,   20,  ..., 2998, 3023, 3024]]),
       values=tensor([0.0556, 0.0527, 0.0556,  ..., 0.5000, 0.5000, 1.0000]),
       device='cuda:0', size=(3025, 3025), nnz=29436, layout=torch.sparse_coo)
tensor([[0.0556, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.1111, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.1250,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.2500, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.5000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0000]],
       device='cuda:0')
有tmd两个mps


In [205]:
best_model_state_dict=model.state_dict()

In [206]:
model.load_state_dict(best_model_state_dict)

<All keys matched successfully>

In [221]:
model.eval()
emb=model.get_embeds(hg.to('cuda'),h_dict)
emb

tensor([[ 0.0705,  0.2278,  0.1491,  ...,  0.0207,  0.2788, -0.0295],
        [-0.0525,  0.1709, -0.0020,  ..., -0.0326,  0.1014,  0.0926],
        [ 0.0284,  0.1303, -0.0377,  ...,  0.0231,  0.0842,  0.0510],
        ...,
        [-0.0186,  0.1019, -0.0405,  ..., -0.0203,  0.1192,  0.0159],
        [ 0.0026,  0.1289, -0.0267,  ..., -0.0058,  0.1372,  0.0607],
        [-0.0202,  0.2216, -0.0134,  ...,  0.0193,  0.2289,  0.0461]],
       device='cuda:0')

In [229]:
model.get_mp2vec_feat

tensor([[-0.0104,  0.0649, -0.0720,  ...,  0.0616, -0.0050, -0.0069],
        [ 0.0160,  0.0797, -0.0949,  ...,  0.1019, -0.0014,  0.0130],
        [ 0.0152,  0.0669, -0.0475,  ...,  0.0758,  0.0071,  0.0163],
        ...,
        [ 0.0091,  0.0804, -0.0853,  ...,  0.0720, -0.0224, -0.0068],
        [ 0.0127,  0.0761, -0.0731,  ...,  0.0650,  0.0160,  0.0139],
        [-0.0556,  0.1392, -0.1587,  ...,  0.1254,  0.1180, -0.0390]],
       device='cuda:0')

In [238]:
setattr(args,'meta_paths_dict',metapaths_dict)

In [414]:
x=torch.rand(10030,2000)
print(x)

tensor([[0.1853, 0.4965, 0.6377,  ..., 0.9592, 0.0431, 0.7280],
        [0.3973, 0.7349, 0.5938,  ..., 0.7701, 0.1761, 0.0413],
        [0.3568, 0.1576, 0.2826,  ..., 0.9391, 0.6474, 0.7089],
        ...,
        [0.1578, 0.9387, 0.2520,  ..., 0.0995, 0.5286, 0.3640],
        [0.5275, 0.6333, 0.1917,  ..., 0.9978, 0.2696, 0.1147],
        [0.5265, 0.3134, 0.8362,  ..., 0.8202, 0.5362, 0.8809]])


In [415]:
def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation"""
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return torch.FloatTensor(features)

def myPrepreposs(feat):
    rowsum = torch.sum(feat, dim=1).reshape(-1, 1)
    r_inv = torch.pow(rowsum, -1)
    r_inv=torch.where(torch.isinf(r_inv), 0, r_inv)
    feat=feat*r_inv
    return feat
    
    

In [416]:
p2 = myPrepreposs(x)
p1=preprocess_features(x)

In [17]:

from scipy.sparse import spmatrix
import torch
from scipy import sparse as sp
import numpy as np

def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()

def myNormalize(adj):
    rowsum=torch.sum(adj, dim=1).reshape(-1, 1)
    d_inv_sqrt=torch.pow(rowsum, -0.5)
    d_inv_sqrt=torch.where(torch.isinf(d_inv_sqrt), 0, d_inv_sqrt)
    return d_inv_sqrt.T*adj*d_inv_sqrt  

In [18]:
mps=[dgl.metapath_reachable_graph(hg, mp).adjacency_matrix() for mp in acm.meta_paths_dict.values()]

In [44]:

for mp in acm.meta_paths_dict.values(): 
    adj=dgl.metapath_reachable_graph(hg, mp).adjacency_matrix()
    adj=myNormalize(adj.to_dense()).to_sparse()

In [49]:
adj.todense()

AttributeError: 'Tensor' object has no attribute 'todense'

In [46]:
import dgl.sparse.sparse_matrix as sp
dgl.sparse.sparse_matrix.SparseMatrix

dgl.sparse.sparse_matrix.SparseMatrix

In [47]:
sp.from_torch_sparse(adj.to_dense().to_sparse())

SparseMatrix(indices=tensor([[   0,    0,    0,  ..., 3024, 3024, 3024],
                             [   0,   75,  586,  ..., 3021, 3023, 3024]]),
             values=tensor([0.2000, 0.2000, 0.2000,  ..., 0.0008, 0.0008, 0.0008]),
             shape=(3025, 3025), nnz=2200581)

tensor(indices=tensor([[   0,    0,    0,  ..., 3024, 3024, 3024],
                       [   0,   75,  586,  ..., 3021, 3023, 3024]]),
       values=tensor([1., 1., 1.,  ..., 1., 1., 1.]),
       size=(3025, 3025), nnz=2200581, layout=torch.sparse_coo)

In [473]:
a=torch.randint(0,2,(5000,5000)).float()
asp=sp.csr_matrix(a)


In [474]:
asp_norm=normalize_adj(asp)

In [475]:
norm1=torch.tensor(asp_norm.todense())

In [476]:
norm2=myNormalize(a)

In [477]:
# torch.equal(norm1,norm2.T)
torch.allclose(norm1,norm2.T)

True

In [478]:
norm1

tensor([[0.0000, 0.0004, 0.0000,  ..., 0.0000, 0.0004, 0.0000],
        [0.0000, 0.0004, 0.0000,  ..., 0.0004, 0.0004, 0.0004],
        [0.0000, 0.0000, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0004, 0.0004,  ..., 0.0004, 0.0000, 0.0000],
        [0.0004, 0.0004, 0.0004,  ..., 0.0004, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0004,  ..., 0.0004, 0.0004, 0.0000]])

In [479]:
norm2.T


tensor([[0.0000, 0.0004, 0.0000,  ..., 0.0000, 0.0004, 0.0000],
        [0.0000, 0.0004, 0.0000,  ..., 0.0004, 0.0004, 0.0004],
        [0.0000, 0.0000, 0.0004,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0004, 0.0004,  ..., 0.0004, 0.0000, 0.0000],
        [0.0004, 0.0004, 0.0004,  ..., 0.0004, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0004,  ..., 0.0004, 0.0004, 0.0000]])