In [2]:
import dgl
import numpy as np
import scipy.sparse as sp
import torch as th

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
data_folder = "data/"
path = data_folder + "freebase/"

# "freebase": {
#     "type_num": [3492, 2502, 33401, 4459],
#     "nei_num": 3,
#     "n_labels": 3,
# },

type_num = [3492, 2502, 33401, 4459]
ratio = [20, 40, 60]

In [4]:
label = np.load(path + "labels.npy")
nei_d = np.load(path + "nei_d.npy", allow_pickle=True)
nei_a = np.load(path + "nei_a.npy", allow_pickle=True)
nei_w = np.load(path + "nei_w.npy", allow_pickle=True)

feat_m = sp.eye(type_num[0])
feat_d = sp.eye(type_num[1])
feat_a = sp.eye(type_num[2])
feat_w = sp.eye(type_num[3])

mam = sp.load_npz(path + "mam.npz")
mdm = sp.load_npz(path + "mdm.npz")
mwm = sp.load_npz(path + "mwm.npz")
pos = sp.load_npz(path + "pos.npz")

train = [np.load(path + "train_" + str(i) + ".npy") for i in ratio]
test = [np.load(path + "test_" + str(i) + ".npy") for i in ratio]
val = [np.load(path + "val_" + str(i) + ".npy") for i in ratio]

label = th.LongTensor(label)
nei_d = [th.LongTensor(i) for i in nei_d]
nei_a = [th.LongTensor(i) for i in nei_a]
nei_w = [th.LongTensor(i) for i in nei_w]

feat_m = th.FloatTensor(feat_m.todense())
feat_d = th.FloatTensor(feat_d.todense())
feat_a = th.FloatTensor(feat_a.todense())
feat_w = th.FloatTensor(feat_w.todense())

mam = th.LongTensor(mam.todense()).to_sparse()
mdm = th.LongTensor(mdm.todense()).to_sparse()
mwm = th.LongTensor(mwm.todense()).to_sparse()

train = [th.LongTensor(i) for i in train]
val = [th.LongTensor(i) for i in val]
test = [th.LongTensor(i) for i in test]

In [5]:
# adj
neis = [nei_d, nei_a, nei_w]
links = []
for src, nei in enumerate(neis):
    dst_array_concat = th.concatenate(nei)  # .unsqueeze(0)
    src_array_concat = []
    for src_id, dst_array in enumerate(nei):
        src_array_concat.extend([src_id] * len(dst_array))
    src_array_concat = th.tensor(src_array_concat)  # .unsqueeze(0)
    index = th.vstack([src_array_concat, dst_array_concat])
    links.append(index)
    index = th.vstack([dst_array_concat, src_array_concat])
    links.append(index)

In [7]:
data_dict = {
    ("M", "M-D", "D"): (links[0][0], links[0][1]),
    ("D", "D-M", "M"): (links[1][0], links[1][1]),
    ("M", "M-A", "A"): (links[2][0], links[2][1]),
    ("A", "A-M", "M"): (links[3][0], links[3][1]),
    ("M", "M-W", "W"): (links[4][0], links[4][1]),
    ("W", "W-M", "M"): (links[5][0], links[5][1]),
}

meta_paths_dict = {
    "MAM": [("M", "M-A", "A"), ("A", "A-M", "M")],
    "MDM": [("M", "M-D", "D"), ("D", "D-M", "M")],
    "MWM": [('M', 'M-W', 'W'), ('W', 'W-M', 'M')]
}

hg = dgl.heterograph(data_dict)


In [8]:
hg.nodes["M"].data["h"] = feat_m
hg.nodes["M"].data["label"] = label

hg.nodes['A'].data['h'] = feat_a
hg.nodes['W'].data['h'] = feat_w
hg.nodes['D'].data['h'] = feat_d

ratio = [20, 40, 60]
for i, r in enumerate(ratio):
    mask = th.zeros(3492).bool()
    mask[train[i]] = True
    hg.nodes['M'].data['train_%d' % r] = mask

    mask = th.zeros(3492).bool()
    mask[val[i]] = True
    hg.nodes['M'].data['val_%d' % r] = mask

    mask = th.zeros(3492).bool()
    mask[test[i]] = True
    hg.nodes['M'].data['test_%d' % r] = mask

In [9]:
dgl.save_graphs('data/freebase4HGMAE.bin', hg)


In [10]:
a1 = dgl.metapath_reachable_graph(hg, meta_paths_dict['MAM']).adjacency_matrix().to_dense()
a2 = mam.to_dense()
th.equal(a1, a2)


True

In [11]:
a1 = dgl.metapath_reachable_graph(hg, meta_paths_dict['MWM']).adjacency_matrix().to_dense()
a2 = mwm.to_dense()
th.equal(a1, a2)


True

In [12]:
a1 = dgl.metapath_reachable_graph(hg, meta_paths_dict['MAM']).adjacency_matrix().to_dense()
a2 = mam.to_dense()
th.equal(a1, a2)


True