In [1]:
import copy
import torch
import deepsnap
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn

from sklearn.metrics import f1_score
from deepsnap.hetero_gnn import forward_op
from deepsnap.hetero_graph import HeteroGraph
from torch_sparse import SparseTensor, matmul

import pickle
import networkx as nx

In [2]:
class HeteroGNNConv(pyg_nn.MessagePassing):
    def __init__(self, in_channels_src, in_channels_dst, out_channels):
        super(HeteroGNNConv, self).__init__(aggr="mean")

        self.in_channels_src = in_channels_src
        self.in_channels_dst = in_channels_dst
        self.out_channels = out_channels

        # To simplify implementation, please initialize both self.lin_dst
        # and self.lin_src out_features to out_channels
        self.lin_dst = None
        self.lin_src = None

        self.lin_update = None

        ############# Your code here #############
        ## (~3 lines of code)
        
        # Old code from colab, didn't work:
        # self.lin_dst = nn.Linear(self.in_channels_src, self.out_channels)
        # self.lin_src = nn.Linear(self.in_channels_dst, self.out_channels)
        # self.lin_update = nn.Linear(2*self.out_channels, self.out_channels)
        
        self.lin_dst = nn.Linear(in_channels_dst, out_channels)
        self.lin_src = nn.Linear(in_channels_src, out_channels)
        self.lin_update = nn.Linear(2 * out_channels, out_channels)

        ##########################################

    def forward(
        self,
        node_feature_src,
        node_feature_dst,
        edge_index,
        size=None,
        res_n_id=None,
    ):
        ############# Your code here #############
        ## (~1 line of code)

        return self.propagate(edge_index, node_feature_src=node_feature_src, 
                    node_feature_dst=node_feature_dst, size=size, res_n_id=res_n_id)
        ##########################################

    def message_and_aggregate(self, edge_index, node_feature_src):

        ############# Your code here #############
        ## (~1 line of code)
        ## Note:
        ## 1. Different from what we implemented in Colab 3, we use message_and_aggregate
        ## to replace the message and aggregate. The benefit is that we can avoid
        ## materializing x_i and x_j, and make the implementation more efficient.
        ## 2. To implement efficiently, following PyG documentation is helpful:
        ## https://pytorch-geometric.readthedocs.io/en/latest/notes/sparse_tensor.html
        ## 3. Here edge_index is torch_sparse SparseTensor.

        out = matmul(edge_index, node_feature_src, reduce='mean')
        ##########################################

        return out

    def update(self, aggr_out, node_feature_dst, res_n_id):

        ############# Your code here #############
        ## (~4 lines of code)
        dst_out = self.lin_dst(node_feature_dst)
        aggr_out = self.lin_src(aggr_out)
        # print(aggr_out.shape, dst_out.shape)
        aggr_out = torch.cat([dst_out, aggr_out], -1)
        # print(aggr_out.shape, )
        aggr_out = self.lin_update(aggr_out)
        ##########################################

        return aggr_out

In [3]:
class HeteroGNNWrapperConv(deepsnap.hetero_gnn.HeteroConv):
    def __init__(self, convs, args, aggr="mean"):
        super(HeteroGNNWrapperConv, self).__init__(convs, None)
        self.aggr = aggr

        # Map the index and message type
        self.mapping = {}

        # A numpy array that stores the final attention probability
        self.alpha = None

        self.attn_proj = None

        if self.aggr == "attn":
            ############# Your code here #############
            ## (~1 line of code)
            ## Note:
            ## 1. Initialize self.attn_proj here.
            ## 2. You should use nn.Sequential for self.attn_proj
            ## 3. nn.Linear and nn.Tanh are useful.
            ## 4. You can create a vector parameter by using:
            ## nn.Linear(some_size, 1, bias=False)
            ## 5. The first linear layer should have out_features as args['attn_size']
            ## 6. You can assume we only have one "head" for the attention.
            ## 7. We recommend you to implement the mean aggregation first. After 
            ## the mean aggregation works well in the training, then you can 
            ## implement this part.

            self.attn_proj = nn.Sequential(
                nn.Linear(args['hidden_size'], args['attn_size']),
                nn.Tanh(),
                nn.Linear(args['attn_size'], 1, bias=False)
            )
            #########################################
    
    def reset_parameters(self):
        super(HeteroGNNWrapperConv, self).reset_parameters()
        if self.aggr == "attn":
            for layer in self.attn_proj.children():
                layer.reset_parameters()
    
    def forward(self, node_features, edge_indices):
        message_type_emb = {}
        for message_key, message_type in edge_indices.items():
            src_type, edge_type, dst_type = message_key
            node_feature_src = node_features[src_type]
            node_feature_dst = node_features[dst_type]
            edge_index = edge_indices[message_key]
            message_type_emb[message_key] = (
                self.convs[message_key](
                    node_feature_src,
                    node_feature_dst,
                    edge_index,
                )
            )
        node_emb = {dst: [] for _, _, dst in message_type_emb.keys()}
        mapping = {}        
        for (src, edge_type, dst), item in message_type_emb.items():
            mapping[len(node_emb[dst])] = (src, edge_type, dst)
            node_emb[dst].append(item)
        self.mapping = mapping
        for node_type, embs in node_emb.items():
            if len(embs) == 1:
                node_emb[node_type] = embs[0]
            else:
                node_emb[node_type] = self.aggregate(embs)
        return node_emb
    
    def aggregate(self, xs):
        # TODO: Implement this function that aggregates all message type results.
        # Here, xs is a list of tensors (embeddings) with respect to message 
        # type aggregation results.

        if self.aggr == "mean":

            ############# Your code here #############
            ## (~2 lines of code)
            xs = torch.stack(xs)
            out = torch.mean(xs, dim=0)
            return out
            ##########################################

        elif self.aggr == "attn":

            ############# Your code here #############
            ## (~10 lines of code)
            ## Note:
            ## 1. Store the value of attention alpha (as a numpy array) to self.alpha,
            ## which has the shape (len(xs), ) self.alpha will be not be used 
            ## to backpropagate etc. in the model. We will use it to see how much 
            ## attention the layer pays on different message types.
            ## 2. torch.softmax and torch.cat are useful.
            ## 3. You might need to reshape the tensors by using the 
            ## `view()` function https://pytorch.org/docs/stable/tensor_view.html
            xs = torch.stack(xs, dim=0)
            s = self.attn_proj(xs).squeeze(-1)
            s = torch.mean(s, dim=-1)
            self.alpha = torch.softmax(s, dim=0).detach()
            out = self.alpha.reshape(-1, 1, 1) * xs
            out = torch.sum(out, dim=0)
            return out
            ##########################################

In [4]:


def generate_convs(hetero_graph, conv, hidden_size, first_layer=False):
    # TODO: Implement this function that returns a dictionary of `HeteroGNNConv` 
    # layers where the keys are message types. `hetero_graph` is deepsnap `HeteroGraph`
    # object and the `conv` is the `HeteroGNNConv`.

    convs = {}

    ############# Your code here #############
    ## (~9 lines of code)

    all_messages_types = hetero_graph.message_types
    for message_type in all_messages_types:
        if first_layer:
            in_channels_src = hetero_graph.num_node_features(message_type[0])
            in_channels_dst = hetero_graph.num_node_features(message_type[2])
        else:
            in_channels_src = hidden_size
            in_channels_dst = hidden_size
        out_channels = hidden_size
        convs[message_type] = conv(in_channels_src, in_channels_dst, out_channels)
    ##########################################
    
    return convs

# def generate_convs(hetero_graph, conv, hidden_size, first_layer=False):
#     convs = {}

#     # Retrieve the number of features for all node types
#     node_feature_dims = {
#         node_type: hetero_graph.num_node_features(node_type)
#         for node_type in hetero_graph.node_types
#     }

#     for message_type in hetero_graph.message_types:
#         src_type, _, dst_type = message_type
#         if first_layer:
#             # Initialize input channels based on the number of features for each node type
#             in_channels_src = node_feature_dims[src_type]
#             in_channels_dst = node_feature_dims[dst_type]
#         else:
#             # For subsequent layers, use the hidden_size as the dimension
#             in_channels_src = hidden_size
#             in_channels_dst = hidden_size
#         out_channels = hidden_size
#         convs[message_type] = conv(in_channels_src, in_channels_dst, out_channels)
    
#     return convs


In [5]:
class HeteroGNN(torch.nn.Module):
    def __init__(self, hetero_graph, args, num_layers, aggr="mean"):
        super(HeteroGNN, self).__init__()

        self.aggr = aggr
        self.hidden_size = args['hidden_size']

        # self.convs1 = None
        # self.convs2 = None
        
        self.num_layers = num_layers
        self.convs = None
        self.bns = [nn.ModuleDict() for _ in range(num_layers)]
        self.relus = [nn.ModuleDict() for _ in range(num_layers)]
        
        self.bns1 = nn.ModuleDict()
        self.bns2 = nn.ModuleDict()
        self.relus1 = nn.ModuleDict()
        self.relus2 = nn.ModuleDict()
        self.post_mps = nn.ModuleDict()
        self.fc = nn.ModuleDict()

        ############# Your code here #############
        ## (~10 lines of code)
        ## Note:
        ## 1. For self.convs1 and self.convs2, call generate_convs at first and then
        ## pass the returned dictionary of `HeteroGNNConv` to `HeteroGNNWrapperConv`.
        ## 2. For self.bns, self.relus and self.post_mps, the keys are node_types.
        ## `deepsnap.hetero_graph.HeteroGraph.node_types` will be helpful.
        ## 3. Initialize all batchnorms to torch.nn.BatchNorm1d(hidden_size, eps=1.0).
        ## 4. Initialize all relus to nn.LeakyReLU().
        ## 5. For self.post_mps, each value in the ModuleDict is a linear layer 
        ## where the `out_features` is the number of classes for that node type.
        ## `deepsnap.hetero_graph.HeteroGraph.num_node_labels(node_type)` will be
        ## useful.

        # self.convs = [HeteroGNNWrapperConv(generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=True), args, self.aggr),
        #     *[HeteroGNNWrapperConv(generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=False), args, self.aggr)
        #         for _ in range(self.num_layers - 1)]
        # ]
        
        self.convs1 = HeteroGNNWrapperConv(
            generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=True), 
            args, self.aggr)
        self.convs2 = HeteroGNNWrapperConv(
            generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=False), 
            args, self.aggr)

        all_node_types = hetero_graph.node_types
        for node_type in all_node_types:
            # for i in range(num_layers):
            #     self.bns[i][node_type] = nn.BatchNorm1d(self.hidden_size, eps=1.0)
            #     self.relus[i][node_type] = nn.LeakyReLU()
            
            self.bns1[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1.0)
            self.bns2[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1.0)
            
            self.relus1[node_type] = nn.LeakyReLU()
            self.relus2[node_type] = nn.LeakyReLU()
            #self.post_mps[node_type] = nn.Linear(self.hidden_size, hetero_graph.num_node_labels(node_type))
            self.fc[node_type] = nn.Linear(self.hidden_size, 1)
            

        ##########################################

    def forward(self, node_feature, edge_index):
        # TODO: Implement the forward function. Notice that `node_feature` is 
        # a dictionary of tensors where keys are node types and values are 
        # corresponding feature tensors. The `edge_index` is a dictionary of 
        # tensors where keys are message types and values are corresponding
        # edge index tensors (with respect to each message type).
        
        
        x = node_feature

        ############# Your code here #############
        ## (~7 lines of code)
        ## Note:
        ## 1. `deepsnap.hetero_gnn.forward_op` can be helpful.
        x = self.convs1(x, edge_index)
        x = forward_op(x, self.bns1)
        x = forward_op(x, self.relus1)
        x = self.convs2(x, edge_index)
        x = forward_op(x, self.bns2)
        x = forward_op(x, self.relus2)
        
        # for conv, bn, relu in zip(self.convs, self.bns, self.relus):
        #     x = conv(x, edge_index)
        #     x = forward_op(x, bn)
        #     x = forward_op(x, relu)
        
        x = forward_op(x, self.fc)
        #print("X", x)

        ##########################################
        
        return x

    def loss(self, preds, y, indices):
        loss = 0
        loss_func = torch.nn.MSELoss()
        
        def mape_loss(y_pred, y_true):
            """
            Calculate the Mean Absolute Percentage Error (MAPE) for PyTorch tensors.

            Args:
            y_true (Tensor): True values.
            y_pred (Tensor): Predicted values.

            Returns:
            Tensor: MAPE loss.
            """
            # Ensuring y_true and y_pred are on the same device
            #y_pred = y_pred.to(y_true.device)

            # Avoiding division by zero
            non_zero_mask = y_true != 0

            # Calculating MAPE
            loss = torch.mean(torch.abs((torch.masked_select(y_true, non_zero_mask) - torch.masked_select(y_pred, non_zero_mask) / torch.masked_select(y_true, non_zero_mask)))) * 100
    
            return loss


        #loss_func = mape_loss
        ############# Your code here #############
        ## (~3 lines of code)
        ## Note:
        ## 1. For each node type in preds, accumulate computed loss to `loss`
        ## 2. Loss need to be computed with respect to the given 
        
        # print("Preds", preds)
        # print("Y", y)
        # print("Indices", indices)
         
        mask = y['event'][indices['event'], 0] != -1
        non_zero_idx = torch.masked_select(indices['event'], mask)
                
        loss += loss_func(preds['event'][non_zero_idx], y['event'][non_zero_idx, 0])


        ##########################################

        return loss

In [6]:
def train(model, optimizer, hetero_graph, train_idx):
    model.train()
    optimizer.zero_grad()
    preds = model(hetero_graph.node_feature, hetero_graph.edge_index)

    loss = None

    ############# Your code here #############
    ## Note:
    ## 1. `deepsnap.hetero_graph.HeteroGraph.node_label` is useful
    ## 2. Compute the loss here
    
    loss = model.loss(preds, hetero_graph.node_target, train_idx)
    ##########################################

    loss.backward()
    optimizer.step()
    return loss.item()

def test(model, graph, indices, best_model=None, best_val=0):
    model.eval()
    accs = []
    for index in indices:
        preds = model(graph.node_feature, graph.edge_index)
        
        #print("Index", index)
        #print("Preds", preds['event'])

        idx = index['event']

        L1 = torch.sum(torch.abs(preds['event'][idx] - graph.node_target['event'][idx]))
        
        accs.append(L1)
        #print("ACC", s)

        #pred = preds['event'][idx]
        
        # num_node_types = 0
        # micro = 0
        # macro = 0

        # for node_type in preds:
        #     idx = index[node_type]
        #     pred = preds[node_type][idx]
        #     pred = pred.max(1)[1]
        #     label_np = graph.node_label[node_type][idx].cpu().numpy()
        #     pred_np = pred.cpu().numpy()
        #     micro = f1_score(label_np, pred_np, average='micro')
        #     macro = f1_score(label_np, pred_np, average='macro')
        #     num_node_types += 1
        # Averaging f1 score might not make sense, but in our example we only
        # have one node type
        # micro /= num_node_types
        # macro /= num_node_types
        #accs.append((micro, macro))
    if accs[1] < best_val:
        best_val = accs[1]
        best_model = copy.deepcopy(model)
    
    return accs, best_model, best_val

In [7]:
# Please do not change the following parameters
args = {
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'hidden_size': 64,
    'epochs': 200,
    'weight_decay': 1e-4,
    'lr': 0.201,
    'attn_size': 32,
}

In [8]:
S_node_feature = {
    "event": torch.tensor([
                [1, 1, 1],   # event 0
                [2, 2, 2]    # event 1
    ], dtype=torch.float32),
    "concept": torch.tensor([
                [2, 2, 2],   # concept 0
                [3, 3, 3]    # concept 1
    ], dtype=torch.float32)
}

# S_node_label = {
#     "event": torch.tensor([0, 1], dtype=torch.long), # Class 0, Class 1
#     "concept": torch.tensor([0, 1], dtype=torch.long)  # Class 0, Class 1
# }

S_node_targets = {
    "event": torch.tensor([[50], [2000]], dtype=torch.float32),
    # "concept": torch.tensor([[0], [0]], dtype=torch.float32)
}

S_edge_index = {
    ("event", "similar", "event"): torch.tensor([[0,1],[1,0]], dtype=torch.int64),
    ("event", "related", "concept"): torch.tensor([[0,1],[0,1]], dtype=torch.int64),
    ("concept", "related", "event"): torch.tensor([[0,1],[0,1]], dtype=torch.int64)
}

# Testing
hetero_graph = HeteroGraph(
    node_feature=S_node_feature,
    node_target=S_node_targets,
    edge_index=S_edge_index
)

train_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}
val_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}
test_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}

In [9]:
with open("./1_concepts_similar_llm.pkl", "rb") as f:
    G = pickle.load(f)
    # Convert to directed graph for compatibility with Deepsnap
    G = G.to_directed()
    

hetero_graph = HeteroGraph(G, netlib=nx, directed=True)

In [10]:
for message_type in hetero_graph.message_types:
    print("TYPE", message_type)
    print("\t Feature", hetero_graph.num_node_features(message_type[0]))
    print("\t Feature", hetero_graph.num_node_features(message_type[2]))


# Node feature and node label to device

for key in hetero_graph.node_feature:
    hetero_graph.node_feature[key] = hetero_graph.node_feature[key].to(args['device'])
# for key in hetero_graph.node_label:
#     hetero_graph.node_label[key] = hetero_graph.node_label[key].to(args['device'])


# edge_index1 = hetero_graph.edge_index[("concept", "related", "event")]
# edge_index1 = hetero_graph.edge_index[("event", "related", "concept")]

# Edge_index to sparse tensor and to device
for key in hetero_graph.edge_index:
    print("KEY", key, type(key))
    print("KEY NUMS", key, hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2]))
    
    
    # TODO: remove quick fix
    # if key == ('event', 'related', 'concept'):
    #     edge_index = hetero_graph.edge_index[("concept", "related", "event")]
    # else:
    edge_index = hetero_graph.edge_index[key]

    print("MAX EDGES", edge_index[0].max(), edge_index[1].max(), hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2]))
    adj = SparseTensor(row=edge_index[0].long(), col=edge_index[1].long(), sparse_sizes=(hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2])))
    hetero_graph.edge_index[key] = adj.t().to(args['device'])
    


for key in hetero_graph.node_target:
    hetero_graph.node_target[key] = hetero_graph.node_target[key].to(args['device'])


TYPE ('event', 'related', 'concept')
	 Feature 770
	 Feature 1
TYPE ('event', 'similar', 'event')
	 Feature 770
	 Feature 770
TYPE ('concept', 'related', 'event')
	 Feature 1
	 Feature 770
KEY ('event', 'related', 'concept') <class 'tuple'>
KEY NUMS ('event', 'related', 'concept') 8487 8729
MAX EDGES tensor(8448) tensor(8728) 8487 8729
KEY ('event', 'similar', 'event') <class 'tuple'>
KEY NUMS ('event', 'similar', 'event') 8487 8487
MAX EDGES tensor(8486) tensor(8486) 8487 8487
KEY ('concept', 'related', 'event') <class 'tuple'>
KEY NUMS ('concept', 'related', 'event') 8729 8487
MAX EDGES tensor(8728) tensor(8448) 8729 8487


In [11]:
nEvents = hetero_graph.num_nodes("event")
nConcepts = hetero_graph.num_nodes("concept")

s1 = 0.7
s2 = 0.8

train_idx = {   "event": torch.tensor(range(0, int(nEvents * s1))).to(args["device"]), 
                "concept": torch.tensor(range(0, int(nConcepts * s1))).to(args["device"])
            }
val_idx = {   "event": torch.tensor(range(int(nEvents * s1), int(nEvents * s2))).to(args["device"]), 
                "concept": torch.tensor(range(int(nConcepts * s1), int(nConcepts * s2))).to(args["device"])
            }
test_idx = {   "event": torch.tensor(range(int(nEvents * s2), nEvents)).to(args["device"]), 
                "concept": torch.tensor(range(int(nConcepts * s2), nConcepts)).to(args["device"])
            }

print(train_idx["event"].shape)
print(test_idx["event"].shape)
print(val_idx["event"].shape)

# dataset = deepsnap.dataset.GraphDataset([hetero_graph], task='node')

# dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
# datasets = {'train': dataset_train, 'val': dataset_val, 'test': dataset_test}

# datasets

torch.Size([5940])
torch.Size([1698])
torch.Size([849])


In [12]:
best_model = None
best_val = float("inf")

model = HeteroGNN(hetero_graph, args, num_layers=2, aggr="mean").to(args['device'])
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

for epoch in range(args['epochs']):
    loss = train(model, optimizer, hetero_graph, train_idx)
    accs, best_model, best_val = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_val)
    # print(
    #     f"Epoch {epoch + 1}: loss {round(loss, 5)}, "
    #     f"train micro {round(accs[0][0] * 100, 2)}%, train macro {round(accs[0][1] * 100, 2)}%, "
    #     f"valid micro {round(accs[1][0] * 100, 2)}%, valid macro {round(accs[1][1] * 100, 2)}%, "
    #     f"test micro {round(accs[2][0] * 100, 2)}%, test macro {round(accs[2][1] * 100, 2)}%"
    # )
    print(f"Epoch {epoch} Loss {loss} Accs {accs}")

best_accs, _, _ = test(best_model, hetero_graph, [train_idx, val_idx, test_idx])

print("Best accs", best_accs)



# print(
#     f"Best model: "
#     f"train micro {round(best_accs[0][0] * 100, 2)}%, train macro {round(best_accs[0][1] * 100, 2)}%, "
#     f"valid micro {round(best_accs[1][0] * 100, 2)}%, valid macro {round(best_accs[1][1] * 100, 2)}%, "
#     f"test micro {round(best_accs[2][0] * 100, 2)}%, test macro {round(best_accs[2][1] * 100, 2)}%"
# )

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0 Loss 1979.5784912109375 Accs [tensor(75566312., device='cuda:0', grad_fn=<SumBackward0>), tensor(10772876., device='cuda:0', grad_fn=<SumBackward0>), tensor(21423520., device='cuda:0', grad_fn=<SumBackward0>)]
Epoch 1 Loss 2143.34619140625 Accs [tensor(7.0874e+08, device='cuda:0', grad_fn=<SumBackward0>), tensor(1.0187e+08, device='cuda:0', grad_fn=<SumBackward0>), tensor(2.0182e+08, device='cuda:0', grad_fn=<SumBackward0>)]
Epoch 2 Loss 1757.8538818359375 Accs [tensor(3048727., device='cuda:0', grad_fn=<SumBackward0>), tensor(422222.1875, device='cuda:0', grad_fn=<SumBackward0>), tensor(860904., device='cuda:0', grad_fn=<SumBackward0>)]
Epoch 3 Loss 1900.925537109375 Accs [tensor(2984781.5000, device='cuda:0', grad_fn=<SumBackward0>), tensor(429750.4062, device='cuda:0', grad_fn=<SumBackward0>), tensor(845222.1250, device='cuda:0', grad_fn=<SumBackward0>)]
Epoch 4 Loss 1845.0560302734375 Accs [tensor(2073996.3750, device='cuda:0', grad_fn=<SumBackward0>), tensor(298066.6250, d

In [13]:
preds = best_model(hetero_graph.node_feature, hetero_graph.edge_index)
# mask = preds['event'] > 0
# preds['event'][preds['event'] > 0].shape

# print(preds['event'][0], hetero_graph.node_target['event'][0]) 


#print(hetero_graph.node_feature['event'])

# for i in range(3000):
#     if hetero_graph.node_target['event'][i] != -1: # concepts have node target -1
#         print(preds['event'][i], hetero_graph.node_target['event'][i])
        
    
for i in range(1000):    
    if hetero_graph.node_target['event'][test_idx['event']][i] != -1:
        print(preds['event'][test_idx['event']][i], hetero_graph.node_target['event'][test_idx['event']][i])

tensor([25.1041], device='cuda:0', grad_fn=<SelectBackward0>) tensor([3.], device='cuda:0')
tensor([25.1997], device='cuda:0', grad_fn=<SelectBackward0>) tensor([4.], device='cuda:0')
tensor([25.1016], device='cuda:0', grad_fn=<SelectBackward0>) tensor([22.], device='cuda:0')
tensor([25.1474], device='cuda:0', grad_fn=<SelectBackward0>) tensor([12.], device='cuda:0')
tensor([25.0410], device='cuda:0', grad_fn=<SelectBackward0>) tensor([1.], device='cuda:0')
tensor([25.2733], device='cuda:0', grad_fn=<SelectBackward0>) tensor([56.], device='cuda:0')
tensor([25.0296], device='cuda:0', grad_fn=<SelectBackward0>) tensor([3.], device='cuda:0')
tensor([24.9846], device='cuda:0', grad_fn=<SelectBackward0>) tensor([5.], device='cuda:0')
tensor([25.2393], device='cuda:0', grad_fn=<SelectBackward0>) tensor([6.], device='cuda:0')
tensor([17.2762], device='cuda:0', grad_fn=<SelectBackward0>) tensor([9.], device='cuda:0')
tensor([17.2164], device='cuda:0', grad_fn=<SelectBackward0>) tensor([1.], de