In [1]:
import copy
import torch
import deepsnap
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn

from sklearn.metrics import f1_score
from deepsnap.hetero_gnn import forward_op
from deepsnap.hetero_graph import HeteroGraph
from torch_sparse import SparseTensor, matmul

import pickle
import networkx as nx

In [2]:
class HeteroGNNConv(pyg_nn.MessagePassing):
    def __init__(self, in_channels_src, in_channels_dst, out_channels):
        super(HeteroGNNConv, self).__init__(aggr="mean")

        self.in_channels_src = in_channels_src
        self.in_channels_dst = in_channels_dst
        self.out_channels = out_channels

        self.lin_dst = None
        self.lin_src = None

        self.lin_update = None

        self.lin_dst = nn.Linear(in_channels_dst, out_channels)
        self.lin_src = nn.Linear(in_channels_src, out_channels)
        self.lin_update = nn.Linear(2 * out_channels, out_channels)

    def forward(
        self,
        node_feature_src,
        node_feature_dst,
        edge_index,
        size=None,
        res_n_id=None,
        ):

        return self.propagate(edge_index, node_feature_src=node_feature_src, 
                    node_feature_dst=node_feature_dst, size=size, res_n_id=res_n_id)

    def message_and_aggregate(self, edge_index, node_feature_src):

        out = matmul(edge_index, node_feature_src, reduce='mean')

        return out

    def update(self, aggr_out, node_feature_dst, res_n_id):

        dst_out = self.lin_dst(node_feature_dst)
        aggr_out = self.lin_src(aggr_out)
        aggr_out = torch.cat([dst_out, aggr_out], -1)
        aggr_out = self.lin_update(aggr_out)

        return aggr_out

In [3]:
class HeteroGNNWrapperConv(deepsnap.hetero_gnn.HeteroConv):
    def __init__(self, convs, args, aggr="mean"):
        """
        Initializes the HeteroGNNWrapperConv instance.

        :param convs: Dictionary of convolution layers for each message type.
        :param args: Arguments dictionary containing hyperparameters like hidden_size and attn_size.
        :param aggr: Aggregation method, defaults to 'mean'.
        """
        
        super(HeteroGNNWrapperConv, self).__init__(convs, None)
        self.aggr = aggr

        # Map the index and message type
        self.mapping = {}

        # A numpy array that stores the final attention probability
        self.alpha = None

        self.attn_proj = None

        if self.aggr == "attn":

            self.attn_proj = nn.Sequential(
                nn.Linear(args['hidden_size'], args['attn_size']),
                nn.Tanh(),
                nn.Linear(args['attn_size'], 1, bias=False)
            )
    
    def reset_parameters(self):
        super(HeteroGNNWrapperConv, self).reset_parameters()
        if self.aggr == "attn":
            for layer in self.attn_proj.children():
                layer.reset_parameters()
    
    def forward(self, node_features, edge_indices):
        """
        Forward pass of the model.

        :param node_features: Dictionary of node features for each node type.
        :param edge_indices: Dictionary of edge indices for each message type.
        :return: Aggregated node embeddings for each node type.
        """
        
        message_type_emb = {}
        for message_key, message_type in edge_indices.items():
            src_type, edge_type, dst_type = message_key
            node_feature_src = node_features[src_type]
            node_feature_dst = node_features[dst_type]
            edge_index = edge_indices[message_key]
            message_type_emb[message_key] = (
                self.convs[message_key](
                    node_feature_src,
                    node_feature_dst,
                    edge_index,
                )
            )
            
        
        node_emb = {dst: [] for _, _, dst in message_type_emb.keys()}
        mapping = {}        
        
        for (src, edge_type, dst), item in message_type_emb.items():
            mapping[len(node_emb[dst])] = (src, edge_type, dst)
            node_emb[dst].append(item)
        self.mapping = mapping
        
        for node_type, embs in node_emb.items():
            if len(embs) == 1:
                node_emb[node_type] = embs[0]
            else:
                node_emb[node_type] = self.aggregate(embs)
                
        return node_emb
    
    def aggregate(self, xs):
        """
        Aggregates node embeddings using the specified aggregation method.

        :param xs: List of node embeddings to aggregate.
        :return: Aggregated node embeddings as a torch.Tensor.
        """

        if self.aggr == "mean":
            xs = torch.stack(xs)
            out = torch.mean(xs, dim=0)
            return out

        elif self.aggr == "attn":
            xs = torch.stack(xs, dim=0)
            s = self.attn_proj(xs).squeeze(-1)
            s = torch.mean(s, dim=-1)
            self.alpha = torch.softmax(s, dim=0).detach()
            out = self.alpha.reshape(-1, 1, 1) * xs
            out = torch.sum(out, dim=0)
            return out

In [4]:
def generate_convs(hetero_graph, conv, hidden_size, first_layer=False):
    
    """
    Generates convolutional layers for each message type in a heterogeneous graph.

    :param hetero_graph: The heterogeneous graph for which convolutions are to be created.
    :param conv: The convolutional layer class or constructor.
    :param hidden_size: The number of features in the hidden layer.
    :param first_layer: Boolean indicating if this is the first layer in the network.
    
    :return: A dictionary of convolutional layers, keyed by message type.
    """

    convs = {}
    
    # Extracting all types of messages/edges in the heterogeneous graph.
    all_messages_types = hetero_graph.message_types
    for message_type in all_messages_types:
        
        # Determine the input feature size for source and destination nodes.
        # If it's the first layer, use the feature size of the nodes.
        # Otherwise, use the hidden size, since from there on the size of embeddings
        # is the same for all nodes.
        if first_layer:
            in_channels_src = hetero_graph.num_node_features(message_type[0])
            in_channels_dst = hetero_graph.num_node_features(message_type[2])
        else:
            in_channels_src = hidden_size
            in_channels_dst = hidden_size
        out_channels = hidden_size
        
        # Create a convolutional layer for this message type and add it to the dictionary.
        convs[message_type] = conv(in_channels_src, in_channels_dst, out_channels)
    
    return convs

In [5]:
class HeteroGNN(torch.nn.Module):
    def __init__(self, hetero_graph, args, num_layers, aggr="mean"):
        super(HeteroGNN, self).__init__()

        self.aggr = aggr
        self.hidden_size = args['hidden_size']

        self.bns1 = nn.ModuleDict()
        self.bns2 = nn.ModuleDict()
        self.relus1 = nn.ModuleDict()
        self.relus2 = nn.ModuleDict()
        self.post_mps = nn.ModuleDict()
        self.fc = nn.ModuleDict()
        
        # Initialize the graph convolutional layers
        self.convs1 = HeteroGNNWrapperConv(
            generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=True), 
            args, self.aggr)
        self.convs2 = HeteroGNNWrapperConv(
            generate_convs(hetero_graph, HeteroGNNConv, self.hidden_size, first_layer=False), 
            args, self.aggr)

        # Initialize batch normalization, ReLU, and fully connected layers for each node type
        all_node_types = hetero_graph.node_types
        for node_type in all_node_types:
            
            self.bns1[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1.0)
            self.bns2[node_type] = nn.BatchNorm1d(self.hidden_size, eps=1.0)
            
            self.relus1[node_type] = nn.LeakyReLU()
            self.relus2[node_type] = nn.LeakyReLU()
            self.fc[node_type] = nn.Linear(self.hidden_size, 1)
            
    def forward(self, node_feature, edge_index):
        """
        Forward pass of the model.

        :param node_feature: Dictionary of node features for each node type.
        :param edge_index: Dictionary of edge indices for each message type.
        :return: The output embeddings for each node type after passing through the model.
        """
        x = node_feature

        # Apply graph convolutional, batch normalization, and ReLU layers
        x = self.convs1(x, edge_index)
        x = forward_op(x, self.bns1)
        x = forward_op(x, self.relus1)

        x = self.convs2(x, edge_index)
        x = forward_op(x, self.bns2)
        x = forward_op(x, self.relus2)
        
        x = forward_op(x, self.fc)

        return x

    def loss(self, preds, y, indices):
        """
        Computes the loss for the model.

        :param preds: Predictions made by the model.
        :param y: Ground truth target values.
        :param indices: Indices of nodes for which loss should be calculated.
        
        :return: The computed loss value.
        """
        
        loss = 0
        loss_func = torch.nn.MSELoss() 
             
        mask = y['event'][indices['event'], 0] != -1
        non_zero_idx = torch.masked_select(indices['event'], mask)
                
        loss += loss_func(preds['event'][non_zero_idx], y['event'][non_zero_idx])

        return loss

In [6]:
def train(model, optimizer, hetero_graph, train_idx):
    """
    Trains the model on the given heterogeneous graph using the specified indices.

    :param model: The graph neural network model to train.
    :param optimizer: The optimizer used for training the model.
    :param hetero_graph: The heterogeneous graph data.
    :param train_idx: Indices for training nodes.

    :return: The training loss as a float.
    """

    model.train() # Set the model to training mode
    optimizer.zero_grad() # Zero out any existing gradients 
    
    # Compute predictions using the model
    # TODO: Use only train_idx instead of edge_index
    # TODO: Train only on events not on concepts
    
    preds = model(hetero_graph.node_feature, hetero_graph.edge_index)

    # Compute the loss using model's loss function
    loss = model.loss(preds, hetero_graph.node_target, train_idx)

    loss.backward() # Backward pass: compute gradient of the loss
    optimizer.step() # Perform a single optimization step, updates parameters
    
    return loss.item() 

def test(model, graph, indices, best_model, best_tvt_scores):
    """
    Tests the model on given indices and updates the best model based on validation loss.

    :param model: The trained graph neural network model.
    :param graph: The heterogeneous graph data.
    :param indices: List of indices for training, validation, and testing nodes.
    :param best_model: The current best model based on validation loss.
    :param best_val: The current best validation loss.
    
    :return: A tuple containing the list of losses for each dataset, the best model, and the best validation loss.
    """
    
    model.eval() # Set the model to evaluation mode
    tvt_scores = []
    
    # Evaluate the model on each set of indices
    for index in indices:
        preds = model(graph.node_feature, graph.edge_index)
        
        idx = index['event']
        
                     
        # mask = y['event'][indices['event'], 0] != -1
        # non_zero_idx = torch.masked_select(indices['event'], mask)
        #preds['event'][non_zero_idx], y['event'][non_zero_idx]
        
        # non_zero_targets = torch.masked_select(graph.node_target['event'][indices['event']], mask)
        # non_zero_truth = torch.masked_select(graph.node_target['event'][indices['event']], mask)
        
        mask = graph.node_target['event'][idx, 0] != -1
        non_zero_idx = torch.masked_select(idx, mask)
        
        
        L1 = torch.sum(torch.abs(preds['event'][non_zero_idx] - graph.node_target['event'][non_zero_idx])) / non_zero_idx.shape[0]
        
        tvt_scores.append(L1)
    
    # Update the best model and validation loss if the current model performs better
    if tvt_scores[1] < best_tvt_scores[1]:
        best_tvt_scores = tvt_scores
        # torch.to_pickle(model, 'best_model.pkl')
        # model.to_pickle('best_model.pkl')
        # best_model = copy.deepcopy(model)
        torch.save(model.state_dict(), './best_model.pkl')
    
    return tvt_scores, best_tvt_scores, best_model

In [7]:
args = {
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'hidden_size': 48,
    'epochs': 500,
    'weight_decay': 0.0002930387278908051,
    'lr': 0.05091434725288385,
    'attn_size': 32,
}

In [8]:
"""
This cell creates a small heterogeneous graph, primarily for testing purposes.
"""

S_node_feature = {
    "event": torch.tensor([
                [1, 1, 1],   # event 0
                [2, 2, 2]    # event 1
    ], dtype=torch.float32),
    "concept": torch.tensor([
                [2, 2, 2],   # concept 0
                [3, 3, 3]    # concept 1
    ], dtype=torch.float32)
}

# S_node_label = {
#     "event": torch.tensor([0, 1], dtype=torch.long), # Class 0, Class 1
#     "concept": torch.tensor([0, 1], dtype=torch.long)  # Class 0, Class 1
# }

S_node_targets = {
    "event": torch.tensor([[50], [2000]], dtype=torch.float32),
    # "concept": torch.tensor([[0], [0]], dtype=torch.float32)
}

S_edge_index = {
    ("event", "similar", "event"): torch.tensor([[0,1],[1,0]], dtype=torch.int64),
    ("event", "related", "concept"): torch.tensor([[0,1],[0,1]], dtype=torch.int64),
    ("concept", "related", "event"): torch.tensor([[0,1],[0,1]], dtype=torch.int64)
}

# Testing
hetero_graph = HeteroGraph(
    node_feature=S_node_feature,
    node_target=S_node_targets,
    edge_index=S_edge_index
)

train_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}
val_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}
test_idx = {"event": torch.tensor([0, 1]).to(args['device']), "concept": torch.tensor([0, 1]).to(args['device'])}

In [9]:
with open("./1_concepts_similar_llm.pkl", "rb") as f:
    G = pickle.load(f)
    # Convert to directed graph for compatibility with Deepsnap
    # G = G.to_directed()
   
        
hetero_graph = HeteroGraph(G, netlib=nx, directed=True)

In [10]:
"""
This code block ensures that all tensors in a heterogeneous graph are transferred to the same 
computational device, as specified in the 'args' dictionary.
"""

for message_type in hetero_graph.message_types:
    print("TYPE", message_type)
    print("\t Feature", hetero_graph.num_node_features(message_type[0]))
    print("\t Feature", hetero_graph.num_node_features(message_type[2]))

# Send node features to device
for key in hetero_graph.node_feature:
    hetero_graph.node_feature[key] = hetero_graph.node_feature[key].to(args['device'])

# for key in hetero_graph.node_label:
#     hetero_graph.node_label[key] = hetero_graph.node_label[key].to(args['device'])

# Create a torch.SparseTensor from edge_index and send it to device
for key in hetero_graph.edge_index:
    print("KEY", key, type(key))
    print("KEY NUMS", key, hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2]))
    
    edge_index = hetero_graph.edge_index[key]

    print("MAX EDGES", edge_index[0].max(), edge_index[1].max(), hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2]))
    adj = SparseTensor(row=edge_index[0].long(), col=edge_index[1].long(), sparse_sizes=(hetero_graph.num_nodes(key[0]), hetero_graph.num_nodes(key[2])))
    hetero_graph.edge_index[key] = adj.t().to(args['device'])
    
# Send node targets to device
for key in hetero_graph.node_target:
    hetero_graph.node_target[key] = hetero_graph.node_target[key].to(args['device'])

TYPE ('event', 'similar', 'event')
	 Feature 769
	 Feature 769
TYPE ('event', 'related', 'concept')
	 Feature 769
	 Feature 1
TYPE ('concept', 'related', 'event')
	 Feature 1
	 Feature 769
KEY ('event', 'similar', 'event') <class 'tuple'>
KEY NUMS ('event', 'similar', 'event') 8487 8487
MAX EDGES tensor(8283) tensor(8283) 8487 8487
KEY ('event', 'related', 'concept') <class 'tuple'>
KEY NUMS ('event', 'related', 'concept') 8487 8729
MAX EDGES tensor(8265) tensor(8728) 8487 8729
KEY ('concept', 'related', 'event') <class 'tuple'>
KEY NUMS ('concept', 'related', 'event') 8729 8487
MAX EDGES tensor(8728) tensor(8265) 8729 8487


In [11]:
"""
This code block creates a basic split of a graph's nodes into training, validation, and testing sets. 
It uses predefined ratios to divide 'event' and 'concept' nodes in the heterogeneous graph for a simple 
dataset split, mainly for testing purposes.
"""

nEvents = hetero_graph.num_nodes("event")
nConcepts = hetero_graph.num_nodes("concept")

s1 = 0.7
s2 = 0.8

train_idx = {   "event": torch.tensor(range(0, int(nEvents * s1))).to(args["device"]), 
                "concept": torch.tensor(range(0, int(nConcepts * s1))).to(args["device"])
            }
val_idx = {   "event": torch.tensor(range(int(nEvents * s1), int(nEvents * s2))).to(args["device"]), 
                "concept": torch.tensor(range(int(nConcepts * s1), int(nConcepts * s2))).to(args["device"])
            }
test_idx = {   "event": torch.tensor(range(int(nEvents * s2), nEvents)).to(args["device"]), 
                "concept": torch.tensor(range(int(nConcepts * s2), nConcepts)).to(args["device"])
            }

print(train_idx["event"].shape)
print(test_idx["event"].shape)
print(val_idx["event"].shape)


# TODO: Add node labels to the nodes and try to make the deepsnap split work even for regression!

# dataset = deepsnap.dataset.GraphDataset([hetero_graph], task='node')

# dataset_train, dataset_val, dataset_test = dataset.split(transductive=True, split_ratio=[0.4, 0.3, 0.3])
# datasets = {'train': dataset_train, 'val': dataset_val, 'test': dataset_test}

# datasets

torch.Size([5940])
torch.Size([1698])
torch.Size([849])


In [12]:
"""
Creates a HeteroGNN model from the previously constructed hetero graph and trains it.
"""

best_model = None
best_tvt_scores = (float("inf"),float("inf"),float("inf"))

model = HeteroGNN(hetero_graph, args, num_layers=2, aggr="mean").to(args['device'])
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

for epoch in range(args['epochs']):
    # Train
    loss = train(model, optimizer, hetero_graph, train_idx)
    # Test for the accuracy of the model
    cur_tvt_scores, best_tvt_scores, best_model = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_tvt_scores)
    print(f"Epoch {epoch} Loss {loss:.4f} Current Train,Val,Test Scores {[score.item() for score in cur_tvt_scores]}")

print("Best Train,Val,Test Scores", [score.item() for score in best_tvt_scores])

Epoch 0 Loss 2813.7439 Current Train,Val,Test Scores [373.6999206542969, 362.2122497558594, 305.9820861816406]
Epoch 1 Loss 2710.2632 Current Train,Val,Test Scores [1550.159423828125, 1537.192626953125, 596.01220703125]
Epoch 2 Loss 2392.2263 Current Train,Val,Test Scores [883.7342529296875, 873.007568359375, 283.9662780761719]
Epoch 3 Loss 2311.0391 Current Train,Val,Test Scores [2086.635009765625, 2077.3955078125, 565.779541015625]
Epoch 4 Loss 1912.1632 Current Train,Val,Test Scores [1644.73681640625, 1637.889404296875, 438.6648254394531]
Epoch 5 Loss 1747.6449 Current Train,Val,Test Scores [850.1151123046875, 845.3836669921875, 247.95516967773438]
Epoch 6 Loss 1703.8121 Current Train,Val,Test Scores [398.1546325683594, 392.571533203125, 136.44036865234375]
Epoch 7 Loss 1730.8505 Current Train,Val,Test Scores [199.00094604492188, 199.64639282226562, 86.96051788330078]
Epoch 8 Loss 1771.8457 Current Train,Val,Test Scores [133.33621215820312, 140.39471435546875, 69.69786071777344]
Epo

In [13]:
model = HeteroGNN(hetero_graph, args, num_layers=2, aggr="mean").to(args['device'])
model.load_state_dict(torch.load('./best_model.pkl'))
preds = model(hetero_graph.node_feature, hetero_graph.edge_index)
# mask = preds['event'] > 0
# preds['event'][preds['event'] > 0].shape

# print(preds['event'][0], hetero_graph.node_target['event'][0]) 


#print(hetero_graph.node_feature['event'])

# Assuming val_idx is the dictionary containing the validation indices for 'event' node type

# TODO: Best validation score doesn't match the best validation score from the training loop
# best_val = torch.sum(torch.abs(preds['event'][val_idx['event']] - hetero_graph.node_target['event'][val_idx['event']])) / val_idx['event'].shape[0]
cur_tvt_scores, best_tvt_scores, best_model = test(model, hetero_graph, [train_idx, val_idx, test_idx], best_model, best_tvt_scores)


# Print the validation loss
print(f"Best Validation Score: {cur_tvt_scores[1].item()}")

# for i in range(3000):
#     if hetero_graph.node_target['event'][i] != -1: # concepts have node target -1
#         print(preds['event'][i], hetero_graph.node_target['event'][i])
        
    
for i in range(1000):    
    if hetero_graph.node_target['event'][test_idx['event']][i] != -1:
        print(preds['event'][test_idx['event']][i], hetero_graph.node_target['event'][test_idx['event']][i])


# for i in range(1000):
#     # Extract the target value and check if it is not equal to -1
#     target = hetero_graph.node_target['event'][test_idx['event']][i].item()
#     if target != -1:
#         # Extract the prediction value
#         pred = preds['event'][test_idx['event']][i].item()
#         print(f"Prediction: {pred:.4f}, Target: {target:.4f}")


Best Validation Score: 28.550447463989258
tensor([4.3030], device='cuda:0', grad_fn=<SelectBackward0>) tensor([3.], device='cuda:0')
tensor([7.5159], device='cuda:0', grad_fn=<SelectBackward0>) tensor([5.], device='cuda:0')
tensor([138.2642], device='cuda:0', grad_fn=<SelectBackward0>) tensor([6.], device='cuda:0')
tensor([18.7437], device='cuda:0', grad_fn=<SelectBackward0>) tensor([94.], device='cuda:0')
tensor([183.1452], device='cuda:0', grad_fn=<SelectBackward0>) tensor([48.], device='cuda:0')
tensor([8.6889], device='cuda:0', grad_fn=<SelectBackward0>) tensor([4.], device='cuda:0')
tensor([189.9236], device='cuda:0', grad_fn=<SelectBackward0>) tensor([111.], device='cuda:0')
tensor([8.4420], device='cuda:0', grad_fn=<SelectBackward0>) tensor([7.], device='cuda:0')
tensor([11.8422], device='cuda:0', grad_fn=<SelectBackward0>) tensor([1.], device='cuda:0')
tensor([3.1953], device='cuda:0', grad_fn=<SelectBackward0>) tensor([1.], device='cuda:0')
tensor([7.5582], device='cuda:0', gr

### Hyperparameter Tuning

In [14]:
import wandb
import optuna

def objective(trial):
    # Initialize wandb run
    wandb.init(project="V2_MLG_PredEvents_GNN+LMM", config={
        "lr": trial.suggest_float("lr", 1e-5, 1e-1, log=True),
        "weight_decay": trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True),
        "hidden_size": trial.suggest_int("hidden_size", 16, 128),
        "attn_size": 32,  # Fixed value
        "epochs": trial.suggest_int("epochs", 150, 300),
        "num_layers": 2,  # Fixed value
    })

    # Use wandb config
    config = wandb.config

    # Initialize the model with the new hyperparameters
    model = HeteroGNN(hetero_graph, {
        'hidden_size': config.hidden_size,
        'attn_size': config.attn_size,
        'device': args['device']
    }, num_layers=config.num_layers, aggr="mean").to(args['device'])
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)

    # Initialize best scores with infinity
    best_tvt_scores = (float("inf"), float("inf"), float("inf"))

    # Training loop
    for epoch in range(config.epochs):
        train_loss = train(model, optimizer, hetero_graph, train_idx)
        cur_tvt_scores, best_tvt_scores, _ = test(model, hetero_graph, [train_idx, val_idx, test_idx], None, best_tvt_scores)

        # Log metrics to wandb
        wandb.log({
            "epoch": epoch,
            "train_loss": train_loss,
            "val_score": cur_tvt_scores[1],
            "best_val_score": best_tvt_scores[1],
        })

        # Update the best validation score
        if cur_tvt_scores[1] < best_tvt_scores[1]:
            best_tvt_scores = (cur_tvt_scores[0], cur_tvt_scores[1], cur_tvt_scores[2])

    # Finish wandb run
    wandb.finish()

    # The objective value is the best validation score
    return best_tvt_scores[1]

# Create a study object and optimize the objective function
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=500)

# Print the best hyperparameters
print("Best trial:")
trial = study.best_trial

print(f"Value: {trial.value}")
print("Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

  from .autonotebook import tqdm as notebook_tqdm
[I 2023-11-17 15:42:53,067] A new study created in memory with name: no-name-8c7c805b-21f1-45da-951b-ccd6bb051623
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mamgrobelnik[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
best_val_score,█▇▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,███▇▇▇▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁
val_score,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▆▃▄▄▆▅▂▅▄▁▁▁▁█▁

0,1
best_val_score,28.58196
epoch,196.0
train_loss,960.5827
val_score,115.50377


[I 2023-11-17 15:43:21,016] Trial 0 finished with value: 28.58196258544922 and parameters: {'lr': 0.0005128759386601744, 'weight_decay': 0.00011143511561573684, 'hidden_size': 71, 'epochs': 197}. Best is trial 0 with value: 28.58196258544922.


0,1
best_val_score,█▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,███████▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
val_score,█▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
best_val_score,28.34345
epoch,244.0
train_loss,2319.35352
val_score,28.34345


[I 2023-11-17 15:43:50,660] Trial 1 finished with value: 28.34345054626465 and parameters: {'lr': 3.506501759862614e-05, 'weight_decay': 0.0003395052165948722, 'hidden_size': 110, 'epochs': 245}. Best is trial 1 with value: 28.34345054626465.


0,1
best_val_score,████████████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,████████▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
val_score,███████████▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁

0,1
best_val_score,30.39174
epoch,238.0
train_loss,2590.52368
val_score,30.39174


[I 2023-11-17 15:44:14,747] Trial 2 finished with value: 30.391735076904297 and parameters: {'lr': 2.911051528768232e-05, 'weight_decay': 1.0138805940007106e-05, 'hidden_size': 58, 'epochs': 239}. Best is trial 1 with value: 28.34345054626465.


0,1
best_val_score,█▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train_loss,████▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▂▂▂▁▁
val_score,█▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_val_score,34.08345
epoch,150.0
train_loss,2819.29419
val_score,34.11644


[I 2023-11-17 15:44:31,835] Trial 3 finished with value: 34.08344650268555 and parameters: {'lr': 2.1254522410732966e-05, 'weight_decay': 1.57423049376229e-05, 'hidden_size': 19, 'epochs': 151}. Best is trial 1 with value: 28.34345054626465.


0,1
best_val_score,█▇▇▇▇▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,████▇▇▇▇▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
val_score,▂▂▁▁▁▁▁▁▁▁▂▂▃▄▄▃▃▂▂▂▂▂▂▂▃▃▄▄▄▄▅▆▇██▅▁▅▆▆

0,1
best_val_score,28.73788
epoch,165.0
train_loss,1547.87
val_score,56.0388


[I 2023-11-17 15:44:52,202] Trial 4 finished with value: 28.73788070678711 and parameters: {'lr': 0.00024145035285286874, 'weight_decay': 9.941609318037565e-05, 'hidden_size': 96, 'epochs': 166}. Best is trial 1 with value: 28.34345054626465.


0,1
best_val_score,█▇▆▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,████▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁
val_score,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▂▄▂▄▁▄▆▂▃▄▂▁▂▃▁██▆▅▁

0,1
best_val_score,27.62709
epoch,231.0
train_loss,800.51306
val_score,34.79512


[I 2023-11-17 15:45:14,880] Trial 5 finished with value: 27.62708854675293 and parameters: {'lr': 0.0005830498619745771, 'weight_decay': 3.0750473636308886e-05, 'hidden_size': 57, 'epochs': 232}. Best is trial 5 with value: 27.62708854675293.


0,1
best_val_score,██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▆▅▅▄▃▃▃▃▂▂▂▂▁▁▂▂▂▅▆▇▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁
val_score,▄▃▁▁▂▄▁▁▁▁▁▁▁▁▁▂▁▁█▂▁▂▁▁▁▄▅▃▁▁▁▁▁▁▁▂▁▁▁▁

0,1
best_val_score,31.53897
epoch,200.0
train_loss,208.72409
val_score,126.51942


[I 2023-11-17 15:45:34,337] Trial 6 finished with value: 31.53897476196289 and parameters: {'lr': 0.07535688363628285, 'weight_decay': 0.0008031969490745124, 'hidden_size': 35, 'epochs': 201}. Best is trial 5 with value: 27.62708854675293.


0,1
best_val_score,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,██▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_score,▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂█▃▁▃▁▇▁█▂█▆▁▁▁▂▅▁▄▃▂

0,1
best_val_score,29.22615
epoch,248.0
train_loss,60.63571
val_score,202.49635


[I 2023-11-17 15:45:54,768] Trial 7 finished with value: 29.226150512695312 and parameters: {'lr': 0.002646462803964806, 'weight_decay': 8.835572283522618e-05, 'hidden_size': 32, 'epochs': 249}. Best is trial 5 with value: 27.62708854675293.


0,1
best_val_score,████████████████████████▇▇▇▆▆▅▅▅▄▄▃▃▂▂▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█████████▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁
val_score,▆▇▇▇██████████████▇▇▇▇▇▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▁▁

0,1
best_val_score,30.21928
epoch,154.0
train_loss,2579.76538
val_score,30.21928


[I 2023-11-17 15:46:17,180] Trial 8 finished with value: 30.219282150268555 and parameters: {'lr': 2.315772786035054e-05, 'weight_decay': 0.00012081525803156375, 'hidden_size': 116, 'epochs': 155}. Best is trial 5 with value: 27.62708854675293.


0,1
best_val_score,██████████████▇▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█████████▇▇▇▇▇▇▇▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁
val_score,▇▇█████▇▇▇▇▇▆▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
best_val_score,28.1071
epoch,292.0
train_loss,2266.1582
val_score,28.1071


[I 2023-11-17 15:46:43,635] Trial 9 finished with value: 28.107099533081055 and parameters: {'lr': 4.8317574052644374e-05, 'weight_decay': 4.429478041930544e-05, 'hidden_size': 64, 'epochs': 293}. Best is trial 5 with value: 27.62708854675293.


0,1
best_val_score,██████████████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_score,▁▁▁▁▁▁▁▂▁▁▁▁▅▄▂█▁▁▇▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇▁

0,1
best_val_score,23.98135
epoch,280.0
train_loss,99.29585
val_score,140.05194


[I 2023-11-17 15:47:12,484] Trial 10 finished with value: 23.981351852416992 and parameters: {'lr': 0.0035050315664344163, 'weight_decay': 2.7717728682482574e-05, 'hidden_size': 84, 'epochs': 281}. Best is trial 10 with value: 23.981351852416992.
[W 2023-11-17 15:47:15,227] Trial 11 failed with parameters: {'lr': 0.0030133056083382513, 'weight_decay': 2.366877322726593e-05, 'hidden_size': 92, 'epochs': 289} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Adrian\anaconda3\envs\mlg\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Adrian\AppData\Local\Temp\ipykernel_148\1583648187.py", line 6, in objective
    wandb.init(project="V2_MLG_PredEvents_GNN+LMM", config={
  File "c:\Users\Adrian\anaconda3\envs\mlg\lib\site-packages\wandb\sdk\wandb_init.py", line 1189, in init
    raise e
  File "c:\Users\Adrian\anaconda3\envs\mlg\lib\site-packages\wandb\sdk\wandb_init

Problem at: C:\Users\Adrian\AppData\Local\Temp\ipykernel_148\1583648187.py 6 objective


KeyboardInterrupt: 