In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Obtaining dependency information for torch_geometric from https://files.pythonhosted.org/packages/65/4e/6f9a75548a93fedcd4514ae2de9bee1e91bade6b73252b4da32f0e42ac52/torch_geometric-2.4.0-py3-none-any.whl.metadata
  Downloading torch_geometric-2.4.0-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.9/63.9 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


In [2]:
from torch_geometric.data import DataLoader
from torch.utils.data import DataLoader as TorchDataLoader
import numpy as np
from transformers import AutoTokenizer
import torch
from torch import optim
from torch import nn
import time
import os
import pandas as pd
import os.path as osp
from torch_geometric.data import Dataset 
from torch_geometric.data import Data
from torch.utils.data import Dataset as TorchDataset
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GAT
from torch_geometric.nn import global_mean_pool
from transformers import AutoModel



# **Model**

In [3]:
class GraphEncoder(nn.Module):
    """Graph encoder: three GCN layers, mean pooling, then a two-layer MLP.

    Args:
        num_node_features: size of each input node feature vector.
        nout: size of the returned graph embedding.
        nhid: width of the hidden layer of the MLP head.
        graph_hidden_channels: width of every GCN layer.
    """

    def __init__(self, num_node_features, nout, nhid, graph_hidden_channels):
        super().__init__()
        self.nhid = nhid
        self.nout = nout
        # `relu` and `ln` are registered but not used by forward(); they are
        # kept so the module's attributes and state_dict keys stay the same.
        self.relu = nn.ReLU()
        self.ln = nn.LayerNorm((nout))
        # Message-passing stack.
        self.conv1 = GCNConv(num_node_features, graph_hidden_channels)
        self.conv2 = GCNConv(graph_hidden_channels, graph_hidden_channels)
        self.conv3 = GCNConv(graph_hidden_channels, graph_hidden_channels)
        # Projection head applied to the pooled graph representation.
        self.mol_hidden1 = nn.Linear(graph_hidden_channels, nhid)
        self.mol_hidden2 = nn.Linear(nhid, nout)

    def forward(self, graph_batch):
        """Return one `nout`-sized embedding per graph in `graph_batch`.

        `graph_batch` must expose `x`, `edge_index` and `batch` attributes.
        """
        edges = graph_batch.edge_index
        hidden = self.conv1(graph_batch.x, edges).relu()
        hidden = self.conv2(hidden, edges).relu()
        # No activation after the last convolution: pooling acts on raw outputs.
        hidden = self.conv3(hidden, edges)
        pooled = global_mean_pool(hidden, graph_batch.batch)
        return self.mol_hidden2(self.mol_hidden1(pooled).relu())
    
    
class GATEncoder(nn.Module):
    """Graph encoder built on a 4-layer GATv2 stack plus a linear projection.

    Args:
        nout: size of the returned graph embedding.
        nhid: number of features emitted per node by the GAT stack
            (passed to it as `out_channels`).
        attention_hidden: hidden width inside the GAT stack.
        n_in: size of each input node feature vector.
        dropout: dropout probability passed to the GAT stack.
    """

    def __init__(self, nout, nhid, attention_hidden, n_in, dropout):
        super().__init__()
        self.dropout = dropout
        self.n_in = n_in
        self.attention_hidden = attention_hidden
        self.n_hidden = nhid
        self.n_out = nout
        self.relu = nn.ReLU()
        # The GAT stack below emits `n_hidden` features per node (its
        # out_channels), so the projection must accept `n_hidden` inputs.
        # It was previously sized with `attention_hidden`, which only worked
        # when attention_hidden == nhid and raised a shape error otherwise.
        self.fc1 = nn.Linear(self.n_hidden, self.n_out)
        self.GATEnc = GAT(
            in_channels=self.n_in,
            hidden_channels=self.attention_hidden,
            out_channels=self.n_hidden,
            dropout=self.dropout,
            num_layers=4,
            v2=True,
        )

    def forward(self, gr):
        """Return one `nout`-sized embedding per graph in the batch `gr`.

        `gr` must expose `x`, `edge_index` and `batch` attributes.
        """
        x = self.GATEnc(gr.x, gr.edge_index)
        x = self.relu(x)
        x = global_mean_pool(x, gr.batch)
        x = self.fc1(x)
        # NOTE(review): the final ReLU makes every embedding component
        # non-negative; kept as in the original, confirm it is intended.
        x = self.relu(x)
        return x
    
    
class TextEncoder(nn.Module):
    """Wraps a pretrained transformer and exposes a single vector per input.

    Args:
        model_name: identifier handed to `AutoModel.from_pretrained`.
    """

    def __init__(self, model_name):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)

    def forward(self, input_ids, attention_mask):
        """Return the last-layer hidden state at sequence position 0.

        Position 0 is presumably the [CLS] token for BERT-style tokenizers
        (assumption; depends on the tokenizer used by the caller).
        """
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        return hidden_states[:, 0, :]
    
class Model(nn.Module):
    """Two-tower model pairing a GCN graph encoder with a text encoder.

    Args:
        model_name: pretrained transformer identifier for the text tower.
        num_node_features, nout, nhid, graph_hidden_channels: forwarded
            unchanged to `GraphEncoder`.
    """

    def __init__(self, model_name, num_node_features, nout, nhid, graph_hidden_channels):
        super().__init__()
        self.graph_encoder = GraphEncoder(
            num_node_features, nout, nhid, graph_hidden_channels
        )
        self.text_encoder = TextEncoder(model_name)

    def forward(self, graph_batch, input_ids, attention_mask):
        """Encode both modalities; returns `(graph_embedding, text_embedding)`."""
        graph_vec = self.graph_encoder(graph_batch)
        text_vec = self.text_encoder(input_ids, attention_mask)
        return (graph_vec, text_vec)

    def get_text_encoder(self):
        """Return the text tower."""
        return self.text_encoder

    def get_graph_encoder(self):
        """Return the graph tower."""
        return self.graph_encoder
    
class ModelGAT(nn.Module):
    """Two-tower model pairing a GAT graph encoder with a text encoder.

    Args:
        model_name: pretrained transformer identifier for the text tower.
        n_in, nout, nhid, attention_hidden, dropout: forwarded to
            `GATEncoder` (note that its positional order differs).
    """

    def __init__(self, model_name, n_in, nout, nhid, attention_hidden, dropout):
        super().__init__()
        # GATEncoder's signature is (nout, nhid, attention_hidden, n_in, dropout).
        self.graph_encoder = GATEncoder(nout, nhid, attention_hidden, n_in, dropout)
        self.text_encoder = TextEncoder(model_name)

    def forward(self, graph_batch, input_ids, attention_mask):
        """Encode both modalities; returns `(graph_embedding, text_embedding)`."""
        graph_vec = self.graph_encoder(graph_batch)
        text_vec = self.text_encoder(input_ids, attention_mask)
        return (graph_vec, text_vec)

    def get_text_encoder(self):
        """Return the text tower."""
        return self.text_encoder

    def get_graph_encoder(self):
        """Return the graph tower."""
        return self.graph_encoder

# **Dataloader**

In [4]:
!pip install gdown
!gdown --id 13lNOBsHGviiS8276DVgl7EmfTO8icxV6
!unzip /kaggle/working/Public.zip
!rm -rf /kaggle/working/Public.zip

Collecting gdown
  Obtaining dependency information for gdown from https://files.pythonhosted.org/packages/68/fb/c1bb2cfbf1ad068129e3d67f3420649d38183cca7118f4fa46cfe3c3adab/gdown-5.0.0-py3-none-any.whl.metadata
  Downloading gdown-5.0.0-py3-none-any.whl.metadata (5.6 kB)
Downloading gdown-5.0.0-py3-none-any.whl (16 kB)
Installing collected packages: gdown
Successfully installed gdown-5.0.0
Downloading...
From (original): https://drive.google.com/uc?id=13lNOBsHGviiS8276DVgl7EmfTO8icxV6
From (redirected): https://drive.google.com/uc?id=13lNOBsHGviiS8276DVgl7EmfTO8icxV6&confirm=t&uuid=0959c727-54b0-4347-9a2e-aef169c07b2c
To: /kaggle/working/Public.zip
100%|████████████████████████████████████████| 172M/172M [00:14<00:00, 12.0MB/s]
Archive:  /kaggle/working/Public.zip
   creating: Public/
  inflating: Public/.DS_Store        
  inflating: __MACOSX/Public/._.DS_Store  
  inflating: Public/submission.csv   
  inflating: __MACOSX/Public/._submission.csv  
  inflating: Public/Model.py        

In [9]:
class GraphTextDataset(Dataset):
    """Molecule graphs paired with tokenized text, one file per compound.

    `<root>/<split>.tsv` is read as a headerless, tab-separated table whose
    column 0 is the compound id (cid) and column 1 its description. Raw graphs
    are read from `<root>/raw/<cid>.graph`; processed samples are written to
    `<root>/processed/<split>/data_<cid>.pt`.

    Args:
        root: dataset root directory.
        gt: mapping from substructure identifier (str) to its feature vector;
            must contain the key 'UNK' for identifiers that are not in it.
        split: split name, used for both the .tsv file and the processed dir.
        tokenizer: callable tokenizer used by `process()`; only needed when
            the processed files have to be (re)built.
        transform, pre_transform: forwarded to the parent `Dataset`.
    """

    def __init__(self, root, gt, split, tokenizer=None, transform=None, pre_transform=None):
        # Everything below is read by the path properties and by process(), so
        # it must be in place before the parent constructor runs.
        self.root = root
        self.gt = gt
        self.split = split
        self.tokenizer = tokenizer
        self.description = pd.read_csv(os.path.join(self.root, split+'.tsv'), sep='\t', header=None)
        # Resulting layout: {1: {cid: description}}.
        self.description = self.description.set_index(0).to_dict()
        self.cids = list(self.description[1].keys())
        # Positional index -> cid, in the order the cids appear in the file.
        self.idx_to_cid = dict(enumerate(self.cids))
        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return [str(cid) + ".graph" for cid in self.cids]

    @property
    def processed_file_names(self):
        return ['data_{}.pt'.format(cid) for cid in self.cids]

    @property
    def raw_dir(self) -> str:
        return osp.join(self.root, 'raw')

    @property
    def processed_dir(self) -> str:
        return osp.join(self.root, 'processed', self.split)

    def download(self):
        # Raw files are expected to be present already; nothing to fetch.
        pass

    def process_graph(self, raw_path):
        """Parse one .graph file into `(edge_index, x)`.

        The file is read as: one header line, one edge per line
        (whitespace-separated integers) up to the first blank line, one more
        header line, then one node per line whose last whitespace-separated
        token is the substructure identifier looked up in `self.gt`.

        Returns:
            edge_index: LongTensor with one column per edge; shape (2, 0)
                when the file lists no edges.
            x: FloatTensor with one row per node.
        """
        edges = []
        features = []
        with open(raw_path, 'r') as f:
            next(f, None)  # header of the edge section
            for line in f:
                if not line.strip():
                    # Blank (or whitespace-only) line ends the edge section.
                    break
                edges.append(tuple(map(int, line.split())))
            next(f, None)  # header of the node section
            for line in f:  # get mol2vec features
                tokens = line.split()
                if not tokens:
                    # Tolerate stray blank lines, e.g. at the end of the file.
                    continue
                substruct_id = tokens[-1]
                key = substruct_id if substruct_id in self.gt else 'UNK'
                features.append(self.gt[key])
        if edges:
            edge_index = torch.tensor(edges, dtype=torch.long).T
        else:
            # Keep the two-row layout for graphs without edges (for example a
            # single atom); an empty list would otherwise give shape (0,).
            edge_index = torch.empty((2, 0), dtype=torch.long)
        # Stack through numpy first: building a tensor straight from a list of
        # arrays is slow.
        x = torch.tensor(np.asarray(features, dtype=np.float32))
        return edge_index, x

    def process(self):
        """Tokenize each description, parse each graph, save one file per cid."""
        for raw_path in self.raw_paths:
            # File name is '<cid>.graph'; basename/splitext is separator-agnostic.
            cid = int(osp.splitext(osp.basename(raw_path))[0])
            text_input = self.tokenizer([self.description[1][cid]],
                                   return_tensors="pt",
                                   truncation=True,
                                   max_length=256,
                                   padding="max_length",
                                   add_special_tokens=True,)
            edge_index, x = self.process_graph(raw_path)
            data = Data(x=x, edge_index=edge_index, input_ids=text_input['input_ids'], attention_mask=text_input['attention_mask'])

            torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(cid)))

    def len(self):
        # One processed file per cid, so this equals len(processed_file_names).
        return len(self.cids)

    def get(self, idx):
        """Load the processed sample at positional index `idx`."""
        return self.get_cid(self.idx_to_cid[idx])

    def get_cid(self, cid):
        """Load the processed sample for compound id `cid`."""
        data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(cid)))
        return data
    
    
class GraphDataset(Dataset):
    """Molecule graphs only (no text), one file per compound.

    `<root>/<split>.txt` is read as a headerless, tab-separated table whose
    column 0 holds the compound ids (cids). Raw graphs are read from
    `<root>/raw/<cid>.graph`; processed samples are written to
    `<root>/processed/<split>/data_<cid>.pt`.

    Args:
        root: dataset root directory.
        gt: mapping from substructure identifier (str) to its feature vector;
            must contain the key 'UNK' for identifiers that are not in it.
        split: split name, used for both the .txt file and the processed dir.
        transform, pre_transform: forwarded to the parent `Dataset`.
    """

    def __init__(self, root, gt, split, transform=None, pre_transform=None):
        # Everything below is read by the path properties and by process(), so
        # it must be in place before the parent constructor runs.
        self.root = root
        self.gt = gt
        self.split = split
        self.description = pd.read_csv(os.path.join(self.root, split+'.txt'), sep='\t', header=None)
        self.cids = self.description[0].tolist()
        # Positional index -> cid, in the order the cids appear in the file.
        self.idx_to_cid = dict(enumerate(self.cids))
        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return [str(cid) + ".graph" for cid in self.cids]

    @property
    def processed_file_names(self):
        return ['data_{}.pt'.format(cid) for cid in self.cids]

    @property
    def raw_dir(self) -> str:
        return osp.join(self.root, 'raw')

    @property
    def processed_dir(self) -> str:
        return osp.join(self.root, 'processed', self.split)

    def download(self):
        # Raw files are expected to be present already; nothing to fetch.
        pass

    def process_graph(self, raw_path):
        """Parse one .graph file into `(edge_index, x)`.

        The file is read as: one header line, one edge per line
        (whitespace-separated integers) up to the first blank line, one more
        header line, then one node per line whose last whitespace-separated
        token is the substructure identifier looked up in `self.gt`.

        Returns:
            edge_index: LongTensor with one column per edge; shape (2, 0)
                when the file lists no edges.
            x: FloatTensor with one row per node.
        """
        edges = []
        features = []
        with open(raw_path, 'r') as f:
            next(f, None)  # header of the edge section
            for line in f:
                if not line.strip():
                    # Blank (or whitespace-only) line ends the edge section.
                    break
                edges.append(tuple(map(int, line.split())))
            next(f, None)  # header of the node section
            for line in f:
                tokens = line.split()
                if not tokens:
                    # Tolerate stray blank lines, e.g. at the end of the file.
                    continue
                substruct_id = tokens[-1]
                key = substruct_id if substruct_id in self.gt else 'UNK'
                features.append(self.gt[key])
        if edges:
            edge_index = torch.tensor(edges, dtype=torch.long).T
        else:
            # Keep the two-row layout for graphs without edges (for example a
            # single atom); an empty list would otherwise give shape (0,).
            edge_index = torch.empty((2, 0), dtype=torch.long)
        # Stack through numpy first: building a tensor straight from a list of
        # arrays is slow.
        x = torch.tensor(np.asarray(features, dtype=np.float32))
        return edge_index, x

    def process(self):
        """Parse each raw graph and save one processed file per cid."""
        for raw_path in self.raw_paths:
            # File name is '<cid>.graph'; basename/splitext is separator-agnostic.
            cid = int(osp.splitext(osp.basename(raw_path))[0])
            edge_index, x = self.process_graph(raw_path)
            data = Data(x=x, edge_index=edge_index)
            torch.save(data, osp.join(self.processed_dir, 'data_{}.pt'.format(cid)))

    def len(self):
        # One processed file per cid, so this equals len(processed_file_names).
        return len(self.cids)

    def get(self, idx):
        """Load the processed sample at positional index `idx`."""
        return self.get_cid(self.idx_to_cid[idx])

    def get_cid(self, cid):
        """Load the processed sample for compound id `cid`."""
        data = torch.load(osp.join(self.processed_dir, 'data_{}.pt'.format(cid)))
        return data

    def get_idx_to_cid(self):
        """Return the positional-index -> cid mapping."""
        return self.idx_to_cid
    
class TextDataset(TorchDataset):
    """Map-style dataset yielding one tokenized sentence per line of a file.

    Args:
        file_path: UTF-8 text file with one sentence per line.
        tokenizer: object exposing `encode_plus` (Hugging Face style).
        max_length: length every sentence is padded or truncated to.
    """

    def __init__(self, file_path, tokenizer, max_length=256):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.sentences = self.load_sentences(file_path)

    def load_sentences(self, file_path):
        """Read `file_path` and return its lines with surrounding whitespace removed."""
        with open(file_path, 'r', encoding='utf-8') as handle:
            return [raw_line.strip() for raw_line in handle]

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return a dict with the 'input_ids' and 'attention_mask' of sentence `idx`."""
        tokenized = self.tokenizer.encode_plus(
            self.sentences[idx],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        # The tokenizer returns batched tensors; squeeze drops the size-1 dims.
        return {
            field: tokenized[field].squeeze()
            for field in ('input_ids', 'attention_mask')
        }

# **Training**

In [7]:
CE = torch.nn.CrossEntropyLoss()


def contrastive_loss(v1, v2):
    """Symmetric in-batch contrastive loss between two sets of embeddings.

    Row i of `v1` is treated as the match of row i of `v2`: the pairwise
    dot-product matrix is scored with cross-entropy against the diagonal,
    once along rows and once along columns, and the two terms are summed.
    """
    similarity = v1.matmul(v2.transpose(0, 1))
    targets = torch.arange(similarity.shape[0], device=v1.device)
    row_term = CE(similarity, targets)
    column_term = CE(similarity.transpose(0, 1), targets)
    return row_term + column_term


# Pretrained text encoder. A lighter alternative tried earlier was
# 'distilbert-base-uncased'.
model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# this file from a trusted source. `[()]` unwraps the 0-d object array into the
# dict that was stored in it.
gt = np.load("/kaggle/working/Public/data/token_embedding_dict.npy", allow_pickle=True)[()]
val_dataset = GraphTextDataset(root='/kaggle/working/Public/data', gt=gt, split='val', tokenizer=tokenizer)
train_dataset = GraphTextDataset(root='/kaggle/working/Public/data', gt=gt, split='train', tokenizer=tokenizer)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

nb_epochs = 5
batch_size = 32
learning_rate = 2e-5

# The contrastive loss uses the other samples of a batch as negatives, so its
# value depends on how batches are composed. Validation batches are therefore
# kept fixed (no shuffling) to make the per-epoch validation losses comparable.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

model = ModelGAT(model_name=model_name, n_in=300, nout=768, nhid=1000, attention_hidden=1000, dropout=0.3)
model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=learning_rate,
                                betas=(0.9, 0.999),
                                weight_decay=0.01)



pytorch_model.bin:   0%|          | 0.00/442M [00:00<?, ?B/s]

In [8]:
epoch = 0
loss = 0                # running sum of training losses since the last report
losses = []             # one entry (summed loss) per `printEvery` iterations
count_iter = 0
time1 = time.time()
printEvery = 50
best_validation_loss = float('inf')

for i in range(nb_epochs):
    print('-----EPOCH{}-----'.format(i+1))
    model.train()
    for batch in train_loader:
        # Split the text tensors off the batch so that only graph data is left
        # in the object handed to the graph encoder.
        input_ids = batch.input_ids
        batch.pop('input_ids')
        attention_mask = batch.attention_mask
        batch.pop('attention_mask')
        graph_batch = batch

        x_graph, x_text = model(graph_batch.to(device),
                                input_ids.to(device),
                                attention_mask.to(device))
        current_loss = contrastive_loss(x_graph, x_text)
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()
        loss += current_loss.item()

        count_iter += 1
        if count_iter % printEvery == 0:
            time2 = time.time()
            print("Iteration: {0}, Time: {1:.4f} s, training loss: {2:.4f}".format(count_iter,
                                                                        time2 - time1, loss/printEvery))
            losses.append(loss)
            loss = 0

    model.eval()
    val_loss = 0
    # No gradients are needed for validation. Without no_grad() every forward
    # pass keeps its autograd graph alive, which wastes GPU memory.
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch.input_ids
            batch.pop('input_ids')
            attention_mask = batch.attention_mask
            batch.pop('attention_mask')
            graph_batch = batch
            x_graph, x_text = model(graph_batch.to(device),
                                    input_ids.to(device),
                                    attention_mask.to(device))
            current_loss = contrastive_loss(x_graph, x_text)
            val_loss += current_loss.item()
    print('-----EPOCH'+str(i+1)+'----- done.  Validation loss: ', str(val_loss/len(val_loader)) )
    # Checkpoint only on a strict improvement of the summed validation loss.
    if val_loss < best_validation_loss:
        best_validation_loss = val_loss
        print('validation loss improoved saving checkpoint...')
        save_path = os.path.join('./', 'model'+str(i)+'.pt')
        torch.save({
        'epoch': i,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        # Historical key name: the value is the summed validation loss, not an
        # accuracy. Kept for existing readers of the checkpoint; the correctly
        # named duplicate follows.
        'validation_accuracy': val_loss,
        'validation_loss': val_loss,
        'loss': loss,
        }, save_path)
        print('checkpoint saved to: {}'.format(save_path))

-----EPOCH1-----
Iteration: 50, Time: 53.5008 s, training loss: 6.0607
Iteration: 100, Time: 106.1300 s, training loss: 4.5183
Iteration: 150, Time: 158.6364 s, training loss: 3.7243
Iteration: 200, Time: 211.3610 s, training loss: 3.1291
Iteration: 250, Time: 264.3666 s, training loss: 2.8851
Iteration: 300, Time: 317.3375 s, training loss: 2.6394
Iteration: 350, Time: 369.5026 s, training loss: 2.2959
Iteration: 400, Time: 422.3356 s, training loss: 2.1901
Iteration: 450, Time: 474.8640 s, training loss: 2.0665
Iteration: 500, Time: 526.9663 s, training loss: 1.9357
Iteration: 550, Time: 579.8844 s, training loss: 1.8173
Iteration: 600, Time: 632.2532 s, training loss: 1.7616
Iteration: 650, Time: 684.5142 s, training loss: 1.7065
Iteration: 700, Time: 737.3302 s, training loss: 1.5871
Iteration: 750, Time: 789.6431 s, training loss: 1.4000
Iteration: 800, Time: 842.2152 s, training loss: 1.4611
-----EPOCH1----- done.  Validation loss:  1.2217628681459105
validation loss improoved sa

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

print('Loading best model...')
# Load onto the CPU: the checkpoint also contains the optimizer state, and
# restoring it directly onto the GPU would allocate a second copy of the
# weights plus optimizer tensors next to the live model. load_state_dict
# copies the values into the existing parameters.
checkpoint = torch.load(save_path, map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

graph_model = model.get_graph_encoder()
text_model = model.get_text_encoder()

test_cids_dataset = GraphDataset(root='/kaggle/working/Public/data/', gt=gt, split='test_cids')
test_text_dataset = TextDataset(file_path='/kaggle/working/Public/data/test_text.txt', tokenizer=tokenizer)

idx_to_cid = test_cids_dataset.get_idx_to_cid()

# shuffle=False on both loaders keeps embeddings in file order, which is what
# the row/column order of the similarity matrix relies on.
test_loader = DataLoader(test_cids_dataset, batch_size=batch_size, shuffle=False)
test_text_loader = TorchDataLoader(test_text_dataset, batch_size=batch_size, shuffle=False)

graph_embeddings = []
text_embeddings = []
# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass (this cell previously ended in a CUDA out-of-memory error).
with torch.no_grad():
    for batch in test_loader:
        graph_embeddings.extend(graph_model(batch.to(device)).cpu().tolist())

    for batch in test_text_loader:
        text_out = text_model(batch['input_ids'].to(device),
                              attention_mask=batch['attention_mask'].to(device))
        text_embeddings.extend(text_out.cpu().tolist())

# Rows are texts, columns are graphs.
similarity = cosine_similarity(text_embeddings, graph_embeddings)

solution = pd.DataFrame(similarity)
solution['ID'] = solution.index
# Move the ID column to the front.
solution = solution[['ID'] + [col for col in solution.columns if col!='ID']]
solution.to_csv('submission.csv', index=False)

Loading best model...


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.89 GiB total capacity; 15.36 GiB already allocated; 24.12 MiB free; 15.57 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# **Hyperparameter tuning**

In [11]:
import optuna
import wandb

def objective(trial):
    """Optuna objective: train once with sampled hyper-parameters.

    Args:
        trial: object exposing `suggest_int` / `suggest_float` (an Optuna trial).

    Returns:
        The validation loss reported by `train_loop` under the key 'val_loss'.
        It is returned as-is because the study in this notebook is created
        with direction="minimize"; the previous `-val_loss` made that study
        search for the highest validation loss.
    """
    params = {
        "nb_epochs": trial.suggest_int("nb_epochs", 2, 10),
        "batch_size": trial.suggest_int("batch_size", 32, 64),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
        "nhid": trial.suggest_int("nhid", 100, 1500),
        "dropout": trial.suggest_float("dropout", 0.1, 0.5),
    }

    training_results = train_loop(params)
    return training_results['val_loss']

In [15]:
import os
from getpass import getpass

# SECURITY: never hard-code the W&B API key in the notebook, since it ends up
# in version control and in every shared copy. The key that used to be written
# here must be treated as leaked and revoked in the W&B account settings.
# Provide the key through the environment (for example a notebook secret
# exported as WANDB_API_KEY); otherwise it is requested interactively.
if not os.environ.get('WANDB_API_KEY'):
    os.environ['WANDB_API_KEY'] = getpass('Weights & Biases API key: ')

In [17]:
# Authenticate this session with Weights & Biases before wandb.init() is called further down.
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [18]:
def train_loop(params):
    """Train a `ModelGAT` with the given hyper-parameters.

    Args:
        params: dict with the keys 'nb_epochs', 'batch_size', 'learning_rate',
            'nhid' (used both as the GAT output width and as its hidden
            width) and 'dropout'.

    Returns:
        dict with
            'val_loss': lowest mean per-batch validation loss over all epochs
                (inf if no epoch ran),
            'best_epoch': zero-based epoch that produced it (None if none),
            'checkpoint_path': path of the checkpoint saved for that epoch
                (None if none),
            'train_losses': training loss summed over each block of 50
                iterations.

    Side effects: prints progress and writes './model<epoch>.pt' whenever the
    validation loss improves.
    """

    CE = torch.nn.CrossEntropyLoss()
    def contrastive_loss(v1, v2):
        # Symmetric in-batch contrastive loss: row i of v1 matches row i of v2.
        logits = torch.matmul(v1,torch.transpose(v2, 0, 1))
        labels = torch.arange(logits.shape[0], device=v1.device)
        return CE(logits, labels) + CE(torch.transpose(logits, 0, 1), labels)


    model_name = "allenai/scibert_scivocab_uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
    # load this file from a trusted source.
    gt = np.load("/kaggle/working/Public/data/token_embedding_dict.npy", allow_pickle=True)[()]
    val_dataset = GraphTextDataset(root='/kaggle/working/Public/data', gt=gt, split='val', tokenizer=tokenizer)
    train_dataset = GraphTextDataset(root='/kaggle/working/Public/data', gt=gt, split='train', tokenizer=tokenizer)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    nb_epochs = params['nb_epochs']
    batch_size = params['batch_size']
    learning_rate = params['learning_rate']

    # The loss uses the other samples of a batch as negatives, so validation
    # batches are kept fixed to make epochs (and trials with the same batch
    # size) comparable.
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    model = ModelGAT(model_name=model_name, n_in=300, nout=768, nhid=params['nhid'], attention_hidden=params['nhid'], dropout=params['dropout'])
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=learning_rate,
                                    betas=(0.9, 0.999),
                                    weight_decay=0.01)

    loss = 0
    losses = []
    count_iter = 0
    time1 = time.time()
    printEvery = 50
    best_validation_loss = float('inf')
    best_epoch = None
    save_path = None

    for i in range(nb_epochs):
        print('-----EPOCH{}-----'.format(i+1))
        model.train()
        for batch in train_loader:
            # Split the text tensors off the batch so that only graph data is
            # left in the object handed to the graph encoder.
            input_ids = batch.input_ids
            batch.pop('input_ids')
            attention_mask = batch.attention_mask
            batch.pop('attention_mask')
            graph_batch = batch

            x_graph, x_text = model(graph_batch.to(device),
                                    input_ids.to(device),
                                    attention_mask.to(device))
            current_loss = contrastive_loss(x_graph, x_text)
            optimizer.zero_grad()
            current_loss.backward()
            optimizer.step()
            loss += current_loss.item()

            count_iter += 1
            if count_iter % printEvery == 0:
                time2 = time.time()
                print("Iteration: {0}, Time: {1:.4f} s, training loss: {2:.4f}".format(count_iter,
                                                                            time2 - time1, loss/printEvery))
                losses.append(loss)
                loss = 0

        model.eval()
        val_loss = 0
        # No gradients are needed for validation; without no_grad() every
        # forward pass keeps its autograd graph alive and wastes GPU memory.
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch.input_ids
                batch.pop('input_ids')
                attention_mask = batch.attention_mask
                batch.pop('attention_mask')
                graph_batch = batch
                x_graph, x_text = model(graph_batch.to(device),
                                        input_ids.to(device),
                                        attention_mask.to(device))
                current_loss = contrastive_loss(x_graph, x_text)
                val_loss += current_loss.item()
        print('-----EPOCH'+str(i+1)+'----- done.  Validation loss: ', str(val_loss/len(val_loader)) )
        # Checkpoint only on a strict improvement of the summed validation loss.
        if val_loss < best_validation_loss:
            best_validation_loss = val_loss
            best_epoch = i
            print('validation loss improoved saving checkpoint...')
            save_path = os.path.join('./', 'model'+str(i)+'.pt')
            torch.save({
            'epoch': i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Historical key name: the value is the summed validation loss,
            # not an accuracy. Kept for existing readers of the checkpoint.
            'validation_accuracy': val_loss,
            'validation_loss': val_loss,
            'loss': loss,
            }, save_path)
            print('checkpoint saved to: {}'.format(save_path))

    # Report the mean per-batch loss so that trials run with different batch
    # sizes (hence different numbers of validation batches) share one scale.
    n_val_batches = max(len(val_loader), 1)
    return {
        'val_loss': best_validation_loss / n_val_batches,
        'best_epoch': best_epoch,
        'checkpoint_path': save_path,
        'train_losses': losses,
    }

In [19]:
# Without n_trials (or a timeout) study.optimize() keeps launching trials until
# the kernel is interrupted. Each trial is a full training run, so keep this small.
N_TRIALS = 20

study = optuna.create_study(direction="minimize")
# gc_after_trial runs the garbage collector between trials so that the objects
# of a finished trial are collected before the next model is built.
study.optimize(objective, n_trials=N_TRIALS, gc_after_trial=True)

# Initialize WandB with project name and run name
wandb.init(project="ALTEGRAD", name="optuna-tuning")

# Track hyperparameters and metrics.
# wandb.log() takes a dict of metrics (its second positional argument is the
# step, not a value), so the best hyper-parameters go into the run config and
# the best objective value is logged as a metric.
wandb.config.update(study.best_params)
wandb.log({"val_loss": study.best_value})

[I 2024-01-23 10:28:15,956] A new study created in memory with name: no-name-244a75e4-62ae-4d87-b989-76b573ab5265


-----EPOCH1-----


[W 2024-01-23 10:28:19,993] Trial 0 failed with parameters: {'nb_epochs': 3, 'batch_size': 62, 'learning_rate': 4.081547080171527e-05, 'nhid': 1368, 'dropout': 0.12063357205828665} because of the following error: OutOfMemoryError('CUDA out of memory. Tried to allocate 186.00 MiB (GPU 0; 15.89 GiB total capacity; 15.19 GiB already allocated; 114.12 MiB free; 15.49 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF').
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_26/2249016247.py", line 11, in objective
    training_results = train_loop({
  File "/tmp/ipykernel_26/3162336210.py", line 51, in train_loop
    x_graph, x_text = model(graph_batch.to(device),
  File "/opt/conda/lib/python3.10/site-package

OutOfMemoryError: CUDA out of memory. Tried to allocate 186.00 MiB (GPU 0; 15.89 GiB total capacity; 15.19 GiB already allocated; 114.12 MiB free; 15.49 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF