In [1]:
from typing import Dict, List, Optional
from collections import Counter
import os
import csv
!pip install torchmetrics
!pip install pytorch-metric-learning
import urllib.request
from torch.utils.data import DataLoader
!pip install pytorch-lightning
import torch.optim as optim
import torchmetrics
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
# Fetch the conlleval.py evaluation script into the working directory and
# import its `evaluate` function.
# NOTE(review): the script is downloaded from the `master` branch on every run
# and then imported, so the code that gets executed is not pinned to a known
# revision -- consider vendoring the file or pinning a commit hash.
url = 'https://raw.githubusercontent.com/sighsmile/conlleval/master/conlleval.py'
filename = 'conlleval.py'
urllib.request.urlretrieve(url, filename)
from conlleval import evaluate


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
class Tokenizer:
    """Lower-casing whitespace tokenizer with a frequency-capped vocabulary.

    Index 0 is reserved for ``<pad>`` and index 1 for ``<unk>``. All other
    indices are assigned by :meth:`fit`, most frequent training token first.
    """

    def __init__(self):
        # two special tokens for padding and unknown
        self.token2idx = {"<pad>": 0, "<unk>": 1}
        self.idx2token = ["<pad>", "<unk>"]
        self.is_fit = False

    @property
    def pad_id(self):
        """Integer id of the padding token."""
        return self.token2idx["<pad>"]

    def __len__(self):
        """Vocabulary size, special tokens included."""
        return len(self.idx2token)

    def fit(self, train_texts: List[str], vocab_size: int = 20000):
        """Build the vocabulary from the training texts.

        Args:
            train_texts: Sentences whose tokens are separated by whitespace.
            vocab_size: Maximum vocabulary size, counting the two special
                tokens. Defaults to the value previously hard-coded here.

        The vocabulary is rebuilt from scratch on every call, so re-running
        the cell (or fitting twice) no longer appends duplicate entries.
        """
        counter = Counter()
        for text in train_texts:
            counter.update(text.lower().split())

        specials = ["<pad>", "<unk>"]
        # A literal "<pad>"/"<unk>" in the corpus must not claim a second
        # slot, otherwise the special ids would be remapped away from 0 and 1.
        for special in specials:
            counter.pop(special, None)

        budget = max(vocab_size - len(specials), 0)
        self.idx2token = specials + [token for token, _ in counter.most_common(budget)]
        self.token2idx = {token: i for i, token in enumerate(self.idx2token)}

        self.is_fit = True

    def encode(self, text: str, max_length: Optional[int] = None) -> List[int]:
        """Convert a sentence into a list of token ids.

        Args:
            text: Sentence whose tokens are separated by whitespace.
            max_length: When given, the result is right-padded with the pad
                id or truncated so that it has exactly this many ids.

        Returns:
            One id per token; tokens outside the vocabulary map to ``<unk>``.

        Raises:
            Exception: If :meth:`fit` has not been called yet.
        """
        if not self.is_fit:
            raise Exception("Please fit the tokenizer on the training tokens")

        # Split the text into tokens and encode each token using the token2idx mapping
        unk_id = self.token2idx["<unk>"]
        token_ids = [self.token2idx.get(token, unk_id) for token in text.lower().split()]

        # Pad or truncate the token ids based on the max_length parameter
        if max_length is not None:
            if len(token_ids) < max_length:
                token_ids += [self.pad_id] * (max_length - len(token_ids))
            else:
                token_ids = token_ids[:max_length]

        return token_ids


In [3]:
def load_raw_data(filepath: str, with_tags: bool = True, encoding: str = "utf-8"):
    """Read a dataset file into a dict of parallel string lists.

    Args:
        filepath: Path of the file to read.
        with_tags: If True the file is parsed as a two-column CSV
            (text, tags). If False every line is one whitespace-tokenised
            sentence.
        encoding: Text encoding used to open the file.

    Returns:
        ``{'text': [...]}`` and, when ``with_tags`` is True, a parallel
        ``'tags'`` list.

    Raises:
        ValueError: If a non-empty CSV row does not have exactly two columns.
    """
    data = {'text': []}
    if with_tags:
        data['tags'] = []
        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires for correct parsing of quoted fields.
        with open(filepath, newline='', encoding=encoding) as f:
            reader = csv.reader(f)
            for row in reader:
                if not row:
                    # blank line (e.g. a trailing newline): nothing to load
                    continue
                if len(row) != 2:
                    raise ValueError(
                        f"{filepath}: line {reader.line_num}: expected 2 columns "
                        f"(text, tags), got {len(row)}"
                    )
                text, tags = row
                data['text'].append(text)
                data['tags'].append(tags)
    else:
        with open(filepath, encoding=encoding) as f:
            for line in f:
                # blank lines are kept as empty sentences so that line
                # numbers stay aligned with the input file
                data['text'].append(line.strip())
    return data

In [4]:
# Dataset files are expected next to the notebook (current working directory).
data_dir = os.getcwd()
train_raw, val_raw = (
    load_raw_data(os.path.join(data_dir, name)) for name in ("train.csv", "val.csv")
)
# The test split ships without tags: one sentence per line.
test_raw = load_raw_data(os.path.join(data_dir, "test_tokens.txt"), with_tags=False)

# The vocabulary is built from the training sentences only.
tokenizer = Tokenizer()
tokenizer.fit(train_raw['text'])

In [5]:
# Sanity check: encode a sample sentence and pad it to 10 ids.
newtext = 'how are you doing today ?'
tokenizer.encode(newtext, max_length=10)


[803, 57, 256, 1313, 755, 1813, 0, 0, 0, 0]

In [6]:
# Number of training sentences that were loaded.
print(len(train_raw['text']))

14041


In [7]:
# Upload the dataset.
# On Google Colab, uncomment the two lines below:
#from google.colab import files
#uploaded = files.upload()

In [8]:
class NERDataset:
    """Map-style dataset of fixed-length token ids, padding masks and tag ids.

    Tag id 0 is the padding tag; the real tags occupy ids 1-9.
    """

    idx2tag = ['<pad>', 'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG','B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']
    tag2idx = {'O': 1, 'B-PER': 2, 'I-PER': 3, 'B-ORG': 4, 'I-ORG': 5, 'B-LOC': 6, 'I-LOC': 7, 'B-MISC': 8, 'I-MISC': 9}

    def __init__(self, raw_data: Dict[str, List[str]], tokenizer: Tokenizer, max_length: int = 128):
        """Encode every sentence (and its tags, when present) up front.

        Args:
            raw_data: Dict with a 'text' list and optionally a parallel
                'tags' list of whitespace-separated tag strings.
            tokenizer: Object used to encode the sentences and to look up
                the padding id.
            max_length: Length every encoded sequence is padded/truncated to.
        """
        self.tokenizer = tokenizer
        self.token_ids = [
            tokenizer.encode(sentence, max_length=max_length)
            for sentence in raw_data['text']
        ]
        # Tags are only available for the training and validation splits.
        self.with_tags = 'tags' in raw_data
        self.tag_ids = (
            [self.encode_tags(tag_line, max_length=max_length) for tag_line in raw_data['tags']]
            if self.with_tags
            else []
        )

    def encode_tags(self, tags: str, max_length: Optional[int] = None):
        """Map a whitespace-separated tag string to a list of tag ids.

        When ``max_length`` is given, the list is truncated to that length
        or right-padded with 0 (the padding tag) to reach it.
        """
        ids = [self.tag2idx[name] for name in tags.split()]
        if max_length is None:
            return ids
        missing = max_length - len(ids)
        if missing < 0:
            # more tags than slots: keep the leading max_length tags
            return ids[:max_length]
        # 0 as padding for tags
        return ids + [0] * missing

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.token_ids)

    def __getitem__(self, idx):
        """Return ``(token_ids, pad_mask[, tag_ids])`` for one sentence.

        ``pad_mask`` is True at the positions that hold the padding token.
        """
        ids = torch.LongTensor(self.token_ids[idx])
        pad_mask = ids == self.tokenizer.pad_id
        if not self.with_tags:
            # for testing
            return ids, pad_mask
        # for training and validation
        return ids, pad_mask, torch.LongTensor(self.tag_ids[idx])
        

In [9]:
# Encode the three splits with the shared tokenizer (default max_length).
tr_data, va_data, te_data = [
    NERDataset(split, tokenizer) for split in (train_raw, val_raw, test_raw)
]

In [10]:
# Inspect one training example: dataset size, the raw sentence, and the
# tensors the dataset returns for it.
samplenum = 265
print(len(tr_data))
print(train_raw['text'][samplenum])
print(tr_data[samplenum])


14041
The Greek socialist party 's executive bureau gave Prime Minister Costas Simitis its backing if he chooses to call snap elections , its general secretary Costas Skandalidis told reporters on Thursday .
(tensor([    2,  1638,  1466,   147,    15,   996,  1877,   407,   229,   103,
         2415,  2672,    63,  2698,   141,    26, 11142,     7,   629,  2673,
          269,     4,    63,   335,   750,  2415,  4652,    90,   524,    13,
           70,     3,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
           

In [11]:
class TransformerModel(nn.Module):
    """Token classifier: embedding -> Transformer encoder -> per-token linear layer.

    NOTE(review): no positional encoding is added to the embeddings, so the
    encoder receives no explicit token-order signal -- confirm this is intended.
    """

    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers, num_classes=10):
        """
        Args:
            vocab_size: Number of rows in the embedding table.
            embed_size: Embedding width, also the encoder model dimension.
            num_heads: Number of attention heads per encoder layer.
            hidden_size: Width of the feed-forward sub-layer in each encoder layer.
            num_layers: Number of stacked encoder layers.
            num_classes: Number of output tags per token. Defaults to 10,
                the value that used to be hard-coded here.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        encoder_layer = nn.TransformerEncoderLayer(embed_size, num_heads, hidden_size)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.linear = nn.Linear(embed_size, num_classes)

    def forward(self, src, src_mask):
        """Compute per-token class logits.

        Args:
            src: Token ids, shape (batch_size, max_length).
            src_mask: Padding mask, shape (batch_size, max_length); passed to
                the encoder as ``src_key_padding_mask``.

        Returns:
            Logits of shape (batch_size, max_length, num_classes).
        """
        embedded = self.embedding(src)  # shape: (batch_size, max_length, embed_size)
        # The encoder layers are sequence-first, so swap the batch and
        # sequence axes on the way in and back again on the way out.
        encoded = self.transformer_encoder(
            embedded.transpose(0, 1), src_key_padding_mask=src_mask
        ).transpose(0, 1)  # shape: (batch_size, max_length, embed_size)
        output = self.linear(encoded)  # shape: (batch_size, max_length, num_classes)
        return output

In [19]:
# def init_weights(module):
#     if isinstance(module, (nn.Linear, nn.Embedding)):
#         nn.init.xavier_uniform_(module.weight)

In [51]:
def init_weights(module):
    """Re-initialise the weight of a Linear or Embedding module in place.

    Intended for ``model.apply``; modules of any other type are left alone,
    and biases are not touched.
    """
    if not isinstance(module, (nn.Linear, nn.Embedding)):
        return
    # Kaiming-uniform initialisation with the gain for ReLU.
    nn.init.kaiming_uniform_(module.weight, nonlinearity='relu')

In [40]:
def validate(
    model: nn.Module,
    dataloader: DataLoader,
    device: torch.device,
    epoch: Optional[int] = None,
    num_classes: int = 10,
):
    """Evaluate the model on a labelled dataloader and print loss/accuracy.

    Args:
        model: Network called as ``model(input_ids, input_mask)``.
        dataloader: Yields ``(input_ids, input_mask, tags)`` batches.
        device: Device the batches and metrics are moved to.
        epoch: Zero-based epoch index used only for the log line. When
            omitted, the epoch label is dropped instead of reading a global
            ``epoch`` variable (which was undefined or stale before).
        num_classes: Number of tag classes for the accuracy metric.

    Returns:
        The mean validation loss, weighted by the number of non-padding
        tokens in each batch (as returned by ``MeanMetric.compute()``).
    """
    # The legacy `compute_on_step` argument is no longer passed: only
    # update()/compute() are used here, never the metric's forward().
    acc_metric = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes).to(device)
    loss_metric = torchmetrics.MeanMetric().to(device)
    model.eval()

    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, input_mask, tags = batch[0].to(device), batch[1].to(device), batch[2].to(device)
            # output shape: (batch_size, max_length, num_classes)
            logits = model(input_ids, input_mask)
            # ignore padding index 0 when calculating loss
            loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), tags.reshape(-1), ignore_index=0)

            # weight the batch loss by its number of non-padding tokens
            loss_metric.update(loss, input_mask.numel() - input_mask.sum())
            is_active = torch.logical_not(input_mask)  # non-padding elements
            # only consider non-padded tokens when calculating accuracy
            acc_metric.update(logits[is_active], tags[is_active])

    val_loss = loss_metric.compute()
    epoch_label = f"| Epoch {epoch+1} " if epoch is not None else ""
    print(f"{epoch_label}| Validate | loss {val_loss:.6f} | acc {acc_metric.compute():.6f} |!!!!!!!!!!!!!!!!!!!!!!!!!")

    return val_loss


In [41]:
#modify as required
def train(
    model: nn.Module,
    dataloader: DataLoader,
    optimizer: optim.Optimizer,
    device: torch.device,
    epoch: int,
    num_classes: int = 10,
):
    """Run one training epoch and print the epoch's loss and accuracy.

    Args:
        model: Network called as ``model(input_ids, input_mask)``.
        dataloader: Yields ``(input_ids, input_mask, tags)`` batches.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches and metrics are moved to.
        epoch: Zero-based epoch index, used only for the log line.
        num_classes: Number of tag classes for the accuracy metric.
    """
    # The legacy `compute_on_step` argument is no longer passed: only
    # update()/compute() are used here, never the metric's forward().
    acc_metric = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes).to(device)
    loss_metric = torchmetrics.MeanMetric().to(device)
    model.train()

    # loop through all batches in the training
    for batch in tqdm(dataloader):
        input_ids, input_mask, tags = batch[0].to(device), batch[1].to(device), batch[2].to(device)
        optimizer.zero_grad()
        # output shape: (batch_size, max_length, num_classes)
        logits = model(input_ids, input_mask)
        # ignore padding index 0 when calculating loss
        loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), tags.reshape(-1), ignore_index=0)

        loss.backward()
        optimizer.step()

        # Metrics only need values, so hand them tensors detached from the graph.
        loss_metric.update(loss.detach(), input_mask.numel() - input_mask.sum())
        is_active = torch.logical_not(input_mask)  # non-padding elements
        # only consider non-padded tokens when calculating accuracy
        acc_metric.update(logits.detach()[is_active], tags[is_active])

    print(f"| Epoch {epoch+1} | Train | loss {loss_metric.compute():.6f} | acc {acc_metric.compute():.6f} |")


In [14]:
def predict(model: nn.Module, dataloader: DataLoader, device: torch.device) -> List[List[str]]:
    """Predict one tag string per non-padding token of every sentence.

    Args:
        model: Network called as ``model(input_ids, src_mask=src_mask)`` and
            returning logits of shape (batch, length, num_classes).
        dataloader: Yields batches whose first two items are the token ids
            and the padding mask (True at padding positions).
        device: Device the batches are moved to.

    Returns:
        One list of tag strings per sentence, in dataloader order, with the
        padded tail removed.
    """
    model.eval()
    preds = []

    idx2tag = ['<pad>', 'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids, src_mask = batch[0].to(device), batch[1].to(device)
            logits = model(input_ids, src_mask=src_mask)
            # Class 0 is the padding tag and is never a valid label for a
            # real token, so take the argmax over classes 1.. only.
            batch_preds = (logits[..., 1:].argmax(dim=-1) + 1).tolist()
            # True (unpadded) length of each sequence, derived from the mask
            # itself rather than from a hard-coded padded length of 128.
            seq_lens = torch.logical_not(src_mask).sum(dim=1).tolist()
            for tags, seq_len in zip(batch_preds, seq_lens):
                # Convert the predicted tag indices to labels, dropping the padded tail
                preds.append([idx2tag[idx] for idx in tags[:seq_len]])

    return preds

In [49]:
import optuna
def objective(trial: optuna.Trial):
    """Optuna objective: train one model and return its best validation loss.

    Reads ``tokenizer``, ``device``, ``train_dataloader``, ``val_dataloader``,
    ``max_epochs`` and ``patience`` from the notebook's global scope.

    Returns:
        The lowest validation loss seen during training, as a Python float.
    """
    # Suggest hyperparameters
    embed_size = trial.suggest_categorical("embed_size", [128, 256, 512])
    num_heads = trial.suggest_categorical("num_heads", [2, 4, 8])
    hidden_size = trial.suggest_categorical("hidden_size", [128, 256, 512])
    num_layers = trial.suggest_int("num_layers", 1, 4)
    lr = trial.suggest_float("lr", 5e-5, 1e-3)

    # Create the model with the suggested hyperparameters
    model = TransformerModel(vocab_size=len(tokenizer),
                             embed_size=embed_size,
                             num_heads=num_heads,
                             hidden_size=hidden_size,
                             num_layers=num_layers).to(device)
    model.apply(init_weights)

    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_loss = float('inf')
    best_epoch = 0
    best_model_state = None

    for epoch in range(max_epochs):
        train(model, train_dataloader, optimizer, device, epoch)

        # Convert to a plain float so the value handed back to Optuna (and
        # kept across epochs) is not a device tensor.
        val_loss = float(validate(model, val_dataloader, device))

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            # state_dict() returns references to the live parameters, which
            # later optimizer steps overwrite; clone them to keep a snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

            print(f"| Epoch {epoch+1} | New best validation loss: {best_val_loss:.4f} **********************")
        elif epoch - best_epoch >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}.")
            print(f"Best epoch is: {best_epoch + 1}")
            break

    # Restore the best model state (absent if no epoch ran or every
    # validation loss was NaN)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return best_val_loss

In [52]:
# One seed for both PyTorch and the Optuna sampler, so the sequence of
# suggested hyperparameters is repeatable as well.
SEED = 321
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

#hyperparameters
BATCH_SIZE = 32
max_epochs = 10
patience = 2  # epochs without improvement tolerated before early stopping

# data loaders
train_dataloader = DataLoader(tr_data, batch_size = BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(va_data, batch_size = BATCH_SIZE)
test_dataloader = DataLoader(te_data, batch_size = BATCH_SIZE)

# Search for the hyperparameters that minimise the validation loss.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=30)
print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[32m[I 2023-03-16 19:11:20,840][0m A new study created in memory with name: no-name-0831b67d-f29b-4716-8eb6-ba12d537c003[0m


cuda


100%|██████████| 439/439 [00:08<00:00, 54.21it/s]


| Epoch 1 | Train | loss 0.405201 | acc 0.892791 |


100%|██████████| 102/102 [00:00<00:00, 164.84it/s]


| Epoch 4 | Validate | loss 0.276741 | acc 0.923426 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2767 **********************


100%|██████████| 439/439 [00:07<00:00, 56.11it/s]


| Epoch 2 | Train | loss 0.147424 | acc 0.954681 |


100%|██████████| 102/102 [00:00<00:00, 173.94it/s]


| Epoch 4 | Validate | loss 0.248736 | acc 0.929812 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2487 **********************


100%|██████████| 439/439 [00:07<00:00, 56.11it/s]


| Epoch 3 | Train | loss 0.088816 | acc 0.971113 |


100%|██████████| 102/102 [00:00<00:00, 168.05it/s]


| Epoch 4 | Validate | loss 0.281189 | acc 0.931739 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:07<00:00, 56.23it/s]


| Epoch 4 | Train | loss 0.062173 | acc 0.979015 |


100%|██████████| 102/102 [00:00<00:00, 173.39it/s]
[32m[I 2023-03-16 19:11:54,951][0m Trial 0 finished with value: 0.2487362027168274 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 1, 'lr': 0.00037026589736658544}. Best is trial 0 with value: 0.2487362027168274.[0m


| Epoch 4 | Validate | loss 0.285631 | acc 0.933044 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:17<00:00, 25.08it/s]


| Epoch 1 | Train | loss 0.441116 | acc 0.884423 |


100%|██████████| 102/102 [00:01<00:00, 79.39it/s]


| Epoch 4 | Validate | loss 0.286088 | acc 0.919999 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2861 **********************


100%|██████████| 439/439 [00:17<00:00, 25.09it/s]


| Epoch 2 | Train | loss 0.160341 | acc 0.951935 |


100%|██████████| 102/102 [00:01<00:00, 78.89it/s]


| Epoch 4 | Validate | loss 0.277040 | acc 0.928371 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2770 **********************


100%|██████████| 439/439 [00:17<00:00, 25.05it/s]


| Epoch 3 | Train | loss 0.098936 | acc 0.968888 |


100%|██████████| 102/102 [00:01<00:00, 78.42it/s]


| Epoch 4 | Validate | loss 0.243654 | acc 0.933998 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2437 **********************


100%|██████████| 439/439 [00:17<00:00, 25.06it/s]


| Epoch 4 | Train | loss 0.077351 | acc 0.975042 |


100%|██████████| 102/102 [00:01<00:00, 79.34it/s]


| Epoch 4 | Validate | loss 0.277478 | acc 0.922686 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:17<00:00, 25.01it/s]


| Epoch 5 | Train | loss 0.066937 | acc 0.977895 |


100%|██████████| 102/102 [00:01<00:00, 78.53it/s]
[32m[I 2023-03-16 19:13:29,150][0m Trial 1 finished with value: 0.24365386366844177 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.0005261295936382586}. Best is trial 1 with value: 0.24365386366844177.[0m


| Epoch 4 | Validate | loss 0.280556 | acc 0.935341 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:16<00:00, 26.14it/s]


| Epoch 1 | Train | loss 0.463855 | acc 0.879865 |


100%|██████████| 102/102 [00:01<00:00, 80.76it/s]


| Epoch 4 | Validate | loss 0.313115 | acc 0.914957 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3131 **********************


100%|██████████| 439/439 [00:16<00:00, 26.12it/s]


| Epoch 2 | Train | loss 0.170514 | acc 0.948939 |


100%|██████████| 102/102 [00:01<00:00, 81.96it/s]


| Epoch 4 | Validate | loss 0.256677 | acc 0.931214 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2567 **********************


100%|██████████| 439/439 [00:16<00:00, 26.11it/s]


| Epoch 3 | Train | loss 0.099998 | acc 0.968451 |


100%|██████████| 102/102 [00:01<00:00, 81.54it/s]


| Epoch 4 | Validate | loss 0.261949 | acc 0.932421 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:16<00:00, 26.14it/s]


| Epoch 4 | Train | loss 0.072831 | acc 0.976142 |


100%|██████████| 102/102 [00:01<00:00, 82.35it/s]
[32m[I 2023-03-16 19:14:41,466][0m Trial 2 finished with value: 0.2566770315170288 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.000403989068373604}. Best is trial 1 with value: 0.24365386366844177.[0m


| Epoch 4 | Validate | loss 0.263608 | acc 0.932538 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:13<00:00, 33.20it/s]


| Epoch 1 | Train | loss 0.428975 | acc 0.887664 |


100%|██████████| 102/102 [00:00<00:00, 103.77it/s]


| Epoch 4 | Validate | loss 0.304686 | acc 0.920389 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3047 **********************


100%|██████████| 439/439 [00:13<00:00, 33.16it/s]


| Epoch 2 | Train | loss 0.159831 | acc 0.951366 |


100%|██████████| 102/102 [00:00<00:00, 103.24it/s]


| Epoch 4 | Validate | loss 0.239361 | acc 0.931915 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2394 **********************


100%|██████████| 439/439 [00:13<00:00, 33.14it/s]


| Epoch 3 | Train | loss 0.089702 | acc 0.970759 |


100%|██████████| 102/102 [00:01<00:00, 101.47it/s]


| Epoch 4 | Validate | loss 0.287547 | acc 0.933102 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:13<00:00, 33.21it/s]


| Epoch 4 | Train | loss 0.058586 | acc 0.980591 |


100%|██████████| 102/102 [00:00<00:00, 103.06it/s]
[32m[I 2023-03-16 19:15:38,507][0m Trial 3 finished with value: 0.23936066031455994 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 2, 'lr': 0.0003273473628481439}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.278165 | acc 0.935906 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:07<00:00, 58.42it/s]


| Epoch 1 | Train | loss 0.369144 | acc 0.902540 |


100%|██████████| 102/102 [00:00<00:00, 174.38it/s]


| Epoch 4 | Validate | loss 0.265682 | acc 0.925821 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2657 **********************


100%|██████████| 439/439 [00:07<00:00, 58.84it/s]


| Epoch 2 | Train | loss 0.128628 | acc 0.960259 |


100%|██████████| 102/102 [00:00<00:00, 173.64it/s]


| Epoch 4 | Validate | loss 0.261007 | acc 0.929578 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2610 **********************


100%|██████████| 439/439 [00:07<00:00, 58.68it/s]


| Epoch 3 | Train | loss 0.079193 | acc 0.973952 |


100%|██████████| 102/102 [00:00<00:00, 179.49it/s]


| Epoch 4 | Validate | loss 0.261376 | acc 0.931856 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:07<00:00, 58.54it/s]


| Epoch 4 | Train | loss 0.058693 | acc 0.980086 |


100%|██████████| 102/102 [00:00<00:00, 177.51it/s]
[32m[I 2023-03-16 19:16:10,888][0m Trial 4 finished with value: 0.2610066533088684 and parameters: {'embed_size': 512, 'num_heads': 2, 'hidden_size': 256, 'num_layers': 1, 'lr': 0.0005888811820103137}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.291175 | acc 0.932479 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:03<00:00, 128.04it/s]


| Epoch 1 | Train | loss 0.446503 | acc 0.883131 |


100%|██████████| 102/102 [00:00<00:00, 291.10it/s]


| Epoch 4 | Validate | loss 0.296590 | acc 0.916845 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2966 **********************


100%|██████████| 439/439 [00:03<00:00, 130.86it/s]


| Epoch 2 | Train | loss 0.179916 | acc 0.945300 |


100%|██████████| 102/102 [00:00<00:00, 312.30it/s]


| Epoch 4 | Validate | loss 0.285762 | acc 0.906974 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2858 **********************


100%|██████████| 439/439 [00:03<00:00, 131.72it/s]


| Epoch 3 | Train | loss 0.109030 | acc 0.965249 |


100%|██████████| 102/102 [00:00<00:00, 314.39it/s]


| Epoch 4 | Validate | loss 0.246892 | acc 0.931019 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2469 **********************


100%|██████████| 439/439 [00:03<00:00, 136.28it/s]


| Epoch 4 | Train | loss 0.077816 | acc 0.973863 |


100%|██████████| 102/102 [00:00<00:00, 296.68it/s]


| Epoch 4 | Validate | loss 0.259603 | acc 0.932888 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:03<00:00, 134.26it/s]


| Epoch 5 | Train | loss 0.060437 | acc 0.979511 |


100%|██████████| 102/102 [00:00<00:00, 310.93it/s]
[32m[I 2023-03-16 19:16:29,262][0m Trial 5 finished with value: 0.24689213931560516 and parameters: {'embed_size': 128, 'num_heads': 2, 'hidden_size': 512, 'num_layers': 1, 'lr': 0.0009758469494084238}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.306066 | acc 0.933394 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:07<00:00, 58.68it/s]


| Epoch 1 | Train | loss 0.355993 | acc 0.905265 |


100%|██████████| 102/102 [00:00<00:00, 173.02it/s]


| Epoch 4 | Validate | loss 0.292061 | acc 0.924575 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2921 **********************


100%|██████████| 439/439 [00:07<00:00, 58.53it/s]


| Epoch 2 | Train | loss 0.125433 | acc 0.960972 |


100%|██████████| 102/102 [00:00<00:00, 173.42it/s]


| Epoch 4 | Validate | loss 0.269463 | acc 0.932129 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2695 **********************


100%|██████████| 439/439 [00:07<00:00, 58.48it/s]


| Epoch 3 | Train | loss 0.076988 | acc 0.974487 |


100%|██████████| 102/102 [00:00<00:00, 176.43it/s]


| Epoch 4 | Validate | loss 0.295671 | acc 0.931467 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:07<00:00, 58.64it/s]


| Epoch 4 | Train | loss 0.058233 | acc 0.980027 |


100%|██████████| 102/102 [00:00<00:00, 177.32it/s]
[32m[I 2023-03-16 19:17:01,663][0m Trial 6 finished with value: 0.26946279406547546 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 128, 'num_layers': 1, 'lr': 0.0006516890873542069}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.316686 | acc 0.933258 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:04<00:00, 96.74it/s]


| Epoch 1 | Train | loss 0.379633 | acc 0.899308 |


100%|██████████| 102/102 [00:00<00:00, 249.60it/s]


| Epoch 4 | Validate | loss 0.265917 | acc 0.922998 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2659 **********************


100%|██████████| 439/439 [00:04<00:00, 97.48it/s]


| Epoch 2 | Train | loss 0.139868 | acc 0.957018 |


100%|██████████| 102/102 [00:00<00:00, 245.25it/s]


| Epoch 4 | Validate | loss 0.252986 | acc 0.928722 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2530 **********************


100%|██████████| 439/439 [00:04<00:00, 97.22it/s]


| Epoch 3 | Train | loss 0.085827 | acc 0.971634 |


100%|██████████| 102/102 [00:00<00:00, 249.29it/s]


| Epoch 4 | Validate | loss 0.247420 | acc 0.931895 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2474 **********************


100%|██████████| 439/439 [00:04<00:00, 96.87it/s]


| Epoch 4 | Train | loss 0.061854 | acc 0.978814 |


100%|██████████| 102/102 [00:00<00:00, 249.87it/s]


| Epoch 4 | Validate | loss 0.280740 | acc 0.932732 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:04<00:00, 96.48it/s]


| Epoch 5 | Train | loss 0.048747 | acc 0.983312 |


100%|██████████| 102/102 [00:00<00:00, 251.09it/s]
[32m[I 2023-03-16 19:17:26,428][0m Trial 7 finished with value: 0.24741970002651215 and parameters: {'embed_size': 256, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 1, 'lr': 0.0009357097310617897}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.309989 | acc 0.933492 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:05<00:00, 76.90it/s]


| Epoch 1 | Train | loss 0.612601 | acc 0.845448 |


100%|██████████| 102/102 [00:00<00:00, 201.03it/s]


| Epoch 4 | Validate | loss 0.430365 | acc 0.881118 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.4304 **********************


100%|██████████| 439/439 [00:05<00:00, 76.28it/s]


| Epoch 2 | Train | loss 0.347666 | acc 0.897344 |


100%|██████████| 102/102 [00:00<00:00, 210.56it/s]


| Epoch 4 | Validate | loss 0.321597 | acc 0.909096 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.3216 **********************


100%|██████████| 439/439 [00:05<00:00, 76.28it/s]


| Epoch 3 | Train | loss 0.237290 | acc 0.928082 |


100%|██████████| 102/102 [00:00<00:00, 210.32it/s]


| Epoch 4 | Validate | loss 0.275294 | acc 0.920291 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2753 **********************


100%|██████████| 439/439 [00:05<00:00, 76.43it/s]


| Epoch 4 | Train | loss 0.172629 | acc 0.947049 |


100%|██████████| 102/102 [00:00<00:00, 209.41it/s]


| Epoch 4 | Validate | loss 0.278273 | acc 0.925723 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:05<00:00, 76.35it/s]


| Epoch 5 | Train | loss 0.132711 | acc 0.958487 |


100%|██████████| 102/102 [00:00<00:00, 201.60it/s]


| Epoch 4 | Validate | loss 0.266066 | acc 0.930279 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.2661 **********************


100%|██████████| 439/439 [00:05<00:00, 76.59it/s]


| Epoch 6 | Train | loss 0.106136 | acc 0.965819 |


100%|██████████| 102/102 [00:00<00:00, 202.89it/s]


| Epoch 4 | Validate | loss 0.249700 | acc 0.932324 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 6 | New best validation loss: 0.2497 **********************


100%|██████████| 439/439 [00:05<00:00, 76.46it/s]


| Epoch 7 | Train | loss 0.087780 | acc 0.971437 |


100%|██████████| 102/102 [00:00<00:00, 215.66it/s]


| Epoch 4 | Validate | loss 0.273136 | acc 0.933686 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:05<00:00, 76.41it/s]


| Epoch 8 | Train | loss 0.072951 | acc 0.975705 |


100%|██████████| 102/102 [00:00<00:00, 210.71it/s]
[32m[I 2023-03-16 19:18:16,386][0m Trial 8 finished with value: 0.24969972670078278 and parameters: {'embed_size': 128, 'num_heads': 2, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.00033026053820606785}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.293970 | acc 0.934348 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 8.
Best epoch is: 6


100%|██████████| 439/439 [00:06<00:00, 64.40it/s]


| Epoch 1 | Train | loss 0.408320 | acc 0.890444 |


100%|██████████| 102/102 [00:00<00:00, 180.37it/s]


| Epoch 4 | Validate | loss 0.275413 | acc 0.922881 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2754 **********************


100%|██████████| 439/439 [00:06<00:00, 64.39it/s]


| Epoch 2 | Train | loss 0.156313 | acc 0.952854 |


100%|██████████| 102/102 [00:00<00:00, 182.23it/s]


| Epoch 4 | Validate | loss 0.249246 | acc 0.929695 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2492 **********************


100%|██████████| 439/439 [00:06<00:00, 64.15it/s]


| Epoch 3 | Train | loss 0.092122 | acc 0.970327 |


100%|██████████| 102/102 [00:00<00:00, 178.82it/s]


| Epoch 4 | Validate | loss 0.255192 | acc 0.933803 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:06<00:00, 64.21it/s]


| Epoch 4 | Train | loss 0.066944 | acc 0.977463 |


100%|██████████| 102/102 [00:00<00:00, 178.78it/s]
[32m[I 2023-03-16 19:18:45,999][0m Trial 9 finished with value: 0.24924619495868683 and parameters: {'embed_size': 256, 'num_heads': 2, 'hidden_size': 512, 'num_layers': 2, 'lr': 0.0007488095638381202}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.254318 | acc 0.933355 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:07<00:00, 55.82it/s]


| Epoch 1 | Train | loss 0.742315 | acc 0.830101 |


100%|██████████| 102/102 [00:00<00:00, 158.09it/s]


| Epoch 4 | Validate | loss 0.636974 | acc 0.835559 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.6370 **********************


100%|██████████| 439/439 [00:07<00:00, 55.99it/s]


| Epoch 2 | Train | loss 0.595859 | acc 0.841451 |


100%|██████████| 102/102 [00:00<00:00, 156.68it/s]


| Epoch 4 | Validate | loss 0.504456 | acc 0.860013 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.5045 **********************


100%|██████████| 439/439 [00:07<00:00, 56.01it/s]


| Epoch 3 | Train | loss 0.491913 | acc 0.861424 |


100%|██████████| 102/102 [00:00<00:00, 159.46it/s]


| Epoch 4 | Validate | loss 0.474141 | acc 0.874499 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.4741 **********************


100%|██████████| 439/439 [00:07<00:00, 55.80it/s]


| Epoch 4 | Train | loss 0.417774 | acc 0.878028 |


100%|██████████| 102/102 [00:00<00:00, 154.98it/s]


| Epoch 4 | Validate | loss 0.397983 | acc 0.888984 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 4 | New best validation loss: 0.3980 **********************


100%|██████████| 439/439 [00:07<00:00, 55.75it/s]


| Epoch 5 | Train | loss 0.365088 | acc 0.890949 |


100%|██████████| 102/102 [00:00<00:00, 159.47it/s]


| Epoch 4 | Validate | loss 0.376901 | acc 0.895837 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.3769 **********************


100%|██████████| 439/439 [00:07<00:00, 55.47it/s]


| Epoch 6 | Train | loss 0.322933 | acc 0.901695 |


100%|██████████| 102/102 [00:00<00:00, 159.91it/s]


| Epoch 4 | Validate | loss 0.355426 | acc 0.901698 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 6 | New best validation loss: 0.3554 **********************


100%|██████████| 439/439 [00:07<00:00, 55.76it/s]


| Epoch 7 | Train | loss 0.288320 | acc 0.911836 |


100%|██████████| 102/102 [00:00<00:00, 159.23it/s]


| Epoch 4 | Validate | loss 0.325932 | acc 0.906176 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 7 | New best validation loss: 0.3259 **********************


100%|██████████| 439/439 [00:07<00:00, 55.64it/s]


| Epoch 8 | Train | loss 0.259241 | acc 0.919787 |


100%|██████████| 102/102 [00:00<00:00, 158.27it/s]


| Epoch 4 | Validate | loss 0.318868 | acc 0.911024 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 8 | New best validation loss: 0.3189 **********************


100%|██████████| 439/439 [00:07<00:00, 55.67it/s]


| Epoch 9 | Train | loss 0.235379 | acc 0.926898 |


100%|██████████| 102/102 [00:00<00:00, 160.14it/s]


| Epoch 4 | Validate | loss 0.314352 | acc 0.914567 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 9 | New best validation loss: 0.3144 **********************


100%|██████████| 439/439 [00:07<00:00, 55.69it/s]


| Epoch 10 | Train | loss 0.213340 | acc 0.933553 |


100%|██████████| 102/102 [00:00<00:00, 150.31it/s]
[32m[I 2023-03-16 19:20:11,282][0m Trial 10 finished with value: 0.29700174927711487 and parameters: {'embed_size': 128, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 4, 'lr': 7.78231772194265e-05}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.297002 | acc 0.917488 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 10 | New best validation loss: 0.2970 **********************


100%|██████████| 439/439 [00:11<00:00, 37.24it/s]


| Epoch 1 | Train | loss 0.470577 | acc 0.876064 |


100%|██████████| 102/102 [00:00<00:00, 116.26it/s]


| Epoch 4 | Validate | loss 0.319294 | acc 0.914119 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3193 **********************


100%|██████████| 439/439 [00:11<00:00, 38.11it/s]


| Epoch 2 | Train | loss 0.193940 | acc 0.942025 |


100%|██████████| 102/102 [00:00<00:00, 116.85it/s]


| Epoch 4 | Validate | loss 0.270800 | acc 0.927904 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2708 **********************


100%|██████████| 439/439 [00:11<00:00, 38.19it/s]


| Epoch 3 | Train | loss 0.108310 | acc 0.966045 |


100%|██████████| 102/102 [00:00<00:00, 117.74it/s]


| Epoch 4 | Validate | loss 0.253321 | acc 0.934504 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2533 **********************


100%|██████████| 439/439 [00:11<00:00, 38.09it/s]


| Epoch 4 | Train | loss 0.070072 | acc 0.977159 |


100%|██████████| 102/102 [00:00<00:00, 117.42it/s]


| Epoch 4 | Validate | loss 0.284248 | acc 0.935692 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:11<00:00, 38.06it/s]


| Epoch 5 | Train | loss 0.051509 | acc 0.982561 |


100%|██████████| 102/102 [00:00<00:00, 117.26it/s]
[32m[I 2023-03-16 19:21:13,607][0m Trial 11 finished with value: 0.25332143902778625 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 128, 'num_layers': 2, 'lr': 0.00022506985812860734}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.306921 | acc 0.933706 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:24<00:00, 18.00it/s]


| Epoch 1 | Train | loss 0.535833 | acc 0.863604 |


100%|██████████| 102/102 [00:01<00:00, 56.96it/s]


| Epoch 4 | Validate | loss 0.328280 | acc 0.910556 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3283 **********************


100%|██████████| 439/439 [00:24<00:00, 17.97it/s]


| Epoch 2 | Train | loss 0.210889 | acc 0.937015 |


100%|██████████| 102/102 [00:01<00:00, 56.93it/s]


| Epoch 4 | Validate | loss 0.273266 | acc 0.923913 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2733 **********************


100%|██████████| 439/439 [00:24<00:00, 18.00it/s]


| Epoch 3 | Train | loss 0.134627 | acc 0.959071 |


100%|██████████| 102/102 [00:01<00:00, 56.72it/s]


| Epoch 4 | Validate | loss 0.281045 | acc 0.932538 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:24<00:00, 17.98it/s]


| Epoch 4 | Train | loss 0.106402 | acc 0.967248 |


100%|██████████| 102/102 [00:01<00:00, 56.98it/s]
[32m[I 2023-03-16 19:22:58,500][0m Trial 12 finished with value: 0.27326616644859314 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 512, 'num_layers': 4, 'lr': 0.00048163876991890115}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.323055 | acc 0.932538 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:18<00:00, 24.00it/s]


| Epoch 1 | Train | loss 0.469963 | acc 0.880066 |


100%|██████████| 102/102 [00:01<00:00, 75.35it/s]


| Epoch 4 | Validate | loss 0.308914 | acc 0.919026 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3089 **********************


100%|██████████| 439/439 [00:18<00:00, 24.07it/s]


| Epoch 2 | Train | loss 0.167602 | acc 0.949990 |


100%|██████████| 102/102 [00:01<00:00, 74.72it/s]


| Epoch 4 | Validate | loss 0.264766 | acc 0.931467 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2648 **********************


100%|██████████| 439/439 [00:18<00:00, 24.08it/s]


| Epoch 3 | Train | loss 0.103756 | acc 0.967999 |


100%|██████████| 102/102 [00:01<00:00, 75.75it/s]


| Epoch 4 | Validate | loss 0.244757 | acc 0.932966 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2448 **********************


100%|██████████| 439/439 [00:18<00:00, 24.09it/s]


| Epoch 4 | Train | loss 0.080099 | acc 0.974261 |


100%|██████████| 102/102 [00:01<00:00, 75.14it/s]


| Epoch 4 | Validate | loss 0.276865 | acc 0.933180 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:18<00:00, 24.05it/s]


| Epoch 5 | Train | loss 0.071194 | acc 0.976712 |


100%|██████████| 102/102 [00:01<00:00, 75.24it/s]
[32m[I 2023-03-16 19:24:36,681][0m Trial 13 finished with value: 0.2447567582130432 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 3, 'lr': 0.000530042943955299}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.278663 | acc 0.927748 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:12<00:00, 36.25it/s]


| Epoch 1 | Train | loss 0.466079 | acc 0.876373 |


100%|██████████| 102/102 [00:00<00:00, 108.17it/s]


| Epoch 4 | Validate | loss 0.319671 | acc 0.914879 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3197 **********************


100%|██████████| 439/439 [00:12<00:00, 36.32it/s]


| Epoch 2 | Train | loss 0.189581 | acc 0.943601 |


100%|██████████| 102/102 [00:00<00:00, 112.82it/s]


| Epoch 4 | Validate | loss 0.255663 | acc 0.927437 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2557 **********************


100%|██████████| 439/439 [00:12<00:00, 36.12it/s]


| Epoch 3 | Train | loss 0.104529 | acc 0.967268 |


100%|██████████| 102/102 [00:00<00:00, 109.93it/s]


| Epoch 4 | Validate | loss 0.268682 | acc 0.931545 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:12<00:00, 35.49it/s]


| Epoch 4 | Train | loss 0.067233 | acc 0.977777 |


100%|██████████| 102/102 [00:00<00:00, 110.94it/s]
[32m[I 2023-03-16 19:25:29,223][0m Trial 14 finished with value: 0.2556627094745636 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 128, 'num_layers': 2, 'lr': 0.00024283503923421776}. Best is trial 3 with value: 0.23936066031455994.[0m


| Epoch 4 | Validate | loss 0.316621 | acc 0.933239 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:08<00:00, 50.94it/s]


| Epoch 1 | Train | loss 0.506751 | acc 0.870298 |


100%|██████████| 102/102 [00:00<00:00, 147.14it/s]


| Epoch 4 | Validate | loss 0.308833 | acc 0.910070 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3088 **********************


100%|██████████| 439/439 [00:08<00:00, 50.95it/s]


| Epoch 2 | Train | loss 0.209727 | acc 0.937619 |


100%|██████████| 102/102 [00:00<00:00, 145.64it/s]


| Epoch 4 | Validate | loss 0.251011 | acc 0.930065 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2510 **********************


100%|██████████| 439/439 [00:08<00:00, 51.01it/s]


| Epoch 3 | Train | loss 0.121144 | acc 0.962528 |


100%|██████████| 102/102 [00:00<00:00, 147.45it/s]


| Epoch 4 | Validate | loss 0.238180 | acc 0.933745 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2382 **********************


100%|██████████| 439/439 [00:08<00:00, 51.00it/s]


| Epoch 4 | Train | loss 0.084051 | acc 0.972847 |


100%|██████████| 102/102 [00:00<00:00, 148.21it/s]


| Epoch 4 | Validate | loss 0.271811 | acc 0.934582 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:08<00:00, 50.91it/s]


| Epoch 5 | Train | loss 0.063140 | acc 0.979305 |


100%|██████████| 102/102 [00:00<00:00, 148.32it/s]
[32m[I 2023-03-16 19:26:15,821][0m Trial 15 finished with value: 0.23818016052246094 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.000456008621520148}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.265541 | acc 0.935030 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:07<00:00, 62.22it/s]


| Epoch 1 | Train | loss 0.482675 | acc 0.874983 |


100%|██████████| 102/102 [00:00<00:00, 176.47it/s]


| Epoch 4 | Validate | loss 0.331675 | acc 0.912017 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3317 **********************


100%|██████████| 439/439 [00:07<00:00, 62.35it/s]


| Epoch 2 | Train | loss 0.195360 | acc 0.942241 |


100%|██████████| 102/102 [00:00<00:00, 175.93it/s]


| Epoch 4 | Validate | loss 0.263481 | acc 0.928352 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2635 **********************


100%|██████████| 439/439 [00:07<00:00, 62.16it/s]


| Epoch 3 | Train | loss 0.112842 | acc 0.964424 |


100%|██████████| 102/102 [00:00<00:00, 171.63it/s]


| Epoch 4 | Validate | loss 0.253101 | acc 0.934154 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2531 **********************


100%|██████████| 439/439 [00:07<00:00, 62.12it/s]


| Epoch 4 | Train | loss 0.074746 | acc 0.975282 |


100%|██████████| 102/102 [00:00<00:00, 171.79it/s]


| Epoch 4 | Validate | loss 0.262762 | acc 0.933881 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:07<00:00, 62.22it/s]


| Epoch 5 | Train | loss 0.056270 | acc 0.980989 |


100%|██████████| 102/102 [00:00<00:00, 171.49it/s]
[32m[I 2023-03-16 19:26:54,136][0m Trial 16 finished with value: 0.25310105085372925 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 2, 'lr': 0.00042851961762829566}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.305664 | acc 0.921985 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:10<00:00, 40.83it/s]


| Epoch 1 | Train | loss 0.569203 | acc 0.853915 |


100%|██████████| 102/102 [00:00<00:00, 123.97it/s]


| Epoch 4 | Validate | loss 0.390269 | acc 0.894416 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3903 **********************


100%|██████████| 439/439 [00:10<00:00, 40.80it/s]


| Epoch 2 | Train | loss 0.281124 | acc 0.915063 |


100%|██████████| 102/102 [00:00<00:00, 123.14it/s]


| Epoch 4 | Validate | loss 0.277007 | acc 0.919474 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2770 **********************


100%|██████████| 439/439 [00:10<00:00, 40.93it/s]


| Epoch 3 | Train | loss 0.176864 | acc 0.945423 |


100%|██████████| 102/102 [00:00<00:00, 122.79it/s]


| Epoch 4 | Validate | loss 0.274583 | acc 0.928877 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2746 **********************


100%|██████████| 439/439 [00:10<00:00, 40.91it/s]


| Epoch 4 | Train | loss 0.124800 | acc 0.961070 |


100%|██████████| 102/102 [00:00<00:00, 123.18it/s]


| Epoch 4 | Validate | loss 0.248340 | acc 0.931408 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 4 | New best validation loss: 0.2483 **********************


100%|██████████| 439/439 [00:10<00:00, 40.88it/s]


| Epoch 5 | Train | loss 0.093554 | acc 0.970062 |


100%|██████████| 102/102 [00:00<00:00, 122.21it/s]


| Epoch 4 | Validate | loss 0.257503 | acc 0.933784 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:10<00:00, 40.85it/s]


| Epoch 6 | Train | loss 0.074961 | acc 0.975577 |


100%|██████████| 102/102 [00:00<00:00, 121.86it/s]
[32m[I 2023-03-16 19:28:03,697][0m Trial 17 finished with value: 0.2483399659395218 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 4, 'lr': 0.00028245989517727463}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.262088 | acc 0.931973 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 6.
Best epoch is: 4


100%|██████████| 439/439 [00:09<00:00, 46.06it/s]


| Epoch 1 | Train | loss 0.605745 | acc 0.847781 |


100%|██████████| 102/102 [00:00<00:00, 134.42it/s]


| Epoch 4 | Validate | loss 0.405712 | acc 0.885460 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.4057 **********************


100%|██████████| 439/439 [00:09<00:00, 46.08it/s]


| Epoch 2 | Train | loss 0.340794 | acc 0.900320 |


100%|██████████| 102/102 [00:00<00:00, 134.88it/s]


| Epoch 4 | Validate | loss 0.343390 | acc 0.907149 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.3434 **********************


100%|██████████| 439/439 [00:09<00:00, 46.04it/s]


| Epoch 3 | Train | loss 0.227914 | acc 0.930690 |


100%|██████████| 102/102 [00:00<00:00, 133.22it/s]


| Epoch 4 | Validate | loss 0.296619 | acc 0.918072 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2966 **********************


100%|██████████| 439/439 [00:09<00:00, 45.94it/s]


| Epoch 4 | Train | loss 0.164524 | acc 0.948630 |


100%|██████████| 102/102 [00:00<00:00, 133.81it/s]


| Epoch 4 | Validate | loss 0.279781 | acc 0.926288 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 4 | New best validation loss: 0.2798 **********************


100%|██████████| 439/439 [00:09<00:00, 45.85it/s]


| Epoch 5 | Train | loss 0.125059 | acc 0.960043 |


100%|██████████| 102/102 [00:00<00:00, 137.28it/s]


| Epoch 4 | Validate | loss 0.265290 | acc 0.929500 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.2653 **********************


100%|██████████| 439/439 [00:09<00:00, 46.06it/s]


| Epoch 6 | Train | loss 0.097327 | acc 0.968466 |


100%|██████████| 102/102 [00:00<00:00, 136.03it/s]


| Epoch 4 | Validate | loss 0.266958 | acc 0.931214 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:09<00:00, 45.84it/s]


| Epoch 7 | Train | loss 0.079703 | acc 0.973539 |


100%|██████████| 102/102 [00:00<00:00, 134.05it/s]


| Epoch 4 | Validate | loss 0.252774 | acc 0.931214 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 7 | New best validation loss: 0.2528 **********************


100%|██████████| 439/439 [00:09<00:00, 45.23it/s]


| Epoch 8 | Train | loss 0.065476 | acc 0.977871 |


100%|██████████| 102/102 [00:00<00:00, 128.13it/s]


| Epoch 4 | Validate | loss 0.257309 | acc 0.931525 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:09<00:00, 46.10it/s]


| Epoch 9 | Train | loss 0.055611 | acc 0.981019 |


100%|██████████| 102/102 [00:00<00:00, 134.59it/s]
[32m[I 2023-03-16 19:29:36,773][0m Trial 18 finished with value: 0.2527739405632019 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 3, 'lr': 0.00015316298101827983}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.269272 | acc 0.933589 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 9.
Best epoch is: 7


100%|██████████| 439/439 [00:06<00:00, 71.97it/s]


| Epoch 1 | Train | loss 0.519505 | acc 0.864557 |


100%|██████████| 102/102 [00:00<00:00, 200.46it/s]


| Epoch 4 | Validate | loss 0.339141 | acc 0.904910 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3391 **********************


100%|██████████| 439/439 [00:06<00:00, 71.72it/s]


| Epoch 2 | Train | loss 0.237590 | acc 0.929614 |


100%|██████████| 102/102 [00:00<00:00, 200.78it/s]


| Epoch 4 | Validate | loss 0.282032 | acc 0.924127 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2820 **********************


100%|██████████| 439/439 [00:06<00:00, 71.56it/s]


| Epoch 3 | Train | loss 0.139633 | acc 0.956630 |


100%|██████████| 102/102 [00:00<00:00, 191.15it/s]


| Epoch 4 | Validate | loss 0.256947 | acc 0.927729 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2569 **********************


100%|██████████| 439/439 [00:06<00:00, 69.30it/s]


| Epoch 4 | Train | loss 0.094732 | acc 0.969394 |


100%|██████████| 102/102 [00:00<00:00, 194.14it/s]


| Epoch 4 | Validate | loss 0.283632 | acc 0.929948 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:06<00:00, 69.44it/s]


| Epoch 5 | Train | loss 0.069799 | acc 0.976913 |


100%|██████████| 102/102 [00:00<00:00, 193.62it/s]
[32m[I 2023-03-16 19:30:10,434][0m Trial 19 finished with value: 0.2569471597671509 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 128, 'num_layers': 2, 'lr': 0.00032816430696497886}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.288143 | acc 0.933297 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:09<00:00, 46.09it/s]


| Epoch 1 | Train | loss 0.597938 | acc 0.849382 |


100%|██████████| 102/102 [00:00<00:00, 136.45it/s]


| Epoch 4 | Validate | loss 0.409184 | acc 0.891048 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.4092 **********************


100%|██████████| 439/439 [00:09<00:00, 46.10it/s]


| Epoch 2 | Train | loss 0.318246 | acc 0.906468 |


100%|██████████| 102/102 [00:00<00:00, 135.77it/s]


| Epoch 4 | Validate | loss 0.326429 | acc 0.914392 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.3264 **********************


100%|██████████| 439/439 [00:09<00:00, 46.21it/s]


| Epoch 3 | Train | loss 0.208246 | acc 0.937069 |


100%|██████████| 102/102 [00:00<00:00, 133.45it/s]


| Epoch 4 | Validate | loss 0.286681 | acc 0.924341 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2867 **********************


100%|██████████| 439/439 [00:09<00:00, 46.22it/s]


| Epoch 4 | Train | loss 0.148435 | acc 0.953625 |


100%|██████████| 102/102 [00:00<00:00, 133.39it/s]


| Epoch 4 | Validate | loss 0.272900 | acc 0.929403 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 4 | New best validation loss: 0.2729 **********************


100%|██████████| 439/439 [00:09<00:00, 46.05it/s]


| Epoch 5 | Train | loss 0.111794 | acc 0.964380 |


100%|██████████| 102/102 [00:00<00:00, 136.08it/s]


| Epoch 4 | Validate | loss 0.261877 | acc 0.931486 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.2619 **********************


100%|██████████| 439/439 [00:09<00:00, 46.29it/s]


| Epoch 6 | Train | loss 0.087167 | acc 0.971697 |


100%|██████████| 102/102 [00:00<00:00, 136.21it/s]


| Epoch 4 | Validate | loss 0.272306 | acc 0.932869 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:09<00:00, 46.05it/s]


| Epoch 7 | Train | loss 0.069760 | acc 0.976933 |


100%|██████████| 102/102 [00:00<00:00, 133.12it/s]
[32m[I 2023-03-16 19:31:22,428][0m Trial 20 finished with value: 0.26187664270401 and parameters: {'embed_size': 256, 'num_heads': 4, 'hidden_size': 512, 'num_layers': 3, 'lr': 0.00017768413750430086}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.273753 | acc 0.933141 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 7.
Best epoch is: 5


100%|██████████| 439/439 [00:17<00:00, 25.16it/s]


| Epoch 1 | Train | loss 0.462796 | acc 0.881849 |


100%|██████████| 102/102 [00:01<00:00, 78.58it/s]


| Epoch 4 | Validate | loss 0.293514 | acc 0.917877 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.2935 **********************


100%|██████████| 439/439 [00:17<00:00, 25.13it/s]


| Epoch 2 | Train | loss 0.164377 | acc 0.950894 |


100%|██████████| 102/102 [00:01<00:00, 78.51it/s]


| Epoch 4 | Validate | loss 0.258461 | acc 0.932849 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2585 **********************


100%|██████████| 439/439 [00:17<00:00, 25.16it/s]


| Epoch 3 | Train | loss 0.099498 | acc 0.969139 |


100%|██████████| 102/102 [00:01<00:00, 79.12it/s]


| Epoch 4 | Validate | loss 0.258447 | acc 0.932674 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2584 **********************


100%|██████████| 439/439 [00:17<00:00, 25.18it/s]


| Epoch 4 | Train | loss 0.075649 | acc 0.975376 |


100%|██████████| 102/102 [00:01<00:00, 79.67it/s]


| Epoch 4 | Validate | loss 0.268111 | acc 0.933940 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:17<00:00, 25.16it/s]


| Epoch 5 | Train | loss 0.063918 | acc 0.978995 |


100%|██████████| 102/102 [00:01<00:00, 79.66it/s]
[32m[I 2023-03-16 19:32:56,252][0m Trial 21 finished with value: 0.2584473490715027 and parameters: {'embed_size': 512, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.000491270685555596}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.275613 | acc 0.934290 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:16<00:00, 26.03it/s]


| Epoch 1 | Train | loss 0.456116 | acc 0.879703 |


100%|██████████| 102/102 [00:01<00:00, 82.65it/s]


| Epoch 4 | Validate | loss 0.313292 | acc 0.917604 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3133 **********************


100%|██████████| 439/439 [00:16<00:00, 26.29it/s]


| Epoch 2 | Train | loss 0.172696 | acc 0.948586 |


100%|██████████| 102/102 [00:01<00:00, 81.63it/s]


| Epoch 4 | Validate | loss 0.268540 | acc 0.929520 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2685 **********************


100%|██████████| 439/439 [00:16<00:00, 26.27it/s]


| Epoch 3 | Train | loss 0.100262 | acc 0.968245 |


100%|██████████| 102/102 [00:01<00:00, 82.27it/s]


| Epoch 4 | Validate | loss 0.263698 | acc 0.935555 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2637 **********************


100%|██████████| 439/439 [00:16<00:00, 26.27it/s]


| Epoch 4 | Train | loss 0.071946 | acc 0.976574 |


100%|██████████| 102/102 [00:01<00:00, 83.16it/s]


| Epoch 4 | Validate | loss 0.298925 | acc 0.935263 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:16<00:00, 26.27it/s]


| Epoch 5 | Train | loss 0.056561 | acc 0.981294 |


100%|██████████| 102/102 [00:01<00:00, 83.12it/s]
[32m[I 2023-03-16 19:34:26,242][0m Trial 22 finished with value: 0.26369842886924744 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.00039760896970047707}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.277697 | acc 0.935517 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:04<00:00, 89.24it/s]


| Epoch 1 | Train | loss 0.532205 | acc 0.862303 |


100%|██████████| 102/102 [00:00<00:00, 220.09it/s]


| Epoch 4 | Validate | loss 0.339167 | acc 0.903547 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3392 **********************


100%|██████████| 439/439 [00:04<00:00, 88.77it/s]


| Epoch 2 | Train | loss 0.259917 | acc 0.923849 |


100%|██████████| 102/102 [00:00<00:00, 219.71it/s]


| Epoch 4 | Validate | loss 0.276517 | acc 0.921868 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2765 **********************


100%|██████████| 439/439 [00:04<00:00, 88.79it/s]


| Epoch 3 | Train | loss 0.160868 | acc 0.951154 |


100%|██████████| 102/102 [00:00<00:00, 225.16it/s]


| Epoch 4 | Validate | loss 0.265726 | acc 0.928955 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2657 **********************


100%|██████████| 439/439 [00:04<00:00, 89.17it/s]


| Epoch 4 | Train | loss 0.112786 | acc 0.964321 |


100%|██████████| 102/102 [00:00<00:00, 218.91it/s]


| Epoch 4 | Validate | loss 0.275543 | acc 0.932421 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:04<00:00, 89.20it/s]


| Epoch 5 | Train | loss 0.083377 | acc 0.973225 |


100%|██████████| 102/102 [00:00<00:00, 226.47it/s]
[32m[I 2023-03-16 19:34:53,224][0m Trial 23 finished with value: 0.265726238489151 and parameters: {'embed_size': 128, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 2, 'lr': 0.0005653628827193151}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.298846 | acc 0.932362 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:21<00:00, 20.76it/s]


| Epoch 1 | Train | loss 0.561393 | acc 0.858600 |


100%|██████████| 102/102 [00:01<00:00, 65.51it/s]


| Epoch 4 | Validate | loss 0.385062 | acc 0.905300 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3851 **********************


100%|██████████| 439/439 [00:21<00:00, 20.77it/s]


| Epoch 2 | Train | loss 0.225705 | acc 0.932183 |


100%|██████████| 102/102 [00:01<00:00, 64.84it/s]


| Epoch 4 | Validate | loss 0.281786 | acc 0.928352 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2818 **********************


100%|██████████| 439/439 [00:21<00:00, 20.76it/s]


| Epoch 3 | Train | loss 0.140755 | acc 0.957598 |


100%|██████████| 102/102 [00:01<00:00, 64.95it/s]


| Epoch 4 | Validate | loss 0.259125 | acc 0.930357 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2591 **********************


100%|██████████| 439/439 [00:21<00:00, 20.75it/s]


| Epoch 4 | Train | loss 0.106460 | acc 0.967017 |


100%|██████████| 102/102 [00:01<00:00, 65.49it/s]


| Epoch 4 | Validate | loss 0.273406 | acc 0.932635 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:21<00:00, 20.76it/s]


| Epoch 5 | Train | loss 0.090156 | acc 0.972331 |


100%|██████████| 102/102 [00:01<00:00, 65.71it/s]
[32m[I 2023-03-16 19:36:46,900][0m Trial 24 finished with value: 0.25912490487098694 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 4, 'lr': 0.00044759153063910097}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.312463 | acc 0.932304 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 5.
Best epoch is: 3


100%|██████████| 439/439 [00:09<00:00, 46.36it/s]


| Epoch 1 | Train | loss 0.565234 | acc 0.856626 |


100%|██████████| 102/102 [00:00<00:00, 133.88it/s]


| Epoch 4 | Validate | loss 0.350831 | acc 0.902749 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3508 **********************


100%|██████████| 439/439 [00:09<00:00, 46.42it/s]


| Epoch 2 | Train | loss 0.264232 | acc 0.921423 |


100%|██████████| 102/102 [00:00<00:00, 136.20it/s]


| Epoch 4 | Validate | loss 0.295738 | acc 0.922219 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2957 **********************


100%|██████████| 439/439 [00:09<00:00, 46.37it/s]


| Epoch 3 | Train | loss 0.160637 | acc 0.950511 |


100%|██████████| 102/102 [00:00<00:00, 135.81it/s]


| Epoch 4 | Validate | loss 0.258126 | acc 0.928332 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2581 **********************


100%|██████████| 439/439 [00:09<00:00, 46.38it/s]


| Epoch 4 | Train | loss 0.109126 | acc 0.965897 |


100%|██████████| 102/102 [00:00<00:00, 135.22it/s]


| Epoch 4 | Validate | loss 0.252187 | acc 0.931506 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 4 | New best validation loss: 0.2522 **********************


100%|██████████| 439/439 [00:09<00:00, 46.37it/s]


| Epoch 5 | Train | loss 0.081098 | acc 0.973770 |


100%|██████████| 102/102 [00:00<00:00, 133.27it/s]


| Epoch 4 | Validate | loss 0.250334 | acc 0.932966 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.2503 **********************


100%|██████████| 439/439 [00:09<00:00, 46.33it/s]


| Epoch 6 | Train | loss 0.063779 | acc 0.978548 |


100%|██████████| 102/102 [00:00<00:00, 134.57it/s]


| Epoch 4 | Validate | loss 0.261967 | acc 0.936217 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:09<00:00, 46.21it/s]


| Epoch 7 | Train | loss 0.051799 | acc 0.982703 |


100%|██████████| 102/102 [00:00<00:00, 133.48it/s]
[32m[I 2023-03-16 19:37:58,602][0m Trial 25 finished with value: 0.2503337860107422 and parameters: {'embed_size': 256, 'num_heads': 8, 'hidden_size': 256, 'num_layers': 3, 'lr': 0.00028890514549803544}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.267450 | acc 0.934835 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 7.
Best epoch is: 5


100%|██████████| 439/439 [00:12<00:00, 36.27it/s]


| Epoch 1 | Train | loss 0.423840 | acc 0.886971 |


100%|██████████| 102/102 [00:00<00:00, 112.02it/s]


| Epoch 4 | Validate | loss 0.302407 | acc 0.917293 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3024 **********************


100%|██████████| 439/439 [00:12<00:00, 36.26it/s]


| Epoch 2 | Train | loss 0.157572 | acc 0.951891 |


100%|██████████| 102/102 [00:00<00:00, 113.27it/s]


| Epoch 4 | Validate | loss 0.257586 | acc 0.930591 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2576 **********************


100%|██████████| 439/439 [00:12<00:00, 36.25it/s]


| Epoch 3 | Train | loss 0.084565 | acc 0.973185 |


100%|██████████| 102/102 [00:00<00:00, 110.79it/s]


| Epoch 4 | Validate | loss 0.277166 | acc 0.931837 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:12<00:00, 36.24it/s]


| Epoch 4 | Train | loss 0.058068 | acc 0.980807 |


100%|██████████| 102/102 [00:00<00:00, 111.73it/s]
[32m[I 2023-03-16 19:38:50,792][0m Trial 26 finished with value: 0.25758594274520874 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 2, 'lr': 0.0003521368282502649}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.295298 | acc 0.934309 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:05<00:00, 73.74it/s]


| Epoch 1 | Train | loss 0.560826 | acc 0.855010 |


100%|██████████| 102/102 [00:00<00:00, 211.19it/s]


| Epoch 4 | Validate | loss 0.392518 | acc 0.895623 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3925 **********************


100%|██████████| 439/439 [00:05<00:00, 76.34it/s]


| Epoch 2 | Train | loss 0.267383 | acc 0.919660 |


100%|██████████| 102/102 [00:00<00:00, 204.00it/s]


| Epoch 4 | Validate | loss 0.288470 | acc 0.918967 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2885 **********************


100%|██████████| 439/439 [00:05<00:00, 78.45it/s]


| Epoch 3 | Train | loss 0.166562 | acc 0.949131 |


100%|██████████| 102/102 [00:00<00:00, 208.24it/s]


| Epoch 4 | Validate | loss 0.257966 | acc 0.928157 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 3 | New best validation loss: 0.2580 **********************


100%|██████████| 439/439 [00:05<00:00, 77.00it/s]


| Epoch 4 | Train | loss 0.117744 | acc 0.963162 |


100%|██████████| 102/102 [00:00<00:00, 207.73it/s]


| Epoch 4 | Validate | loss 0.263512 | acc 0.932635 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:05<00:00, 77.07it/s]


| Epoch 5 | Train | loss 0.091973 | acc 0.970725 |


100%|██████████| 102/102 [00:00<00:00, 212.91it/s]


| Epoch 4 | Validate | loss 0.252870 | acc 0.934718 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 5 | New best validation loss: 0.2529 **********************


100%|██████████| 439/439 [00:05<00:00, 79.20it/s]


| Epoch 6 | Train | loss 0.073308 | acc 0.976093 |


100%|██████████| 102/102 [00:00<00:00, 212.30it/s]


| Epoch 4 | Validate | loss 0.258185 | acc 0.934426 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:05<00:00, 76.87it/s]


| Epoch 7 | Train | loss 0.062273 | acc 0.979285 |


100%|██████████| 102/102 [00:00<00:00, 203.12it/s]
[32m[I 2023-03-16 19:39:34,278][0m Trial 27 finished with value: 0.2528698146343231 and parameters: {'embed_size': 128, 'num_heads': 2, 'hidden_size': 128, 'num_layers': 3, 'lr': 0.0006281171375459057}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.296142 | acc 0.935770 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 7.
Best epoch is: 5


100%|██████████| 439/439 [00:21<00:00, 20.77it/s]


| Epoch 1 | Train | loss 0.543221 | acc 0.862794 |


100%|██████████| 102/102 [00:01<00:00, 65.50it/s]


| Epoch 4 | Validate | loss 0.321441 | acc 0.906487 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3214 **********************


100%|██████████| 439/439 [00:21<00:00, 20.76it/s]


| Epoch 2 | Train | loss 0.221686 | acc 0.934486 |


100%|██████████| 102/102 [00:01<00:00, 64.94it/s]


| Epoch 4 | Validate | loss 0.259568 | acc 0.926969 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2596 **********************


100%|██████████| 439/439 [00:21<00:00, 20.77it/s]


| Epoch 3 | Train | loss 0.142000 | acc 0.958020 |


100%|██████████| 102/102 [00:01<00:00, 65.04it/s]


| Epoch 4 | Validate | loss 0.290885 | acc 0.926152 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:21<00:00, 20.75it/s]


| Epoch 4 | Train | loss 0.113382 | acc 0.965642 |


100%|██████████| 102/102 [00:01<00:00, 65.22it/s]
[32m[I 2023-03-16 19:41:05,205][0m Trial 28 finished with value: 0.2595675587654114 and parameters: {'embed_size': 512, 'num_heads': 4, 'hidden_size': 256, 'num_layers': 4, 'lr': 0.0004924273743615496}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.304296 | acc 0.931194 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2


100%|██████████| 439/439 [00:07<00:00, 57.22it/s]


| Epoch 1 | Train | loss 0.468313 | acc 0.876177 |


100%|██████████| 102/102 [00:00<00:00, 160.39it/s]


| Epoch 4 | Validate | loss 0.323180 | acc 0.908123 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 1 | New best validation loss: 0.3232 **********************


100%|██████████| 439/439 [00:07<00:00, 57.39it/s]


| Epoch 2 | Train | loss 0.206392 | acc 0.938209 |


100%|██████████| 102/102 [00:00<00:00, 158.59it/s]


| Epoch 4 | Validate | loss 0.263108 | acc 0.927690 |!!!!!!!!!!!!!!!!!!!!!!!!!
| Epoch 2 | New best validation loss: 0.2631 **********************


100%|██████████| 439/439 [00:07<00:00, 57.20it/s]


| Epoch 3 | Train | loss 0.118780 | acc 0.962686 |


100%|██████████| 102/102 [00:00<00:00, 162.94it/s]


| Epoch 4 | Validate | loss 0.269891 | acc 0.931077 |!!!!!!!!!!!!!!!!!!!!!!!!!


100%|██████████| 439/439 [00:07<00:00, 57.04it/s]


| Epoch 4 | Train | loss 0.080585 | acc 0.973736 |


100%|██████████| 102/102 [00:00<00:00, 160.12it/s]
[32m[I 2023-03-16 19:41:38,520][0m Trial 29 finished with value: 0.26310837268829346 and parameters: {'embed_size': 256, 'num_heads': 8, 'hidden_size': 512, 'num_layers': 2, 'lr': 0.00039022726015531454}. Best is trial 15 with value: 0.23818016052246094.[0m


| Epoch 4 | Validate | loss 0.308337 | acc 0.931934 |!!!!!!!!!!!!!!!!!!!!!!!!!
Early stopping triggered at epoch 4.
Best epoch is: 2
Best trial:
  Value: 0.2382
  Params: 
    embed_size: 256
    num_heads: 4
    hidden_size: 256
    num_layers: 3
    lr: 0.000456008621520148


In [28]:
# Score the current model on the validation split with conlleval's `evaluate`.
prediction = predict(model, val_dataloader, device)

# Flatten predicted and gold tags into one long sequence each, adding an 'O'
# after every sentence (presumably so that a chunk cannot run across a
# sentence boundary in the flattened stream).
pred_tags = [tag for sentence in prediction for tag in (*sentence, 'O')]
true_tags = [
    tag
    for line in val_raw['tags']
    for tag in (*line.strip().split(), 'O')
]

evaluate(true_tags, pred_tags)

processed 54612 tokens with 5942 phrases; found: 5956 phrases; correct: 4140.
accuracy:  67.53%; (non-O)
accuracy:  93.80%; precision:  69.51%; recall:  69.67%; FB1:  69.59
              LOC: precision:  83.95%; recall:  80.89%; FB1:  82.40  1770
             MISC: precision:  76.70%; recall:  72.13%; FB1:  74.34  867
              ORG: precision:  58.69%; recall:  61.45%; FB1:  60.04  1404
              PER: precision:  60.84%; recall:  63.25%; FB1:  62.02  1915


(69.50973807924782, 69.67351060249074, 69.59152798789712)

In [None]:
# Spot-check one validation example: its text, its gold tag string and the
# tags predicted for it, each on its own line.
samplenum = 2166

print(
    val_raw['text'][samplenum],
    val_raw['tags'][samplenum],
    prediction[samplenum],
    sep='\n',
)

In [29]:
# YOU SHOULD NOT CHANGE THIS CODEBLOCK
# make prediction on the test set and save to submission.txt
# `predict`, `model`, `test_dataloader` and `device` are not defined in this
# cell; they must already exist in the kernel when it runs.
preds = predict(model, test_dataloader, device)
# "w" mode creates submission.txt relative to the working directory, or
# overwrites it if it already exists.
with open("submission.txt", "w") as f:
    # One output line per element of `preds`: its tags joined by single spaces.
    for tags in preds:
        f.write(" ".join(tags) + "\n")

100%|██████████| 108/108 [00:01<00:00, 102.07it/s]


In [None]:
# Show the kernel's current working directory, i.e. the directory against
# which relative paths such as "submission.txt" are resolved.
# Plain Python instead of the bare `pwd` automagic, which raises NameError
# when automagic is disabled, when a variable named `pwd` exists, or when the
# notebook is exported to a script.
os.getcwd()

In [None]:
# List the entries of the current working directory, sorted by name and
# skipping dot-files (as a plain `ls` does).
# Plain Python instead of the bare `ls` automagic/alias, which needs IPython's
# automagic plus a POSIX shell and breaks if a variable named `ls` exists.
# The result is displayed as a Python list rather than a column listing.
sorted(entry for entry in os.listdir() if not entry.startswith('.'))