### Importing packages

In [1]:
# Libraries
import time

# Reading in files
import pandas as pd
import numpy as np

# Progress bar
from tqdm.auto import tqdm

# Torch modules
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast as AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup

# Lightning modules
import pytorch_lightning as pl
from torchmetrics.functional import accuracy, auroc
from torchmetrics import F1Score
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torchmetrics.classification import MulticlassAccuracy, MulticlassF1Score, MulticlassPrecision, MulticlassRecall

# Split dataset/validation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from sklearn.model_selection import StratifiedKFold

# Importing own functions 
from extract.importing_data import get_section

  from .autonotebook import tqdm as notebook_tqdm


### Reading in data

In [2]:
df = pd.read_csv("../Data/Collated_dataset_for_scientific_papers.csv")

In [3]:
df = df[["Intro Concl", "Labels"]]

In [4]:
df.columns = ["string", "label"]

In [5]:
possible_labels = list(df['label'].unique())
possible_labels_num = list(range(0,len(possible_labels)))

In [6]:
len(possible_labels)

21

### Tokenizer 

In [7]:
MODEL_NAME = 'allenai/specter'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [8]:
mapping = zip(possible_labels, possible_labels_num)
label_to_idx = {label: num  for label, num in mapping}
mapping = zip(possible_labels, possible_labels_num)
idx_to_label = {num: label for label, num in mapping}

In [9]:
MAX_TOKEN_COUNT = 512
N_EPOCHS = 5
BATCH_SIZE = 8 # Changes: Edit the batch size here
KFOLD = 5

In [10]:
class SpecterDataset(Dataset):
    
    def __init__(self, data: pd.DataFrame, tokenizer: AutoTokenizer, max_token_len: int = MAX_TOKEN_COUNT, mapping = label_to_idx):
        self.tokenizer = tokenizer
        self.data = data
        self.max_token_len = max_token_len
        self.mapping = mapping
    
    def __len__(self):
        return len(self.data)
    
    
    def __getitem__(self, index: int):
    
        data_row = self.data.iloc[index]
        text = data_row["string"]
        labels = self.mapping[data_row["label"]]

        encoding = self.tokenizer.encode_plus(
          text,
          add_special_tokens=True,
          max_length=self.max_token_len,
          return_token_type_ids=False,
          padding="max_length",
          truncation=True,
          return_attention_mask=True,
          return_tensors='pt',
        )

        return dict(
          text=text,
          input_ids=encoding["input_ids"].flatten(),
          attention_mask=encoding["attention_mask"].flatten(),
          labels=labels
        )

In [11]:
class SpecterDataModule(pl.LightningDataModule):
    
    def __init__(self, df,
                 tokenizer,
                 k = 0,  # fold number
                 split_seed = 123,  # split needs to be always the same for correct cross validation
                 num_splits = KFOLD,
                 batch_size = BATCH_SIZE, 
                 max_token_len = MAX_TOKEN_COUNT,
                 num_workers = 0,
                 pin_memory = False):
        
        super().__init__()

        self.save_hyperparameters(logger=False)
    
    def setup(self, stage=None):

        # choose fold to train on
        kf = StratifiedKFold(n_splits=self.hparams.num_splits, shuffle=True, random_state=self.hparams.split_seed)
        all_splits = [k for k in kf.split(df, df.label)]
        train_indexes, val_indexes = all_splits[self.hparams.k]
        train_indexes, val_indexes = train_indexes.tolist(), val_indexes.tolist()

        self.data_train, self.data_val = df.iloc[train_indexes], df.iloc[val_indexes]
        
        self.train_dataset = SpecterDataset(
          self.data_train,
          self.hparams.tokenizer,
          self.hparams.max_token_len
        )
        self.val_dataset = SpecterDataset(
          self.data_val,
          self.hparams.tokenizer,
          self.hparams.max_token_len
        )
        
    def train_dataloader(self):
        return DataLoader(
          self.train_dataset,
          batch_size = self.hparams.batch_size,
          shuffle=True
        )
    
    def val_dataloader(self):
        return DataLoader(
          self.val_dataset,
          batch_size = self.hparams.batch_size
        )

### Modelling

In [12]:
class SpecterClassModel(pl.LightningModule):
    
    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
    
        super().__init__()
        self.specter = AutoModel.from_pretrained(MODEL_NAME, return_dict=True)
        # Changes: Edit model architecture forward pass here
        self.classifier = nn.Linear(self.specter.config.hidden_size, n_classes)
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        self.criterion = nn.CrossEntropyLoss()
        self.acc = MulticlassAccuracy(num_classes = 21, average = 'weighted') #measure performance based on weighted average
        self.f1 = MulticlassF1Score(num_classes = 21, average = 'weighted')
        self.prec = MulticlassPrecision(num_classes = 21, average = 'weighted')
        self.rec = MulticlassRecall(num_classes = 21, average = 'weighted')
        
        # Changes: Comment below code to remove freezing of the SPECTER embeddings
        for name, param in self.specter.named_parameters():
            if name.startswith('embeddings'):
                param.requires_grad = False
    
    
    def forward(self, input_ids, attention_mask, labels=None):
        output = self.specter(input_ids, attention_mask=attention_mask)
        # Changes: Edit model architecture forward pass here
        output = self.classifier(output.pooler_output)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
            # acc = self.acc(output, labels)
            # f1 = self.f1(output, labels)
            # prec = self.prec(output, labels)
            # rec = self.rec(output, labels)
        return loss, output #, acc, f1, prec, rec

    
    def training_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        #loss, output, acc, f1, prec, rec = self(input_ids, attention_mask, labels)
        loss, output = self(input_ids, attention_mask, labels)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": output, "labels": labels}

    
    def validation_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        #loss, output, acc, f1, prec, rec = self(input_ids, attention_mask, labels)
        loss, output = self(input_ids, attention_mask, labels)
        self.log("val_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": output, "labels": labels}
    
    def training_epoch_end(self, outputs):
        avg_loss = sum(output['loss'].item() for output in outputs) / len(outputs)
        
        predictions = torch.cat([output['predictions'] for output in outputs])
        labels = torch.cat([output['labels'] for output in outputs])
        
        acc = self.acc(predictions, labels)
        f1 = self.f1(predictions, labels)
        prec = self.prec(predictions, labels)
        rec = self.rec(predictions, labels)
        print(f"******Train epoch {self.current_epoch} eval metrics: loss {avg_loss:.8f}, f1 {f1:.4f} prec {prec:.4f} rec {rec:.4f}, acc {acc:.4f}")
        
        self.logger.experiment.add_scalars('loss', {'train': avg_loss}, self.current_epoch)    
    
    def validation_epoch_end(self, outputs):
        avg_loss = sum(output['loss'].item() for output in outputs) / len(outputs)

        predictions = torch.cat([output['predictions'] for output in outputs])
        labels = torch.cat([output['labels'] for output in outputs])
        
        acc = self.acc(predictions, labels)
        f1 = self.f1(predictions, labels)
        prec = self.prec(predictions, labels)
        rec = self.rec(predictions, labels)
        print(f"******Val epoch {self.current_epoch} eval metrics: loss {avg_loss:.8f}, f1 {f1:.4f} prec {prec:.4f} rec {rec:.4f}, acc {acc:.4f}")
        
        #For final output
        self.log("Ignore/acc", acc, logger=True)
        self.log("Ignore/f1", f1, logger=True)
        self.log("Ignore/prec", prec, logger=True)
        self.log("Ignore/rec", rec, logger=True)
        
        #For Tensorboard visualisaion with Epoch as x axis
        self.logger.experiment.add_scalar('Collated/acc', acc, self.current_epoch)
        self.logger.experiment.add_scalar('Collated/f1', f1, self.current_epoch)
        self.logger.experiment.add_scalar('Collated/prec', prec, self.current_epoch)
        self.logger.experiment.add_scalar('Collated/rec', rec, self.current_epoch)
    
        self.logger.experiment.add_scalars('loss', {'val': avg_loss}, self.current_epoch)
    
    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=2e-5) # Changes: Edit the learning rate
        scheduler = get_linear_schedule_with_warmup( # Changes: Edit the scheduler
          optimizer,
          num_warmup_steps=self.n_warmup_steps,
          num_training_steps=self.n_training_steps
        )
        
        # Changes: Edit the optimizer
        return dict(
          optimizer = optimizer,
          lr_scheduler = dict(
            scheduler = scheduler,
            interval = 'step'
          )
        )

In [13]:
experiment = "Default"
time_now = time.strftime("%d_%m_%Y_%H_%M")

In [14]:
# early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)

In [15]:
steps_per_epoch= round(len(df) * 0.8 // BATCH_SIZE)
total_training_steps = steps_per_epoch * N_EPOCHS
warmup_steps = total_training_steps // 5
warmup_steps, total_training_steps

(254, 1270)

In [16]:
result_acc_lst = []
result_f1_lst = []
result_prec_lst = []
result_recall_lst = []
nums_fold = 5
split_seed = 123

for k in range(nums_fold):
    data_module = SpecterDataModule(df, tokenizer, k = k)
    
    checkpoint_callback = ModelCheckpoint(
        dirpath = "checkpoints",
        filename = f"best-checkpoint-Specter_{experiment}_{time_now}_run{k}",
        save_top_k = 1,
        verbose = True,
        monitor = "val_loss",
        mode = "min"
    )

    # here we train the model on given split...
    logger = TensorBoardLogger("lightning_logs", name = f"Specter_{experiment}_{time_now}_run{k}")
    model = SpecterClassModel(n_classes = 21, n_warmup_steps = warmup_steps, n_training_steps = total_training_steps)
    trainer = pl.Trainer(logger = logger, callbacks = [checkpoint_callback], max_epochs = N_EPOCHS, accelerator = "auto")#, enable_progress_bar = False)
    trainer.fit(model, data_module)

    accuracy = trainer.callback_metrics['Ignore/acc'].item()
    f1 = trainer.callback_metrics['Ignore/f1'].item()
    precision = trainer.callback_metrics['Ignore/prec'].item()
    recall = trainer.callback_metrics['Ignore/rec'].item()
    
    result_acc_lst.append(accuracy)
    result_f1_lst.append(f1)
    result_prec_lst.append(precision)
    result_recall_lst.append(recall)

average_val_acc_score = sum(result_acc_lst) / len(result_acc_lst)
average_val_f1_score = sum(result_f1_lst) / len(result_f1_lst)
average_val_prec_score = sum(result_prec_lst) / len(result_prec_lst)
average_val_recall_score = sum(result_recall_lst) / len(result_recall_lst)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs\Specter_Default_06_02_2023_01_41_run0
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params
---------------------------------------------------
0 | specter    | BertModel           | 109 M 
1 | classifier | Linear              | 16.1 K
2 | criterion  | CrossEntropyLoss    | 0     
3 | acc        | MulticlassAccuracy  | 0     
4 | f1         | MulticlassF1Score   | 0     
5 | prec       | MulticlassPrecision | 0     
6 | rec        | MulticlassRecall    | 0     
---------------------------------------------------
85.7 M    Trainable params
24.3 M    Non-trainable params
109 M     Total params
439.818   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|                                                              | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0: 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00,  2.62it/s]******Val epoch 0 eval metrics: loss 3.23358631, f1 0.0000 prec 0.0000 rec 0.0000, acc 0.0000
                                                                                                                       

  rank_zero_warn(


Epoch 0:  80%|██████████████████████████▍      | 255/319 [00:51<00:12,  4.93it/s, loss=1.89, v_num=0, train_loss=1.550]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 0:  80%|██████████████████████████▍      | 256/319 [00:51<00:12,  4.95it/s, loss=1.89, v_num=0, train_loss=1.550][A
Epoch 0:  81%|██████████████████████████▌      | 257/319 [00:51<00:12,  4.97it/s, loss=1.89, v_num=0, train_loss=1.550][A
Epoch 0:  81%|██████████████████████████▋      | 258/319 [00:51<00:12,  4.98it/s, loss=1.89, v_num=0, train_loss=1.550][A
Epoch 0:  81%|██████████████████████████▊      | 259/319 [00:51<00:11,  5.00it/s, loss=1.89, v_num=0, train_loss=1.550][A
Epoch 0:  82%|██████████████████████████▉      | 260/319 [00:51<00:11,  5.02it/s, loss=1.89, v_num=0, train_

Epoch 0, global step 255: 'val_loss' reached 1.77107 (best 1.77107), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run0.ckpt' as top 1


Epoch 1:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.88it/s, loss=1.36, v_num=0, train_loss=1.110, val_loss=1.770]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 1:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.90it/s, loss=1.36, v_num=0, train_loss=1.110, val_loss=1.770][A
Epoch 1:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.92it/s, loss=1.36, v_num=0, train_loss=1.110, val_loss=1.770][A
Epoch 1:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.93it/s, loss=1.36, v_num=0, train_loss=1.110, val_loss=1.770][A
Epoch 1:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.95it/s, loss=1.36, v_num=0, train_loss=1.110, val_loss=1.770][A
Epoch 1:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.97it/s, loss=1.36, v_num=0, train_loss=1.110, val_

Epoch 1, global step 510: 'val_loss' reached 1.27289 (best 1.27289), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run0.ckpt' as top 1


Epoch 2:  80%|████████████▊   | 255/319 [00:52<00:13,  4.88it/s, loss=0.917, v_num=0, train_loss=0.535, val_loss=1.270]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 2:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.917, v_num=0, train_loss=0.535, val_loss=1.270][A
Epoch 2:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.917, v_num=0, train_loss=0.535, val_loss=1.270][A
Epoch 2:  81%|████████████▉   | 258/319 [00:52<00:12,  4.93it/s, loss=0.917, v_num=0, train_loss=0.535, val_loss=1.270][A
Epoch 2:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.917, v_num=0, train_loss=0.535, val_loss=1.270][A
Epoch 2:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.917, v_num=0, train_loss=0.535, val_

Epoch 2, global step 765: 'val_loss' reached 1.16398 (best 1.16398), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run0.ckpt' as top 1


Epoch 3:  80%|████████████▊   | 255/319 [00:52<00:13,  4.85it/s, loss=0.585, v_num=0, train_loss=0.155, val_loss=1.160]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 3:  80%|████████████▊   | 256/319 [00:52<00:12,  4.87it/s, loss=0.585, v_num=0, train_loss=0.155, val_loss=1.160][A
Epoch 3:  81%|████████████▉   | 257/319 [00:52<00:12,  4.88it/s, loss=0.585, v_num=0, train_loss=0.155, val_loss=1.160][A
Epoch 3:  81%|████████████▉   | 258/319 [00:52<00:12,  4.90it/s, loss=0.585, v_num=0, train_loss=0.155, val_loss=1.160][A
Epoch 3:  81%|████████████▉   | 259/319 [00:52<00:12,  4.92it/s, loss=0.585, v_num=0, train_loss=0.155, val_loss=1.160][A
Epoch 3:  82%|█████████████   | 260/319 [00:52<00:11,  4.93it/s, loss=0.585, v_num=0, train_loss=0.155, val_

Epoch 3, global step 1020: 'val_loss' reached 1.12732 (best 1.12732), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run0.ckpt' as top 1


Epoch 4:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.88it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.130]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 4:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.90it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.130][A
Epoch 4:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.91it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.130][A
Epoch 4:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.93it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.130][A
Epoch 4:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.95it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.130][A
Epoch 4:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.96it/s, loss=0.54, v_num=0, train_loss=0.181, val_

Epoch 4, global step 1275: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█████████████████| 319/319 [00:56<00:00,  5.66it/s, loss=0.54, v_num=0, train_loss=0.181, val_loss=1.140]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs\Specter_Default_06_02_2023_01_41_run1
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params
---------------------------------------------------
0 | specter    | BertModel           | 109 M 
1 | classifier | Linear              | 16.1 K
2 | criterion  | CrossEntropyLoss    | 0     
3 | acc        | MulticlassAccuracy  | 0     
4 | f1         | MulticlassF1Score   | 0     
5 | prec       | MulticlassPrecision | 0     
6 | rec        | MulticlassRecall    | 0     
---------------------------------------------------
85.7 M    Trainable params
24.3 M    Non-trainable params
109 M     Total params
439.818   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0: 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00, 54.05it/s]******Val epoch 0 eval metrics: loss 2.91295171, f1 0.1000 prec 0.2500 rec 0.0625, acc 0.0625
                                                                                                                       

  rank_zero_warn(


Epoch 0:  80%|██████████████████████████▍      | 255/319 [00:52<00:13,  4.89it/s, loss=1.72, v_num=0, train_loss=1.280]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 0:  80%|██████████████████████████▍      | 256/319 [00:52<00:12,  4.91it/s, loss=1.72, v_num=0, train_loss=1.280][A
Epoch 0:  81%|██████████████████████████▌      | 257/319 [00:52<00:12,  4.92it/s, loss=1.72, v_num=0, train_loss=1.280][A
Epoch 0:  81%|██████████████████████████▋      | 258/319 [00:52<00:12,  4.94it/s, loss=1.72, v_num=0, train_loss=1.280][A
Epoch 0:  81%|██████████████████████████▊      | 259/319 [00:52<00:12,  4.96it/s, loss=1.72, v_num=0, train_loss=1.280][A
Epoch 0:  82%|██████████████████████████▉      | 260/319 [00:52<00:11,  4.97it/s, loss=1.72, v_num=0, train_

Epoch 0, global step 255: 'val_loss' reached 1.80053 (best 1.80053), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run1.ckpt' as top 1


Epoch 1:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.87it/s, loss=1.29, v_num=0, train_loss=1.030, val_loss=1.800]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 1:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.89it/s, loss=1.29, v_num=0, train_loss=1.030, val_loss=1.800][A
Epoch 1:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.90it/s, loss=1.29, v_num=0, train_loss=1.030, val_loss=1.800][A
Epoch 1:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.92it/s, loss=1.29, v_num=0, train_loss=1.030, val_loss=1.800][A
Epoch 1:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.94it/s, loss=1.29, v_num=0, train_loss=1.030, val_loss=1.800][A
Epoch 1:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.95it/s, loss=1.29, v_num=0, train_loss=1.030, val_

Epoch 1, global step 510: 'val_loss' reached 1.38668 (best 1.38668), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run1.ckpt' as top 1


Epoch 2:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.87it/s, loss=1.08, v_num=0, train_loss=1.410, val_loss=1.390]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 2:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.88it/s, loss=1.08, v_num=0, train_loss=1.410, val_loss=1.390][A
Epoch 2:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.90it/s, loss=1.08, v_num=0, train_loss=1.410, val_loss=1.390][A
Epoch 2:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.92it/s, loss=1.08, v_num=0, train_loss=1.410, val_loss=1.390][A
Epoch 2:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.93it/s, loss=1.08, v_num=0, train_loss=1.410, val_loss=1.390][A
Epoch 2:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.95it/s, loss=1.08, v_num=0, train_loss=1.410, val_

Epoch 2, global step 765: 'val_loss' reached 1.24673 (best 1.24673), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run1.ckpt' as top 1


Epoch 3:  80%|████████████▊   | 255/319 [00:52<00:13,  4.87it/s, loss=0.818, v_num=0, train_loss=1.390, val_loss=1.250]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 3:  80%|████████████▊   | 256/319 [00:52<00:12,  4.88it/s, loss=0.818, v_num=0, train_loss=1.390, val_loss=1.250][A
Epoch 3:  81%|████████████▉   | 257/319 [00:52<00:12,  4.90it/s, loss=0.818, v_num=0, train_loss=1.390, val_loss=1.250][A
Epoch 3:  81%|████████████▉   | 258/319 [00:52<00:12,  4.92it/s, loss=0.818, v_num=0, train_loss=1.390, val_loss=1.250][A
Epoch 3:  81%|████████████▉   | 259/319 [00:52<00:12,  4.93it/s, loss=0.818, v_num=0, train_loss=1.390, val_loss=1.250][A
Epoch 3:  82%|█████████████   | 260/319 [00:52<00:11,  4.95it/s, loss=0.818, v_num=0, train_loss=1.390, val_

Epoch 3, global step 1020: 'val_loss' was not in top 1


Epoch 4:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.88it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.280]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 4:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.90it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.280][A
Epoch 4:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.91it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.280][A
Epoch 4:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.93it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.280][A
Epoch 4:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.95it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.280][A
Epoch 4:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.96it/s, loss=0.57, v_num=0, train_loss=0.720, val_

Epoch 4, global step 1275: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█████████████████| 319/319 [00:56<00:00,  5.66it/s, loss=0.57, v_num=0, train_loss=0.720, val_loss=1.270]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs\Specter_Default_06_02_2023_01_41_run2
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params
---------------------------------------------------
0 | specter    | BertModel           | 109 M 
1 | classifier | Linear              | 16.1 K
2 | criterion  | CrossEntropyLoss    | 0     
3 | acc        | MulticlassAccuracy  | 0     
4 | f1         | MulticlassF1Score   | 0     
5 | prec       | MulticlassPrecision | 0     
6 | rec        | MulticlassRecall    | 0     
---------------------------------------------------
85.7 M    Trainable params
24.3 M    Non-trainable params
109 M     Total params
439.818   Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00, 38.46it/s]

  rank_zero_warn(


******Val epoch 0 eval metrics: loss 2.86650538, f1 0.1964 prec 0.4583 rec 0.1250, acc 0.1250
                                                                                                                       

  rank_zero_warn(


Epoch 0:  80%|██████████████████████████▍      | 255/319 [00:52<00:13,  4.89it/s, loss=1.88, v_num=0, train_loss=1.670]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 0:  80%|██████████████████████████▍      | 256/319 [00:52<00:12,  4.91it/s, loss=1.88, v_num=0, train_loss=1.670][A
Epoch 0:  81%|██████████████████████████▌      | 257/319 [00:52<00:12,  4.92it/s, loss=1.88, v_num=0, train_loss=1.670][A
Epoch 0:  81%|██████████████████████████▋      | 258/319 [00:52<00:12,  4.94it/s, loss=1.88, v_num=0, train_loss=1.670][A
Epoch 0:  81%|██████████████████████████▊      | 259/319 [00:52<00:12,  4.96it/s, loss=1.88, v_num=0, train_loss=1.670][A
Epoch 0:  82%|██████████████████████████▉      | 260/319 [00:52<00:11,  4.97it/s, loss=1.88, v_num=0, train_

Epoch 0, global step 255: 'val_loss' reached 1.73662 (best 1.73662), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run2.ckpt' as top 1


Epoch 1:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.87it/s, loss=1.22, v_num=0, train_loss=1.410, val_loss=1.740]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 1:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.88it/s, loss=1.22, v_num=0, train_loss=1.410, val_loss=1.740][A
Epoch 1:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.90it/s, loss=1.22, v_num=0, train_loss=1.410, val_loss=1.740][A
Epoch 1:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.92it/s, loss=1.22, v_num=0, train_loss=1.410, val_loss=1.740][A
Epoch 1:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.93it/s, loss=1.22, v_num=0, train_loss=1.410, val_loss=1.740][A
Epoch 1:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.95it/s, loss=1.22, v_num=0, train_loss=1.410, val_

Epoch 1, global step 510: 'val_loss' reached 1.35000 (best 1.35000), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run2.ckpt' as top 1


Epoch 2:  80%|████████████▊   | 255/319 [00:52<00:13,  4.87it/s, loss=0.933, v_num=0, train_loss=1.490, val_loss=1.350]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 2:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.933, v_num=0, train_loss=1.490, val_loss=1.350][A
Epoch 2:  81%|████████████▉   | 257/319 [00:52<00:12,  4.90it/s, loss=0.933, v_num=0, train_loss=1.490, val_loss=1.350][A
Epoch 2:  81%|████████████▉   | 258/319 [00:52<00:12,  4.92it/s, loss=0.933, v_num=0, train_loss=1.490, val_loss=1.350][A
Epoch 2:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.933, v_num=0, train_loss=1.490, val_loss=1.350][A
Epoch 2:  82%|█████████████   | 260/319 [00:52<00:11,  4.95it/s, loss=0.933, v_num=0, train_loss=1.490, val_

Epoch 2, global step 765: 'val_loss' reached 1.19610 (best 1.19610), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run2.ckpt' as top 1


Epoch 3:  80%|████████████▊   | 255/319 [00:52<00:13,  4.87it/s, loss=0.633, v_num=0, train_loss=0.678, val_loss=1.200]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 3:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.633, v_num=0, train_loss=0.678, val_loss=1.200][A
Epoch 3:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.633, v_num=0, train_loss=0.678, val_loss=1.200][A
Epoch 3:  81%|████████████▉   | 258/319 [00:52<00:12,  4.92it/s, loss=0.633, v_num=0, train_loss=0.678, val_loss=1.200][A
Epoch 3:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.633, v_num=0, train_loss=0.678, val_loss=1.200][A
Epoch 3:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.633, v_num=0, train_loss=0.678, val_

Epoch 3, global step 1020: 'val_loss' was not in top 1


Epoch 4:  80%|███████████▉   | 255/319 [00:52<00:13,  4.89it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.210]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 4:  80%|████████████   | 256/319 [00:52<00:12,  4.90it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.210][A
Epoch 4:  81%|████████████   | 257/319 [00:52<00:12,  4.92it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.210][A
Epoch 4:  81%|████████████▏  | 258/319 [00:52<00:12,  4.94it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.210][A
Epoch 4:  81%|████████████▏  | 259/319 [00:52<00:12,  4.95it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.210][A
Epoch 4:  82%|████████████▏  | 260/319 [00:52<00:11,  4.97it/s, loss=0.511, v_num=0, train_loss=0.0904, val_

Epoch 4, global step 1275: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|███████████████| 319/319 [00:56<00:00,  5.67it/s, loss=0.511, v_num=0, train_loss=0.0904, val_loss=1.200]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs\Specter_Default_06_02_2023_01_41_run3
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params
---------------------------------------------------
0 | specter    | BertModel           | 109 M 
1 | classifier | Linear              | 16.1 K
2 | criterion  | CrossEntropyLoss    | 0     
3 | acc        | MulticlassAccuracy  | 0     
4 | f1         | MulticlassF1Score   | 0     
5 | prec       | MulticlassPrecision | 0     
6 | rec        | MulticlassRecall    | 0     
---------------------------------------------------
85.7 M    Trainable params
24.3 M    Non-trainable params
109 M     Total params
439.818   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|                                                              | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0: 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00, 54.06it/s]******Val epoch 0 eval metrics: loss 3.06673944, f1 0.0000 prec 0.0000 rec 0.0000, acc 0.0000
                                                                                                                       

  rank_zero_warn(


Epoch 0:  80%|██████████████████████████▍      | 255/319 [00:52<00:13,  4.89it/s, loss=1.72, v_num=0, train_loss=1.870]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 0:  80%|██████████████████████████▍      | 256/319 [00:52<00:12,  4.91it/s, loss=1.72, v_num=0, train_loss=1.870][A
Epoch 0:  81%|██████████████████████████▌      | 257/319 [00:52<00:12,  4.93it/s, loss=1.72, v_num=0, train_loss=1.870][A
Epoch 0:  81%|██████████████████████████▋      | 258/319 [00:52<00:12,  4.94it/s, loss=1.72, v_num=0, train_loss=1.870][A
Epoch 0:  81%|██████████████████████████▊      | 259/319 [00:52<00:12,  4.96it/s, loss=1.72, v_num=0, train_loss=1.870][A
Epoch 0:  82%|██████████████████████████▉      | 260/319 [00:52<00:11,  4.98it/s, loss=1.72, v_num=0, train_

Epoch 0, global step 255: 'val_loss' reached 1.75508 (best 1.75508), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run3.ckpt' as top 1


Epoch 1:  80%|██████████████▍   | 255/319 [00:52<00:13,  4.87it/s, loss=1.1, v_num=0, train_loss=0.775, val_loss=1.760]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 1:  80%|██████████████▍   | 256/319 [00:52<00:12,  4.89it/s, loss=1.1, v_num=0, train_loss=0.775, val_loss=1.760][A
Epoch 1:  81%|██████████████▌   | 257/319 [00:52<00:12,  4.90it/s, loss=1.1, v_num=0, train_loss=0.775, val_loss=1.760][A
Epoch 1:  81%|██████████████▌   | 258/319 [00:52<00:12,  4.92it/s, loss=1.1, v_num=0, train_loss=0.775, val_loss=1.760][A
Epoch 1:  81%|██████████████▌   | 259/319 [00:52<00:12,  4.94it/s, loss=1.1, v_num=0, train_loss=0.775, val_loss=1.760][A
Epoch 1:  82%|██████████████▋   | 260/319 [00:52<00:11,  4.95it/s, loss=1.1, v_num=0, train_loss=0.775, val_

Epoch 1, global step 510: 'val_loss' reached 1.37140 (best 1.37140), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run3.ckpt' as top 1


Epoch 2:  80%|████████████▊   | 255/319 [00:52<00:13,  4.87it/s, loss=0.887, v_num=0, train_loss=0.546, val_loss=1.370]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 2:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.887, v_num=0, train_loss=0.546, val_loss=1.370][A
Epoch 2:  81%|████████████▉   | 257/319 [00:52<00:12,  4.90it/s, loss=0.887, v_num=0, train_loss=0.546, val_loss=1.370][A
Epoch 2:  81%|████████████▉   | 258/319 [00:52<00:12,  4.92it/s, loss=0.887, v_num=0, train_loss=0.546, val_loss=1.370][A
Epoch 2:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.887, v_num=0, train_loss=0.546, val_loss=1.370][A
Epoch 2:  82%|█████████████   | 260/319 [00:52<00:11,  4.95it/s, loss=0.887, v_num=0, train_loss=0.546, val_

Epoch 2, global step 765: 'val_loss' reached 1.29498 (best 1.29498), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run3.ckpt' as top 1


Epoch 3:  80%|████████████▊   | 255/319 [00:52<00:13,  4.88it/s, loss=0.639, v_num=0, train_loss=0.267, val_loss=1.290]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 3:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.639, v_num=0, train_loss=0.267, val_loss=1.290][A
Epoch 3:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.639, v_num=0, train_loss=0.267, val_loss=1.290][A
Epoch 3:  81%|████████████▉   | 258/319 [00:52<00:12,  4.93it/s, loss=0.639, v_num=0, train_loss=0.267, val_loss=1.290][A
Epoch 3:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.639, v_num=0, train_loss=0.267, val_loss=1.290][A
Epoch 3:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.639, v_num=0, train_loss=0.267, val_

Epoch 3, global step 1020: 'val_loss' reached 1.28121 (best 1.28121), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run3.ckpt' as top 1


Epoch 4:  80%|████████████▊   | 255/319 [00:52<00:13,  4.88it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.280]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 4:  80%|████████████▊   | 256/319 [00:52<00:12,  4.90it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.280][A
Epoch 4:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.280][A
Epoch 4:  81%|████████████▉   | 258/319 [00:52<00:12,  4.93it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.280][A
Epoch 4:  81%|████████████▉   | 259/319 [00:52<00:12,  4.95it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.280][A
Epoch 4:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.349, v_num=0, train_loss=0.175, val_

Epoch 4, global step 1275: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|████████████████| 319/319 [00:56<00:00,  5.67it/s, loss=0.349, v_num=0, train_loss=0.175, val_loss=1.300]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: lightning_logs\Specter_Default_06_02_2023_01_41_run4
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type                | Params
---------------------------------------------------
0 | specter    | BertModel           | 109 M 
1 | classifier | Linear              | 16.1 K
2 | criterion  | CrossEntropyLoss    | 0     
3 | acc        | MulticlassAccuracy  | 0     
4 | f1         | MulticlassF1Score   | 0     
5 | prec       | MulticlassPrecision | 0     
6 | rec        | MulticlassRecall    | 0     
---------------------------------------------------
85.7 M    Trainable params
24.3 M    Non-trainable params
109 M     Total params
439.818   Total estimated model params size (MB)


Sanity Checking:   0%|                                                                           | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0: 100%|██████████████████████████████████████████████████████| 2/2 [00:00<00:00, 45.45it/s]******Val epoch 0 eval metrics: loss 2.98585367, f1 0.0000 prec 0.0000 rec 0.0000, acc 0.0000
                                                                                                                       

  rank_zero_warn(


Epoch 0:  80%|██████████████████████████▍      | 255/319 [00:52<00:13,  4.90it/s, loss=1.85, v_num=0, train_loss=1.760]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 0:  80%|██████████████████████████▍      | 256/319 [00:52<00:12,  4.91it/s, loss=1.85, v_num=0, train_loss=1.760][A
Epoch 0:  81%|██████████████████████████▌      | 257/319 [00:52<00:12,  4.93it/s, loss=1.85, v_num=0, train_loss=1.760][A
Epoch 0:  81%|██████████████████████████▋      | 258/319 [00:52<00:12,  4.95it/s, loss=1.85, v_num=0, train_loss=1.760][A
Epoch 0:  81%|██████████████████████████▊      | 259/319 [00:52<00:12,  4.96it/s, loss=1.85, v_num=0, train_loss=1.760][A
Epoch 0:  82%|██████████████████████████▉      | 260/319 [00:52<00:11,  4.98it/s, loss=1.85, v_num=0, train_

Epoch 0, global step 255: 'val_loss' reached 1.71933 (best 1.71933), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run4.ckpt' as top 1


Epoch 1:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.88it/s, loss=1.26, v_num=0, train_loss=1.290, val_loss=1.720]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 1:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.89it/s, loss=1.26, v_num=0, train_loss=1.290, val_loss=1.720][A
Epoch 1:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.91it/s, loss=1.26, v_num=0, train_loss=1.290, val_loss=1.720][A
Epoch 1:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.93it/s, loss=1.26, v_num=0, train_loss=1.290, val_loss=1.720][A
Epoch 1:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.94it/s, loss=1.26, v_num=0, train_loss=1.290, val_loss=1.720][A
Epoch 1:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.96it/s, loss=1.26, v_num=0, train_loss=1.290, val_

Epoch 1, global step 510: 'val_loss' reached 1.32757 (best 1.32757), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run4.ckpt' as top 1


Epoch 2:  80%|████████████▊   | 255/319 [00:52<00:13,  4.87it/s, loss=0.975, v_num=0, train_loss=0.696, val_loss=1.330]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 2:  80%|████████████▊   | 256/319 [00:52<00:12,  4.89it/s, loss=0.975, v_num=0, train_loss=0.696, val_loss=1.330][A
Epoch 2:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.975, v_num=0, train_loss=0.696, val_loss=1.330][A
Epoch 2:  81%|████████████▉   | 258/319 [00:52<00:12,  4.92it/s, loss=0.975, v_num=0, train_loss=0.696, val_loss=1.330][A
Epoch 2:  81%|████████████▉   | 259/319 [00:52<00:12,  4.94it/s, loss=0.975, v_num=0, train_loss=0.696, val_loss=1.330][A
Epoch 2:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.975, v_num=0, train_loss=0.696, val_

Epoch 2, global step 765: 'val_loss' reached 1.25570 (best 1.25570), saving model to 'C:\\Users\\Benjamin Aw\\Desktop\\ACL_Anthology_Exploratory\\Experiments\\checkpoints\\best-checkpoint-Specter_Default_06_02_2023_01_41_run4.ckpt' as top 1


Epoch 3:  80%|████████████▊   | 255/319 [00:52<00:13,  4.88it/s, loss=0.492, v_num=0, train_loss=0.252, val_loss=1.260]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 3:  80%|████████████▊   | 256/319 [00:52<00:12,  4.90it/s, loss=0.492, v_num=0, train_loss=0.252, val_loss=1.260][A
Epoch 3:  81%|████████████▉   | 257/319 [00:52<00:12,  4.91it/s, loss=0.492, v_num=0, train_loss=0.252, val_loss=1.260][A
Epoch 3:  81%|████████████▉   | 258/319 [00:52<00:12,  4.93it/s, loss=0.492, v_num=0, train_loss=0.252, val_loss=1.260][A
Epoch 3:  81%|████████████▉   | 259/319 [00:52<00:12,  4.95it/s, loss=0.492, v_num=0, train_loss=0.252, val_loss=1.260][A
Epoch 3:  82%|█████████████   | 260/319 [00:52<00:11,  4.96it/s, loss=0.492, v_num=0, train_loss=0.252, val_

Epoch 3, global step 1020: 'val_loss' was not in top 1


Epoch 4:  80%|█████████████▌   | 255/319 [00:52<00:13,  4.90it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.280]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                                                               | 0/64 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/64 [00:00<?, ?it/s][A
Epoch 4:  80%|█████████████▋   | 256/319 [00:52<00:12,  4.92it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.280][A
Epoch 4:  81%|█████████████▋   | 257/319 [00:52<00:12,  4.93it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.280][A
Epoch 4:  81%|█████████████▋   | 258/319 [00:52<00:12,  4.95it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.280][A
Epoch 4:  81%|█████████████▊   | 259/319 [00:52<00:12,  4.97it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.280][A
Epoch 4:  82%|█████████████▊   | 260/319 [00:52<00:11,  4.99it/s, loss=0.53, v_num=0, train_loss=0.290, val_

Epoch 4, global step 1275: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█████████████████| 319/319 [00:56<00:00,  5.69it/s, loss=0.53, v_num=0, train_loss=0.290, val_loss=1.270]


In [17]:
print(f"The average accuracy of the validation set across 5 folds is: {average_val_acc_score}")
print(f"The average F1 score of the validation set across 5 folds is: {average_val_f1_score}")
print(f"The average precision of the validation set across 5 folds is: {average_val_prec_score}")
print(f"The average recall of the validation set across 5 folds is: {average_val_recall_score}")

The average accuracy of the validation set across 5 folds is: 0.6636542201042175
The average F1 score of the validation set across 5 folds is: 0.653399920463562
The average precision of the validation set across 5 folds is: 0.6579187512397766
The average recall of the validation set across 5 folds is: 0.6636542201042175


### Stop here for now We might want to take a look at creating a test set 