<a href="https://colab.research.google.com/github/IvanDePivan/2AMM30-groep-2/blob/main/Flairmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Mining Group 2
This notebook was developed for use on Google Colab. To run the notebook, make sure the files from the data folder are uploaded.




## Prerequisites

In [1]:
from google.colab import output
!pip uninstall -y torch
!pip install flair

import os
import random
import json
import re
import regex
import difflib
import srsly
import torch
from typing import List
from pathlib import Path

from flair.trainers import ModelTrainer
from flair.data import Corpus, Label, Relation, Sentence
from flair.datasets import ColumnCorpus, DataLoader
from flair.embeddings import WordEmbeddings, StackedEmbeddings, TokenEmbeddings, TransformerWordEmbeddings, TransformerDocumentEmbeddings
from flair.models import SequenceTagger, RelationExtractor, RelationClassifier

output.clear()

In [None]:
# Sanity check that PyTorch can see a GPU: we ran into issues with flair not
# recognizing one of the GPU types offered by Colab. This should display True.
torch.cuda.is_available()

True

### Reformatting Label Studio output
We wrote these functions to convert the output from Label Studio to the CoNLL format, because the standard export format had issues (e.g. no relation data was exported, the tokenization had flaws, and the output used different delimiters).

In [None]:
def write_conll(filename, text):
  """Write CoNLL documents to a file, separated from each other by a blank line.

  Args:
    filename: Destination path (str or path-like). Missing parent directories
      are created.
    text: Iterable of strings, one per document.

  The file is always encoded as UTF-8 instead of the platform default, because
  the tokenizer in this notebook emits non-ASCII tokens (e.g. the en dash).
  """
  output_file = Path(filename)
  output_file.parent.mkdir(exist_ok=True, parents=True)
  with output_file.open("w", encoding="utf-8") as conll_file:
    conll_file.write("\n\n".join(text))

In [None]:
# Function for getting each token with their correct label and id (ids are used for relations)
def get_labels(file_item, key_field):
  """Tokenize one Label Studio task and tag every token with a BIO label.

  Args:
    file_item: One task from the Label Studio JSON export. The function reads
      file_item['annotations'][0]['result'] (entries whose 'type' is 'labels',
      each with an 'id' and a 'value' holding 'start', 'end' and 'labels') and
      file_item['data'][key_field].
    key_field: Key in file_item['data'] that holds the annotated text.

  Returns:
    A tuple (rows, ids). rows holds one tab-separated string per token:
    1-based token index, token text and tag (O, B-<label> or I-<label>).
    ids maps the token index of every labelled token to the id of its
    annotation, which get_relations uses to resolve relations.

  A token that runs past the end of its annotated span is split in two: the
  part inside the span is emitted with the label and the remainder is queued
  as the next token. A token that begins before a span's start is not split.
  If the number of B- tags differs from the number of annotations, diagnostic
  information is printed.
  """
  rows = []
  position = 0  # character offset of the current token within the text
  next_label = 0  # index of the first annotation that is not fully consumed yet
  labels = [item for item in file_item['annotations'][0]['result'] if item['type'] == 'labels']
  labels.sort(key=lambda x: x['value']['start'])
  item_count = 1
  b_count = 0
  ids = {} # count -> id
  tokens = [item for item in re.split(r'(\s+)|([\(,\.\)\\;:$*&"\-–])|(\')|(\'\w+)', file_item['data'][key_field]) if item]
  # tokens grows while we iterate: the tail of a split token is inserted at i+1
  for i, item in enumerate(tokens):
    if re.search(r'\s+', item):
      # Whitespace only moves the character offset, it never becomes a row
      position += len(item)
      continue
    start = position
    end = position + len(item)
    label = '\tO'

    if len(labels) > next_label:
      span = labels[next_label]['value']
      if start >= span['start']:
        # the current item has a label, and might have a relation
        ids[item_count] = labels[next_label]["id"]
        if end > span['end']:
          # Token overruns the span: keep the part inside, re-queue the rest
          difference = end - span['end']
          end -= difference
          tokens.insert(i+1, item[-difference:])
          item = item[:-difference]
        tag = span["labels"][0]
        if start == span['start']:
          # Current item is start of a label
          b_count += 1
          label = '\tB-' + tag
        elif rows and ('\tB-' + tag in rows[-1] or '\tI-' + tag in rows[-1]):
          # Current item continues the label carried by the previous row
          label = '\tI-' + tag
        else:
          # Inside a span whose first token was not tagged (or there is no
          # previous row at all): open the entity here
          b_count += 1
          label = '\tB-' + tag

      if end >= span['end']:
        next_label += 1

    # Build the row only now, so it contains the (possibly trimmed) token text
    rows.append(f'{item_count}\t{item}{label}')
    position += len(item)
    item_count += 1
  # Sanity check on the labels, gives a warning if labels are disappearing.
  if len(labels) != b_count:
    print(f'labels: {len(labels)}, B-tags: {b_count}')
    print(labels)
    print(ids)
    print(f'missing label: {[item for item in labels if item["id"] not in ids.values()]}')
    print(rows)
  return rows, ids


In [None]:
# Builds the relation strings flair expects for one document
def get_relations(file_item, ids):
  """Translate the relation annotations of one task into token-index strings.

  Args:
    file_item: One task from the Label Studio JSON export; entries of
      file_item['annotations'][0]['result'] whose 'type' is 'relation' are used.
    ids: Mapping of token index -> annotation id, as returned by get_labels.

  Returns:
    A list with one string per resolvable relation, formatted as
    first head token;last head token;first tail token;last tail token;relation label.
    Relations whose head or tail annotation has no tokens in ids are left out,
    and diagnostic information is printed when that happens.
  """
  annotation_results = file_item['annotations'][0]['result']
  relations = [entry for entry in annotation_results if entry['type'] == 'relation']
  bio_relations = []
  for relation in relations:
    rel_type = relation['labels'][0]
    # Token indices covered by the head ("from") and tail ("to") annotations
    head_tokens = sorted(int(index) for index, label_id in ids.items() if label_id == relation['from_id'])
    tail_tokens = sorted(int(index) for index, label_id in ids.items() if label_id == relation['to_id'])
    if head_tokens and tail_tokens:
      span_fields = [head_tokens[0], head_tokens[-1], tail_tokens[0], tail_tokens[-1], rel_type]
      bio_relations.append(';'.join(str(field) for field in span_fields))
  found_relations = len(bio_relations)
  if found_relations != len(relations):
    print(f'found relations: {found_relations}, actual relations: {len(relations)}')
    print(relations)
    print(bio_relations)
    print(ids.keys())
    print(ids)
  return bio_relations

In [None]:
# Creates the CoNLL file from the expanded JSON export. This is reused for component 2
def make_conll(file, key_field='text', file_name='test_set'):
  """Convert every task of a Label Studio export and write one CoNLL file.

  Args:
    file: Iterable of tasks from the parsed JSON export.
    key_field: Key in each task's 'data' dict that holds the annotated text.
    file_name: Output path without extension; '.conll' is appended.

  Each document consists of an optional '# relations = ...' header line
  followed by the token rows. The number of documents is printed.
  """
  documents = []
  for task in file:
    token_rows, label_ids = get_labels(task, key_field)
    relation_specs = get_relations(task, label_ids)
    # The header is only present when the document has at least one relation
    header = ('# relations = ' + '|'.join(relation_specs)) if relation_specs else ''
    documents.append(header + '\n' + '\n'.join(token_rows))

  print(len(documents))
  write_conll(f'{file_name}.conll', documents)

## Component 1
First, the JSON files are converted to the appropriate format (CoNLL).

In [None]:
# Make a conll file from each Label Studio json export: the test set first, then the train set
for set_name in ('comp1_test_set', 'comp1_train_set'):
  with open(f'{set_name}.json') as f:
    file = json.load(f)
    make_conll(file, file_name=f'data/{set_name}')


40
95


In [None]:
# The training conll file is split up for train and dev data
SPLIT_SEED = 42  # fixed seed so the train/dev split is the same on every run
DEV_SIZE = 25    # For component 1, the last 25 shuffled documents are used for dev

with open('data/comp1_train_set.conll') as f:
  data = f.read()

# Documents are separated by a blank line. Reading in text mode normalizes line
# endings to '\n', so split on '\n\n' rather than on the platform's os.linesep.
splits = data.split('\n\n')
print(len(splits))
assert len(splits) > DEV_SIZE, 'not enough documents to hold out a dev set'

# A dedicated generator keeps the shuffle reproducible without touching the global random state
random.Random(SPLIT_SEED).shuffle(splits)

train = splits[:-DEV_SIZE]
dev = splits[-DEV_SIZE:]

write_conll('data/comp1/train.conll', train)
write_conll('data/comp1/dev.conll', dev)

# The test set is copied as is into the corpus folder
with open('data/comp1_test_set.conll') as f:
  data = f.read()
splits = data.split('\n\n')
write_conll('data/comp1/test.conll', splits)

95


A corpus is created from the separate conll files. We specify which column in the conll files contains the NER labels.

In [None]:
# Flair's corpus turns the BIO formatted token rows into the structure it trains on.
# We only have to say which column holds what and where the three files live.
columns = {1: 'text', 2: 'ner'}
data_folder = 'data/comp1/'

corpus: Corpus = ColumnCorpus(
    data_folder=data_folder,
    column_format=columns,
    train_file='train.conll',
    dev_file='dev.conll',
    test_file='test.conll',
)

# If the train split has the expected size, the import most likely went well
print(f'Corpus size: {len(corpus.train)}')

2023-11-10 09:29:09,100 Reading data from data/comp1
2023-11-10 09:29:09,105 Train: data/comp1/train.conll
2023-11-10 09:29:09,106 Dev: data/comp1/dev.conll
2023-11-10 09:29:09,112 Test: data/comp1/test.conll
Corpus size: 70


In [None]:
# This dictionary lets the model know which labels exist in the set.
tag_type = 'ner'  # label name given to column 2 in the corpus column mapping
# Make the tag dictionary from the corpus
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

2023-11-10 09:29:09,885 Computing label dictionary. Progress:


70it [00:00, 6861.61it/s]

2023-11-10 09:29:09,922 Dictionary created for label 'ner' with 6 values: Winner (seen 605 times), Date (seen 422 times), Nationality (seen 331 times), Prizetype (seen 69 times), Reason (seen 44 times)





In [None]:
# The embedding layer uses a pretrained model: DistilBERT, cased in this case.
# layers="-1" specifies which layer to take the embeddings from; we use the final layer.
# fine_tune is on, because we only put a linear layer on top.
# subtoken_pooling is set to 'first', meaning only the first subword of a token is used for its embedding.
# use_context is on as well, so surrounding context is also considered.
embeddings = TransformerWordEmbeddings(
    model='distilbert-base-cased',
    layers="-1",
    subtoken_pooling='first',
    fine_tune=True,
    use_context=True,
    respect_document_boundaries=False,
)

# The tagger is put on top of the embeddings and does the classification.
# Here we specify BIO vs BIOES, supply the label (tag) dictionary, and make the remaining model config choices:
# no CRF, no RNN and no reprojection of the embeddings.
# NOTE(review): hidden_size presumably only matters when use_rnn=True - confirm against the flair docs.
tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=False,
        use_rnn=False,
        reproject_embeddings=False,
        tag_format='BIO'
    )

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

2023-11-10 09:29:16,751 SequenceTagger predicts: Dictionary with 11 tags: O, B-Winner, I-Winner, B-Date, I-Date, B-Nationality, I-Nationality, B-Prizetype, I-Prizetype, B-Reason, I-Reason


In [None]:
# The model trainer in flair makes fine-tuning very easy
trainer : ModelTrainer = ModelTrainer(tagger, corpus)

# Note: these settings assume a decent graphics card (like a V100 on Colab).
# The model and its training logs are written to 'data/comp1/ner'.
# NOTE(review): with fine_tune=True the embeddings change at every step, so keeping
# them on the GPU presumably brings no benefit; flair suggests 'none' for
# fine-tuning - confirm before changing.
trainer.fine_tune(
        'data/comp1/ner',
        learning_rate=5e-05,
        mini_batch_size=16,
        max_epochs=100,
        embeddings_storage_mode='gpu', # alternative value: 'cpu'
        weight_decay=0.0,
    )


2023-11-10 09:29:16,793 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:16,804 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(28997, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_fea

100%|██████████| 2/2 [00:03<00:00,  1.91s/it]

2023-11-10 09:29:27,995 Evaluating as a multi-label problem: False
2023-11-10 09:29:28,116 DEV : loss 2.1376683712005615 - f1-score (micro avg)  0.0065
2023-11-10 09:29:28,147 ----------------------------------------------------------------------------------------------------





2023-11-10 09:29:28,718 epoch 2 - iter 1/5 - loss 2.24434881 - time (sec): 0.56 - samples/sec: 7011.70 - lr: 0.000006
2023-11-10 09:29:29,313 epoch 2 - iter 2/5 - loss 2.26072765 - time (sec): 1.16 - samples/sec: 7312.26 - lr: 0.000007
2023-11-10 09:29:29,987 epoch 2 - iter 3/5 - loss 2.22494892 - time (sec): 1.83 - samples/sec: 7208.25 - lr: 0.000008
2023-11-10 09:29:30,603 epoch 2 - iter 4/5 - loss 2.19728792 - time (sec): 2.45 - samples/sec: 7389.10 - lr: 0.000008
2023-11-10 09:29:30,904 epoch 2 - iter 5/5 - loss 2.17338455 - time (sec): 2.75 - samples/sec: 7193.35 - lr: 0.000009
2023-11-10 09:29:30,909 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:30,913 EPOCH 2 done: loss 2.1734 - lr 0.000009


100%|██████████| 2/2 [00:03<00:00,  1.97s/it]

2023-11-10 09:29:34,890 Evaluating as a multi-label problem: False





2023-11-10 09:29:35,386 DEV : loss 1.7962977886199951 - f1-score (micro avg)  0.002
2023-11-10 09:29:35,410 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:36,127 epoch 3 - iter 1/5 - loss 1.95239177 - time (sec): 0.71 - samples/sec: 8014.92 - lr: 0.000010
2023-11-10 09:29:36,680 epoch 3 - iter 2/5 - loss 1.89710852 - time (sec): 1.27 - samples/sec: 7947.26 - lr: 0.000011
2023-11-10 09:29:37,161 epoch 3 - iter 3/5 - loss 1.85094095 - time (sec): 1.75 - samples/sec: 8197.15 - lr: 0.000012
2023-11-10 09:29:37,614 epoch 3 - iter 4/5 - loss 1.79819319 - time (sec): 2.20 - samples/sec: 8265.95 - lr: 0.000013
2023-11-10 09:29:37,827 epoch 3 - iter 5/5 - loss 1.78975257 - time (sec): 2.41 - samples/sec: 8194.94 - lr: 0.000014
2023-11-10 09:29:37,833 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:37,837 EPOCH 3 done: loss 1.7898 - lr 0.000014


100%|██████████| 2/2 [00:04<00:00,  2.47s/it]

2023-11-10 09:29:42,787 Evaluating as a multi-label problem: False
2023-11-10 09:29:42,822 DEV : loss 1.1916927099227905 - f1-score (micro avg)  0.0
2023-11-10 09:29:42,843 ----------------------------------------------------------------------------------------------------





2023-11-10 09:29:43,416 epoch 4 - iter 1/5 - loss 1.30751725 - time (sec): 0.57 - samples/sec: 8109.65 - lr: 0.000015
2023-11-10 09:29:43,965 epoch 4 - iter 2/5 - loss 1.28457486 - time (sec): 1.12 - samples/sec: 7774.42 - lr: 0.000016
2023-11-10 09:29:44,401 epoch 4 - iter 3/5 - loss 1.24276485 - time (sec): 1.56 - samples/sec: 8458.83 - lr: 0.000017
2023-11-10 09:29:44,873 epoch 4 - iter 4/5 - loss 1.16166907 - time (sec): 2.03 - samples/sec: 8876.87 - lr: 0.000018
2023-11-10 09:29:45,042 epoch 4 - iter 5/5 - loss 1.13177834 - time (sec): 2.20 - samples/sec: 9003.12 - lr: 0.000019
2023-11-10 09:29:45,044 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:45,048 EPOCH 4 done: loss 1.1318 - lr 0.000019


100%|██████████| 2/2 [00:02<00:00,  1.26s/it]

2023-11-10 09:29:47,584 Evaluating as a multi-label problem: False
2023-11-10 09:29:47,608 DEV : loss 0.6016022562980652 - f1-score (micro avg)  0.0
2023-11-10 09:29:47,621 ----------------------------------------------------------------------------------------------------





2023-11-10 09:29:48,056 epoch 5 - iter 1/5 - loss 0.75203920 - time (sec): 0.43 - samples/sec: 10442.50 - lr: 0.000020
2023-11-10 09:29:48,532 epoch 5 - iter 2/5 - loss 0.74535507 - time (sec): 0.91 - samples/sec: 10233.11 - lr: 0.000021
2023-11-10 09:29:48,914 epoch 5 - iter 3/5 - loss 0.79676009 - time (sec): 1.29 - samples/sec: 10292.95 - lr: 0.000022
2023-11-10 09:29:49,373 epoch 5 - iter 4/5 - loss 0.77603625 - time (sec): 1.75 - samples/sec: 10241.29 - lr: 0.000023
2023-11-10 09:29:49,558 epoch 5 - iter 5/5 - loss 0.76204006 - time (sec): 1.94 - samples/sec: 10214.62 - lr: 0.000024
2023-11-10 09:29:49,560 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:49,563 EPOCH 5 done: loss 0.7620 - lr 0.000024


100%|██████████| 2/2 [00:03<00:00,  1.50s/it]

2023-11-10 09:29:52,588 Evaluating as a multi-label problem: False
2023-11-10 09:29:52,607 DEV : loss 0.5848215818405151 - f1-score (micro avg)  0.0
2023-11-10 09:29:52,621 ----------------------------------------------------------------------------------------------------





2023-11-10 09:29:53,061 epoch 6 - iter 1/5 - loss 0.73130886 - time (sec): 0.44 - samples/sec: 10554.79 - lr: 0.000025
2023-11-10 09:29:53,521 epoch 6 - iter 2/5 - loss 0.70739561 - time (sec): 0.90 - samples/sec: 10739.27 - lr: 0.000025
2023-11-10 09:29:53,922 epoch 6 - iter 3/5 - loss 0.71314584 - time (sec): 1.30 - samples/sec: 10557.52 - lr: 0.000026
2023-11-10 09:29:54,338 epoch 6 - iter 4/5 - loss 0.72225769 - time (sec): 1.72 - samples/sec: 10476.20 - lr: 0.000027
2023-11-10 09:29:54,603 epoch 6 - iter 5/5 - loss 0.72398610 - time (sec): 1.98 - samples/sec: 9982.24 - lr: 0.000028
2023-11-10 09:29:54,606 ----------------------------------------------------------------------------------------------------
2023-11-10 09:29:54,612 EPOCH 6 done: loss 0.7240 - lr 0.000028


100%|██████████| 2/2 [00:05<00:00,  2.57s/it]

2023-11-10 09:29:59,778 Evaluating as a multi-label problem: False
2023-11-10 09:29:59,817 DEV : loss 0.5181533694267273 - f1-score (micro avg)  0.0
2023-11-10 09:29:59,841 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:00,451 epoch 7 - iter 1/5 - loss 0.70881254 - time (sec): 0.61 - samples/sec: 7708.67 - lr: 0.000029
2023-11-10 09:30:00,968 epoch 7 - iter 2/5 - loss 0.69613776 - time (sec): 1.12 - samples/sec: 7856.11 - lr: 0.000030
2023-11-10 09:30:01,488 epoch 7 - iter 3/5 - loss 0.69627539 - time (sec): 1.64 - samples/sec: 8070.97 - lr: 0.000031
2023-11-10 09:30:02,169 epoch 7 - iter 4/5 - loss 0.65244946 - time (sec): 2.32 - samples/sec: 7755.36 - lr: 0.000032
2023-11-10 09:30:02,407 epoch 7 - iter 5/5 - loss 0.64348242 - time (sec): 2.56 - samples/sec: 7715.60 - lr: 0.000033
2023-11-10 09:30:02,410 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:02,413 EPOCH 7 done: loss 0.6435 - lr 0.000033


100%|██████████| 2/2 [00:02<00:00,  1.44s/it]

2023-11-10 09:30:05,307 Evaluating as a multi-label problem: False
2023-11-10 09:30:05,325 DEV : loss 0.4307210445404053 - f1-score (micro avg)  0.0
2023-11-10 09:30:05,338 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:05,710 epoch 8 - iter 1/5 - loss 0.66020860 - time (sec): 0.37 - samples/sec: 10386.61 - lr: 0.000034
2023-11-10 09:30:06,602 epoch 8 - iter 2/5 - loss 0.58687743 - time (sec): 1.26 - samples/sec: 6737.17 - lr: 0.000035
2023-11-10 09:30:07,004 epoch 8 - iter 3/5 - loss 0.56255489 - time (sec): 1.66 - samples/sec: 7711.96 - lr: 0.000036
2023-11-10 09:30:07,498 epoch 8 - iter 4/5 - loss 0.55175210 - time (sec): 2.16 - samples/sec: 8369.93 - lr: 0.000037
2023-11-10 09:30:07,680 epoch 8 - iter 5/5 - loss 0.54543338 - time (sec): 2.34 - samples/sec: 8458.42 - lr: 0.000038
2023-11-10 09:30:07,682 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:07,686 EPOCH 8 done: loss 0.5454 - lr 0.000038


100%|██████████| 2/2 [00:02<00:00,  1.29s/it]

2023-11-10 09:30:10,277 Evaluating as a multi-label problem: False
2023-11-10 09:30:10,305 DEV : loss 0.3580443859100342 - f1-score (micro avg)  0.101
2023-11-10 09:30:10,319 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:10,757 epoch 9 - iter 1/5 - loss 0.48092399 - time (sec): 0.44 - samples/sec: 9772.54 - lr: 0.000039
2023-11-10 09:30:11,150 epoch 9 - iter 2/5 - loss 0.49429181 - time (sec): 0.83 - samples/sec: 10492.89 - lr: 0.000040
2023-11-10 09:30:11,621 epoch 9 - iter 3/5 - loss 0.46816418 - time (sec): 1.30 - samples/sec: 10193.96 - lr: 0.000041
2023-11-10 09:30:12,087 epoch 9 - iter 4/5 - loss 0.45263516 - time (sec): 1.77 - samples/sec: 10283.97 - lr: 0.000042
2023-11-10 09:30:12,292 epoch 9 - iter 5/5 - loss 0.45345338 - time (sec): 1.97 - samples/sec: 10028.29 - lr: 0.000042
2023-11-10 09:30:12,294 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:12,297 EPOCH 9 done: loss 0.4535 - lr 0.000042


100%|██████████| 2/2 [00:04<00:00,  2.27s/it]

2023-11-10 09:30:16,858 Evaluating as a multi-label problem: False
2023-11-10 09:30:16,891 DEV : loss 0.28441640734672546 - f1-score (micro avg)  0.486
2023-11-10 09:30:16,910 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:17,573 epoch 10 - iter 1/5 - loss 0.41554896 - time (sec): 0.66 - samples/sec: 6351.69 - lr: 0.000043
2023-11-10 09:30:18,197 epoch 10 - iter 2/5 - loss 0.38320782 - time (sec): 1.28 - samples/sec: 7081.93 - lr: 0.000044
2023-11-10 09:30:18,844 epoch 10 - iter 3/5 - loss 0.35303671 - time (sec): 1.93 - samples/sec: 7134.38 - lr: 0.000045
2023-11-10 09:30:19,394 epoch 10 - iter 4/5 - loss 0.34894470 - time (sec): 2.48 - samples/sec: 7335.57 - lr: 0.000046
2023-11-10 09:30:19,659 epoch 10 - iter 5/5 - loss 0.34908758 - time (sec): 2.75 - samples/sec: 7200.25 - lr: 0.000047
2023-11-10 09:30:19,663 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:19,665 EPOCH 10 done: loss 0.3491 - lr 0.000047


100%|██████████| 2/2 [00:06<00:00,  3.32s/it]

2023-11-10 09:30:26,341 Evaluating as a multi-label problem: False
2023-11-10 09:30:26,372 DEV : loss 0.2320910096168518 - f1-score (micro avg)  0.5283
2023-11-10 09:30:26,396 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:26,879 epoch 11 - iter 1/5 - loss 0.35619892 - time (sec): 0.48 - samples/sec: 9019.88 - lr: 0.000048
2023-11-10 09:30:27,449 epoch 11 - iter 2/5 - loss 0.30237723 - time (sec): 1.05 - samples/sec: 8330.55 - lr: 0.000049
2023-11-10 09:30:28,022 epoch 11 - iter 3/5 - loss 0.29603254 - time (sec): 1.62 - samples/sec: 8420.25 - lr: 0.000050
2023-11-10 09:30:28,636 epoch 11 - iter 4/5 - loss 0.27247224 - time (sec): 2.24 - samples/sec: 8113.75 - lr: 0.000050
2023-11-10 09:30:28,852 epoch 11 - iter 5/5 - loss 0.27109707 - time (sec): 2.45 - samples/sec: 8060.19 - lr: 0.000050
2023-11-10 09:30:28,858 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:28,863 EPOCH 11 done: loss 0.2711 - lr 0.000050


100%|██████████| 2/2 [00:02<00:00,  1.38s/it]

2023-11-10 09:30:31,640 Evaluating as a multi-label problem: False
2023-11-10 09:30:31,660 DEV : loss 0.19074136018753052 - f1-score (micro avg)  0.6701
2023-11-10 09:30:31,674 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:32,109 epoch 12 - iter 1/5 - loss 0.22711516 - time (sec): 0.43 - samples/sec: 10522.58 - lr: 0.000050
2023-11-10 09:30:32,514 epoch 12 - iter 2/5 - loss 0.22936400 - time (sec): 0.84 - samples/sec: 10850.70 - lr: 0.000050
2023-11-10 09:30:32,958 epoch 12 - iter 3/5 - loss 0.20665834 - time (sec): 1.28 - samples/sec: 10678.04 - lr: 0.000049
2023-11-10 09:30:33,395 epoch 12 - iter 4/5 - loss 0.19783485 - time (sec): 1.72 - samples/sec: 10448.22 - lr: 0.000049
2023-11-10 09:30:33,599 epoch 12 - iter 5/5 - loss 0.20035849 - time (sec): 1.92 - samples/sec: 10279.17 - lr: 0.000049
2023-11-10 09:30:33,603 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:33,605 EPOCH 12 done: loss 0.2004 - lr 0.000049


100%|██████████| 2/2 [00:05<00:00,  2.55s/it]

2023-11-10 09:30:38,732 Evaluating as a multi-label problem: False
2023-11-10 09:30:38,770 DEV : loss 0.17646218836307526 - f1-score (micro avg)  0.7451
2023-11-10 09:30:38,794 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:39,858 epoch 13 - iter 1/5 - loss 0.16659609 - time (sec): 1.06 - samples/sec: 3962.66 - lr: 0.000049
2023-11-10 09:30:40,351 epoch 13 - iter 2/5 - loss 0.15366003 - time (sec): 1.55 - samples/sec: 5274.69 - lr: 0.000049
2023-11-10 09:30:41,038 epoch 13 - iter 3/5 - loss 0.15264315 - time (sec): 2.24 - samples/sec: 6057.26 - lr: 0.000049
2023-11-10 09:30:41,545 epoch 13 - iter 4/5 - loss 0.15088176 - time (sec): 2.74 - samples/sec: 6505.49 - lr: 0.000049
2023-11-10 09:30:41,792 epoch 13 - iter 5/5 - loss 0.14710878 - time (sec): 2.99 - samples/sec: 6610.08 - lr: 0.000049
2023-11-10 09:30:41,798 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:41,802 EPOCH 13 done: loss 0.1471 - lr 0.000049


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:30:44,925 Evaluating as a multi-label problem: False
2023-11-10 09:30:44,950 DEV : loss 0.17710456252098083 - f1-score (micro avg)  0.7432
2023-11-10 09:30:44,963 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:45,387 epoch 14 - iter 1/5 - loss 0.11312010 - time (sec): 0.42 - samples/sec: 10169.69 - lr: 0.000049
2023-11-10 09:30:45,891 epoch 14 - iter 2/5 - loss 0.13731491 - time (sec): 0.93 - samples/sec: 10422.66 - lr: 0.000049
2023-11-10 09:30:46,350 epoch 14 - iter 3/5 - loss 0.14029387 - time (sec): 1.38 - samples/sec: 10125.16 - lr: 0.000048
2023-11-10 09:30:46,958 epoch 14 - iter 4/5 - loss 0.13202069 - time (sec): 1.99 - samples/sec: 9240.26 - lr: 0.000048
2023-11-10 09:30:47,120 epoch 14 - iter 5/5 - loss 0.12733064 - time (sec): 2.16 - samples/sec: 9175.45 - lr: 0.000048
2023-11-10 09:30:47,123 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:47,126 EPOCH 14 done: loss 0.1273 - lr 0.000048


100%|██████████| 2/2 [00:03<00:00,  1.56s/it]

2023-11-10 09:30:50,266 Evaluating as a multi-label problem: False
2023-11-10 09:30:50,287 DEV : loss 0.1765315979719162 - f1-score (micro avg)  0.7037
2023-11-10 09:30:50,300 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:50,797 epoch 15 - iter 1/5 - loss 0.09651655 - time (sec): 0.50 - samples/sec: 10381.66 - lr: 0.000048
2023-11-10 09:30:51,724 epoch 15 - iter 2/5 - loss 0.09939464 - time (sec): 1.42 - samples/sec: 7286.79 - lr: 0.000048
2023-11-10 09:30:52,128 epoch 15 - iter 3/5 - loss 0.10221310 - time (sec): 1.83 - samples/sec: 7934.33 - lr: 0.000048
2023-11-10 09:30:52,539 epoch 15 - iter 4/5 - loss 0.09845312 - time (sec): 2.24 - samples/sec: 8067.57 - lr: 0.000048
2023-11-10 09:30:52,720 epoch 15 - iter 5/5 - loss 0.09969533 - time (sec): 2.42 - samples/sec: 8179.09 - lr: 0.000048
2023-11-10 09:30:52,722 ----------------------------------------------------------------------------------------------------
2023-11-10 09:30:52,726 EPOCH 15 done: loss 0.0997 - lr 0.000048


100%|██████████| 2/2 [00:05<00:00,  2.70s/it]

2023-11-10 09:30:58,153 Evaluating as a multi-label problem: False
2023-11-10 09:30:58,191 DEV : loss 0.16494159400463104 - f1-score (micro avg)  0.7839
2023-11-10 09:30:58,214 ----------------------------------------------------------------------------------------------------





2023-11-10 09:30:58,725 epoch 16 - iter 1/5 - loss 0.06565936 - time (sec): 0.51 - samples/sec: 8982.64 - lr: 0.000048
2023-11-10 09:30:59,219 epoch 16 - iter 2/5 - loss 0.06646817 - time (sec): 1.00 - samples/sec: 9006.35 - lr: 0.000047
2023-11-10 09:30:59,846 epoch 16 - iter 3/5 - loss 0.06293455 - time (sec): 1.63 - samples/sec: 8448.82 - lr: 0.000047
2023-11-10 09:31:00,373 epoch 16 - iter 4/5 - loss 0.07150182 - time (sec): 2.16 - samples/sec: 8341.66 - lr: 0.000047
2023-11-10 09:31:00,610 epoch 16 - iter 5/5 - loss 0.07277928 - time (sec): 2.39 - samples/sec: 8264.79 - lr: 0.000047
2023-11-10 09:31:00,617 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:00,619 EPOCH 16 done: loss 0.0728 - lr 0.000047


100%|██████████| 2/2 [00:04<00:00,  2.21s/it]

2023-11-10 09:31:05,056 Evaluating as a multi-label problem: False
2023-11-10 09:31:05,078 DEV : loss 0.16854926943778992 - f1-score (micro avg)  0.7678
2023-11-10 09:31:05,092 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:05,577 epoch 17 - iter 1/5 - loss 0.07009546 - time (sec): 0.48 - samples/sec: 10070.06 - lr: 0.000047
2023-11-10 09:31:06,039 epoch 17 - iter 2/5 - loss 0.06830048 - time (sec): 0.94 - samples/sec: 9710.30 - lr: 0.000047
2023-11-10 09:31:06,565 epoch 17 - iter 3/5 - loss 0.06289470 - time (sec): 1.47 - samples/sec: 9510.97 - lr: 0.000047
2023-11-10 09:31:06,975 epoch 17 - iter 4/5 - loss 0.06154050 - time (sec): 1.88 - samples/sec: 9694.48 - lr: 0.000047
2023-11-10 09:31:07,156 epoch 17 - iter 5/5 - loss 0.06289373 - time (sec): 2.06 - samples/sec: 9609.93 - lr: 0.000047
2023-11-10 09:31:07,162 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:07,168 EPOCH 17 done: loss 0.0629 - lr 0.000047


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:31:10,283 Evaluating as a multi-label problem: False
2023-11-10 09:31:10,313 DEV : loss 0.16509544849395752 - f1-score (micro avg)  0.7851
2023-11-10 09:31:10,326 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:10,735 epoch 18 - iter 1/5 - loss 0.06563027 - time (sec): 0.41 - samples/sec: 10001.68 - lr: 0.000047
2023-11-10 09:31:11,172 epoch 18 - iter 2/5 - loss 0.06538905 - time (sec): 0.84 - samples/sec: 10103.32 - lr: 0.000046
2023-11-10 09:31:11,667 epoch 18 - iter 3/5 - loss 0.05920381 - time (sec): 1.34 - samples/sec: 9846.64 - lr: 0.000046
2023-11-10 09:31:12,154 epoch 18 - iter 4/5 - loss 0.05851403 - time (sec): 1.83 - samples/sec: 9706.27 - lr: 0.000046
2023-11-10 09:31:12,440 epoch 18 - iter 5/5 - loss 0.05521703 - time (sec): 2.11 - samples/sec: 9362.25 - lr: 0.000046
2023-11-10 09:31:12,444 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:12,446 EPOCH 18 done: loss 0.0552 - lr 0.000046


100%|██████████| 2/2 [00:05<00:00,  2.71s/it]

2023-11-10 09:31:17,879 Evaluating as a multi-label problem: False
2023-11-10 09:31:17,911 DEV : loss 0.19798114895820618 - f1-score (micro avg)  0.7657
2023-11-10 09:31:17,933 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:18,489 epoch 19 - iter 1/5 - loss 0.05853815 - time (sec): 0.55 - samples/sec: 8211.37 - lr: 0.000046
2023-11-10 09:31:19,069 epoch 19 - iter 2/5 - loss 0.06131520 - time (sec): 1.13 - samples/sec: 7954.27 - lr: 0.000046
2023-11-10 09:31:19,618 epoch 19 - iter 3/5 - loss 0.05684888 - time (sec): 1.68 - samples/sec: 7828.90 - lr: 0.000046
2023-11-10 09:31:20,270 epoch 19 - iter 4/5 - loss 0.04940014 - time (sec): 2.33 - samples/sec: 7633.85 - lr: 0.000046
2023-11-10 09:31:20,508 epoch 19 - iter 5/5 - loss 0.04748856 - time (sec): 2.57 - samples/sec: 7689.37 - lr: 0.000046
2023-11-10 09:31:20,513 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:20,519 EPOCH 19 done: loss 0.0475 - lr 0.000046


100%|██████████| 2/2 [00:03<00:00,  1.58s/it]

2023-11-10 09:31:23,693 Evaluating as a multi-label problem: False
2023-11-10 09:31:23,716 DEV : loss 0.1794450283050537 - f1-score (micro avg)  0.7617
2023-11-10 09:31:23,729 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:24,176 epoch 20 - iter 1/5 - loss 0.04965167 - time (sec): 0.45 - samples/sec: 10219.66 - lr: 0.000046
2023-11-10 09:31:24,595 epoch 20 - iter 2/5 - loss 0.05043878 - time (sec): 0.86 - samples/sec: 10442.33 - lr: 0.000045
2023-11-10 09:31:25,121 epoch 20 - iter 3/5 - loss 0.04804766 - time (sec): 1.39 - samples/sec: 9943.40 - lr: 0.000045
2023-11-10 09:31:25,507 epoch 20 - iter 4/5 - loss 0.04851152 - time (sec): 1.78 - samples/sec: 10089.67 - lr: 0.000045
2023-11-10 09:31:25,677 epoch 20 - iter 5/5 - loss 0.04514155 - time (sec): 1.95 - samples/sec: 10157.70 - lr: 0.000045
2023-11-10 09:31:25,679 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:25,683 EPOCH 20 done: loss 0.0451 - lr 0.000045


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:31:28,377 Evaluating as a multi-label problem: False
2023-11-10 09:31:28,398 DEV : loss 0.18923097848892212 - f1-score (micro avg)  0.7946
2023-11-10 09:31:28,412 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:28,891 epoch 21 - iter 1/5 - loss 0.03598950 - time (sec): 0.47 - samples/sec: 10166.70 - lr: 0.000045
2023-11-10 09:31:29,405 epoch 21 - iter 2/5 - loss 0.03378960 - time (sec): 0.99 - samples/sec: 9733.82 - lr: 0.000045
2023-11-10 09:31:29,804 epoch 21 - iter 3/5 - loss 0.03164168 - time (sec): 1.39 - samples/sec: 9663.34 - lr: 0.000045
2023-11-10 09:31:30,237 epoch 21 - iter 4/5 - loss 0.03238162 - time (sec): 1.82 - samples/sec: 10057.99 - lr: 0.000045
2023-11-10 09:31:30,398 epoch 21 - iter 5/5 - loss 0.03286952 - time (sec): 1.98 - samples/sec: 9976.99 - lr: 0.000045
2023-11-10 09:31:30,401 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:30,404 EPOCH 21 done: loss 0.0329 - lr 0.000045


100%|██████████| 2/2 [00:05<00:00,  2.55s/it]

2023-11-10 09:31:35,522 Evaluating as a multi-label problem: False
2023-11-10 09:31:35,553 DEV : loss 0.1761743575334549 - f1-score (micro avg)  0.7862
2023-11-10 09:31:35,572 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:36,667 epoch 22 - iter 1/5 - loss 0.02189431 - time (sec): 1.09 - samples/sec: 4018.20 - lr: 0.000044
2023-11-10 09:31:37,386 epoch 22 - iter 2/5 - loss 0.02720702 - time (sec): 1.81 - samples/sec: 5117.19 - lr: 0.000044
2023-11-10 09:31:37,963 epoch 22 - iter 3/5 - loss 0.02608764 - time (sec): 2.39 - samples/sec: 5885.18 - lr: 0.000044
2023-11-10 09:31:38,531 epoch 22 - iter 4/5 - loss 0.02457161 - time (sec): 2.96 - samples/sec: 6136.29 - lr: 0.000044
2023-11-10 09:31:38,757 epoch 22 - iter 5/5 - loss 0.02510101 - time (sec): 3.18 - samples/sec: 6215.51 - lr: 0.000044
2023-11-10 09:31:38,761 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:38,763 EPOCH 22 done: loss 0.0251 - lr 0.000044


100%|██████████| 2/2 [00:03<00:00,  1.56s/it]

2023-11-10 09:31:41,898 Evaluating as a multi-label problem: False
2023-11-10 09:31:41,921 DEV : loss 0.17062877118587494 - f1-score (micro avg)  0.8137
2023-11-10 09:31:41,935 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:42,379 epoch 23 - iter 1/5 - loss 0.02653190 - time (sec): 0.44 - samples/sec: 9972.81 - lr: 0.000044
2023-11-10 09:31:42,782 epoch 23 - iter 2/5 - loss 0.02160398 - time (sec): 0.85 - samples/sec: 10243.39 - lr: 0.000044
2023-11-10 09:31:43,280 epoch 23 - iter 3/5 - loss 0.02098772 - time (sec): 1.34 - samples/sec: 10354.08 - lr: 0.000044
2023-11-10 09:31:43,679 epoch 23 - iter 4/5 - loss 0.02119215 - time (sec): 1.74 - samples/sec: 10308.13 - lr: 0.000044
2023-11-10 09:31:43,859 epoch 23 - iter 5/5 - loss 0.02110080 - time (sec): 1.92 - samples/sec: 10287.27 - lr: 0.000044
2023-11-10 09:31:43,862 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:43,865 EPOCH 23 done: loss 0.0211 - lr 0.000044


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:31:46,568 Evaluating as a multi-label problem: False
2023-11-10 09:31:46,599 DEV : loss 0.16933304071426392 - f1-score (micro avg)  0.81
2023-11-10 09:31:46,613 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:47,047 epoch 24 - iter 1/5 - loss 0.01341723 - time (sec): 0.43 - samples/sec: 10602.30 - lr: 0.000043
2023-11-10 09:31:47,472 epoch 24 - iter 2/5 - loss 0.01295252 - time (sec): 0.86 - samples/sec: 10326.33 - lr: 0.000043
2023-11-10 09:31:47,920 epoch 24 - iter 3/5 - loss 0.01508743 - time (sec): 1.30 - samples/sec: 10585.97 - lr: 0.000043
2023-11-10 09:31:48,376 epoch 24 - iter 4/5 - loss 0.01484934 - time (sec): 1.76 - samples/sec: 10485.28 - lr: 0.000043
2023-11-10 09:31:48,962 epoch 24 - iter 5/5 - loss 0.01459357 - time (sec): 2.35 - samples/sec: 8423.71 - lr: 0.000043
2023-11-10 09:31:48,965 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:48,971 EPOCH 24 done: loss 0.0146 - lr 0.000043


100%|██████████| 2/2 [00:04<00:00,  2.35s/it]

2023-11-10 09:31:53,683 Evaluating as a multi-label problem: False
2023-11-10 09:31:53,717 DEV : loss 0.17919783294200897 - f1-score (micro avg)  0.8216
2023-11-10 09:31:53,737 ----------------------------------------------------------------------------------------------------





2023-11-10 09:31:54,294 epoch 25 - iter 1/5 - loss 0.01208278 - time (sec): 0.55 - samples/sec: 7535.82 - lr: 0.000043
2023-11-10 09:31:54,827 epoch 25 - iter 2/5 - loss 0.01393520 - time (sec): 1.09 - samples/sec: 8029.82 - lr: 0.000043
2023-11-10 09:31:55,393 epoch 25 - iter 3/5 - loss 0.01454426 - time (sec): 1.65 - samples/sec: 8126.76 - lr: 0.000043
2023-11-10 09:31:55,915 epoch 25 - iter 4/5 - loss 0.01392218 - time (sec): 2.18 - samples/sec: 8065.56 - lr: 0.000043
2023-11-10 09:31:56,190 epoch 25 - iter 5/5 - loss 0.01276899 - time (sec): 2.45 - samples/sec: 8071.81 - lr: 0.000042
2023-11-10 09:31:56,195 ----------------------------------------------------------------------------------------------------
2023-11-10 09:31:56,202 EPOCH 25 done: loss 0.0128 - lr 0.000042


100%|██████████| 2/2 [00:04<00:00,  2.01s/it]

2023-11-10 09:32:00,232 Evaluating as a multi-label problem: False
2023-11-10 09:32:00,264 DEV : loss 0.19227828085422516 - f1-score (micro avg)  0.8148
2023-11-10 09:32:00,277 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:00,672 epoch 26 - iter 1/5 - loss 0.00511072 - time (sec): 0.39 - samples/sec: 10171.95 - lr: 0.000042
2023-11-10 09:32:01,116 epoch 26 - iter 2/5 - loss 0.00782370 - time (sec): 0.84 - samples/sec: 10265.32 - lr: 0.000042
2023-11-10 09:32:01,529 epoch 26 - iter 3/5 - loss 0.00749840 - time (sec): 1.25 - samples/sec: 10033.82 - lr: 0.000042
2023-11-10 09:32:02,050 epoch 26 - iter 4/5 - loss 0.00830761 - time (sec): 1.77 - samples/sec: 9872.17 - lr: 0.000042
2023-11-10 09:32:02,291 epoch 26 - iter 5/5 - loss 0.00985660 - time (sec): 2.01 - samples/sec: 9826.47 - lr: 0.000042
2023-11-10 09:32:02,293 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:02,297 EPOCH 26 done: loss 0.0099 - lr 0.000042


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:32:05,418 Evaluating as a multi-label problem: False
2023-11-10 09:32:05,442 DEV : loss 0.23386844992637634 - f1-score (micro avg)  0.8065
2023-11-10 09:32:05,456 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:05,891 epoch 27 - iter 1/5 - loss 0.01505703 - time (sec): 0.43 - samples/sec: 10029.47 - lr: 0.000042
2023-11-10 09:32:06,411 epoch 27 - iter 2/5 - loss 0.01138568 - time (sec): 0.95 - samples/sec: 9707.84 - lr: 0.000042
2023-11-10 09:32:06,857 epoch 27 - iter 3/5 - loss 0.01003531 - time (sec): 1.40 - samples/sec: 10025.89 - lr: 0.000042
2023-11-10 09:32:07,266 epoch 27 - iter 4/5 - loss 0.00938489 - time (sec): 1.81 - samples/sec: 10040.14 - lr: 0.000042
2023-11-10 09:32:07,481 epoch 27 - iter 5/5 - loss 0.00934541 - time (sec): 2.02 - samples/sec: 9770.70 - lr: 0.000041
2023-11-10 09:32:07,484 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:07,490 EPOCH 27 done: loss 0.0093 - lr 0.000041


100%|██████████| 2/2 [00:03<00:00,  2.00s/it]

2023-11-10 09:32:11,497 Evaluating as a multi-label problem: False
2023-11-10 09:32:11,527 DEV : loss 0.21146902441978455 - f1-score (micro avg)  0.8258
2023-11-10 09:32:11,545 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:12,075 epoch 28 - iter 1/5 - loss 0.00911944 - time (sec): 0.53 - samples/sec: 7204.75 - lr: 0.000041
2023-11-10 09:32:12,679 epoch 28 - iter 2/5 - loss 0.00847081 - time (sec): 1.13 - samples/sec: 7638.19 - lr: 0.000041
2023-11-10 09:32:13,395 epoch 28 - iter 3/5 - loss 0.00826603 - time (sec): 1.85 - samples/sec: 7853.84 - lr: 0.000041
2023-11-10 09:32:13,879 epoch 28 - iter 4/5 - loss 0.00848259 - time (sec): 2.33 - samples/sec: 7879.32 - lr: 0.000041
2023-11-10 09:32:14,036 epoch 28 - iter 5/5 - loss 0.00819678 - time (sec): 2.49 - samples/sec: 7944.60 - lr: 0.000041
2023-11-10 09:32:14,044 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:14,046 EPOCH 28 done: loss 0.0082 - lr 0.000041


100%|██████████| 2/2 [00:04<00:00,  2.44s/it]

2023-11-10 09:32:18,937 Evaluating as a multi-label problem: False
2023-11-10 09:32:18,959 DEV : loss 0.23437540233135223 - f1-score (micro avg)  0.8043
2023-11-10 09:32:18,972 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:19,431 epoch 29 - iter 1/5 - loss 0.00865371 - time (sec): 0.46 - samples/sec: 9786.35 - lr: 0.000041
2023-11-10 09:32:19,903 epoch 29 - iter 2/5 - loss 0.00982231 - time (sec): 0.93 - samples/sec: 10453.74 - lr: 0.000041
2023-11-10 09:32:20,423 epoch 29 - iter 3/5 - loss 0.00803049 - time (sec): 1.45 - samples/sec: 10255.38 - lr: 0.000041
2023-11-10 09:32:20,747 epoch 29 - iter 4/5 - loss 0.00768793 - time (sec): 1.77 - samples/sec: 10414.15 - lr: 0.000040
2023-11-10 09:32:20,892 epoch 29 - iter 5/5 - loss 0.00741904 - time (sec): 1.92 - samples/sec: 10311.92 - lr: 0.000040
2023-11-10 09:32:20,895 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:20,898 EPOCH 29 done: loss 0.0074 - lr 0.000040


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:32:23,602 Evaluating as a multi-label problem: False
2023-11-10 09:32:23,626 DEV : loss 0.22507324814796448 - f1-score (micro avg)  0.8182
2023-11-10 09:32:23,640 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:24,118 epoch 30 - iter 1/5 - loss 0.01265763 - time (sec): 0.48 - samples/sec: 10925.52 - lr: 0.000040
2023-11-10 09:32:24,578 epoch 30 - iter 2/5 - loss 0.01103437 - time (sec): 0.94 - samples/sec: 10981.70 - lr: 0.000040
2023-11-10 09:32:25,004 epoch 30 - iter 3/5 - loss 0.01094468 - time (sec): 1.36 - samples/sec: 10241.19 - lr: 0.000040
2023-11-10 09:32:25,415 epoch 30 - iter 4/5 - loss 0.01028323 - time (sec): 1.77 - samples/sec: 10086.58 - lr: 0.000040
2023-11-10 09:32:25,590 epoch 30 - iter 5/5 - loss 0.00961093 - time (sec): 1.95 - samples/sec: 10143.04 - lr: 0.000040
2023-11-10 09:32:25,593 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:25,597 EPOCH 30 done: loss 0.0096 - lr 0.000040


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:32:28,713 Evaluating as a multi-label problem: False
2023-11-10 09:32:28,748 DEV : loss 0.23248973488807678 - f1-score (micro avg)  0.8161
2023-11-10 09:32:28,770 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:29,920 epoch 31 - iter 1/5 - loss 0.00424487 - time (sec): 1.15 - samples/sec: 4115.43 - lr: 0.000040
2023-11-10 09:32:30,444 epoch 31 - iter 2/5 - loss 0.00442629 - time (sec): 1.67 - samples/sec: 5719.98 - lr: 0.000040
2023-11-10 09:32:30,934 epoch 31 - iter 3/5 - loss 0.00454788 - time (sec): 2.16 - samples/sec: 6120.01 - lr: 0.000040
2023-11-10 09:32:31,495 epoch 31 - iter 4/5 - loss 0.00578298 - time (sec): 2.72 - samples/sec: 6569.08 - lr: 0.000039
2023-11-10 09:32:31,738 epoch 31 - iter 5/5 - loss 0.00602638 - time (sec): 2.96 - samples/sec: 6673.02 - lr: 0.000039
2023-11-10 09:32:31,746 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:31,748 EPOCH 31 done: loss 0.0060 - lr 0.000039


100%|██████████| 2/2 [00:04<00:00,  2.46s/it]

2023-11-10 09:32:36,680 Evaluating as a multi-label problem: False
2023-11-10 09:32:36,703 DEV : loss 0.23838719725608826 - f1-score (micro avg)  0.8152
2023-11-10 09:32:36,717 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:37,264 epoch 32 - iter 1/5 - loss 0.00642471 - time (sec): 0.55 - samples/sec: 9465.55 - lr: 0.000039
2023-11-10 09:32:37,725 epoch 32 - iter 2/5 - loss 0.00538829 - time (sec): 1.01 - samples/sec: 10008.43 - lr: 0.000039
2023-11-10 09:32:38,129 epoch 32 - iter 3/5 - loss 0.00708600 - time (sec): 1.41 - samples/sec: 10089.58 - lr: 0.000039
2023-11-10 09:32:38,550 epoch 32 - iter 4/5 - loss 0.00845797 - time (sec): 1.83 - samples/sec: 9972.23 - lr: 0.000039
2023-11-10 09:32:38,687 epoch 32 - iter 5/5 - loss 0.00878170 - time (sec): 1.97 - samples/sec: 10046.91 - lr: 0.000039
2023-11-10 09:32:38,689 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:38,693 EPOCH 32 done: loss 0.0088 - lr 0.000039


100%|██████████| 2/2 [00:02<00:00,  1.35s/it]

2023-11-10 09:32:41,407 Evaluating as a multi-label problem: False
2023-11-10 09:32:41,429 DEV : loss 0.24439847469329834 - f1-score (micro avg)  0.8195
2023-11-10 09:32:41,443 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:41,942 epoch 33 - iter 1/5 - loss 0.00507274 - time (sec): 0.50 - samples/sec: 9071.10 - lr: 0.000039
2023-11-10 09:32:42,408 epoch 33 - iter 2/5 - loss 0.00530737 - time (sec): 0.96 - samples/sec: 9617.82 - lr: 0.000039
2023-11-10 09:32:42,858 epoch 33 - iter 3/5 - loss 0.00685010 - time (sec): 1.41 - samples/sec: 9578.31 - lr: 0.000038
2023-11-10 09:32:43,315 epoch 33 - iter 4/5 - loss 0.00721727 - time (sec): 1.87 - samples/sec: 9903.97 - lr: 0.000038
2023-11-10 09:32:43,433 epoch 33 - iter 5/5 - loss 0.00716175 - time (sec): 1.99 - samples/sec: 9943.81 - lr: 0.000038
2023-11-10 09:32:43,435 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:43,438 EPOCH 33 done: loss 0.0072 - lr 0.000038


100%|██████████| 2/2 [00:03<00:00,  1.58s/it]

2023-11-10 09:32:46,616 Evaluating as a multi-label problem: False
2023-11-10 09:32:46,650 DEV : loss 0.23816995322704315 - f1-score (micro avg)  0.8121
2023-11-10 09:32:46,675 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:47,225 epoch 34 - iter 1/5 - loss 0.00528391 - time (sec): 0.55 - samples/sec: 8001.72 - lr: 0.000038
2023-11-10 09:32:47,732 epoch 34 - iter 2/5 - loss 0.00404002 - time (sec): 1.06 - samples/sec: 7844.47 - lr: 0.000038
2023-11-10 09:32:48,243 epoch 34 - iter 3/5 - loss 0.00626724 - time (sec): 1.57 - samples/sec: 8200.71 - lr: 0.000038
2023-11-10 09:32:48,863 epoch 34 - iter 4/5 - loss 0.00544609 - time (sec): 2.19 - samples/sec: 8129.48 - lr: 0.000038
2023-11-10 09:32:49,094 epoch 34 - iter 5/5 - loss 0.00543448 - time (sec): 2.42 - samples/sec: 8181.72 - lr: 0.000038
2023-11-10 09:32:49,100 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:49,102 EPOCH 34 done: loss 0.0054 - lr 0.000038


100%|██████████| 2/2 [00:05<00:00,  2.67s/it]

2023-11-10 09:32:54,451 Evaluating as a multi-label problem: False
2023-11-10 09:32:54,505 DEV : loss 0.23307180404663086 - f1-score (micro avg)  0.8207
2023-11-10 09:32:54,531 ----------------------------------------------------------------------------------------------------





2023-11-10 09:32:55,079 epoch 35 - iter 1/5 - loss 0.00407390 - time (sec): 0.54 - samples/sec: 7939.33 - lr: 0.000038
2023-11-10 09:32:55,633 epoch 35 - iter 2/5 - loss 0.00454105 - time (sec): 1.10 - samples/sec: 8197.82 - lr: 0.000038
2023-11-10 09:32:56,121 epoch 35 - iter 3/5 - loss 0.00523149 - time (sec): 1.58 - samples/sec: 8766.09 - lr: 0.000037
2023-11-10 09:32:56,538 epoch 35 - iter 4/5 - loss 0.00550685 - time (sec): 2.00 - samples/sec: 8982.63 - lr: 0.000037
2023-11-10 09:32:56,713 epoch 35 - iter 5/5 - loss 0.00520472 - time (sec): 2.17 - samples/sec: 9092.50 - lr: 0.000037
2023-11-10 09:32:56,715 ----------------------------------------------------------------------------------------------------
2023-11-10 09:32:56,721 EPOCH 35 done: loss 0.0052 - lr 0.000037


100%|██████████| 2/2 [00:03<00:00,  1.59s/it]

2023-11-10 09:32:59,908 Evaluating as a multi-label problem: False
2023-11-10 09:32:59,932 DEV : loss 0.23208202421665192 - f1-score (micro avg)  0.823
2023-11-10 09:32:59,946 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:00,422 epoch 36 - iter 1/5 - loss 0.00685807 - time (sec): 0.47 - samples/sec: 10887.91 - lr: 0.000037
2023-11-10 09:33:00,848 epoch 36 - iter 2/5 - loss 0.00624759 - time (sec): 0.90 - samples/sec: 10463.48 - lr: 0.000037
2023-11-10 09:33:01,277 epoch 36 - iter 3/5 - loss 0.00497949 - time (sec): 1.33 - samples/sec: 10329.11 - lr: 0.000037
2023-11-10 09:33:01,753 epoch 36 - iter 4/5 - loss 0.00480220 - time (sec): 1.81 - samples/sec: 9972.35 - lr: 0.000037
2023-11-10 09:33:01,929 epoch 36 - iter 5/5 - loss 0.00461280 - time (sec): 1.98 - samples/sec: 9981.98 - lr: 0.000037
2023-11-10 09:33:01,931 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:01,934 EPOCH 36 done: loss 0.0046 - lr 0.000037


100%|██████████| 2/2 [00:02<00:00,  1.37s/it]

2023-11-10 09:33:04,688 Evaluating as a multi-label problem: False
2023-11-10 09:33:04,713 DEV : loss 0.23133261501789093 - f1-score (micro avg)  0.813
2023-11-10 09:33:04,728 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:05,126 epoch 37 - iter 1/5 - loss 0.00224630 - time (sec): 0.40 - samples/sec: 10303.38 - lr: 0.000037
2023-11-10 09:33:05,542 epoch 37 - iter 2/5 - loss 0.00245767 - time (sec): 0.81 - samples/sec: 10407.42 - lr: 0.000037
2023-11-10 09:33:05,935 epoch 37 - iter 3/5 - loss 0.00580553 - time (sec): 1.21 - samples/sec: 10436.80 - lr: 0.000036
2023-11-10 09:33:06,609 epoch 37 - iter 4/5 - loss 0.00631394 - time (sec): 1.88 - samples/sec: 9270.07 - lr: 0.000036
2023-11-10 09:33:06,889 epoch 37 - iter 5/5 - loss 0.00596328 - time (sec): 2.16 - samples/sec: 9157.84 - lr: 0.000036
2023-11-10 09:33:06,892 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:06,894 EPOCH 37 done: loss 0.0060 - lr 0.000036


100%|██████████| 2/2 [00:05<00:00,  2.62s/it]

2023-11-10 09:33:12,154 Evaluating as a multi-label problem: False
2023-11-10 09:33:12,188 DEV : loss 0.23008503019809723 - f1-score (micro avg)  0.8407
2023-11-10 09:33:12,210 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:13,471 epoch 38 - iter 1/5 - loss 0.00709937 - time (sec): 1.26 - samples/sec: 4127.27 - lr: 0.000036
2023-11-10 09:33:14,033 epoch 38 - iter 2/5 - loss 0.00604714 - time (sec): 1.82 - samples/sec: 5136.75 - lr: 0.000036
2023-11-10 09:33:14,565 epoch 38 - iter 3/5 - loss 0.00524839 - time (sec): 2.35 - samples/sec: 6042.75 - lr: 0.000036
2023-11-10 09:33:14,999 epoch 38 - iter 4/5 - loss 0.00453911 - time (sec): 2.78 - samples/sec: 6689.14 - lr: 0.000036
2023-11-10 09:33:15,115 epoch 38 - iter 5/5 - loss 0.00438959 - time (sec): 2.90 - samples/sec: 6820.66 - lr: 0.000036
2023-11-10 09:33:15,118 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:15,120 EPOCH 38 done: loss 0.0044 - lr 0.000036


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:33:17,815 Evaluating as a multi-label problem: False
2023-11-10 09:33:17,841 DEV : loss 0.2320011556148529 - f1-score (micro avg)  0.8309
2023-11-10 09:33:17,855 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:18,350 epoch 39 - iter 1/5 - loss 0.00651068 - time (sec): 0.49 - samples/sec: 10218.73 - lr: 0.000036
2023-11-10 09:33:18,773 epoch 39 - iter 2/5 - loss 0.00586693 - time (sec): 0.92 - samples/sec: 10347.02 - lr: 0.000035
2023-11-10 09:33:19,183 epoch 39 - iter 3/5 - loss 0.00523554 - time (sec): 1.33 - samples/sec: 10272.42 - lr: 0.000035
2023-11-10 09:33:19,610 epoch 39 - iter 4/5 - loss 0.00451227 - time (sec): 1.75 - samples/sec: 10277.13 - lr: 0.000035
2023-11-10 09:33:19,793 epoch 39 - iter 5/5 - loss 0.00446461 - time (sec): 1.94 - samples/sec: 10214.92 - lr: 0.000035
2023-11-10 09:33:19,795 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:19,800 EPOCH 39 done: loss 0.0045 - lr 0.000035


100%|██████████| 2/2 [00:02<00:00,  1.37s/it]

2023-11-10 09:33:22,546 Evaluating as a multi-label problem: False
2023-11-10 09:33:22,570 DEV : loss 0.2340705245733261 - f1-score (micro avg)  0.8301
2023-11-10 09:33:22,584 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:23,117 epoch 40 - iter 1/5 - loss 0.00308892 - time (sec): 0.53 - samples/sec: 9278.68 - lr: 0.000035
2023-11-10 09:33:23,558 epoch 40 - iter 2/5 - loss 0.00289002 - time (sec): 0.97 - samples/sec: 10122.10 - lr: 0.000035
2023-11-10 09:33:23,995 epoch 40 - iter 3/5 - loss 0.00433848 - time (sec): 1.41 - samples/sec: 10196.09 - lr: 0.000035
2023-11-10 09:33:24,398 epoch 40 - iter 4/5 - loss 0.00482749 - time (sec): 1.81 - samples/sec: 10200.22 - lr: 0.000035
2023-11-10 09:33:24,601 epoch 40 - iter 5/5 - loss 0.00505073 - time (sec): 2.02 - samples/sec: 9810.33 - lr: 0.000035
2023-11-10 09:33:24,608 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:24,611 EPOCH 40 done: loss 0.0051 - lr 0.000035


100%|██████████| 2/2 [00:05<00:00,  2.97s/it]

2023-11-10 09:33:30,560 Evaluating as a multi-label problem: False
2023-11-10 09:33:30,597 DEV : loss 0.2646377384662628 - f1-score (micro avg)  0.8054
2023-11-10 09:33:30,616 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:31,215 epoch 41 - iter 1/5 - loss 0.00371956 - time (sec): 0.60 - samples/sec: 7801.85 - lr: 0.000035
2023-11-10 09:33:31,747 epoch 41 - iter 2/5 - loss 0.00343853 - time (sec): 1.13 - samples/sec: 8022.21 - lr: 0.000034
2023-11-10 09:33:32,339 epoch 41 - iter 3/5 - loss 0.00402719 - time (sec): 1.72 - samples/sec: 7743.36 - lr: 0.000034
2023-11-10 09:33:32,920 epoch 41 - iter 4/5 - loss 0.00397383 - time (sec): 2.30 - samples/sec: 7946.53 - lr: 0.000034
2023-11-10 09:33:33,131 epoch 41 - iter 5/5 - loss 0.00375960 - time (sec): 2.51 - samples/sec: 7870.53 - lr: 0.000034
2023-11-10 09:33:33,133 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:33,137 EPOCH 41 done: loss 0.0038 - lr 0.000034


100%|██████████| 2/2 [00:02<00:00,  1.36s/it]

2023-11-10 09:33:35,875 Evaluating as a multi-label problem: False
2023-11-10 09:33:35,897 DEV : loss 0.21295569837093353 - f1-score (micro avg)  0.8214
2023-11-10 09:33:35,911 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:36,431 epoch 42 - iter 1/5 - loss 0.00277661 - time (sec): 0.52 - samples/sec: 9498.61 - lr: 0.000034
2023-11-10 09:33:36,842 epoch 42 - iter 2/5 - loss 0.00384094 - time (sec): 0.93 - samples/sec: 10365.23 - lr: 0.000034
2023-11-10 09:33:37,304 epoch 42 - iter 3/5 - loss 0.00361287 - time (sec): 1.39 - samples/sec: 10336.51 - lr: 0.000034
2023-11-10 09:33:37,729 epoch 42 - iter 4/5 - loss 0.00384824 - time (sec): 1.82 - samples/sec: 10345.17 - lr: 0.000034
2023-11-10 09:33:37,835 epoch 42 - iter 5/5 - loss 0.00370933 - time (sec): 1.92 - samples/sec: 10292.25 - lr: 0.000034
2023-11-10 09:33:37,837 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:37,841 EPOCH 42 done: loss 0.0037 - lr 0.000034


100%|██████████| 2/2 [00:03<00:00,  1.54s/it]

2023-11-10 09:33:40,941 Evaluating as a multi-label problem: False
2023-11-10 09:33:40,964 DEV : loss 0.2513829171657562 - f1-score (micro avg)  0.8247
2023-11-10 09:33:40,977 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:41,429 epoch 43 - iter 1/5 - loss 0.00430645 - time (sec): 0.45 - samples/sec: 10030.72 - lr: 0.000033
2023-11-10 09:33:41,948 epoch 43 - iter 2/5 - loss 0.00492438 - time (sec): 0.97 - samples/sec: 10027.30 - lr: 0.000033
2023-11-10 09:33:42,348 epoch 43 - iter 3/5 - loss 0.00459693 - time (sec): 1.37 - samples/sec: 10265.30 - lr: 0.000033
2023-11-10 09:33:42,762 epoch 43 - iter 4/5 - loss 0.00389689 - time (sec): 1.78 - samples/sec: 10445.19 - lr: 0.000033
2023-11-10 09:33:42,889 epoch 43 - iter 5/5 - loss 0.00372061 - time (sec): 1.91 - samples/sec: 10350.05 - lr: 0.000033
2023-11-10 09:33:42,892 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:42,896 EPOCH 43 done: loss 0.0037 - lr 0.000033


100%|██████████| 2/2 [00:05<00:00,  2.67s/it]

2023-11-10 09:33:48,246 Evaluating as a multi-label problem: False
2023-11-10 09:33:48,286 DEV : loss 0.2527458369731903 - f1-score (micro avg)  0.8319
2023-11-10 09:33:48,309 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:48,888 epoch 44 - iter 1/5 - loss 0.00257890 - time (sec): 0.58 - samples/sec: 7722.83 - lr: 0.000033
2023-11-10 09:33:49,569 epoch 44 - iter 2/5 - loss 0.00180200 - time (sec): 1.26 - samples/sec: 7846.48 - lr: 0.000033
2023-11-10 09:33:50,035 epoch 44 - iter 3/5 - loss 0.00216581 - time (sec): 1.72 - samples/sec: 8065.86 - lr: 0.000033
2023-11-10 09:33:50,558 epoch 44 - iter 4/5 - loss 0.00232605 - time (sec): 2.25 - samples/sec: 8104.92 - lr: 0.000033
2023-11-10 09:33:50,747 epoch 44 - iter 5/5 - loss 0.00218563 - time (sec): 2.43 - samples/sec: 8120.90 - lr: 0.000033
2023-11-10 09:33:50,753 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:50,757 EPOCH 44 done: loss 0.0022 - lr 0.000033


100%|██████████| 2/2 [00:03<00:00,  1.60s/it]

2023-11-10 09:33:53,970 Evaluating as a multi-label problem: False
2023-11-10 09:33:53,992 DEV : loss 0.24926047027111053 - f1-score (micro avg)  0.8269
2023-11-10 09:33:54,008 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:54,408 epoch 45 - iter 1/5 - loss 0.00273279 - time (sec): 0.40 - samples/sec: 11060.42 - lr: 0.000032
2023-11-10 09:33:55,240 epoch 45 - iter 2/5 - loss 0.00302601 - time (sec): 1.23 - samples/sec: 7171.32 - lr: 0.000032
2023-11-10 09:33:55,696 epoch 45 - iter 3/5 - loss 0.00224620 - time (sec): 1.69 - samples/sec: 7803.34 - lr: 0.000032
2023-11-10 09:33:56,126 epoch 45 - iter 4/5 - loss 0.00216787 - time (sec): 2.12 - samples/sec: 8366.00 - lr: 0.000032
2023-11-10 09:33:56,342 epoch 45 - iter 5/5 - loss 0.00245447 - time (sec): 2.33 - samples/sec: 8478.56 - lr: 0.000032
2023-11-10 09:33:56,344 ----------------------------------------------------------------------------------------------------
2023-11-10 09:33:56,347 EPOCH 45 done: loss 0.0025 - lr 0.000032


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:33:59,034 Evaluating as a multi-label problem: False
2023-11-10 09:33:59,057 DEV : loss 0.2574838101863861 - f1-score (micro avg)  0.8163
2023-11-10 09:33:59,071 ----------------------------------------------------------------------------------------------------





2023-11-10 09:33:59,596 epoch 46 - iter 1/5 - loss 0.00070466 - time (sec): 0.52 - samples/sec: 8726.95 - lr: 0.000032
2023-11-10 09:34:00,056 epoch 46 - iter 2/5 - loss 0.00341748 - time (sec): 0.98 - samples/sec: 9742.25 - lr: 0.000032
2023-11-10 09:34:00,532 epoch 46 - iter 3/5 - loss 0.00303853 - time (sec): 1.46 - samples/sec: 9841.69 - lr: 0.000032
2023-11-10 09:34:00,925 epoch 46 - iter 4/5 - loss 0.00282971 - time (sec): 1.85 - samples/sec: 9829.82 - lr: 0.000032
2023-11-10 09:34:01,097 epoch 46 - iter 5/5 - loss 0.00267431 - time (sec): 2.02 - samples/sec: 9765.96 - lr: 0.000031
2023-11-10 09:34:01,099 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:01,104 EPOCH 46 done: loss 0.0027 - lr 0.000031


100%|██████████| 2/2 [00:04<00:00,  2.38s/it]

2023-11-10 09:34:05,884 Evaluating as a multi-label problem: False
2023-11-10 09:34:05,918 DEV : loss 0.2594267427921295 - f1-score (micro avg)  0.8204
2023-11-10 09:34:05,940 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:06,513 epoch 47 - iter 1/5 - loss 0.00077691 - time (sec): 0.57 - samples/sec: 7922.60 - lr: 0.000031
2023-11-10 09:34:07,675 epoch 47 - iter 2/5 - loss 0.00138334 - time (sec): 1.73 - samples/sec: 5357.48 - lr: 0.000031
2023-11-10 09:34:08,206 epoch 47 - iter 3/5 - loss 0.00170170 - time (sec): 2.26 - samples/sec: 6086.39 - lr: 0.000031
2023-11-10 09:34:08,704 epoch 47 - iter 4/5 - loss 0.00239961 - time (sec): 2.76 - samples/sec: 6623.95 - lr: 0.000031
2023-11-10 09:34:08,906 epoch 47 - iter 5/5 - loss 0.00227516 - time (sec): 2.96 - samples/sec: 6671.11 - lr: 0.000031
2023-11-10 09:34:08,910 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:08,912 EPOCH 47 done: loss 0.0023 - lr 0.000031


100%|██████████| 2/2 [00:03<00:00,  1.85s/it]

2023-11-10 09:34:12,636 Evaluating as a multi-label problem: False
2023-11-10 09:34:12,658 DEV : loss 0.3106972873210907 - f1-score (micro avg)  0.8032
2023-11-10 09:34:12,672 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:13,178 epoch 48 - iter 1/5 - loss 0.00242920 - time (sec): 0.50 - samples/sec: 9004.33 - lr: 0.000031
2023-11-10 09:34:13,634 epoch 48 - iter 2/5 - loss 0.00333949 - time (sec): 0.96 - samples/sec: 10136.41 - lr: 0.000031
2023-11-10 09:34:14,062 epoch 48 - iter 3/5 - loss 0.00341501 - time (sec): 1.39 - samples/sec: 10011.54 - lr: 0.000031
2023-11-10 09:34:14,482 epoch 48 - iter 4/5 - loss 0.00322148 - time (sec): 1.81 - samples/sec: 10136.06 - lr: 0.000031
2023-11-10 09:34:14,610 epoch 48 - iter 5/5 - loss 0.00312203 - time (sec): 1.94 - samples/sec: 10213.25 - lr: 0.000030
2023-11-10 09:34:14,612 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:14,616 EPOCH 48 done: loss 0.0031 - lr 0.000030


100%|██████████| 2/2 [00:02<00:00,  1.35s/it]

2023-11-10 09:34:17,345 Evaluating as a multi-label problem: False
2023-11-10 09:34:17,371 DEV : loss 0.2629760205745697 - f1-score (micro avg)  0.8087
2023-11-10 09:34:17,386 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:17,907 epoch 49 - iter 1/5 - loss 0.00311232 - time (sec): 0.52 - samples/sec: 9459.86 - lr: 0.000030
2023-11-10 09:34:18,336 epoch 49 - iter 2/5 - loss 0.00348702 - time (sec): 0.95 - samples/sec: 9622.59 - lr: 0.000030
2023-11-10 09:34:18,764 epoch 49 - iter 3/5 - loss 0.00396071 - time (sec): 1.38 - samples/sec: 9962.49 - lr: 0.000030
2023-11-10 09:34:19,243 epoch 49 - iter 4/5 - loss 0.00368558 - time (sec): 1.86 - samples/sec: 9851.13 - lr: 0.000030
2023-11-10 09:34:19,420 epoch 49 - iter 5/5 - loss 0.00345541 - time (sec): 2.03 - samples/sec: 9728.97 - lr: 0.000030
2023-11-10 09:34:19,422 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:19,424 EPOCH 49 done: loss 0.0035 - lr 0.000030


100%|██████████| 2/2 [00:11<00:00,  5.96s/it]

2023-11-10 09:34:31,377 Evaluating as a multi-label problem: False
2023-11-10 09:34:31,429 DEV : loss 0.2738499045372009 - f1-score (micro avg)  0.8161
2023-11-10 09:34:31,467 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:32,127 epoch 50 - iter 1/5 - loss 0.00097991 - time (sec): 0.65 - samples/sec: 6374.04 - lr: 0.000030
2023-11-10 09:34:32,664 epoch 50 - iter 2/5 - loss 0.00181096 - time (sec): 1.18 - samples/sec: 6904.68 - lr: 0.000030
2023-11-10 09:34:33,333 epoch 50 - iter 3/5 - loss 0.00131153 - time (sec): 1.85 - samples/sec: 7052.56 - lr: 0.000030
2023-11-10 09:34:34,063 epoch 50 - iter 4/5 - loss 0.00197001 - time (sec): 2.58 - samples/sec: 7057.16 - lr: 0.000029
2023-11-10 09:34:34,317 epoch 50 - iter 5/5 - loss 0.00226297 - time (sec): 2.84 - samples/sec: 6971.32 - lr: 0.000029
2023-11-10 09:34:34,323 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:34,329 EPOCH 50 done: loss 0.0023 - lr 0.000029


100%|██████████| 2/2 [00:08<00:00,  4.36s/it]

2023-11-10 09:34:43,082 Evaluating as a multi-label problem: False
2023-11-10 09:34:43,150 DEV : loss 0.2940373420715332 - f1-score (micro avg)  0.8222
2023-11-10 09:34:43,183 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:44,187 epoch 51 - iter 1/5 - loss 0.00181358 - time (sec): 0.99 - samples/sec: 4231.25 - lr: 0.000029
2023-11-10 09:34:45,077 epoch 51 - iter 2/5 - loss 0.00145680 - time (sec): 1.88 - samples/sec: 4819.79 - lr: 0.000029
2023-11-10 09:34:45,918 epoch 51 - iter 3/5 - loss 0.00155925 - time (sec): 2.72 - samples/sec: 4923.32 - lr: 0.000029
2023-11-10 09:34:46,794 epoch 51 - iter 4/5 - loss 0.00217967 - time (sec): 3.59 - samples/sec: 5096.22 - lr: 0.000029
2023-11-10 09:34:47,120 epoch 51 - iter 5/5 - loss 0.00224924 - time (sec): 3.92 - samples/sec: 5046.25 - lr: 0.000029
2023-11-10 09:34:47,128 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:47,130 EPOCH 51 done: loss 0.0022 - lr 0.000029


100%|██████████| 2/2 [00:08<00:00,  4.03s/it]

2023-11-10 09:34:55,210 Evaluating as a multi-label problem: False
2023-11-10 09:34:55,256 DEV : loss 0.2952629625797272 - f1-score (micro avg)  0.8233
2023-11-10 09:34:55,279 ----------------------------------------------------------------------------------------------------





2023-11-10 09:34:56,410 epoch 52 - iter 1/5 - loss 0.00097824 - time (sec): 1.13 - samples/sec: 4107.32 - lr: 0.000029
2023-11-10 09:34:57,041 epoch 52 - iter 2/5 - loss 0.00108465 - time (sec): 1.76 - samples/sec: 5096.61 - lr: 0.000029
2023-11-10 09:34:57,644 epoch 52 - iter 3/5 - loss 0.00191696 - time (sec): 2.36 - samples/sec: 5877.64 - lr: 0.000029
2023-11-10 09:34:58,484 epoch 52 - iter 4/5 - loss 0.00157522 - time (sec): 3.20 - samples/sec: 5637.54 - lr: 0.000028
2023-11-10 09:34:58,725 epoch 52 - iter 5/5 - loss 0.00180152 - time (sec): 3.44 - samples/sec: 5743.84 - lr: 0.000028
2023-11-10 09:34:58,730 ----------------------------------------------------------------------------------------------------
2023-11-10 09:34:58,740 EPOCH 52 done: loss 0.0018 - lr 0.000028


100%|██████████| 2/2 [00:05<00:00,  2.81s/it]

2023-11-10 09:35:04,377 Evaluating as a multi-label problem: False
2023-11-10 09:35:04,405 DEV : loss 0.2680259943008423 - f1-score (micro avg)  0.8219
2023-11-10 09:35:04,421 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:04,847 epoch 53 - iter 1/5 - loss 0.00153419 - time (sec): 0.42 - samples/sec: 10377.27 - lr: 0.000028
2023-11-10 09:35:05,266 epoch 53 - iter 2/5 - loss 0.00368785 - time (sec): 0.84 - samples/sec: 10833.49 - lr: 0.000028
2023-11-10 09:35:05,661 epoch 53 - iter 3/5 - loss 0.00277190 - time (sec): 1.24 - samples/sec: 10356.99 - lr: 0.000028
2023-11-10 09:35:06,162 epoch 53 - iter 4/5 - loss 0.00282369 - time (sec): 1.74 - samples/sec: 10275.45 - lr: 0.000028
2023-11-10 09:35:06,366 epoch 53 - iter 5/5 - loss 0.00302604 - time (sec): 1.94 - samples/sec: 10172.56 - lr: 0.000028
2023-11-10 09:35:06,368 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:06,371 EPOCH 53 done: loss 0.0030 - lr 0.000028


100%|██████████| 2/2 [00:04<00:00,  2.41s/it]

2023-11-10 09:35:11,204 Evaluating as a multi-label problem: False
2023-11-10 09:35:11,249 DEV : loss 0.3009181022644043 - f1-score (micro avg)  0.8163
2023-11-10 09:35:11,272 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:11,823 epoch 54 - iter 1/5 - loss 0.00214643 - time (sec): 0.55 - samples/sec: 8635.70 - lr: 0.000028
2023-11-10 09:35:12,745 epoch 54 - iter 2/5 - loss 0.00177401 - time (sec): 1.47 - samples/sec: 6602.16 - lr: 0.000028
2023-11-10 09:35:13,770 epoch 54 - iter 3/5 - loss 0.00305027 - time (sec): 2.49 - samples/sec: 5500.40 - lr: 0.000028
2023-11-10 09:35:15,921 epoch 54 - iter 4/5 - loss 0.00272701 - time (sec): 4.65 - samples/sec: 3980.85 - lr: 0.000027
2023-11-10 09:35:16,197 epoch 54 - iter 5/5 - loss 0.00290084 - time (sec): 4.92 - samples/sec: 4017.56 - lr: 0.000027
2023-11-10 09:35:16,203 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:16,206 EPOCH 54 done: loss 0.0029 - lr 0.000027


100%|██████████| 2/2 [00:06<00:00,  3.04s/it]

2023-11-10 09:35:22,317 Evaluating as a multi-label problem: False
2023-11-10 09:35:22,352 DEV : loss 0.32182809710502625 - f1-score (micro avg)  0.8044
2023-11-10 09:35:22,381 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:22,926 epoch 55 - iter 1/5 - loss 0.00537667 - time (sec): 0.54 - samples/sec: 7919.48 - lr: 0.000027
2023-11-10 09:35:23,517 epoch 55 - iter 2/5 - loss 0.00286689 - time (sec): 1.13 - samples/sec: 7967.36 - lr: 0.000027
2023-11-10 09:35:24,074 epoch 55 - iter 3/5 - loss 0.00258946 - time (sec): 1.69 - samples/sec: 8127.09 - lr: 0.000027
2023-11-10 09:35:24,701 epoch 55 - iter 4/5 - loss 0.00203979 - time (sec): 2.32 - samples/sec: 7954.28 - lr: 0.000027
2023-11-10 09:35:24,861 epoch 55 - iter 5/5 - loss 0.00211706 - time (sec): 2.48 - samples/sec: 7981.40 - lr: 0.000027
2023-11-10 09:35:24,871 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:24,879 EPOCH 55 done: loss 0.0021 - lr 0.000027


100%|██████████| 2/2 [00:06<00:00,  3.49s/it]

2023-11-10 09:35:31,889 Evaluating as a multi-label problem: False
2023-11-10 09:35:31,932 DEV : loss 0.26214173436164856 - f1-score (micro avg)  0.8302
2023-11-10 09:35:31,964 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:32,503 epoch 56 - iter 1/5 - loss 0.00186354 - time (sec): 0.54 - samples/sec: 8165.99 - lr: 0.000027
2023-11-10 09:35:33,160 epoch 56 - iter 2/5 - loss 0.00116422 - time (sec): 1.19 - samples/sec: 8294.11 - lr: 0.000027
2023-11-10 09:35:33,649 epoch 56 - iter 3/5 - loss 0.00110278 - time (sec): 1.68 - samples/sec: 8111.75 - lr: 0.000026
2023-11-10 09:35:34,788 epoch 56 - iter 4/5 - loss 0.00161761 - time (sec): 2.82 - samples/sec: 6525.15 - lr: 0.000026
2023-11-10 09:35:34,952 epoch 56 - iter 5/5 - loss 0.00177490 - time (sec): 2.98 - samples/sec: 6625.88 - lr: 0.000026
2023-11-10 09:35:34,955 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:34,957 EPOCH 56 done: loss 0.0018 - lr 0.000026


100%|██████████| 2/2 [00:04<00:00,  2.01s/it]

2023-11-10 09:35:38,982 Evaluating as a multi-label problem: False
2023-11-10 09:35:39,049 DEV : loss 0.26014262437820435 - f1-score (micro avg)  0.8283
2023-11-10 09:35:39,070 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:39,660 epoch 57 - iter 1/5 - loss 0.00218109 - time (sec): 0.59 - samples/sec: 8248.18 - lr: 0.000026
2023-11-10 09:35:40,140 epoch 57 - iter 2/5 - loss 0.00126951 - time (sec): 1.06 - samples/sec: 9126.71 - lr: 0.000026
2023-11-10 09:35:40,653 epoch 57 - iter 3/5 - loss 0.00106532 - time (sec): 1.58 - samples/sec: 8621.77 - lr: 0.000026
2023-11-10 09:35:41,288 epoch 57 - iter 4/5 - loss 0.00163922 - time (sec): 2.21 - samples/sec: 8221.07 - lr: 0.000026
2023-11-10 09:35:41,478 epoch 57 - iter 5/5 - loss 0.00188575 - time (sec): 2.40 - samples/sec: 8226.56 - lr: 0.000026
2023-11-10 09:35:41,482 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:41,485 EPOCH 57 done: loss 0.0019 - lr 0.000026


100%|██████████| 2/2 [00:03<00:00,  1.56s/it]

2023-11-10 09:35:44,624 Evaluating as a multi-label problem: False
2023-11-10 09:35:44,649 DEV : loss 0.2573532462120056 - f1-score (micro avg)  0.8247
2023-11-10 09:35:44,663 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:45,134 epoch 58 - iter 1/5 - loss 0.00122459 - time (sec): 0.47 - samples/sec: 9786.82 - lr: 0.000026
2023-11-10 09:35:45,566 epoch 58 - iter 2/5 - loss 0.00102492 - time (sec): 0.90 - samples/sec: 10291.20 - lr: 0.000026
2023-11-10 09:35:46,026 epoch 58 - iter 3/5 - loss 0.00081551 - time (sec): 1.36 - samples/sec: 10009.51 - lr: 0.000025
2023-11-10 09:35:46,502 epoch 58 - iter 4/5 - loss 0.00077429 - time (sec): 1.84 - samples/sec: 9960.41 - lr: 0.000025
2023-11-10 09:35:46,715 epoch 58 - iter 5/5 - loss 0.00074522 - time (sec): 2.05 - samples/sec: 9643.44 - lr: 0.000025
2023-11-10 09:35:46,721 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:46,724 EPOCH 58 done: loss 0.0007 - lr 0.000025


100%|██████████| 2/2 [00:05<00:00,  2.91s/it]

2023-11-10 09:35:52,566 Evaluating as a multi-label problem: False
2023-11-10 09:35:52,605 DEV : loss 0.2520274221897125 - f1-score (micro avg)  0.8088
2023-11-10 09:35:52,628 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:53,173 epoch 59 - iter 1/5 - loss 0.00048423 - time (sec): 0.54 - samples/sec: 8953.82 - lr: 0.000025
2023-11-10 09:35:53,677 epoch 59 - iter 2/5 - loss 0.00117581 - time (sec): 1.05 - samples/sec: 8664.88 - lr: 0.000025
2023-11-10 09:35:54,172 epoch 59 - iter 3/5 - loss 0.00090980 - time (sec): 1.54 - samples/sec: 8742.71 - lr: 0.000025
2023-11-10 09:35:54,824 epoch 59 - iter 4/5 - loss 0.00078478 - time (sec): 2.19 - samples/sec: 8332.94 - lr: 0.000025
2023-11-10 09:35:54,970 epoch 59 - iter 5/5 - loss 0.00089560 - time (sec): 2.34 - samples/sec: 8452.43 - lr: 0.000025
2023-11-10 09:35:54,972 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:54,977 EPOCH 59 done: loss 0.0009 - lr 0.000025


100%|██████████| 2/2 [00:02<00:00,  1.37s/it]

2023-11-10 09:35:57,734 Evaluating as a multi-label problem: False
2023-11-10 09:35:57,764 DEV : loss 0.26280874013900757 - f1-score (micro avg)  0.8091
2023-11-10 09:35:57,777 ----------------------------------------------------------------------------------------------------





2023-11-10 09:35:58,222 epoch 60 - iter 1/5 - loss 0.00125710 - time (sec): 0.44 - samples/sec: 11110.39 - lr: 0.000025
2023-11-10 09:35:58,620 epoch 60 - iter 2/5 - loss 0.00093291 - time (sec): 0.84 - samples/sec: 10499.91 - lr: 0.000024
2023-11-10 09:35:59,126 epoch 60 - iter 3/5 - loss 0.00104595 - time (sec): 1.35 - samples/sec: 10269.69 - lr: 0.000024
2023-11-10 09:35:59,546 epoch 60 - iter 4/5 - loss 0.00111180 - time (sec): 1.77 - samples/sec: 10195.00 - lr: 0.000024
2023-11-10 09:35:59,730 epoch 60 - iter 5/5 - loss 0.00103324 - time (sec): 1.95 - samples/sec: 10136.07 - lr: 0.000024
2023-11-10 09:35:59,733 ----------------------------------------------------------------------------------------------------
2023-11-10 09:35:59,735 EPOCH 60 done: loss 0.0010 - lr 0.000024


100%|██████████| 2/2 [00:03<00:00,  1.58s/it]

2023-11-10 09:36:02,904 Evaluating as a multi-label problem: False
2023-11-10 09:36:02,925 DEV : loss 0.27908387780189514 - f1-score (micro avg)  0.8241
2023-11-10 09:36:02,938 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:03,377 epoch 61 - iter 1/5 - loss 0.00064046 - time (sec): 0.44 - samples/sec: 10399.01 - lr: 0.000024
2023-11-10 09:36:03,782 epoch 61 - iter 2/5 - loss 0.00090728 - time (sec): 0.84 - samples/sec: 9743.50 - lr: 0.000024
2023-11-10 09:36:04,285 epoch 61 - iter 3/5 - loss 0.00159159 - time (sec): 1.35 - samples/sec: 9536.56 - lr: 0.000024
2023-11-10 09:36:04,741 epoch 61 - iter 4/5 - loss 0.00184066 - time (sec): 1.80 - samples/sec: 9971.63 - lr: 0.000024
2023-11-10 09:36:04,927 epoch 61 - iter 5/5 - loss 0.00188643 - time (sec): 1.99 - samples/sec: 9949.00 - lr: 0.000024
2023-11-10 09:36:04,934 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:04,935 EPOCH 61 done: loss 0.0019 - lr 0.000024


100%|██████████| 2/2 [00:05<00:00,  2.70s/it]

2023-11-10 09:36:10,351 Evaluating as a multi-label problem: False
2023-11-10 09:36:10,391 DEV : loss 0.28059425950050354 - f1-score (micro avg)  0.8294
2023-11-10 09:36:10,411 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:10,964 epoch 62 - iter 1/5 - loss 0.00036643 - time (sec): 0.55 - samples/sec: 8626.64 - lr: 0.000024
2023-11-10 09:36:11,484 epoch 62 - iter 2/5 - loss 0.00038502 - time (sec): 1.07 - samples/sec: 8421.79 - lr: 0.000023
2023-11-10 09:36:11,977 epoch 62 - iter 3/5 - loss 0.00051282 - time (sec): 1.56 - samples/sec: 8395.26 - lr: 0.000023
2023-11-10 09:36:12,623 epoch 62 - iter 4/5 - loss 0.00043357 - time (sec): 2.21 - samples/sec: 8156.05 - lr: 0.000023
2023-11-10 09:36:12,851 epoch 62 - iter 5/5 - loss 0.00057981 - time (sec): 2.44 - samples/sec: 8112.18 - lr: 0.000023
2023-11-10 09:36:12,854 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:12,856 EPOCH 62 done: loss 0.0006 - lr 0.000023


100%|██████████| 2/2 [00:03<00:00,  1.53s/it]

2023-11-10 09:36:15,936 Evaluating as a multi-label problem: False
2023-11-10 09:36:15,957 DEV : loss 0.2901126742362976 - f1-score (micro avg)  0.8275
2023-11-10 09:36:15,969 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:16,842 epoch 63 - iter 1/5 - loss 0.00020272 - time (sec): 0.87 - samples/sec: 4973.01 - lr: 0.000023
2023-11-10 09:36:17,269 epoch 63 - iter 2/5 - loss 0.00030761 - time (sec): 1.29 - samples/sec: 6862.51 - lr: 0.000023
2023-11-10 09:36:18,135 epoch 63 - iter 3/5 - loss 0.00033535 - time (sec): 2.16 - samples/sec: 6309.41 - lr: 0.000023
2023-11-10 09:36:18,570 epoch 63 - iter 4/5 - loss 0.00037422 - time (sec): 2.59 - samples/sec: 7106.66 - lr: 0.000023
2023-11-10 09:36:18,689 epoch 63 - iter 5/5 - loss 0.00036734 - time (sec): 2.71 - samples/sec: 7288.31 - lr: 0.000023
2023-11-10 09:36:18,692 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:18,694 EPOCH 63 done: loss 0.0004 - lr 0.000023


100%|██████████| 2/2 [00:02<00:00,  1.36s/it]

2023-11-10 09:36:21,428 Evaluating as a multi-label problem: False
2023-11-10 09:36:21,460 DEV : loss 0.2878546714782715 - f1-score (micro avg)  0.8352
2023-11-10 09:36:21,475 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:21,910 epoch 64 - iter 1/5 - loss 0.00032956 - time (sec): 0.43 - samples/sec: 10465.94 - lr: 0.000022
2023-11-10 09:36:22,328 epoch 64 - iter 2/5 - loss 0.00108690 - time (sec): 0.85 - samples/sec: 10246.38 - lr: 0.000022
2023-11-10 09:36:22,999 epoch 64 - iter 3/5 - loss 0.00083266 - time (sec): 1.52 - samples/sec: 9498.40 - lr: 0.000022
2023-11-10 09:36:23,401 epoch 64 - iter 4/5 - loss 0.00069963 - time (sec): 1.92 - samples/sec: 9448.11 - lr: 0.000022
2023-11-10 09:36:23,567 epoch 64 - iter 5/5 - loss 0.00065717 - time (sec): 2.09 - samples/sec: 9459.01 - lr: 0.000022
2023-11-10 09:36:23,570 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:23,574 EPOCH 64 done: loss 0.0007 - lr 0.000022


100%|██████████| 2/2 [00:05<00:00,  2.68s/it]

2023-11-10 09:36:28,963 Evaluating as a multi-label problem: False
2023-11-10 09:36:29,004 DEV : loss 0.29582661390304565 - f1-score (micro avg)  0.8347
2023-11-10 09:36:29,024 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:29,639 epoch 65 - iter 1/5 - loss 0.00021994 - time (sec): 0.61 - samples/sec: 8886.51 - lr: 0.000022
2023-11-10 09:36:30,192 epoch 65 - iter 2/5 - loss 0.00071143 - time (sec): 1.17 - samples/sec: 8565.75 - lr: 0.000022
2023-11-10 09:36:31,258 epoch 65 - iter 3/5 - loss 0.00058027 - time (sec): 2.23 - samples/sec: 6287.84 - lr: 0.000022
2023-11-10 09:36:31,867 epoch 65 - iter 4/5 - loss 0.00133875 - time (sec): 2.84 - samples/sec: 6469.75 - lr: 0.000022
2023-11-10 09:36:32,061 epoch 65 - iter 5/5 - loss 0.00126389 - time (sec): 3.03 - samples/sec: 6515.77 - lr: 0.000022
2023-11-10 09:36:32,065 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:32,067 EPOCH 65 done: loss 0.0013 - lr 0.000022


100%|██████████| 2/2 [00:02<00:00,  1.47s/it]

2023-11-10 09:36:35,010 Evaluating as a multi-label problem: False
2023-11-10 09:36:35,043 DEV : loss 0.2890584170818329 - f1-score (micro avg)  0.8308
2023-11-10 09:36:35,063 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:35,457 epoch 66 - iter 1/5 - loss 0.00056671 - time (sec): 0.39 - samples/sec: 9341.55 - lr: 0.000021
2023-11-10 09:36:35,868 epoch 66 - iter 2/5 - loss 0.00058096 - time (sec): 0.80 - samples/sec: 10020.86 - lr: 0.000021
2023-11-10 09:36:36,329 epoch 66 - iter 3/5 - loss 0.00038565 - time (sec): 1.26 - samples/sec: 10403.62 - lr: 0.000021
2023-11-10 09:36:36,813 epoch 66 - iter 4/5 - loss 0.00036120 - time (sec): 1.75 - samples/sec: 10129.21 - lr: 0.000021
2023-11-10 09:36:37,000 epoch 66 - iter 5/5 - loss 0.00084010 - time (sec): 1.93 - samples/sec: 10220.79 - lr: 0.000021
2023-11-10 09:36:37,002 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:37,006 EPOCH 66 done: loss 0.0008 - lr 0.000021


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:36:39,700 Evaluating as a multi-label problem: False
2023-11-10 09:36:39,726 DEV : loss 0.2896568477153778 - f1-score (micro avg)  0.8323
2023-11-10 09:36:39,740 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:40,225 epoch 67 - iter 1/5 - loss 0.00013217 - time (sec): 0.48 - samples/sec: 9874.70 - lr: 0.000021
2023-11-10 09:36:40,609 epoch 67 - iter 2/5 - loss 0.00015700 - time (sec): 0.87 - samples/sec: 10180.97 - lr: 0.000021
2023-11-10 09:36:41,084 epoch 67 - iter 3/5 - loss 0.00019412 - time (sec): 1.34 - samples/sec: 10367.46 - lr: 0.000021
2023-11-10 09:36:41,496 epoch 67 - iter 4/5 - loss 0.00022089 - time (sec): 1.75 - samples/sec: 10270.65 - lr: 0.000021
2023-11-10 09:36:41,636 epoch 67 - iter 5/5 - loss 0.00021194 - time (sec): 1.89 - samples/sec: 10437.26 - lr: 0.000021
2023-11-10 09:36:41,639 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:41,642 EPOCH 67 done: loss 0.0002 - lr 0.000021


100%|██████████| 2/2 [00:04<00:00,  2.49s/it]

2023-11-10 09:36:46,638 Evaluating as a multi-label problem: False
2023-11-10 09:36:46,665 DEV : loss 0.32629287242889404 - f1-score (micro avg)  0.8307
2023-11-10 09:36:46,684 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:47,250 epoch 68 - iter 1/5 - loss 0.00110521 - time (sec): 0.56 - samples/sec: 8153.69 - lr: 0.000020
2023-11-10 09:36:47,744 epoch 68 - iter 2/5 - loss 0.00068438 - time (sec): 1.06 - samples/sec: 7958.19 - lr: 0.000020
2023-11-10 09:36:48,287 epoch 68 - iter 3/5 - loss 0.00073613 - time (sec): 1.60 - samples/sec: 8108.51 - lr: 0.000020
2023-11-10 09:36:48,872 epoch 68 - iter 4/5 - loss 0.00065342 - time (sec): 2.18 - samples/sec: 8011.11 - lr: 0.000020
2023-11-10 09:36:49,141 epoch 68 - iter 5/5 - loss 0.00060537 - time (sec): 2.45 - samples/sec: 8059.19 - lr: 0.000020
2023-11-10 09:36:49,150 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:49,152 EPOCH 68 done: loss 0.0006 - lr 0.000020


100%|██████████| 2/2 [00:03<00:00,  1.85s/it]

2023-11-10 09:36:52,868 Evaluating as a multi-label problem: False
2023-11-10 09:36:52,900 DEV : loss 0.33137017488479614 - f1-score (micro avg)  0.8363
2023-11-10 09:36:52,914 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:53,383 epoch 69 - iter 1/5 - loss 0.00016713 - time (sec): 0.47 - samples/sec: 8904.70 - lr: 0.000020
2023-11-10 09:36:53,839 epoch 69 - iter 2/5 - loss 0.00024949 - time (sec): 0.92 - samples/sec: 9460.59 - lr: 0.000020
2023-11-10 09:36:54,320 epoch 69 - iter 3/5 - loss 0.00020807 - time (sec): 1.40 - samples/sec: 9874.98 - lr: 0.000020
2023-11-10 09:36:54,687 epoch 69 - iter 4/5 - loss 0.00021571 - time (sec): 1.77 - samples/sec: 10033.96 - lr: 0.000020
2023-11-10 09:36:54,889 epoch 69 - iter 5/5 - loss 0.00021493 - time (sec): 1.97 - samples/sec: 10019.25 - lr: 0.000019
2023-11-10 09:36:54,893 ----------------------------------------------------------------------------------------------------
2023-11-10 09:36:54,899 EPOCH 69 done: loss 0.0002 - lr 0.000019


100%|██████████| 2/2 [00:02<00:00,  1.36s/it]

2023-11-10 09:36:57,633 Evaluating as a multi-label problem: False
2023-11-10 09:36:57,666 DEV : loss 0.31606540083885193 - f1-score (micro avg)  0.8366
2023-11-10 09:36:57,681 ----------------------------------------------------------------------------------------------------





2023-11-10 09:36:58,577 epoch 70 - iter 1/5 - loss 0.00024506 - time (sec): 0.89 - samples/sec: 5322.55 - lr: 0.000019
2023-11-10 09:36:58,968 epoch 70 - iter 2/5 - loss 0.00023453 - time (sec): 1.28 - samples/sec: 6608.77 - lr: 0.000019
2023-11-10 09:36:59,389 epoch 70 - iter 3/5 - loss 0.00019692 - time (sec): 1.71 - samples/sec: 7608.64 - lr: 0.000019
2023-11-10 09:36:59,846 epoch 70 - iter 4/5 - loss 0.00022625 - time (sec): 2.16 - samples/sec: 8182.30 - lr: 0.000019
2023-11-10 09:37:00,047 epoch 70 - iter 5/5 - loss 0.00020760 - time (sec): 2.36 - samples/sec: 8367.54 - lr: 0.000019
2023-11-10 09:37:00,049 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:00,054 EPOCH 70 done: loss 0.0002 - lr 0.000019


100%|██████████| 2/2 [00:04<00:00,  2.10s/it]

2023-11-10 09:37:04,260 Evaluating as a multi-label problem: False
2023-11-10 09:37:04,298 DEV : loss 0.30900073051452637 - f1-score (micro avg)  0.8248
2023-11-10 09:37:04,317 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:04,879 epoch 71 - iter 1/5 - loss 0.00023555 - time (sec): 0.56 - samples/sec: 7979.48 - lr: 0.000019
2023-11-10 09:37:05,361 epoch 71 - iter 2/5 - loss 0.00039187 - time (sec): 1.04 - samples/sec: 8198.76 - lr: 0.000019
2023-11-10 09:37:05,856 epoch 71 - iter 3/5 - loss 0.00109282 - time (sec): 1.54 - samples/sec: 8416.18 - lr: 0.000019
2023-11-10 09:37:06,529 epoch 71 - iter 4/5 - loss 0.00081193 - time (sec): 2.21 - samples/sec: 8178.32 - lr: 0.000019
2023-11-10 09:37:06,754 epoch 71 - iter 5/5 - loss 0.00075325 - time (sec): 2.43 - samples/sec: 8124.51 - lr: 0.000018
2023-11-10 09:37:06,757 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:06,759 EPOCH 71 done: loss 0.0008 - lr 0.000018


100%|██████████| 2/2 [00:04<00:00,  2.12s/it]

2023-11-10 09:37:11,016 Evaluating as a multi-label problem: False
2023-11-10 09:37:11,042 DEV : loss 0.31458720564842224 - f1-score (micro avg)  0.8234
2023-11-10 09:37:11,056 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:11,494 epoch 72 - iter 1/5 - loss 0.00012489 - time (sec): 0.44 - samples/sec: 8517.91 - lr: 0.000018
2023-11-10 09:37:11,912 epoch 72 - iter 2/5 - loss 0.00012382 - time (sec): 0.85 - samples/sec: 9501.15 - lr: 0.000018
2023-11-10 09:37:12,327 epoch 72 - iter 3/5 - loss 0.00011902 - time (sec): 1.27 - samples/sec: 9654.94 - lr: 0.000018
2023-11-10 09:37:13,258 epoch 72 - iter 4/5 - loss 0.00015880 - time (sec): 2.20 - samples/sec: 8089.59 - lr: 0.000018
2023-11-10 09:37:13,442 epoch 72 - iter 5/5 - loss 0.00015345 - time (sec): 2.38 - samples/sec: 8293.47 - lr: 0.000018
2023-11-10 09:37:13,444 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:13,449 EPOCH 72 done: loss 0.0002 - lr 0.000018


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:37:16,143 Evaluating as a multi-label problem: False
2023-11-10 09:37:16,173 DEV : loss 0.3329286277294159 - f1-score (micro avg)  0.8225
2023-11-10 09:37:16,192 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:16,617 epoch 73 - iter 1/5 - loss 0.00271743 - time (sec): 0.42 - samples/sec: 10834.57 - lr: 0.000018
2023-11-10 09:37:17,026 epoch 73 - iter 2/5 - loss 0.00159775 - time (sec): 0.83 - samples/sec: 10387.65 - lr: 0.000018
2023-11-10 09:37:17,523 epoch 73 - iter 3/5 - loss 0.00108985 - time (sec): 1.33 - samples/sec: 9994.18 - lr: 0.000018
2023-11-10 09:37:17,923 epoch 73 - iter 4/5 - loss 0.00084209 - time (sec): 1.73 - samples/sec: 10315.16 - lr: 0.000017
2023-11-10 09:37:18,112 epoch 73 - iter 5/5 - loss 0.00076741 - time (sec): 1.92 - samples/sec: 10302.23 - lr: 0.000017
2023-11-10 09:37:18,115 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:18,117 EPOCH 73 done: loss 0.0008 - lr 0.000017


100%|██████████| 2/2 [00:03<00:00,  1.63s/it]

2023-11-10 09:37:21,403 Evaluating as a multi-label problem: False
2023-11-10 09:37:21,454 DEV : loss 0.3323475122451782 - f1-score (micro avg)  0.8233
2023-11-10 09:37:21,480 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:22,026 epoch 74 - iter 1/5 - loss 0.00008375 - time (sec): 0.54 - samples/sec: 8452.32 - lr: 0.000017
2023-11-10 09:37:22,546 epoch 74 - iter 2/5 - loss 0.00087421 - time (sec): 1.06 - samples/sec: 8601.98 - lr: 0.000017
2023-11-10 09:37:23,093 epoch 74 - iter 3/5 - loss 0.00061462 - time (sec): 1.61 - samples/sec: 8568.07 - lr: 0.000017
2023-11-10 09:37:23,697 epoch 74 - iter 4/5 - loss 0.00049625 - time (sec): 2.21 - samples/sec: 8185.60 - lr: 0.000017
2023-11-10 09:37:23,917 epoch 74 - iter 5/5 - loss 0.00045989 - time (sec): 2.43 - samples/sec: 8124.83 - lr: 0.000017
2023-11-10 09:37:23,920 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:23,926 EPOCH 74 done: loss 0.0005 - lr 0.000017


100%|██████████| 2/2 [00:05<00:00,  2.61s/it]

2023-11-10 09:37:29,170 Evaluating as a multi-label problem: False
2023-11-10 09:37:29,198 DEV : loss 0.3299107551574707 - f1-score (micro avg)  0.8258
2023-11-10 09:37:29,214 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:29,738 epoch 75 - iter 1/5 - loss 0.00006226 - time (sec): 0.52 - samples/sec: 9146.44 - lr: 0.000017
2023-11-10 09:37:30,161 epoch 75 - iter 2/5 - loss 0.00062360 - time (sec): 0.94 - samples/sec: 10286.66 - lr: 0.000017
2023-11-10 09:37:30,613 epoch 75 - iter 3/5 - loss 0.00047837 - time (sec): 1.40 - samples/sec: 9853.18 - lr: 0.000017
2023-11-10 09:37:31,026 epoch 75 - iter 4/5 - loss 0.00074436 - time (sec): 1.81 - samples/sec: 10091.89 - lr: 0.000016
2023-11-10 09:37:31,193 epoch 75 - iter 5/5 - loss 0.00069584 - time (sec): 1.98 - samples/sec: 10003.37 - lr: 0.000016
2023-11-10 09:37:31,195 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:31,200 EPOCH 75 done: loss 0.0007 - lr 0.000016


100%|██████████| 2/2 [00:02<00:00,  1.35s/it]

2023-11-10 09:37:33,933 Evaluating as a multi-label problem: False
2023-11-10 09:37:33,964 DEV : loss 0.341106653213501 - f1-score (micro avg)  0.8316
2023-11-10 09:37:33,979 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:34,435 epoch 76 - iter 1/5 - loss 0.00010997 - time (sec): 0.45 - samples/sec: 10772.29 - lr: 0.000016
2023-11-10 09:37:34,885 epoch 76 - iter 2/5 - loss 0.00012020 - time (sec): 0.91 - samples/sec: 10472.33 - lr: 0.000016
2023-11-10 09:37:35,383 epoch 76 - iter 3/5 - loss 0.00012185 - time (sec): 1.40 - samples/sec: 9946.79 - lr: 0.000016
2023-11-10 09:37:35,801 epoch 76 - iter 4/5 - loss 0.00010612 - time (sec): 1.82 - samples/sec: 10070.97 - lr: 0.000016
2023-11-10 09:37:35,939 epoch 76 - iter 5/5 - loss 0.00010302 - time (sec): 1.96 - samples/sec: 10096.86 - lr: 0.000016
2023-11-10 09:37:35,941 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:35,945 EPOCH 76 done: loss 0.0001 - lr 0.000016


100%|██████████| 2/2 [00:02<00:00,  1.33s/it]

2023-11-10 09:37:38,621 Evaluating as a multi-label problem: False
2023-11-10 09:37:38,660 DEV : loss 0.34898731112480164 - f1-score (micro avg)  0.8285
2023-11-10 09:37:38,674 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:39,632 epoch 77 - iter 1/5 - loss 0.00009721 - time (sec): 0.96 - samples/sec: 3900.97 - lr: 0.000016
2023-11-10 09:37:40,199 epoch 77 - iter 2/5 - loss 0.00012386 - time (sec): 1.52 - samples/sec: 5683.95 - lr: 0.000016
2023-11-10 09:37:40,728 epoch 77 - iter 3/5 - loss 0.00018171 - time (sec): 2.05 - samples/sec: 6213.27 - lr: 0.000015
2023-11-10 09:37:41,408 epoch 77 - iter 4/5 - loss 0.00015217 - time (sec): 2.73 - samples/sec: 6548.01 - lr: 0.000015
2023-11-10 09:37:41,650 epoch 77 - iter 5/5 - loss 0.00014663 - time (sec): 2.97 - samples/sec: 6647.40 - lr: 0.000015
2023-11-10 09:37:41,655 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:41,658 EPOCH 77 done: loss 0.0001 - lr 0.000015


100%|██████████| 2/2 [00:05<00:00,  2.57s/it]

2023-11-10 09:37:46,816 Evaluating as a multi-label problem: False
2023-11-10 09:37:46,860 DEV : loss 0.3459241986274719 - f1-score (micro avg)  0.8247
2023-11-10 09:37:46,884 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:47,426 epoch 78 - iter 1/5 - loss 0.00010328 - time (sec): 0.54 - samples/sec: 7909.21 - lr: 0.000015
2023-11-10 09:37:47,835 epoch 78 - iter 2/5 - loss 0.00009489 - time (sec): 0.95 - samples/sec: 9165.51 - lr: 0.000015
2023-11-10 09:37:48,268 epoch 78 - iter 3/5 - loss 0.00008924 - time (sec): 1.38 - samples/sec: 9486.00 - lr: 0.000015
2023-11-10 09:37:48,688 epoch 78 - iter 4/5 - loss 0.00008942 - time (sec): 1.80 - samples/sec: 9656.37 - lr: 0.000015
2023-11-10 09:37:48,888 epoch 78 - iter 5/5 - loss 0.00009418 - time (sec): 2.00 - samples/sec: 9882.74 - lr: 0.000015
2023-11-10 09:37:48,892 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:48,894 EPOCH 78 done: loss 0.0001 - lr 0.000015


100%|██████████| 2/2 [00:02<00:00,  1.32s/it]

2023-11-10 09:37:51,554 Evaluating as a multi-label problem: False
2023-11-10 09:37:51,579 DEV : loss 0.33726850152015686 - f1-score (micro avg)  0.8264
2023-11-10 09:37:51,596 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:52,117 epoch 79 - iter 1/5 - loss 0.00031359 - time (sec): 0.52 - samples/sec: 10204.35 - lr: 0.000015
2023-11-10 09:37:52,962 epoch 79 - iter 2/5 - loss 0.00023591 - time (sec): 1.36 - samples/sec: 7241.25 - lr: 0.000015
2023-11-10 09:37:53,372 epoch 79 - iter 3/5 - loss 0.00066706 - time (sec): 1.77 - samples/sec: 7831.48 - lr: 0.000014
2023-11-10 09:37:53,840 epoch 79 - iter 4/5 - loss 0.00052259 - time (sec): 2.24 - samples/sec: 8246.77 - lr: 0.000014
2023-11-10 09:37:53,985 epoch 79 - iter 5/5 - loss 0.00049273 - time (sec): 2.39 - samples/sec: 8284.05 - lr: 0.000014
2023-11-10 09:37:53,987 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:53,991 EPOCH 79 done: loss 0.0005 - lr 0.000014


100%|██████████| 2/2 [00:02<00:00,  1.35s/it]

2023-11-10 09:37:56,712 Evaluating as a multi-label problem: False
2023-11-10 09:37:56,745 DEV : loss 0.3377552926540375 - f1-score (micro avg)  0.8251
2023-11-10 09:37:56,765 ----------------------------------------------------------------------------------------------------





2023-11-10 09:37:57,338 epoch 80 - iter 1/5 - loss 0.00216850 - time (sec): 0.57 - samples/sec: 9274.16 - lr: 0.000014
2023-11-10 09:37:57,794 epoch 80 - iter 2/5 - loss 0.00125819 - time (sec): 1.03 - samples/sec: 9072.71 - lr: 0.000014
2023-11-10 09:37:58,274 epoch 80 - iter 3/5 - loss 0.00140749 - time (sec): 1.51 - samples/sec: 8853.39 - lr: 0.000014
2023-11-10 09:37:58,811 epoch 80 - iter 4/5 - loss 0.00112027 - time (sec): 2.04 - samples/sec: 8819.29 - lr: 0.000014
2023-11-10 09:37:59,040 epoch 80 - iter 5/5 - loss 0.00102446 - time (sec): 2.27 - samples/sec: 8700.58 - lr: 0.000014
2023-11-10 09:37:59,049 ----------------------------------------------------------------------------------------------------
2023-11-10 09:37:59,052 EPOCH 80 done: loss 0.0010 - lr 0.000014


100%|██████████| 2/2 [00:05<00:00,  2.95s/it]

2023-11-10 09:38:04,967 Evaluating as a multi-label problem: False
2023-11-10 09:38:05,009 DEV : loss 0.3464910686016083 - f1-score (micro avg)  0.8237
2023-11-10 09:38:05,038 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:05,569 epoch 81 - iter 1/5 - loss 0.00011520 - time (sec): 0.53 - samples/sec: 7994.35 - lr: 0.000014
2023-11-10 09:38:06,057 epoch 81 - iter 2/5 - loss 0.00009357 - time (sec): 1.02 - samples/sec: 8024.12 - lr: 0.000013
2023-11-10 09:38:06,618 epoch 81 - iter 3/5 - loss 0.00009079 - time (sec): 1.58 - samples/sec: 8326.36 - lr: 0.000013
2023-11-10 09:38:07,146 epoch 81 - iter 4/5 - loss 0.00008206 - time (sec): 2.10 - samples/sec: 8442.62 - lr: 0.000013
2023-11-10 09:38:07,361 epoch 81 - iter 5/5 - loss 0.00007643 - time (sec): 2.32 - samples/sec: 8524.44 - lr: 0.000013
2023-11-10 09:38:07,364 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:07,369 EPOCH 81 done: loss 0.0001 - lr 0.000013


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:38:10,486 Evaluating as a multi-label problem: False
2023-11-10 09:38:10,520 DEV : loss 0.3665333092212677 - f1-score (micro avg)  0.8271
2023-11-10 09:38:10,534 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:10,965 epoch 82 - iter 1/5 - loss 0.00007511 - time (sec): 0.43 - samples/sec: 10551.24 - lr: 0.000013
2023-11-10 09:38:11,443 epoch 82 - iter 2/5 - loss 0.00061279 - time (sec): 0.91 - samples/sec: 10091.01 - lr: 0.000013
2023-11-10 09:38:11,853 epoch 82 - iter 3/5 - loss 0.00042950 - time (sec): 1.32 - samples/sec: 10350.97 - lr: 0.000013
2023-11-10 09:38:12,271 epoch 82 - iter 4/5 - loss 0.00033360 - time (sec): 1.74 - samples/sec: 10525.56 - lr: 0.000013
2023-11-10 09:38:12,474 epoch 82 - iter 5/5 - loss 0.00031404 - time (sec): 1.94 - samples/sec: 10199.04 - lr: 0.000013
2023-11-10 09:38:12,479 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:12,481 EPOCH 82 done: loss 0.0003 - lr 0.000013


100%|██████████| 2/2 [00:02<00:00,  1.35s/it]

2023-11-10 09:38:15,199 Evaluating as a multi-label problem: False
2023-11-10 09:38:15,224 DEV : loss 0.3722875416278839 - f1-score (micro avg)  0.8275
2023-11-10 09:38:15,238 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:15,742 epoch 83 - iter 1/5 - loss 0.00005737 - time (sec): 0.50 - samples/sec: 8950.93 - lr: 0.000013
2023-11-10 09:38:16,159 epoch 83 - iter 2/5 - loss 0.00005289 - time (sec): 0.92 - samples/sec: 9617.78 - lr: 0.000012
2023-11-10 09:38:16,615 epoch 83 - iter 3/5 - loss 0.00006107 - time (sec): 1.37 - samples/sec: 9712.29 - lr: 0.000012
2023-11-10 09:38:17,095 epoch 83 - iter 4/5 - loss 0.00006248 - time (sec): 1.85 - samples/sec: 9842.78 - lr: 0.000012
2023-11-10 09:38:17,327 epoch 83 - iter 5/5 - loss 0.00006196 - time (sec): 2.09 - samples/sec: 9482.43 - lr: 0.000012
2023-11-10 09:38:17,332 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:17,339 EPOCH 83 done: loss 0.0001 - lr 0.000012


100%|██████████| 2/2 [00:05<00:00,  2.92s/it]

2023-11-10 09:38:23,202 Evaluating as a multi-label problem: False
2023-11-10 09:38:23,244 DEV : loss 0.3640052080154419 - f1-score (micro avg)  0.8344
2023-11-10 09:38:23,265 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:23,808 epoch 84 - iter 1/5 - loss 0.00004279 - time (sec): 0.54 - samples/sec: 8949.35 - lr: 0.000012
2023-11-10 09:38:24,331 epoch 84 - iter 2/5 - loss 0.00004964 - time (sec): 1.06 - samples/sec: 8512.01 - lr: 0.000012
2023-11-10 09:38:24,931 epoch 84 - iter 3/5 - loss 0.00006314 - time (sec): 1.66 - samples/sec: 8493.77 - lr: 0.000012
2023-11-10 09:38:25,456 epoch 84 - iter 4/5 - loss 0.00006524 - time (sec): 2.19 - samples/sec: 8335.06 - lr: 0.000012
2023-11-10 09:38:25,611 epoch 84 - iter 5/5 - loss 0.00006591 - time (sec): 2.34 - samples/sec: 8437.72 - lr: 0.000012
2023-11-10 09:38:25,613 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:25,618 EPOCH 84 done: loss 0.0001 - lr 0.000012


100%|██████████| 2/2 [00:02<00:00,  1.36s/it]

2023-11-10 09:38:28,363 Evaluating as a multi-label problem: False
2023-11-10 09:38:28,393 DEV : loss 0.3447619378566742 - f1-score (micro avg)  0.8301
2023-11-10 09:38:28,408 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:28,873 epoch 85 - iter 1/5 - loss 0.00006004 - time (sec): 0.46 - samples/sec: 10689.29 - lr: 0.000012
2023-11-10 09:38:29,338 epoch 85 - iter 2/5 - loss 0.00005934 - time (sec): 0.93 - samples/sec: 10217.10 - lr: 0.000011
2023-11-10 09:38:29,748 epoch 85 - iter 3/5 - loss 0.00006486 - time (sec): 1.34 - samples/sec: 10343.10 - lr: 0.000011
2023-11-10 09:38:30,231 epoch 85 - iter 4/5 - loss 0.00005772 - time (sec): 1.82 - samples/sec: 10135.85 - lr: 0.000011
2023-11-10 09:38:30,348 epoch 85 - iter 5/5 - loss 0.00005830 - time (sec): 1.94 - samples/sec: 10203.91 - lr: 0.000011
2023-11-10 09:38:30,350 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:30,354 EPOCH 85 done: loss 0.0001 - lr 0.000011


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:38:33,053 Evaluating as a multi-label problem: False
2023-11-10 09:38:33,080 DEV : loss 0.33931925892829895 - f1-score (micro avg)  0.8268
2023-11-10 09:38:33,099 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:33,971 epoch 86 - iter 1/5 - loss 0.00116359 - time (sec): 0.87 - samples/sec: 5002.13 - lr: 0.000011
2023-11-10 09:38:34,386 epoch 86 - iter 2/5 - loss 0.00062389 - time (sec): 1.28 - samples/sec: 6602.49 - lr: 0.000011
2023-11-10 09:38:34,783 epoch 86 - iter 3/5 - loss 0.00042408 - time (sec): 1.68 - samples/sec: 7634.00 - lr: 0.000011
2023-11-10 09:38:35,220 epoch 86 - iter 4/5 - loss 0.00032317 - time (sec): 2.12 - samples/sec: 8289.94 - lr: 0.000011
2023-11-10 09:38:35,429 epoch 86 - iter 5/5 - loss 0.00029257 - time (sec): 2.33 - samples/sec: 8499.53 - lr: 0.000011
2023-11-10 09:38:35,432 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:35,437 EPOCH 86 done: loss 0.0003 - lr 0.000011


100%|██████████| 2/2 [00:05<00:00,  2.64s/it]

2023-11-10 09:38:40,742 Evaluating as a multi-label problem: False
2023-11-10 09:38:40,783 DEV : loss 0.3432070314884186 - f1-score (micro avg)  0.8281
2023-11-10 09:38:40,811 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:41,426 epoch 87 - iter 1/5 - loss 0.00005259 - time (sec): 0.61 - samples/sec: 8373.26 - lr: 0.000010
2023-11-10 09:38:42,010 epoch 87 - iter 2/5 - loss 0.00006044 - time (sec): 1.19 - samples/sec: 7815.03 - lr: 0.000010
2023-11-10 09:38:42,514 epoch 87 - iter 3/5 - loss 0.00005214 - time (sec): 1.70 - samples/sec: 8005.15 - lr: 0.000010
2023-11-10 09:38:43,044 epoch 87 - iter 4/5 - loss 0.00005676 - time (sec): 2.23 - samples/sec: 8161.30 - lr: 0.000010
2023-11-10 09:38:43,259 epoch 87 - iter 5/5 - loss 0.00005576 - time (sec): 2.44 - samples/sec: 8094.59 - lr: 0.000010
2023-11-10 09:38:43,262 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:43,265 EPOCH 87 done: loss 0.0001 - lr 0.000010


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-11-10 09:38:46,389 Evaluating as a multi-label problem: False
2023-11-10 09:38:46,417 DEV : loss 0.35176706314086914 - f1-score (micro avg)  0.8278
2023-11-10 09:38:46,434 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:46,880 epoch 88 - iter 1/5 - loss 0.00003826 - time (sec): 0.44 - samples/sec: 9082.03 - lr: 0.000010
2023-11-10 09:38:47,363 epoch 88 - iter 2/5 - loss 0.00005328 - time (sec): 0.93 - samples/sec: 9705.85 - lr: 0.000010
2023-11-10 09:38:47,852 epoch 88 - iter 3/5 - loss 0.00006230 - time (sec): 1.42 - samples/sec: 9421.90 - lr: 0.000010
2023-11-10 09:38:48,265 epoch 88 - iter 4/5 - loss 0.00007599 - time (sec): 1.83 - samples/sec: 10031.10 - lr: 0.000010
2023-11-10 09:38:48,402 epoch 88 - iter 5/5 - loss 0.00007362 - time (sec): 1.97 - samples/sec: 10055.05 - lr: 0.000010
2023-11-10 09:38:48,404 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:48,413 EPOCH 88 done: loss 0.0001 - lr 0.000010


100%|██████████| 2/2 [00:03<00:00,  1.52s/it]

2023-11-10 09:38:51,478 Evaluating as a multi-label problem: False
2023-11-10 09:38:51,522 DEV : loss 0.3587762713432312 - f1-score (micro avg)  0.8302
2023-11-10 09:38:51,536 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:51,989 epoch 89 - iter 1/5 - loss 0.00005172 - time (sec): 0.45 - samples/sec: 9807.72 - lr: 0.000009
2023-11-10 09:38:52,424 epoch 89 - iter 2/5 - loss 0.00037414 - time (sec): 0.89 - samples/sec: 10322.27 - lr: 0.000009
2023-11-10 09:38:52,979 epoch 89 - iter 3/5 - loss 0.00025600 - time (sec): 1.44 - samples/sec: 10074.81 - lr: 0.000009
2023-11-10 09:38:53,384 epoch 89 - iter 4/5 - loss 0.00021410 - time (sec): 1.85 - samples/sec: 10075.69 - lr: 0.000009
2023-11-10 09:38:53,522 epoch 89 - iter 5/5 - loss 0.00020284 - time (sec): 1.98 - samples/sec: 9967.37 - lr: 0.000009
2023-11-10 09:38:53,524 ----------------------------------------------------------------------------------------------------
2023-11-10 09:38:53,530 EPOCH 89 done: loss 0.0002 - lr 0.000009


100%|██████████| 2/2 [00:04<00:00,  2.41s/it]

2023-11-10 09:38:58,376 Evaluating as a multi-label problem: False
2023-11-10 09:38:58,420 DEV : loss 0.36595892906188965 - f1-score (micro avg)  0.829
2023-11-10 09:38:58,443 ----------------------------------------------------------------------------------------------------





2023-11-10 09:38:59,001 epoch 90 - iter 1/5 - loss 0.00073976 - time (sec): 0.55 - samples/sec: 8447.28 - lr: 0.000009
2023-11-10 09:38:59,643 epoch 90 - iter 2/5 - loss 0.00037843 - time (sec): 1.20 - samples/sec: 7924.34 - lr: 0.000009
2023-11-10 09:39:00,175 epoch 90 - iter 3/5 - loss 0.00027906 - time (sec): 1.73 - samples/sec: 8015.46 - lr: 0.000009
2023-11-10 09:39:00,718 epoch 90 - iter 4/5 - loss 0.00022661 - time (sec): 2.27 - samples/sec: 8218.85 - lr: 0.000009
2023-11-10 09:39:00,856 epoch 90 - iter 5/5 - loss 0.00021625 - time (sec): 2.41 - samples/sec: 8204.19 - lr: 0.000008
2023-11-10 09:39:00,859 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:00,866 EPOCH 90 done: loss 0.0002 - lr 0.000008


100%|██████████| 2/2 [00:06<00:00,  3.06s/it]

2023-11-10 09:39:06,994 Evaluating as a multi-label problem: False
2023-11-10 09:39:07,048 DEV : loss 0.3693319261074066 - f1-score (micro avg)  0.8262
2023-11-10 09:39:07,068 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:07,704 epoch 91 - iter 1/5 - loss 0.00007466 - time (sec): 0.63 - samples/sec: 7369.02 - lr: 0.000008
2023-11-10 09:39:08,337 epoch 91 - iter 2/5 - loss 0.00005598 - time (sec): 1.27 - samples/sec: 7649.99 - lr: 0.000008
2023-11-10 09:39:08,822 epoch 91 - iter 3/5 - loss 0.00044958 - time (sec): 1.75 - samples/sec: 7878.93 - lr: 0.000008
2023-11-10 09:39:09,323 epoch 91 - iter 4/5 - loss 0.00035561 - time (sec): 2.25 - samples/sec: 8038.47 - lr: 0.000008
2023-11-10 09:39:09,555 epoch 91 - iter 5/5 - loss 0.00033043 - time (sec): 2.48 - samples/sec: 7958.03 - lr: 0.000008
2023-11-10 09:39:09,559 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:09,561 EPOCH 91 done: loss 0.0003 - lr 0.000008


100%|██████████| 2/2 [00:03<00:00,  1.96s/it]

2023-11-10 09:39:13,498 Evaluating as a multi-label problem: False
2023-11-10 09:39:13,524 DEV : loss 0.3647539019584656 - f1-score (micro avg)  0.8254
2023-11-10 09:39:13,542 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:13,945 epoch 92 - iter 1/5 - loss 0.00126746 - time (sec): 0.40 - samples/sec: 9399.91 - lr: 0.000008
2023-11-10 09:39:14,412 epoch 92 - iter 2/5 - loss 0.00059856 - time (sec): 0.87 - samples/sec: 9576.59 - lr: 0.000008
2023-11-10 09:39:14,883 epoch 92 - iter 3/5 - loss 0.00039729 - time (sec): 1.34 - samples/sec: 9934.41 - lr: 0.000008
2023-11-10 09:39:15,300 epoch 92 - iter 4/5 - loss 0.00030563 - time (sec): 1.76 - samples/sec: 10264.49 - lr: 0.000008
2023-11-10 09:39:15,489 epoch 92 - iter 5/5 - loss 0.00029667 - time (sec): 1.94 - samples/sec: 10168.08 - lr: 0.000007
2023-11-10 09:39:15,497 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:15,499 EPOCH 92 done: loss 0.0003 - lr 0.000007


100%|██████████| 2/2 [00:05<00:00,  2.66s/it]

2023-11-10 09:39:20,841 Evaluating as a multi-label problem: False
2023-11-10 09:39:20,881 DEV : loss 0.36129945516586304 - f1-score (micro avg)  0.8301
2023-11-10 09:39:20,905 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:22,027 epoch 93 - iter 1/5 - loss 0.00005098 - time (sec): 1.12 - samples/sec: 3857.05 - lr: 0.000007
2023-11-10 09:39:22,681 epoch 93 - iter 2/5 - loss 0.00005560 - time (sec): 1.77 - samples/sec: 5414.71 - lr: 0.000007
2023-11-10 09:39:23,231 epoch 93 - iter 3/5 - loss 0.00006377 - time (sec): 2.32 - samples/sec: 6058.85 - lr: 0.000007
2023-11-10 09:39:23,742 epoch 93 - iter 4/5 - loss 0.00005635 - time (sec): 2.83 - samples/sec: 6578.33 - lr: 0.000007
2023-11-10 09:39:23,884 epoch 93 - iter 5/5 - loss 0.00005561 - time (sec): 2.97 - samples/sec: 6650.80 - lr: 0.000007
2023-11-10 09:39:23,893 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:23,895 EPOCH 93 done: loss 0.0001 - lr 0.000007


100%|██████████| 2/2 [00:02<00:00,  1.43s/it]

2023-11-10 09:39:26,772 Evaluating as a multi-label problem: False
2023-11-10 09:39:26,795 DEV : loss 0.36124446988105774 - f1-score (micro avg)  0.8305
2023-11-10 09:39:26,809 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:27,265 epoch 94 - iter 1/5 - loss 0.00007891 - time (sec): 0.45 - samples/sec: 10069.97 - lr: 0.000007
2023-11-10 09:39:27,752 epoch 94 - iter 2/5 - loss 0.00006104 - time (sec): 0.94 - samples/sec: 9654.80 - lr: 0.000007
2023-11-10 09:39:28,178 epoch 94 - iter 3/5 - loss 0.00005445 - time (sec): 1.37 - samples/sec: 9959.66 - lr: 0.000007
2023-11-10 09:39:28,625 epoch 94 - iter 4/5 - loss 0.00005268 - time (sec): 1.81 - samples/sec: 10055.56 - lr: 0.000006
2023-11-10 09:39:28,794 epoch 94 - iter 5/5 - loss 0.00005236 - time (sec): 1.98 - samples/sec: 9972.45 - lr: 0.000006
2023-11-10 09:39:28,796 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:28,803 EPOCH 94 done: loss 0.0001 - lr 0.000006


100%|██████████| 2/2 [00:02<00:00,  1.33s/it]

2023-11-10 09:39:31,496 Evaluating as a multi-label problem: False
2023-11-10 09:39:31,522 DEV : loss 0.36162203550338745 - f1-score (micro avg)  0.8302
2023-11-10 09:39:31,536 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:32,480 epoch 95 - iter 1/5 - loss 0.00004487 - time (sec): 0.94 - samples/sec: 4780.56 - lr: 0.000006
2023-11-10 09:39:32,920 epoch 95 - iter 2/5 - loss 0.00003717 - time (sec): 1.38 - samples/sec: 6622.59 - lr: 0.000006
2023-11-10 09:39:33,323 epoch 95 - iter 3/5 - loss 0.00003736 - time (sec): 1.79 - samples/sec: 7585.11 - lr: 0.000006
2023-11-10 09:39:33,734 epoch 95 - iter 4/5 - loss 0.00004205 - time (sec): 2.20 - samples/sec: 8151.40 - lr: 0.000006
2023-11-10 09:39:33,925 epoch 95 - iter 5/5 - loss 0.00004214 - time (sec): 2.39 - samples/sec: 8281.35 - lr: 0.000006
2023-11-10 09:39:33,928 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:33,933 EPOCH 95 done: loss 0.0000 - lr 0.000006


100%|██████████| 2/2 [00:05<00:00,  2.54s/it]

2023-11-10 09:39:39,023 Evaluating as a multi-label problem: False
2023-11-10 09:39:39,069 DEV : loss 0.36441850662231445 - f1-score (micro avg)  0.8298
2023-11-10 09:39:39,092 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:39,664 epoch 96 - iter 1/5 - loss 0.00485053 - time (sec): 0.57 - samples/sec: 7308.79 - lr: 0.000006
2023-11-10 09:39:40,233 epoch 96 - iter 2/5 - loss 0.00230056 - time (sec): 1.14 - samples/sec: 7783.02 - lr: 0.000006
2023-11-10 09:39:40,776 epoch 96 - iter 3/5 - loss 0.00152344 - time (sec): 1.68 - samples/sec: 8076.58 - lr: 0.000006
2023-11-10 09:39:41,445 epoch 96 - iter 4/5 - loss 0.00112357 - time (sec): 2.35 - samples/sec: 7911.35 - lr: 0.000005
2023-11-10 09:39:41,611 epoch 96 - iter 5/5 - loss 0.00105847 - time (sec): 2.52 - samples/sec: 7858.64 - lr: 0.000005
2023-11-10 09:39:41,617 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:41,619 EPOCH 96 done: loss 0.0011 - lr 0.000005


100%|██████████| 2/2 [00:03<00:00,  1.69s/it]

2023-11-10 09:39:45,013 Evaluating as a multi-label problem: False
2023-11-10 09:39:45,037 DEV : loss 0.3668171465396881 - f1-score (micro avg)  0.8311
2023-11-10 09:39:45,051 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:45,500 epoch 97 - iter 1/5 - loss 0.00004409 - time (sec): 0.45 - samples/sec: 9574.95 - lr: 0.000005
2023-11-10 09:39:45,927 epoch 97 - iter 2/5 - loss 0.00003749 - time (sec): 0.87 - samples/sec: 10207.39 - lr: 0.000005
2023-11-10 09:39:46,446 epoch 97 - iter 3/5 - loss 0.00033131 - time (sec): 1.39 - samples/sec: 10143.52 - lr: 0.000005
2023-11-10 09:39:46,845 epoch 97 - iter 4/5 - loss 0.00026543 - time (sec): 1.79 - samples/sec: 10150.70 - lr: 0.000005
2023-11-10 09:39:47,027 epoch 97 - iter 5/5 - loss 0.00024804 - time (sec): 1.97 - samples/sec: 10017.88 - lr: 0.000005
2023-11-10 09:39:47,029 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:47,036 EPOCH 97 done: loss 0.0002 - lr 0.000005


100%|██████████| 2/2 [00:03<00:00,  1.56s/it]

2023-11-10 09:39:50,168 Evaluating as a multi-label problem: False
2023-11-10 09:39:50,195 DEV : loss 0.36791670322418213 - f1-score (micro avg)  0.8321
2023-11-10 09:39:50,214 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:51,067 epoch 98 - iter 1/5 - loss 0.00003939 - time (sec): 0.85 - samples/sec: 5180.07 - lr: 0.000005
2023-11-10 09:39:51,483 epoch 98 - iter 2/5 - loss 0.00004042 - time (sec): 1.27 - samples/sec: 6356.59 - lr: 0.000005
2023-11-10 09:39:51,910 epoch 98 - iter 3/5 - loss 0.00003820 - time (sec): 1.69 - samples/sec: 7422.48 - lr: 0.000004
2023-11-10 09:39:52,339 epoch 98 - iter 4/5 - loss 0.00004013 - time (sec): 2.12 - samples/sec: 8125.25 - lr: 0.000004
2023-11-10 09:39:52,573 epoch 98 - iter 5/5 - loss 0.00003740 - time (sec): 2.36 - samples/sec: 8387.78 - lr: 0.000004
2023-11-10 09:39:52,576 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:52,578 EPOCH 98 done: loss 0.0000 - lr 0.000004


100%|██████████| 2/2 [00:04<00:00,  2.44s/it]

2023-11-10 09:39:57,470 Evaluating as a multi-label problem: False
2023-11-10 09:39:57,513 DEV : loss 0.36870133876800537 - f1-score (micro avg)  0.8311
2023-11-10 09:39:57,531 ----------------------------------------------------------------------------------------------------





2023-11-10 09:39:58,146 epoch 99 - iter 1/5 - loss 0.00004987 - time (sec): 0.61 - samples/sec: 7804.28 - lr: 0.000004
2023-11-10 09:39:58,634 epoch 99 - iter 2/5 - loss 0.00004552 - time (sec): 1.10 - samples/sec: 8093.10 - lr: 0.000004
2023-11-10 09:39:59,173 epoch 99 - iter 3/5 - loss 0.00008638 - time (sec): 1.64 - samples/sec: 8271.38 - lr: 0.000004
2023-11-10 09:39:59,798 epoch 99 - iter 4/5 - loss 0.00007031 - time (sec): 2.26 - samples/sec: 8126.48 - lr: 0.000004
2023-11-10 09:39:59,957 epoch 99 - iter 5/5 - loss 0.00006722 - time (sec): 2.42 - samples/sec: 8161.99 - lr: 0.000004
2023-11-10 09:39:59,960 ----------------------------------------------------------------------------------------------------
2023-11-10 09:39:59,964 EPOCH 99 done: loss 0.0001 - lr 0.000004


100%|██████████| 2/2 [00:03<00:00,  1.82s/it]

2023-11-10 09:40:03,624 Evaluating as a multi-label problem: False
2023-11-10 09:40:03,654 DEV : loss 0.3692951500415802 - f1-score (micro avg)  0.8303
2023-11-10 09:40:03,670 ----------------------------------------------------------------------------------------------------





2023-11-10 09:40:04,619 epoch 100 - iter 1/5 - loss 0.00009949 - time (sec): 0.95 - samples/sec: 5421.33 - lr: 0.000004
2023-11-10 09:40:05,006 epoch 100 - iter 2/5 - loss 0.00007758 - time (sec): 1.33 - samples/sec: 6870.86 - lr: 0.000004
2023-11-10 09:40:05,473 epoch 100 - iter 3/5 - loss 0.00006179 - time (sec): 1.80 - samples/sec: 7879.80 - lr: 0.000003
2023-11-10 09:40:05,908 epoch 100 - iter 4/5 - loss 0.00005870 - time (sec): 2.24 - samples/sec: 8091.13 - lr: 0.000003
2023-11-10 09:40:06,050 epoch 100 - iter 5/5 - loss 0.00005733 - time (sec): 2.38 - samples/sec: 8313.83 - lr: 0.000003
2023-11-10 09:40:06,052 ----------------------------------------------------------------------------------------------------
2023-11-10 09:40:06,054 EPOCH 100 done: loss 0.0001 - lr 0.000003


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-11-10 09:40:08,748 Evaluating as a multi-label problem: False
2023-11-10 09:40:08,772 DEV : loss 0.3713493049144745 - f1-score (micro avg)  0.8282





2023-11-10 09:40:09,426 ----------------------------------------------------------------------------------------------------
2023-11-10 09:40:09,430 Testing using last state of model ...


100%|██████████| 3/3 [00:01<00:00,  1.95it/s]


2023-11-10 09:40:10,984 Evaluating as a multi-label problem: False
2023-11-10 09:40:11,016 0.7869	0.8692	0.826	0.7042
2023-11-10 09:40:11,017 
Results:
- F-score (micro) 0.826
- F-score (macro) 0.7454
- Accuracy 0.7042

By class:
              precision    recall  f1-score   support

      Winner     0.7172    0.8087    0.7602       345
        Date     0.9614    0.9881    0.9746       252
 Nationality     0.8201    0.8807    0.8493       176
   Prizetype     0.7347    0.8780    0.8000        41
      Reason     0.2791    0.4444    0.3429        27

   micro avg     0.7869    0.8692    0.8260       841
   macro avg     0.7025    0.8000    0.7454       841
weighted avg     0.7987    0.8692    0.8316       841

2023-11-10 09:40:11,026 ----------------------------------------------------------------------------------------------------


{'test_score': 0.8259887005649718,
 'dev_score_history': [0.006514657980456026,
  0.00198609731876862,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.1009771986970684,
  0.486039296794209,
  0.5283381364073005,
  0.6701298701298701,
  0.745067087608524,
  0.7431972789115646,
  0.7036688617121355,
  0.7839357429718877,
  0.7677806341045417,
  0.785063752276867,
  0.7657316148597422,
  0.76171875,
  0.7946074544012688,
  0.7862318840579711,
  0.8137009189640769,
  0.8099742046431642,
  0.8216398985629755,
  0.8148148148148148,
  0.8064777327935222,
  0.8257510729613733,
  0.8042588042588042,
  0.8181818181818181,
  0.8160535117056856,
  0.8151986183074266,
  0.8195364238410596,
  0.8120805369127518,
  0.8206839032527106,
  0.8230313293818797,
  0.8130360205831904,
  0.840677966101695,
  0.8309012875536481,
  0.8301255230125523,
  0.8053797468354431,
  0.8214285714285714,
  0.8246913580246914,
  0.831918505942275,
  0.8268733850129198,
  0.8162926018287614,
  0.8203842940685045,
  0.8032128514056

In [None]:
# Sanity check: run the saved NER tagger on a short free-text biography
# and inspect which spans it labels.

# Restore the tagger from its checkpoint on disk
model = SequenceTagger.load("data/comp1/ner/final-model.pt")

# Raw input text; it is wrapped in a flair Sentence below so the tagger can consume it
sample_text = 'Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer. '
sentence = Sentence(sample_text)

# predict() annotates `sentence` in place; its return value is not needed here
model.predict(sentence)

# Show the text together with the predicted entity spans
print(sentence.to_tagged_string())

2023-11-10 09:40:12,646 SequenceTagger predicts: Dictionary with 11 tags: O, B-Winner, I-Winner, B-Date, I-Date, B-Nationality, I-Nationality, B-Prizetype, I-Prizetype, B-Reason, I-Reason
Sentence[79]: "Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer." → ["Barack Hussein Obama II"/Winner, "August 4, 1961"/Date, "American"/Nationality, "the United States"/Nationality, "he"/Winner, "Obama"/Winner, "U.S."/Nationality]


In [None]:
# The corpus loaded earlier also carries the relation annotations, for example:
# relations = 1;5;7;9;born_on|1;5;11;13;died_on|1;5;17;17;has_nationality
# The spans taking part in each relation are denoted by per-word indices.
# In theory this permits relations between sub-spans, but we do not use that:
# the entities are tagged with BIO, which has no nesting.
relation_dictionary = corpus.make_label_dictionary("relation")

# Unlike the tagger, the relation classifier is fed document embeddings rather than
# word embeddings: the whole text is encoded at once, while still respecting the
# transformer's maximum sequence length.
doc_embeddings = TransformerDocumentEmbeddings(
    model="distilbert-base-uncased",
    layers="-1",
    fine_tune=True,
)

# Entity-type pairs that are allowed to form a relation; only these combinations
# are considered as relation candidates.
valid_entity_pairs = {
    ("Winner", "Nationality"),
    ("Winner", "Date"),
    ("Winner", "Prizetype"),
    ("Winner", "Reason"),
}

# Flair ships two relation-extraction models (RelationExtractor and RelationClassifier).
# We use RelationClassifier because it performs significantly better.
relation_model: RelationClassifier = RelationClassifier(
    embeddings=doc_embeddings,
    label_dictionary=relation_dictionary,
    label_type="relation",
    entity_label_types="ner",
    entity_pair_labels=valid_entity_pairs,
    zero_tag_value='',
    allow_unk_tag=False,
)

In [None]:
# The relation model first transforms the corpus itself (RelationClassifier.transform_corpus);
# the trainer is then built on that transformed corpus rather than on the raw one.
relation_corpus = relation_model.transform_corpus(corpus)
trainer: ModelTrainer = ModelTrainer(model=relation_model, corpus=relation_corpus)

In [None]:
# Hyperparameters for fine-tuning the relation classifier, gathered in one place.
# Model selection is driven by the macro-averaged F1 score.
relation_training_options = dict(
    max_epochs=20,
    learning_rate=4e-5,
    mini_batch_size=8,
    main_evaluation_metric=("macro avg", "f1-score"),
    shuffle=True,
    embeddings_storage_mode='gpu',
)

# The first argument is the path passed to the trainer for this run
# (presumably the output directory for checkpoints and logs; confirm against flair docs).
trainer.fine_tune('data/comp1/relations/', **relation_training_options)

2023-11-10 09:41:21,154 ----------------------------------------------------------------------------------------------------
2023-11-10 09:41:21,156 Model: "RelationClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768,

100%|██████████| 391/391 [00:33<00:00, 11.63it/s]

2023-11-10 09:44:03,927 Evaluating as a multi-label problem: False





2023-11-10 09:44:03,966 DEV : loss 0.20799756050109863 - f1-score (macro avg)  0.3032
2023-11-10 09:44:04,435 ----------------------------------------------------------------------------------------------------
2023-11-10 09:44:20,205 epoch 2 - iter 102/1024 - loss 0.27005642 - time (sec): 15.77 - samples/sec: 51.75 - lr: 0.000022
2023-11-10 09:44:35,870 epoch 2 - iter 204/1024 - loss 0.26886919 - time (sec): 31.43 - samples/sec: 51.92 - lr: 0.000024
2023-11-10 09:44:51,468 epoch 2 - iter 306/1024 - loss 0.24865627 - time (sec): 47.03 - samples/sec: 52.05 - lr: 0.000026
2023-11-10 09:45:07,257 epoch 2 - iter 408/1024 - loss 0.24875280 - time (sec): 62.82 - samples/sec: 51.96 - lr: 0.000028
2023-11-10 09:45:23,194 epoch 2 - iter 510/1024 - loss 0.22617923 - time (sec): 78.76 - samples/sec: 51.80 - lr: 0.000030
2023-11-10 09:45:39,162 epoch 2 - iter 612/1024 - loss 0.21229374 - time (sec): 94.73 - samples/sec: 51.69 - lr: 0.000032
2023-11-10 09:45:55,007 epoch 2 - iter 714/1024 - loss 0.

100%|██████████| 391/391 [00:32<00:00, 12.15it/s]


2023-11-10 09:47:14,610 Evaluating as a multi-label problem: False
2023-11-10 09:47:14,643 DEV : loss 0.19273926317691803 - f1-score (macro avg)  0.4577
2023-11-10 09:47:15,136 ----------------------------------------------------------------------------------------------------
2023-11-10 09:47:31,022 epoch 3 - iter 102/1024 - loss 0.09492288 - time (sec): 15.88 - samples/sec: 51.37 - lr: 0.000040
2023-11-10 09:47:47,021 epoch 3 - iter 204/1024 - loss 0.10313889 - time (sec): 31.88 - samples/sec: 51.19 - lr: 0.000040
2023-11-10 09:48:04,154 epoch 3 - iter 306/1024 - loss 0.12507308 - time (sec): 49.02 - samples/sec: 49.94 - lr: 0.000039
2023-11-10 09:48:19,785 epoch 3 - iter 408/1024 - loss 0.13098473 - time (sec): 64.65 - samples/sec: 50.49 - lr: 0.000039
2023-11-10 09:48:35,184 epoch 3 - iter 510/1024 - loss 0.12711679 - time (sec): 80.05 - samples/sec: 50.97 - lr: 0.000039
2023-11-10 09:48:50,665 epoch 3 - iter 612/1024 - loss 0.12706760 - time (sec): 95.53 - samples/sec: 51.25 - lr:

100%|██████████| 391/391 [00:30<00:00, 12.86it/s]

2023-11-10 09:50:25,351 Evaluating as a multi-label problem: False
2023-11-10 09:50:25,416 DEV : loss 0.12733331322669983 - f1-score (macro avg)  0.711





2023-11-10 09:50:26,312 ----------------------------------------------------------------------------------------------------
2023-11-10 09:50:41,950 epoch 4 - iter 102/1024 - loss 0.06079583 - time (sec): 15.64 - samples/sec: 52.19 - lr: 0.000038
2023-11-10 09:50:57,886 epoch 4 - iter 204/1024 - loss 0.07016651 - time (sec): 31.57 - samples/sec: 51.69 - lr: 0.000037
2023-11-10 09:51:13,805 epoch 4 - iter 306/1024 - loss 0.06991813 - time (sec): 47.49 - samples/sec: 51.55 - lr: 0.000037
2023-11-10 09:51:29,877 epoch 4 - iter 408/1024 - loss 0.07369545 - time (sec): 63.56 - samples/sec: 51.35 - lr: 0.000037
2023-11-10 09:51:45,891 epoch 4 - iter 510/1024 - loss 0.06212741 - time (sec): 79.58 - samples/sec: 51.27 - lr: 0.000037
2023-11-10 09:52:01,650 epoch 4 - iter 612/1024 - loss 0.07309316 - time (sec): 95.34 - samples/sec: 51.36 - lr: 0.000036
2023-11-10 09:52:17,490 epoch 4 - iter 714/1024 - loss 0.07335053 - time (sec): 111.18 - samples/sec: 51.38 - lr: 0.000036
2023-11-10 09:52:34,

100%|██████████| 391/391 [00:30<00:00, 12.62it/s]

2023-11-10 09:53:37,448 Evaluating as a multi-label problem: False
2023-11-10 09:53:37,507 DEV : loss 0.13939329981803894 - f1-score (macro avg)  0.8184





2023-11-10 09:53:38,395 ----------------------------------------------------------------------------------------------------
2023-11-10 09:53:53,902 epoch 5 - iter 102/1024 - loss 0.06068000 - time (sec): 15.50 - samples/sec: 52.63 - lr: 0.000035
2023-11-10 09:54:09,743 epoch 5 - iter 204/1024 - loss 0.03555628 - time (sec): 31.34 - samples/sec: 52.07 - lr: 0.000035
2023-11-10 09:54:25,627 epoch 5 - iter 306/1024 - loss 0.04638034 - time (sec): 47.23 - samples/sec: 51.83 - lr: 0.000035
2023-11-10 09:54:41,593 epoch 5 - iter 408/1024 - loss 0.04685732 - time (sec): 63.20 - samples/sec: 51.65 - lr: 0.000035
2023-11-10 09:54:57,541 epoch 5 - iter 510/1024 - loss 0.05279271 - time (sec): 79.14 - samples/sec: 51.55 - lr: 0.000034
2023-11-10 09:55:13,385 epoch 5 - iter 612/1024 - loss 0.05428857 - time (sec): 94.99 - samples/sec: 51.54 - lr: 0.000034
2023-11-10 09:55:28,943 epoch 5 - iter 714/1024 - loss 0.05307978 - time (sec): 110.55 - samples/sec: 51.67 - lr: 0.000034
2023-11-10 09:55:44,

100%|██████████| 391/391 [00:34<00:00, 11.28it/s]

2023-11-10 09:56:51,430 Evaluating as a multi-label problem: False
2023-11-10 09:56:51,494 DEV : loss 0.14632125198841095 - f1-score (macro avg)  0.8265





2023-11-10 09:56:52,393 ----------------------------------------------------------------------------------------------------
2023-11-10 09:57:08,225 epoch 6 - iter 102/1024 - loss 0.04659488 - time (sec): 15.83 - samples/sec: 51.55 - lr: 0.000033
2023-11-10 09:57:23,868 epoch 6 - iter 204/1024 - loss 0.05275722 - time (sec): 31.47 - samples/sec: 51.85 - lr: 0.000033
2023-11-10 09:57:39,672 epoch 6 - iter 306/1024 - loss 0.03955598 - time (sec): 47.28 - samples/sec: 51.78 - lr: 0.000033
2023-11-10 09:57:55,569 epoch 6 - iter 408/1024 - loss 0.03850086 - time (sec): 63.17 - samples/sec: 51.67 - lr: 0.000032
2023-11-10 09:58:11,520 epoch 6 - iter 510/1024 - loss 0.03447972 - time (sec): 79.12 - samples/sec: 51.56 - lr: 0.000032
2023-11-10 09:58:27,408 epoch 6 - iter 612/1024 - loss 0.04337280 - time (sec): 95.01 - samples/sec: 51.53 - lr: 0.000032
2023-11-10 09:58:43,211 epoch 6 - iter 714/1024 - loss 0.04158724 - time (sec): 110.81 - samples/sec: 51.55 - lr: 0.000032
2023-11-10 09:58:58,

100%|██████████| 391/391 [00:32<00:00, 11.87it/s]

2023-11-10 10:00:03,784 Evaluating as a multi-label problem: False
2023-11-10 10:00:03,819 DEV : loss 0.13313010334968567 - f1-score (macro avg)  0.8455





2023-11-10 10:00:04,339 ----------------------------------------------------------------------------------------------------
2023-11-10 10:00:20,206 epoch 7 - iter 102/1024 - loss 0.02611672 - time (sec): 15.87 - samples/sec: 51.43 - lr: 0.000031
2023-11-10 10:00:36,126 epoch 7 - iter 204/1024 - loss 0.01679634 - time (sec): 31.79 - samples/sec: 51.34 - lr: 0.000031
2023-11-10 10:00:51,808 epoch 7 - iter 306/1024 - loss 0.01552415 - time (sec): 47.47 - samples/sec: 51.57 - lr: 0.000030
2023-11-10 10:01:07,615 epoch 7 - iter 408/1024 - loss 0.02379952 - time (sec): 63.27 - samples/sec: 51.58 - lr: 0.000030
2023-11-10 10:01:24,080 epoch 7 - iter 510/1024 - loss 0.02507524 - time (sec): 79.74 - samples/sec: 51.17 - lr: 0.000030
2023-11-10 10:01:39,975 epoch 7 - iter 612/1024 - loss 0.02453597 - time (sec): 95.64 - samples/sec: 51.19 - lr: 0.000030
2023-11-10 10:01:55,963 epoch 7 - iter 714/1024 - loss 0.02390721 - time (sec): 111.62 - samples/sec: 51.17 - lr: 0.000030
2023-11-10 10:02:11,

100%|██████████| 391/391 [00:32<00:00, 12.22it/s]

2023-11-10 10:03:15,159 Evaluating as a multi-label problem: False





2023-11-10 10:03:15,199 DEV : loss 0.1990513652563095 - f1-score (macro avg)  0.7941
2023-11-10 10:03:15,665 ----------------------------------------------------------------------------------------------------
2023-11-10 10:03:31,554 epoch 8 - iter 102/1024 - loss 0.01940580 - time (sec): 15.89 - samples/sec: 51.36 - lr: 0.000029
2023-11-10 10:03:47,506 epoch 8 - iter 204/1024 - loss 0.01363854 - time (sec): 31.84 - samples/sec: 51.26 - lr: 0.000028
2023-11-10 10:04:03,152 epoch 8 - iter 306/1024 - loss 0.01771507 - time (sec): 47.49 - samples/sec: 51.55 - lr: 0.000028
2023-11-10 10:04:18,795 epoch 8 - iter 408/1024 - loss 0.01649899 - time (sec): 63.13 - samples/sec: 51.70 - lr: 0.000028
2023-11-10 10:04:34,344 epoch 8 - iter 510/1024 - loss 0.01992818 - time (sec): 78.68 - samples/sec: 51.86 - lr: 0.000028
2023-11-10 10:04:49,712 epoch 8 - iter 612/1024 - loss 0.01914206 - time (sec): 94.05 - samples/sec: 52.06 - lr: 0.000028
2023-11-10 10:05:05,314 epoch 8 - iter 714/1024 - loss 0.0

100%|██████████| 391/391 [00:31<00:00, 12.39it/s]

2023-11-10 10:06:25,464 Evaluating as a multi-label problem: False





2023-11-10 10:06:25,501 DEV : loss 0.21416178345680237 - f1-score (macro avg)  0.7826
2023-11-10 10:06:25,957 ----------------------------------------------------------------------------------------------------
2023-11-10 10:06:41,668 epoch 9 - iter 102/1024 - loss 0.01467200 - time (sec): 15.71 - samples/sec: 51.94 - lr: 0.000026
2023-11-10 10:06:57,394 epoch 9 - iter 204/1024 - loss 0.00990851 - time (sec): 31.44 - samples/sec: 51.92 - lr: 0.000026
2023-11-10 10:07:13,059 epoch 9 - iter 306/1024 - loss 0.00981150 - time (sec): 47.10 - samples/sec: 51.97 - lr: 0.000026
2023-11-10 10:07:28,692 epoch 9 - iter 408/1024 - loss 0.00931401 - time (sec): 62.73 - samples/sec: 52.03 - lr: 0.000026
2023-11-10 10:07:44,151 epoch 9 - iter 510/1024 - loss 0.02050744 - time (sec): 78.19 - samples/sec: 52.18 - lr: 0.000026
2023-11-10 10:07:59,569 epoch 9 - iter 612/1024 - loss 0.02517701 - time (sec): 93.61 - samples/sec: 52.30 - lr: 0.000025
2023-11-10 10:08:15,184 epoch 9 - iter 714/1024 - loss 0.

100%|██████████| 391/391 [00:31<00:00, 12.35it/s]


2023-11-10 10:09:34,683 Evaluating as a multi-label problem: False
2023-11-10 10:09:34,717 DEV : loss 0.2631814777851105 - f1-score (macro avg)  0.6977
2023-11-10 10:09:35,159 ----------------------------------------------------------------------------------------------------
2023-11-10 10:09:50,933 epoch 10 - iter 102/1024 - loss 0.02736456 - time (sec): 15.77 - samples/sec: 51.73 - lr: 0.000024
2023-11-10 10:10:07,476 epoch 10 - iter 204/1024 - loss 0.01846754 - time (sec): 32.32 - samples/sec: 50.50 - lr: 0.000024
2023-11-10 10:10:23,059 epoch 10 - iter 306/1024 - loss 0.01562072 - time (sec): 47.90 - samples/sec: 51.11 - lr: 0.000024
2023-11-10 10:10:38,399 epoch 10 - iter 408/1024 - loss 0.01530928 - time (sec): 63.24 - samples/sec: 51.61 - lr: 0.000024
2023-11-10 10:10:53,957 epoch 10 - iter 510/1024 - loss 0.01639709 - time (sec): 78.80 - samples/sec: 51.78 - lr: 0.000023
2023-11-10 10:11:09,472 epoch 10 - iter 612/1024 - loss 0.01569250 - time (sec): 94.31 - samples/sec: 51.91 

100%|██████████| 391/391 [00:32<00:00, 12.20it/s]


2023-11-10 10:12:45,027 Evaluating as a multi-label problem: False
2023-11-10 10:12:45,065 DEV : loss 0.18452776968479156 - f1-score (macro avg)  0.7705
2023-11-10 10:12:45,564 ----------------------------------------------------------------------------------------------------
2023-11-10 10:13:01,188 epoch 11 - iter 102/1024 - loss 0.00736163 - time (sec): 15.62 - samples/sec: 52.23 - lr: 0.000022
2023-11-10 10:13:16,638 epoch 11 - iter 204/1024 - loss 0.00372171 - time (sec): 31.07 - samples/sec: 52.52 - lr: 0.000022
2023-11-10 10:13:32,163 epoch 11 - iter 306/1024 - loss 0.00249990 - time (sec): 46.60 - samples/sec: 52.54 - lr: 0.000022
2023-11-10 10:13:47,699 epoch 11 - iter 408/1024 - loss 0.00379431 - time (sec): 62.13 - samples/sec: 52.53 - lr: 0.000021
2023-11-10 10:14:03,323 epoch 11 - iter 510/1024 - loss 0.00447139 - time (sec): 77.76 - samples/sec: 52.47 - lr: 0.000021
2023-11-10 10:14:20,114 epoch 11 - iter 612/1024 - loss 0.01057452 - time (sec): 94.55 - samples/sec: 51.78

100%|██████████| 391/391 [00:32<00:00, 12.13it/s]

2023-11-10 10:15:56,337 Evaluating as a multi-label problem: False
2023-11-10 10:15:56,393 DEV : loss 0.21056701242923737 - f1-score (macro avg)  0.8061





2023-11-10 10:15:57,268 ----------------------------------------------------------------------------------------------------
2023-11-10 10:16:12,742 epoch 12 - iter 102/1024 - loss 0.00000822 - time (sec): 15.47 - samples/sec: 52.75 - lr: 0.000020
2023-11-10 10:16:28,458 epoch 12 - iter 204/1024 - loss 0.00001093 - time (sec): 31.19 - samples/sec: 52.33 - lr: 0.000020
2023-11-10 10:16:44,101 epoch 12 - iter 306/1024 - loss 0.00418064 - time (sec): 46.83 - samples/sec: 52.27 - lr: 0.000019
2023-11-10 10:16:59,801 epoch 12 - iter 408/1024 - loss 0.00981287 - time (sec): 62.53 - samples/sec: 52.20 - lr: 0.000019
2023-11-10 10:17:15,629 epoch 12 - iter 510/1024 - loss 0.01049704 - time (sec): 78.36 - samples/sec: 52.07 - lr: 0.000019
2023-11-10 10:17:31,436 epoch 12 - iter 612/1024 - loss 0.00914798 - time (sec): 94.16 - samples/sec: 51.99 - lr: 0.000019
2023-11-10 10:17:47,322 epoch 12 - iter 714/1024 - loss 0.00794542 - time (sec): 110.05 - samples/sec: 51.90 - lr: 0.000018
2023-11-10 10

100%|██████████| 391/391 [00:35<00:00, 11.11it/s]

2023-11-10 10:19:10,861 Evaluating as a multi-label problem: False
2023-11-10 10:19:10,917 DEV : loss 0.21344774961471558 - f1-score (macro avg)  0.7812





2023-11-10 10:19:11,783 ----------------------------------------------------------------------------------------------------
2023-11-10 10:19:27,352 epoch 13 - iter 102/1024 - loss 0.00005061 - time (sec): 15.56 - samples/sec: 52.43 - lr: 0.000018
2023-11-10 10:19:42,944 epoch 13 - iter 204/1024 - loss 0.00032780 - time (sec): 31.16 - samples/sec: 52.38 - lr: 0.000017
2023-11-10 10:19:58,473 epoch 13 - iter 306/1024 - loss 0.00089430 - time (sec): 46.68 - samples/sec: 52.44 - lr: 0.000017
2023-11-10 10:20:14,116 epoch 13 - iter 408/1024 - loss 0.00076751 - time (sec): 62.33 - samples/sec: 52.37 - lr: 0.000017
2023-11-10 10:20:30,018 epoch 13 - iter 510/1024 - loss 0.00361528 - time (sec): 78.23 - samples/sec: 52.15 - lr: 0.000017
2023-11-10 10:20:45,810 epoch 13 - iter 612/1024 - loss 0.00423608 - time (sec): 94.02 - samples/sec: 52.07 - lr: 0.000016
2023-11-10 10:21:01,755 epoch 13 - iter 714/1024 - loss 0.00538888 - time (sec): 109.97 - samples/sec: 51.94 - lr: 0.000016
2023-11-10 10

100%|██████████| 391/391 [00:30<00:00, 12.78it/s]

2023-11-10 10:22:20,534 Evaluating as a multi-label problem: False





2023-11-10 10:22:20,598 DEV : loss 0.1919635832309723 - f1-score (macro avg)  0.8377
2023-11-10 10:22:21,540 ----------------------------------------------------------------------------------------------------
2023-11-10 10:22:37,177 epoch 14 - iter 102/1024 - loss 0.01163702 - time (sec): 15.63 - samples/sec: 52.19 - lr: 0.000015
2023-11-10 10:22:53,970 epoch 14 - iter 204/1024 - loss 0.00581910 - time (sec): 32.43 - samples/sec: 50.33 - lr: 0.000015
2023-11-10 10:23:09,899 epoch 14 - iter 306/1024 - loss 0.01673190 - time (sec): 48.36 - samples/sec: 50.62 - lr: 0.000015
2023-11-10 10:23:25,614 epoch 14 - iter 408/1024 - loss 0.01255584 - time (sec): 64.07 - samples/sec: 50.94 - lr: 0.000015
2023-11-10 10:23:41,593 epoch 14 - iter 510/1024 - loss 0.01004575 - time (sec): 80.05 - samples/sec: 50.97 - lr: 0.000014
2023-11-10 10:23:57,391 epoch 14 - iter 612/1024 - loss 0.00996580 - time (sec): 95.85 - samples/sec: 51.08 - lr: 0.000014
2023-11-10 10:24:13,199 epoch 14 - iter 714/1024 - l

100%|██████████| 391/391 [00:30<00:00, 12.71it/s]

2023-11-10 10:25:31,248 Evaluating as a multi-label problem: False
2023-11-10 10:25:31,284 DEV : loss 0.23365266621112823 - f1-score (macro avg)  0.754





2023-11-10 10:25:31,746 ----------------------------------------------------------------------------------------------------
2023-11-10 10:25:47,595 epoch 15 - iter 102/1024 - loss 0.00000233 - time (sec): 15.85 - samples/sec: 51.49 - lr: 0.000013
2023-11-10 10:26:03,280 epoch 15 - iter 204/1024 - loss 0.00477110 - time (sec): 31.53 - samples/sec: 51.76 - lr: 0.000013
2023-11-10 10:26:19,023 epoch 15 - iter 306/1024 - loss 0.00523992 - time (sec): 47.27 - samples/sec: 51.78 - lr: 0.000013
2023-11-10 10:26:34,702 epoch 15 - iter 408/1024 - loss 0.00393032 - time (sec): 62.95 - samples/sec: 51.85 - lr: 0.000012
2023-11-10 10:26:50,637 epoch 15 - iter 510/1024 - loss 0.00317060 - time (sec): 78.89 - samples/sec: 51.72 - lr: 0.000012
2023-11-10 10:27:07,278 epoch 15 - iter 612/1024 - loss 0.00264327 - time (sec): 95.53 - samples/sec: 51.25 - lr: 0.000012
2023-11-10 10:27:22,943 epoch 15 - iter 714/1024 - loss 0.00226676 - time (sec): 111.20 - samples/sec: 51.37 - lr: 0.000012
2023-11-10 10

100%|██████████| 391/391 [00:31<00:00, 12.35it/s]


2023-11-10 10:28:41,557 Evaluating as a multi-label problem: False
2023-11-10 10:28:41,595 DEV : loss 0.21397148072719574 - f1-score (macro avg)  0.8256
2023-11-10 10:28:42,061 ----------------------------------------------------------------------------------------------------
2023-11-10 10:28:57,851 epoch 16 - iter 102/1024 - loss 0.00000111 - time (sec): 15.79 - samples/sec: 51.68 - lr: 0.000011
2023-11-10 10:29:13,702 epoch 16 - iter 204/1024 - loss 0.00629869 - time (sec): 31.64 - samples/sec: 51.58 - lr: 0.000011
2023-11-10 10:29:29,514 epoch 16 - iter 306/1024 - loss 0.00439797 - time (sec): 47.45 - samples/sec: 51.59 - lr: 0.000010
2023-11-10 10:29:45,343 epoch 16 - iter 408/1024 - loss 0.00329915 - time (sec): 63.28 - samples/sec: 51.58 - lr: 0.000010
2023-11-10 10:30:01,156 epoch 16 - iter 510/1024 - loss 0.00334236 - time (sec): 79.09 - samples/sec: 51.58 - lr: 0.000010
2023-11-10 10:30:16,748 epoch 16 - iter 612/1024 - loss 0.00497454 - time (sec): 94.69 - samples/sec: 51.71

100%|██████████| 391/391 [00:32<00:00, 12.14it/s]


2023-11-10 10:31:53,144 Evaluating as a multi-label problem: False
2023-11-10 10:31:53,180 DEV : loss 0.22113460302352905 - f1-score (macro avg)  0.8118
2023-11-10 10:31:53,658 ----------------------------------------------------------------------------------------------------
2023-11-10 10:32:09,421 epoch 17 - iter 102/1024 - loss 0.00000182 - time (sec): 15.76 - samples/sec: 51.77 - lr: 0.000009
2023-11-10 10:32:25,189 epoch 17 - iter 204/1024 - loss 0.00000147 - time (sec): 31.53 - samples/sec: 51.76 - lr: 0.000008
2023-11-10 10:32:40,996 epoch 17 - iter 306/1024 - loss 0.00000457 - time (sec): 47.34 - samples/sec: 51.71 - lr: 0.000008
2023-11-10 10:32:56,498 epoch 17 - iter 408/1024 - loss 0.00000369 - time (sec): 62.84 - samples/sec: 51.94 - lr: 0.000008
2023-11-10 10:33:12,121 epoch 17 - iter 510/1024 - loss 0.00010823 - time (sec): 78.46 - samples/sec: 52.00 - lr: 0.000008
2023-11-10 10:33:27,685 epoch 17 - iter 612/1024 - loss 0.00009038 - time (sec): 94.03 - samples/sec: 52.07

100%|██████████| 391/391 [00:32<00:00, 12.10it/s]

2023-11-10 10:35:03,590 Evaluating as a multi-label problem: False





2023-11-10 10:35:03,638 DEV : loss 0.2632475793361664 - f1-score (macro avg)  0.7588
2023-11-10 10:35:04,098 ----------------------------------------------------------------------------------------------------
2023-11-10 10:35:20,474 epoch 18 - iter 102/1024 - loss 0.00625316 - time (sec): 16.37 - samples/sec: 49.83 - lr: 0.000006
2023-11-10 10:35:36,096 epoch 18 - iter 204/1024 - loss 0.00322733 - time (sec): 32.00 - samples/sec: 51.01 - lr: 0.000006
2023-11-10 10:35:51,907 epoch 18 - iter 306/1024 - loss 0.00215187 - time (sec): 47.81 - samples/sec: 51.21 - lr: 0.000006
2023-11-10 10:36:07,686 epoch 18 - iter 408/1024 - loss 0.00167771 - time (sec): 63.59 - samples/sec: 51.33 - lr: 0.000006
2023-11-10 10:36:23,380 epoch 18 - iter 510/1024 - loss 0.00134232 - time (sec): 79.28 - samples/sec: 51.46 - lr: 0.000006
2023-11-10 10:36:39,217 epoch 18 - iter 612/1024 - loss 0.00111868 - time (sec): 95.12 - samples/sec: 51.47 - lr: 0.000005
2023-11-10 10:36:54,900 epoch 18 - iter 714/1024 - l

100%|██████████| 391/391 [00:30<00:00, 12.81it/s]

2023-11-10 10:38:12,758 Evaluating as a multi-label problem: False





2023-11-10 10:38:12,823 DEV : loss 0.2795083224773407 - f1-score (macro avg)  0.7928
2023-11-10 10:38:13,747 ----------------------------------------------------------------------------------------------------
2023-11-10 10:38:29,393 epoch 19 - iter 102/1024 - loss 0.00000271 - time (sec): 15.64 - samples/sec: 52.17 - lr: 0.000004
2023-11-10 10:38:45,196 epoch 19 - iter 204/1024 - loss 0.00000206 - time (sec): 31.44 - samples/sec: 51.90 - lr: 0.000004
2023-11-10 10:39:00,895 epoch 19 - iter 306/1024 - loss 0.00000156 - time (sec): 47.14 - samples/sec: 51.93 - lr: 0.000004
2023-11-10 10:39:16,665 epoch 19 - iter 408/1024 - loss 0.00000125 - time (sec): 62.91 - samples/sec: 51.88 - lr: 0.000004
2023-11-10 10:39:33,799 epoch 19 - iter 510/1024 - loss 0.00000106 - time (sec): 80.05 - samples/sec: 50.97 - lr: 0.000003
2023-11-10 10:39:49,554 epoch 19 - iter 612/1024 - loss 0.00000098 - time (sec): 95.80 - samples/sec: 51.11 - lr: 0.000003
2023-11-10 10:40:05,078 epoch 19 - iter 714/1024 - l

100%|██████████| 391/391 [00:32<00:00, 12.18it/s]

2023-11-10 10:41:24,230 Evaluating as a multi-label problem: False





2023-11-10 10:41:24,272 DEV : loss 0.2804284691810608 - f1-score (macro avg)  0.7907
2023-11-10 10:41:24,744 ----------------------------------------------------------------------------------------------------
2023-11-10 10:41:40,520 epoch 20 - iter 102/1024 - loss 0.00000048 - time (sec): 15.77 - samples/sec: 51.73 - lr: 0.000002
2023-11-10 10:41:56,320 epoch 20 - iter 204/1024 - loss 0.00000042 - time (sec): 31.57 - samples/sec: 51.69 - lr: 0.000002
2023-11-10 10:42:12,248 epoch 20 - iter 306/1024 - loss 0.00000039 - time (sec): 47.50 - samples/sec: 51.53 - lr: 0.000002
2023-11-10 10:42:28,049 epoch 20 - iter 408/1024 - loss 0.00000036 - time (sec): 63.30 - samples/sec: 51.56 - lr: 0.000001
2023-11-10 10:42:43,612 epoch 20 - iter 510/1024 - loss 0.00000036 - time (sec): 78.87 - samples/sec: 51.73 - lr: 0.000001
2023-11-10 10:42:59,132 epoch 20 - iter 612/1024 - loss 0.00000037 - time (sec): 94.39 - samples/sec: 51.87 - lr: 0.000001
2023-11-10 10:43:14,682 epoch 20 - iter 714/1024 - l

100%|██████████| 391/391 [00:31<00:00, 12.23it/s]

2023-11-10 10:44:35,337 Evaluating as a multi-label problem: False
2023-11-10 10:44:35,394 DEV : loss 0.2806123197078705 - f1-score (macro avg)  0.7907





2023-11-10 10:44:36,642 ----------------------------------------------------------------------------------------------------
2023-11-10 10:44:36,648 Testing using last state of model ...


100%|██████████| 621/621 [00:51<00:00, 12.08it/s]


2023-11-10 10:45:28,134 Evaluating as a multi-label problem: False
2023-11-10 10:45:28,214 0.9833	0.9833	0.9833	0.9833
2023-11-10 10:45:28,219 
Results:
- F-score (micro) 0.9833
- F-score (macro) 0.8519
- Accuracy 0.9833

By class:
                         precision    recall  f1-score   support

                            0.9886    0.9941    0.9913      4722
 received_nobelprize_in     0.8542    0.7193    0.7810        57
                has_won     0.8750    0.7500    0.8077        56
        has_nationality     0.8537    0.8537    0.8537        41
received_nobelprize_for     0.8667    0.6047    0.7123        43
                born_on     0.9062    0.9355    0.9206        31
                died_on     0.8125    1.0000    0.8966        13

               accuracy                         0.9833      4963
              macro avg     0.8796    0.8367    0.8519      4963
           weighted avg     0.9827    0.9833    0.9826      4963

2023-11-10 10:45:28,224 --------------------------

{'test_score': 0.8518799591731039,
 'dev_score_history': [0.30324872735760405,
  0.45767416520019394,
  0.7109586912482148,
  0.8183984314215603,
  0.8264995101603897,
  0.8455084570850433,
  0.7940780009334762,
  0.7826235600300739,
  0.6977273540274987,
  0.7705078396868459,
  0.8061085802819999,
  0.7811996273678782,
  0.8376924117123954,
  0.7540133777475991,
  0.8256244926173119,
  0.8117683529772554,
  0.7587597642737218,
  0.7928211799483561,
  0.7907306782509602,
  0.7907306782509602],
 'train_loss_history': [0.4421313327071543,
  0.19548283539870565,
  0.1186104439416846,
  0.0723404538678962,
  0.05704137880469936,
  0.040246722279616405,
  0.026219445213345353,
  0.015770059697864394,
  0.022194670024294073,
  0.0201640980191223,
  0.007451911496737029,
  0.006263592299737058,
  0.004708399535028456,
  0.007113202604945628,
  0.0015880570519373899,
  0.0035276494785739924,
  8.878384218941862e-05,
  0.0006899052556953796,
  7.608930745421715e-07,
  3.493261327327111e-07],
 '

### Using 2-step
Here we load the trained models and see if they work as intended.

In [4]:
# Load the saved component-1 models: the NER tagger and the relation classifier
# that was fine-tuned into data/comp1/relations/.
loaded_ner: SequenceTagger = SequenceTagger.load("data/comp1/ner/final-model.pt")
loaded_re: RelationClassifier = RelationClassifier.load('data/comp1/relations/final-model.pt')

2023-11-10 12:35:09,666 SequenceTagger predicts: Dictionary with 11 tags: O, B-Winner, I-Winner, B-Date, I-Date, B-Nationality, I-Nationality, B-Prizetype, I-Prizetype, B-Reason, I-Reason


In [4]:
# Smoke test of the 2-step pipeline on a hand-written text.
sentence = Sentence('Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer. ')
# Order matters: the relation classifier was configured to read its entities from
# the "ner" label layer, so the tagger has to run first.
loaded_ner.predict(sentence)
loaded_re.predict(sentence)
# Full dump (text, labels with confidences, ...) followed by only the relation labels.
print(sentence.to_dict())
print(sentence.get_labels('relation'))

{'text': 'Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer.', 'all labels': [{'value': 'Winner', 'confidence': 0.9999329149723053}, {'value': 'Date', 'confidence': 0.9999633431434631}, {'value': 'Nationality', 'confidence': 0.9999721050262451}, {'value': 'Nationality', 'confidence': 0.9999365409215292}, {'value': 'Winner', 'confidence': 0.9998683929443359}, {'value': 'Winner', 'confidence': 0.9999294281005859}, {'value': 'Nationality', 'confidence': 0.9999485015869141}, {'value': 'Nationality', 'confidence': 0.5977905988693237}, {'value': 'Nationality', 'confidence': 0.999382495880127}, {'value': 'born_on', 'confidence': 0

Here we apply the 2-step Relation Extraction to our test set. We load the JSON created in Label Studio, run both models sequentially and create two files. The first file contains only the from-text, the to-text and the label of each relation. The second contains the entities, the relations, their confidences and all tokens.

The latter file can be used for pruning low-confidence predictions.

In [None]:
# Run the 2-step pipeline (NER, then relation classification) over every test
# document of component 1 and export the predictions to two JSON files.
test_set = srsly.read_json('comp1_test_set.json')
all_relations = {}   # document title -> list of (from_text, to_text, relation label)
document_dicts = []  # one full flair dict per document, in test-set order
for document in test_set:
  sentence = Sentence(document["data"]["text"])
  # The relation classifier reads its entities from the "ner" layer, so tag first.
  loaded_ner.predict(sentence)
  loaded_re.predict(sentence)
  # Serialise once; the same dict feeds both output files.
  sent_dict = sentence.to_dict()
  all_relations[document["data"]["title"]] = [
      (relation["from_text"], relation["to_text"], relation["labels"][0]["value"])
      for relation in sent_dict["relations"]
      # Skip relations without any label and those carrying the empty "no relation" label.
      if relation["labels"] and relation["labels"][0]["value"]
  ]
  document_dicts.append(sent_dict)


# Compact file: only the non-empty relations per document.
srsly.write_json('comp1_relations.json', all_relations)
# Verbose file: everything flair serialised for each document.
srsly.write_json('comp1_output.json', document_dicts)


# Component 2
For component 2 a very similar model is used.
The conll files and corpus are created in the same way as before.

In [None]:
# Convert the Label Studio exports for component 2 into CoNLL files.
train_json = srsly.read_json('comp2_train_set.json')
make_conll(train_json, key_field='Abstract_new', file_name='data/comp2_train_set')

test_json = srsly.read_json('comp2_test_set.json')
make_conll(test_json, key_field='Abstract_new', file_name='data/comp2_test_set')

# Number of chunks that go into the train split; the remainder becomes the dev split.
# If the file holds 90 chunks or fewer, the dev split is empty.
COMP2_TRAIN_SIZE = 90
# Fixed seed so that Restart & Run All reproduces the same train/dev split.
COMP2_SPLIT_SEED = 42

# Split the train file into train/dev split
with open('data/comp2_train_set.conll') as f:
  data = f.read()
# Files opened in text mode have every line ending translated to "\n" on read, and
# write_conll joins chunks with "\n\n". Splitting on "\n\n" is therefore correct on
# every platform, whereas 2*os.linesep ("\r\n\r\n" on Windows) would never match.
splits = data.split("\n\n")
# A dedicated Random instance keeps the split deterministic without touching the
# global random state used elsewhere in the notebook.
random.Random(COMP2_SPLIT_SEED).shuffle(splits)

train = splits[:COMP2_TRAIN_SIZE]
dev = splits[COMP2_TRAIN_SIZE:]

write_conll('data/comp2/train.conll', train)
write_conll('data/comp2/dev.conll', dev)

# The test file is only re-written into the comp2 folder; order is kept.
with open('data/comp2_test_set.conll') as f:
  data = f.read()
splits = data.split("\n\n")

write_conll('data/comp2/test.conll', splits)


In [None]:
# Column mapping for the CoNLL files: column 1 is read as the token text and
# column 2 as the NER tag; column 0 is not mapped.
columns = {1: 'text', 2: 'ner'}
# Empty data folder; presumably so the file paths below resolve relative to the
# working directory — confirm if the notebook is run from elsewhere.
data_folder = ''

# Component-2 corpus built from the train/dev/test files written in the previous cell.
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='data/comp2/train.conll',
                              test_file='data/comp2/test.conll',
                              dev_file='data/comp2/dev.conll')
# Number of items in the train split, as a quick sanity check.
print(f'Corpus size: {len(corpus.train)}')

Note the change in subtoken pooling here: each token embedding is now the mean of all its subtoken embeddings, rather than only the first subtoken. This improved performance substantially for component 2.

In [None]:
# tag to predict
tag_type = 'ner'# make tag dictionary from the corpus
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

embeddings = TransformerWordEmbeddings(
    model='distilbert-base-cased',
    layers="-1",
    subtoken_pooling='mean',
    fine_tune=True,
    use_context=True,
    respect_document_boundaries=False,
)

tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=False,
        use_rnn=False,
        reproject_embeddings=False,
        tag_format='BIO'
    )

In [None]:
# Fine-tune the component-2 NER tagger. The final-model.pt written to
# data/comp2/ner/ is the file loaded later for inference.
trainer : ModelTrainer = ModelTrainer(tagger, corpus)

trainer.fine_tune(
        'data/comp2/ner/',
        learning_rate=5e-05,
        mini_batch_size=16,
        max_epochs=100,
        embeddings_storage_mode='gpu', # cpu, gpu
        weight_decay=0.0,
    )

### RE component 2

In [None]:
# Build the relation label dictionary once. Each make_label_dictionary() call
# walks the corpus, so a second identical call only repeats that work.
relation_dictionary = corpus.make_label_dictionary(label_type="relation")
# Alias kept so that any cell still referring to the old name keeps working.
relation_label_dict = relation_dictionary
# Fine-tunable DistilBERT (uncased) document embeddings, using only the last layer.
doc_embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True)
# Relation classifier for component 2, configured like the component-1 model
# but with the component-2 entity types.
relation_model: RelationClassifier = RelationClassifier(
    embeddings=doc_embeddings,
    label_dictionary=relation_dictionary,
    label_type="relation",
    entity_label_types="ner",  # entities are read from the "ner" label layer
    entity_pair_labels={  # Define valid entity pair combinations, used as relation candidates
        ("Person", "Component"),
        ("Person", "Time"),
        ("Person", "Date"),
        ("Person", "Person"),
        ("Component", "Person"),
        ("Component", "Date"),
        ("Component", "Time"),
        ("Component", "Component")
    },
    zero_tag_value='',  # empty string is used as the "no relation" label
    allow_unk_tag=False,
)

In [None]:
# The corpus is passed through relation_model.transform_corpus() before training,
# so the trainer receives the classifier's own encoding of the data.
trainer: ModelTrainer = ModelTrainer(model=relation_model, corpus=relation_model.transform_corpus(corpus))
# Fine-tune the component-2 relation classifier. The final-model.pt written to
# data/comp2/relations/ is the file loaded later for inference.
trainer.fine_tune(
    'data/comp2/relations/',
    max_epochs=20,
    learning_rate=4e-5,
    mini_batch_size=8,
    main_evaluation_metric=("macro avg", "f1-score"),  # macro average weighs every relation class equally
    shuffle=True,
    embeddings_storage_mode='gpu',
)

### Using 2-step RE on the test set
The trained models are applied to the component 2 test set, in the same way as for component 1.


In [2]:
# Load both models, identical to comp 1
loaded_ner = SequenceTagger.load("data/comp2/ner/final-model.pt")
loaded_re: RelationClassifier = RelationClassifier.load('data/comp2/relations/final-model.pt')

2023-11-10 12:33:24,361 SequenceTagger predicts: Dictionary with 9 tags: O, B-Component, I-Component, B-Person, I-Person, B-Time, I-Time, B-Date, I-Date


In [None]:
# process all test data and store the result
test_set = srsly.read_json('comp2_test_set.json')
all_relations = {}
document_dicts = []
for document in test_set:
  sentence = Sentence(document["data"]["Abstract_new"])
  loaded_ner.predict(sentence)
  loaded_re.predict(sentence)
  sent_dict = sentence.to_dict()
  unique_id = " ".join([str(document["data"]["EventDt"]), document["data"]["IPFCode"], document["data"]["IPFShortDesc"], str(document["inner_id"])])
  all_relations[unique_id] = [(relation["from_text"], relation["to_text"], relation["labels"][0]["value"]) for relation in sent_dict["relations"] if len(relation["labels"][0]["value"]) > 0]
  document_dicts.append(sentence.to_dict())


srsly.write_json('comp2_relations.json', all_relations)
srsly.write_json('comp2_output.json', document_dicts)
