<a href="https://colab.research.google.com/github/IvanDePivan/2AMM30-groep-2-component-1/blob/main/Flairmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Mining Group 2
Let's import everything here so we don't have to do this later

## Prerequisites

In [1]:
from google.colab import output
!pip uninstall -y torch
!pip install flair

import os
import random
import json
import re
import regex
import difflib
import srsly
import torch
from typing import List
from pathlib import Path

from flair.trainers import ModelTrainer
from flair.data import Corpus, Label, Relation, Sentence
from flair.datasets import ColumnCorpus, DataLoader
from flair.embeddings import WordEmbeddings, StackedEmbeddings, TokenEmbeddings, TransformerWordEmbeddings, TransformerDocumentEmbeddings
from flair.models import SequenceTagger, RelationExtractor, RelationClassifier
from flair.models.relation_classifier_model import EncodedSentence

output.clear()

### Reformatting Label Studio output
We wrote this method for converting the output from Label Studio to the conll format, because the standard format had issues (e.g. No relation data was exported, tokenization had flaws, output used different delimiters)

In [None]:
def write_conll(filename, text):
  output_file = Path(filename)
  output_file.parent.mkdir(exist_ok=True, parents=True)
  with open(output_file, "w") as file1:
    file1.write("\n\n".join(text))

In [None]:
# Function for getting each token with their correct label and id (ids are used for relations)
def get_labels(file_item, key_field):
  rows = []
  position = 0
  next_label = 0
  # Sort the labels on start?
  labels = [item for item in file_item['annotations'][0]['result'] if item['type'] == 'labels']
  labels.sort(key=lambda x: x['value']['start'])
  item_count = 1
  b_count = 0
  ids = {} # count -> id
  tokens = [item for item in re.split(r'(\s+)|([\(,\.\)\\;:$*&"\-–])|(\')|(\'\w+)', file_item['data'][key_field]) if item]
  for i, item in enumerate(tokens):
    if re.search(r'\s+', item):
      position += len(item)
      continue
    start = position
    end = position + len(item)
    row = f'{item_count}\t{item}'
    label = '\tO'

    if len(labels) > next_label:
      if start >= labels[next_label]['value']['start']:
        # if end <= labels[next_label]['value']['end']:
          # the current item has a label, and might have a relation
          id = labels[next_label]["id"]
          ids[item_count] = id
          if end > labels[next_label]['value']['end']:
            difference = end - labels[next_label]['value']['end']
            end -= difference
            tokens.insert(i+1, item[-difference:])
            item = item[:-difference]
          if start == labels[next_label]['value']['start']:
            # Current Item is start of a label
            b_count += 1
            label = '\tB-' + labels[next_label]["value"]["labels"][0]

          else:
            # Current item is inside a label
            if '\tB-' + labels[next_label]["value"]["labels"][0] in rows[-1] or '\tI-' + labels[next_label]["value"]["labels"][0] in rows[-1]:
              label = '\tI-' + labels[next_label]["value"]["labels"][0]
            else:
              b_count += 1
              label = '\tB-' + labels[next_label]["value"]["labels"][0]

      if end >= labels[next_label]['value']['end']:
        next_label += 1


    row = row + label
    position += len(item)
    item_count += 1
    rows.append(row)
  # Sanity check on the labels, gives a warning if labels are disappearing.
  if len(labels) != b_count:
    print(f'labels: {len(labels)}, B-tags: {b_count}')
    print(labels)
    print(ids)
    print(f'missing label: {[item for item in labels if item["id"] not in ids.values()]}')
    print(rows)
  return rows, ids


In [None]:
# This creates the relation format flair expects for each document
def get_relations(file_item, ids):
  keys = ids.keys()
  relations = [item for item in file_item['annotations'][0]['result'] if item['type'] == 'relation']
  bio_relations = []
  found_relations = 0
  for relation in relations:
    from_ids = []
    to_ids = []
    rel_type = relation['labels'][0]
    for key in keys:
      if ids[key] == relation['from_id']:
        from_ids.append(key)
      if ids[key] == relation['to_id']:
        to_ids.append(key)
    from_ids = [int(i) for i in from_ids]
    from_ids.sort()
    to_ids = [int(i) for i in to_ids]
    to_ids.sort()
    if len(from_ids) > 0 and len(to_ids) > 0:
      found_relations += 1
      bio_relations.append(f'{from_ids[0]};{from_ids[-1]};{to_ids[0]};{to_ids[-1]};{rel_type}')
  if found_relations != len(relations):
    print(f'found relations: {found_relations}, actual relations: {len(relations)}')
    print(relations)
    print(bio_relations)
    print(keys)
    print(ids)
  return bio_relations

In [None]:
# creates the connl file using the expanded json file as input. This is reused for component 2
def make_conll(file, key_field='text', file_name='test_set'):
  conllu_file = []
  for item in file:
    doc = ''
    rows, ids = get_labels(item, key_field)
    relations = get_relations(item, ids)
    if relations and len(relations) > 0:
      doc += '# relations = ' + ('|'.join(relations))
    doc += '\n' + ('\n'.join(rows))
    conllu_file.append(doc)

  print(len(conllu_file))
  write_conll(f'{file_name}.conll', conllu_file)

In [None]:
with open('test_set.json') as f:
  file = json.load(f)
  make_conll(file)

In [None]:
# The transformed file is split up for train and dev data
# Test data is gathered from a separate json export from label studio, but it uses the same methods
# The split files are stored for convenience
with open('train_set.conll') as f:
  data = f.read()
  splits = data.split(2*os.linesep)
  print(len(splits))
  splits = sorted(splits, key=lambda x: random.random())

  train = splits[:-25]
  dev = splits[-25:]

  write_conll('train.conll', train)
  # write_conll('test.conll', test)
  write_conll('dev.conll', dev)

95


In [None]:
# Sanity check on the gpu; some issues with flair not recognizing one of the GPUs from Colab
torch.cuda.is_available()

True

## Component 1

In [None]:
# The corpus converts the BIO formatted tokens into a format Flair uses for training
# Here we can simply specify the folder where the files are and which columns they have
# Only supports
columns = {1: 'text', 2: 'ner'}
data_folder = ''

corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.conll',
                              test_file='test.conll',
                              dev_file='dev.conll')
print(f'Corpus size: {len(corpus.train)}')

In [None]:
# tag to predict
tag_type = 'ner'# make tag dictionary from the corpus
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)


In [None]:

# embedding_types : List[TokenEmbeddings] = [
#         WordEmbeddings('glove'),
        ## other embeddings
        # ]
# embeddings : StackedEmbeddings = StackedEmbeddings(
#                                 embeddings=embedding_types)

embeddings = TransformerWordEmbeddings(
    model='distilbert-base-cased',
    layers="-1",
    subtoken_pooling='first',
    fine_tune=True,
    use_context=True,
    respect_document_boundaries=False,

)

tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=False,
        use_rnn=False,
        reproject_embeddings=False,
        tag_format='BIO'
    )

In [None]:
trainer : ModelTrainer = ModelTrainer(tagger, corpus)

trainer.fine_tune(
        'resources/taggers/ner',
        learning_rate=5e-05,
        mini_batch_size=16,
        max_epochs=100,
        embeddings_storage_mode='gpu', # cpu, gpu
        weight_decay=0.0,
    )


2023-10-23 09:38:11,444 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:11,454 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(28997, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_fea

100%|██████████| 2/2 [00:03<00:00,  1.51s/it]

2023-10-23 09:38:21,411 Evaluating as a multi-label problem: False
2023-10-23 09:38:21,478 DEV : loss 2.3361923694610596 - f1-score (micro avg)  0.0182
2023-10-23 09:38:21,495 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:21,990 epoch 2 - iter 1/5 - loss 2.40558857 - time (sec): 0.49 - samples/sec: 9416.41 - lr: 0.000006
2023-10-23 09:38:22,490 epoch 2 - iter 2/5 - loss 2.38843878 - time (sec): 0.99 - samples/sec: 9150.46 - lr: 0.000007
2023-10-23 09:38:22,963 epoch 2 - iter 3/5 - loss 2.37580171 - time (sec): 1.47 - samples/sec: 8857.16 - lr: 0.000008
2023-10-23 09:38:23,484 epoch 2 - iter 4/5 - loss 2.33474647 - time (sec): 1.99 - samples/sec: 8931.96 - lr: 0.000008
2023-10-23 09:38:23,789 epoch 2 - iter 5/5 - loss 2.30010249 - time (sec): 2.29 - samples/sec: 8751.81 - lr: 0.000009
2023-10-23 09:38:23,803 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:23,812 EPOCH 2 done: loss 2.3001 - lr 0.000009


100%|██████████| 2/2 [00:03<00:00,  1.69s/it]

2023-10-23 09:38:27,215 Evaluating as a multi-label problem: False
2023-10-23 09:38:27,254 DEV : loss 1.9935094118118286 - f1-score (micro avg)  0.013
2023-10-23 09:38:27,272 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:27,788 epoch 3 - iter 1/5 - loss 2.08577587 - time (sec): 0.51 - samples/sec: 7910.13 - lr: 0.000010
2023-10-23 09:38:28,268 epoch 3 - iter 2/5 - loss 2.06475793 - time (sec): 0.99 - samples/sec: 8729.02 - lr: 0.000011
2023-10-23 09:38:28,748 epoch 3 - iter 3/5 - loss 2.02019099 - time (sec): 1.47 - samples/sec: 8946.72 - lr: 0.000012
2023-10-23 09:38:29,361 epoch 3 - iter 4/5 - loss 1.98293036 - time (sec): 2.08 - samples/sec: 8717.97 - lr: 0.000013
2023-10-23 09:38:29,559 epoch 3 - iter 5/5 - loss 1.95412585 - time (sec): 2.28 - samples/sec: 8789.68 - lr: 0.000014
2023-10-23 09:38:29,562 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:29,564 EPOCH 3 done: loss 1.9541 - lr 0.000014


100%|██████████| 2/2 [00:03<00:00,  1.74s/it]


2023-10-23 09:38:33,430 Evaluating as a multi-label problem: False
2023-10-23 09:38:33,447 DEV : loss 1.4174044132232666 - f1-score (micro avg)  0.0
2023-10-23 09:38:33,461 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:33,882 epoch 4 - iter 1/5 - loss 1.54874204 - time (sec): 0.42 - samples/sec: 9686.71 - lr: 0.000015
2023-10-23 09:38:34,338 epoch 4 - iter 2/5 - loss 1.40796246 - time (sec): 0.88 - samples/sec: 10102.63 - lr: 0.000016
2023-10-23 09:38:34,721 epoch 4 - iter 3/5 - loss 1.36513541 - time (sec): 1.26 - samples/sec: 10404.82 - lr: 0.000017
2023-10-23 09:38:35,152 epoch 4 - iter 4/5 - loss 1.28502945 - time (sec): 1.69 - samples/sec: 10894.48 - lr: 0.000018
2023-10-23 09:38:35,302 epoch 4 - iter 5/5 - loss 1.25953856 - time (sec): 1.84 - samples/sec: 10905.49 - lr: 0.000019
2023-10-23 09:38:35,304 ----------------------------------------------------------------------------------------------------
2023-10

100%|██████████| 2/2 [00:02<00:00,  1.09s/it]

2023-10-23 09:38:37,510 Evaluating as a multi-label problem: False
2023-10-23 09:38:37,533 DEV : loss 0.7396366000175476 - f1-score (micro avg)  0.0
2023-10-23 09:38:37,543 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:38,030 epoch 5 - iter 1/5 - loss 0.78876482 - time (sec): 0.49 - samples/sec: 10106.64 - lr: 0.000020
2023-10-23 09:38:38,438 epoch 5 - iter 2/5 - loss 0.81707889 - time (sec): 0.89 - samples/sec: 10149.02 - lr: 0.000021
2023-10-23 09:38:38,836 epoch 5 - iter 3/5 - loss 0.75814360 - time (sec): 1.29 - samples/sec: 10314.73 - lr: 0.000022
2023-10-23 09:38:39,272 epoch 5 - iter 4/5 - loss 0.76618726 - time (sec): 1.73 - samples/sec: 10543.52 - lr: 0.000023
2023-10-23 09:38:39,440 epoch 5 - iter 5/5 - loss 0.76421155 - time (sec): 1.90 - samples/sec: 10579.40 - lr: 0.000024
2023-10-23 09:38:39,442 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:39,445 EPOCH 5 done: loss 0.7642 - lr 0.000024


100%|██████████| 2/2 [00:02<00:00,  1.11s/it]

2023-10-23 09:38:41,681 Evaluating as a multi-label problem: False
2023-10-23 09:38:41,699 DEV : loss 0.7156218886375427 - f1-score (micro avg)  0.0
2023-10-23 09:38:41,711 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:42,220 epoch 6 - iter 1/5 - loss 0.72221018 - time (sec): 0.51 - samples/sec: 9185.81 - lr: 0.000025
2023-10-23 09:38:42,724 epoch 6 - iter 2/5 - loss 0.75488707 - time (sec): 1.01 - samples/sec: 9128.21 - lr: 0.000025
2023-10-23 09:38:43,220 epoch 6 - iter 3/5 - loss 0.76975516 - time (sec): 1.51 - samples/sec: 9242.25 - lr: 0.000026
2023-10-23 09:38:43,698 epoch 6 - iter 4/5 - loss 0.73807133 - time (sec): 1.99 - samples/sec: 9187.71 - lr: 0.000027
2023-10-23 09:38:43,908 epoch 6 - iter 5/5 - loss 0.72174574 - time (sec): 2.20 - samples/sec: 9135.14 - lr: 0.000028
2023-10-23 09:38:43,914 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:43,916 EPOCH 6 done: loss 0.7217 - lr 0.000028


100%|██████████| 2/2 [00:04<00:00,  2.30s/it]

2023-10-23 09:38:48,542 Evaluating as a multi-label problem: False
2023-10-23 09:38:48,567 DEV : loss 0.602821409702301 - f1-score (micro avg)  0.0
2023-10-23 09:38:48,587 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:49,071 epoch 7 - iter 1/5 - loss 0.65621602 - time (sec): 0.48 - samples/sec: 9471.21 - lr: 0.000029
2023-10-23 09:38:49,530 epoch 7 - iter 2/5 - loss 0.64182149 - time (sec): 0.94 - samples/sec: 10370.46 - lr: 0.000030
2023-10-23 09:38:49,977 epoch 7 - iter 3/5 - loss 0.63730906 - time (sec): 1.39 - samples/sec: 10063.76 - lr: 0.000031
2023-10-23 09:38:50,372 epoch 7 - iter 4/5 - loss 0.63091961 - time (sec): 1.78 - samples/sec: 10176.86 - lr: 0.000032
2023-10-23 09:38:50,557 epoch 7 - iter 5/5 - loss 0.63664398 - time (sec): 1.97 - samples/sec: 10203.21 - lr: 0.000033
2023-10-23 09:38:50,559 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:50,562 EPOCH 7 done: loss 0.6366 - lr 0.000033


100%|██████████| 2/2 [00:02<00:00,  1.09s/it]

2023-10-23 09:38:52,747 Evaluating as a multi-label problem: False
2023-10-23 09:38:52,767 DEV : loss 0.554529070854187 - f1-score (micro avg)  0.0
2023-10-23 09:38:52,778 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:53,189 epoch 8 - iter 1/5 - loss 0.68273112 - time (sec): 0.41 - samples/sec: 10185.23 - lr: 0.000034
2023-10-23 09:38:53,586 epoch 8 - iter 2/5 - loss 0.63266258 - time (sec): 0.81 - samples/sec: 10929.46 - lr: 0.000035
2023-10-23 09:38:54,032 epoch 8 - iter 3/5 - loss 0.60498364 - time (sec): 1.25 - samples/sec: 11124.96 - lr: 0.000036
2023-10-23 09:38:54,505 epoch 8 - iter 4/5 - loss 0.57812578 - time (sec): 1.73 - samples/sec: 10859.07 - lr: 0.000037
2023-10-23 09:38:54,625 epoch 8 - iter 5/5 - loss 0.57762567 - time (sec): 1.85 - samples/sec: 10865.83 - lr: 0.000038
2023-10-23 09:38:54,627 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:54,630 EPOCH 8 done: loss 0.5776 - lr 0.000038


100%|██████████| 2/2 [00:02<00:00,  1.10s/it]

2023-10-23 09:38:56,848 Evaluating as a multi-label problem: False
2023-10-23 09:38:56,869 DEV : loss 0.49129509925842285 - f1-score (micro avg)  0.0
2023-10-23 09:38:56,880 ----------------------------------------------------------------------------------------------------





2023-10-23 09:38:57,732 epoch 9 - iter 1/5 - loss 0.50560172 - time (sec): 0.85 - samples/sec: 6097.94 - lr: 0.000039
2023-10-23 09:38:58,191 epoch 9 - iter 2/5 - loss 0.49637168 - time (sec): 1.31 - samples/sec: 7311.09 - lr: 0.000040
2023-10-23 09:38:58,637 epoch 9 - iter 3/5 - loss 0.49290270 - time (sec): 1.76 - samples/sec: 8335.13 - lr: 0.000041
2023-10-23 09:38:59,076 epoch 9 - iter 4/5 - loss 0.49910051 - time (sec): 2.19 - samples/sec: 8462.57 - lr: 0.000042
2023-10-23 09:38:59,272 epoch 9 - iter 5/5 - loss 0.48991275 - time (sec): 2.39 - samples/sec: 8392.23 - lr: 0.000042
2023-10-23 09:38:59,277 ----------------------------------------------------------------------------------------------------
2023-10-23 09:38:59,282 EPOCH 9 done: loss 0.4899 - lr 0.000042


100%|██████████| 2/2 [00:04<00:00,  2.19s/it]

2023-10-23 09:39:03,673 Evaluating as a multi-label problem: False
2023-10-23 09:39:03,704 DEV : loss 0.39674651622772217 - f1-score (micro avg)  0.3155
2023-10-23 09:39:03,725 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:04,337 epoch 10 - iter 1/5 - loss 0.39754297 - time (sec): 0.61 - samples/sec: 8092.23 - lr: 0.000043
2023-10-23 09:39:04,812 epoch 10 - iter 2/5 - loss 0.38700762 - time (sec): 1.08 - samples/sec: 8827.38 - lr: 0.000044
2023-10-23 09:39:05,335 epoch 10 - iter 3/5 - loss 0.40029987 - time (sec): 1.61 - samples/sec: 8863.60 - lr: 0.000045
2023-10-23 09:39:05,720 epoch 10 - iter 4/5 - loss 0.39641027 - time (sec): 1.99 - samples/sec: 9032.06 - lr: 0.000046
2023-10-23 09:39:05,888 epoch 10 - iter 5/5 - loss 0.38829393 - time (sec): 2.16 - samples/sec: 9295.88 - lr: 0.000047
2023-10-23 09:39:05,895 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:05,897 EPOCH 10 done: loss 0.3883 - lr 0.000047


100%|██████████| 2/2 [00:02<00:00,  1.12s/it]

2023-10-23 09:39:08,157 Evaluating as a multi-label problem: False
2023-10-23 09:39:08,176 DEV : loss 0.29990720748901367 - f1-score (micro avg)  0.5126
2023-10-23 09:39:08,191 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:08,631 epoch 11 - iter 1/5 - loss 0.38261764 - time (sec): 0.44 - samples/sec: 11549.25 - lr: 0.000048
2023-10-23 09:39:09,083 epoch 11 - iter 2/5 - loss 0.35443193 - time (sec): 0.89 - samples/sec: 10877.83 - lr: 0.000049
2023-10-23 09:39:09,496 epoch 11 - iter 3/5 - loss 0.33477205 - time (sec): 1.30 - samples/sec: 11042.61 - lr: 0.000050
2023-10-23 09:39:09,899 epoch 11 - iter 4/5 - loss 0.32336752 - time (sec): 1.71 - samples/sec: 10644.52 - lr: 0.000050
2023-10-23 09:39:10,080 epoch 11 - iter 5/5 - loss 0.31198645 - time (sec): 1.89 - samples/sec: 10623.32 - lr: 0.000050
2023-10-23 09:39:10,082 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:10,087 EPOCH 11 done: loss 0.3120 - lr 0.000050


100%|██████████| 2/2 [00:02<00:00,  1.31s/it]

2023-10-23 09:39:12,734 Evaluating as a multi-label problem: False
2023-10-23 09:39:12,755 DEV : loss 0.25214487314224243 - f1-score (micro avg)  0.5596
2023-10-23 09:39:12,766 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:13,266 epoch 12 - iter 1/5 - loss 0.26916242 - time (sec): 0.50 - samples/sec: 9256.96 - lr: 0.000050
2023-10-23 09:39:13,720 epoch 12 - iter 2/5 - loss 0.26404286 - time (sec): 0.95 - samples/sec: 9826.75 - lr: 0.000050
2023-10-23 09:39:14,176 epoch 12 - iter 3/5 - loss 0.24593911 - time (sec): 1.41 - samples/sec: 10200.82 - lr: 0.000049
2023-10-23 09:39:14,578 epoch 12 - iter 4/5 - loss 0.25493542 - time (sec): 1.81 - samples/sec: 10379.45 - lr: 0.000049
2023-10-23 09:39:14,691 epoch 12 - iter 5/5 - loss 0.25010981 - time (sec): 1.92 - samples/sec: 10425.13 - lr: 0.000049
2023-10-23 09:39:14,693 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:14,697 EPOCH 12 done: loss 0.2501 - lr 0.000049


100%|██████████| 2/2 [00:04<00:00,  2.09s/it]

2023-10-23 09:39:18,892 Evaluating as a multi-label problem: False
2023-10-23 09:39:18,922 DEV : loss 0.19876091182231903 - f1-score (micro avg)  0.7158
2023-10-23 09:39:18,944 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:19,819 epoch 13 - iter 1/5 - loss 0.21287758 - time (sec): 0.87 - samples/sec: 5122.07 - lr: 0.000049
2023-10-23 09:39:20,535 epoch 13 - iter 2/5 - loss 0.18258466 - time (sec): 1.59 - samples/sec: 5469.60 - lr: 0.000049
2023-10-23 09:39:21,151 epoch 13 - iter 3/5 - loss 0.18301857 - time (sec): 2.20 - samples/sec: 6331.82 - lr: 0.000049
2023-10-23 09:39:21,699 epoch 13 - iter 4/5 - loss 0.18931790 - time (sec): 2.75 - samples/sec: 6845.50 - lr: 0.000049
2023-10-23 09:39:21,885 epoch 13 - iter 5/5 - loss 0.18804831 - time (sec): 2.94 - samples/sec: 6830.65 - lr: 0.000049
2023-10-23 09:39:21,888 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:21,891 EPOCH 13 done: loss 0.1880 - lr 0.000049


100%|██████████| 2/2 [00:02<00:00,  1.34s/it]

2023-10-23 09:39:24,579 Evaluating as a multi-label problem: False
2023-10-23 09:39:24,597 DEV : loss 0.1888035237789154 - f1-score (micro avg)  0.7333
2023-10-23 09:39:24,610 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:25,026 epoch 14 - iter 1/5 - loss 0.15997815 - time (sec): 0.41 - samples/sec: 10467.18 - lr: 0.000049
2023-10-23 09:39:25,405 epoch 14 - iter 2/5 - loss 0.17327128 - time (sec): 0.79 - samples/sec: 10131.61 - lr: 0.000049
2023-10-23 09:39:26,015 epoch 14 - iter 3/5 - loss 0.15641623 - time (sec): 1.40 - samples/sec: 9843.64 - lr: 0.000048
2023-10-23 09:39:26,431 epoch 14 - iter 4/5 - loss 0.15004713 - time (sec): 1.82 - samples/sec: 10054.17 - lr: 0.000048
2023-10-23 09:39:26,601 epoch 14 - iter 5/5 - loss 0.14875175 - time (sec): 1.99 - samples/sec: 10089.65 - lr: 0.000048
2023-10-23 09:39:26,604 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:26,605 EPOCH 14 done: loss 0.1488 - lr 0.000048


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:39:28,908 Evaluating as a multi-label problem: False
2023-10-23 09:39:28,929 DEV : loss 0.17088210582733154 - f1-score (micro avg)  0.7896
2023-10-23 09:39:28,940 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:29,341 epoch 15 - iter 1/5 - loss 0.12908243 - time (sec): 0.40 - samples/sec: 10103.48 - lr: 0.000048
2023-10-23 09:39:29,844 epoch 15 - iter 2/5 - loss 0.12727274 - time (sec): 0.90 - samples/sec: 10624.33 - lr: 0.000048
2023-10-23 09:39:30,248 epoch 15 - iter 3/5 - loss 0.12431797 - time (sec): 1.31 - samples/sec: 10472.73 - lr: 0.000048
2023-10-23 09:39:30,655 epoch 15 - iter 4/5 - loss 0.11588040 - time (sec): 1.71 - samples/sec: 10517.81 - lr: 0.000048
2023-10-23 09:39:30,796 epoch 15 - iter 5/5 - loss 0.11359200 - time (sec): 1.85 - samples/sec: 10816.16 - lr: 0.000048
2023-10-23 09:39:30,798 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:30,801 EPOCH 15 done: loss 0.1136 - lr 0.000048


100%|██████████| 2/2 [00:03<00:00,  1.76s/it]

2023-10-23 09:39:34,346 Evaluating as a multi-label problem: False
2023-10-23 09:39:34,372 DEV : loss 0.16639330983161926 - f1-score (micro avg)  0.7724
2023-10-23 09:39:34,389 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:35,377 epoch 16 - iter 1/5 - loss 0.10156095 - time (sec): 0.99 - samples/sec: 4705.53 - lr: 0.000048
2023-10-23 09:39:35,897 epoch 16 - iter 2/5 - loss 0.09938395 - time (sec): 1.51 - samples/sec: 6051.49 - lr: 0.000047
2023-10-23 09:39:36,447 epoch 16 - iter 3/5 - loss 0.09168638 - time (sec): 2.06 - samples/sec: 6448.38 - lr: 0.000047
2023-10-23 09:39:37,173 epoch 16 - iter 4/5 - loss 0.08722174 - time (sec): 2.78 - samples/sec: 6588.43 - lr: 0.000047
2023-10-23 09:39:37,668 epoch 16 - iter 5/5 - loss 0.08719716 - time (sec): 3.28 - samples/sec: 6123.23 - lr: 0.000047
2023-10-23 09:39:37,689 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:37,696 EPOCH 16 done: loss 0.0872 - lr 0.000047


100%|██████████| 2/2 [00:03<00:00,  1.64s/it]

2023-10-23 09:39:40,995 Evaluating as a multi-label problem: False
2023-10-23 09:39:41,012 DEV : loss 0.15489940345287323 - f1-score (micro avg)  0.7993
2023-10-23 09:39:41,023 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:41,476 epoch 17 - iter 1/5 - loss 0.07925775 - time (sec): 0.45 - samples/sec: 11566.77 - lr: 0.000047
2023-10-23 09:39:41,860 epoch 17 - iter 2/5 - loss 0.08025738 - time (sec): 0.83 - samples/sec: 11394.36 - lr: 0.000047
2023-10-23 09:39:42,241 epoch 17 - iter 3/5 - loss 0.08391928 - time (sec): 1.22 - samples/sec: 11640.04 - lr: 0.000047
2023-10-23 09:39:42,725 epoch 17 - iter 4/5 - loss 0.07766262 - time (sec): 1.70 - samples/sec: 10931.53 - lr: 0.000047
2023-10-23 09:39:42,866 epoch 17 - iter 5/5 - loss 0.07573254 - time (sec): 1.84 - samples/sec: 10903.96 - lr: 0.000047
2023-10-23 09:39:42,868 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:42,871 EPOCH 17 done: loss 0.0757 - lr 0.000047


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:39:45,174 Evaluating as a multi-label problem: False
2023-10-23 09:39:45,192 DEV : loss 0.15362733602523804 - f1-score (micro avg)  0.7899
2023-10-23 09:39:45,203 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:45,661 epoch 18 - iter 1/5 - loss 0.05545553 - time (sec): 0.46 - samples/sec: 10505.82 - lr: 0.000047
2023-10-23 09:39:46,190 epoch 18 - iter 2/5 - loss 0.06175448 - time (sec): 0.99 - samples/sec: 10749.26 - lr: 0.000046
2023-10-23 09:39:46,932 epoch 18 - iter 3/5 - loss 0.05655781 - time (sec): 1.73 - samples/sec: 8595.24 - lr: 0.000046
2023-10-23 09:39:47,295 epoch 18 - iter 4/5 - loss 0.05686257 - time (sec): 2.09 - samples/sec: 8868.43 - lr: 0.000046
2023-10-23 09:39:47,420 epoch 18 - iter 5/5 - loss 0.05534810 - time (sec): 2.22 - samples/sec: 9050.81 - lr: 0.000046
2023-10-23 09:39:47,422 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:47,426 EPOCH 18 done: loss 0.0553 - lr 0.000046


100%|██████████| 2/2 [00:03<00:00,  1.60s/it]

2023-10-23 09:39:50,630 Evaluating as a multi-label problem: False
2023-10-23 09:39:50,658 DEV : loss 0.16091114282608032 - f1-score (micro avg)  0.8388
2023-10-23 09:39:50,675 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:51,125 epoch 19 - iter 1/5 - loss 0.03873584 - time (sec): 0.45 - samples/sec: 8483.55 - lr: 0.000046
2023-10-23 09:39:51,677 epoch 19 - iter 2/5 - loss 0.03605752 - time (sec): 1.00 - samples/sec: 8972.32 - lr: 0.000046
2023-10-23 09:39:52,298 epoch 19 - iter 3/5 - loss 0.03645679 - time (sec): 1.62 - samples/sec: 8526.00 - lr: 0.000046
2023-10-23 09:39:52,847 epoch 19 - iter 4/5 - loss 0.03960884 - time (sec): 2.17 - samples/sec: 8589.59 - lr: 0.000046
2023-10-23 09:39:53,043 epoch 19 - iter 5/5 - loss 0.03808367 - time (sec): 2.37 - samples/sec: 8480.19 - lr: 0.000046
2023-10-23 09:39:53,050 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:53,052 EPOCH 19 done: loss 0.0381 - lr 0.000046


100%|██████████| 2/2 [00:03<00:00,  1.65s/it]

2023-10-23 09:39:56,373 Evaluating as a multi-label problem: False
2023-10-23 09:39:56,394 DEV : loss 0.15310616791248322 - f1-score (micro avg)  0.8286
2023-10-23 09:39:56,405 ----------------------------------------------------------------------------------------------------





2023-10-23 09:39:56,885 epoch 20 - iter 1/5 - loss 0.02957829 - time (sec): 0.48 - samples/sec: 9991.39 - lr: 0.000046
2023-10-23 09:39:57,309 epoch 20 - iter 2/5 - loss 0.03730716 - time (sec): 0.90 - samples/sec: 10162.44 - lr: 0.000045
2023-10-23 09:39:57,689 epoch 20 - iter 3/5 - loss 0.03459658 - time (sec): 1.28 - samples/sec: 10256.19 - lr: 0.000045
2023-10-23 09:39:58,079 epoch 20 - iter 4/5 - loss 0.03168841 - time (sec): 1.67 - samples/sec: 10439.71 - lr: 0.000045
2023-10-23 09:39:58,307 epoch 20 - iter 5/5 - loss 0.03080140 - time (sec): 1.90 - samples/sec: 10569.63 - lr: 0.000045
2023-10-23 09:39:58,309 ----------------------------------------------------------------------------------------------------
2023-10-23 09:39:58,312 EPOCH 20 done: loss 0.0308 - lr 0.000045


100%|██████████| 2/2 [00:02<00:00,  1.31s/it]

2023-10-23 09:40:00,950 Evaluating as a multi-label problem: False
2023-10-23 09:40:00,967 DEV : loss 0.17377355694770813 - f1-score (micro avg)  0.825
2023-10-23 09:40:00,978 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:01,422 epoch 21 - iter 1/5 - loss 0.02246104 - time (sec): 0.44 - samples/sec: 10065.37 - lr: 0.000045
2023-10-23 09:40:01,844 epoch 21 - iter 2/5 - loss 0.02399118 - time (sec): 0.87 - samples/sec: 10431.31 - lr: 0.000045
2023-10-23 09:40:02,292 epoch 21 - iter 3/5 - loss 0.02364424 - time (sec): 1.31 - samples/sec: 10520.88 - lr: 0.000045
2023-10-23 09:40:02,751 epoch 21 - iter 4/5 - loss 0.02436137 - time (sec): 1.77 - samples/sec: 10529.45 - lr: 0.000045
2023-10-23 09:40:02,891 epoch 21 - iter 5/5 - loss 0.02621554 - time (sec): 1.91 - samples/sec: 10489.04 - lr: 0.000045
2023-10-23 09:40:02,893 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:02,897 EPOCH 21 done: loss 0.0262 - lr 0.000045


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:40:05,209 Evaluating as a multi-label problem: False
2023-10-23 09:40:05,234 DEV : loss 0.1825631707906723 - f1-score (micro avg)  0.8182
2023-10-23 09:40:05,245 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:05,747 epoch 22 - iter 1/5 - loss 0.01934219 - time (sec): 0.50 - samples/sec: 9238.87 - lr: 0.000044
2023-10-23 09:40:06,342 epoch 22 - iter 2/5 - loss 0.02348175 - time (sec): 1.10 - samples/sec: 8696.54 - lr: 0.000044
2023-10-23 09:40:06,849 epoch 22 - iter 3/5 - loss 0.02780051 - time (sec): 1.60 - samples/sec: 8779.41 - lr: 0.000044
2023-10-23 09:40:07,346 epoch 22 - iter 4/5 - loss 0.02587431 - time (sec): 2.10 - samples/sec: 8823.35 - lr: 0.000044
2023-10-23 09:40:07,505 epoch 22 - iter 5/5 - loss 0.02642800 - time (sec): 2.26 - samples/sec: 8880.59 - lr: 0.000044
2023-10-23 09:40:07,508 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:07,511 EPOCH 22 done: loss 0.0264 - lr 0.000044


100%|██████████| 2/2 [00:04<00:00,  2.48s/it]

2023-10-23 09:40:12,481 Evaluating as a multi-label problem: False
2023-10-23 09:40:12,498 DEV : loss 0.1817513257265091 - f1-score (micro avg)  0.8256
2023-10-23 09:40:12,509 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:12,997 epoch 23 - iter 1/5 - loss 0.01806262 - time (sec): 0.49 - samples/sec: 10786.84 - lr: 0.000044
2023-10-23 09:40:13,419 epoch 23 - iter 2/5 - loss 0.01983262 - time (sec): 0.91 - samples/sec: 10233.59 - lr: 0.000044
2023-10-23 09:40:13,826 epoch 23 - iter 3/5 - loss 0.01840432 - time (sec): 1.32 - samples/sec: 10473.88 - lr: 0.000044
2023-10-23 09:40:14,277 epoch 23 - iter 4/5 - loss 0.01922168 - time (sec): 1.77 - samples/sec: 10395.80 - lr: 0.000044
2023-10-23 09:40:14,419 epoch 23 - iter 5/5 - loss 0.01857573 - time (sec): 1.91 - samples/sec: 10507.84 - lr: 0.000044
2023-10-23 09:40:14,422 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:14,426 EPOCH 23 done: loss 0.0186 - lr 0.000044


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:40:16,715 Evaluating as a multi-label problem: False
2023-10-23 09:40:16,735 DEV : loss 0.1804637610912323 - f1-score (micro avg)  0.83
2023-10-23 09:40:16,746 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:17,162 epoch 24 - iter 1/5 - loss 0.02191787 - time (sec): 0.41 - samples/sec: 10764.75 - lr: 0.000043
2023-10-23 09:40:17,606 epoch 24 - iter 2/5 - loss 0.02095599 - time (sec): 0.86 - samples/sec: 10826.79 - lr: 0.000043
2023-10-23 09:40:18,073 epoch 24 - iter 3/5 - loss 0.01975914 - time (sec): 1.33 - samples/sec: 10855.43 - lr: 0.000043
2023-10-23 09:40:18,481 epoch 24 - iter 4/5 - loss 0.01933215 - time (sec): 1.73 - samples/sec: 10804.84 - lr: 0.000043
2023-10-23 09:40:18,593 epoch 24 - iter 5/5 - loss 0.01929708 - time (sec): 1.85 - samples/sec: 10866.38 - lr: 0.000043
2023-10-23 09:40:18,595 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:18,599 EPOCH 24 done: loss 0.0193 - lr 0.000043


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:40:20,921 Evaluating as a multi-label problem: False
2023-10-23 09:40:20,945 DEV : loss 0.18202677369117737 - f1-score (micro avg)  0.8238
2023-10-23 09:40:20,956 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:21,349 epoch 25 - iter 1/5 - loss 0.00873644 - time (sec): 0.39 - samples/sec: 11185.89 - lr: 0.000043
2023-10-23 09:40:22,103 epoch 25 - iter 2/5 - loss 0.01232713 - time (sec): 1.15 - samples/sec: 7571.01 - lr: 0.000043
2023-10-23 09:40:22,580 epoch 25 - iter 3/5 - loss 0.01294848 - time (sec): 1.62 - samples/sec: 8057.33 - lr: 0.000043
2023-10-23 09:40:23,167 epoch 25 - iter 4/5 - loss 0.01164660 - time (sec): 2.21 - samples/sec: 8138.39 - lr: 0.000043
2023-10-23 09:40:23,414 epoch 25 - iter 5/5 - loss 0.01147503 - time (sec): 2.46 - samples/sec: 8164.50 - lr: 0.000042
2023-10-23 09:40:23,420 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:23,423 EPOCH 25 done: loss 0.0115 - lr 0.000042


100%|██████████| 2/2 [00:04<00:00,  2.18s/it]

2023-10-23 09:40:27,796 Evaluating as a multi-label problem: False
2023-10-23 09:40:27,830 DEV : loss 0.17868097126483917 - f1-score (micro avg)  0.847
2023-10-23 09:40:27,847 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:28,386 epoch 26 - iter 1/5 - loss 0.01109913 - time (sec): 0.54 - samples/sec: 9047.92 - lr: 0.000042
2023-10-23 09:40:28,857 epoch 26 - iter 2/5 - loss 0.01003003 - time (sec): 1.01 - samples/sec: 9685.09 - lr: 0.000042
2023-10-23 09:40:29,232 epoch 26 - iter 3/5 - loss 0.01002461 - time (sec): 1.38 - samples/sec: 10070.24 - lr: 0.000042
2023-10-23 09:40:29,618 epoch 26 - iter 4/5 - loss 0.00995554 - time (sec): 1.77 - samples/sec: 10229.74 - lr: 0.000042
2023-10-23 09:40:29,823 epoch 26 - iter 5/5 - loss 0.01042471 - time (sec): 1.97 - samples/sec: 10163.68 - lr: 0.000042
2023-10-23 09:40:29,825 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:29,830 EPOCH 26 done: loss 0.0104 - lr 0.000042


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:40:32,136 Evaluating as a multi-label problem: False
2023-10-23 09:40:32,154 DEV : loss 0.19343407452106476 - f1-score (micro avg)  0.8505
2023-10-23 09:40:32,165 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:32,661 epoch 27 - iter 1/5 - loss 0.00696003 - time (sec): 0.50 - samples/sec: 10475.57 - lr: 0.000042
2023-10-23 09:40:33,520 epoch 27 - iter 2/5 - loss 0.00933052 - time (sec): 1.35 - samples/sec: 7598.60 - lr: 0.000042
2023-10-23 09:40:33,971 epoch 27 - iter 3/5 - loss 0.00902673 - time (sec): 1.80 - samples/sec: 8307.32 - lr: 0.000042
2023-10-23 09:40:34,356 epoch 27 - iter 4/5 - loss 0.00928499 - time (sec): 2.19 - samples/sec: 8618.82 - lr: 0.000042
2023-10-23 09:40:34,464 epoch 27 - iter 5/5 - loss 0.00916412 - time (sec): 2.30 - samples/sec: 8731.24 - lr: 0.000041
2023-10-23 09:40:34,466 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:34,469 EPOCH 27 done: loss 0.0092 - lr 0.000041


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:40:36,770 Evaluating as a multi-label problem: False
2023-10-23 09:40:36,790 DEV : loss 0.19854958355426788 - f1-score (micro avg)  0.8234
2023-10-23 09:40:36,800 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:37,206 epoch 28 - iter 1/5 - loss 0.01168089 - time (sec): 0.40 - samples/sec: 10197.35 - lr: 0.000041
2023-10-23 09:40:37,691 epoch 28 - iter 2/5 - loss 0.00787560 - time (sec): 0.89 - samples/sec: 10314.52 - lr: 0.000041
2023-10-23 09:40:38,143 epoch 28 - iter 3/5 - loss 0.00720046 - time (sec): 1.34 - samples/sec: 10618.38 - lr: 0.000041
2023-10-23 09:40:38,583 epoch 28 - iter 4/5 - loss 0.00771139 - time (sec): 1.78 - samples/sec: 10338.39 - lr: 0.000041
2023-10-23 09:40:38,783 epoch 28 - iter 5/5 - loss 0.00843879 - time (sec): 1.98 - samples/sec: 10123.86 - lr: 0.000041
2023-10-23 09:40:38,786 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:38,788 EPOCH 28 done: loss 0.0084 - lr 0.000041


100%|██████████| 2/2 [00:04<00:00,  2.23s/it]

2023-10-23 09:40:43,259 Evaluating as a multi-label problem: False
2023-10-23 09:40:43,294 DEV : loss 0.19700872898101807 - f1-score (micro avg)  0.8408
2023-10-23 09:40:43,316 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:43,989 epoch 29 - iter 1/5 - loss 0.00640658 - time (sec): 0.67 - samples/sec: 7967.39 - lr: 0.000041
2023-10-23 09:40:44,489 epoch 29 - iter 2/5 - loss 0.00777529 - time (sec): 1.17 - samples/sec: 8339.01 - lr: 0.000041
2023-10-23 09:40:44,952 epoch 29 - iter 3/5 - loss 0.01061425 - time (sec): 1.63 - samples/sec: 8465.13 - lr: 0.000041
2023-10-23 09:40:45,380 epoch 29 - iter 4/5 - loss 0.00956825 - time (sec): 2.06 - samples/sec: 8891.57 - lr: 0.000040
2023-10-23 09:40:45,563 epoch 29 - iter 5/5 - loss 0.00895994 - time (sec): 2.24 - samples/sec: 8940.14 - lr: 0.000040
2023-10-23 09:40:45,565 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:45,575 EPOCH 29 done: loss 0.0090 - lr 0.000040


100%|██████████| 2/2 [00:02<00:00,  1.32s/it]

2023-10-23 09:40:48,235 Evaluating as a multi-label problem: False
2023-10-23 09:40:48,256 DEV : loss 0.20675873756408691 - f1-score (micro avg)  0.8345
2023-10-23 09:40:48,267 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:48,729 epoch 30 - iter 1/5 - loss 0.01037450 - time (sec): 0.46 - samples/sec: 9670.92 - lr: 0.000040
2023-10-23 09:40:49,206 epoch 30 - iter 2/5 - loss 0.00879737 - time (sec): 0.94 - samples/sec: 10760.42 - lr: 0.000040
2023-10-23 09:40:49,630 epoch 30 - iter 3/5 - loss 0.00778668 - time (sec): 1.36 - samples/sec: 10714.75 - lr: 0.000040
2023-10-23 09:40:50,063 epoch 30 - iter 4/5 - loss 0.00700042 - time (sec): 1.79 - samples/sec: 10455.42 - lr: 0.000040
2023-10-23 09:40:50,202 epoch 30 - iter 5/5 - loss 0.00724709 - time (sec): 1.93 - samples/sec: 10371.38 - lr: 0.000040
2023-10-23 09:40:50,204 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:50,210 EPOCH 30 done: loss 0.0072 - lr 0.000040


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:40:52,508 Evaluating as a multi-label problem: False
2023-10-23 09:40:52,530 DEV : loss 0.2082379162311554 - f1-score (micro avg)  0.843
2023-10-23 09:40:52,541 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:53,045 epoch 31 - iter 1/5 - loss 0.00305510 - time (sec): 0.50 - samples/sec: 10279.49 - lr: 0.000040
2023-10-23 09:40:53,442 epoch 31 - iter 2/5 - loss 0.00421584 - time (sec): 0.90 - samples/sec: 10817.01 - lr: 0.000040
2023-10-23 09:40:53,915 epoch 31 - iter 3/5 - loss 0.00473765 - time (sec): 1.37 - samples/sec: 10722.95 - lr: 0.000040
2023-10-23 09:40:54,293 epoch 31 - iter 4/5 - loss 0.00543636 - time (sec): 1.75 - samples/sec: 10537.28 - lr: 0.000039
2023-10-23 09:40:54,452 epoch 31 - iter 5/5 - loss 0.00535738 - time (sec): 1.91 - samples/sec: 10504.19 - lr: 0.000039
2023-10-23 09:40:54,454 ----------------------------------------------------------------------------------------------------
2023-10-23 09:40:54,457 EPOCH 31 done: loss 0.0054 - lr 0.000039


100%|██████████| 2/2 [00:04<00:00,  2.42s/it]

2023-10-23 09:40:59,302 Evaluating as a multi-label problem: False
2023-10-23 09:40:59,333 DEV : loss 0.20785453915596008 - f1-score (micro avg)  0.8558
2023-10-23 09:40:59,350 ----------------------------------------------------------------------------------------------------





2023-10-23 09:40:59,911 epoch 32 - iter 1/5 - loss 0.00677056 - time (sec): 0.56 - samples/sec: 9302.54 - lr: 0.000039
2023-10-23 09:41:00,420 epoch 32 - iter 2/5 - loss 0.01192851 - time (sec): 1.07 - samples/sec: 9027.53 - lr: 0.000039
2023-10-23 09:41:00,999 epoch 32 - iter 3/5 - loss 0.00974858 - time (sec): 1.65 - samples/sec: 8798.07 - lr: 0.000039
2023-10-23 09:41:01,464 epoch 32 - iter 4/5 - loss 0.00845651 - time (sec): 2.11 - samples/sec: 8680.26 - lr: 0.000039
2023-10-23 09:41:01,624 epoch 32 - iter 5/5 - loss 0.00820805 - time (sec): 2.27 - samples/sec: 8830.57 - lr: 0.000039
2023-10-23 09:41:01,627 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:01,630 EPOCH 32 done: loss 0.0082 - lr 0.000039


100%|██████████| 2/2 [00:02<00:00,  1.18s/it]

2023-10-23 09:41:04,010 Evaluating as a multi-label problem: False
2023-10-23 09:41:04,028 DEV : loss 0.20814111828804016 - f1-score (micro avg)  0.8532
2023-10-23 09:41:04,040 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:04,525 epoch 33 - iter 1/5 - loss 0.01051986 - time (sec): 0.48 - samples/sec: 10398.04 - lr: 0.000039
2023-10-23 09:41:04,916 epoch 33 - iter 2/5 - loss 0.00693145 - time (sec): 0.88 - samples/sec: 10416.74 - lr: 0.000039
2023-10-23 09:41:05,368 epoch 33 - iter 3/5 - loss 0.00593572 - time (sec): 1.33 - samples/sec: 10329.22 - lr: 0.000038
2023-10-23 09:41:05,840 epoch 33 - iter 4/5 - loss 0.00528649 - time (sec): 1.80 - samples/sec: 10391.55 - lr: 0.000038
2023-10-23 09:41:05,967 epoch 33 - iter 5/5 - loss 0.00507797 - time (sec): 1.93 - samples/sec: 10415.24 - lr: 0.000038
2023-10-23 09:41:05,969 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:05,972 EPOCH 33 done: loss 0.0051 - lr 0.000038


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:41:08,283 Evaluating as a multi-label problem: False
2023-10-23 09:41:08,310 DEV : loss 0.21891479194164276 - f1-score (micro avg)  0.8404
2023-10-23 09:41:08,323 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:08,791 epoch 34 - iter 1/5 - loss 0.00289750 - time (sec): 0.47 - samples/sec: 10228.90 - lr: 0.000038
2023-10-23 09:41:09,636 epoch 34 - iter 2/5 - loss 0.00342087 - time (sec): 1.31 - samples/sec: 6970.93 - lr: 0.000038
2023-10-23 09:41:10,039 epoch 34 - iter 3/5 - loss 0.00362412 - time (sec): 1.71 - samples/sec: 8101.80 - lr: 0.000038
2023-10-23 09:41:10,465 epoch 34 - iter 4/5 - loss 0.00487393 - time (sec): 2.14 - samples/sec: 8635.99 - lr: 0.000038
2023-10-23 09:41:10,641 epoch 34 - iter 5/5 - loss 0.00469816 - time (sec): 2.32 - samples/sec: 8657.43 - lr: 0.000038
2023-10-23 09:41:10,643 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:10,647 EPOCH 34 done: loss 0.0047 - lr 0.000038


100%|██████████| 2/2 [00:04<00:00,  2.02s/it]

2023-10-23 09:41:14,701 Evaluating as a multi-label problem: False
2023-10-23 09:41:14,729 DEV : loss 0.21066424250602722 - f1-score (micro avg)  0.854
2023-10-23 09:41:14,746 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:15,273 epoch 35 - iter 1/5 - loss 0.00481575 - time (sec): 0.52 - samples/sec: 8868.10 - lr: 0.000038
2023-10-23 09:41:15,827 epoch 35 - iter 2/5 - loss 0.00465356 - time (sec): 1.08 - samples/sec: 8419.36 - lr: 0.000038
2023-10-23 09:41:16,314 epoch 35 - iter 3/5 - loss 0.00385321 - time (sec): 1.56 - samples/sec: 8384.15 - lr: 0.000037
2023-10-23 09:41:16,965 epoch 35 - iter 4/5 - loss 0.00368250 - time (sec): 2.22 - samples/sec: 8228.57 - lr: 0.000037
2023-10-23 09:41:17,208 epoch 35 - iter 5/5 - loss 0.00347233 - time (sec): 2.46 - samples/sec: 8157.64 - lr: 0.000037
2023-10-23 09:41:17,213 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:17,217 EPOCH 35 done: loss 0.0035 - lr 0.000037


100%|██████████| 2/2 [00:02<00:00,  1.26s/it]

2023-10-23 09:41:19,748 Evaluating as a multi-label problem: False
2023-10-23 09:41:19,769 DEV : loss 0.21673844754695892 - f1-score (micro avg)  0.8347
2023-10-23 09:41:19,780 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:20,606 epoch 36 - iter 1/5 - loss 0.00200580 - time (sec): 0.82 - samples/sec: 5705.58 - lr: 0.000037
2023-10-23 09:41:21,046 epoch 36 - iter 2/5 - loss 0.00779650 - time (sec): 1.26 - samples/sec: 7725.61 - lr: 0.000037
2023-10-23 09:41:21,525 epoch 36 - iter 3/5 - loss 0.00761829 - time (sec): 1.74 - samples/sec: 8441.48 - lr: 0.000037
2023-10-23 09:41:21,900 epoch 36 - iter 4/5 - loss 0.00762290 - time (sec): 2.12 - samples/sec: 8655.02 - lr: 0.000037
2023-10-23 09:41:22,064 epoch 36 - iter 5/5 - loss 0.00738567 - time (sec): 2.28 - samples/sec: 8789.93 - lr: 0.000037
2023-10-23 09:41:22,066 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:22,070 EPOCH 36 done: loss 0.0074 - lr 0.000037


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:41:24,361 Evaluating as a multi-label problem: False
2023-10-23 09:41:24,382 DEV : loss 0.2469092160463333 - f1-score (micro avg)  0.8192
2023-10-23 09:41:24,394 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:24,905 epoch 37 - iter 1/5 - loss 0.00965944 - time (sec): 0.51 - samples/sec: 10144.67 - lr: 0.000037
2023-10-23 09:41:25,341 epoch 37 - iter 2/5 - loss 0.00726425 - time (sec): 0.95 - samples/sec: 10433.70 - lr: 0.000037
2023-10-23 09:41:25,742 epoch 37 - iter 3/5 - loss 0.00793383 - time (sec): 1.35 - samples/sec: 10826.89 - lr: 0.000036
2023-10-23 09:41:26,127 epoch 37 - iter 4/5 - loss 0.00768174 - time (sec): 1.73 - samples/sec: 10662.29 - lr: 0.000036
2023-10-23 09:41:26,299 epoch 37 - iter 5/5 - loss 0.00731436 - time (sec): 1.90 - samples/sec: 10539.01 - lr: 0.000036
2023-10-23 09:41:26,301 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:26,304 EPOCH 37 done: loss 0.0073 - lr 0.000036


100%|██████████| 2/2 [00:03<00:00,  1.59s/it]

2023-10-23 09:41:29,499 Evaluating as a multi-label problem: False
2023-10-23 09:41:29,534 DEV : loss 0.23285284638404846 - f1-score (micro avg)  0.8319
2023-10-23 09:41:29,555 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:30,507 epoch 38 - iter 1/5 - loss 0.00311590 - time (sec): 0.95 - samples/sec: 4257.10 - lr: 0.000036
2023-10-23 09:41:30,964 epoch 38 - iter 2/5 - loss 0.00242579 - time (sec): 1.41 - samples/sec: 5875.86 - lr: 0.000036
2023-10-23 09:41:31,555 epoch 38 - iter 3/5 - loss 0.00304280 - time (sec): 2.00 - samples/sec: 6488.04 - lr: 0.000036
2023-10-23 09:41:32,134 epoch 38 - iter 4/5 - loss 0.00293961 - time (sec): 2.58 - samples/sec: 7005.32 - lr: 0.000036
2023-10-23 09:41:32,354 epoch 38 - iter 5/5 - loss 0.00382965 - time (sec): 2.80 - samples/sec: 7171.89 - lr: 0.000036
2023-10-23 09:41:32,360 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:32,363 EPOCH 38 done: loss 0.0038 - lr 0.000036


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-10-23 09:41:35,471 Evaluating as a multi-label problem: False
2023-10-23 09:41:35,493 DEV : loss 0.22996902465820312 - f1-score (micro avg)  0.8604
2023-10-23 09:41:35,504 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:35,913 epoch 39 - iter 1/5 - loss 0.00342766 - time (sec): 0.41 - samples/sec: 10408.19 - lr: 0.000036
2023-10-23 09:41:36,371 epoch 39 - iter 2/5 - loss 0.00380876 - time (sec): 0.86 - samples/sec: 9583.56 - lr: 0.000035
2023-10-23 09:41:36,807 epoch 39 - iter 3/5 - loss 0.00442702 - time (sec): 1.30 - samples/sec: 9906.32 - lr: 0.000035
2023-10-23 09:41:37,335 epoch 39 - iter 4/5 - loss 0.00373783 - time (sec): 1.83 - samples/sec: 9987.90 - lr: 0.000035
2023-10-23 09:41:37,506 epoch 39 - iter 5/5 - loss 0.00358154 - time (sec): 2.00 - samples/sec: 10033.10 - lr: 0.000035
2023-10-23 09:41:37,508 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:37,513 EPOCH 39 done: loss 0.0036 - lr 0.000035


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:41:39,824 Evaluating as a multi-label problem: False
2023-10-23 09:41:39,842 DEV : loss 0.22206102311611176 - f1-score (micro avg)  0.85
2023-10-23 09:41:39,854 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:40,221 epoch 40 - iter 1/5 - loss 0.00497645 - time (sec): 0.37 - samples/sec: 10341.67 - lr: 0.000035
2023-10-23 09:41:40,986 epoch 40 - iter 2/5 - loss 0.00424481 - time (sec): 1.13 - samples/sec: 6960.13 - lr: 0.000035
2023-10-23 09:41:41,373 epoch 40 - iter 3/5 - loss 0.00355387 - time (sec): 1.52 - samples/sec: 8106.19 - lr: 0.000035
2023-10-23 09:41:41,859 epoch 40 - iter 4/5 - loss 0.00452588 - time (sec): 2.00 - samples/sec: 8950.27 - lr: 0.000035
2023-10-23 09:41:42,049 epoch 40 - iter 5/5 - loss 0.00437602 - time (sec): 2.19 - samples/sec: 9146.00 - lr: 0.000035
2023-10-23 09:41:42,051 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:42,054 EPOCH 40 done: loss 0.0044 - lr 0.000035


100%|██████████| 2/2 [00:02<00:00,  1.24s/it]

2023-10-23 09:41:44,556 Evaluating as a multi-label problem: False
2023-10-23 09:41:44,582 DEV : loss 0.21825404465198517 - f1-score (micro avg)  0.8505
2023-10-23 09:41:44,601 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:45,145 epoch 41 - iter 1/5 - loss 0.00310929 - time (sec): 0.54 - samples/sec: 8557.55 - lr: 0.000035
2023-10-23 09:41:45,742 epoch 41 - iter 2/5 - loss 0.00403154 - time (sec): 1.14 - samples/sec: 8565.70 - lr: 0.000034
2023-10-23 09:41:46,233 epoch 41 - iter 3/5 - loss 0.00368095 - time (sec): 1.63 - samples/sec: 8453.82 - lr: 0.000034
2023-10-23 09:41:46,750 epoch 41 - iter 4/5 - loss 0.00372127 - time (sec): 2.15 - samples/sec: 8594.44 - lr: 0.000034
2023-10-23 09:41:46,969 epoch 41 - iter 5/5 - loss 0.00363967 - time (sec): 2.37 - samples/sec: 8482.20 - lr: 0.000034
2023-10-23 09:41:46,974 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:46,977 EPOCH 41 done: loss 0.0036 - lr 0.000034


100%|██████████| 2/2 [00:03<00:00,  1.98s/it]

2023-10-23 09:41:50,952 Evaluating as a multi-label problem: False
2023-10-23 09:41:50,972 DEV : loss 0.223564013838768 - f1-score (micro avg)  0.8494
2023-10-23 09:41:50,984 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:51,394 epoch 42 - iter 1/5 - loss 0.00448887 - time (sec): 0.41 - samples/sec: 10821.71 - lr: 0.000034
2023-10-23 09:41:52,167 epoch 42 - iter 2/5 - loss 0.00317047 - time (sec): 1.18 - samples/sec: 7598.46 - lr: 0.000034
2023-10-23 09:41:52,633 epoch 42 - iter 3/5 - loss 0.00515895 - time (sec): 1.65 - samples/sec: 8326.33 - lr: 0.000034
2023-10-23 09:41:53,123 epoch 42 - iter 4/5 - loss 0.00430029 - time (sec): 2.14 - samples/sec: 8750.30 - lr: 0.000034
2023-10-23 09:41:53,252 epoch 42 - iter 5/5 - loss 0.00429650 - time (sec): 2.27 - samples/sec: 8850.28 - lr: 0.000034
2023-10-23 09:41:53,254 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:53,258 EPOCH 42 done: loss 0.0043 - lr 0.000034


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:41:55,576 Evaluating as a multi-label problem: False
2023-10-23 09:41:55,602 DEV : loss 0.23387272655963898 - f1-score (micro avg)  0.8566
2023-10-23 09:41:55,612 ----------------------------------------------------------------------------------------------------





2023-10-23 09:41:56,029 epoch 43 - iter 1/5 - loss 0.00161886 - time (sec): 0.42 - samples/sec: 11365.95 - lr: 0.000033
2023-10-23 09:41:56,902 epoch 43 - iter 2/5 - loss 0.00268231 - time (sec): 1.29 - samples/sec: 7349.04 - lr: 0.000033
2023-10-23 09:41:57,310 epoch 43 - iter 3/5 - loss 0.00338518 - time (sec): 1.70 - samples/sec: 8324.90 - lr: 0.000033
2023-10-23 09:41:57,747 epoch 43 - iter 4/5 - loss 0.00276787 - time (sec): 2.13 - samples/sec: 8802.22 - lr: 0.000033
2023-10-23 09:41:57,867 epoch 43 - iter 5/5 - loss 0.00264716 - time (sec): 2.25 - samples/sec: 8900.42 - lr: 0.000033
2023-10-23 09:41:57,870 ----------------------------------------------------------------------------------------------------
2023-10-23 09:41:57,872 EPOCH 43 done: loss 0.0026 - lr 0.000033


100%|██████████| 2/2 [00:02<00:00,  1.16s/it]

2023-10-23 09:42:00,201 Evaluating as a multi-label problem: False
2023-10-23 09:42:00,225 DEV : loss 0.2287028431892395 - f1-score (micro avg)  0.8459
2023-10-23 09:42:00,237 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:00,644 epoch 44 - iter 1/5 - loss 0.00119040 - time (sec): 0.41 - samples/sec: 11074.92 - lr: 0.000033
2023-10-23 09:42:01,647 epoch 44 - iter 2/5 - loss 0.00178879 - time (sec): 1.41 - samples/sec: 6196.53 - lr: 0.000033
2023-10-23 09:42:02,148 epoch 44 - iter 3/5 - loss 0.00232593 - time (sec): 1.91 - samples/sec: 6761.69 - lr: 0.000033
2023-10-23 09:42:02,767 epoch 44 - iter 4/5 - loss 0.00199596 - time (sec): 2.53 - samples/sec: 7177.48 - lr: 0.000033
2023-10-23 09:42:02,992 epoch 44 - iter 5/5 - loss 0.00209195 - time (sec): 2.75 - samples/sec: 7286.33 - lr: 0.000033
2023-10-23 09:42:02,995 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:02,998 EPOCH 44 done: loss 0.0021 - lr 0.000033


100%|██████████| 2/2 [00:04<00:00,  2.04s/it]

2023-10-23 09:42:07,092 Evaluating as a multi-label problem: False
2023-10-23 09:42:07,121 DEV : loss 0.2389345020055771 - f1-score (micro avg)  0.8398
2023-10-23 09:42:07,133 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:07,601 epoch 45 - iter 1/5 - loss 0.00110822 - time (sec): 0.47 - samples/sec: 10011.55 - lr: 0.000032
2023-10-23 09:42:07,986 epoch 45 - iter 2/5 - loss 0.00143548 - time (sec): 0.85 - samples/sec: 10538.12 - lr: 0.000032
2023-10-23 09:42:08,440 epoch 45 - iter 3/5 - loss 0.00209784 - time (sec): 1.31 - samples/sec: 10766.65 - lr: 0.000032
2023-10-23 09:42:08,955 epoch 45 - iter 4/5 - loss 0.00194927 - time (sec): 1.82 - samples/sec: 10325.52 - lr: 0.000032
2023-10-23 09:42:09,080 epoch 45 - iter 5/5 - loss 0.00192089 - time (sec): 1.95 - samples/sec: 10308.81 - lr: 0.000032
2023-10-23 09:42:09,082 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:09,086 EPOCH 45 done: loss 0.0019 - lr 0.000032


100%|██████████| 2/2 [00:02<00:00,  1.16s/it]

2023-10-23 09:42:11,417 Evaluating as a multi-label problem: False
2023-10-23 09:42:11,443 DEV : loss 0.2445947527885437 - f1-score (micro avg)  0.8509
2023-10-23 09:42:11,456 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:12,245 epoch 46 - iter 1/5 - loss 0.00066898 - time (sec): 0.79 - samples/sec: 5582.92 - lr: 0.000032
2023-10-23 09:42:12,710 epoch 46 - iter 2/5 - loss 0.00088456 - time (sec): 1.25 - samples/sec: 7296.36 - lr: 0.000032
2023-10-23 09:42:13,087 epoch 46 - iter 3/5 - loss 0.00126870 - time (sec): 1.63 - samples/sec: 8038.69 - lr: 0.000032
2023-10-23 09:42:13,537 epoch 46 - iter 4/5 - loss 0.00143278 - time (sec): 2.08 - samples/sec: 8765.85 - lr: 0.000032
2023-10-23 09:42:13,736 epoch 46 - iter 5/5 - loss 0.00133471 - time (sec): 2.28 - samples/sec: 8804.00 - lr: 0.000031
2023-10-23 09:42:13,738 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:13,744 EPOCH 46 done: loss 0.0013 - lr 0.000031


100%|██████████| 2/2 [00:02<00:00,  1.16s/it]

2023-10-23 09:42:16,068 Evaluating as a multi-label problem: False
2023-10-23 09:42:16,093 DEV : loss 0.25042393803596497 - f1-score (micro avg)  0.8624
2023-10-23 09:42:16,109 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:16,522 epoch 47 - iter 1/5 - loss 0.00183109 - time (sec): 0.41 - samples/sec: 10032.22 - lr: 0.000031
2023-10-23 09:42:16,903 epoch 47 - iter 2/5 - loss 0.00134606 - time (sec): 0.79 - samples/sec: 9699.37 - lr: 0.000031
2023-10-23 09:42:17,516 epoch 47 - iter 3/5 - loss 0.00110814 - time (sec): 1.41 - samples/sec: 9404.86 - lr: 0.000031
2023-10-23 09:42:18,058 epoch 47 - iter 4/5 - loss 0.00128788 - time (sec): 1.95 - samples/sec: 9335.74 - lr: 0.000031
2023-10-23 09:42:18,285 epoch 47 - iter 5/5 - loss 0.00122247 - time (sec): 2.17 - samples/sec: 9225.95 - lr: 0.000031
2023-10-23 09:42:18,289 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:18,291 EPOCH 47 done: loss 0.0012 - lr 0.000031


100%|██████████| 2/2 [00:04<00:00,  2.20s/it]

2023-10-23 09:42:22,701 Evaluating as a multi-label problem: False
2023-10-23 09:42:22,729 DEV : loss 0.26171666383743286 - f1-score (micro avg)  0.8477
2023-10-23 09:42:22,751 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:23,308 epoch 48 - iter 1/5 - loss 0.00183343 - time (sec): 0.55 - samples/sec: 9029.17 - lr: 0.000031
2023-10-23 09:42:23,700 epoch 48 - iter 2/5 - loss 0.00185055 - time (sec): 0.94 - samples/sec: 9246.11 - lr: 0.000031
2023-10-23 09:42:24,126 epoch 48 - iter 3/5 - loss 0.00208100 - time (sec): 1.37 - samples/sec: 9820.91 - lr: 0.000031
2023-10-23 09:42:24,613 epoch 48 - iter 4/5 - loss 0.00173016 - time (sec): 1.86 - samples/sec: 9967.13 - lr: 0.000031
2023-10-23 09:42:24,753 epoch 48 - iter 5/5 - loss 0.00162423 - time (sec): 2.00 - samples/sec: 10043.33 - lr: 0.000030
2023-10-23 09:42:24,755 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:24,761 EPOCH 48 done: loss 0.0016 - lr 0.000030


100%|██████████| 2/2 [00:02<00:00,  1.31s/it]

2023-10-23 09:42:27,393 Evaluating as a multi-label problem: False
2023-10-23 09:42:27,423 DEV : loss 0.2505866587162018 - f1-score (micro avg)  0.8603
2023-10-23 09:42:27,437 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:27,881 epoch 49 - iter 1/5 - loss 0.00071749 - time (sec): 0.44 - samples/sec: 10076.00 - lr: 0.000030
2023-10-23 09:42:28,285 epoch 49 - iter 2/5 - loss 0.00174047 - time (sec): 0.85 - samples/sec: 10380.60 - lr: 0.000030
2023-10-23 09:42:28,697 epoch 49 - iter 3/5 - loss 0.00227790 - time (sec): 1.26 - samples/sec: 10512.32 - lr: 0.000030
2023-10-23 09:42:29,199 epoch 49 - iter 4/5 - loss 0.00182556 - time (sec): 1.76 - samples/sec: 10516.81 - lr: 0.000030
2023-10-23 09:42:29,336 epoch 49 - iter 5/5 - loss 0.00214839 - time (sec): 1.90 - samples/sec: 10574.00 - lr: 0.000030
2023-10-23 09:42:29,338 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:29,345 EPOCH 49 done: loss 0.0021 - lr 0.000030


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:42:31,637 Evaluating as a multi-label problem: False
2023-10-23 09:42:31,658 DEV : loss 0.2755885124206543 - f1-score (micro avg)  0.8351
2023-10-23 09:42:31,670 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:32,113 epoch 50 - iter 1/5 - loss 0.00098846 - time (sec): 0.44 - samples/sec: 10403.58 - lr: 0.000030
2023-10-23 09:42:32,488 epoch 50 - iter 2/5 - loss 0.00348854 - time (sec): 0.82 - samples/sec: 9820.78 - lr: 0.000030
2023-10-23 09:42:32,903 epoch 50 - iter 3/5 - loss 0.00298046 - time (sec): 1.23 - samples/sec: 10313.33 - lr: 0.000030
2023-10-23 09:42:33,408 epoch 50 - iter 4/5 - loss 0.00288590 - time (sec): 1.74 - samples/sec: 10328.62 - lr: 0.000029
2023-10-23 09:42:33,673 epoch 50 - iter 5/5 - loss 0.00259342 - time (sec): 2.00 - samples/sec: 10023.37 - lr: 0.000029
2023-10-23 09:42:33,679 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:33,681 EPOCH 50 done: loss 0.0026 - lr 0.000029


100%|██████████| 2/2 [00:04<00:00,  2.41s/it]

2023-10-23 09:42:38,514 Evaluating as a multi-label problem: False
2023-10-23 09:42:38,546 DEV : loss 0.2646978199481964 - f1-score (micro avg)  0.8468
2023-10-23 09:42:38,564 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:39,068 epoch 51 - iter 1/5 - loss 0.00081175 - time (sec): 0.50 - samples/sec: 8487.88 - lr: 0.000029
2023-10-23 09:42:39,649 epoch 51 - iter 2/5 - loss 0.00074572 - time (sec): 1.08 - samples/sec: 8099.55 - lr: 0.000029
2023-10-23 09:42:40,092 epoch 51 - iter 3/5 - loss 0.00114232 - time (sec): 1.53 - samples/sec: 9042.84 - lr: 0.000029
2023-10-23 09:42:40,495 epoch 51 - iter 4/5 - loss 0.00145896 - time (sec): 1.93 - samples/sec: 9292.81 - lr: 0.000029
2023-10-23 09:42:40,712 epoch 51 - iter 5/5 - loss 0.00181517 - time (sec): 2.15 - samples/sec: 9345.47 - lr: 0.000029
2023-10-23 09:42:40,714 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:40,717 EPOCH 51 done: loss 0.0018 - lr 0.000029


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:42:43,011 Evaluating as a multi-label problem: False
2023-10-23 09:42:43,028 DEV : loss 0.26938894391059875 - f1-score (micro avg)  0.8383
2023-10-23 09:42:43,040 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:43,416 epoch 52 - iter 1/5 - loss 0.00374527 - time (sec): 0.38 - samples/sec: 11612.47 - lr: 0.000029
2023-10-23 09:42:43,818 epoch 52 - iter 2/5 - loss 0.00484220 - time (sec): 0.78 - samples/sec: 10914.83 - lr: 0.000029
2023-10-23 09:42:44,294 epoch 52 - iter 3/5 - loss 0.00315772 - time (sec): 1.25 - samples/sec: 10701.15 - lr: 0.000029
2023-10-23 09:42:44,713 epoch 52 - iter 4/5 - loss 0.00260573 - time (sec): 1.67 - samples/sec: 10617.49 - lr: 0.000028
2023-10-23 09:42:44,936 epoch 52 - iter 5/5 - loss 0.00240881 - time (sec): 1.90 - samples/sec: 10586.21 - lr: 0.000028
2023-10-23 09:42:44,938 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:44,944 EPOCH 52 done: loss 0.0024 - lr 0.000028


100%|██████████| 2/2 [00:02<00:00,  1.31s/it]

2023-10-23 09:42:47,588 Evaluating as a multi-label problem: False
2023-10-23 09:42:47,612 DEV : loss 0.2563422620296478 - f1-score (micro avg)  0.8444
2023-10-23 09:42:47,623 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:48,006 epoch 53 - iter 1/5 - loss 0.00026450 - time (sec): 0.38 - samples/sec: 10527.97 - lr: 0.000028
2023-10-23 09:42:48,412 epoch 53 - iter 2/5 - loss 0.00076604 - time (sec): 0.79 - samples/sec: 10134.23 - lr: 0.000028
2023-10-23 09:42:48,890 epoch 53 - iter 3/5 - loss 0.00071562 - time (sec): 1.27 - samples/sec: 10732.90 - lr: 0.000028
2023-10-23 09:42:49,317 epoch 53 - iter 4/5 - loss 0.00106318 - time (sec): 1.69 - samples/sec: 10580.07 - lr: 0.000028
2023-10-23 09:42:49,539 epoch 53 - iter 5/5 - loss 0.00102458 - time (sec): 1.91 - samples/sec: 10478.76 - lr: 0.000028
2023-10-23 09:42:49,541 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:49,546 EPOCH 53 done: loss 0.0010 - lr 0.000028


100%|██████████| 2/2 [00:04<00:00,  2.28s/it]

2023-10-23 09:42:54,120 Evaluating as a multi-label problem: False
2023-10-23 09:42:54,149 DEV : loss 0.2509935796260834 - f1-score (micro avg)  0.8464
2023-10-23 09:42:54,166 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:54,668 epoch 54 - iter 1/5 - loss 0.00091704 - time (sec): 0.50 - samples/sec: 8788.38 - lr: 0.000028
2023-10-23 09:42:55,183 epoch 54 - iter 2/5 - loss 0.00364943 - time (sec): 1.01 - samples/sec: 9129.14 - lr: 0.000028
2023-10-23 09:42:55,758 epoch 54 - iter 3/5 - loss 0.00263069 - time (sec): 1.59 - samples/sec: 8919.71 - lr: 0.000028
2023-10-23 09:42:56,271 epoch 54 - iter 4/5 - loss 0.00211808 - time (sec): 2.10 - samples/sec: 8624.64 - lr: 0.000027
2023-10-23 09:42:56,442 epoch 54 - iter 5/5 - loss 0.00195566 - time (sec): 2.27 - samples/sec: 8823.30 - lr: 0.000027
2023-10-23 09:42:56,444 ----------------------------------------------------------------------------------------------------
2023-10-23 09:42:56,450 EPOCH 54 done: loss 0.0020 - lr 0.000027


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:42:58,769 Evaluating as a multi-label problem: False
2023-10-23 09:42:58,790 DEV : loss 0.2623606324195862 - f1-score (micro avg)  0.8406
2023-10-23 09:42:58,802 ----------------------------------------------------------------------------------------------------





2023-10-23 09:42:59,607 epoch 55 - iter 1/5 - loss 0.00143930 - time (sec): 0.80 - samples/sec: 5866.26 - lr: 0.000027
2023-10-23 09:43:00,113 epoch 55 - iter 2/5 - loss 0.00164589 - time (sec): 1.31 - samples/sec: 7560.35 - lr: 0.000027
2023-10-23 09:43:00,497 epoch 55 - iter 3/5 - loss 0.00128421 - time (sec): 1.69 - samples/sec: 8204.26 - lr: 0.000027
2023-10-23 09:43:00,898 epoch 55 - iter 4/5 - loss 0.00113418 - time (sec): 2.09 - samples/sec: 8701.08 - lr: 0.000027
2023-10-23 09:43:01,076 epoch 55 - iter 5/5 - loss 0.00107528 - time (sec): 2.27 - samples/sec: 8829.46 - lr: 0.000027
2023-10-23 09:43:01,078 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:01,084 EPOCH 55 done: loss 0.0011 - lr 0.000027


100%|██████████| 2/2 [00:02<00:00,  1.17s/it]

2023-10-23 09:43:03,434 Evaluating as a multi-label problem: False
2023-10-23 09:43:03,457 DEV : loss 0.2749468684196472 - f1-score (micro avg)  0.8384
2023-10-23 09:43:03,470 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:03,867 epoch 56 - iter 1/5 - loss 0.00022859 - time (sec): 0.40 - samples/sec: 10198.54 - lr: 0.000027
2023-10-23 09:43:04,302 epoch 56 - iter 2/5 - loss 0.00079758 - time (sec): 0.83 - samples/sec: 10377.62 - lr: 0.000027
2023-10-23 09:43:04,730 epoch 56 - iter 3/5 - loss 0.00093152 - time (sec): 1.26 - samples/sec: 10373.02 - lr: 0.000026
2023-10-23 09:43:05,187 epoch 56 - iter 4/5 - loss 0.00115684 - time (sec): 1.72 - samples/sec: 10558.65 - lr: 0.000026
2023-10-23 09:43:05,348 epoch 56 - iter 5/5 - loss 0.00108014 - time (sec): 1.88 - samples/sec: 10687.80 - lr: 0.000026
2023-10-23 09:43:05,351 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:05,352 EPOCH 56 done: loss 0.0011 - lr 0.000026


100%|██████████| 2/2 [00:03<00:00,  1.94s/it]

2023-10-23 09:43:09,244 Evaluating as a multi-label problem: False
2023-10-23 09:43:09,273 DEV : loss 0.24890045821666718 - f1-score (micro avg)  0.849
2023-10-23 09:43:09,293 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:09,863 epoch 57 - iter 1/5 - loss 0.00191649 - time (sec): 0.57 - samples/sec: 7565.51 - lr: 0.000026
2023-10-23 09:43:10,391 epoch 57 - iter 2/5 - loss 0.00111239 - time (sec): 1.09 - samples/sec: 8007.23 - lr: 0.000026
2023-10-23 09:43:10,872 epoch 57 - iter 3/5 - loss 0.00107906 - time (sec): 1.57 - samples/sec: 8553.72 - lr: 0.000026
2023-10-23 09:43:11,366 epoch 57 - iter 4/5 - loss 0.00086182 - time (sec): 2.07 - samples/sec: 8544.12 - lr: 0.000026
2023-10-23 09:43:11,654 epoch 57 - iter 5/5 - loss 0.00091692 - time (sec): 2.36 - samples/sec: 8511.69 - lr: 0.000026
2023-10-23 09:43:11,657 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:11,660 EPOCH 57 done: loss 0.0009 - lr 0.000026


100%|██████████| 2/2 [00:03<00:00,  1.50s/it]

2023-10-23 09:43:14,680 Evaluating as a multi-label problem: False
2023-10-23 09:43:14,699 DEV : loss 0.24655407667160034 - f1-score (micro avg)  0.8516
2023-10-23 09:43:14,714 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:15,153 epoch 58 - iter 1/5 - loss 0.00023128 - time (sec): 0.44 - samples/sec: 10054.32 - lr: 0.000026
2023-10-23 09:43:15,659 epoch 58 - iter 2/5 - loss 0.00062370 - time (sec): 0.94 - samples/sec: 9691.92 - lr: 0.000026
2023-10-23 09:43:16,022 epoch 58 - iter 3/5 - loss 0.00048075 - time (sec): 1.31 - samples/sec: 10240.64 - lr: 0.000025
2023-10-23 09:43:16,403 epoch 58 - iter 4/5 - loss 0.00040425 - time (sec): 1.69 - samples/sec: 10666.53 - lr: 0.000025
2023-10-23 09:43:16,582 epoch 58 - iter 5/5 - loss 0.00086754 - time (sec): 1.87 - samples/sec: 10747.50 - lr: 0.000025
2023-10-23 09:43:16,584 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:16,589 EPOCH 58 done: loss 0.0009 - lr 0.000025


100%|██████████| 2/2 [00:02<00:00,  1.17s/it]

2023-10-23 09:43:18,943 Evaluating as a multi-label problem: False
2023-10-23 09:43:18,960 DEV : loss 0.26340770721435547 - f1-score (micro avg)  0.8582
2023-10-23 09:43:18,971 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:19,371 epoch 59 - iter 1/5 - loss 0.00214062 - time (sec): 0.40 - samples/sec: 11040.80 - lr: 0.000025
2023-10-23 09:43:19,865 epoch 59 - iter 2/5 - loss 0.00196345 - time (sec): 0.89 - samples/sec: 10763.37 - lr: 0.000025
2023-10-23 09:43:20,243 epoch 59 - iter 3/5 - loss 0.00173797 - time (sec): 1.27 - samples/sec: 10592.76 - lr: 0.000025
2023-10-23 09:43:20,756 epoch 59 - iter 4/5 - loss 0.00137885 - time (sec): 1.78 - samples/sec: 10439.56 - lr: 0.000025
2023-10-23 09:43:20,921 epoch 59 - iter 5/5 - loss 0.00129484 - time (sec): 1.95 - samples/sec: 10297.57 - lr: 0.000025
2023-10-23 09:43:20,923 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:20,927 EPOCH 59 done: loss 0.0013 - lr 0.000025


100%|██████████| 2/2 [00:03<00:00,  1.80s/it]

2023-10-23 09:43:24,549 Evaluating as a multi-label problem: False
2023-10-23 09:43:24,576 DEV : loss 0.2693980634212494 - f1-score (micro avg)  0.8596
2023-10-23 09:43:24,599 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:25,121 epoch 60 - iter 1/5 - loss 0.00031155 - time (sec): 0.52 - samples/sec: 8317.00 - lr: 0.000025
2023-10-23 09:43:25,627 epoch 60 - iter 2/5 - loss 0.00161586 - time (sec): 1.02 - samples/sec: 8365.70 - lr: 0.000024
2023-10-23 09:43:26,204 epoch 60 - iter 3/5 - loss 0.00165935 - time (sec): 1.60 - samples/sec: 8485.04 - lr: 0.000024
2023-10-23 09:43:26,805 epoch 60 - iter 4/5 - loss 0.00125873 - time (sec): 2.20 - samples/sec: 8574.50 - lr: 0.000024
2023-10-23 09:43:26,950 epoch 60 - iter 5/5 - loss 0.00119480 - time (sec): 2.35 - samples/sec: 8552.96 - lr: 0.000024
2023-10-23 09:43:26,953 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:26,955 EPOCH 60 done: loss 0.0012 - lr 0.000024


100%|██████████| 2/2 [00:03<00:00,  1.65s/it]

2023-10-23 09:43:30,267 Evaluating as a multi-label problem: False
2023-10-23 09:43:30,287 DEV : loss 0.26430946588516235 - f1-score (micro avg)  0.8523
2023-10-23 09:43:30,299 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:30,722 epoch 61 - iter 1/5 - loss 0.00323163 - time (sec): 0.42 - samples/sec: 9478.93 - lr: 0.000024
2023-10-23 09:43:31,116 epoch 61 - iter 2/5 - loss 0.00171080 - time (sec): 0.81 - samples/sec: 10642.63 - lr: 0.000024
2023-10-23 09:43:31,592 epoch 61 - iter 3/5 - loss 0.00116941 - time (sec): 1.29 - samples/sec: 10447.91 - lr: 0.000024
2023-10-23 09:43:32,041 epoch 61 - iter 4/5 - loss 0.00101744 - time (sec): 1.74 - samples/sec: 10616.66 - lr: 0.000024
2023-10-23 09:43:32,230 epoch 61 - iter 5/5 - loss 0.00111876 - time (sec): 1.93 - samples/sec: 10401.78 - lr: 0.000024
2023-10-23 09:43:32,232 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:32,234 EPOCH 61 done: loss 0.0011 - lr 0.000024


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:43:34,529 Evaluating as a multi-label problem: False
2023-10-23 09:43:34,553 DEV : loss 0.2614254057407379 - f1-score (micro avg)  0.8513
2023-10-23 09:43:34,568 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:35,398 epoch 62 - iter 1/5 - loss 0.00126690 - time (sec): 0.83 - samples/sec: 5803.63 - lr: 0.000024
2023-10-23 09:43:35,884 epoch 62 - iter 2/5 - loss 0.00072121 - time (sec): 1.31 - samples/sec: 7873.96 - lr: 0.000023
2023-10-23 09:43:36,271 epoch 62 - iter 3/5 - loss 0.00056509 - time (sec): 1.70 - samples/sec: 8388.72 - lr: 0.000023
2023-10-23 09:43:36,683 epoch 62 - iter 4/5 - loss 0.00087462 - time (sec): 2.11 - samples/sec: 8802.36 - lr: 0.000023
2023-10-23 09:43:36,813 epoch 62 - iter 5/5 - loss 0.00082097 - time (sec): 2.24 - samples/sec: 8940.77 - lr: 0.000023
2023-10-23 09:43:36,815 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:36,819 EPOCH 62 done: loss 0.0008 - lr 0.000023


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:43:39,115 Evaluating as a multi-label problem: False
2023-10-23 09:43:39,134 DEV : loss 0.26000216603279114 - f1-score (micro avg)  0.8543
2023-10-23 09:43:39,157 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:39,626 epoch 63 - iter 1/5 - loss 0.00024836 - time (sec): 0.47 - samples/sec: 8436.35 - lr: 0.000023
2023-10-23 09:43:40,117 epoch 63 - iter 2/5 - loss 0.00021300 - time (sec): 0.96 - samples/sec: 8381.84 - lr: 0.000023
2023-10-23 09:43:40,662 epoch 63 - iter 3/5 - loss 0.00019807 - time (sec): 1.50 - samples/sec: 8382.22 - lr: 0.000023
2023-10-23 09:43:41,312 epoch 63 - iter 4/5 - loss 0.00029792 - time (sec): 2.15 - samples/sec: 8503.73 - lr: 0.000023
2023-10-23 09:43:41,508 epoch 63 - iter 5/5 - loss 0.00028867 - time (sec): 2.35 - samples/sec: 8536.58 - lr: 0.000023
2023-10-23 09:43:41,515 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:41,520 EPOCH 63 done: loss 0.0003 - lr 0.000023


100%|██████████| 2/2 [00:04<00:00,  2.09s/it]

2023-10-23 09:43:45,719 Evaluating as a multi-label problem: False
2023-10-23 09:43:45,740 DEV : loss 0.271983802318573 - f1-score (micro avg)  0.8579
2023-10-23 09:43:45,754 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:46,254 epoch 64 - iter 1/5 - loss 0.00582258 - time (sec): 0.50 - samples/sec: 10080.21 - lr: 0.000022
2023-10-23 09:43:46,645 epoch 64 - iter 2/5 - loss 0.00320196 - time (sec): 0.89 - samples/sec: 10463.88 - lr: 0.000022
2023-10-23 09:43:47,114 epoch 64 - iter 3/5 - loss 0.00231440 - time (sec): 1.36 - samples/sec: 10300.28 - lr: 0.000022
2023-10-23 09:43:47,894 epoch 64 - iter 4/5 - loss 0.00182030 - time (sec): 2.14 - samples/sec: 8538.90 - lr: 0.000022
2023-10-23 09:43:48,054 epoch 64 - iter 5/5 - loss 0.00167237 - time (sec): 2.30 - samples/sec: 8729.35 - lr: 0.000022
2023-10-23 09:43:48,056 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:48,062 EPOCH 64 done: loss 0.0017 - lr 0.000022


100%|██████████| 2/2 [00:02<00:00,  1.13s/it]

2023-10-23 09:43:50,336 Evaluating as a multi-label problem: False
2023-10-23 09:43:50,369 DEV : loss 0.29031580686569214 - f1-score (micro avg)  0.8564
2023-10-23 09:43:50,381 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:50,786 epoch 65 - iter 1/5 - loss 0.00025605 - time (sec): 0.40 - samples/sec: 10936.67 - lr: 0.000022
2023-10-23 09:43:51,238 epoch 65 - iter 2/5 - loss 0.00039607 - time (sec): 0.86 - samples/sec: 11093.43 - lr: 0.000022
2023-10-23 09:43:51,624 epoch 65 - iter 3/5 - loss 0.00030967 - time (sec): 1.24 - samples/sec: 11060.61 - lr: 0.000022
2023-10-23 09:43:52,090 epoch 65 - iter 4/5 - loss 0.00037287 - time (sec): 1.71 - samples/sec: 10724.81 - lr: 0.000022
2023-10-23 09:43:52,268 epoch 65 - iter 5/5 - loss 0.00035298 - time (sec): 1.88 - samples/sec: 10643.98 - lr: 0.000022
2023-10-23 09:43:52,270 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:52,275 EPOCH 65 done: loss 0.0004 - lr 0.000022


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:43:54,562 Evaluating as a multi-label problem: False
2023-10-23 09:43:54,584 DEV : loss 0.29829734563827515 - f1-score (micro avg)  0.859
2023-10-23 09:43:54,598 ----------------------------------------------------------------------------------------------------





2023-10-23 09:43:55,105 epoch 66 - iter 1/5 - loss 0.00225917 - time (sec): 0.51 - samples/sec: 10950.79 - lr: 0.000021
2023-10-23 09:43:55,508 epoch 66 - iter 2/5 - loss 0.00132488 - time (sec): 0.91 - samples/sec: 11062.84 - lr: 0.000021
2023-10-23 09:43:56,007 epoch 66 - iter 3/5 - loss 0.00185116 - time (sec): 1.41 - samples/sec: 10011.98 - lr: 0.000021
2023-10-23 09:43:56,483 epoch 66 - iter 4/5 - loss 0.00146689 - time (sec): 1.88 - samples/sec: 9799.61 - lr: 0.000021
2023-10-23 09:43:56,697 epoch 66 - iter 5/5 - loss 0.00135856 - time (sec): 2.10 - samples/sec: 9558.97 - lr: 0.000021
2023-10-23 09:43:56,702 ----------------------------------------------------------------------------------------------------
2023-10-23 09:43:56,707 EPOCH 66 done: loss 0.0014 - lr 0.000021


100%|██████████| 2/2 [00:04<00:00,  2.39s/it]

2023-10-23 09:44:01,509 Evaluating as a multi-label problem: False
2023-10-23 09:44:01,547 DEV : loss 0.303864449262619 - f1-score (micro avg)  0.8426
2023-10-23 09:44:01,568 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:02,105 epoch 67 - iter 1/5 - loss 0.00041071 - time (sec): 0.54 - samples/sec: 8594.33 - lr: 0.000021
2023-10-23 09:44:02,514 epoch 67 - iter 2/5 - loss 0.00095537 - time (sec): 0.94 - samples/sec: 9614.20 - lr: 0.000021
2023-10-23 09:44:02,948 epoch 67 - iter 3/5 - loss 0.00068696 - time (sec): 1.38 - samples/sec: 10239.69 - lr: 0.000021
2023-10-23 09:44:03,411 epoch 67 - iter 4/5 - loss 0.00072866 - time (sec): 1.84 - samples/sec: 10211.32 - lr: 0.000021
2023-10-23 09:44:03,541 epoch 67 - iter 5/5 - loss 0.00070062 - time (sec): 1.97 - samples/sec: 10177.23 - lr: 0.000021
2023-10-23 09:44:03,544 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:03,549 EPOCH 67 done: loss 0.0007 - lr 0.000021


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:44:05,874 Evaluating as a multi-label problem: False
2023-10-23 09:44:05,897 DEV : loss 0.3139787018299103 - f1-score (micro avg)  0.8379
2023-10-23 09:44:05,914 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:06,346 epoch 68 - iter 1/5 - loss 0.00023198 - time (sec): 0.43 - samples/sec: 10914.49 - lr: 0.000020
2023-10-23 09:44:06,742 epoch 68 - iter 2/5 - loss 0.00019861 - time (sec): 0.83 - samples/sec: 10723.44 - lr: 0.000020
2023-10-23 09:44:07,197 epoch 68 - iter 3/5 - loss 0.00033471 - time (sec): 1.28 - samples/sec: 10775.61 - lr: 0.000020
2023-10-23 09:44:07,678 epoch 68 - iter 4/5 - loss 0.00035746 - time (sec): 1.76 - samples/sec: 10582.33 - lr: 0.000020
2023-10-23 09:44:07,832 epoch 68 - iter 5/5 - loss 0.00033743 - time (sec): 1.92 - samples/sec: 10469.61 - lr: 0.000020
2023-10-23 09:44:07,834 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:07,840 EPOCH 68 done: loss 0.0003 - lr 0.000020


100%|██████████| 2/2 [00:02<00:00,  1.16s/it]

2023-10-23 09:44:10,166 Evaluating as a multi-label problem: False
2023-10-23 09:44:10,185 DEV : loss 0.30482304096221924 - f1-score (micro avg)  0.8444
2023-10-23 09:44:10,197 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:11,061 epoch 69 - iter 1/5 - loss 0.00011553 - time (sec): 0.86 - samples/sec: 5530.96 - lr: 0.000020
2023-10-23 09:44:11,442 epoch 69 - iter 2/5 - loss 0.00017378 - time (sec): 1.24 - samples/sec: 6971.50 - lr: 0.000020
2023-10-23 09:44:11,881 epoch 69 - iter 3/5 - loss 0.00017003 - time (sec): 1.68 - samples/sec: 7866.76 - lr: 0.000020
2023-10-23 09:44:12,414 epoch 69 - iter 4/5 - loss 0.00023535 - time (sec): 2.22 - samples/sec: 8173.35 - lr: 0.000020
2023-10-23 09:44:12,633 epoch 69 - iter 5/5 - loss 0.00022751 - time (sec): 2.43 - samples/sec: 8242.56 - lr: 0.000019
2023-10-23 09:44:12,636 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:12,641 EPOCH 69 done: loss 0.0002 - lr 0.000019


100%|██████████| 2/2 [00:04<00:00,  2.22s/it]

2023-10-23 09:44:17,099 Evaluating as a multi-label problem: False
2023-10-23 09:44:17,126 DEV : loss 0.3030979037284851 - f1-score (micro avg)  0.8563
2023-10-23 09:44:17,152 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:17,752 epoch 70 - iter 1/5 - loss 0.00185407 - time (sec): 0.60 - samples/sec: 8060.51 - lr: 0.000019
2023-10-23 09:44:18,290 epoch 70 - iter 2/5 - loss 0.00098687 - time (sec): 1.14 - samples/sec: 8301.29 - lr: 0.000019
2023-10-23 09:44:18,766 epoch 70 - iter 3/5 - loss 0.00074851 - time (sec): 1.61 - samples/sec: 8575.09 - lr: 0.000019
2023-10-23 09:44:19,135 epoch 70 - iter 4/5 - loss 0.00070621 - time (sec): 1.98 - samples/sec: 9257.00 - lr: 0.000019
2023-10-23 09:44:19,309 epoch 70 - iter 5/5 - loss 0.00065474 - time (sec): 2.15 - samples/sec: 9309.80 - lr: 0.000019
2023-10-23 09:44:19,311 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:19,316 EPOCH 70 done: loss 0.0007 - lr 0.000019


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:44:21,638 Evaluating as a multi-label problem: False
2023-10-23 09:44:21,663 DEV : loss 0.3002007007598877 - f1-score (micro avg)  0.8386
2023-10-23 09:44:21,682 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:22,093 epoch 71 - iter 1/5 - loss 0.00043251 - time (sec): 0.41 - samples/sec: 10318.59 - lr: 0.000019
2023-10-23 09:44:22,862 epoch 71 - iter 2/5 - loss 0.00064128 - time (sec): 1.18 - samples/sec: 7322.22 - lr: 0.000019
2023-10-23 09:44:23,392 epoch 71 - iter 3/5 - loss 0.00096798 - time (sec): 1.71 - samples/sec: 8070.93 - lr: 0.000019
2023-10-23 09:44:23,831 epoch 71 - iter 4/5 - loss 0.00075493 - time (sec): 2.15 - samples/sec: 8473.87 - lr: 0.000019
2023-10-23 09:44:23,991 epoch 71 - iter 5/5 - loss 0.00068982 - time (sec): 2.31 - samples/sec: 8696.23 - lr: 0.000018
2023-10-23 09:44:23,994 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:23,996 EPOCH 71 done: loss 0.0007 - lr 0.000018


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:44:26,311 Evaluating as a multi-label problem: False
2023-10-23 09:44:26,335 DEV : loss 0.30466097593307495 - f1-score (micro avg)  0.8333
2023-10-23 09:44:26,347 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:26,781 epoch 72 - iter 1/5 - loss 0.00012934 - time (sec): 0.43 - samples/sec: 9942.60 - lr: 0.000018
2023-10-23 09:44:27,230 epoch 72 - iter 2/5 - loss 0.00011087 - time (sec): 0.88 - samples/sec: 10393.57 - lr: 0.000018
2023-10-23 09:44:27,753 epoch 72 - iter 3/5 - loss 0.00073306 - time (sec): 1.40 - samples/sec: 10181.25 - lr: 0.000018
2023-10-23 09:44:28,156 epoch 72 - iter 4/5 - loss 0.00059959 - time (sec): 1.81 - samples/sec: 10227.78 - lr: 0.000018
2023-10-23 09:44:28,318 epoch 72 - iter 5/5 - loss 0.00056408 - time (sec): 1.97 - samples/sec: 10181.69 - lr: 0.000018
2023-10-23 09:44:28,320 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:28,324 EPOCH 72 done: loss 0.0006 - lr 0.000018


100%|██████████| 2/2 [00:04<00:00,  2.25s/it]

2023-10-23 09:44:32,829 Evaluating as a multi-label problem: False
2023-10-23 09:44:32,860 DEV : loss 0.297259122133255 - f1-score (micro avg)  0.8477
2023-10-23 09:44:32,877 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:33,351 epoch 73 - iter 1/5 - loss 0.00009596 - time (sec): 0.47 - samples/sec: 8203.07 - lr: 0.000018
2023-10-23 09:44:34,314 epoch 73 - iter 2/5 - loss 0.00009064 - time (sec): 1.43 - samples/sec: 5954.27 - lr: 0.000018
2023-10-23 09:44:34,856 epoch 73 - iter 3/5 - loss 0.00011685 - time (sec): 1.98 - samples/sec: 6834.95 - lr: 0.000018
2023-10-23 09:44:35,326 epoch 73 - iter 4/5 - loss 0.00011932 - time (sec): 2.45 - samples/sec: 7287.49 - lr: 0.000017
2023-10-23 09:44:35,532 epoch 73 - iter 5/5 - loss 0.00041566 - time (sec): 2.65 - samples/sec: 7566.00 - lr: 0.000017
2023-10-23 09:44:35,534 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:35,538 EPOCH 73 done: loss 0.0004 - lr 0.000017


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:44:37,836 Evaluating as a multi-label problem: False
2023-10-23 09:44:37,866 DEV : loss 0.3010244071483612 - f1-score (micro avg)  0.8574
2023-10-23 09:44:37,878 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:38,378 epoch 74 - iter 1/5 - loss 0.00008555 - time (sec): 0.50 - samples/sec: 10606.10 - lr: 0.000017
2023-10-23 09:44:38,779 epoch 74 - iter 2/5 - loss 0.00010370 - time (sec): 0.90 - samples/sec: 10179.83 - lr: 0.000017
2023-10-23 09:44:39,234 epoch 74 - iter 3/5 - loss 0.00022389 - time (sec): 1.35 - samples/sec: 10142.92 - lr: 0.000017
2023-10-23 09:44:39,643 epoch 74 - iter 4/5 - loss 0.00095324 - time (sec): 1.76 - samples/sec: 10593.28 - lr: 0.000017
2023-10-23 09:44:39,775 epoch 74 - iter 5/5 - loss 0.00088968 - time (sec): 1.90 - samples/sec: 10585.78 - lr: 0.000017
2023-10-23 09:44:39,777 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:39,782 EPOCH 74 done: loss 0.0009 - lr 0.000017


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:44:42,074 Evaluating as a multi-label problem: False
2023-10-23 09:44:42,095 DEV : loss 0.3090309798717499 - f1-score (micro avg)  0.8547
2023-10-23 09:44:42,107 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:42,533 epoch 75 - iter 1/5 - loss 0.00420768 - time (sec): 0.42 - samples/sec: 11403.84 - lr: 0.000017
2023-10-23 09:44:42,965 epoch 75 - iter 2/5 - loss 0.00340154 - time (sec): 0.86 - samples/sec: 11450.04 - lr: 0.000017
2023-10-23 09:44:43,388 epoch 75 - iter 3/5 - loss 0.00296799 - time (sec): 1.28 - samples/sec: 10815.80 - lr: 0.000017
2023-10-23 09:44:44,239 epoch 75 - iter 4/5 - loss 0.00249833 - time (sec): 2.13 - samples/sec: 8771.72 - lr: 0.000016
2023-10-23 09:44:44,396 epoch 75 - iter 5/5 - loss 0.00233494 - time (sec): 2.29 - samples/sec: 8768.11 - lr: 0.000016
2023-10-23 09:44:44,398 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:44,408 EPOCH 75 done: loss 0.0023 - lr 0.000016


100%|██████████| 2/2 [00:04<00:00,  2.03s/it]

2023-10-23 09:44:48,487 Evaluating as a multi-label problem: False
2023-10-23 09:44:48,527 DEV : loss 0.3095563054084778 - f1-score (micro avg)  0.8503
2023-10-23 09:44:48,548 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:49,186 epoch 76 - iter 1/5 - loss 0.00009288 - time (sec): 0.63 - samples/sec: 7560.46 - lr: 0.000016
2023-10-23 09:44:49,758 epoch 76 - iter 2/5 - loss 0.00030887 - time (sec): 1.20 - samples/sec: 7763.10 - lr: 0.000016
2023-10-23 09:44:50,314 epoch 76 - iter 3/5 - loss 0.00023396 - time (sec): 1.76 - samples/sec: 8035.52 - lr: 0.000016
2023-10-23 09:44:50,798 epoch 76 - iter 4/5 - loss 0.00020094 - time (sec): 2.24 - samples/sec: 8226.17 - lr: 0.000016
2023-10-23 09:44:50,972 epoch 76 - iter 5/5 - loss 0.00019306 - time (sec): 2.42 - samples/sec: 8299.81 - lr: 0.000016
2023-10-23 09:44:50,975 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:50,977 EPOCH 76 done: loss 0.0002 - lr 0.000016


100%|██████████| 2/2 [00:02<00:00,  1.25s/it]

2023-10-23 09:44:53,494 Evaluating as a multi-label problem: False
2023-10-23 09:44:53,519 DEV : loss 0.30463454127311707 - f1-score (micro avg)  0.8569
2023-10-23 09:44:53,532 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:53,967 epoch 77 - iter 1/5 - loss 0.00008415 - time (sec): 0.43 - samples/sec: 10809.30 - lr: 0.000016
2023-10-23 09:44:54,370 epoch 77 - iter 2/5 - loss 0.00006669 - time (sec): 0.84 - samples/sec: 10772.98 - lr: 0.000016
2023-10-23 09:44:54,757 epoch 77 - iter 3/5 - loss 0.00086346 - time (sec): 1.22 - samples/sec: 10970.93 - lr: 0.000015
2023-10-23 09:44:55,219 epoch 77 - iter 4/5 - loss 0.00076983 - time (sec): 1.69 - samples/sec: 10819.31 - lr: 0.000015
2023-10-23 09:44:55,412 epoch 77 - iter 5/5 - loss 0.00070591 - time (sec): 1.88 - samples/sec: 10675.33 - lr: 0.000015
2023-10-23 09:44:55,414 ----------------------------------------------------------------------------------------------------
2023-10-23 09:44:55,419 EPOCH 77 done: loss 0.0007 - lr 0.000015


100%|██████████| 2/2 [00:02<00:00,  1.33s/it]

2023-10-23 09:44:58,101 Evaluating as a multi-label problem: False
2023-10-23 09:44:58,120 DEV : loss 0.3002319037914276 - f1-score (micro avg)  0.8546
2023-10-23 09:44:58,134 ----------------------------------------------------------------------------------------------------





2023-10-23 09:44:58,613 epoch 78 - iter 1/5 - loss 0.00009262 - time (sec): 0.48 - samples/sec: 10523.44 - lr: 0.000015
2023-10-23 09:44:58,979 epoch 78 - iter 2/5 - loss 0.00067488 - time (sec): 0.84 - samples/sec: 10782.00 - lr: 0.000015
2023-10-23 09:44:59,461 epoch 78 - iter 3/5 - loss 0.00046863 - time (sec): 1.32 - samples/sec: 10479.26 - lr: 0.000015
2023-10-23 09:44:59,867 epoch 78 - iter 4/5 - loss 0.00036903 - time (sec): 1.73 - samples/sec: 10527.67 - lr: 0.000015
2023-10-23 09:45:00,043 epoch 78 - iter 5/5 - loss 0.00034476 - time (sec): 1.91 - samples/sec: 10518.11 - lr: 0.000015
2023-10-23 09:45:00,047 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:00,049 EPOCH 78 done: loss 0.0003 - lr 0.000015


100%|██████████| 2/2 [00:03<00:00,  1.61s/it]

2023-10-23 09:45:03,275 Evaluating as a multi-label problem: False
2023-10-23 09:45:03,314 DEV : loss 0.3044061064720154 - f1-score (micro avg)  0.8566
2023-10-23 09:45:03,330 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:03,834 epoch 79 - iter 1/5 - loss 0.00005826 - time (sec): 0.50 - samples/sec: 8469.27 - lr: 0.000015
2023-10-23 09:45:04,425 epoch 79 - iter 2/5 - loss 0.00004835 - time (sec): 1.09 - samples/sec: 8317.37 - lr: 0.000015
2023-10-23 09:45:05,020 epoch 79 - iter 3/5 - loss 0.00006391 - time (sec): 1.69 - samples/sec: 8480.69 - lr: 0.000014
2023-10-23 09:45:05,429 epoch 79 - iter 4/5 - loss 0.00016019 - time (sec): 2.10 - samples/sec: 8646.72 - lr: 0.000014
2023-10-23 09:45:05,643 epoch 79 - iter 5/5 - loss 0.00015161 - time (sec): 2.31 - samples/sec: 8681.04 - lr: 0.000014
2023-10-23 09:45:05,646 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:05,648 EPOCH 79 done: loss 0.0002 - lr 0.000014


100%|██████████| 2/2 [00:03<00:00,  1.67s/it]

2023-10-23 09:45:09,005 Evaluating as a multi-label problem: False
2023-10-23 09:45:09,025 DEV : loss 0.3055865168571472 - f1-score (micro avg)  0.8605
2023-10-23 09:45:09,039 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:09,448 epoch 80 - iter 1/5 - loss 0.00137299 - time (sec): 0.41 - samples/sec: 11138.08 - lr: 0.000014
2023-10-23 09:45:10,275 epoch 80 - iter 2/5 - loss 0.00071283 - time (sec): 1.23 - samples/sec: 7369.60 - lr: 0.000014
2023-10-23 09:45:10,713 epoch 80 - iter 3/5 - loss 0.00051048 - time (sec): 1.67 - samples/sec: 8285.72 - lr: 0.000014
2023-10-23 09:45:11,136 epoch 80 - iter 4/5 - loss 0.00040205 - time (sec): 2.10 - samples/sec: 8812.55 - lr: 0.000014
2023-10-23 09:45:11,312 epoch 80 - iter 5/5 - loss 0.00037549 - time (sec): 2.27 - samples/sec: 8831.03 - lr: 0.000014
2023-10-23 09:45:11,314 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:11,316 EPOCH 80 done: loss 0.0004 - lr 0.000014


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:45:13,600 Evaluating as a multi-label problem: False
2023-10-23 09:45:13,623 DEV : loss 0.3071970045566559 - f1-score (micro avg)  0.8577
2023-10-23 09:45:13,634 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:14,159 epoch 81 - iter 1/5 - loss 0.00006049 - time (sec): 0.52 - samples/sec: 10663.61 - lr: 0.000014
2023-10-23 09:45:14,525 epoch 81 - iter 2/5 - loss 0.00006527 - time (sec): 0.89 - samples/sec: 10764.94 - lr: 0.000013
2023-10-23 09:45:14,909 epoch 81 - iter 3/5 - loss 0.00042125 - time (sec): 1.27 - samples/sec: 10813.29 - lr: 0.000013
2023-10-23 09:45:15,310 epoch 81 - iter 4/5 - loss 0.00034400 - time (sec): 1.67 - samples/sec: 10735.93 - lr: 0.000013
2023-10-23 09:45:15,500 epoch 81 - iter 5/5 - loss 0.00031313 - time (sec): 1.86 - samples/sec: 10758.61 - lr: 0.000013
2023-10-23 09:45:15,502 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:15,507 EPOCH 81 done: loss 0.0003 - lr 0.000013


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:45:17,832 Evaluating as a multi-label problem: False
2023-10-23 09:45:17,851 DEV : loss 0.3066164255142212 - f1-score (micro avg)  0.854
2023-10-23 09:45:17,863 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:18,334 epoch 82 - iter 1/5 - loss 0.00006343 - time (sec): 0.47 - samples/sec: 9099.80 - lr: 0.000013
2023-10-23 09:45:18,838 epoch 82 - iter 2/5 - loss 0.00006410 - time (sec): 0.97 - samples/sec: 8635.59 - lr: 0.000013
2023-10-23 09:45:19,457 epoch 82 - iter 3/5 - loss 0.00006632 - time (sec): 1.59 - samples/sec: 8486.86 - lr: 0.000013
2023-10-23 09:45:20,461 epoch 82 - iter 4/5 - loss 0.00006102 - time (sec): 2.60 - samples/sec: 6979.45 - lr: 0.000013
2023-10-23 09:45:20,683 epoch 82 - iter 5/5 - loss 0.00006729 - time (sec): 2.82 - samples/sec: 7117.11 - lr: 0.000013
2023-10-23 09:45:20,687 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:20,690 EPOCH 82 done: loss 0.0001 - lr 0.000013


100%|██████████| 2/2 [00:03<00:00,  1.98s/it]

2023-10-23 09:45:24,656 Evaluating as a multi-label problem: False
2023-10-23 09:45:24,675 DEV : loss 0.30746057629585266 - f1-score (micro avg)  0.8658
2023-10-23 09:45:24,687 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:25,125 epoch 83 - iter 1/5 - loss 0.00012271 - time (sec): 0.44 - samples/sec: 11828.10 - lr: 0.000013
2023-10-23 09:45:25,528 epoch 83 - iter 2/5 - loss 0.00009447 - time (sec): 0.84 - samples/sec: 11853.92 - lr: 0.000012
2023-10-23 09:45:25,980 epoch 83 - iter 3/5 - loss 0.00007970 - time (sec): 1.29 - samples/sec: 11267.44 - lr: 0.000012
2023-10-23 09:45:26,404 epoch 83 - iter 4/5 - loss 0.00007923 - time (sec): 1.72 - samples/sec: 10956.32 - lr: 0.000012
2023-10-23 09:45:26,528 epoch 83 - iter 5/5 - loss 0.00007834 - time (sec): 1.84 - samples/sec: 10906.41 - lr: 0.000012
2023-10-23 09:45:26,530 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:26,534 EPOCH 83 done: loss 0.0001 - lr 0.000012


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:45:28,844 Evaluating as a multi-label problem: False
2023-10-23 09:45:28,865 DEV : loss 0.3129176199436188 - f1-score (micro avg)  0.8675
2023-10-23 09:45:28,877 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:29,316 epoch 84 - iter 1/5 - loss 0.00003867 - time (sec): 0.44 - samples/sec: 9327.61 - lr: 0.000012
2023-10-23 09:45:29,710 epoch 84 - iter 2/5 - loss 0.00004592 - time (sec): 0.83 - samples/sec: 10197.87 - lr: 0.000012
2023-10-23 09:45:30,177 epoch 84 - iter 3/5 - loss 0.00010677 - time (sec): 1.30 - samples/sec: 10454.00 - lr: 0.000012
2023-10-23 09:45:31,054 epoch 84 - iter 4/5 - loss 0.00009245 - time (sec): 2.18 - samples/sec: 8612.20 - lr: 0.000012
2023-10-23 09:45:31,187 epoch 84 - iter 5/5 - loss 0.00009027 - time (sec): 2.31 - samples/sec: 8689.84 - lr: 0.000012
2023-10-23 09:45:31,189 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:31,195 EPOCH 84 done: loss 0.0001 - lr 0.000012


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:45:33,479 Evaluating as a multi-label problem: False
2023-10-23 09:45:33,505 DEV : loss 0.3199433982372284 - f1-score (micro avg)  0.8606
2023-10-23 09:45:33,520 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:33,944 epoch 85 - iter 1/5 - loss 0.00007964 - time (sec): 0.42 - samples/sec: 11246.31 - lr: 0.000012
2023-10-23 09:45:34,373 epoch 85 - iter 2/5 - loss 0.00006379 - time (sec): 0.85 - samples/sec: 10932.22 - lr: 0.000011
2023-10-23 09:45:34,837 epoch 85 - iter 3/5 - loss 0.00005898 - time (sec): 1.32 - samples/sec: 10031.33 - lr: 0.000011
2023-10-23 09:45:35,415 epoch 85 - iter 4/5 - loss 0.00005947 - time (sec): 1.89 - samples/sec: 9500.68 - lr: 0.000011
2023-10-23 09:45:35,656 epoch 85 - iter 5/5 - loss 0.00005593 - time (sec): 2.13 - samples/sec: 9398.58 - lr: 0.000011
2023-10-23 09:45:35,661 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:35,665 EPOCH 85 done: loss 0.0001 - lr 0.000011


100%|██████████| 2/2 [00:04<00:00,  2.17s/it]

2023-10-23 09:45:40,017 Evaluating as a multi-label problem: False
2023-10-23 09:45:40,047 DEV : loss 0.3219834566116333 - f1-score (micro avg)  0.8582
2023-10-23 09:45:40,068 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:40,599 epoch 86 - iter 1/5 - loss 0.00010295 - time (sec): 0.53 - samples/sec: 8060.70 - lr: 0.000011
2023-10-23 09:45:41,034 epoch 86 - iter 2/5 - loss 0.00007944 - time (sec): 0.96 - samples/sec: 9267.27 - lr: 0.000011
2023-10-23 09:45:41,476 epoch 86 - iter 3/5 - loss 0.00007882 - time (sec): 1.41 - samples/sec: 9532.97 - lr: 0.000011
2023-10-23 09:45:41,949 epoch 86 - iter 4/5 - loss 0.00006977 - time (sec): 1.88 - samples/sec: 9736.91 - lr: 0.000011
2023-10-23 09:45:42,126 epoch 86 - iter 5/5 - loss 0.00006950 - time (sec): 2.06 - samples/sec: 9759.26 - lr: 0.000011
2023-10-23 09:45:42,128 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:42,133 EPOCH 86 done: loss 0.0001 - lr 0.000011


100%|██████████| 2/2 [00:02<00:00,  1.32s/it]

2023-10-23 09:45:44,788 Evaluating as a multi-label problem: False
2023-10-23 09:45:44,810 DEV : loss 0.32380685210227966 - f1-score (micro avg)  0.849
2023-10-23 09:45:44,823 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:45,274 epoch 87 - iter 1/5 - loss 0.00005843 - time (sec): 0.45 - samples/sec: 10377.92 - lr: 0.000010
2023-10-23 09:45:45,672 epoch 87 - iter 2/5 - loss 0.00005722 - time (sec): 0.84 - samples/sec: 10886.44 - lr: 0.000010
2023-10-23 09:45:46,179 epoch 87 - iter 3/5 - loss 0.00005172 - time (sec): 1.35 - samples/sec: 10321.76 - lr: 0.000010
2023-10-23 09:45:46,565 epoch 87 - iter 4/5 - loss 0.00005436 - time (sec): 1.74 - samples/sec: 10523.14 - lr: 0.000010
2023-10-23 09:45:46,742 epoch 87 - iter 5/5 - loss 0.00005149 - time (sec): 1.91 - samples/sec: 10478.15 - lr: 0.000010
2023-10-23 09:45:46,744 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:46,751 EPOCH 87 done: loss 0.0001 - lr 0.000010


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:45:49,049 Evaluating as a multi-label problem: False
2023-10-23 09:45:49,071 DEV : loss 0.3146955966949463 - f1-score (micro avg)  0.8662
2023-10-23 09:45:49,083 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:49,622 epoch 88 - iter 1/5 - loss 0.00007087 - time (sec): 0.54 - samples/sec: 9863.50 - lr: 0.000010
2023-10-23 09:45:50,060 epoch 88 - iter 2/5 - loss 0.00006602 - time (sec): 0.98 - samples/sec: 9852.02 - lr: 0.000010
2023-10-23 09:45:50,443 epoch 88 - iter 3/5 - loss 0.00006052 - time (sec): 1.36 - samples/sec: 9950.06 - lr: 0.000010
2023-10-23 09:45:50,827 epoch 88 - iter 4/5 - loss 0.00006039 - time (sec): 1.74 - samples/sec: 10297.88 - lr: 0.000010
2023-10-23 09:45:51,040 epoch 88 - iter 5/5 - loss 0.00005704 - time (sec): 1.96 - samples/sec: 10258.86 - lr: 0.000010
2023-10-23 09:45:51,043 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:51,045 EPOCH 88 done: loss 0.0001 - lr 0.000010


100%|██████████| 2/2 [00:04<00:00,  2.46s/it]

2023-10-23 09:45:55,983 Evaluating as a multi-label problem: False
2023-10-23 09:45:56,021 DEV : loss 0.31458526849746704 - f1-score (micro avg)  0.8717
2023-10-23 09:45:56,042 ----------------------------------------------------------------------------------------------------





2023-10-23 09:45:56,574 epoch 89 - iter 1/5 - loss 0.00006471 - time (sec): 0.53 - samples/sec: 8024.28 - lr: 0.000009
2023-10-23 09:45:57,047 epoch 89 - iter 2/5 - loss 0.00006250 - time (sec): 1.00 - samples/sec: 8041.09 - lr: 0.000009
2023-10-23 09:45:57,617 epoch 89 - iter 3/5 - loss 0.00005865 - time (sec): 1.57 - samples/sec: 8724.49 - lr: 0.000009
2023-10-23 09:45:58,065 epoch 89 - iter 4/5 - loss 0.00005445 - time (sec): 2.02 - samples/sec: 9169.53 - lr: 0.000009
2023-10-23 09:45:58,184 epoch 89 - iter 5/5 - loss 0.00005264 - time (sec): 2.14 - samples/sec: 9371.54 - lr: 0.000009
2023-10-23 09:45:58,187 ----------------------------------------------------------------------------------------------------
2023-10-23 09:45:58,190 EPOCH 89 done: loss 0.0001 - lr 0.000009


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:46:00,487 Evaluating as a multi-label problem: False
2023-10-23 09:46:00,513 DEV : loss 0.3181249499320984 - f1-score (micro avg)  0.8683
2023-10-23 09:46:00,526 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:01,014 epoch 90 - iter 1/5 - loss 0.00004396 - time (sec): 0.49 - samples/sec: 9139.41 - lr: 0.000009
2023-10-23 09:46:01,379 epoch 90 - iter 2/5 - loss 0.00018564 - time (sec): 0.85 - samples/sec: 9868.82 - lr: 0.000009
2023-10-23 09:46:01,849 epoch 90 - iter 3/5 - loss 0.00013028 - time (sec): 1.32 - samples/sec: 10394.33 - lr: 0.000009
2023-10-23 09:46:02,260 epoch 90 - iter 4/5 - loss 0.00048258 - time (sec): 1.73 - samples/sec: 10372.27 - lr: 0.000009
2023-10-23 09:46:02,469 epoch 90 - iter 5/5 - loss 0.00043765 - time (sec): 1.94 - samples/sec: 10329.22 - lr: 0.000008
2023-10-23 09:46:02,472 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:02,477 EPOCH 90 done: loss 0.0004 - lr 0.000008


100%|██████████| 2/2 [00:02<00:00,  1.18s/it]

2023-10-23 09:46:04,864 Evaluating as a multi-label problem: False
2023-10-23 09:46:04,885 DEV : loss 0.31998369097709656 - f1-score (micro avg)  0.8678
2023-10-23 09:46:04,898 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:05,781 epoch 91 - iter 1/5 - loss 0.00004364 - time (sec): 0.88 - samples/sec: 6129.77 - lr: 0.000008
2023-10-23 09:46:06,178 epoch 91 - iter 2/5 - loss 0.00004973 - time (sec): 1.28 - samples/sec: 7544.25 - lr: 0.000008
2023-10-23 09:46:06,557 epoch 91 - iter 3/5 - loss 0.00004353 - time (sec): 1.66 - samples/sec: 8120.63 - lr: 0.000008
2023-10-23 09:46:07,007 epoch 91 - iter 4/5 - loss 0.00004584 - time (sec): 2.11 - samples/sec: 8805.48 - lr: 0.000008
2023-10-23 09:46:07,193 epoch 91 - iter 5/5 - loss 0.00004563 - time (sec): 2.29 - samples/sec: 8744.29 - lr: 0.000008
2023-10-23 09:46:07,195 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:07,200 EPOCH 91 done: loss 0.0000 - lr 0.000008


100%|██████████| 2/2 [00:04<00:00,  2.22s/it]

2023-10-23 09:46:11,646 Evaluating as a multi-label problem: False
2023-10-23 09:46:11,672 DEV : loss 0.32136836647987366 - f1-score (micro avg)  0.8643
2023-10-23 09:46:11,690 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:12,227 epoch 92 - iter 1/5 - loss 0.00003843 - time (sec): 0.54 - samples/sec: 8422.01 - lr: 0.000008
2023-10-23 09:46:12,734 epoch 92 - iter 2/5 - loss 0.00003735 - time (sec): 1.04 - samples/sec: 8456.46 - lr: 0.000008
2023-10-23 09:46:13,188 epoch 92 - iter 3/5 - loss 0.00084928 - time (sec): 1.50 - samples/sec: 8789.74 - lr: 0.000008
2023-10-23 09:46:13,735 epoch 92 - iter 4/5 - loss 0.00063416 - time (sec): 2.04 - samples/sec: 8724.96 - lr: 0.000008
2023-10-23 09:46:13,947 epoch 92 - iter 5/5 - loss 0.00056696 - time (sec): 2.26 - samples/sec: 8895.49 - lr: 0.000007
2023-10-23 09:46:13,949 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:13,955 EPOCH 92 done: loss 0.0006 - lr 0.000007


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:46:16,257 Evaluating as a multi-label problem: False
2023-10-23 09:46:16,277 DEV : loss 0.3216927647590637 - f1-score (micro avg)  0.8672
2023-10-23 09:46:16,290 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:16,780 epoch 93 - iter 1/5 - loss 0.00004020 - time (sec): 0.49 - samples/sec: 9520.79 - lr: 0.000007
2023-10-23 09:46:17,543 epoch 93 - iter 2/5 - loss 0.00005360 - time (sec): 1.25 - samples/sec: 6907.87 - lr: 0.000007
2023-10-23 09:46:18,022 epoch 93 - iter 3/5 - loss 0.00004594 - time (sec): 1.73 - samples/sec: 8067.03 - lr: 0.000007
2023-10-23 09:46:18,402 epoch 93 - iter 4/5 - loss 0.00005056 - time (sec): 2.11 - samples/sec: 8827.12 - lr: 0.000007
2023-10-23 09:46:18,565 epoch 93 - iter 5/5 - loss 0.00004902 - time (sec): 2.27 - samples/sec: 8825.40 - lr: 0.000007
2023-10-23 09:46:18,567 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:18,570 EPOCH 93 done: loss 0.0000 - lr 0.000007


100%|██████████| 2/2 [00:02<00:00,  1.14s/it]

2023-10-23 09:46:20,872 Evaluating as a multi-label problem: False
2023-10-23 09:46:20,892 DEV : loss 0.32247042655944824 - f1-score (micro avg)  0.8679
2023-10-23 09:46:20,904 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:21,365 epoch 94 - iter 1/5 - loss 0.00006340 - time (sec): 0.46 - samples/sec: 11528.35 - lr: 0.000007
2023-10-23 09:46:21,783 epoch 94 - iter 2/5 - loss 0.00033326 - time (sec): 0.88 - samples/sec: 11222.11 - lr: 0.000007
2023-10-23 09:46:22,171 epoch 94 - iter 3/5 - loss 0.00025852 - time (sec): 1.27 - samples/sec: 10709.79 - lr: 0.000007
2023-10-23 09:46:22,656 epoch 94 - iter 4/5 - loss 0.00075527 - time (sec): 1.75 - samples/sec: 10681.88 - lr: 0.000006
2023-10-23 09:46:22,797 epoch 94 - iter 5/5 - loss 0.00105450 - time (sec): 1.89 - samples/sec: 10602.15 - lr: 0.000006
2023-10-23 09:46:22,800 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:22,803 EPOCH 94 done: loss 0.0011 - lr 0.000006


100%|██████████| 2/2 [00:03<00:00,  1.85s/it]

2023-10-23 09:46:26,525 Evaluating as a multi-label problem: False
2023-10-23 09:46:26,556 DEV : loss 0.32411178946495056 - f1-score (micro avg)  0.8603
2023-10-23 09:46:26,581 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:27,183 epoch 95 - iter 1/5 - loss 0.00003530 - time (sec): 0.60 - samples/sec: 7548.30 - lr: 0.000006
2023-10-23 09:46:27,675 epoch 95 - iter 2/5 - loss 0.00003958 - time (sec): 1.09 - samples/sec: 7993.78 - lr: 0.000006
2023-10-23 09:46:28,138 epoch 95 - iter 3/5 - loss 0.00004208 - time (sec): 1.55 - samples/sec: 8618.42 - lr: 0.000006
2023-10-23 09:46:28,681 epoch 95 - iter 4/5 - loss 0.00058805 - time (sec): 2.10 - samples/sec: 8606.65 - lr: 0.000006
2023-10-23 09:46:28,948 epoch 95 - iter 5/5 - loss 0.00053112 - time (sec): 2.36 - samples/sec: 8487.77 - lr: 0.000006
2023-10-23 09:46:28,953 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:28,957 EPOCH 95 done: loss 0.0005 - lr 0.000006


100%|██████████| 2/2 [00:03<00:00,  1.55s/it]

2023-10-23 09:46:32,067 Evaluating as a multi-label problem: False
2023-10-23 09:46:32,093 DEV : loss 0.3279092311859131 - f1-score (micro avg)  0.8589
2023-10-23 09:46:32,105 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:32,593 epoch 96 - iter 1/5 - loss 0.00021962 - time (sec): 0.49 - samples/sec: 9986.87 - lr: 0.000006
2023-10-23 09:46:33,009 epoch 96 - iter 2/5 - loss 0.00013162 - time (sec): 0.90 - samples/sec: 10422.65 - lr: 0.000006
2023-10-23 09:46:33,381 epoch 96 - iter 3/5 - loss 0.00010771 - time (sec): 1.27 - samples/sec: 10464.05 - lr: 0.000006
2023-10-23 09:46:33,829 epoch 96 - iter 4/5 - loss 0.00130964 - time (sec): 1.72 - samples/sec: 10800.94 - lr: 0.000005
2023-10-23 09:46:33,999 epoch 96 - iter 5/5 - loss 0.00121766 - time (sec): 1.89 - samples/sec: 10597.33 - lr: 0.000005
2023-10-23 09:46:34,002 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:34,008 EPOCH 96 done: loss 0.0012 - lr 0.000005


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:46:36,335 Evaluating as a multi-label problem: False
2023-10-23 09:46:36,358 DEV : loss 0.3273642361164093 - f1-score (micro avg)  0.8577
2023-10-23 09:46:36,370 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:36,788 epoch 97 - iter 1/5 - loss 0.00003349 - time (sec): 0.42 - samples/sec: 9857.74 - lr: 0.000005
2023-10-23 09:46:37,290 epoch 97 - iter 2/5 - loss 0.00003884 - time (sec): 0.92 - samples/sec: 9580.15 - lr: 0.000005
2023-10-23 09:46:37,700 epoch 97 - iter 3/5 - loss 0.00003738 - time (sec): 1.33 - samples/sec: 9775.91 - lr: 0.000005
2023-10-23 09:46:38,154 epoch 97 - iter 4/5 - loss 0.00003373 - time (sec): 1.78 - samples/sec: 10155.50 - lr: 0.000005
2023-10-23 09:46:38,331 epoch 97 - iter 5/5 - loss 0.00003291 - time (sec): 1.96 - samples/sec: 10237.82 - lr: 0.000005
2023-10-23 09:46:38,333 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:38,339 EPOCH 97 done: loss 0.0000 - lr 0.000005


100%|██████████| 2/2 [00:03<00:00,  1.77s/it]

2023-10-23 09:46:41,896 Evaluating as a multi-label problem: False
2023-10-23 09:46:41,921 DEV : loss 0.3267163634300232 - f1-score (micro avg)  0.8592
2023-10-23 09:46:41,945 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:42,550 epoch 98 - iter 1/5 - loss 0.00189819 - time (sec): 0.60 - samples/sec: 9061.80 - lr: 0.000005
2023-10-23 09:46:43,002 epoch 98 - iter 2/5 - loss 0.00111722 - time (sec): 1.06 - samples/sec: 9007.98 - lr: 0.000005
2023-10-23 09:46:43,468 epoch 98 - iter 3/5 - loss 0.00079427 - time (sec): 1.52 - samples/sec: 9022.74 - lr: 0.000004
2023-10-23 09:46:43,952 epoch 98 - iter 4/5 - loss 0.00061473 - time (sec): 2.00 - samples/sec: 8947.13 - lr: 0.000004
2023-10-23 09:46:44,197 epoch 98 - iter 5/5 - loss 0.00134679 - time (sec): 2.25 - samples/sec: 8914.00 - lr: 0.000004
2023-10-23 09:46:44,202 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:44,204 EPOCH 98 done: loss 0.0013 - lr 0.000004


100%|██████████| 2/2 [00:03<00:00,  1.76s/it]

2023-10-23 09:46:47,739 Evaluating as a multi-label problem: False
2023-10-23 09:46:47,760 DEV : loss 0.3245682716369629 - f1-score (micro avg)  0.8603
2023-10-23 09:46:47,771 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:48,178 epoch 99 - iter 1/5 - loss 0.00003570 - time (sec): 0.40 - samples/sec: 9729.50 - lr: 0.000004
2023-10-23 09:46:48,602 epoch 99 - iter 2/5 - loss 0.00155814 - time (sec): 0.83 - samples/sec: 10190.14 - lr: 0.000004
2023-10-23 09:46:48,984 epoch 99 - iter 3/5 - loss 0.00098614 - time (sec): 1.21 - samples/sec: 11182.60 - lr: 0.000004
2023-10-23 09:46:49,417 epoch 99 - iter 4/5 - loss 0.00075072 - time (sec): 1.64 - samples/sec: 11022.36 - lr: 0.000004
2023-10-23 09:46:49,612 epoch 99 - iter 5/5 - loss 0.00068053 - time (sec): 1.84 - samples/sec: 10910.63 - lr: 0.000004
2023-10-23 09:46:49,614 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:49,618 EPOCH 99 done: loss 0.0007 - lr 0.000004


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:46:51,931 Evaluating as a multi-label problem: False
2023-10-23 09:46:51,953 DEV : loss 0.32349857687950134 - f1-score (micro avg)  0.8551
2023-10-23 09:46:51,965 ----------------------------------------------------------------------------------------------------





2023-10-23 09:46:52,743 epoch 100 - iter 1/5 - loss 0.00003451 - time (sec): 0.78 - samples/sec: 5063.77 - lr: 0.000004
2023-10-23 09:46:53,172 epoch 100 - iter 2/5 - loss 0.00003662 - time (sec): 1.21 - samples/sec: 7537.91 - lr: 0.000004
2023-10-23 09:46:53,643 epoch 100 - iter 3/5 - loss 0.00003352 - time (sec): 1.68 - samples/sec: 8460.02 - lr: 0.000003
2023-10-23 09:46:54,072 epoch 100 - iter 4/5 - loss 0.00003403 - time (sec): 2.11 - samples/sec: 8890.51 - lr: 0.000003
2023-10-23 09:46:54,232 epoch 100 - iter 5/5 - loss 0.00003437 - time (sec): 2.27 - samples/sec: 8855.42 - lr: 0.000003
2023-10-23 09:46:54,234 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:54,240 EPOCH 100 done: loss 0.0000 - lr 0.000003


100%|██████████| 2/2 [00:02<00:00,  1.15s/it]

2023-10-23 09:46:56,549 Evaluating as a multi-label problem: False
2023-10-23 09:46:56,573 DEV : loss 0.3232954740524292 - f1-score (micro avg)  0.8582





2023-10-23 09:46:57,328 ----------------------------------------------------------------------------------------------------
2023-10-23 09:46:57,334 Testing using last state of model ...


100%|██████████| 3/3 [00:02<00:00,  1.42it/s]

2023-10-23 09:46:59,469 Evaluating as a multi-label problem: False
2023-10-23 09:46:59,507 0.7849	0.8502	0.8162	0.6895
2023-10-23 09:46:59,511 
Results:
- F-score (micro) 0.8162
- F-score (macro) 0.7237
- Accuracy 0.6895

By class:
              precision    recall  f1-score   support

      Winner     0.7056    0.7710    0.7368       345
        Date     0.9728    0.9921    0.9823       252
 Nationality     0.7979    0.8750    0.8347       176
   Prizetype     0.7600    0.9268    0.8352        41
      Reason     0.2059    0.2593    0.2295        27

   micro avg     0.7849    0.8502    0.8162       841
   macro avg     0.6884    0.7648    0.7237       841
weighted avg     0.7916    0.8502    0.8194       841

2023-10-23 09:46:59,513 ----------------------------------------------------------------------------------------------------





{'test_score': 0.8162100456621005,
 'dev_score_history': [0.018193094893529046,
  0.013029315960912051,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.3155216284987277,
  0.5126135216952573,
  0.5595959595959595,
  0.7158081705150978,
  0.7333333333333333,
  0.7896200185356812,
  0.7723502304147465,
  0.7992988606485539,
  0.7899159663865546,
  0.8388349514563107,
  0.828649138712602,
  0.8249763481551562,
  0.8181818181818183,
  0.8255915863277826,
  0.8299817184643511,
  0.8238482384823849,
  0.8469945355191256,
  0.850510677808728,
  0.8234265734265735,
  0.8407871198568873,
  0.8345323741007195,
  0.8430493273542601,
  0.855822550831793,
  0.853185595567867,
  0.8403967538322814,
  0.8539741219963033,
  0.8346595932802829,
  0.8191681735985534,
  0.831858407079646,
  0.860377358490566,
  0.8499550763701708,
  0.8504923903312445,
  0.8493919550982225,
  0.8566340160284951,
  0.8459383753501402,
  0.8398190045248868,
  0.850909090909091,
  0.8624192059095107,
  0.8476702508960574,
  0

In [None]:
# load the trained model
model = SequenceTagger.load("resources/taggers/ner/final-model.pt")

# create example sentence
sentence = Sentence('Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer. ')

# predict the tags
model.predict(sentence)
print(sentence.to_tagged_string())

2023-10-18 10:08:04,163 SequenceTagger predicts: Dictionary with 11 tags: O, B-Winner, I-Winner, B-Date, I-Date, B-Nationality, I-Nationality, B-Prizetype, I-Prizetype, B-Reason, I-Reason
Sentence[79]: "Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer." → ["Barack Hussein Obama II"/Winner, "August 4, 1961"/Date, "American"/Nationality, "the United States"/Nationality, "he"/Winner, "Obama"/Winner, "U.S."/Nationality, "Illinois"/Nationality]


In [None]:


relation_label_dict = corpus.make_label_dictionary(label_type="relation")
results_base_path = 'resources/relations/'
relation_model: RelationExtractor = RelationExtractor(
        embeddings=embeddings,
        label_dictionary=relation_label_dict,
        label_type="relation",
        entity_label_type="ner",
    )

trainer: ModelTrainer = ModelTrainer(relation_model, corpus)

trainer.train(
        results_base_path,
        learning_rate=5e-05,
        mini_batch_size=2,
        max_epochs=100,
        shuffle=True,
    )

2023-10-25 07:01:29,504 Computing label dictionary. Progress:


70it [00:00, 9517.06it/s]

2023-10-25 07:01:29,521 Dictionary created for label 'relation' with 7 values: has_won (seen 114 times), received_nobelprize_in (seen 111 times), received_nobelprize_for (seen 78 times), has_nationality (seen 70 times), born_on (seen 57 times), died_on (seen 29 times)





NameError: ignored

In [None]:


relation_label_dict = corpus.make_label_dictionary("relation")
results_base_path = 'resources/relations/'

for i in range(100):
  print(relation_label_dict[0])


2023-10-25 07:18:43,542 Computing label dictionary. Progress:


70it [00:00, 5157.96it/s]

2023-10-25 07:18:43,580 Dictionary created for label 'relation' with 7 values: has_won (seen 114 times), received_nobelprize_in (seen 111 times), received_nobelprize_for (seen 78 times), has_nationality (seen 70 times), born_on (seen 57 times), died_on (seen 29 times)





TypeError: ignored

In [None]:
relation_label_dict = corpus.make_label_dictionary(label_type="relation")
relation_dictionary = corpus.make_label_dictionary("relation")
doc_embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True)
relation_model: RelationClassifier = RelationClassifier(
    embeddings =doc_embeddings,
    label_dictionary = relation_dictionary,
    label_type="relation",
    entity_label_types="ner",
    entity_pair_labels={  # Define valid entity pair combinations, used as relation candidates
        ("Winner", "Nationality"),
        ("Winner", "Date"),
        ("Winner", "Prizetype"),
        ("Winner", "Reason"),

    },
    zero_tag_value='',
    allow_unk_tag=False,
)

2023-10-23 10:03:20,155 Computing label dictionary. Progress:


70it [00:00, 7128.32it/s]

2023-10-23 10:03:20,172 Dictionary created for label 'relation' with 7 values: has_won (seen 114 times), received_nobelprize_in (seen 111 times), received_nobelprize_for (seen 78 times), has_nationality (seen 70 times), born_on (seen 57 times), died_on (seen 29 times)
2023-10-23 10:03:20,175 Computing label dictionary. Progress:



70it [00:00, 16680.94it/s]

2023-10-23 10:03:20,187 Dictionary created for label 'relation' with 7 values: has_won (seen 114 times), received_nobelprize_in (seen 111 times), received_nobelprize_for (seen 78 times), has_nationality (seen 70 times), born_on (seen 57 times), died_on (seen 29 times)





Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
trainer: ModelTrainer = ModelTrainer(model=relation_model, corpus=relation_model.transform_corpus(corpus))


In [None]:
trainer.fine_tune(
    'resources/relations/',
    max_epochs=20,
    learning_rate=4e-5,
    mini_batch_size=8,
    main_evaluation_metric=("macro avg", "f1-score"),
    shuffle=True,
    embeddings_storage_mode='gpu',
)

2023-10-23 10:16:13,234 ----------------------------------------------------------------------------------------------------
2023-10-23 10:16:13,240 Model: "RelationClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768,

100%|██████████| 373/373 [00:26<00:00, 13.86it/s]

2023-10-23 10:18:49,424 Evaluating as a multi-label problem: False
2023-10-23 10:18:49,454 DEV : loss 0.32806316018104553 - f1-score (macro avg)  0.1391





2023-10-23 10:18:49,881 ----------------------------------------------------------------------------------------------------
2023-10-23 10:19:05,500 epoch 2 - iter 104/1041 - loss 0.28892615 - time (sec): 15.62 - samples/sec: 53.27 - lr: 0.000022
2023-10-23 10:19:21,385 epoch 2 - iter 208/1041 - loss 0.24636098 - time (sec): 31.50 - samples/sec: 52.82 - lr: 0.000024
2023-10-23 10:19:37,209 epoch 2 - iter 312/1041 - loss 0.23749319 - time (sec): 47.33 - samples/sec: 52.74 - lr: 0.000026
2023-10-23 10:19:52,924 epoch 2 - iter 416/1041 - loss 0.22281695 - time (sec): 63.04 - samples/sec: 52.79 - lr: 0.000028
2023-10-23 10:20:08,706 epoch 2 - iter 520/1041 - loss 0.20695863 - time (sec): 78.82 - samples/sec: 52.78 - lr: 0.000030
2023-10-23 10:20:24,498 epoch 2 - iter 624/1041 - loss 0.20421484 - time (sec): 94.62 - samples/sec: 52.76 - lr: 0.000032
2023-10-23 10:20:40,314 epoch 2 - iter 728/1041 - loss 0.19837392 - time (sec): 110.43 - samples/sec: 52.74 - lr: 0.000034
2023-10-23 10:20:56,

100%|██████████| 373/373 [00:26<00:00, 14.21it/s]

2023-10-23 10:21:54,038 Evaluating as a multi-label problem: False





2023-10-23 10:21:54,092 DEV : loss 0.14974774420261383 - f1-score (macro avg)  0.6161
2023-10-23 10:21:54,892 ----------------------------------------------------------------------------------------------------
2023-10-23 10:22:10,534 epoch 3 - iter 104/1041 - loss 0.10912239 - time (sec): 15.64 - samples/sec: 53.20 - lr: 0.000040
2023-10-23 10:22:26,288 epoch 3 - iter 208/1041 - loss 0.10847437 - time (sec): 31.39 - samples/sec: 53.01 - lr: 0.000040
2023-10-23 10:22:42,113 epoch 3 - iter 312/1041 - loss 0.10833207 - time (sec): 47.22 - samples/sec: 52.86 - lr: 0.000039
2023-10-23 10:22:57,898 epoch 3 - iter 416/1041 - loss 0.11609057 - time (sec): 63.00 - samples/sec: 52.82 - lr: 0.000039
2023-10-23 10:23:13,674 epoch 3 - iter 520/1041 - loss 0.11648712 - time (sec): 78.78 - samples/sec: 52.81 - lr: 0.000039
2023-10-23 10:23:29,505 epoch 3 - iter 624/1041 - loss 0.11690390 - time (sec): 94.61 - samples/sec: 52.76 - lr: 0.000039
2023-10-23 10:23:45,229 epoch 3 - iter 728/1041 - loss 0.

100%|██████████| 373/373 [00:27<00:00, 13.47it/s]

2023-10-23 10:25:00,560 Evaluating as a multi-label problem: False





2023-10-23 10:25:00,594 DEV : loss 0.16298699378967285 - f1-score (macro avg)  0.7072
2023-10-23 10:25:00,997 ----------------------------------------------------------------------------------------------------
2023-10-23 10:25:16,881 epoch 4 - iter 104/1041 - loss 0.11056257 - time (sec): 15.88 - samples/sec: 52.39 - lr: 0.000038
2023-10-23 10:25:32,743 epoch 4 - iter 208/1041 - loss 0.07262972 - time (sec): 31.74 - samples/sec: 52.42 - lr: 0.000037
2023-10-23 10:25:48,504 epoch 4 - iter 312/1041 - loss 0.07266909 - time (sec): 47.51 - samples/sec: 52.54 - lr: 0.000037
2023-10-23 10:26:04,307 epoch 4 - iter 416/1041 - loss 0.07535784 - time (sec): 63.31 - samples/sec: 52.57 - lr: 0.000037
2023-10-23 10:26:20,183 epoch 4 - iter 520/1041 - loss 0.08660590 - time (sec): 79.18 - samples/sec: 52.54 - lr: 0.000037
2023-10-23 10:26:36,164 epoch 4 - iter 624/1041 - loss 0.08195579 - time (sec): 95.16 - samples/sec: 52.46 - lr: 0.000036
2023-10-23 10:26:51,807 epoch 4 - iter 728/1041 - loss 0.

100%|██████████| 373/373 [00:26<00:00, 14.02it/s]

2023-10-23 10:28:05,523 Evaluating as a multi-label problem: False





2023-10-23 10:28:05,572 DEV : loss 0.18164581060409546 - f1-score (macro avg)  0.7862
2023-10-23 10:28:06,326 ----------------------------------------------------------------------------------------------------
2023-10-23 10:28:22,022 epoch 5 - iter 104/1041 - loss 0.06854903 - time (sec): 15.69 - samples/sec: 53.02 - lr: 0.000035
2023-10-23 10:28:37,770 epoch 5 - iter 208/1041 - loss 0.06902662 - time (sec): 31.44 - samples/sec: 52.92 - lr: 0.000035
2023-10-23 10:28:53,579 epoch 5 - iter 312/1041 - loss 0.05744294 - time (sec): 47.25 - samples/sec: 52.82 - lr: 0.000035
2023-10-23 10:29:09,195 epoch 5 - iter 416/1041 - loss 0.05545067 - time (sec): 62.87 - samples/sec: 52.94 - lr: 0.000035
2023-10-23 10:29:24,939 epoch 5 - iter 520/1041 - loss 0.05182838 - time (sec): 78.61 - samples/sec: 52.92 - lr: 0.000034
2023-10-23 10:29:40,632 epoch 5 - iter 624/1041 - loss 0.05450908 - time (sec): 94.30 - samples/sec: 52.94 - lr: 0.000034
2023-10-23 10:29:56,108 epoch 5 - iter 728/1041 - loss 0.

100%|██████████| 373/373 [00:26<00:00, 14.15it/s]

2023-10-23 10:31:09,660 Evaluating as a multi-label problem: False
2023-10-23 10:31:09,709 DEV : loss 0.09006915241479874 - f1-score (macro avg)  0.8736





2023-10-23 10:31:10,433 ----------------------------------------------------------------------------------------------------
2023-10-23 10:31:26,235 epoch 6 - iter 104/1041 - loss 0.03108958 - time (sec): 15.80 - samples/sec: 52.66 - lr: 0.000033
2023-10-23 10:31:41,881 epoch 6 - iter 208/1041 - loss 0.04055950 - time (sec): 31.45 - samples/sec: 52.92 - lr: 0.000033
2023-10-23 10:31:57,543 epoch 6 - iter 312/1041 - loss 0.03803009 - time (sec): 47.11 - samples/sec: 52.98 - lr: 0.000033
2023-10-23 10:32:13,275 epoch 6 - iter 416/1041 - loss 0.03736091 - time (sec): 62.84 - samples/sec: 52.96 - lr: 0.000032
2023-10-23 10:32:28,900 epoch 6 - iter 520/1041 - loss 0.04286580 - time (sec): 78.47 - samples/sec: 53.02 - lr: 0.000032
2023-10-23 10:32:44,680 epoch 6 - iter 624/1041 - loss 0.04269437 - time (sec): 94.25 - samples/sec: 52.97 - lr: 0.000032
2023-10-23 10:33:00,374 epoch 6 - iter 728/1041 - loss 0.04045743 - time (sec): 109.94 - samples/sec: 52.97 - lr: 0.000032
2023-10-23 10:33:16,

100%|██████████| 373/373 [00:26<00:00, 14.13it/s]

2023-10-23 10:34:13,990 Evaluating as a multi-label problem: False





2023-10-23 10:34:14,041 DEV : loss 0.10865519195795059 - f1-score (macro avg)  0.8431
2023-10-23 10:34:14,805 ----------------------------------------------------------------------------------------------------
2023-10-23 10:34:30,411 epoch 7 - iter 104/1041 - loss 0.01895303 - time (sec): 15.60 - samples/sec: 53.33 - lr: 0.000031
2023-10-23 10:34:46,027 epoch 7 - iter 208/1041 - loss 0.04167521 - time (sec): 31.22 - samples/sec: 53.30 - lr: 0.000031
2023-10-23 10:35:01,792 epoch 7 - iter 312/1041 - loss 0.04302381 - time (sec): 46.98 - samples/sec: 53.13 - lr: 0.000030
2023-10-23 10:35:17,506 epoch 7 - iter 416/1041 - loss 0.04486235 - time (sec): 62.70 - samples/sec: 53.08 - lr: 0.000030
2023-10-23 10:35:33,190 epoch 7 - iter 520/1041 - loss 0.03961636 - time (sec): 78.38 - samples/sec: 53.07 - lr: 0.000030
2023-10-23 10:35:48,945 epoch 7 - iter 624/1041 - loss 0.04279156 - time (sec): 94.14 - samples/sec: 53.03 - lr: 0.000030
2023-10-23 10:36:04,647 epoch 7 - iter 728/1041 - loss 0.

100%|██████████| 373/373 [00:26<00:00, 13.99it/s]

2023-10-23 10:37:18,479 Evaluating as a multi-label problem: False
2023-10-23 10:37:18,528 DEV : loss 0.20684516429901123 - f1-score (macro avg)  0.8031





2023-10-23 10:37:19,295 ----------------------------------------------------------------------------------------------------
2023-10-23 10:37:35,059 epoch 8 - iter 104/1041 - loss 0.03559899 - time (sec): 15.76 - samples/sec: 52.79 - lr: 0.000029
2023-10-23 10:37:50,595 epoch 8 - iter 208/1041 - loss 0.02372005 - time (sec): 31.30 - samples/sec: 53.17 - lr: 0.000028
2023-10-23 10:38:06,362 epoch 8 - iter 312/1041 - loss 0.01989326 - time (sec): 47.07 - samples/sec: 53.03 - lr: 0.000028
2023-10-23 10:38:21,967 epoch 8 - iter 416/1041 - loss 0.01654778 - time (sec): 62.67 - samples/sec: 53.10 - lr: 0.000028
2023-10-23 10:38:37,610 epoch 8 - iter 520/1041 - loss 0.01651752 - time (sec): 78.31 - samples/sec: 53.12 - lr: 0.000028
2023-10-23 10:38:53,275 epoch 8 - iter 624/1041 - loss 0.02137782 - time (sec): 93.98 - samples/sec: 53.12 - lr: 0.000028
2023-10-23 10:39:08,961 epoch 8 - iter 728/1041 - loss 0.02929082 - time (sec): 109.66 - samples/sec: 53.11 - lr: 0.000027
2023-10-23 10:39:24,

100%|██████████| 373/373 [00:26<00:00, 14.08it/s]

2023-10-23 10:40:22,732 Evaluating as a multi-label problem: False





2023-10-23 10:40:22,780 DEV : loss 0.1246953085064888 - f1-score (macro avg)  0.8519
2023-10-23 10:40:23,528 ----------------------------------------------------------------------------------------------------
2023-10-23 10:40:39,162 epoch 9 - iter 104/1041 - loss 0.01592452 - time (sec): 15.63 - samples/sec: 53.23 - lr: 0.000026
2023-10-23 10:40:55,015 epoch 9 - iter 208/1041 - loss 0.01925624 - time (sec): 31.48 - samples/sec: 52.85 - lr: 0.000026
2023-10-23 10:41:10,685 epoch 9 - iter 312/1041 - loss 0.01982708 - time (sec): 47.15 - samples/sec: 52.93 - lr: 0.000026
2023-10-23 10:41:26,329 epoch 9 - iter 416/1041 - loss 0.01995681 - time (sec): 62.80 - samples/sec: 52.99 - lr: 0.000026
2023-10-23 10:41:42,106 epoch 9 - iter 520/1041 - loss 0.02242084 - time (sec): 78.58 - samples/sec: 52.94 - lr: 0.000026
2023-10-23 10:41:57,728 epoch 9 - iter 624/1041 - loss 0.02715023 - time (sec): 94.20 - samples/sec: 53.00 - lr: 0.000025
2023-10-23 10:42:13,605 epoch 9 - iter 728/1041 - loss 0.0

100%|██████████| 373/373 [00:26<00:00, 14.22it/s]

2023-10-23 10:43:26,960 Evaluating as a multi-label problem: False
2023-10-23 10:43:27,006 DEV : loss 0.13098777830600739 - f1-score (macro avg)  0.8664





2023-10-23 10:43:27,748 ----------------------------------------------------------------------------------------------------
2023-10-23 10:43:43,455 epoch 10 - iter 104/1041 - loss 0.00491949 - time (sec): 15.70 - samples/sec: 52.98 - lr: 0.000024
2023-10-23 10:43:59,109 epoch 10 - iter 208/1041 - loss 0.00894193 - time (sec): 31.36 - samples/sec: 53.07 - lr: 0.000024
2023-10-23 10:44:14,761 epoch 10 - iter 312/1041 - loss 0.00971685 - time (sec): 47.01 - samples/sec: 53.10 - lr: 0.000024
2023-10-23 10:44:30,487 epoch 10 - iter 416/1041 - loss 0.01173173 - time (sec): 62.74 - samples/sec: 53.05 - lr: 0.000024
2023-10-23 10:44:46,186 epoch 10 - iter 520/1041 - loss 0.01482974 - time (sec): 78.43 - samples/sec: 53.04 - lr: 0.000023
2023-10-23 10:45:01,812 epoch 10 - iter 624/1041 - loss 0.01635560 - time (sec): 94.06 - samples/sec: 53.07 - lr: 0.000023
2023-10-23 10:45:17,451 epoch 10 - iter 728/1041 - loss 0.01535241 - time (sec): 109.70 - samples/sec: 53.09 - lr: 0.000023
2023-10-23 10

100%|██████████| 373/373 [00:26<00:00, 14.16it/s]

2023-10-23 10:46:30,701 Evaluating as a multi-label problem: False
2023-10-23 10:46:30,748 DEV : loss 0.2091936320066452 - f1-score (macro avg)  0.8122





2023-10-23 10:46:31,517 ----------------------------------------------------------------------------------------------------
2023-10-23 10:46:47,071 epoch 11 - iter 104/1041 - loss 0.00269704 - time (sec): 15.55 - samples/sec: 53.50 - lr: 0.000022
2023-10-23 10:47:02,653 epoch 11 - iter 208/1041 - loss 0.02786618 - time (sec): 31.13 - samples/sec: 53.45 - lr: 0.000022
2023-10-23 10:47:18,236 epoch 11 - iter 312/1041 - loss 0.02084548 - time (sec): 46.72 - samples/sec: 53.43 - lr: 0.000022
2023-10-23 10:47:33,904 epoch 11 - iter 416/1041 - loss 0.02095242 - time (sec): 62.38 - samples/sec: 53.35 - lr: 0.000021
2023-10-23 10:47:49,525 epoch 11 - iter 520/1041 - loss 0.01714894 - time (sec): 78.01 - samples/sec: 53.33 - lr: 0.000021
2023-10-23 10:48:05,091 epoch 11 - iter 624/1041 - loss 0.01584693 - time (sec): 93.57 - samples/sec: 53.35 - lr: 0.000021
2023-10-23 10:48:20,680 epoch 11 - iter 728/1041 - loss 0.01910110 - time (sec): 109.16 - samples/sec: 53.35 - lr: 0.000021
2023-10-23 10

100%|██████████| 373/373 [00:26<00:00, 14.18it/s]

2023-10-23 10:49:34,242 Evaluating as a multi-label problem: False
2023-10-23 10:49:34,290 DEV : loss 0.22211472690105438 - f1-score (macro avg)  0.7866





2023-10-23 10:49:35,044 ----------------------------------------------------------------------------------------------------
2023-10-23 10:49:50,667 epoch 12 - iter 104/1041 - loss 0.01501787 - time (sec): 15.62 - samples/sec: 53.27 - lr: 0.000020
2023-10-23 10:50:06,390 epoch 12 - iter 208/1041 - loss 0.01224839 - time (sec): 31.34 - samples/sec: 53.09 - lr: 0.000020
2023-10-23 10:50:21,988 epoch 12 - iter 312/1041 - loss 0.01204100 - time (sec): 46.94 - samples/sec: 53.18 - lr: 0.000019
2023-10-23 10:50:37,701 epoch 12 - iter 416/1041 - loss 0.00989360 - time (sec): 62.65 - samples/sec: 53.12 - lr: 0.000019
2023-10-23 10:50:53,306 epoch 12 - iter 520/1041 - loss 0.01129030 - time (sec): 78.26 - samples/sec: 53.16 - lr: 0.000019
2023-10-23 10:51:08,948 epoch 12 - iter 624/1041 - loss 0.01072934 - time (sec): 93.90 - samples/sec: 53.16 - lr: 0.000019
2023-10-23 10:51:24,655 epoch 12 - iter 728/1041 - loss 0.01265977 - time (sec): 109.61 - samples/sec: 53.14 - lr: 0.000018
2023-10-23 10

100%|██████████| 373/373 [00:26<00:00, 14.13it/s]

2023-10-23 10:52:38,160 Evaluating as a multi-label problem: False
2023-10-23 10:52:38,205 DEV : loss 0.23373760282993317 - f1-score (macro avg)  0.7885





2023-10-23 10:52:38,944 ----------------------------------------------------------------------------------------------------
2023-10-23 10:52:54,637 epoch 13 - iter 104/1041 - loss 0.01219888 - time (sec): 15.69 - samples/sec: 53.04 - lr: 0.000018
2023-10-23 10:53:10,269 epoch 13 - iter 208/1041 - loss 0.01195932 - time (sec): 31.32 - samples/sec: 53.13 - lr: 0.000017
2023-10-23 10:53:25,806 epoch 13 - iter 312/1041 - loss 0.00801752 - time (sec): 46.86 - samples/sec: 53.27 - lr: 0.000017
2023-10-23 10:53:41,400 epoch 13 - iter 416/1041 - loss 0.01000707 - time (sec): 62.45 - samples/sec: 53.29 - lr: 0.000017
2023-10-23 10:53:57,022 epoch 13 - iter 520/1041 - loss 0.00941022 - time (sec): 78.07 - samples/sec: 53.28 - lr: 0.000017
2023-10-23 10:54:12,675 epoch 13 - iter 624/1041 - loss 0.01117291 - time (sec): 93.73 - samples/sec: 53.26 - lr: 0.000016
2023-10-23 10:54:28,281 epoch 13 - iter 728/1041 - loss 0.00960966 - time (sec): 109.33 - samples/sec: 53.27 - lr: 0.000016
2023-10-23 10

100%|██████████| 373/373 [00:26<00:00, 13.96it/s]

2023-10-23 10:55:42,010 Evaluating as a multi-label problem: False
2023-10-23 10:55:42,057 DEV : loss 0.1803145706653595 - f1-score (macro avg)  0.8473





2023-10-23 10:55:42,710 ----------------------------------------------------------------------------------------------------
2023-10-23 10:55:58,217 epoch 14 - iter 104/1041 - loss 0.00287877 - time (sec): 15.50 - samples/sec: 53.66 - lr: 0.000015
2023-10-23 10:56:13,783 epoch 14 - iter 208/1041 - loss 0.00215185 - time (sec): 31.07 - samples/sec: 53.56 - lr: 0.000015
2023-10-23 10:56:29,446 epoch 14 - iter 312/1041 - loss 0.00202680 - time (sec): 46.73 - samples/sec: 53.41 - lr: 0.000015
2023-10-23 10:56:44,924 epoch 14 - iter 416/1041 - loss 0.00193356 - time (sec): 62.21 - samples/sec: 53.49 - lr: 0.000015
2023-10-23 10:57:00,651 epoch 14 - iter 520/1041 - loss 0.00235172 - time (sec): 77.94 - samples/sec: 53.38 - lr: 0.000014
2023-10-23 10:57:16,220 epoch 14 - iter 624/1041 - loss 0.00509093 - time (sec): 93.51 - samples/sec: 53.39 - lr: 0.000014
2023-10-23 10:57:31,870 epoch 14 - iter 728/1041 - loss 0.00650101 - time (sec): 109.16 - samples/sec: 53.35 - lr: 0.000014
2023-10-23 10

100%|██████████| 373/373 [00:26<00:00, 13.93it/s]

2023-10-23 10:58:45,597 Evaluating as a multi-label problem: False





2023-10-23 10:58:45,646 DEV : loss 0.2439390867948532 - f1-score (macro avg)  0.7738
2023-10-23 10:58:46,278 ----------------------------------------------------------------------------------------------------
2023-10-23 10:59:01,888 epoch 15 - iter 104/1041 - loss 0.00229629 - time (sec): 15.61 - samples/sec: 53.30 - lr: 0.000013
2023-10-23 10:59:17,501 epoch 15 - iter 208/1041 - loss 0.01397762 - time (sec): 31.22 - samples/sec: 53.30 - lr: 0.000013
2023-10-23 10:59:33,034 epoch 15 - iter 312/1041 - loss 0.00966129 - time (sec): 46.75 - samples/sec: 53.39 - lr: 0.000013
2023-10-23 10:59:48,709 epoch 15 - iter 416/1041 - loss 0.00893813 - time (sec): 62.43 - samples/sec: 53.31 - lr: 0.000012
2023-10-23 11:00:04,366 epoch 15 - iter 520/1041 - loss 0.00850919 - time (sec): 78.09 - samples/sec: 53.27 - lr: 0.000012
2023-10-23 11:00:20,029 epoch 15 - iter 624/1041 - loss 0.00902940 - time (sec): 93.75 - samples/sec: 53.25 - lr: 0.000012
2023-10-23 11:00:35,598 epoch 15 - iter 728/1041 - l

100%|██████████| 373/373 [00:27<00:00, 13.80it/s]


2023-10-23 11:01:49,728 Evaluating as a multi-label problem: False
2023-10-23 11:01:49,760 DEV : loss 0.24746806919574738 - f1-score (macro avg)  0.7912
2023-10-23 11:01:50,156 ----------------------------------------------------------------------------------------------------
2023-10-23 11:02:05,685 epoch 16 - iter 104/1041 - loss 0.02125189 - time (sec): 15.53 - samples/sec: 53.58 - lr: 0.000011
2023-10-23 11:02:21,258 epoch 16 - iter 208/1041 - loss 0.01729131 - time (sec): 31.10 - samples/sec: 53.50 - lr: 0.000011
2023-10-23 11:02:36,815 epoch 16 - iter 312/1041 - loss 0.01283601 - time (sec): 46.66 - samples/sec: 53.50 - lr: 0.000010
2023-10-23 11:02:52,395 epoch 16 - iter 416/1041 - loss 0.00966650 - time (sec): 62.24 - samples/sec: 53.47 - lr: 0.000010
2023-10-23 11:03:08,178 epoch 16 - iter 520/1041 - loss 0.00773491 - time (sec): 78.02 - samples/sec: 53.32 - lr: 0.000010
2023-10-23 11:03:23,778 epoch 16 - iter 624/1041 - loss 0.00659600 - time (sec): 93.62 - samples/sec: 53.32

100%|██████████| 373/373 [00:26<00:00, 13.91it/s]

2023-10-23 11:04:53,402 Evaluating as a multi-label problem: False
2023-10-23 11:04:53,434 DEV : loss 0.22170577943325043 - f1-score (macro avg)  0.8271





2023-10-23 11:04:53,830 ----------------------------------------------------------------------------------------------------
2023-10-23 11:05:09,439 epoch 17 - iter 104/1041 - loss 0.00902276 - time (sec): 15.61 - samples/sec: 53.31 - lr: 0.000009
2023-10-23 11:05:25,219 epoch 17 - iter 208/1041 - loss 0.00553679 - time (sec): 31.39 - samples/sec: 53.02 - lr: 0.000008
2023-10-23 11:05:40,866 epoch 17 - iter 312/1041 - loss 0.00619079 - time (sec): 47.03 - samples/sec: 53.07 - lr: 0.000008
2023-10-23 11:05:56,420 epoch 17 - iter 416/1041 - loss 0.00464507 - time (sec): 62.59 - samples/sec: 53.17 - lr: 0.000008
2023-10-23 11:06:12,179 epoch 17 - iter 520/1041 - loss 0.00373664 - time (sec): 78.35 - samples/sec: 53.10 - lr: 0.000008
2023-10-23 11:06:27,812 epoch 17 - iter 624/1041 - loss 0.00383806 - time (sec): 93.98 - samples/sec: 53.12 - lr: 0.000008
2023-10-23 11:06:43,473 epoch 17 - iter 728/1041 - loss 0.00381924 - time (sec): 109.64 - samples/sec: 53.12 - lr: 0.000007
2023-10-23 11

100%|██████████| 373/373 [00:26<00:00, 13.84it/s]

2023-10-23 11:07:57,293 Evaluating as a multi-label problem: False





2023-10-23 11:07:57,328 DEV : loss 0.2084757387638092 - f1-score (macro avg)  0.8335
2023-10-23 11:07:57,737 ----------------------------------------------------------------------------------------------------
2023-10-23 11:08:13,360 epoch 18 - iter 104/1041 - loss 0.00105898 - time (sec): 15.62 - samples/sec: 53.26 - lr: 0.000006
2023-10-23 11:08:29,085 epoch 18 - iter 208/1041 - loss 0.00053021 - time (sec): 31.35 - samples/sec: 53.08 - lr: 0.000006
2023-10-23 11:08:44,808 epoch 18 - iter 312/1041 - loss 0.00041863 - time (sec): 47.07 - samples/sec: 53.03 - lr: 0.000006
2023-10-23 11:09:00,505 epoch 18 - iter 416/1041 - loss 0.00040861 - time (sec): 62.77 - samples/sec: 53.02 - lr: 0.000006
2023-10-23 11:09:16,179 epoch 18 - iter 520/1041 - loss 0.00068003 - time (sec): 78.44 - samples/sec: 53.03 - lr: 0.000006
2023-10-23 11:09:31,756 epoch 18 - iter 624/1041 - loss 0.00056990 - time (sec): 94.02 - samples/sec: 53.10 - lr: 0.000005
2023-10-23 11:09:47,374 epoch 18 - iter 728/1041 - l

100%|██████████| 373/373 [00:26<00:00, 13.89it/s]

2023-10-23 11:11:01,130 Evaluating as a multi-label problem: False





2023-10-23 11:11:01,163 DEV : loss 0.2269725501537323 - f1-score (macro avg)  0.8238
2023-10-23 11:11:01,570 ----------------------------------------------------------------------------------------------------
2023-10-23 11:11:17,157 epoch 19 - iter 104/1041 - loss 0.00495930 - time (sec): 15.59 - samples/sec: 53.38 - lr: 0.000004
2023-10-23 11:11:32,873 epoch 19 - iter 208/1041 - loss 0.00333730 - time (sec): 31.30 - samples/sec: 53.16 - lr: 0.000004
2023-10-23 11:11:48,532 epoch 19 - iter 312/1041 - loss 0.00276801 - time (sec): 46.96 - samples/sec: 53.15 - lr: 0.000004
2023-10-23 11:12:04,163 epoch 19 - iter 416/1041 - loss 0.00210345 - time (sec): 62.59 - samples/sec: 53.17 - lr: 0.000004
2023-10-23 11:12:19,817 epoch 19 - iter 520/1041 - loss 0.00228359 - time (sec): 78.25 - samples/sec: 53.17 - lr: 0.000003
2023-10-23 11:12:35,371 epoch 19 - iter 624/1041 - loss 0.00216142 - time (sec): 93.80 - samples/sec: 53.22 - lr: 0.000003
2023-10-23 11:12:51,100 epoch 19 - iter 728/1041 - l

100%|██████████| 373/373 [00:26<00:00, 13.83it/s]


2023-10-23 11:14:05,232 Evaluating as a multi-label problem: False
2023-10-23 11:14:05,264 DEV : loss 0.23200829327106476 - f1-score (macro avg)  0.8256
2023-10-23 11:14:05,668 ----------------------------------------------------------------------------------------------------
2023-10-23 11:14:21,216 epoch 20 - iter 104/1041 - loss 0.00000138 - time (sec): 15.55 - samples/sec: 53.52 - lr: 0.000002
2023-10-23 11:14:36,732 epoch 20 - iter 208/1041 - loss 0.00002374 - time (sec): 31.06 - samples/sec: 53.57 - lr: 0.000002
2023-10-23 11:14:52,387 epoch 20 - iter 312/1041 - loss 0.00083128 - time (sec): 46.72 - samples/sec: 53.43 - lr: 0.000002
2023-10-23 11:15:08,040 epoch 20 - iter 416/1041 - loss 0.00087294 - time (sec): 62.37 - samples/sec: 53.36 - lr: 0.000001
2023-10-23 11:15:23,755 epoch 20 - iter 520/1041 - loss 0.00157795 - time (sec): 78.09 - samples/sec: 53.27 - lr: 0.000001
2023-10-23 11:15:39,463 epoch 20 - iter 624/1041 - loss 0.00131642 - time (sec): 93.79 - samples/sec: 53.22

100%|██████████| 373/373 [00:26<00:00, 13.83it/s]


2023-10-23 11:17:09,217 Evaluating as a multi-label problem: False
2023-10-23 11:17:09,248 DEV : loss 0.23136982321739197 - f1-score (macro avg)  0.827
2023-10-23 11:17:10,274 ----------------------------------------------------------------------------------------------------
2023-10-23 11:17:10,276 Testing using last state of model ...


100%|██████████| 621/621 [00:45<00:00, 13.50it/s]

2023-10-23 11:17:56,324 Evaluating as a multi-label problem: False





2023-10-23 11:17:56,367 0.9698	0.9698	0.9698	0.9698
2023-10-23 11:17:56,368 
Results:
- F-score (micro) 0.9698
- F-score (macro) 0.76
- Accuracy 0.9698

By class:
                         precision    recall  f1-score   support

                            0.9891    0.9795    0.9843      4722
                has_won     0.4948    0.8571    0.6275        56
 received_nobelprize_in     0.7800    0.6842    0.7290        57
        has_nationality     0.7234    0.8293    0.7727        41
received_nobelprize_for     0.9000    0.6279    0.7397        43
                born_on     0.7105    0.8710    0.7826        31
                died_on     0.5200    1.0000    0.6842        13

               accuracy                         0.9698      4963
              macro avg     0.7311    0.8356    0.7600      4963
           weighted avg     0.9752    0.9698    0.9714      4963

2023-10-23 11:17:56,370 -----------------------------------------------------------------------------------------------

{'test_score': 0.759992490515059,
 'dev_score_history': [0.1390958035253337,
  0.6160874458762523,
  0.707193080150119,
  0.7862231454261847,
  0.8736109075274436,
  0.8431359510490344,
  0.8030713406771318,
  0.8519476743620954,
  0.86641966964504,
  0.8121858848860871,
  0.7866049532201755,
  0.7885083381019988,
  0.8473148562439665,
  0.7738092576039011,
  0.7912102509766569,
  0.827115409731006,
  0.8334859654930178,
  0.8238420679965188,
  0.8255898902125047,
  0.8269938965332065],
 'train_loss_history': [0.46602304046316584,
  0.18917429960275162,
  0.11208232270924051,
  0.08409133076309062,
  0.06015949468311575,
  0.04139665268956039,
  0.042408625662549736,
  0.02972297203058366,
  0.028851280507331743,
  0.015085002562761459,
  0.01795085951991243,
  0.012832369261971335,
  0.008864204655039811,
  0.00667855525070142,
  0.00915724442094882,
  0.006436865837237233,
  0.003423224463244922,
  0.0026045638967363022,
  0.0021427500815946144,
  0.0014627958245493652],
 'dev_loss_h

In [None]:
loaded_ner = SequenceTagger.load("/content/drive/MyDrive/enwiki20230820/component1/ner/final-model.pt")
loaded_re: RelationClassifier = RelationClassifier.load('/content/drive/MyDrive/enwiki20230820/component1/relations/final-model.pt')

2023-10-30 12:42:28,880 SequenceTagger predicts: Dictionary with 11 tags: O, B-Winner, I-Winner, B-Date, I-Date, B-Nationality, I-Nationality, B-Prizetype, I-Prizetype, B-Reason, I-Reason


In [None]:
sentence = Sentence('Barack Hussein Obama II ( born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008 and as an Illinois state senator from 1997 to 2004, and worked as a civil rights lawyer and university lecturer. ')
loaded_ner.predict(sentence)
loaded_re.predict(sentence)


In [None]:

# print(sentence)
# for item in sentence.get_labels():
  # label is item.value
  # if type(item.data_point) == Relation:
    # item:Relation
    # print(item.data_point.start_position, item.data_point.end_position, item.data_point.tag)
# for entity in sentence.get_labels('ner'):
#   e = entity
#   # print(e.to_dict(), e.shortstring, e.data_point.start_position, e.data_point.end_position)
#   print(entity.data_point.to_dict())
#   # print(entity.score, entity.value, entity)
# for entity in sentence.get_labels('relation'):
#   e = entity
#   # print(e.to_dict(), e.shortstring, e.data_point.start_position, e.data_point.end_position)
#   print(entity.data_point.to_dict())
#   # print(entity.score, entity.value, entity)

# srsly.write_json('sample.json', sentence.to_dict())

test_set = srsly.read_json('/content/drive/MyDrive/enwiki20230820/component1/val_set.json')
all_relations = {}
document_dicts = []
for document in test_set:
  sentence = Sentence(document["data"]["text"])
  loaded_ner.predict(sentence)
  loaded_re.predict(sentence)
  sent_dict = sentence.to_dict()
  # relation["labels"][0]["confidence"]
  all_relations[document["data"]["title"]] = [(relation["from_text"], relation["to_text"], relation["labels"][0]["value"]) for relation in sent_dict["relations"] if len(relation["labels"][0]["value"]) > 0]
  document_dicts.append(sentence.to_dict())


srsly.write_json('relations_test_set.json', all_relations)
srsly.write_json('test_document_dicts.json', document_dicts)


# Component 2
For component 2 the same model is used. This is finetuned to the new data.

In [None]:
train_json = srsly.read_json('/content/drive/MyDrive/enwiki20230820/component2/train_data.json')
make_conll(train_json, key_field='Abstract_new', file_name='component2/train_set')

test_json = srsly.read_json('/content/drive/MyDrive/enwiki20230820/component2/test_data.json')
make_conll(test_json, key_field='Abstract_new', file_name='component2/test_set')

# Split the train file into train/dev split
with open('component2/train_set.conll') as f:
  data = f.read()
  splits = data.split(2*os.linesep)
  splits = sorted(splits, key=lambda x: random.random())

  train = splits[:90]
  dev = splits[90:]

  write_conll('component2/train.conll', train)
  write_conll('component2/dev.conll', dev)

with open('component2/test_set.conll') as f:
  data = f.read()
  splits = data.split(2*os.linesep)
  splits = sorted(splits, key=lambda x: random.random())

  write_conll('component2/test.conll', splits)


102
40


In [None]:
columns = {1: 'text', 2: 'ner'}
data_folder = ''

corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='component2/train.conll',
                              test_file='component2/test.conll',
                              dev_file='component2/dev.conll')
print(f'Corpus size: {len(corpus.train)}')

2023-10-30 16:25:19,526 Reading data from .
2023-10-30 16:25:19,528 Train: component2/train.conll
2023-10-30 16:25:19,530 Dev: component2/dev.conll
2023-10-30 16:25:19,531 Test: component2/test.conll
Corpus size: 90


In [None]:
# tag to predict
tag_type = 'ner'# make tag dictionary from the corpus
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

embeddings = TransformerWordEmbeddings(
    model='distilbert-base-cased',
    layers="-1",
    subtoken_pooling='mean',
    fine_tune=True,
    use_context=True,
    respect_document_boundaries=False,
)

tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=False,
        use_rnn=False,
        reproject_embeddings=False,
        tag_format='BIO'
    )

2023-10-30 16:25:31,040 Computing label dictionary. Progress:


0it [00:00, ?it/s]
90it [00:00, 4474.24it/s]

2023-10-30 16:25:31,073 Dictionary created for label 'ner' with 4 values: Component (seen 1138 times), Person (seen 360 times), Time (seen 119 times), Date (seen 119 times)





2023-10-30 16:25:32,508 SequenceTagger predicts: Dictionary with 9 tags: O, B-Component, I-Component, B-Person, I-Person, B-Time, I-Time, B-Date, I-Date


In [None]:
trainer : ModelTrainer = ModelTrainer(tagger, corpus)

trainer.fine_tune(
        'component2/taggers/ner',
        learning_rate=5e-05,
        mini_batch_size=16,
        max_epochs=100,
        embeddings_storage_mode='gpu', # cpu, gpu
        weight_decay=0.0,
    )

2023-10-30 16:25:37,993 ----------------------------------------------------------------------------------------------------
2023-10-30 16:25:37,999 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(28997, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_fea

100%|██████████| 1/1 [00:00<00:00,  1.16it/s]

2023-10-30 16:25:56,993 DEV : loss 1.9528019428253174 - f1-score (micro avg)  0.0139
2023-10-30 16:25:56,999 ----------------------------------------------------------------------------------------------------





2023-10-30 16:25:59,998 epoch 2 - iter 1/6 - loss 2.08249749 - time (sec): 3.00 - samples/sec: 1389.74 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:26:02,597 epoch 2 - iter 2/6 - loss 2.06216805 - time (sec): 5.60 - samples/sec: 1402.78 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:26:06,299 epoch 2 - iter 3/6 - loss 2.02950072 - time (sec): 9.30 - samples/sec: 1299.45 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:26:09,915 epoch 2 - iter 4/6 - loss 2.02140152 - time (sec): 12.91 - samples/sec: 1207.09 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:26:12,856 epoch 2 - iter 5/6 - loss 2.00544941 - time (sec): 15.85 - samples/sec: 1242.21 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:26:14,546 epoch 2 - iter 6/6 - loss 1.97786846 - time (sec): 17.54 - samples/sec: 1276.97 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:26:14,549 ----------------------------------------------------------------------------------------------------
2023-10-30 16:26:14,552 EPOCH 2 done: 

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:26:15,408 DEV : loss 1.948030948638916 - f1-score (micro avg)  0.0143
2023-10-30 16:26:15,412 ----------------------------------------------------------------------------------------------------





2023-10-30 16:26:17,661 epoch 3 - iter 1/6 - loss 1.76086371 - time (sec): 2.25 - samples/sec: 1678.09 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:26:20,066 epoch 3 - iter 2/6 - loss 1.78741383 - time (sec): 4.65 - samples/sec: 1589.78 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:26:23,502 epoch 3 - iter 3/6 - loss 1.78148571 - time (sec): 8.09 - samples/sec: 1400.59 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:26:28,360 epoch 3 - iter 4/6 - loss 1.76524982 - time (sec): 12.95 - samples/sec: 1203.87 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:26:31,201 epoch 3 - iter 5/6 - loss 1.73445109 - time (sec): 15.79 - samples/sec: 1236.18 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:26:33,009 epoch 3 - iter 6/6 - loss 1.71849854 - time (sec): 17.59 - samples/sec: 1273.37 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:26:33,011 ----------------------------------------------------------------------------------------------------
2023-10-30 16:26:33,014 EPOCH 3 done: 

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:26:33,874 DEV : loss 1.9403245449066162 - f1-score (micro avg)  0.0119
2023-10-30 16:26:33,879 ----------------------------------------------------------------------------------------------------





2023-10-30 16:26:36,933 epoch 4 - iter 1/6 - loss 1.60652662 - time (sec): 3.05 - samples/sec: 1400.96 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:26:40,130 epoch 4 - iter 2/6 - loss 1.57667525 - time (sec): 6.25 - samples/sec: 1330.61 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:26:43,856 epoch 4 - iter 3/6 - loss 1.54293505 - time (sec): 9.98 - samples/sec: 1227.25 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:26:47,805 epoch 4 - iter 4/6 - loss 1.48870680 - time (sec): 13.92 - samples/sec: 1183.35 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:26:49,899 epoch 4 - iter 5/6 - loss 1.48363892 - time (sec): 16.02 - samples/sec: 1249.98 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:26:51,291 epoch 4 - iter 6/6 - loss 1.46985251 - time (sec): 17.41 - samples/sec: 1286.83 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:26:51,294 ----------------------------------------------------------------------------------------------------
2023-10-30 16:26:51,295 EPOCH 4 done: 

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:26:52,139 DEV : loss 1.9313247203826904 - f1-score (micro avg)  0.0123
2023-10-30 16:26:52,143 ----------------------------------------------------------------------------------------------------





2023-10-30 16:26:55,674 epoch 5 - iter 1/6 - loss 1.41283623 - time (sec): 3.53 - samples/sec: 1019.09 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:26:59,143 epoch 5 - iter 2/6 - loss 1.39479639 - time (sec): 7.00 - samples/sec: 1088.19 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:27:03,153 epoch 5 - iter 3/6 - loss 1.36114611 - time (sec): 11.01 - samples/sec: 1079.62 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:27:06,094 epoch 5 - iter 4/6 - loss 1.36160228 - time (sec): 13.95 - samples/sec: 1149.29 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:27:08,832 epoch 5 - iter 5/6 - loss 1.33798755 - time (sec): 16.69 - samples/sec: 1189.40 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:27:10,445 epoch 5 - iter 6/6 - loss 1.33776262 - time (sec): 18.30 - samples/sec: 1224.27 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:27:10,447 ----------------------------------------------------------------------------------------------------
2023-10-30 16:27:10,451 EPOCH 5 done:

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:27:11,308 DEV : loss 1.9240903854370117 - f1-score (micro avg)  0.0127
2023-10-30 16:27:11,312 ----------------------------------------------------------------------------------------------------





2023-10-30 16:27:13,936 epoch 6 - iter 1/6 - loss 1.26669681 - time (sec): 2.62 - samples/sec: 1487.06 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:27:17,399 epoch 6 - iter 2/6 - loss 1.24934580 - time (sec): 6.08 - samples/sec: 1287.24 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:27:21,465 epoch 6 - iter 3/6 - loss 1.22691866 - time (sec): 10.15 - samples/sec: 1149.56 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:27:24,556 epoch 6 - iter 4/6 - loss 1.20430654 - time (sec): 13.24 - samples/sec: 1205.99 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:27:26,783 epoch 6 - iter 5/6 - loss 1.19704628 - time (sec): 15.47 - samples/sec: 1278.76 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:27:28,415 epoch 6 - iter 6/6 - loss 1.19185374 - time (sec): 17.10 - samples/sec: 1310.27 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:27:28,416 ----------------------------------------------------------------------------------------------------
2023-10-30 16:27:28,423 EPOCH 6 done:

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:27:29,261 DEV : loss 1.9187811613082886 - f1-score (micro avg)  0.0127
2023-10-30 16:27:29,265 ----------------------------------------------------------------------------------------------------





2023-10-30 16:27:32,155 epoch 7 - iter 1/6 - loss 1.17994570 - time (sec): 2.89 - samples/sec: 1292.74 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:27:35,853 epoch 7 - iter 2/6 - loss 1.15984817 - time (sec): 6.59 - samples/sec: 1208.68 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:27:39,516 epoch 7 - iter 3/6 - loss 1.14359881 - time (sec): 10.25 - samples/sec: 1167.22 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:27:41,873 epoch 7 - iter 4/6 - loss 1.13993869 - time (sec): 12.61 - samples/sec: 1262.04 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:27:45,306 epoch 7 - iter 5/6 - loss 1.12480313 - time (sec): 16.04 - samples/sec: 1243.12 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:27:46,837 epoch 7 - iter 6/6 - loss 1.11971006 - time (sec): 17.57 - samples/sec: 1275.10 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:27:46,839 ----------------------------------------------------------------------------------------------------
2023-10-30 16:27:46,843 EPOCH 7 done:

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:27:47,687 DEV : loss 1.9144893884658813 - f1-score (micro avg)  0.0128
2023-10-30 16:27:47,690 ----------------------------------------------------------------------------------------------------





2023-10-30 16:27:51,327 epoch 8 - iter 1/6 - loss 1.07895755 - time (sec): 3.63 - samples/sec: 1117.78 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:27:54,968 epoch 8 - iter 2/6 - loss 1.04875985 - time (sec): 7.27 - samples/sec: 1113.86 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:27:57,926 epoch 8 - iter 3/6 - loss 1.02672336 - time (sec): 10.23 - samples/sec: 1164.66 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:28:00,718 epoch 8 - iter 4/6 - loss 1.01844441 - time (sec): 13.03 - samples/sec: 1208.49 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:28:03,400 epoch 8 - iter 5/6 - loss 1.02304242 - time (sec): 15.71 - samples/sec: 1242.24 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:28:05,192 epoch 8 - iter 6/6 - loss 1.00757591 - time (sec): 17.50 - samples/sec: 1280.26 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:28:05,195 ----------------------------------------------------------------------------------------------------
2023-10-30 16:28:05,197 EPOCH 8 done:

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:28:06,043 DEV : loss 1.9106130599975586 - f1-score (micro avg)  0.0129
2023-10-30 16:28:06,047 ----------------------------------------------------------------------------------------------------





2023-10-30 16:28:09,247 epoch 9 - iter 1/6 - loss 0.84894647 - time (sec): 3.20 - samples/sec: 1145.43 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:28:13,123 epoch 9 - iter 2/6 - loss 0.93003032 - time (sec): 7.07 - samples/sec: 1108.07 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:28:16,592 epoch 9 - iter 3/6 - loss 0.91609610 - time (sec): 10.54 - samples/sec: 1121.03 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:28:19,303 epoch 9 - iter 4/6 - loss 0.91082973 - time (sec): 13.25 - samples/sec: 1180.67 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:28:22,256 epoch 9 - iter 5/6 - loss 0.89841346 - time (sec): 16.21 - samples/sec: 1217.33 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:28:23,913 epoch 9 - iter 6/6 - loss 0.89467222 - time (sec): 17.86 - samples/sec: 1254.17 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:28:23,917 ----------------------------------------------------------------------------------------------------
2023-10-30 16:28:23,920 EPOCH 9 done:

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:28:24,797 DEV : loss 1.9070578813552856 - f1-score (micro avg)  0.0146
2023-10-30 16:28:24,802 ----------------------------------------------------------------------------------------------------





2023-10-30 16:28:28,353 epoch 10 - iter 1/6 - loss 0.80500527 - time (sec): 3.55 - samples/sec: 1168.01 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:28:32,019 epoch 10 - iter 2/6 - loss 0.82890310 - time (sec): 7.21 - samples/sec: 1111.18 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:28:35,356 epoch 10 - iter 3/6 - loss 0.81659473 - time (sec): 10.55 - samples/sec: 1158.47 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:28:38,965 epoch 10 - iter 4/6 - loss 0.80195261 - time (sec): 14.16 - samples/sec: 1163.80 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:28:41,480 epoch 10 - iter 5/6 - loss 0.80277596 - time (sec): 16.68 - samples/sec: 1195.01 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:28:43,058 epoch 10 - iter 6/6 - loss 0.79374491 - time (sec): 18.25 - samples/sec: 1227.36 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:28:43,065 ----------------------------------------------------------------------------------------------------
2023-10-30 16:28:43,069 EPOCH 1

100%|██████████| 1/1 [00:01<00:00,  1.73s/it]

2023-10-30 16:28:44,842 DEV : loss 1.9044408798217773 - f1-score (micro avg)  0.0146
2023-10-30 16:28:44,846 ----------------------------------------------------------------------------------------------------





2023-10-30 16:28:48,652 epoch 11 - iter 1/6 - loss 0.72264493 - time (sec): 3.80 - samples/sec: 1009.96 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:28:53,070 epoch 11 - iter 2/6 - loss 0.75125501 - time (sec): 8.22 - samples/sec: 1004.28 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:28:55,958 epoch 11 - iter 3/6 - loss 0.72184032 - time (sec): 11.11 - samples/sec: 1100.42 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:28:58,782 epoch 11 - iter 4/6 - loss 0.70942012 - time (sec): 13.93 - samples/sec: 1162.94 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:29:01,023 epoch 11 - iter 5/6 - loss 0.70577961 - time (sec): 16.17 - samples/sec: 1237.00 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:02,949 epoch 11 - iter 6/6 - loss 0.69681207 - time (sec): 18.10 - samples/sec: 1237.84 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:02,951 ----------------------------------------------------------------------------------------------------
2023-10-30 16:29:02,953 EPOCH 1

100%|██████████| 1/1 [00:01<00:00,  1.66s/it]

2023-10-30 16:29:04,662 DEV : loss 1.9026591777801514 - f1-score (micro avg)  0.0146
2023-10-30 16:29:04,666 ----------------------------------------------------------------------------------------------------





2023-10-30 16:29:08,198 epoch 12 - iter 1/6 - loss 0.63333764 - time (sec): 3.53 - samples/sec: 1058.81 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:11,421 epoch 12 - iter 2/6 - loss 0.60663925 - time (sec): 6.75 - samples/sec: 1146.63 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:14,621 epoch 12 - iter 3/6 - loss 0.60204673 - time (sec): 9.95 - samples/sec: 1222.46 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:17,578 epoch 12 - iter 4/6 - loss 0.59988314 - time (sec): 12.91 - samples/sec: 1261.31 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:20,843 epoch 12 - iter 5/6 - loss 0.60545358 - time (sec): 16.17 - samples/sec: 1236.41 - lr: 0.000050 - momentum: 0.000000
2023-10-30 16:29:22,880 epoch 12 - iter 6/6 - loss 0.59282130 - time (sec): 18.21 - samples/sec: 1230.24 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:22,883 ----------------------------------------------------------------------------------------------------
2023-10-30 16:29:22,886 EPOCH 12

100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

2023-10-30 16:29:24,486 DEV : loss 1.9009678363800049 - f1-score (micro avg)  0.0145
2023-10-30 16:29:24,491 ----------------------------------------------------------------------------------------------------





2023-10-30 16:29:28,501 epoch 13 - iter 1/6 - loss 0.46476510 - time (sec): 4.01 - samples/sec: 994.18 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:31,364 epoch 13 - iter 2/6 - loss 0.45953852 - time (sec): 6.87 - samples/sec: 1164.35 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:33,999 epoch 13 - iter 3/6 - loss 0.46783903 - time (sec): 9.50 - samples/sec: 1219.06 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:36,851 epoch 13 - iter 4/6 - loss 0.45155541 - time (sec): 12.36 - samples/sec: 1257.52 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:40,341 epoch 13 - iter 5/6 - loss 0.47428788 - time (sec): 15.85 - samples/sec: 1245.18 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:42,912 epoch 13 - iter 6/6 - loss 0.47348322 - time (sec): 18.42 - samples/sec: 1216.42 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:42,915 ----------------------------------------------------------------------------------------------------
2023-10-30 16:29:42,917 EPOCH 13 

100%|██████████| 1/1 [00:01<00:00,  1.54s/it]

2023-10-30 16:29:44,506 DEV : loss 1.8997899293899536 - f1-score (micro avg)  0.0176
2023-10-30 16:29:44,512 ----------------------------------------------------------------------------------------------------





2023-10-30 16:29:48,013 epoch 14 - iter 1/6 - loss 0.52239090 - time (sec): 3.50 - samples/sec: 1126.28 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:50,656 epoch 14 - iter 2/6 - loss 0.44166931 - time (sec): 6.14 - samples/sec: 1240.90 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:53,439 epoch 14 - iter 3/6 - loss 0.44591634 - time (sec): 8.92 - samples/sec: 1290.39 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:29:56,135 epoch 14 - iter 4/6 - loss 0.43319342 - time (sec): 11.62 - samples/sec: 1298.50 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:30:00,180 epoch 14 - iter 5/6 - loss 0.43006632 - time (sec): 15.66 - samples/sec: 1255.04 - lr: 0.000049 - momentum: 0.000000
2023-10-30 16:30:02,759 epoch 14 - iter 6/6 - loss 0.41895171 - time (sec): 18.24 - samples/sec: 1228.06 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:02,766 ----------------------------------------------------------------------------------------------------
2023-10-30 16:30:02,768 EPOCH 14

100%|██████████| 1/1 [00:01<00:00,  1.02s/it]

2023-10-30 16:30:03,828 DEV : loss 1.898698091506958 - f1-score (micro avg)  0.0176
2023-10-30 16:30:03,831 ----------------------------------------------------------------------------------------------------





2023-10-30 16:30:07,839 epoch 15 - iter 1/6 - loss 0.35839191 - time (sec): 4.00 - samples/sec: 1064.98 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:10,654 epoch 15 - iter 2/6 - loss 0.34984735 - time (sec): 6.82 - samples/sec: 1196.74 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:12,878 epoch 15 - iter 3/6 - loss 0.33909711 - time (sec): 9.04 - samples/sec: 1314.36 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:16,462 epoch 15 - iter 4/6 - loss 0.34225627 - time (sec): 12.63 - samples/sec: 1282.46 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:20,062 epoch 15 - iter 5/6 - loss 0.33522595 - time (sec): 16.23 - samples/sec: 1225.59 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:22,067 epoch 15 - iter 6/6 - loss 0.33278113 - time (sec): 18.23 - samples/sec: 1228.75 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:22,069 ----------------------------------------------------------------------------------------------------
2023-10-30 16:30:22,075 EPOCH 15

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:30:22,933 DEV : loss 1.8977737426757812 - f1-score (micro avg)  0.0176
2023-10-30 16:30:22,936 ----------------------------------------------------------------------------------------------------





2023-10-30 16:30:24,849 epoch 16 - iter 1/6 - loss 0.22385862 - time (sec): 1.91 - samples/sec: 1728.11 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:28,419 epoch 16 - iter 2/6 - loss 0.28020407 - time (sec): 5.48 - samples/sec: 1376.02 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:31,508 epoch 16 - iter 3/6 - loss 0.29365568 - time (sec): 8.57 - samples/sec: 1379.78 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:35,282 epoch 16 - iter 4/6 - loss 0.28266707 - time (sec): 12.34 - samples/sec: 1311.39 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:38,957 epoch 16 - iter 5/6 - loss 0.27491555 - time (sec): 16.02 - samples/sec: 1253.18 - lr: 0.000048 - momentum: 0.000000
2023-10-30 16:30:40,462 epoch 16 - iter 6/6 - loss 0.28069341 - time (sec): 17.52 - samples/sec: 1278.55 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:40,466 ----------------------------------------------------------------------------------------------------
2023-10-30 16:30:40,468 EPOCH 16

100%|██████████| 1/1 [00:00<00:00,  1.25it/s]

2023-10-30 16:30:41,306 DEV : loss 1.8970893621444702 - f1-score (micro avg)  0.0176
2023-10-30 16:30:41,310 ----------------------------------------------------------------------------------------------------





2023-10-30 16:30:44,650 epoch 17 - iter 1/6 - loss 0.24875500 - time (sec): 3.34 - samples/sec: 1240.74 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:46,905 epoch 17 - iter 2/6 - loss 0.24359219 - time (sec): 5.59 - samples/sec: 1424.00 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:49,883 epoch 17 - iter 3/6 - loss 0.26958026 - time (sec): 8.57 - samples/sec: 1384.05 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:53,013 epoch 17 - iter 4/6 - loss 0.25221206 - time (sec): 11.70 - samples/sec: 1345.27 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:57,000 epoch 17 - iter 5/6 - loss 0.24039678 - time (sec): 15.69 - samples/sec: 1274.26 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:58,430 epoch 17 - iter 6/6 - loss 0.23569581 - time (sec): 17.12 - samples/sec: 1308.89 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:30:58,432 ----------------------------------------------------------------------------------------------------
2023-10-30 16:30:58,434 EPOCH 17

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:30:59,307 DEV : loss 1.896294116973877 - f1-score (micro avg)  0.0176
2023-10-30 16:30:59,312 ----------------------------------------------------------------------------------------------------





2023-10-30 16:31:02,413 epoch 18 - iter 1/6 - loss 0.20976388 - time (sec): 3.10 - samples/sec: 1411.55 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:31:04,594 epoch 18 - iter 2/6 - loss 0.20012839 - time (sec): 5.28 - samples/sec: 1535.86 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:31:07,447 epoch 18 - iter 3/6 - loss 0.19169753 - time (sec): 8.13 - samples/sec: 1477.16 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:31:10,812 epoch 18 - iter 4/6 - loss 0.19670903 - time (sec): 11.50 - samples/sec: 1384.52 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:31:14,299 epoch 18 - iter 5/6 - loss 0.19755004 - time (sec): 14.98 - samples/sec: 1307.87 - lr: 0.000047 - momentum: 0.000000
2023-10-30 16:31:16,278 epoch 18 - iter 6/6 - loss 0.19634183 - time (sec): 16.96 - samples/sec: 1320.70 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:16,281 ----------------------------------------------------------------------------------------------------
2023-10-30 16:31:16,283 EPOCH 18

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:31:17,142 DEV : loss 1.8957018852233887 - f1-score (micro avg)  0.0191
2023-10-30 16:31:17,145 ----------------------------------------------------------------------------------------------------





2023-10-30 16:31:20,639 epoch 19 - iter 1/6 - loss 0.15923188 - time (sec): 3.49 - samples/sec: 1220.52 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:24,026 epoch 19 - iter 2/6 - loss 0.18967043 - time (sec): 6.88 - samples/sec: 1200.93 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:26,757 epoch 19 - iter 3/6 - loss 0.17039858 - time (sec): 9.61 - samples/sec: 1258.93 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:30,277 epoch 19 - iter 4/6 - loss 0.17151742 - time (sec): 13.13 - samples/sec: 1223.61 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:33,765 epoch 19 - iter 5/6 - loss 0.16825854 - time (sec): 16.62 - samples/sec: 1197.53 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:35,202 epoch 19 - iter 6/6 - loss 0.16686931 - time (sec): 18.05 - samples/sec: 1240.93 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:35,204 ----------------------------------------------------------------------------------------------------
2023-10-30 16:31:35,207 EPOCH 19

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:31:36,056 DEV : loss 1.8949729204177856 - f1-score (micro avg)  0.0191
2023-10-30 16:31:36,060 ----------------------------------------------------------------------------------------------------





2023-10-30 16:31:38,967 epoch 20 - iter 1/6 - loss 0.13429480 - time (sec): 2.90 - samples/sec: 1378.23 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:42,383 epoch 20 - iter 2/6 - loss 0.15321836 - time (sec): 6.32 - samples/sec: 1265.00 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:45,638 epoch 20 - iter 3/6 - loss 0.14203894 - time (sec): 9.58 - samples/sec: 1251.21 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:49,407 epoch 20 - iter 4/6 - loss 0.14681002 - time (sec): 13.34 - samples/sec: 1199.99 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:52,818 epoch 20 - iter 5/6 - loss 0.14473136 - time (sec): 16.76 - samples/sec: 1193.59 - lr: 0.000046 - momentum: 0.000000
2023-10-30 16:31:54,305 epoch 20 - iter 6/6 - loss 0.14573250 - time (sec): 18.24 - samples/sec: 1228.09 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:31:54,307 ----------------------------------------------------------------------------------------------------
2023-10-30 16:31:54,309 EPOCH 20

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:31:55,163 DEV : loss 1.8943932056427002 - f1-score (micro avg)  0.0191
2023-10-30 16:31:55,166 ----------------------------------------------------------------------------------------------------





2023-10-30 16:31:57,771 epoch 21 - iter 1/6 - loss 0.13476204 - time (sec): 2.60 - samples/sec: 1466.97 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:00,794 epoch 21 - iter 2/6 - loss 0.13161726 - time (sec): 5.63 - samples/sec: 1411.09 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:04,085 epoch 21 - iter 3/6 - loss 0.12720443 - time (sec): 8.92 - samples/sec: 1325.89 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:07,599 epoch 21 - iter 4/6 - loss 0.12232070 - time (sec): 12.43 - samples/sec: 1256.88 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:11,540 epoch 21 - iter 5/6 - loss 0.12894934 - time (sec): 16.37 - samples/sec: 1209.56 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:13,046 epoch 21 - iter 6/6 - loss 0.13111200 - time (sec): 17.88 - samples/sec: 1253.23 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:13,048 ----------------------------------------------------------------------------------------------------
2023-10-30 16:32:13,050 EPOCH 21

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:32:13,918 DEV : loss 1.8938382863998413 - f1-score (micro avg)  0.0191
2023-10-30 16:32:13,922 ----------------------------------------------------------------------------------------------------





2023-10-30 16:32:16,667 epoch 22 - iter 1/6 - loss 0.08394336 - time (sec): 2.74 - samples/sec: 1381.54 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:19,061 epoch 22 - iter 2/6 - loss 0.12814939 - time (sec): 5.14 - samples/sec: 1498.53 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:22,003 epoch 22 - iter 3/6 - loss 0.11881439 - time (sec): 8.08 - samples/sec: 1413.68 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:26,649 epoch 22 - iter 4/6 - loss 0.12059319 - time (sec): 12.73 - samples/sec: 1240.98 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:29,084 epoch 22 - iter 5/6 - loss 0.10986164 - time (sec): 15.16 - samples/sec: 1316.81 - lr: 0.000045 - momentum: 0.000000
2023-10-30 16:32:30,506 epoch 22 - iter 6/6 - loss 0.10865912 - time (sec): 16.58 - samples/sec: 1351.08 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:30,508 ----------------------------------------------------------------------------------------------------
2023-10-30 16:32:30,513 EPOCH 22

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:32:31,362 DEV : loss 1.8933968544006348 - f1-score (micro avg)  0.0191
2023-10-30 16:32:31,366 ----------------------------------------------------------------------------------------------------





2023-10-30 16:32:34,749 epoch 23 - iter 1/6 - loss 0.09070381 - time (sec): 3.38 - samples/sec: 1243.05 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:37,019 epoch 23 - iter 2/6 - loss 0.09972901 - time (sec): 5.65 - samples/sec: 1393.25 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:40,611 epoch 23 - iter 3/6 - loss 0.10408789 - time (sec): 9.24 - samples/sec: 1301.21 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:44,443 epoch 23 - iter 4/6 - loss 0.10170795 - time (sec): 13.07 - samples/sec: 1226.47 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:47,404 epoch 23 - iter 5/6 - loss 0.09317019 - time (sec): 16.04 - samples/sec: 1251.43 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:49,080 epoch 23 - iter 6/6 - loss 0.09851800 - time (sec): 17.71 - samples/sec: 1264.94 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:49,082 ----------------------------------------------------------------------------------------------------
2023-10-30 16:32:49,084 EPOCH 23

100%|██████████| 1/1 [00:00<00:00,  1.25it/s]

2023-10-30 16:32:49,931 DEV : loss 1.892975091934204 - f1-score (micro avg)  0.0191
2023-10-30 16:32:49,935 ----------------------------------------------------------------------------------------------------





2023-10-30 16:32:53,372 epoch 24 - iter 1/6 - loss 0.08630470 - time (sec): 3.43 - samples/sec: 1186.01 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:32:56,764 epoch 24 - iter 2/6 - loss 0.09157852 - time (sec): 6.83 - samples/sec: 1246.47 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:33:00,105 epoch 24 - iter 3/6 - loss 0.09065598 - time (sec): 10.17 - samples/sec: 1200.35 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:33:03,671 epoch 24 - iter 4/6 - loss 0.08528593 - time (sec): 13.73 - samples/sec: 1177.57 - lr: 0.000044 - momentum: 0.000000
2023-10-30 16:33:06,318 epoch 24 - iter 5/6 - loss 0.08708915 - time (sec): 16.38 - samples/sec: 1212.70 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:07,925 epoch 24 - iter 6/6 - loss 0.08762169 - time (sec): 17.99 - samples/sec: 1245.52 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:07,928 ----------------------------------------------------------------------------------------------------
2023-10-30 16:33:07,930 EPOCH 2

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:33:08,799 DEV : loss 1.8927448987960815 - f1-score (micro avg)  0.019
2023-10-30 16:33:08,802 ----------------------------------------------------------------------------------------------------





2023-10-30 16:33:12,107 epoch 25 - iter 1/6 - loss 0.06407550 - time (sec): 3.30 - samples/sec: 1236.67 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:15,391 epoch 25 - iter 2/6 - loss 0.06003283 - time (sec): 6.59 - samples/sec: 1215.77 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:18,957 epoch 25 - iter 3/6 - loss 0.06735534 - time (sec): 10.15 - samples/sec: 1165.94 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:22,373 epoch 25 - iter 4/6 - loss 0.08084308 - time (sec): 13.57 - samples/sec: 1170.04 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:24,661 epoch 25 - iter 5/6 - loss 0.07783281 - time (sec): 15.86 - samples/sec: 1245.87 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:26,548 epoch 25 - iter 6/6 - loss 0.08652390 - time (sec): 17.74 - samples/sec: 1262.73 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:26,549 ----------------------------------------------------------------------------------------------------
2023-10-30 16:33:26,556 EPOCH 2

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:33:27,407 DEV : loss 1.892543077468872 - f1-score (micro avg)  0.019
2023-10-30 16:33:27,412 ----------------------------------------------------------------------------------------------------





2023-10-30 16:33:30,499 epoch 26 - iter 1/6 - loss 0.07373724 - time (sec): 3.09 - samples/sec: 1404.70 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:34,044 epoch 26 - iter 2/6 - loss 0.06621555 - time (sec): 6.63 - samples/sec: 1276.21 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:37,887 epoch 26 - iter 3/6 - loss 0.06195891 - time (sec): 10.47 - samples/sec: 1199.30 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:40,688 epoch 26 - iter 4/6 - loss 0.06961828 - time (sec): 13.27 - samples/sec: 1219.57 - lr: 0.000043 - momentum: 0.000000
2023-10-30 16:33:43,356 epoch 26 - iter 5/6 - loss 0.06617249 - time (sec): 15.94 - samples/sec: 1249.85 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:33:44,946 epoch 26 - iter 6/6 - loss 0.06927498 - time (sec): 17.53 - samples/sec: 1277.91 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:33:44,948 ----------------------------------------------------------------------------------------------------
2023-10-30 16:33:44,952 EPOCH 2

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:33:45,815 DEV : loss 1.892240047454834 - f1-score (micro avg)  0.0206
2023-10-30 16:33:45,819 ----------------------------------------------------------------------------------------------------





2023-10-30 16:33:48,404 epoch 27 - iter 1/6 - loss 0.04213337 - time (sec): 2.58 - samples/sec: 1463.26 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:33:52,119 epoch 27 - iter 2/6 - loss 0.04955292 - time (sec): 6.30 - samples/sec: 1281.45 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:33:55,742 epoch 27 - iter 3/6 - loss 0.05936548 - time (sec): 9.92 - samples/sec: 1194.30 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:33:59,045 epoch 27 - iter 4/6 - loss 0.05890241 - time (sec): 13.22 - samples/sec: 1214.82 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:02,074 epoch 27 - iter 5/6 - loss 0.05941012 - time (sec): 16.25 - samples/sec: 1243.65 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:03,363 epoch 27 - iter 6/6 - loss 0.05842954 - time (sec): 17.54 - samples/sec: 1277.24 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:03,364 ----------------------------------------------------------------------------------------------------
2023-10-30 16:34:03,369 EPOCH 27

100%|██████████| 1/1 [00:00<00:00,  1.14it/s]

2023-10-30 16:34:04,286 DEV : loss 1.8919711112976074 - f1-score (micro avg)  0.0206
2023-10-30 16:34:04,290 ----------------------------------------------------------------------------------------------------





2023-10-30 16:34:07,668 epoch 28 - iter 1/6 - loss 0.03831295 - time (sec): 3.37 - samples/sec: 1337.53 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:11,113 epoch 28 - iter 2/6 - loss 0.03877733 - time (sec): 6.82 - samples/sec: 1243.54 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:14,639 epoch 28 - iter 3/6 - loss 0.04365326 - time (sec): 10.35 - samples/sec: 1196.51 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:17,438 epoch 28 - iter 4/6 - loss 0.05740375 - time (sec): 13.14 - samples/sec: 1238.06 - lr: 0.000042 - momentum: 0.000000
2023-10-30 16:34:20,101 epoch 28 - iter 5/6 - loss 0.05721858 - time (sec): 15.81 - samples/sec: 1264.88 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:21,610 epoch 28 - iter 6/6 - loss 0.05702448 - time (sec): 17.32 - samples/sec: 1293.79 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:21,612 ----------------------------------------------------------------------------------------------------
2023-10-30 16:34:21,616 EPOCH 2

100%|██████████| 1/1 [00:00<00:00,  1.27it/s]

2023-10-30 16:34:22,439 DEV : loss 1.8916587829589844 - f1-score (micro avg)  0.0206
2023-10-30 16:34:22,443 ----------------------------------------------------------------------------------------------------





2023-10-30 16:34:25,599 epoch 29 - iter 1/6 - loss 0.05388389 - time (sec): 3.15 - samples/sec: 1186.55 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:29,106 epoch 29 - iter 2/6 - loss 0.05022946 - time (sec): 6.66 - samples/sec: 1164.69 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:32,462 epoch 29 - iter 3/6 - loss 0.04631272 - time (sec): 10.02 - samples/sec: 1126.56 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:36,019 epoch 29 - iter 4/6 - loss 0.04718146 - time (sec): 13.57 - samples/sec: 1138.50 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:38,970 epoch 29 - iter 5/6 - loss 0.05434778 - time (sec): 16.52 - samples/sec: 1187.17 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:40,690 epoch 29 - iter 6/6 - loss 0.05217007 - time (sec): 18.24 - samples/sec: 1228.07 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:40,691 ----------------------------------------------------------------------------------------------------
2023-10-30 16:34:40,698 EPOCH 2

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:34:41,544 DEV : loss 1.8914060592651367 - f1-score (micro avg)  0.0205
2023-10-30 16:34:41,548 ----------------------------------------------------------------------------------------------------





2023-10-30 16:34:44,120 epoch 30 - iter 1/6 - loss 0.06874166 - time (sec): 2.57 - samples/sec: 1370.11 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:48,020 epoch 30 - iter 2/6 - loss 0.06325674 - time (sec): 6.47 - samples/sec: 1222.61 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:51,544 epoch 30 - iter 3/6 - loss 0.04996552 - time (sec): 9.99 - samples/sec: 1186.86 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:54,355 epoch 30 - iter 4/6 - loss 0.04607725 - time (sec): 12.80 - samples/sec: 1230.44 - lr: 0.000041 - momentum: 0.000000
2023-10-30 16:34:57,845 epoch 30 - iter 5/6 - loss 0.05156283 - time (sec): 16.29 - samples/sec: 1220.52 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:34:59,410 epoch 30 - iter 6/6 - loss 0.05281205 - time (sec): 17.86 - samples/sec: 1254.56 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:34:59,412 ----------------------------------------------------------------------------------------------------
2023-10-30 16:34:59,415 EPOCH 30

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]

2023-10-30 16:35:00,489 DEV : loss 1.891112208366394 - f1-score (micro avg)  0.0205
2023-10-30 16:35:00,497 ----------------------------------------------------------------------------------------------------





2023-10-30 16:35:04,752 epoch 31 - iter 1/6 - loss 0.08397150 - time (sec): 4.25 - samples/sec: 1024.66 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:08,835 epoch 31 - iter 2/6 - loss 0.07112615 - time (sec): 8.34 - samples/sec: 995.44 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:11,677 epoch 31 - iter 3/6 - loss 0.06395753 - time (sec): 11.18 - samples/sec: 1095.46 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:14,670 epoch 31 - iter 4/6 - loss 0.05468604 - time (sec): 14.17 - samples/sec: 1157.55 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:17,402 epoch 31 - iter 5/6 - loss 0.04776042 - time (sec): 16.90 - samples/sec: 1196.03 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:18,966 epoch 31 - iter 6/6 - loss 0.04758998 - time (sec): 18.47 - samples/sec: 1213.25 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:18,969 ----------------------------------------------------------------------------------------------------
2023-10-30 16:35:18,972 EPOCH 31

100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

2023-10-30 16:35:20,714 DEV : loss 1.8908169269561768 - f1-score (micro avg)  0.0206
2023-10-30 16:35:20,720 ----------------------------------------------------------------------------------------------------





2023-10-30 16:35:23,857 epoch 32 - iter 1/6 - loss 0.03213509 - time (sec): 3.13 - samples/sec: 1140.08 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:27,237 epoch 32 - iter 2/6 - loss 0.03485507 - time (sec): 6.51 - samples/sec: 1117.84 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:30,469 epoch 32 - iter 3/6 - loss 0.03520544 - time (sec): 9.74 - samples/sec: 1212.15 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:33,372 epoch 32 - iter 4/6 - loss 0.03765155 - time (sec): 12.65 - samples/sec: 1252.64 - lr: 0.000040 - momentum: 0.000000
2023-10-30 16:35:36,283 epoch 32 - iter 5/6 - loss 0.04650145 - time (sec): 15.56 - samples/sec: 1280.47 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:38,347 epoch 32 - iter 6/6 - loss 0.06177757 - time (sec): 17.62 - samples/sec: 1271.32 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:38,355 ----------------------------------------------------------------------------------------------------
2023-10-30 16:35:38,358 EPOCH 32

100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

2023-10-30 16:35:40,104 DEV : loss 1.890626072883606 - f1-score (micro avg)  0.0206
2023-10-30 16:35:40,110 ----------------------------------------------------------------------------------------------------





2023-10-30 16:35:44,371 epoch 33 - iter 1/6 - loss 0.05488078 - time (sec): 4.26 - samples/sec: 963.89 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:47,566 epoch 33 - iter 2/6 - loss 0.05090287 - time (sec): 7.45 - samples/sec: 1143.71 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:50,766 epoch 33 - iter 3/6 - loss 0.04841853 - time (sec): 10.65 - samples/sec: 1146.21 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:53,498 epoch 33 - iter 4/6 - loss 0.04628696 - time (sec): 13.38 - samples/sec: 1196.25 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:56,425 epoch 33 - iter 5/6 - loss 0.04489081 - time (sec): 16.31 - samples/sec: 1213.48 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:58,689 epoch 33 - iter 6/6 - loss 0.04632643 - time (sec): 18.57 - samples/sec: 1206.21 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:35:58,692 ----------------------------------------------------------------------------------------------------
2023-10-30 16:35:58,695 EPOCH 33

100%|██████████| 1/1 [00:01<00:00,  1.54s/it]

2023-10-30 16:36:00,286 DEV : loss 1.8904401063919067 - f1-score (micro avg)  0.0205
2023-10-30 16:36:00,292 ----------------------------------------------------------------------------------------------------





2023-10-30 16:36:03,826 epoch 34 - iter 1/6 - loss 0.02555900 - time (sec): 3.53 - samples/sec: 1133.25 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:36:07,381 epoch 34 - iter 2/6 - loss 0.04359880 - time (sec): 7.09 - samples/sec: 1158.72 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:36:10,086 epoch 34 - iter 3/6 - loss 0.04445044 - time (sec): 9.79 - samples/sec: 1219.20 - lr: 0.000039 - momentum: 0.000000
2023-10-30 16:36:12,710 epoch 34 - iter 4/6 - loss 0.03953535 - time (sec): 12.41 - samples/sec: 1253.23 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:16,496 epoch 34 - iter 5/6 - loss 0.03782393 - time (sec): 16.20 - samples/sec: 1234.49 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:18,810 epoch 34 - iter 6/6 - loss 0.03790752 - time (sec): 18.51 - samples/sec: 1210.08 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:18,814 ----------------------------------------------------------------------------------------------------
2023-10-30 16:36:18,818 EPOCH 34

100%|██████████| 1/1 [00:01<00:00,  1.54s/it]

2023-10-30 16:36:20,398 DEV : loss 1.890213131904602 - f1-score (micro avg)  0.0205
2023-10-30 16:36:20,402 ----------------------------------------------------------------------------------------------------





2023-10-30 16:36:23,597 epoch 35 - iter 1/6 - loss 0.05947563 - time (sec): 3.19 - samples/sec: 1240.95 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:26,497 epoch 35 - iter 2/6 - loss 0.04086704 - time (sec): 6.09 - samples/sec: 1305.85 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:30,063 epoch 35 - iter 3/6 - loss 0.04283620 - time (sec): 9.66 - samples/sec: 1256.89 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:33,504 epoch 35 - iter 4/6 - loss 0.03903393 - time (sec): 13.10 - samples/sec: 1244.54 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:37,178 epoch 35 - iter 5/6 - loss 0.03600163 - time (sec): 16.77 - samples/sec: 1204.25 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:38,941 epoch 35 - iter 6/6 - loss 0.03558413 - time (sec): 18.54 - samples/sec: 1208.63 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:38,943 ----------------------------------------------------------------------------------------------------
2023-10-30 16:36:38,950 EPOCH 35

100%|██████████| 1/1 [00:00<00:00,  1.17it/s]

2023-10-30 16:36:39,839 DEV : loss 1.889983892440796 - f1-score (micro avg)  0.0189
2023-10-30 16:36:39,844 ----------------------------------------------------------------------------------------------------





2023-10-30 16:36:42,822 epoch 36 - iter 1/6 - loss 0.05128602 - time (sec): 2.97 - samples/sec: 1193.63 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:45,726 epoch 36 - iter 2/6 - loss 0.04282468 - time (sec): 5.88 - samples/sec: 1295.99 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:48,623 epoch 36 - iter 3/6 - loss 0.03468047 - time (sec): 8.78 - samples/sec: 1324.02 - lr: 0.000038 - momentum: 0.000000
2023-10-30 16:36:51,985 epoch 36 - iter 4/6 - loss 0.03384918 - time (sec): 12.14 - samples/sec: 1278.80 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:36:56,079 epoch 36 - iter 5/6 - loss 0.03029662 - time (sec): 16.23 - samples/sec: 1226.12 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:36:57,553 epoch 36 - iter 6/6 - loss 0.02857497 - time (sec): 17.71 - samples/sec: 1265.27 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:36:57,555 ----------------------------------------------------------------------------------------------------
2023-10-30 16:36:57,560 EPOCH 36

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:36:58,411 DEV : loss 1.8896650075912476 - f1-score (micro avg)  0.0189
2023-10-30 16:36:58,415 ----------------------------------------------------------------------------------------------------





2023-10-30 16:37:01,041 epoch 37 - iter 1/6 - loss 0.03268127 - time (sec): 2.62 - samples/sec: 1450.18 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:03,796 epoch 37 - iter 2/6 - loss 0.02290262 - time (sec): 5.38 - samples/sec: 1419.72 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:06,651 epoch 37 - iter 3/6 - loss 0.02127804 - time (sec): 8.23 - samples/sec: 1405.43 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:10,744 epoch 37 - iter 4/6 - loss 0.02760214 - time (sec): 12.33 - samples/sec: 1270.83 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:14,551 epoch 37 - iter 5/6 - loss 0.02533317 - time (sec): 16.13 - samples/sec: 1225.56 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:16,176 epoch 37 - iter 6/6 - loss 0.02524451 - time (sec): 17.76 - samples/sec: 1261.63 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:16,179 ----------------------------------------------------------------------------------------------------
2023-10-30 16:37:16,182 EPOCH 37

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:37:17,043 DEV : loss 1.8894460201263428 - f1-score (micro avg)  0.0189
2023-10-30 16:37:17,048 ----------------------------------------------------------------------------------------------------





2023-10-30 16:37:19,774 epoch 38 - iter 1/6 - loss 0.02149519 - time (sec): 2.72 - samples/sec: 1401.73 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:22,805 epoch 38 - iter 2/6 - loss 0.02934385 - time (sec): 5.76 - samples/sec: 1402.93 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:26,069 epoch 38 - iter 3/6 - loss 0.02998924 - time (sec): 9.02 - samples/sec: 1371.38 - lr: 0.000037 - momentum: 0.000000
2023-10-30 16:37:29,578 epoch 38 - iter 4/6 - loss 0.03336381 - time (sec): 12.53 - samples/sec: 1295.25 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:33,327 epoch 38 - iter 5/6 - loss 0.03143144 - time (sec): 16.28 - samples/sec: 1221.89 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:34,891 epoch 38 - iter 6/6 - loss 0.03033467 - time (sec): 17.84 - samples/sec: 1255.74 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:34,893 ----------------------------------------------------------------------------------------------------
2023-10-30 16:37:34,895 EPOCH 38

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:37:35,748 DEV : loss 1.8890609741210938 - f1-score (micro avg)  0.0189
2023-10-30 16:37:35,752 ----------------------------------------------------------------------------------------------------





2023-10-30 16:37:38,957 epoch 39 - iter 1/6 - loss 0.02915646 - time (sec): 3.20 - samples/sec: 1249.01 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:42,079 epoch 39 - iter 2/6 - loss 0.02148533 - time (sec): 6.32 - samples/sec: 1310.73 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:45,488 epoch 39 - iter 3/6 - loss 0.02362450 - time (sec): 9.73 - samples/sec: 1262.63 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:49,731 epoch 39 - iter 4/6 - loss 0.02424135 - time (sec): 13.98 - samples/sec: 1165.99 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:51,950 epoch 39 - iter 5/6 - loss 0.02514294 - time (sec): 16.20 - samples/sec: 1232.29 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:53,480 epoch 39 - iter 6/6 - loss 0.02495000 - time (sec): 17.72 - samples/sec: 1264.02 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:53,482 ----------------------------------------------------------------------------------------------------
2023-10-30 16:37:53,484 EPOCH 39

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:37:54,347 DEV : loss 1.8887830972671509 - f1-score (micro avg)  0.0189
2023-10-30 16:37:54,350 ----------------------------------------------------------------------------------------------------





2023-10-30 16:37:57,299 epoch 40 - iter 1/6 - loss 0.02885997 - time (sec): 2.95 - samples/sec: 1372.99 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:37:59,465 epoch 40 - iter 2/6 - loss 0.02236753 - time (sec): 5.11 - samples/sec: 1499.07 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:38:02,899 epoch 40 - iter 3/6 - loss 0.02193362 - time (sec): 8.55 - samples/sec: 1382.25 - lr: 0.000036 - momentum: 0.000000
2023-10-30 16:38:06,737 epoch 40 - iter 4/6 - loss 0.02364653 - time (sec): 12.38 - samples/sec: 1282.52 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:10,137 epoch 40 - iter 5/6 - loss 0.02105713 - time (sec): 15.78 - samples/sec: 1268.17 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:11,612 epoch 40 - iter 6/6 - loss 0.02235987 - time (sec): 17.26 - samples/sec: 1298.15 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:11,613 ----------------------------------------------------------------------------------------------------
2023-10-30 16:38:11,621 EPOCH 40

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:38:12,476 DEV : loss 1.8885897397994995 - f1-score (micro avg)  0.0189
2023-10-30 16:38:12,480 ----------------------------------------------------------------------------------------------------





2023-10-30 16:38:15,839 epoch 41 - iter 1/6 - loss 0.02283196 - time (sec): 3.36 - samples/sec: 1251.67 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:18,620 epoch 41 - iter 2/6 - loss 0.01887311 - time (sec): 6.14 - samples/sec: 1300.08 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:21,645 epoch 41 - iter 3/6 - loss 0.02046701 - time (sec): 9.16 - samples/sec: 1274.44 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:25,911 epoch 41 - iter 4/6 - loss 0.02027023 - time (sec): 13.43 - samples/sec: 1168.38 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:28,933 epoch 41 - iter 5/6 - loss 0.01746259 - time (sec): 16.45 - samples/sec: 1208.06 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:30,491 epoch 41 - iter 6/6 - loss 0.01763156 - time (sec): 18.01 - samples/sec: 1244.09 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:30,493 ----------------------------------------------------------------------------------------------------
2023-10-30 16:38:30,496 EPOCH 41

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:38:31,334 DEV : loss 1.888453483581543 - f1-score (micro avg)  0.0188
2023-10-30 16:38:31,338 ----------------------------------------------------------------------------------------------------





2023-10-30 16:38:34,057 epoch 42 - iter 1/6 - loss 0.00882894 - time (sec): 2.72 - samples/sec: 1399.56 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:37,663 epoch 42 - iter 2/6 - loss 0.01491633 - time (sec): 6.32 - samples/sec: 1250.25 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:41,262 epoch 42 - iter 3/6 - loss 0.01212659 - time (sec): 9.92 - samples/sec: 1205.81 - lr: 0.000035 - momentum: 0.000000
2023-10-30 16:38:45,067 epoch 42 - iter 4/6 - loss 0.01326415 - time (sec): 13.73 - samples/sec: 1187.75 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:38:47,834 epoch 42 - iter 5/6 - loss 0.01577971 - time (sec): 16.49 - samples/sec: 1224.17 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:38:49,224 epoch 42 - iter 6/6 - loss 0.01520324 - time (sec): 17.88 - samples/sec: 1252.73 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:38:49,226 ----------------------------------------------------------------------------------------------------
2023-10-30 16:38:49,231 EPOCH 42

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:38:50,089 DEV : loss 1.8882198333740234 - f1-score (micro avg)  0.0188
2023-10-30 16:38:50,093 ----------------------------------------------------------------------------------------------------





2023-10-30 16:38:53,187 epoch 43 - iter 1/6 - loss 0.00610048 - time (sec): 3.09 - samples/sec: 1219.58 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:38:56,586 epoch 43 - iter 2/6 - loss 0.01769399 - time (sec): 6.49 - samples/sec: 1131.03 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:00,414 epoch 43 - iter 3/6 - loss 0.01507732 - time (sec): 10.32 - samples/sec: 1100.38 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:03,802 epoch 43 - iter 4/6 - loss 0.02319725 - time (sec): 13.71 - samples/sec: 1131.21 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:06,842 epoch 43 - iter 5/6 - loss 0.02246749 - time (sec): 16.75 - samples/sec: 1180.66 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:08,373 epoch 43 - iter 6/6 - loss 0.02124129 - time (sec): 18.28 - samples/sec: 1225.78 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:08,375 ----------------------------------------------------------------------------------------------------
2023-10-30 16:39:08,377 EPOCH 4

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]

2023-10-30 16:39:09,255 DEV : loss 1.8880705833435059 - f1-score (micro avg)  0.0188
2023-10-30 16:39:09,258 ----------------------------------------------------------------------------------------------------





2023-10-30 16:39:12,198 epoch 44 - iter 1/6 - loss 0.00935538 - time (sec): 2.94 - samples/sec: 1320.59 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:15,368 epoch 44 - iter 2/6 - loss 0.01395150 - time (sec): 6.11 - samples/sec: 1262.11 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:19,035 epoch 44 - iter 3/6 - loss 0.01434856 - time (sec): 9.77 - samples/sec: 1183.29 - lr: 0.000034 - momentum: 0.000000
2023-10-30 16:39:22,437 epoch 44 - iter 4/6 - loss 0.01645354 - time (sec): 13.18 - samples/sec: 1229.47 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:25,125 epoch 44 - iter 5/6 - loss 0.01619670 - time (sec): 15.86 - samples/sec: 1258.53 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:26,572 epoch 44 - iter 6/6 - loss 0.01877867 - time (sec): 17.31 - samples/sec: 1294.23 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:26,574 ----------------------------------------------------------------------------------------------------
2023-10-30 16:39:26,577 EPOCH 44

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]


2023-10-30 16:39:27,800 DEV : loss 1.8879255056381226 - f1-score (micro avg)  0.0188
2023-10-30 16:39:27,804 ----------------------------------------------------------------------------------------------------
2023-10-30 16:39:30,722 epoch 45 - iter 1/6 - loss 0.01150481 - time (sec): 2.91 - samples/sec: 1325.93 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:33,790 epoch 45 - iter 2/6 - loss 0.01585566 - time (sec): 5.98 - samples/sec: 1266.46 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:37,365 epoch 45 - iter 3/6 - loss 0.01372582 - time (sec): 9.56 - samples/sec: 1192.58 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:40,727 epoch 45 - iter 4/6 - loss 0.01513963 - time (sec): 12.92 - samples/sec: 1190.42 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:43,803 epoch 45 - iter 5/6 - loss 0.01876092 - time (sec): 16.00 - samples/sec: 1235.44 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:45,485 epoch 45 - iter 6/6 - loss 0.01755774 - time (sec): 17.68 - samp

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:39:46,343 DEV : loss 1.8878017663955688 - f1-score (micro avg)  0.0188
2023-10-30 16:39:46,347 ----------------------------------------------------------------------------------------------------





2023-10-30 16:39:49,204 epoch 46 - iter 1/6 - loss 0.01801608 - time (sec): 2.85 - samples/sec: 1293.29 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:52,600 epoch 46 - iter 2/6 - loss 0.01180083 - time (sec): 6.25 - samples/sec: 1204.04 - lr: 0.000033 - momentum: 0.000000
2023-10-30 16:39:56,236 epoch 46 - iter 3/6 - loss 0.01376928 - time (sec): 9.89 - samples/sec: 1178.91 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:39:59,161 epoch 46 - iter 4/6 - loss 0.01621234 - time (sec): 12.81 - samples/sec: 1223.75 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:02,149 epoch 46 - iter 5/6 - loss 0.01727810 - time (sec): 15.80 - samples/sec: 1256.08 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:03,992 epoch 46 - iter 6/6 - loss 0.01741038 - time (sec): 17.64 - samples/sec: 1269.98 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:03,993 ----------------------------------------------------------------------------------------------------
2023-10-30 16:40:03,999 EPOCH 46

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]


2023-10-30 16:40:05,359 DEV : loss 1.8876527547836304 - f1-score (micro avg)  0.0188
2023-10-30 16:40:05,367 ----------------------------------------------------------------------------------------------------
2023-10-30 16:40:08,697 epoch 47 - iter 1/6 - loss 0.01214033 - time (sec): 3.32 - samples/sec: 1176.36 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:12,406 epoch 47 - iter 2/6 - loss 0.00986831 - time (sec): 7.03 - samples/sec: 1117.26 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:15,483 epoch 47 - iter 3/6 - loss 0.01397015 - time (sec): 10.11 - samples/sec: 1189.38 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:18,823 epoch 47 - iter 4/6 - loss 0.01369600 - time (sec): 13.45 - samples/sec: 1187.03 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:21,886 epoch 47 - iter 5/6 - loss 0.01305834 - time (sec): 16.51 - samples/sec: 1228.89 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:23,364 epoch 47 - iter 6/6 - loss 0.01223792 - time (sec): 17.99 - sam

100%|██████████| 1/1 [00:01<00:00,  1.69s/it]

2023-10-30 16:40:25,115 DEV : loss 1.8874646425247192 - f1-score (micro avg)  0.0188
2023-10-30 16:40:25,122 ----------------------------------------------------------------------------------------------------





2023-10-30 16:40:28,991 epoch 48 - iter 1/6 - loss 0.02490010 - time (sec): 3.87 - samples/sec: 994.76 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:32,323 epoch 48 - iter 2/6 - loss 0.01565036 - time (sec): 7.20 - samples/sec: 1054.68 - lr: 0.000032 - momentum: 0.000000
2023-10-30 16:40:35,261 epoch 48 - iter 3/6 - loss 0.01263070 - time (sec): 10.14 - samples/sec: 1152.67 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:38,290 epoch 48 - iter 4/6 - loss 0.01169800 - time (sec): 13.16 - samples/sec: 1212.64 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:41,394 epoch 48 - iter 5/6 - loss 0.01081621 - time (sec): 16.27 - samples/sec: 1241.35 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:43,187 epoch 48 - iter 6/6 - loss 0.01143033 - time (sec): 18.06 - samples/sec: 1240.42 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:43,200 ----------------------------------------------------------------------------------------------------
2023-10-30 16:40:43,202 EPOCH 48

100%|██████████| 1/1 [00:01<00:00,  1.66s/it]

2023-10-30 16:40:44,917 DEV : loss 1.8872685432434082 - f1-score (micro avg)  0.0188
2023-10-30 16:40:44,921 ----------------------------------------------------------------------------------------------------





2023-10-30 16:40:49,203 epoch 49 - iter 1/6 - loss 0.01439663 - time (sec): 4.28 - samples/sec: 954.82 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:52,091 epoch 49 - iter 2/6 - loss 0.01351034 - time (sec): 7.17 - samples/sec: 1117.08 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:54,910 epoch 49 - iter 3/6 - loss 0.01307286 - time (sec): 9.99 - samples/sec: 1192.60 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:40:57,777 epoch 49 - iter 4/6 - loss 0.01335197 - time (sec): 12.85 - samples/sec: 1240.41 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:41:00,918 epoch 49 - iter 5/6 - loss 0.01327070 - time (sec): 15.99 - samples/sec: 1253.01 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:41:02,927 epoch 49 - iter 6/6 - loss 0.01249320 - time (sec): 18.00 - samples/sec: 1244.47 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:41:02,936 ----------------------------------------------------------------------------------------------------
2023-10-30 16:41:02,938 EPOCH 49 

100%|██████████| 1/1 [00:01<00:00,  1.57s/it]

2023-10-30 16:41:04,557 DEV : loss 1.8871514797210693 - f1-score (micro avg)  0.0188
2023-10-30 16:41:04,561 ----------------------------------------------------------------------------------------------------





2023-10-30 16:41:08,210 epoch 50 - iter 1/6 - loss 0.02143805 - time (sec): 3.65 - samples/sec: 1141.42 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:41:11,266 epoch 50 - iter 2/6 - loss 0.01642977 - time (sec): 6.70 - samples/sec: 1248.27 - lr: 0.000031 - momentum: 0.000000
2023-10-30 16:41:13,625 epoch 50 - iter 3/6 - loss 0.01358959 - time (sec): 9.06 - samples/sec: 1362.71 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:17,086 epoch 50 - iter 4/6 - loss 0.01283831 - time (sec): 12.52 - samples/sec: 1311.54 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:19,998 epoch 50 - iter 5/6 - loss 0.01272492 - time (sec): 15.43 - samples/sec: 1300.09 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:22,144 epoch 50 - iter 6/6 - loss 0.01198613 - time (sec): 17.58 - samples/sec: 1274.45 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:22,148 ----------------------------------------------------------------------------------------------------
2023-10-30 16:41:22,150 EPOCH 50

100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

2023-10-30 16:41:23,753 DEV : loss 1.887056589126587 - f1-score (micro avg)  0.0188
2023-10-30 16:41:23,758 ----------------------------------------------------------------------------------------------------





2023-10-30 16:41:27,063 epoch 51 - iter 1/6 - loss 0.01166188 - time (sec): 3.30 - samples/sec: 1121.10 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:30,101 epoch 51 - iter 2/6 - loss 0.01546016 - time (sec): 6.34 - samples/sec: 1236.45 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:33,773 epoch 51 - iter 3/6 - loss 0.02007696 - time (sec): 10.01 - samples/sec: 1215.63 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:36,852 epoch 51 - iter 4/6 - loss 0.01743605 - time (sec): 13.09 - samples/sec: 1235.68 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:40,406 epoch 51 - iter 5/6 - loss 0.01531124 - time (sec): 16.64 - samples/sec: 1209.51 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:42,566 epoch 51 - iter 6/6 - loss 0.01892691 - time (sec): 18.80 - samples/sec: 1191.65 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:42,567 ----------------------------------------------------------------------------------------------------
2023-10-30 16:41:42,572 EPOCH 5

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:41:43,414 DEV : loss 1.8868670463562012 - f1-score (micro avg)  0.0188
2023-10-30 16:41:43,417 ----------------------------------------------------------------------------------------------------





2023-10-30 16:41:46,195 epoch 52 - iter 1/6 - loss 0.03798852 - time (sec): 2.77 - samples/sec: 1408.36 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:48,500 epoch 52 - iter 2/6 - loss 0.02512477 - time (sec): 5.08 - samples/sec: 1553.85 - lr: 0.000030 - momentum: 0.000000
2023-10-30 16:41:51,638 epoch 52 - iter 3/6 - loss 0.02247582 - time (sec): 8.22 - samples/sec: 1493.37 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:41:55,230 epoch 52 - iter 4/6 - loss 0.02087974 - time (sec): 11.81 - samples/sec: 1378.86 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:41:58,684 epoch 52 - iter 5/6 - loss 0.01828204 - time (sec): 15.26 - samples/sec: 1314.86 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:00,809 epoch 52 - iter 6/6 - loss 0.01804746 - time (sec): 17.39 - samples/sec: 1288.43 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:00,811 ----------------------------------------------------------------------------------------------------
2023-10-30 16:42:00,813 EPOCH 52

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:42:01,651 DEV : loss 1.8866974115371704 - f1-score (micro avg)  0.0188
2023-10-30 16:42:01,655 ----------------------------------------------------------------------------------------------------





2023-10-30 16:42:05,081 epoch 53 - iter 1/6 - loss 0.01953453 - time (sec): 3.42 - samples/sec: 1231.97 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:07,994 epoch 53 - iter 2/6 - loss 0.01546227 - time (sec): 6.34 - samples/sec: 1293.89 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:11,072 epoch 53 - iter 3/6 - loss 0.01298768 - time (sec): 9.41 - samples/sec: 1248.69 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:14,576 epoch 53 - iter 4/6 - loss 0.01246987 - time (sec): 12.92 - samples/sec: 1228.66 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:18,234 epoch 53 - iter 5/6 - loss 0.01182151 - time (sec): 16.58 - samples/sec: 1196.83 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:19,804 epoch 53 - iter 6/6 - loss 0.01138148 - time (sec): 18.15 - samples/sec: 1234.64 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:19,806 ----------------------------------------------------------------------------------------------------
2023-10-30 16:42:19,807 EPOCH 53

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:42:20,668 DEV : loss 1.8864933252334595 - f1-score (micro avg)  0.0188
2023-10-30 16:42:20,672 ----------------------------------------------------------------------------------------------------





2023-10-30 16:42:22,794 epoch 54 - iter 1/6 - loss 0.02129169 - time (sec): 2.12 - samples/sec: 1681.99 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:26,108 epoch 54 - iter 2/6 - loss 0.01435729 - time (sec): 5.43 - samples/sec: 1384.10 - lr: 0.000029 - momentum: 0.000000
2023-10-30 16:42:29,260 epoch 54 - iter 3/6 - loss 0.01126598 - time (sec): 8.58 - samples/sec: 1371.25 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:32,754 epoch 54 - iter 4/6 - loss 0.01238596 - time (sec): 12.08 - samples/sec: 1310.67 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:36,376 epoch 54 - iter 5/6 - loss 0.01293771 - time (sec): 15.70 - samples/sec: 1256.19 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:38,034 epoch 54 - iter 6/6 - loss 0.01162116 - time (sec): 17.36 - samples/sec: 1290.69 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:38,037 ----------------------------------------------------------------------------------------------------
2023-10-30 16:42:38,039 EPOCH 54

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:42:38,904 DEV : loss 1.8863698244094849 - f1-score (micro avg)  0.0188
2023-10-30 16:42:38,907 ----------------------------------------------------------------------------------------------------





2023-10-30 16:42:42,036 epoch 55 - iter 1/6 - loss 0.00832830 - time (sec): 3.13 - samples/sec: 1218.98 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:45,130 epoch 55 - iter 2/6 - loss 0.00871612 - time (sec): 6.22 - samples/sec: 1312.29 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:47,726 epoch 55 - iter 3/6 - loss 0.00968585 - time (sec): 8.82 - samples/sec: 1360.42 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:51,139 epoch 55 - iter 4/6 - loss 0.00830697 - time (sec): 12.23 - samples/sec: 1301.67 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:54,739 epoch 55 - iter 5/6 - loss 0.01000925 - time (sec): 15.83 - samples/sec: 1252.91 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:56,233 epoch 55 - iter 6/6 - loss 0.00942308 - time (sec): 17.32 - samples/sec: 1293.32 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:42:56,235 ----------------------------------------------------------------------------------------------------
2023-10-30 16:42:56,243 EPOCH 55

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:42:57,098 DEV : loss 1.8862541913986206 - f1-score (micro avg)  0.0188
2023-10-30 16:42:57,103 ----------------------------------------------------------------------------------------------------





2023-10-30 16:42:59,931 epoch 56 - iter 1/6 - loss 0.01003303 - time (sec): 2.83 - samples/sec: 1421.04 - lr: 0.000028 - momentum: 0.000000
2023-10-30 16:43:02,815 epoch 56 - iter 2/6 - loss 0.00811579 - time (sec): 5.71 - samples/sec: 1400.98 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:05,509 epoch 56 - iter 3/6 - loss 0.00686669 - time (sec): 8.40 - samples/sec: 1367.25 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:09,611 epoch 56 - iter 4/6 - loss 0.00821238 - time (sec): 12.51 - samples/sec: 1245.05 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:13,351 epoch 56 - iter 5/6 - loss 0.00792024 - time (sec): 16.25 - samples/sec: 1210.63 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:15,033 epoch 56 - iter 6/6 - loss 0.00882350 - time (sec): 17.93 - samples/sec: 1249.69 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:15,034 ----------------------------------------------------------------------------------------------------
2023-10-30 16:43:15,044 EPOCH 56

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]


2023-10-30 16:43:16,272 DEV : loss 1.8860747814178467 - f1-score (micro avg)  0.0188
2023-10-30 16:43:16,277 ----------------------------------------------------------------------------------------------------
2023-10-30 16:43:19,644 epoch 57 - iter 1/6 - loss 0.00726201 - time (sec): 3.36 - samples/sec: 1186.57 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:22,400 epoch 57 - iter 2/6 - loss 0.00536923 - time (sec): 6.12 - samples/sec: 1271.06 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:25,754 epoch 57 - iter 3/6 - loss 0.00501618 - time (sec): 9.47 - samples/sec: 1233.23 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:29,903 epoch 57 - iter 4/6 - loss 0.01008131 - time (sec): 13.62 - samples/sec: 1183.16 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:32,688 epoch 57 - iter 5/6 - loss 0.00916162 - time (sec): 16.41 - samples/sec: 1216.15 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:34,209 epoch 57 - iter 6/6 - loss 0.00896126 - time (sec): 17.93 - samp

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:43:35,071 DEV : loss 1.8860057592391968 - f1-score (micro avg)  0.0187
2023-10-30 16:43:35,075 ----------------------------------------------------------------------------------------------------





2023-10-30 16:43:38,553 epoch 58 - iter 1/6 - loss 0.00850180 - time (sec): 3.47 - samples/sec: 1171.25 - lr: 0.000027 - momentum: 0.000000
2023-10-30 16:43:41,727 epoch 58 - iter 2/6 - loss 0.01398884 - time (sec): 6.65 - samples/sec: 1232.09 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:45,233 epoch 58 - iter 3/6 - loss 0.01248311 - time (sec): 10.16 - samples/sec: 1196.53 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:48,615 epoch 58 - iter 4/6 - loss 0.01161633 - time (sec): 13.54 - samples/sec: 1174.13 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:50,865 epoch 58 - iter 5/6 - loss 0.01403015 - time (sec): 15.79 - samples/sec: 1250.30 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:52,505 epoch 58 - iter 6/6 - loss 0.01277014 - time (sec): 17.43 - samples/sec: 1285.63 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:52,507 ----------------------------------------------------------------------------------------------------
2023-10-30 16:43:52,509 EPOCH 5

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:43:53,353 DEV : loss 1.8858503103256226 - f1-score (micro avg)  0.0188
2023-10-30 16:43:53,356 ----------------------------------------------------------------------------------------------------





2023-10-30 16:43:55,943 epoch 59 - iter 1/6 - loss 0.00235206 - time (sec): 2.58 - samples/sec: 1463.11 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:43:58,497 epoch 59 - iter 2/6 - loss 0.00802474 - time (sec): 5.14 - samples/sec: 1530.59 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:02,231 epoch 59 - iter 3/6 - loss 0.00619075 - time (sec): 8.87 - samples/sec: 1359.38 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:05,869 epoch 59 - iter 4/6 - loss 0.00660088 - time (sec): 12.51 - samples/sec: 1272.60 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:08,808 epoch 59 - iter 5/6 - loss 0.00743626 - time (sec): 15.45 - samples/sec: 1296.25 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:10,286 epoch 59 - iter 6/6 - loss 0.00970763 - time (sec): 16.93 - samples/sec: 1323.56 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:10,289 ----------------------------------------------------------------------------------------------------
2023-10-30 16:44:10,291 EPOCH 59

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:44:11,140 DEV : loss 1.885684609413147 - f1-score (micro avg)  0.0188
2023-10-30 16:44:11,143 ----------------------------------------------------------------------------------------------------





2023-10-30 16:44:14,177 epoch 60 - iter 1/6 - loss 0.00559383 - time (sec): 3.03 - samples/sec: 1398.21 - lr: 0.000026 - momentum: 0.000000
2023-10-30 16:44:16,638 epoch 60 - iter 2/6 - loss 0.00572962 - time (sec): 5.49 - samples/sec: 1466.88 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:20,181 epoch 60 - iter 3/6 - loss 0.00492655 - time (sec): 9.03 - samples/sec: 1341.66 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:23,723 epoch 60 - iter 4/6 - loss 0.00543874 - time (sec): 12.58 - samples/sec: 1259.82 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:26,651 epoch 60 - iter 5/6 - loss 0.00721692 - time (sec): 15.50 - samples/sec: 1283.26 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:28,403 epoch 60 - iter 6/6 - loss 0.00834821 - time (sec): 17.26 - samples/sec: 1298.37 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:28,405 ----------------------------------------------------------------------------------------------------
2023-10-30 16:44:28,409 EPOCH 60

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:44:29,271 DEV : loss 1.8855106830596924 - f1-score (micro avg)  0.0188
2023-10-30 16:44:29,275 ----------------------------------------------------------------------------------------------------





2023-10-30 16:44:32,677 epoch 61 - iter 1/6 - loss 0.00681271 - time (sec): 3.40 - samples/sec: 1227.15 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:35,852 epoch 61 - iter 2/6 - loss 0.00668411 - time (sec): 6.57 - samples/sec: 1256.48 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:39,125 epoch 61 - iter 3/6 - loss 0.00596082 - time (sec): 9.85 - samples/sec: 1210.75 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:42,670 epoch 61 - iter 4/6 - loss 0.00542589 - time (sec): 13.39 - samples/sec: 1188.36 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:46,060 epoch 61 - iter 5/6 - loss 0.00502384 - time (sec): 16.78 - samples/sec: 1188.77 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:47,499 epoch 61 - iter 6/6 - loss 0.00526846 - time (sec): 18.22 - samples/sec: 1229.57 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:47,501 ----------------------------------------------------------------------------------------------------
2023-10-30 16:44:47,503 EPOCH 61

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:44:48,353 DEV : loss 1.8854551315307617 - f1-score (micro avg)  0.0188
2023-10-30 16:44:48,357 ----------------------------------------------------------------------------------------------------





2023-10-30 16:44:51,321 epoch 62 - iter 1/6 - loss 0.01324129 - time (sec): 2.96 - samples/sec: 1394.16 - lr: 0.000025 - momentum: 0.000000
2023-10-30 16:44:54,811 epoch 62 - iter 2/6 - loss 0.00861862 - time (sec): 6.45 - samples/sec: 1257.00 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:44:58,541 epoch 62 - iter 3/6 - loss 0.00791399 - time (sec): 10.18 - samples/sec: 1177.61 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:01,275 epoch 62 - iter 4/6 - loss 0.00732183 - time (sec): 12.92 - samples/sec: 1245.44 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:04,049 epoch 62 - iter 5/6 - loss 0.00768245 - time (sec): 15.69 - samples/sec: 1267.66 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:05,595 epoch 62 - iter 6/6 - loss 0.00759633 - time (sec): 17.24 - samples/sec: 1299.86 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:05,598 ----------------------------------------------------------------------------------------------------
2023-10-30 16:45:05,600 EPOCH 6

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:45:06,458 DEV : loss 1.8853598833084106 - f1-score (micro avg)  0.0187
2023-10-30 16:45:06,463 ----------------------------------------------------------------------------------------------------





2023-10-30 16:45:09,836 epoch 63 - iter 1/6 - loss 0.00456310 - time (sec): 3.37 - samples/sec: 1219.71 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:13,722 epoch 63 - iter 2/6 - loss 0.00727414 - time (sec): 7.26 - samples/sec: 1179.33 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:17,320 epoch 63 - iter 3/6 - loss 0.00598022 - time (sec): 10.85 - samples/sec: 1146.17 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:19,857 epoch 63 - iter 4/6 - loss 0.00612676 - time (sec): 13.39 - samples/sec: 1190.88 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:22,719 epoch 63 - iter 5/6 - loss 0.00650914 - time (sec): 16.25 - samples/sec: 1226.25 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:24,481 epoch 63 - iter 6/6 - loss 0.00735954 - time (sec): 18.02 - samples/sec: 1243.59 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:24,483 ----------------------------------------------------------------------------------------------------
2023-10-30 16:45:24,492 EPOCH 6

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:45:25,344 DEV : loss 1.8852108716964722 - f1-score (micro avg)  0.0203
2023-10-30 16:45:25,348 ----------------------------------------------------------------------------------------------------





2023-10-30 16:45:28,274 epoch 64 - iter 1/6 - loss 0.00907946 - time (sec): 2.92 - samples/sec: 1328.22 - lr: 0.000024 - momentum: 0.000000
2023-10-30 16:45:31,670 epoch 64 - iter 2/6 - loss 0.00730423 - time (sec): 6.32 - samples/sec: 1224.32 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:35,989 epoch 64 - iter 3/6 - loss 0.00546811 - time (sec): 10.64 - samples/sec: 1113.45 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:38,472 epoch 64 - iter 4/6 - loss 0.00810217 - time (sec): 13.12 - samples/sec: 1226.70 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:41,273 epoch 64 - iter 5/6 - loss 0.00728563 - time (sec): 15.92 - samples/sec: 1255.05 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:42,772 epoch 64 - iter 6/6 - loss 0.00845817 - time (sec): 17.42 - samples/sec: 1285.98 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:42,774 ----------------------------------------------------------------------------------------------------
2023-10-30 16:45:42,777 EPOCH 6

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:45:43,635 DEV : loss 1.8850691318511963 - f1-score (micro avg)  0.0203
2023-10-30 16:45:43,640 ----------------------------------------------------------------------------------------------------





2023-10-30 16:45:47,117 epoch 65 - iter 1/6 - loss 0.01016231 - time (sec): 3.47 - samples/sec: 1163.95 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:50,657 epoch 65 - iter 2/6 - loss 0.00765320 - time (sec): 7.01 - samples/sec: 1138.32 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:54,287 epoch 65 - iter 3/6 - loss 0.00699918 - time (sec): 10.64 - samples/sec: 1127.83 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:45:57,373 epoch 65 - iter 4/6 - loss 0.00622025 - time (sec): 13.73 - samples/sec: 1182.02 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:46:00,602 epoch 65 - iter 5/6 - loss 0.00603316 - time (sec): 16.96 - samples/sec: 1182.14 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:46:02,066 epoch 65 - iter 6/6 - loss 0.00617108 - time (sec): 18.42 - samples/sec: 1216.09 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:46:02,068 ----------------------------------------------------------------------------------------------------
2023-10-30 16:46:02,071 EPOCH 6

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:46:02,937 DEV : loss 1.8849208354949951 - f1-score (micro avg)  0.0203
2023-10-30 16:46:02,941 ----------------------------------------------------------------------------------------------------





2023-10-30 16:46:06,276 epoch 66 - iter 1/6 - loss 0.00352217 - time (sec): 3.33 - samples/sec: 1158.72 - lr: 0.000023 - momentum: 0.000000
2023-10-30 16:46:09,972 epoch 66 - iter 2/6 - loss 0.00567879 - time (sec): 7.03 - samples/sec: 1111.93 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:13,306 epoch 66 - iter 3/6 - loss 0.00537850 - time (sec): 10.36 - samples/sec: 1146.24 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:16,199 epoch 66 - iter 4/6 - loss 0.00722339 - time (sec): 13.26 - samples/sec: 1197.36 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:19,143 epoch 66 - iter 5/6 - loss 0.00689431 - time (sec): 16.20 - samples/sec: 1233.17 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:20,663 epoch 66 - iter 6/6 - loss 0.00633568 - time (sec): 17.72 - samples/sec: 1264.37 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:20,665 ----------------------------------------------------------------------------------------------------
2023-10-30 16:46:20,668 EPOCH 6

100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

2023-10-30 16:46:21,962 DEV : loss 1.884842038154602 - f1-score (micro avg)  0.0203
2023-10-30 16:46:21,966 ----------------------------------------------------------------------------------------------------





2023-10-30 16:46:26,129 epoch 67 - iter 1/6 - loss 0.00464813 - time (sec): 4.16 - samples/sec: 1037.96 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:30,117 epoch 67 - iter 2/6 - loss 0.00463790 - time (sec): 8.15 - samples/sec: 1058.10 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:33,806 epoch 67 - iter 3/6 - loss 0.00918709 - time (sec): 11.84 - samples/sec: 1096.56 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:35,999 epoch 67 - iter 4/6 - loss 0.00889975 - time (sec): 14.03 - samples/sec: 1185.69 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:38,201 epoch 67 - iter 5/6 - loss 0.00771451 - time (sec): 16.23 - samples/sec: 1255.74 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:39,578 epoch 67 - iter 6/6 - loss 0.00870340 - time (sec): 17.61 - samples/sec: 1272.39 - lr: 0.000022 - momentum: 0.000000
2023-10-30 16:46:39,586 ----------------------------------------------------------------------------------------------------
2023-10-30 16:46:39,588 EPOCH 6

100%|██████████| 1/1 [00:01<00:00,  1.75s/it]

2023-10-30 16:46:41,392 DEV : loss 1.8847649097442627 - f1-score (micro avg)  0.0203
2023-10-30 16:46:41,403 ----------------------------------------------------------------------------------------------------





2023-10-30 16:46:44,913 epoch 68 - iter 1/6 - loss 0.00340310 - time (sec): 3.51 - samples/sec: 1139.13 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:49,274 epoch 68 - iter 2/6 - loss 0.00352637 - time (sec): 7.87 - samples/sec: 1063.38 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:51,697 epoch 68 - iter 3/6 - loss 0.00313728 - time (sec): 10.29 - samples/sec: 1215.88 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:54,410 epoch 68 - iter 4/6 - loss 0.00486375 - time (sec): 13.00 - samples/sec: 1252.00 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:57,227 epoch 68 - iter 5/6 - loss 0.00444737 - time (sec): 15.82 - samples/sec: 1271.88 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:59,043 epoch 68 - iter 6/6 - loss 0.00507652 - time (sec): 17.64 - samples/sec: 1270.20 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:46:59,049 ----------------------------------------------------------------------------------------------------
2023-10-30 16:46:59,054 EPOCH 6

100%|██████████| 1/1 [00:01<00:00,  1.72s/it]

2023-10-30 16:47:00,824 DEV : loss 1.8846946954727173 - f1-score (micro avg)  0.0203
2023-10-30 16:47:00,830 ----------------------------------------------------------------------------------------------------





2023-10-30 16:47:05,131 epoch 69 - iter 1/6 - loss 0.00277977 - time (sec): 4.30 - samples/sec: 924.24 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:08,214 epoch 69 - iter 2/6 - loss 0.00562770 - time (sec): 7.38 - samples/sec: 1094.10 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:11,020 epoch 69 - iter 3/6 - loss 0.00511913 - time (sec): 10.19 - samples/sec: 1172.31 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:13,177 epoch 69 - iter 4/6 - loss 0.00460375 - time (sec): 12.34 - samples/sec: 1262.52 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:16,126 epoch 69 - iter 5/6 - loss 0.00448259 - time (sec): 15.29 - samples/sec: 1280.94 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:18,375 epoch 69 - iter 6/6 - loss 0.00536078 - time (sec): 17.54 - samples/sec: 1277.16 - lr: 0.000021 - momentum: 0.000000
2023-10-30 16:47:18,378 ----------------------------------------------------------------------------------------------------
2023-10-30 16:47:18,382 EPOCH 69

100%|██████████| 1/1 [00:01<00:00,  1.59s/it]

2023-10-30 16:47:20,020 DEV : loss 1.884627342224121 - f1-score (micro avg)  0.0218
2023-10-30 16:47:20,025 ----------------------------------------------------------------------------------------------------





2023-10-30 16:47:23,696 epoch 70 - iter 1/6 - loss 0.00472364 - time (sec): 3.67 - samples/sec: 1067.36 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:26,178 epoch 70 - iter 2/6 - loss 0.00493730 - time (sec): 6.15 - samples/sec: 1322.11 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:29,299 epoch 70 - iter 3/6 - loss 0.00362449 - time (sec): 9.27 - samples/sec: 1341.67 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:31,972 epoch 70 - iter 4/6 - loss 0.00442589 - time (sec): 11.94 - samples/sec: 1352.76 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:34,605 epoch 70 - iter 5/6 - loss 0.00521796 - time (sec): 14.58 - samples/sec: 1382.09 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:36,403 epoch 70 - iter 6/6 - loss 0.00479457 - time (sec): 16.37 - samples/sec: 1368.36 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:36,408 ----------------------------------------------------------------------------------------------------
2023-10-30 16:47:36,414 EPOCH 70

100%|██████████| 1/1 [00:01<00:00,  1.57s/it]

2023-10-30 16:47:38,032 DEV : loss 1.8844780921936035 - f1-score (micro avg)  0.0218
2023-10-30 16:47:38,040 ----------------------------------------------------------------------------------------------------





2023-10-30 16:47:42,709 epoch 71 - iter 1/6 - loss 0.00408719 - time (sec): 4.67 - samples/sec: 877.17 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:45,305 epoch 71 - iter 2/6 - loss 0.00322063 - time (sec): 7.26 - samples/sec: 1061.11 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:48,272 epoch 71 - iter 3/6 - loss 0.00356491 - time (sec): 10.23 - samples/sec: 1149.29 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:50,676 epoch 71 - iter 4/6 - loss 0.00663601 - time (sec): 12.63 - samples/sec: 1255.50 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:53,415 epoch 71 - iter 5/6 - loss 0.00556343 - time (sec): 15.37 - samples/sec: 1280.66 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:55,709 epoch 71 - iter 6/6 - loss 0.00531691 - time (sec): 17.67 - samples/sec: 1268.16 - lr: 0.000020 - momentum: 0.000000
2023-10-30 16:47:55,717 ----------------------------------------------------------------------------------------------------
2023-10-30 16:47:55,719 EPOCH 71

100%|██████████| 1/1 [00:01<00:00,  1.54s/it]

2023-10-30 16:47:57,304 DEV : loss 1.8842480182647705 - f1-score (micro avg)  0.0203
2023-10-30 16:47:57,309 ----------------------------------------------------------------------------------------------------





2023-10-30 16:48:00,742 epoch 72 - iter 1/6 - loss 0.00515254 - time (sec): 3.43 - samples/sec: 1085.50 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:03,631 epoch 72 - iter 2/6 - loss 0.00359155 - time (sec): 6.32 - samples/sec: 1231.13 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:06,496 epoch 72 - iter 3/6 - loss 0.00644308 - time (sec): 9.19 - samples/sec: 1282.72 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:09,408 epoch 72 - iter 4/6 - loss 0.00523987 - time (sec): 12.10 - samples/sec: 1312.57 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:12,793 epoch 72 - iter 5/6 - loss 0.00445130 - time (sec): 15.48 - samples/sec: 1283.47 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:15,171 epoch 72 - iter 6/6 - loss 0.00416561 - time (sec): 17.86 - samples/sec: 1254.45 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:15,174 ----------------------------------------------------------------------------------------------------
2023-10-30 16:48:15,178 EPOCH 72

100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

2023-10-30 16:48:16,772 DEV : loss 1.8840551376342773 - f1-score (micro avg)  0.0203
2023-10-30 16:48:16,777 ----------------------------------------------------------------------------------------------------





2023-10-30 16:48:20,248 epoch 73 - iter 1/6 - loss 0.00319553 - time (sec): 3.47 - samples/sec: 1181.02 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:23,248 epoch 73 - iter 2/6 - loss 0.00308287 - time (sec): 6.47 - samples/sec: 1279.49 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:25,430 epoch 73 - iter 3/6 - loss 0.00397373 - time (sec): 8.65 - samples/sec: 1385.84 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:28,612 epoch 73 - iter 4/6 - loss 0.00344010 - time (sec): 11.83 - samples/sec: 1324.44 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:32,324 epoch 73 - iter 5/6 - loss 0.00353450 - time (sec): 15.54 - samples/sec: 1280.06 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:34,722 epoch 73 - iter 6/6 - loss 0.00337899 - time (sec): 17.94 - samples/sec: 1248.68 - lr: 0.000019 - momentum: 0.000000
2023-10-30 16:48:34,724 ----------------------------------------------------------------------------------------------------
2023-10-30 16:48:34,730 EPOCH 73

100%|██████████| 1/1 [00:00<00:00,  1.01it/s]

2023-10-30 16:48:35,762 DEV : loss 1.883941650390625 - f1-score (micro avg)  0.0203
2023-10-30 16:48:35,766 ----------------------------------------------------------------------------------------------------





2023-10-30 16:48:38,014 epoch 74 - iter 1/6 - loss 0.01138012 - time (sec): 2.25 - samples/sec: 1708.10 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:41,086 epoch 74 - iter 2/6 - loss 0.00747898 - time (sec): 5.32 - samples/sec: 1529.58 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:43,795 epoch 74 - iter 3/6 - loss 0.00937614 - time (sec): 8.03 - samples/sec: 1483.02 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:46,542 epoch 74 - iter 4/6 - loss 0.00850170 - time (sec): 10.77 - samples/sec: 1484.08 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:50,232 epoch 74 - iter 5/6 - loss 0.00718943 - time (sec): 14.46 - samples/sec: 1394.18 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:52,322 epoch 74 - iter 6/6 - loss 0.00680077 - time (sec): 16.55 - samples/sec: 1353.45 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:52,330 ----------------------------------------------------------------------------------------------------
2023-10-30 16:48:52,332 EPOCH 74

100%|██████████| 1/1 [00:01<00:00,  1.09s/it]

2023-10-30 16:48:53,463 DEV : loss 1.8838688135147095 - f1-score (micro avg)  0.0203
2023-10-30 16:48:53,467 ----------------------------------------------------------------------------------------------------





2023-10-30 16:48:56,766 epoch 75 - iter 1/6 - loss 0.00216178 - time (sec): 3.30 - samples/sec: 1234.93 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:48:59,874 epoch 75 - iter 2/6 - loss 0.00397324 - time (sec): 6.40 - samples/sec: 1306.14 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:49:02,706 epoch 75 - iter 3/6 - loss 0.00386193 - time (sec): 9.24 - samples/sec: 1332.91 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:49:06,521 epoch 75 - iter 4/6 - loss 0.00847135 - time (sec): 13.05 - samples/sec: 1241.88 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:49:10,071 epoch 75 - iter 5/6 - loss 0.00741323 - time (sec): 16.60 - samples/sec: 1209.52 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:49:11,835 epoch 75 - iter 6/6 - loss 0.00752758 - time (sec): 18.37 - samples/sec: 1219.86 - lr: 0.000018 - momentum: 0.000000
2023-10-30 16:49:11,837 ----------------------------------------------------------------------------------------------------
2023-10-30 16:49:11,842 EPOCH 75

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:49:12,702 DEV : loss 1.8837785720825195 - f1-score (micro avg)  0.0203
2023-10-30 16:49:12,705 ----------------------------------------------------------------------------------------------------





2023-10-30 16:49:15,526 epoch 76 - iter 1/6 - loss 0.00372020 - time (sec): 2.82 - samples/sec: 1386.42 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:18,534 epoch 76 - iter 2/6 - loss 0.00431676 - time (sec): 5.83 - samples/sec: 1401.84 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:21,706 epoch 76 - iter 3/6 - loss 0.00567755 - time (sec): 9.00 - samples/sec: 1386.32 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:25,558 epoch 76 - iter 4/6 - loss 0.00495877 - time (sec): 12.85 - samples/sec: 1273.76 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:29,189 epoch 76 - iter 5/6 - loss 0.00528777 - time (sec): 16.48 - samples/sec: 1227.07 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:30,514 epoch 76 - iter 6/6 - loss 0.00489104 - time (sec): 17.81 - samples/sec: 1258.22 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:30,516 ----------------------------------------------------------------------------------------------------
2023-10-30 16:49:30,518 EPOCH 76

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:49:31,368 DEV : loss 1.8837238550186157 - f1-score (micro avg)  0.0203
2023-10-30 16:49:31,372 ----------------------------------------------------------------------------------------------------





2023-10-30 16:49:34,659 epoch 77 - iter 1/6 - loss 0.01356628 - time (sec): 3.29 - samples/sec: 1252.00 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:36,868 epoch 77 - iter 2/6 - loss 0.00843702 - time (sec): 5.49 - samples/sec: 1426.04 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:39,837 epoch 77 - iter 3/6 - loss 0.00989676 - time (sec): 8.46 - samples/sec: 1388.57 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:43,434 epoch 77 - iter 4/6 - loss 0.00769681 - time (sec): 12.06 - samples/sec: 1308.04 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:47,246 epoch 77 - iter 5/6 - loss 0.00699283 - time (sec): 15.87 - samples/sec: 1254.54 - lr: 0.000017 - momentum: 0.000000
2023-10-30 16:49:48,782 epoch 77 - iter 6/6 - loss 0.00640970 - time (sec): 17.41 - samples/sec: 1287.02 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:49:48,784 ----------------------------------------------------------------------------------------------------
2023-10-30 16:49:48,786 EPOCH 77

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]

2023-10-30 16:49:49,662 DEV : loss 1.8836441040039062 - f1-score (micro avg)  0.0203
2023-10-30 16:49:49,666 ----------------------------------------------------------------------------------------------------





2023-10-30 16:49:52,205 epoch 78 - iter 1/6 - loss 0.01525266 - time (sec): 2.54 - samples/sec: 1690.61 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:49:55,251 epoch 78 - iter 2/6 - loss 0.00846656 - time (sec): 5.58 - samples/sec: 1526.28 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:49:58,631 epoch 78 - iter 3/6 - loss 0.00716635 - time (sec): 8.96 - samples/sec: 1370.84 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:02,220 epoch 78 - iter 4/6 - loss 0.00826112 - time (sec): 12.55 - samples/sec: 1302.25 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:05,684 epoch 78 - iter 5/6 - loss 0.00986568 - time (sec): 16.01 - samples/sec: 1260.47 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:06,991 epoch 78 - iter 6/6 - loss 0.00905703 - time (sec): 17.32 - samples/sec: 1293.40 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:06,993 ----------------------------------------------------------------------------------------------------
2023-10-30 16:50:06,998 EPOCH 78

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:50:07,847 DEV : loss 1.883515477180481 - f1-score (micro avg)  0.0203
2023-10-30 16:50:07,852 ----------------------------------------------------------------------------------------------------





2023-10-30 16:50:10,532 epoch 79 - iter 1/6 - loss 0.00159854 - time (sec): 2.68 - samples/sec: 1470.61 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:12,877 epoch 79 - iter 2/6 - loss 0.00191475 - time (sec): 5.02 - samples/sec: 1571.85 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:16,021 epoch 79 - iter 3/6 - loss 0.00261693 - time (sec): 8.16 - samples/sec: 1485.58 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:19,586 epoch 79 - iter 4/6 - loss 0.00227223 - time (sec): 11.73 - samples/sec: 1382.92 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:23,341 epoch 79 - iter 5/6 - loss 0.00366219 - time (sec): 15.48 - samples/sec: 1313.23 - lr: 0.000016 - momentum: 0.000000
2023-10-30 16:50:24,825 epoch 79 - iter 6/6 - loss 0.00347354 - time (sec): 16.97 - samples/sec: 1320.35 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:24,826 ----------------------------------------------------------------------------------------------------
2023-10-30 16:50:24,831 EPOCH 79

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:50:25,679 DEV : loss 1.8834974765777588 - f1-score (micro avg)  0.0203
2023-10-30 16:50:25,683 ----------------------------------------------------------------------------------------------------





2023-10-30 16:50:28,557 epoch 80 - iter 1/6 - loss 0.00127809 - time (sec): 2.87 - samples/sec: 1394.56 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:31,294 epoch 80 - iter 2/6 - loss 0.00112780 - time (sec): 5.61 - samples/sec: 1398.35 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:33,793 epoch 80 - iter 3/6 - loss 0.00403502 - time (sec): 8.11 - samples/sec: 1424.96 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:37,345 epoch 80 - iter 4/6 - loss 0.00337076 - time (sec): 11.66 - samples/sec: 1350.85 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:41,244 epoch 80 - iter 5/6 - loss 0.00466071 - time (sec): 15.56 - samples/sec: 1283.99 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:42,671 epoch 80 - iter 6/6 - loss 0.00482995 - time (sec): 16.99 - samples/sec: 1318.94 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:42,674 ----------------------------------------------------------------------------------------------------
2023-10-30 16:50:42,675 EPOCH 80

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:50:43,531 DEV : loss 1.883518934249878 - f1-score (micro avg)  0.0203
2023-10-30 16:50:43,534 ----------------------------------------------------------------------------------------------------





2023-10-30 16:50:47,370 epoch 81 - iter 1/6 - loss 0.00206056 - time (sec): 3.83 - samples/sec: 1062.54 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:49,787 epoch 81 - iter 2/6 - loss 0.00209709 - time (sec): 6.25 - samples/sec: 1187.87 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:52,485 epoch 81 - iter 3/6 - loss 0.00352273 - time (sec): 8.95 - samples/sec: 1240.71 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:56,474 epoch 81 - iter 4/6 - loss 0.00444509 - time (sec): 12.94 - samples/sec: 1209.04 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:50:59,767 epoch 81 - iter 5/6 - loss 0.00539225 - time (sec): 16.23 - samples/sec: 1197.78 - lr: 0.000015 - momentum: 0.000000
2023-10-30 16:51:01,567 epoch 81 - iter 6/6 - loss 0.00483701 - time (sec): 18.03 - samples/sec: 1242.72 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:01,568 ----------------------------------------------------------------------------------------------------
2023-10-30 16:51:01,575 EPOCH 81

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:51:02,424 DEV : loss 1.8834573030471802 - f1-score (micro avg)  0.0203
2023-10-30 16:51:02,429 ----------------------------------------------------------------------------------------------------





2023-10-30 16:51:05,203 epoch 82 - iter 1/6 - loss 0.00810979 - time (sec): 2.77 - samples/sec: 1391.67 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:08,388 epoch 82 - iter 2/6 - loss 0.00468249 - time (sec): 5.96 - samples/sec: 1385.37 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:11,615 epoch 82 - iter 3/6 - loss 0.00356110 - time (sec): 9.18 - samples/sec: 1313.14 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:15,414 epoch 82 - iter 4/6 - loss 0.00331746 - time (sec): 12.98 - samples/sec: 1239.10 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:18,893 epoch 82 - iter 5/6 - loss 0.00279867 - time (sec): 16.46 - samples/sec: 1215.45 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:20,386 epoch 82 - iter 6/6 - loss 0.00260733 - time (sec): 17.96 - samples/sec: 1247.77 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:20,389 ----------------------------------------------------------------------------------------------------
2023-10-30 16:51:20,390 EPOCH 82

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:51:21,254 DEV : loss 1.8833163976669312 - f1-score (micro avg)  0.0203
2023-10-30 16:51:21,258 ----------------------------------------------------------------------------------------------------





2023-10-30 16:51:24,423 epoch 83 - iter 1/6 - loss 0.00225871 - time (sec): 3.16 - samples/sec: 1239.14 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:27,746 epoch 83 - iter 2/6 - loss 0.00170914 - time (sec): 6.49 - samples/sec: 1283.45 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:31,384 epoch 83 - iter 3/6 - loss 0.00732444 - time (sec): 10.12 - samples/sec: 1235.04 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:34,819 epoch 83 - iter 4/6 - loss 0.00891266 - time (sec): 13.56 - samples/sec: 1198.93 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:37,038 epoch 83 - iter 5/6 - loss 0.00796695 - time (sec): 15.78 - samples/sec: 1270.72 - lr: 0.000014 - momentum: 0.000000
2023-10-30 16:51:38,517 epoch 83 - iter 6/6 - loss 0.00730516 - time (sec): 17.26 - samples/sec: 1298.28 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:38,519 ----------------------------------------------------------------------------------------------------
2023-10-30 16:51:38,521 EPOCH 8

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]

2023-10-30 16:51:39,393 DEV : loss 1.8832263946533203 - f1-score (micro avg)  0.0203
2023-10-30 16:51:39,396 ----------------------------------------------------------------------------------------------------





2023-10-30 16:51:42,097 epoch 84 - iter 1/6 - loss 0.00820522 - time (sec): 2.70 - samples/sec: 1384.93 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:45,093 epoch 84 - iter 2/6 - loss 0.00542431 - time (sec): 5.69 - samples/sec: 1372.86 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:48,327 epoch 84 - iter 3/6 - loss 0.00410035 - time (sec): 8.93 - samples/sec: 1320.18 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:51,759 epoch 84 - iter 4/6 - loss 0.00392344 - time (sec): 12.36 - samples/sec: 1245.78 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:55,035 epoch 84 - iter 5/6 - loss 0.00349335 - time (sec): 15.63 - samples/sec: 1268.96 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:56,607 epoch 84 - iter 6/6 - loss 0.00330329 - time (sec): 17.21 - samples/sec: 1302.13 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:51:56,609 ----------------------------------------------------------------------------------------------------
2023-10-30 16:51:56,611 EPOCH 84

100%|██████████| 1/1 [00:00<00:00,  1.26it/s]


2023-10-30 16:51:57,832 DEV : loss 1.8832088708877563 - f1-score (micro avg)  0.0203
2023-10-30 16:51:57,836 ----------------------------------------------------------------------------------------------------
2023-10-30 16:52:00,790 epoch 85 - iter 1/6 - loss 0.01860104 - time (sec): 2.95 - samples/sec: 1402.46 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:52:03,502 epoch 85 - iter 2/6 - loss 0.01073953 - time (sec): 5.66 - samples/sec: 1427.41 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:52:06,547 epoch 85 - iter 3/6 - loss 0.00858245 - time (sec): 8.71 - samples/sec: 1337.37 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:52:10,353 epoch 85 - iter 4/6 - loss 0.00801897 - time (sec): 12.51 - samples/sec: 1261.37 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:52:13,149 epoch 85 - iter 5/6 - loss 0.00712197 - time (sec): 15.31 - samples/sec: 1288.93 - lr: 0.000013 - momentum: 0.000000
2023-10-30 16:52:14,814 epoch 85 - iter 6/6 - loss 0.00643033 - time (sec): 16.97 - samp

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:52:15,681 DEV : loss 1.883172869682312 - f1-score (micro avg)  0.0203
2023-10-30 16:52:15,684 ----------------------------------------------------------------------------------------------------





2023-10-30 16:52:17,945 epoch 86 - iter 1/6 - loss 0.01148213 - time (sec): 2.26 - samples/sec: 1689.45 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:20,714 epoch 86 - iter 2/6 - loss 0.00894348 - time (sec): 5.02 - samples/sec: 1618.82 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:24,214 epoch 86 - iter 3/6 - loss 0.00753676 - time (sec): 8.52 - samples/sec: 1429.48 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:27,769 epoch 86 - iter 4/6 - loss 0.00613636 - time (sec): 12.08 - samples/sec: 1321.76 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:31,143 epoch 86 - iter 5/6 - loss 0.00533570 - time (sec): 15.45 - samples/sec: 1290.95 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:32,671 epoch 86 - iter 6/6 - loss 0.00484437 - time (sec): 16.98 - samples/sec: 1319.27 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:32,673 ----------------------------------------------------------------------------------------------------
2023-10-30 16:52:32,677 EPOCH 86

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:52:33,546 DEV : loss 1.8831300735473633 - f1-score (micro avg)  0.0218
2023-10-30 16:52:33,550 ----------------------------------------------------------------------------------------------------





2023-10-30 16:52:36,960 epoch 87 - iter 1/6 - loss 0.00174479 - time (sec): 3.41 - samples/sec: 1233.68 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:39,894 epoch 87 - iter 2/6 - loss 0.00309033 - time (sec): 6.34 - samples/sec: 1284.98 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:43,261 epoch 87 - iter 3/6 - loss 0.00263019 - time (sec): 9.71 - samples/sec: 1229.26 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:46,297 epoch 87 - iter 4/6 - loss 0.00231587 - time (sec): 12.74 - samples/sec: 1233.13 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:49,187 epoch 87 - iter 5/6 - loss 0.00256395 - time (sec): 15.63 - samples/sec: 1263.41 - lr: 0.000012 - momentum: 0.000000
2023-10-30 16:52:50,834 epoch 87 - iter 6/6 - loss 0.00233356 - time (sec): 17.28 - samples/sec: 1296.39 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:52:50,839 ----------------------------------------------------------------------------------------------------
2023-10-30 16:52:50,845 EPOCH 87

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:52:51,692 DEV : loss 1.883076548576355 - f1-score (micro avg)  0.0218
2023-10-30 16:52:51,696 ----------------------------------------------------------------------------------------------------





2023-10-30 16:52:54,009 epoch 88 - iter 1/6 - loss 0.00772207 - time (sec): 2.31 - samples/sec: 1696.27 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:52:57,124 epoch 88 - iter 2/6 - loss 0.00929304 - time (sec): 5.43 - samples/sec: 1465.38 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:00,394 epoch 88 - iter 3/6 - loss 0.00743526 - time (sec): 8.70 - samples/sec: 1338.56 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:03,789 epoch 88 - iter 4/6 - loss 0.00568879 - time (sec): 12.09 - samples/sec: 1290.53 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:06,867 epoch 88 - iter 5/6 - loss 0.00538076 - time (sec): 15.17 - samples/sec: 1312.51 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:08,420 epoch 88 - iter 6/6 - loss 0.00489575 - time (sec): 16.72 - samples/sec: 1339.81 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:08,422 ----------------------------------------------------------------------------------------------------
2023-10-30 16:53:08,424 EPOCH 88

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]

2023-10-30 16:53:09,308 DEV : loss 1.882960557937622 - f1-score (micro avg)  0.0218
2023-10-30 16:53:09,311 ----------------------------------------------------------------------------------------------------





2023-10-30 16:53:13,216 epoch 89 - iter 1/6 - loss 0.00182437 - time (sec): 3.90 - samples/sec: 1079.37 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:16,748 epoch 89 - iter 2/6 - loss 0.00134791 - time (sec): 7.43 - samples/sec: 1116.44 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:20,456 epoch 89 - iter 3/6 - loss 0.00137284 - time (sec): 11.14 - samples/sec: 1102.12 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:22,769 epoch 89 - iter 4/6 - loss 0.00129110 - time (sec): 13.46 - samples/sec: 1179.09 - lr: 0.000011 - momentum: 0.000000
2023-10-30 16:53:25,642 epoch 89 - iter 5/6 - loss 0.00116010 - time (sec): 16.33 - samples/sec: 1213.89 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:27,163 epoch 89 - iter 6/6 - loss 0.00314407 - time (sec): 17.85 - samples/sec: 1255.17 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:27,165 ----------------------------------------------------------------------------------------------------
2023-10-30 16:53:27,167 EPOCH 8

100%|██████████| 1/1 [00:00<00:00,  1.21it/s]

2023-10-30 16:53:28,029 DEV : loss 1.8828803300857544 - f1-score (micro avg)  0.0218
2023-10-30 16:53:28,033 ----------------------------------------------------------------------------------------------------





2023-10-30 16:53:30,880 epoch 90 - iter 1/6 - loss 0.00229545 - time (sec): 2.84 - samples/sec: 1391.08 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:34,113 epoch 90 - iter 2/6 - loss 0.00312325 - time (sec): 6.08 - samples/sec: 1284.00 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:37,825 epoch 90 - iter 3/6 - loss 0.00275514 - time (sec): 9.79 - samples/sec: 1195.69 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:41,176 epoch 90 - iter 4/6 - loss 0.00353479 - time (sec): 13.14 - samples/sec: 1206.30 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:44,133 epoch 90 - iter 5/6 - loss 0.00321849 - time (sec): 16.10 - samples/sec: 1241.71 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:45,848 epoch 90 - iter 6/6 - loss 0.00314269 - time (sec): 17.81 - samples/sec: 1257.91 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:45,850 ----------------------------------------------------------------------------------------------------
2023-10-30 16:53:45,852 EPOCH 90

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]

2023-10-30 16:53:46,720 DEV : loss 1.8827985525131226 - f1-score (micro avg)  0.0218
2023-10-30 16:53:46,724 ----------------------------------------------------------------------------------------------------





2023-10-30 16:53:50,236 epoch 91 - iter 1/6 - loss 0.00316475 - time (sec): 3.51 - samples/sec: 1185.93 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:53,520 epoch 91 - iter 2/6 - loss 0.00338768 - time (sec): 6.79 - samples/sec: 1169.63 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:53:57,539 epoch 91 - iter 3/6 - loss 0.00457849 - time (sec): 10.81 - samples/sec: 1098.08 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:54:00,351 epoch 91 - iter 4/6 - loss 0.00378581 - time (sec): 13.63 - samples/sec: 1152.51 - lr: 0.000010 - momentum: 0.000000
2023-10-30 16:54:03,370 epoch 91 - iter 5/6 - loss 0.00384049 - time (sec): 16.64 - samples/sec: 1199.95 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:04,816 epoch 91 - iter 6/6 - loss 0.00472023 - time (sec): 18.09 - samples/sec: 1238.51 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:04,818 ----------------------------------------------------------------------------------------------------
2023-10-30 16:54:04,821 EPOCH 9

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:54:05,679 DEV : loss 1.8827158212661743 - f1-score (micro avg)  0.0218
2023-10-30 16:54:05,683 ----------------------------------------------------------------------------------------------------





2023-10-30 16:54:08,658 epoch 92 - iter 1/6 - loss 0.00043221 - time (sec): 2.97 - samples/sec: 1267.39 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:12,361 epoch 92 - iter 2/6 - loss 0.00045853 - time (sec): 6.67 - samples/sec: 1117.98 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:16,096 epoch 92 - iter 3/6 - loss 0.00237552 - time (sec): 10.41 - samples/sec: 1120.73 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:18,891 epoch 92 - iter 4/6 - loss 0.00191897 - time (sec): 13.20 - samples/sec: 1177.88 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:21,260 epoch 92 - iter 5/6 - loss 0.00169639 - time (sec): 15.57 - samples/sec: 1263.00 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:22,932 epoch 92 - iter 6/6 - loss 0.00181156 - time (sec): 17.25 - samples/sec: 1299.10 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:22,934 ----------------------------------------------------------------------------------------------------
2023-10-30 16:54:22,936 EPOCH 9

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]

2023-10-30 16:54:23,779 DEV : loss 1.8826645612716675 - f1-score (micro avg)  0.0218
2023-10-30 16:54:23,782 ----------------------------------------------------------------------------------------------------





2023-10-30 16:54:27,505 epoch 93 - iter 1/6 - loss 0.00520409 - time (sec): 3.72 - samples/sec: 1115.44 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:31,253 epoch 93 - iter 2/6 - loss 0.00372872 - time (sec): 7.47 - samples/sec: 1089.21 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:34,589 epoch 93 - iter 3/6 - loss 0.00411469 - time (sec): 10.80 - samples/sec: 1131.33 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:37,476 epoch 93 - iter 4/6 - loss 0.00357264 - time (sec): 13.69 - samples/sec: 1182.96 - lr: 0.000009 - momentum: 0.000000
2023-10-30 16:54:39,734 epoch 93 - iter 5/6 - loss 0.00295409 - time (sec): 15.95 - samples/sec: 1258.43 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:41,148 epoch 93 - iter 6/6 - loss 0.00398009 - time (sec): 17.36 - samples/sec: 1290.47 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:41,151 ----------------------------------------------------------------------------------------------------
2023-10-30 16:54:41,152 EPOCH 9

100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

2023-10-30 16:54:42,001 DEV : loss 1.882646918296814 - f1-score (micro avg)  0.0218
2023-10-30 16:54:42,005 ----------------------------------------------------------------------------------------------------





2023-10-30 16:54:45,371 epoch 94 - iter 1/6 - loss 0.01155161 - time (sec): 3.36 - samples/sec: 1216.33 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:49,022 epoch 94 - iter 2/6 - loss 0.00621205 - time (sec): 7.01 - samples/sec: 1131.60 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:52,565 epoch 94 - iter 3/6 - loss 0.00467057 - time (sec): 10.55 - samples/sec: 1144.57 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:55,350 epoch 94 - iter 4/6 - loss 0.00418674 - time (sec): 13.34 - samples/sec: 1197.57 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:54:58,747 epoch 94 - iter 5/6 - loss 0.00482905 - time (sec): 16.74 - samples/sec: 1194.57 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:00,166 epoch 94 - iter 6/6 - loss 0.00480131 - time (sec): 18.16 - samples/sec: 1234.03 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:00,168 ----------------------------------------------------------------------------------------------------
2023-10-30 16:55:00,170 EPOCH 9

100%|██████████| 1/1 [00:01<00:00,  1.57s/it]

2023-10-30 16:55:01,797 DEV : loss 1.8826158046722412 - f1-score (micro avg)  0.0218
2023-10-30 16:55:01,804 ----------------------------------------------------------------------------------------------------





2023-10-30 16:55:05,797 epoch 95 - iter 1/6 - loss 0.00232214 - time (sec): 3.99 - samples/sec: 1011.42 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:08,794 epoch 95 - iter 2/6 - loss 0.00161614 - time (sec): 6.99 - samples/sec: 1059.50 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:11,656 epoch 95 - iter 3/6 - loss 0.00157139 - time (sec): 9.85 - samples/sec: 1159.63 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:14,966 epoch 95 - iter 4/6 - loss 0.00149033 - time (sec): 13.16 - samples/sec: 1171.54 - lr: 0.000008 - momentum: 0.000000
2023-10-30 16:55:17,996 epoch 95 - iter 5/6 - loss 0.00242428 - time (sec): 16.19 - samples/sec: 1208.78 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:20,176 epoch 95 - iter 6/6 - loss 0.00220890 - time (sec): 18.37 - samples/sec: 1219.78 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:20,178 ----------------------------------------------------------------------------------------------------
2023-10-30 16:55:20,181 EPOCH 95

100%|██████████| 1/1 [00:01<00:00,  1.70s/it]

2023-10-30 16:55:21,927 DEV : loss 1.8825677633285522 - f1-score (micro avg)  0.0218
2023-10-30 16:55:21,933 ----------------------------------------------------------------------------------------------------





2023-10-30 16:55:25,677 epoch 96 - iter 1/6 - loss 0.00121642 - time (sec): 3.74 - samples/sec: 1070.13 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:28,790 epoch 96 - iter 2/6 - loss 0.00136043 - time (sec): 6.85 - samples/sec: 1137.80 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:32,029 epoch 96 - iter 3/6 - loss 0.00159517 - time (sec): 10.09 - samples/sec: 1147.71 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:35,324 epoch 96 - iter 4/6 - loss 0.00195465 - time (sec): 13.39 - samples/sec: 1205.00 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:38,225 epoch 96 - iter 5/6 - loss 0.00244849 - time (sec): 16.29 - samples/sec: 1225.31 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:40,263 epoch 96 - iter 6/6 - loss 0.00223413 - time (sec): 18.33 - samples/sec: 1222.50 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:40,266 ----------------------------------------------------------------------------------------------------
2023-10-30 16:55:40,269 EPOCH 9

100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


2023-10-30 16:55:42,365 DEV : loss 1.882534146308899 - f1-score (micro avg)  0.0218
2023-10-30 16:55:42,372 ----------------------------------------------------------------------------------------------------
2023-10-30 16:55:46,465 epoch 97 - iter 1/6 - loss 0.00583073 - time (sec): 4.09 - samples/sec: 1169.17 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:48,914 epoch 97 - iter 2/6 - loss 0.00597164 - time (sec): 6.54 - samples/sec: 1363.55 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:51,754 epoch 97 - iter 3/6 - loss 0.00433245 - time (sec): 9.38 - samples/sec: 1376.28 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:54,769 epoch 97 - iter 4/6 - loss 0.00405080 - time (sec): 12.39 - samples/sec: 1331.20 - lr: 0.000007 - momentum: 0.000000
2023-10-30 16:55:57,909 epoch 97 - iter 5/6 - loss 0.00453210 - time (sec): 15.53 - samples/sec: 1296.60 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:00,025 epoch 97 - iter 6/6 - loss 0.00411429 - time (sec): 17.65 - sampl

100%|██████████| 1/1 [00:01<00:00,  1.60s/it]

2023-10-30 16:56:01,682 DEV : loss 1.8825304508209229 - f1-score (micro avg)  0.0217
2023-10-30 16:56:01,687 ----------------------------------------------------------------------------------------------------





2023-10-30 16:56:05,016 epoch 98 - iter 1/6 - loss 0.00429252 - time (sec): 3.32 - samples/sec: 1261.11 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:07,852 epoch 98 - iter 2/6 - loss 0.00312564 - time (sec): 6.16 - samples/sec: 1318.81 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:10,053 epoch 98 - iter 3/6 - loss 0.00230980 - time (sec): 8.36 - samples/sec: 1424.56 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:12,843 epoch 98 - iter 4/6 - loss 0.00191645 - time (sec): 11.15 - samples/sec: 1416.48 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:16,346 epoch 98 - iter 5/6 - loss 0.00291896 - time (sec): 14.65 - samples/sec: 1359.69 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:18,738 epoch 98 - iter 6/6 - loss 0.00330791 - time (sec): 17.05 - samples/sec: 1314.38 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:18,743 ----------------------------------------------------------------------------------------------------
2023-10-30 16:56:18,748 EPOCH 98

100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

2023-10-30 16:56:20,349 DEV : loss 1.8824948072433472 - f1-score (micro avg)  0.0218
2023-10-30 16:56:20,353 ----------------------------------------------------------------------------------------------------





2023-10-30 16:56:24,281 epoch 99 - iter 1/6 - loss 0.00282007 - time (sec): 3.93 - samples/sec: 1072.72 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:27,309 epoch 99 - iter 2/6 - loss 0.00404150 - time (sec): 6.95 - samples/sec: 1205.26 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:30,116 epoch 99 - iter 3/6 - loss 0.00478628 - time (sec): 9.76 - samples/sec: 1260.39 - lr: 0.000006 - momentum: 0.000000
2023-10-30 16:56:33,289 epoch 99 - iter 4/6 - loss 0.00406910 - time (sec): 12.93 - samples/sec: 1244.70 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:36,813 epoch 99 - iter 5/6 - loss 0.00342750 - time (sec): 16.46 - samples/sec: 1206.04 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:38,911 epoch 99 - iter 6/6 - loss 0.00325860 - time (sec): 18.56 - samples/sec: 1207.40 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:38,914 ----------------------------------------------------------------------------------------------------
2023-10-30 16:56:38,916 EPOCH 99

100%|██████████| 1/1 [00:00<00:00,  1.22it/s]

2023-10-30 16:56:39,771 DEV : loss 1.8824622631072998 - f1-score (micro avg)  0.0218
2023-10-30 16:56:39,775 ----------------------------------------------------------------------------------------------------





2023-10-30 16:56:43,138 epoch 100 - iter 1/6 - loss 0.00651940 - time (sec): 3.36 - samples/sec: 1192.00 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:45,830 epoch 100 - iter 2/6 - loss 0.00515140 - time (sec): 6.05 - samples/sec: 1273.70 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:48,609 epoch 100 - iter 3/6 - loss 0.00363406 - time (sec): 8.83 - samples/sec: 1307.74 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:52,109 epoch 100 - iter 4/6 - loss 0.00381610 - time (sec): 12.33 - samples/sec: 1268.69 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:56,240 epoch 100 - iter 5/6 - loss 0.00310822 - time (sec): 16.46 - samples/sec: 1216.01 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:57,637 epoch 100 - iter 6/6 - loss 0.00347366 - time (sec): 17.86 - samples/sec: 1254.42 - lr: 0.000005 - momentum: 0.000000
2023-10-30 16:56:57,639 ----------------------------------------------------------------------------------------------------
2023-10-30 16:56:57,644 EP

100%|██████████| 1/1 [00:00<00:00,  1.18it/s]

2023-10-30 16:56:58,535 DEV : loss 1.8824336528778076 - f1-score (micro avg)  0.0218





2023-10-30 16:56:59,123 ----------------------------------------------------------------------------------------------------
2023-10-30 16:56:59,127 Testing using last state of model ...


100%|██████████| 3/3 [00:03<00:00,  1.10s/it]

2023-10-30 16:57:02,468 
Results:
- F-score (micro) 0.6274
- F-score (macro) 0.7334
- Accuracy 0.4676

By class:
              precision    recall  f1-score   support

   Component     0.6122    0.6862    0.6471       513
      Person     0.3498    0.4670    0.4000       182
        Time     0.9420    0.9701    0.9559        67
        Date     0.9216    0.9400    0.9307        50

   micro avg     0.5853    0.6761    0.6274       812
   macro avg     0.7064    0.7658    0.7334       812
weighted avg     0.5996    0.6761    0.6346       812

2023-10-30 16:57:02,469 ----------------------------------------------------------------------------------------------------





{'test_score': 0.6274285714285713}

### RE component 2

In [None]:
relation_label_dict = corpus.make_label_dictionary(label_type="relation")
relation_dictionary = corpus.make_label_dictionary("relation")
doc_embeddings = TransformerDocumentEmbeddings(model="distilbert-base-uncased", layers="-1", fine_tune=True)
relation_model: RelationClassifier = RelationClassifier(
    embeddings =doc_embeddings,
    label_dictionary = relation_dictionary,
    label_type="relation",
    entity_label_types="ner",
    entity_pair_labels={  # Define valid entity pair combinations, used as relation candidates
        ("Person", "Component"),
        ("Person", "Time"),
        ("Person", "Date"),
        ("Person", "Person"),
        ("Component", "Person"),
        ("Component", "Date"),
        ("Component", "Time"),
        ("Component", "Component")
    },
    zero_tag_value='',
    allow_unk_tag=False,
)

2023-10-30 17:10:12,927 Computing label dictionary. Progress:


0it [00:00, ?it/s]
90it [00:00, 7395.04it/s]

2023-10-30 17:10:12,954 Dictionary created for label 'relation' with 2 values: org:happened_at (seen 355 times), org:caused_by (seen 344 times)
2023-10-30 17:10:12,956 Computing label dictionary. Progress:



0it [00:00, ?it/s]
90it [00:00, 6671.16it/s]

2023-10-30 17:10:12,983 Dictionary created for label 'relation' with 2 values: org:happened_at (seen 355 times), org:caused_by (seen 344 times)





Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
trainer: ModelTrainer = ModelTrainer(model=relation_model, corpus=relation_model.transform_corpus(corpus))
trainer.fine_tune(
    'resources/relations/',
    max_epochs=20,
    learning_rate=4e-5,
    mini_batch_size=8,
    main_evaluation_metric=("macro avg", "f1-score"),
    shuffle=True,
    embeddings_storage_mode='gpu',
)

2023-10-30 17:13:47,091 ----------------------------------------------------------------------------------------------------
2023-10-30 17:13:47,093 Model: "RelationClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30523, 768)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0-5): 6 x TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768,

100%|██████████| 234/234 [00:24<00:00,  9.52it/s]


2023-10-30 17:22:03,687 DEV : loss 0.108233243227005 - f1-score (macro avg)  0.5761
2023-10-30 17:22:03,984 ----------------------------------------------------------------------------------------------------
2023-10-30 17:22:51,703 epoch 2 - iter 366/3661 - loss 0.12598204 - time (sec): 47.71 - samples/sec: 61.37 - lr: 0.000022 - momentum: 0.000000
2023-10-30 17:23:38,815 epoch 2 - iter 732/3661 - loss 0.11764908 - time (sec): 94.82 - samples/sec: 61.76 - lr: 0.000024 - momentum: 0.000000
2023-10-30 17:24:25,878 epoch 2 - iter 1098/3661 - loss 0.11674089 - time (sec): 141.89 - samples/sec: 61.91 - lr: 0.000026 - momentum: 0.000000
2023-10-30 17:25:13,234 epoch 2 - iter 1464/3661 - loss 0.10741457 - time (sec): 189.24 - samples/sec: 61.89 - lr: 0.000028 - momentum: 0.000000
2023-10-30 17:25:59,706 epoch 2 - iter 1830/3661 - loss 0.10942523 - time (sec): 235.71 - samples/sec: 62.11 - lr: 0.000030 - momentum: 0.000000
2023-10-30 17:26:47,230 epoch 2 - iter 2196/3661 - loss 0.10896951 - t

100%|██████████| 234/234 [00:05<00:00, 42.90it/s]


2023-10-30 17:30:01,298 DEV : loss 0.10641758888959885 - f1-score (macro avg)  0.5761
2023-10-30 17:30:01,770 ----------------------------------------------------------------------------------------------------
2023-10-30 17:30:48,528 epoch 3 - iter 366/3661 - loss 0.07807636 - time (sec): 46.75 - samples/sec: 62.62 - lr: 0.000040 - momentum: 0.000000
2023-10-30 17:31:36,027 epoch 3 - iter 732/3661 - loss 0.08977313 - time (sec): 94.25 - samples/sec: 62.13 - lr: 0.000040 - momentum: 0.000000
2023-10-30 17:32:22,937 epoch 3 - iter 1098/3661 - loss 0.09140590 - time (sec): 141.16 - samples/sec: 62.23 - lr: 0.000039 - momentum: 0.000000
2023-10-30 17:33:10,179 epoch 3 - iter 1464/3661 - loss 0.09155498 - time (sec): 188.41 - samples/sec: 62.16 - lr: 0.000039 - momentum: 0.000000
2023-10-30 17:33:56,820 epoch 3 - iter 1830/3661 - loss 0.09137301 - time (sec): 235.05 - samples/sec: 62.29 - lr: 0.000039 - momentum: 0.000000
2023-10-30 17:34:43,622 epoch 3 - iter 2196/3661 - loss 0.09467251 -

100%|██████████| 234/234 [00:05<00:00, 42.82it/s]


2023-10-30 17:37:57,753 DEV : loss 0.10433728247880936 - f1-score (macro avg)  0.5761
2023-10-30 17:37:58,052 ----------------------------------------------------------------------------------------------------
2023-10-30 17:38:44,901 epoch 4 - iter 366/3661 - loss 0.06662166 - time (sec): 46.85 - samples/sec: 62.50 - lr: 0.000038 - momentum: 0.000000
2023-10-30 17:39:32,053 epoch 4 - iter 732/3661 - loss 0.07683537 - time (sec): 94.00 - samples/sec: 62.30 - lr: 0.000037 - momentum: 0.000000
2023-10-30 17:40:19,370 epoch 4 - iter 1098/3661 - loss 0.08817905 - time (sec): 141.31 - samples/sec: 62.16 - lr: 0.000037 - momentum: 0.000000
2023-10-30 17:41:05,767 epoch 4 - iter 1464/3661 - loss 0.08708562 - time (sec): 187.71 - samples/sec: 62.39 - lr: 0.000037 - momentum: 0.000000
2023-10-30 17:41:53,116 epoch 4 - iter 1830/3661 - loss 0.08237197 - time (sec): 235.06 - samples/sec: 62.28 - lr: 0.000037 - momentum: 0.000000
2023-10-30 17:42:40,091 epoch 4 - iter 2196/3661 - loss 0.08267987 -

100%|██████████| 234/234 [00:03<00:00, 66.74it/s]

2023-10-30 17:45:51,344 DEV : loss 0.10334734618663788 - f1-score (macro avg)  0.5761





2023-10-30 17:45:51,665 ----------------------------------------------------------------------------------------------------
2023-10-30 17:46:38,675 epoch 5 - iter 366/3661 - loss 0.07115186 - time (sec): 47.01 - samples/sec: 62.29 - lr: 0.000035 - momentum: 0.000000
2023-10-30 17:47:25,491 epoch 5 - iter 732/3661 - loss 0.07741839 - time (sec): 93.82 - samples/sec: 62.42 - lr: 0.000035 - momentum: 0.000000
2023-10-30 17:48:12,597 epoch 5 - iter 1098/3661 - loss 0.08263314 - time (sec): 140.93 - samples/sec: 62.33 - lr: 0.000035 - momentum: 0.000000
2023-10-30 17:48:58,945 epoch 5 - iter 1464/3661 - loss 0.08286964 - time (sec): 187.28 - samples/sec: 62.54 - lr: 0.000035 - momentum: 0.000000
2023-10-30 17:49:46,710 epoch 5 - iter 1830/3661 - loss 0.08290716 - time (sec): 235.04 - samples/sec: 62.29 - lr: 0.000034 - momentum: 0.000000
2023-10-30 17:50:33,989 epoch 5 - iter 2196/3661 - loss 0.08052766 - time (sec): 282.32 - samples/sec: 62.23 - lr: 0.000034 - momentum: 0.000000
2023-10-3

100%|██████████| 234/234 [00:03<00:00, 67.56it/s]


2023-10-30 17:53:45,424 DEV : loss 0.10099315643310547 - f1-score (macro avg)  0.5761
2023-10-30 17:53:45,755 ----------------------------------------------------------------------------------------------------
2023-10-30 17:54:32,629 epoch 6 - iter 366/3661 - loss 0.04361502 - time (sec): 46.87 - samples/sec: 62.47 - lr: 0.000033 - momentum: 0.000000
2023-10-30 17:55:18,960 epoch 6 - iter 732/3661 - loss 0.06030149 - time (sec): 93.20 - samples/sec: 62.83 - lr: 0.000033 - momentum: 0.000000
2023-10-30 17:56:06,470 epoch 6 - iter 1098/3661 - loss 0.06628043 - time (sec): 140.71 - samples/sec: 62.43 - lr: 0.000033 - momentum: 0.000000
2023-10-30 17:56:53,039 epoch 6 - iter 1464/3661 - loss 0.07043349 - time (sec): 187.28 - samples/sec: 62.54 - lr: 0.000032 - momentum: 0.000000
2023-10-30 17:57:40,225 epoch 6 - iter 1830/3661 - loss 0.06790871 - time (sec): 234.47 - samples/sec: 62.44 - lr: 0.000032 - momentum: 0.000000
2023-10-30 17:58:27,668 epoch 6 - iter 2196/3661 - loss 0.07131298 -

100%|██████████| 234/234 [00:03<00:00, 67.08it/s]

2023-10-30 18:01:39,013 DEV : loss 0.09580113738775253 - f1-score (macro avg)  0.5761





2023-10-30 18:01:39,324 ----------------------------------------------------------------------------------------------------
2023-10-30 18:02:26,806 epoch 7 - iter 366/3661 - loss 0.05947281 - time (sec): 47.48 - samples/sec: 61.67 - lr: 0.000031 - momentum: 0.000000
2023-10-30 18:03:13,895 epoch 7 - iter 732/3661 - loss 0.06125034 - time (sec): 94.57 - samples/sec: 61.92 - lr: 0.000031 - momentum: 0.000000
2023-10-30 18:04:00,419 epoch 7 - iter 1098/3661 - loss 0.06646222 - time (sec): 141.09 - samples/sec: 62.26 - lr: 0.000030 - momentum: 0.000000
2023-10-30 18:04:47,908 epoch 7 - iter 1464/3661 - loss 0.06476251 - time (sec): 188.58 - samples/sec: 62.11 - lr: 0.000030 - momentum: 0.000000
2023-10-30 18:05:34,714 epoch 7 - iter 1830/3661 - loss 0.06386621 - time (sec): 235.39 - samples/sec: 62.20 - lr: 0.000030 - momentum: 0.000000
2023-10-30 18:06:21,651 epoch 7 - iter 2196/3661 - loss 0.06456572 - time (sec): 282.32 - samples/sec: 62.23 - lr: 0.000030 - momentum: 0.000000
2023-10-3

100%|██████████| 234/234 [00:05<00:00, 39.75it/s]


2023-10-30 18:09:35,263 DEV : loss 0.09658127278089523 - f1-score (macro avg)  0.5761
2023-10-30 18:09:35,730 ----------------------------------------------------------------------------------------------------
2023-10-30 18:10:22,870 epoch 8 - iter 366/3661 - loss 0.05515024 - time (sec): 47.13 - samples/sec: 62.12 - lr: 0.000029 - momentum: 0.000000
2023-10-30 18:11:10,553 epoch 8 - iter 732/3661 - loss 0.06145406 - time (sec): 94.82 - samples/sec: 61.76 - lr: 0.000028 - momentum: 0.000000
2023-10-30 18:11:57,237 epoch 8 - iter 1098/3661 - loss 0.05866136 - time (sec): 141.50 - samples/sec: 62.08 - lr: 0.000028 - momentum: 0.000000
2023-10-30 18:12:44,478 epoch 8 - iter 1464/3661 - loss 0.06032779 - time (sec): 188.74 - samples/sec: 62.05 - lr: 0.000028 - momentum: 0.000000
2023-10-30 18:13:31,347 epoch 8 - iter 1830/3661 - loss 0.05975541 - time (sec): 235.61 - samples/sec: 62.14 - lr: 0.000028 - momentum: 0.000000
2023-10-30 18:14:17,938 epoch 8 - iter 2196/3661 - loss 0.05924911 -

100%|██████████| 234/234 [00:05<00:00, 46.46it/s]


2023-10-30 18:17:30,847 DEV : loss 0.09290987253189087 - f1-score (macro avg)  0.5761
2023-10-30 18:17:31,147 ----------------------------------------------------------------------------------------------------
2023-10-30 18:18:17,925 epoch 9 - iter 366/3661 - loss 0.04646033 - time (sec): 46.78 - samples/sec: 62.60 - lr: 0.000026 - momentum: 0.000000
2023-10-30 18:19:05,066 epoch 9 - iter 732/3661 - loss 0.04431561 - time (sec): 93.92 - samples/sec: 62.35 - lr: 0.000026 - momentum: 0.000000
2023-10-30 18:19:52,361 epoch 9 - iter 1098/3661 - loss 0.04639437 - time (sec): 141.21 - samples/sec: 62.20 - lr: 0.000026 - momentum: 0.000000
2023-10-30 18:20:38,877 epoch 9 - iter 1464/3661 - loss 0.04461236 - time (sec): 187.73 - samples/sec: 62.39 - lr: 0.000026 - momentum: 0.000000
2023-10-30 18:21:26,031 epoch 9 - iter 1830/3661 - loss 0.04778845 - time (sec): 234.88 - samples/sec: 62.33 - lr: 0.000026 - momentum: 0.000000
2023-10-30 18:22:12,700 epoch 9 - iter 2196/3661 - loss 0.04972623 -

100%|██████████| 234/234 [00:03<00:00, 67.38it/s]


2023-10-30 18:25:24,592 DEV : loss 0.09225400537252426 - f1-score (macro avg)  0.5761
2023-10-30 18:25:24,905 ----------------------------------------------------------------------------------------------------
2023-10-30 18:26:12,545 epoch 10 - iter 366/3661 - loss 0.04020148 - time (sec): 47.64 - samples/sec: 61.46 - lr: 0.000024 - momentum: 0.000000
2023-10-30 18:26:59,309 epoch 10 - iter 732/3661 - loss 0.04511469 - time (sec): 94.40 - samples/sec: 62.03 - lr: 0.000024 - momentum: 0.000000
2023-10-30 18:27:46,633 epoch 10 - iter 1098/3661 - loss 0.04928955 - time (sec): 141.73 - samples/sec: 61.98 - lr: 0.000024 - momentum: 0.000000
2023-10-30 18:28:33,185 epoch 10 - iter 1464/3661 - loss 0.04571007 - time (sec): 188.28 - samples/sec: 62.21 - lr: 0.000024 - momentum: 0.000000
2023-10-30 18:29:20,181 epoch 10 - iter 1830/3661 - loss 0.04777280 - time (sec): 235.27 - samples/sec: 62.23 - lr: 0.000023 - momentum: 0.000000
2023-10-30 18:30:07,314 epoch 10 - iter 2196/3661 - loss 0.0476

100%|██████████| 234/234 [00:03<00:00, 67.87it/s]

2023-10-30 18:33:18,547 DEV : loss 0.09164595603942871 - f1-score (macro avg)  0.5761





2023-10-30 18:33:18,858 ----------------------------------------------------------------------------------------------------
2023-10-30 18:34:06,337 epoch 11 - iter 366/3661 - loss 0.04151998 - time (sec): 47.48 - samples/sec: 61.67 - lr: 0.000022 - momentum: 0.000000
2023-10-30 18:34:52,717 epoch 11 - iter 732/3661 - loss 0.03471421 - time (sec): 93.86 - samples/sec: 62.39 - lr: 0.000022 - momentum: 0.000000
2023-10-30 18:35:40,097 epoch 11 - iter 1098/3661 - loss 0.03313681 - time (sec): 141.24 - samples/sec: 62.19 - lr: 0.000022 - momentum: 0.000000
2023-10-30 18:36:26,737 epoch 11 - iter 1464/3661 - loss 0.03340845 - time (sec): 187.88 - samples/sec: 62.34 - lr: 0.000021 - momentum: 0.000000
2023-10-30 18:37:14,198 epoch 11 - iter 1830/3661 - loss 0.03412003 - time (sec): 235.34 - samples/sec: 62.21 - lr: 0.000021 - momentum: 0.000000
2023-10-30 18:38:01,302 epoch 11 - iter 2196/3661 - loss 0.03154766 - time (sec): 282.44 - samples/sec: 62.20 - lr: 0.000021 - momentum: 0.000000
202

100%|██████████| 234/234 [00:03<00:00, 59.88it/s]

2023-10-30 18:41:12,912 DEV : loss 0.08805502951145172 - f1-score (macro avg)  0.5761





2023-10-30 18:41:13,421 ----------------------------------------------------------------------------------------------------
2023-10-30 18:42:00,310 epoch 12 - iter 366/3661 - loss 0.02843221 - time (sec): 46.88 - samples/sec: 62.45 - lr: 0.000020 - momentum: 0.000000
2023-10-30 18:42:46,989 epoch 12 - iter 732/3661 - loss 0.02045156 - time (sec): 93.56 - samples/sec: 62.59 - lr: 0.000020 - momentum: 0.000000
2023-10-30 18:43:34,003 epoch 12 - iter 1098/3661 - loss 0.02393653 - time (sec): 140.58 - samples/sec: 62.49 - lr: 0.000019 - momentum: 0.000000
2023-10-30 18:44:21,068 epoch 12 - iter 1464/3661 - loss 0.02465842 - time (sec): 187.64 - samples/sec: 62.42 - lr: 0.000019 - momentum: 0.000000
2023-10-30 18:45:07,563 epoch 12 - iter 1830/3661 - loss 0.02690782 - time (sec): 234.14 - samples/sec: 62.53 - lr: 0.000019 - momentum: 0.000000
2023-10-30 18:45:55,087 epoch 12 - iter 2196/3661 - loss 0.02798129 - time (sec): 281.66 - samples/sec: 62.37 - lr: 0.000019 - momentum: 0.000000
202

100%|██████████| 234/234 [00:05<00:00, 40.69it/s]


2023-10-30 18:49:10,703 DEV : loss 0.08697988092899323 - f1-score (macro avg)  0.5761
2023-10-30 18:49:11,180 ----------------------------------------------------------------------------------------------------
2023-10-30 18:49:58,132 epoch 13 - iter 366/3661 - loss 0.02316164 - time (sec): 46.95 - samples/sec: 62.37 - lr: 0.000018 - momentum: 0.000000
2023-10-30 18:50:46,120 epoch 13 - iter 732/3661 - loss 0.02233646 - time (sec): 94.94 - samples/sec: 61.68 - lr: 0.000017 - momentum: 0.000000
2023-10-30 18:51:33,562 epoch 13 - iter 1098/3661 - loss 0.02186044 - time (sec): 142.38 - samples/sec: 61.69 - lr: 0.000017 - momentum: 0.000000
2023-10-30 18:52:21,824 epoch 13 - iter 1464/3661 - loss 0.02233772 - time (sec): 190.64 - samples/sec: 61.43 - lr: 0.000017 - momentum: 0.000000
2023-10-30 18:53:08,413 epoch 13 - iter 1830/3661 - loss 0.02219315 - time (sec): 237.23 - samples/sec: 61.71 - lr: 0.000017 - momentum: 0.000000
2023-10-30 18:53:56,090 epoch 13 - iter 2196/3661 - loss 0.0228

100%|██████████| 234/234 [00:04<00:00, 57.68it/s]

2023-10-30 18:57:09,802 DEV : loss 0.0886736512184143 - f1-score (macro avg)  0.5761





2023-10-30 18:57:10,297 ----------------------------------------------------------------------------------------------------
2023-10-30 18:57:57,632 epoch 14 - iter 366/3661 - loss 0.01785263 - time (sec): 47.33 - samples/sec: 61.87 - lr: 0.000015 - momentum: 0.000000
2023-10-30 18:58:44,673 epoch 14 - iter 732/3661 - loss 0.01347030 - time (sec): 94.37 - samples/sec: 62.06 - lr: 0.000015 - momentum: 0.000000
2023-10-30 18:59:31,434 epoch 14 - iter 1098/3661 - loss 0.01445788 - time (sec): 141.13 - samples/sec: 62.24 - lr: 0.000015 - momentum: 0.000000
2023-10-30 19:00:19,034 epoch 14 - iter 1464/3661 - loss 0.01492766 - time (sec): 188.73 - samples/sec: 62.06 - lr: 0.000015 - momentum: 0.000000
2023-10-30 19:01:05,466 epoch 14 - iter 1830/3661 - loss 0.01524367 - time (sec): 235.16 - samples/sec: 62.26 - lr: 0.000014 - momentum: 0.000000
2023-10-30 19:01:52,620 epoch 14 - iter 2196/3661 - loss 0.01578222 - time (sec): 282.31 - samples/sec: 62.23 - lr: 0.000014 - momentum: 0.000000
202

100%|██████████| 234/234 [00:06<00:00, 37.08it/s]


2023-10-30 19:05:07,340 DEV : loss 0.0859120711684227 - f1-score (macro avg)  0.5761
2023-10-30 19:05:07,814 ----------------------------------------------------------------------------------------------------
2023-10-30 19:05:54,809 epoch 15 - iter 366/3661 - loss 0.01227689 - time (sec): 46.99 - samples/sec: 62.31 - lr: 0.000013 - momentum: 0.000000
2023-10-30 19:06:42,247 epoch 15 - iter 732/3661 - loss 0.00775955 - time (sec): 94.43 - samples/sec: 62.02 - lr: 0.000013 - momentum: 0.000000
2023-10-30 19:07:28,770 epoch 15 - iter 1098/3661 - loss 0.01028703 - time (sec): 140.95 - samples/sec: 62.32 - lr: 0.000013 - momentum: 0.000000
2023-10-30 19:08:15,974 epoch 15 - iter 1464/3661 - loss 0.01205037 - time (sec): 188.15 - samples/sec: 62.25 - lr: 0.000012 - momentum: 0.000000
2023-10-30 19:09:03,194 epoch 15 - iter 1830/3661 - loss 0.01257685 - time (sec): 235.37 - samples/sec: 62.20 - lr: 0.000012 - momentum: 0.000000
2023-10-30 19:09:50,015 epoch 15 - iter 2196/3661 - loss 0.01191

100%|██████████| 234/234 [00:04<00:00, 58.25it/s]

2023-10-30 19:13:01,867 DEV : loss 0.08618324995040894 - f1-score (macro avg)  0.5761





2023-10-30 19:13:02,167 ----------------------------------------------------------------------------------------------------
2023-10-30 19:13:49,322 epoch 16 - iter 366/3661 - loss 0.00699239 - time (sec): 47.15 - samples/sec: 62.10 - lr: 0.000011 - momentum: 0.000000
2023-10-30 19:14:36,443 epoch 16 - iter 732/3661 - loss 0.00373952 - time (sec): 94.27 - samples/sec: 62.12 - lr: 0.000011 - momentum: 0.000000
2023-10-30 19:15:23,386 epoch 16 - iter 1098/3661 - loss 0.00682336 - time (sec): 141.22 - samples/sec: 62.20 - lr: 0.000010 - momentum: 0.000000
2023-10-30 19:16:10,194 epoch 16 - iter 1464/3661 - loss 0.00987542 - time (sec): 188.02 - samples/sec: 62.29 - lr: 0.000010 - momentum: 0.000000
2023-10-30 19:16:57,623 epoch 16 - iter 1830/3661 - loss 0.01069997 - time (sec): 235.45 - samples/sec: 62.18 - lr: 0.000010 - momentum: 0.000000
2023-10-30 19:17:44,418 epoch 16 - iter 2196/3661 - loss 0.01016819 - time (sec): 282.25 - samples/sec: 62.24 - lr: 0.000010 - momentum: 0.000000
202

100%|██████████| 234/234 [00:03<00:00, 67.72it/s]

2023-10-30 19:20:55,592 DEV : loss 0.08561782538890839 - f1-score (macro avg)  0.5761





2023-10-30 19:20:55,897 ----------------------------------------------------------------------------------------------------
2023-10-30 19:21:42,986 epoch 17 - iter 366/3661 - loss 0.00238119 - time (sec): 47.09 - samples/sec: 62.18 - lr: 0.000009 - momentum: 0.000000
2023-10-30 19:22:29,116 epoch 17 - iter 732/3661 - loss 0.00211488 - time (sec): 93.22 - samples/sec: 62.82 - lr: 0.000008 - momentum: 0.000000
2023-10-30 19:23:16,623 epoch 17 - iter 1098/3661 - loss 0.00414175 - time (sec): 140.72 - samples/sec: 62.42 - lr: 0.000008 - momentum: 0.000000
2023-10-30 19:24:03,495 epoch 17 - iter 1464/3661 - loss 0.00480191 - time (sec): 187.59 - samples/sec: 62.43 - lr: 0.000008 - momentum: 0.000000
2023-10-30 19:24:50,417 epoch 17 - iter 1830/3661 - loss 0.00541970 - time (sec): 234.52 - samples/sec: 62.43 - lr: 0.000008 - momentum: 0.000000
2023-10-30 19:25:38,426 epoch 17 - iter 2196/3661 - loss 0.00549156 - time (sec): 282.53 - samples/sec: 62.18 - lr: 0.000008 - momentum: 0.000000
202

100%|██████████| 234/234 [00:03<00:00, 67.62it/s]


2023-10-30 19:28:49,828 DEV : loss 0.08542721718549728 - f1-score (macro avg)  0.5761
2023-10-30 19:28:50,226 ----------------------------------------------------------------------------------------------------
2023-10-30 19:29:37,271 epoch 18 - iter 366/3661 - loss 0.00276031 - time (sec): 47.04 - samples/sec: 62.24 - lr: 0.000006 - momentum: 0.000000
2023-10-30 19:30:24,156 epoch 18 - iter 732/3661 - loss 0.00569448 - time (sec): 93.93 - samples/sec: 62.35 - lr: 0.000006 - momentum: 0.000000
2023-10-30 19:31:10,977 epoch 18 - iter 1098/3661 - loss 0.00575711 - time (sec): 140.75 - samples/sec: 62.41 - lr: 0.000006 - momentum: 0.000000
2023-10-30 19:31:57,761 epoch 18 - iter 1464/3661 - loss 0.00496690 - time (sec): 187.53 - samples/sec: 62.45 - lr: 0.000006 - momentum: 0.000000
2023-10-30 19:32:44,050 epoch 18 - iter 1830/3661 - loss 0.00485165 - time (sec): 233.82 - samples/sec: 62.61 - lr: 0.000006 - momentum: 0.000000
2023-10-30 19:33:31,489 epoch 18 - iter 2196/3661 - loss 0.0058

100%|██████████| 234/234 [00:05<00:00, 39.19it/s]


2023-10-30 19:36:45,974 DEV : loss 0.08606021851301193 - f1-score (macro avg)  0.5761
2023-10-30 19:36:46,440 ----------------------------------------------------------------------------------------------------
2023-10-30 19:37:32,994 epoch 19 - iter 366/3661 - loss 0.00004559 - time (sec): 46.55 - samples/sec: 62.90 - lr: 0.000004 - momentum: 0.000000
2023-10-30 19:38:20,706 epoch 19 - iter 732/3661 - loss 0.00037830 - time (sec): 94.26 - samples/sec: 62.13 - lr: 0.000004 - momentum: 0.000000
2023-10-30 19:39:07,243 epoch 19 - iter 1098/3661 - loss 0.00256447 - time (sec): 140.80 - samples/sec: 62.39 - lr: 0.000004 - momentum: 0.000000
2023-10-30 19:39:54,033 epoch 19 - iter 1464/3661 - loss 0.00194820 - time (sec): 187.59 - samples/sec: 62.44 - lr: 0.000004 - momentum: 0.000000
2023-10-30 19:40:41,302 epoch 19 - iter 1830/3661 - loss 0.00233499 - time (sec): 234.85 - samples/sec: 62.34 - lr: 0.000003 - momentum: 0.000000
2023-10-30 19:41:27,822 epoch 19 - iter 2196/3661 - loss 0.0019

100%|██████████| 234/234 [00:04<00:00, 47.45it/s]

2023-10-30 19:44:40,556 DEV : loss 0.08627602458000183 - f1-score (macro avg)  0.5761





2023-10-30 19:44:40,862 ----------------------------------------------------------------------------------------------------
2023-10-30 19:45:27,418 epoch 20 - iter 366/3661 - loss 0.00285315 - time (sec): 46.55 - samples/sec: 62.90 - lr: 0.000002 - momentum: 0.000000
2023-10-30 19:46:14,799 epoch 20 - iter 732/3661 - loss 0.00223591 - time (sec): 93.93 - samples/sec: 62.34 - lr: 0.000002 - momentum: 0.000000
2023-10-30 19:47:02,127 epoch 20 - iter 1098/3661 - loss 0.00372895 - time (sec): 141.26 - samples/sec: 62.18 - lr: 0.000002 - momentum: 0.000000
2023-10-30 19:47:48,480 epoch 20 - iter 1464/3661 - loss 0.00279749 - time (sec): 187.61 - samples/sec: 62.43 - lr: 0.000001 - momentum: 0.000000
2023-10-30 19:48:35,914 epoch 20 - iter 1830/3661 - loss 0.00262338 - time (sec): 235.05 - samples/sec: 62.29 - lr: 0.000001 - momentum: 0.000000
2023-10-30 19:49:22,191 epoch 20 - iter 2196/3661 - loss 0.00219803 - time (sec): 281.33 - samples/sec: 62.45 - lr: 0.000001 - momentum: 0.000000
202

100%|██████████| 234/234 [00:03<00:00, 67.32it/s]

2023-10-30 19:52:34,088 DEV : loss 0.08625127375125885 - f1-score (macro avg)  0.5761





2023-10-30 19:52:35,277 ----------------------------------------------------------------------------------------------------
2023-10-30 19:52:35,279 Testing using last state of model ...


100%|██████████| 888/888 [01:34<00:00,  9.35it/s]


2023-10-30 19:54:10,474 
Results:
- F-score (micro) 0.9892
- F-score (macro) 0.8241
- Accuracy 0.9892

By class:
                 precision    recall  f1-score   support

                    0.9920    0.9969    0.9945     13866
org:happened_at     0.9720    0.7898    0.8715       176
  org:caused_by     0.6905    0.5404    0.6063       161

       accuracy                         0.9892     14203
      macro avg     0.8848    0.7757    0.8241     14203
   weighted avg     0.9884    0.9892    0.9885     14203

2023-10-30 19:54:10,475 ----------------------------------------------------------------------------------------------------


{'test_score': 0.8240685209633786}

### Using 2-step RE on the validation set


In [3]:
loaded_ner = SequenceTagger.load("/content/drive/MyDrive/enwiki20230820/component2/ner/final-model.pt")
loaded_re: RelationClassifier = RelationClassifier.load('/content/drive/MyDrive/enwiki20230820/component2/relations/final-model.pt')

2023-11-08 11:14:33,191 SequenceTagger predicts: Dictionary with 9 tags: O, B-Component, I-Component, B-Person, I-Person, B-Time, I-Time, B-Date, I-Date


In [None]:
test_set = srsly.read_json('/content/drive/MyDrive/enwiki20230820/component2/test_data.json')
all_relations = {}
document_dicts = []
for document in test_set:
  sentence = Sentence(document["data"]["Abstract_new"])
  loaded_ner.predict(sentence)
  loaded_re.predict(sentence)
  sent_dict = sentence.to_dict()
  # relation["labels"][0]["confidence"]
  unique_id = " ".join([str(document["data"]["EventDt"]), document["data"]["IPFCode"], document["data"]["IPFShortDesc"], str(document["inner_id"])])
  all_relations[unique_id] = [(relation["from_text"], relation["to_text"], relation["labels"][0]["value"]) for relation in sent_dict["relations"] if len(relation["labels"][0]["value"]) > 0]
  document_dicts.append(sentence.to_dict())


srsly.write_json('comp2_relations.json', all_relations)
srsly.write_json('comp2_output.json', document_dicts)
