# Experimentos utilizando a rede BiLSTM-CRF com o framework FlairNLP

Os experimentos a seguir avaliam a influência do uso de embeddings no Reconhecimento de Entidades Nomeadas para o português, utilizando os corpora Multi_WikiNER, LeNER_br e PL-Corpus e os embeddings Pt-Wiki-Fasttext, Flair Embeddings e BERTimbau.

Baseado nos tutoriais do flairNLP
https://github.com/flairNLP/flair

In [None]:
%%capture
# Install Flair and Hugging Face Transformers for the experiments below.
!pip install flair transformers
# Optional extras, currently not installed:
#seqeval git-lfs

# Corpus Multi_WikiNER

## Vetor Estático Pt-Wiki-Fasttext


### Imports

In [None]:
## Imports

## Corpus
from flair.datasets import NER_MULTI_WIKINER

## Embeddings
from flair.embeddings import WordEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Corpus
# 1. Load the Portuguese portion of WikiNER. Without an explicit language,
#    NER_MULTI_WIKINER loads the English data, which would not match the
#    Portuguese embeddings evaluated in this experiment.
corpus = NER_MULTI_WIKINER(languages='pt')
print(corpus)

## Task
# 2. Label type the tagger will learn to predict
label_type = 'ner'

In [None]:
## Label dictionary
# 3. Build the label dictionary from the annotations found in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print it as a sanity check before training
print(label_dict)

### Embeddings

In [None]:
## Embeddings
# Static Portuguese word vectors (the Pt-Wiki-Fasttext vectors named in this section's title)
embeddings = WordEmbeddings('pt')

### Treino

In [None]:
## Initializing the model
# 5. BiLSTM-CRF sequence tagger built on the embeddings and label dictionary defined above
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,  # decode with a CRF layer on top of the recurrent network
)
tagger = SequenceTagger(**tagger_options)

In [None]:
## Training the model
# 6. Bind the tagger to the corpus it is trained and evaluated on
trainer = ModelTrainer(tagger, corpus)

# 7. Hyperparameters for this run, collected in one place
training_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    # 'embeddings_storage_mode': 'gpu',  # optional, currently disabled
    'max_epochs': 100,
}
# Model files and logs are written below this local folder
trainer.train('resources/taggers/sota-ner-flair', **training_options)

## Vetor de Contexto Flair Embeddings


### Imports

In [None]:
## Imports

## Corpus
from flair.datasets import NER_MULTI_WIKINER

## Importando os Embeddings, Flair-pt
from flair.embeddings import FlairEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Corpus
# 1. Load the Portuguese portion of WikiNER. Without an explicit language,
#    NER_MULTI_WIKINER loads the English data, which would not match the
#    Portuguese Flair embeddings evaluated in this experiment.
corpus = NER_MULTI_WIKINER(languages='pt')
print(corpus)

## Task
# 2. Label type the tagger will learn to predict
label_type = 'ner'

In [None]:
## Label dictionary
# 3. Build the label dictionary from the annotations found in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print it as a sanity check before training
print(label_dict)

### Embeddings

In [None]:
## Stacking the embeddings
from flair.embeddings import StackedEmbeddings

# Portuguese Flair contextual embeddings, one model per reading direction
flair_embedding_forward = FlairEmbeddings('pt-forward')
flair_embedding_backward = FlairEmbeddings('pt-backward')

# Combine only the forward and backward Flair embeddings;
# no static word vectors take part in this experiment
contextual_stack = [flair_embedding_forward, flair_embedding_backward]
embeddings = StackedEmbeddings(contextual_stack)

### Treino

In [None]:
## Initializing the model
# 5. BiLSTM-CRF sequence tagger built on the embeddings and label dictionary defined above
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,  # decode with a CRF layer on top of the recurrent network
)
tagger = SequenceTagger(**tagger_options)

In [None]:
## Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Output folder on Drive for the model and its checkpoint.
# NOTE(review): the other Drive-backed Multi_WikiNER experiments in this notebook
# assign this same path, so their outputs may overwrite each other — confirm.
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

In [None]:
## Training the model
# 6. Bind the tagger to the corpus it is trained and evaluated on
trainer = ModelTrainer(tagger, corpus)

# 7. Hyperparameters for this first training stage
training_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 10,
    'checkpoint': True,  # keep a checkpoint so a later cell can continue training
}
trainer.train(path, **training_options)

In [None]:
## Continuing training. Because training takes a long time it had to be split into stages;
## Google Drive is used to keep the checkpoint so that training can be continued later.
trainer = ModelTrainer(tagger, corpus)

# 8. Load the checkpoint written by the previous training stage
checkpoint_file = path + '/checkpoint.pt'
trained_model = SequenceTagger.load(checkpoint_file)

# Resume from that checkpoint, this time until the new max_epochs;
# the resumed run writes to a separate '-resume' folder
resume_dir = path + '-resume'
trainer.resume(trained_model,
               base_path=resume_dir,
               max_epochs=65,
               checkpoint=True)

## Vetores Estático e de Contexto concatenados (Pt-Wiki-Fasttext e Flair Embeddings)

### Imports

In [None]:
## Imports

## Corpus
from flair.datasets import NER_MULTI_WIKINER

## Embeddings
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Corpus
# 1. Load the Portuguese portion of WikiNER. Without an explicit language,
#    NER_MULTI_WIKINER loads the English data, which would not match the
#    Portuguese embeddings evaluated in this experiment.
corpus = NER_MULTI_WIKINER(languages='pt')
print(corpus)

## Task
# 2. Label type the tagger will learn to predict
label_type = 'ner'

In [None]:
## Label dictionary
# 3. Build the label dictionary from the annotations found in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print it as a sanity check before training
print(label_dict)

### Embeddings

In [None]:
## Stacked Embeddings
# Embedding stack: static Portuguese word vectors plus forward and backward Flair embeddings
embedding_types = [
    WordEmbeddings('pt'),
    FlairEmbeddings('pt-forward'),
    FlairEmbeddings('pt-backward')
]

# Combine the individual embeddings into a single embedding object
embeddings = StackedEmbeddings(embeddings=embedding_types)

### Treino

In [None]:
## Initializing the model
# 5. BiLSTM-CRF sequence tagger built on the embeddings and label dictionary defined above
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,  # decode with a CRF layer on top of the recurrent network
)
tagger = SequenceTagger(**tagger_options)

In [None]:
## Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Output folder on Drive for the model and its checkpoint.
# NOTE(review): the other Drive-backed Multi_WikiNER experiments in this notebook
# assign this same path, so their outputs may overwrite each other — confirm.
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

In [None]:
## Training the model
# 6. Bind the tagger to the corpus it is trained and evaluated on
trainer = ModelTrainer(tagger, corpus)

# 7. Hyperparameters for this first training stage
training_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 1,
    'checkpoint': True,  # keep a checkpoint so a later cell can continue training
}
trainer.train(path, **training_options)

In [None]:
## Continuing training. Because training takes a long time it had to be split into stages;
## Google Drive is used to keep the checkpoint so that training can be continued later.
trainer = ModelTrainer(tagger, corpus)

# 8. Load the checkpoint written by the previous training stage
checkpoint_file = path + '/checkpoint.pt'
trained_model = SequenceTagger.load(checkpoint_file)

# Resume from that checkpoint, this time until the new max_epochs;
# the resumed run writes to a separate '-resume' folder
resume_dir = path + '-resume'
trainer.resume(trained_model,
               base_path=resume_dir,
               max_epochs=40,
               checkpoint=True)

## Vetor de Contexto BERTimbau


### Imports

In [None]:
## Imports

## Corpus
from flair.datasets import NER_MULTI_WIKINER

## Embeddings: BERTimbau, loaded through TransformerWordEmbeddings
from flair.embeddings import TransformerWordEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Corpus
# 1. Load the Portuguese portion of WikiNER. Without an explicit language,
#    NER_MULTI_WIKINER loads the English data, which would not match the
#    Portuguese BERTimbau embeddings evaluated in this experiment.
corpus = NER_MULTI_WIKINER(languages='pt') #.downsample(0.8)
print(corpus)

## Task
# 2. Label type the tagger will learn to predict
label_type = 'ner'

In [None]:
## Label dictionary
# 3. Build the label dictionary from the annotations found in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print it as a sanity check before training
print(label_dict)

### Embeddings

In [None]:
## BERT only
# BERTimbau (base, cased) contextual embeddings, selected by model name.
# NOTE(review): whether the transformer weights are fine-tuned or kept frozen depends on
# the defaults of the installed Flair version — confirm it matches the intended setup
# and the learning rate used for training below.
embeddings = TransformerWordEmbeddings('neuralmind/bert-base-portuguese-cased')

### Treino

In [None]:
## Initializing the model
# 5. BiLSTM-CRF sequence tagger built on the embeddings and label dictionary defined above
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,  # decode with a CRF layer on top of the recurrent network
)
tagger = SequenceTagger(**tagger_options)

In [None]:
## Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')
# Output folder on Drive for the model and its checkpoint.
# NOTE(review): the other Drive-backed Multi_WikiNER experiments in this notebook
# assign this same path, so their outputs may overwrite each other — confirm.
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

In [None]:
## Training the model
# 6. Bind the tagger to the corpus it is trained and evaluated on
trainer = ModelTrainer(tagger, corpus)

# 7. Hyperparameters for this first training stage
training_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 15,
    'checkpoint': True,  # keep a checkpoint so a later cell can continue training
}
trainer.train(path, **training_options)

In [None]:
## Continuing training. Because training takes a long time it had to be split into stages;
## Google Drive is used to keep the checkpoint so that training can be continued later.
trainer = ModelTrainer(tagger, corpus)

# 8. Load the checkpoint written by the previous training stage
checkpoint_file = path + '/checkpoint.pt'
trained_model = SequenceTagger.load(checkpoint_file)

# Resume from that checkpoint, this time until the new max_epochs;
# the resumed run writes to a separate '-resume' folder
resume_dir = path + '-resume'
trainer.resume(trained_model,
               base_path=resume_dir,
               max_epochs=40,
               checkpoint=True)

# Corpus LeNER_br

## Vetor Estático Pt-Wiki-Fasttext


### Imports

In [None]:
## Imports
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import WordEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mounting Google Drive
# The LeNER_br corpus files are read from Drive in the next cell
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Loading the corpus and defining its columns
# Column layout of the data files: token text in column 0, NER tag in column 1
columns = {0: 'text', 1: 'ner'}

# Folder in which the train, test and dev files reside
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig'

# File name of each split inside data_folder
split_files = {
    'train_file': 'train.txt',
    'test_file': 'test.txt',
    'dev_file': 'dev.txt',
}

# Build the corpus from the column-formatted files
corpus: Corpus = ColumnCorpus(data_folder, columns, **split_files)

## Task
label_type = 'ner'

2022-12-12 00:01:51,273 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig
2022-12-12 00:01:51,275 Train: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/train.txt
2022-12-12 00:01:51,279 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/dev.txt
2022-12-12 00:01:51,280 Test: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/test.txt


In [None]:
## Label dictionary
# Build the label dictionary from the annotations found in the corpus
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print it as a sanity check before training
print(label_dict)

2022-12-12 00:02:01,521 Computing label dictionary. Progress:


7827it [00:00, 47659.78it/s]

2022-12-12 00:02:01,722 Dictionary created for label 'ner' with 7 values: ORGANIZACAO (seen 2400 times), LEGISLACAO (seen 1920 times), PESSOA (seen 1525 times), TEMPO (seen 1334 times), JURISPRUDENCIA (seen 1104 times), LOCAL (seen 611 times)
Dictionary with 7 tags: <unk>, ORGANIZACAO, LEGISLACAO, PESSOA, TEMPO, JURISPRUDENCIA, LOCAL





### Embeddings

In [None]:
## Embeddings
# Static Portuguese word vectors; the download log below shows this resolves to
# the pt-wiki-fasttext-300d-1M files
embeddings = WordEmbeddings('pt')

2022-12-12 00:02:02,756 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M.vectors.npy not found in cache, downloading to /tmp/tmpxn5w9s_0


100%|██████████| 710528528/710528528 [00:53<00:00, 13373490.70B/s]

2022-12-12 00:02:56,534 copying /tmp/tmpxn5w9s_0 to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M.vectors.npy





2022-12-12 00:02:58,505 removing temp file /tmp/tmpxn5w9s_0
2022-12-12 00:02:59,556 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M not found in cache, downloading to /tmp/tmpzhe_duta


100%|██████████| 23541010/23541010 [00:03<00:00, 7598937.33B/s] 

2022-12-12 00:03:03,311 copying /tmp/tmpzhe_duta to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M
2022-12-12 00:03:03,330 removing temp file /tmp/tmpzhe_duta





### Treino

In [None]:
## Initializing the model
# BiLSTM-CRF sequence tagger built on the embeddings and label dictionary defined above
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,  # decode with a CRF layer on top of the recurrent network
)
tagger = SequenceTagger(**tagger_options)

2022-12-12 00:03:06,066 SequenceTagger predicts: Dictionary with 25 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-TEMPO, B-TEMPO, E-TEMPO, I-TEMPO, S-JURISPRUDENCIA, B-JURISPRUDENCIA, E-JURISPRUDENCIA, I-JURISPRUDENCIA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL


In [None]:
## Training the model
# Bind the tagger to the corpus it is trained and evaluated on
trainer = ModelTrainer(tagger, corpus)

# Hyperparameters for this run, collected in one place
training_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 150,
}
# Model files and logs are written below this local folder
trainer.train('resources/taggers/sota-ner-flair', **training_options)

2022-12-12 00:03:14,172 ----------------------------------------------------------------------------------------------------
2022-12-12 00:03:14,175 Model: "SequenceTagger(
  (embeddings): WordEmbeddings(
    'pt'
    (embedding): Embedding(592108, 300)
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=300, out_features=300, bias=True)
  (rnn): LSTM(300, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=512, out_features=27, bias=True)
  (loss_function): ViterbiLoss()
  (crf): CRF()
)"
2022-12-12 00:03:14,177 ----------------------------------------------------------------------------------------------------
2022-12-12 00:03:14,179 Corpus: "Corpus: 7827 train + 1176 dev + 1389 test sentences"
2022-12-12 00:03:14,181 ----------------------------------------------------------------------------------------------------
2022-12-12 00:03:14,182 Parameters:
2022-12-12 00:03:14,185  - learning_rate: "

100%|██████████| 37/37 [00:08<00:00,  4.54it/s]

2022-12-12 00:04:01,106 Evaluating as a multi-label problem: False
2022-12-12 00:04:01,127 DEV : loss 0.2923157811164856 - f1-score (micro avg)  0.3266





2022-12-12 00:04:01,213 BAD EPOCHS (no improvement): 0
2022-12-12 00:04:01,215 saving best model
2022-12-12 00:04:03,843 ----------------------------------------------------------------------------------------------------
2022-12-12 00:04:07,577 epoch 2 - iter 24/245 - loss 0.26401509 - samples/sec: 205.98 - lr: 0.100000
2022-12-12 00:04:11,315 epoch 2 - iter 48/245 - loss 0.24095730 - samples/sec: 205.71 - lr: 0.100000
2022-12-12 00:04:14,920 epoch 2 - iter 72/245 - loss 0.23139441 - samples/sec: 213.26 - lr: 0.100000
2022-12-12 00:04:18,608 epoch 2 - iter 96/245 - loss 0.22623612 - samples/sec: 208.51 - lr: 0.100000
2022-12-12 00:04:22,780 epoch 2 - iter 120/245 - loss 0.22045064 - samples/sec: 184.26 - lr: 0.100000
2022-12-12 00:04:25,881 epoch 2 - iter 144/245 - loss 0.21562629 - samples/sec: 247.96 - lr: 0.100000
2022-12-12 00:04:30,512 epoch 2 - iter 168/245 - loss 0.21055466 - samples/sec: 166.00 - lr: 0.100000
2022-12-12 00:04:33,397 epoch 2 - iter 192/245 - loss 0.20625175 - s

100%|██████████| 37/37 [00:07<00:00,  5.11it/s]

2022-12-12 00:04:49,626 Evaluating as a multi-label problem: False
2022-12-12 00:04:49,649 DEV : loss 0.15578848123550415 - f1-score (micro avg)  0.4512





2022-12-12 00:04:49,732 BAD EPOCHS (no improvement): 0
2022-12-12 00:04:49,735 saving best model
2022-12-12 00:04:52,557 ----------------------------------------------------------------------------------------------------
2022-12-12 00:04:56,147 epoch 3 - iter 24/245 - loss 0.15902660 - samples/sec: 215.66 - lr: 0.100000
2022-12-12 00:05:00,502 epoch 3 - iter 48/245 - loss 0.15090710 - samples/sec: 176.54 - lr: 0.100000
2022-12-12 00:05:04,123 epoch 3 - iter 72/245 - loss 0.14509615 - samples/sec: 212.31 - lr: 0.100000
2022-12-12 00:05:08,384 epoch 3 - iter 96/245 - loss 0.14392058 - samples/sec: 180.43 - lr: 0.100000
2022-12-12 00:05:13,036 epoch 3 - iter 120/245 - loss 0.14231136 - samples/sec: 165.24 - lr: 0.100000
2022-12-12 00:05:16,646 epoch 3 - iter 144/245 - loss 0.13823325 - samples/sec: 212.97 - lr: 0.100000
2022-12-12 00:05:19,540 epoch 3 - iter 168/245 - loss 0.13671142 - samples/sec: 265.75 - lr: 0.100000
2022-12-12 00:05:23,708 epoch 3 - iter 192/245 - loss 0.13551729 - s

100%|██████████| 37/37 [00:08<00:00,  4.60it/s]

2022-12-12 00:05:39,232 Evaluating as a multi-label problem: False
2022-12-12 00:05:39,253 DEV : loss 0.11385537683963776 - f1-score (micro avg)  0.5675





2022-12-12 00:05:39,338 BAD EPOCHS (no improvement): 0
2022-12-12 00:05:39,340 saving best model
2022-12-12 00:05:42,035 ----------------------------------------------------------------------------------------------------
2022-12-12 00:05:46,475 epoch 4 - iter 24/245 - loss 0.11153287 - samples/sec: 173.85 - lr: 0.100000
2022-12-12 00:05:51,317 epoch 4 - iter 48/245 - loss 0.10553651 - samples/sec: 158.77 - lr: 0.100000
2022-12-12 00:05:55,404 epoch 4 - iter 72/245 - loss 0.10524185 - samples/sec: 188.14 - lr: 0.100000
2022-12-12 00:05:59,425 epoch 4 - iter 96/245 - loss 0.10660237 - samples/sec: 191.18 - lr: 0.100000
2022-12-12 00:06:03,022 epoch 4 - iter 120/245 - loss 0.10432074 - samples/sec: 213.74 - lr: 0.100000
2022-12-12 00:06:06,037 epoch 4 - iter 144/245 - loss 0.10346559 - samples/sec: 255.09 - lr: 0.100000
2022-12-12 00:06:09,294 epoch 4 - iter 168/245 - loss 0.10365828 - samples/sec: 236.05 - lr: 0.100000
2022-12-12 00:06:12,072 epoch 4 - iter 192/245 - loss 0.10214064 - s

100%|██████████| 37/37 [00:08<00:00,  4.61it/s]

2022-12-12 00:06:27,936 Evaluating as a multi-label problem: False
2022-12-12 00:06:27,963 DEV : loss 0.09648843109607697 - f1-score (micro avg)  0.5993





2022-12-12 00:06:28,050 BAD EPOCHS (no improvement): 0
2022-12-12 00:06:28,053 saving best model
2022-12-12 00:06:30,920 ----------------------------------------------------------------------------------------------------
2022-12-12 00:06:36,185 epoch 5 - iter 24/245 - loss 0.09842539 - samples/sec: 146.48 - lr: 0.100000
2022-12-12 00:06:40,964 epoch 5 - iter 48/245 - loss 0.08962839 - samples/sec: 160.83 - lr: 0.100000
2022-12-12 00:06:44,110 epoch 5 - iter 72/245 - loss 0.09298976 - samples/sec: 244.43 - lr: 0.100000
2022-12-12 00:06:48,083 epoch 5 - iter 96/245 - loss 0.09191562 - samples/sec: 193.51 - lr: 0.100000
2022-12-12 00:06:51,459 epoch 5 - iter 120/245 - loss 0.09046449 - samples/sec: 227.75 - lr: 0.100000
2022-12-12 00:06:54,442 epoch 5 - iter 144/245 - loss 0.08996636 - samples/sec: 257.82 - lr: 0.100000
2022-12-12 00:06:57,374 epoch 5 - iter 168/245 - loss 0.08836427 - samples/sec: 262.15 - lr: 0.100000
2022-12-12 00:07:01,328 epoch 5 - iter 192/245 - loss 0.08808060 - s

100%|██████████| 37/37 [00:07<00:00,  5.03it/s]

2022-12-12 00:07:16,447 Evaluating as a multi-label problem: False
2022-12-12 00:07:16,470 DEV : loss 0.08739913254976273 - f1-score (micro avg)  0.6363





2022-12-12 00:07:16,556 BAD EPOCHS (no improvement): 0
2022-12-12 00:07:16,559 saving best model
2022-12-12 00:07:19,237 ----------------------------------------------------------------------------------------------------
2022-12-12 00:07:23,005 epoch 6 - iter 24/245 - loss 0.08610648 - samples/sec: 204.96 - lr: 0.100000
2022-12-12 00:07:27,228 epoch 6 - iter 48/245 - loss 0.08246356 - samples/sec: 182.06 - lr: 0.100000
2022-12-12 00:07:30,766 epoch 6 - iter 72/245 - loss 0.07920964 - samples/sec: 217.29 - lr: 0.100000
2022-12-12 00:07:35,217 epoch 6 - iter 96/245 - loss 0.07788415 - samples/sec: 172.72 - lr: 0.100000
2022-12-12 00:07:38,327 epoch 6 - iter 120/245 - loss 0.07777157 - samples/sec: 247.22 - lr: 0.100000
2022-12-12 00:07:42,179 epoch 6 - iter 144/245 - loss 0.07708356 - samples/sec: 199.59 - lr: 0.100000
2022-12-12 00:07:45,215 epoch 6 - iter 168/245 - loss 0.07734724 - samples/sec: 253.32 - lr: 0.100000
2022-12-12 00:07:49,862 epoch 6 - iter 192/245 - loss 0.07753427 - s

100%|██████████| 37/37 [00:08<00:00,  4.48it/s]

2022-12-12 00:08:06,057 Evaluating as a multi-label problem: False
2022-12-12 00:08:06,077 DEV : loss 0.08484203368425369 - f1-score (micro avg)  0.6697





2022-12-12 00:08:06,160 BAD EPOCHS (no improvement): 0
2022-12-12 00:08:06,162 saving best model
2022-12-12 00:08:08,904 ----------------------------------------------------------------------------------------------------
2022-12-12 00:08:12,408 epoch 7 - iter 24/245 - loss 0.06820545 - samples/sec: 220.49 - lr: 0.100000
2022-12-12 00:08:16,281 epoch 7 - iter 48/245 - loss 0.06997260 - samples/sec: 198.53 - lr: 0.100000
2022-12-12 00:08:19,718 epoch 7 - iter 72/245 - loss 0.06759878 - samples/sec: 223.68 - lr: 0.100000
2022-12-12 00:08:23,537 epoch 7 - iter 96/245 - loss 0.06902120 - samples/sec: 201.34 - lr: 0.100000
2022-12-12 00:08:27,372 epoch 7 - iter 120/245 - loss 0.07283041 - samples/sec: 200.48 - lr: 0.100000
2022-12-12 00:08:31,420 epoch 7 - iter 144/245 - loss 0.07190493 - samples/sec: 189.91 - lr: 0.100000
2022-12-12 00:08:34,750 epoch 7 - iter 168/245 - loss 0.07191624 - samples/sec: 230.88 - lr: 0.100000
2022-12-12 00:08:38,812 epoch 7 - iter 192/245 - loss 0.07232403 - s

100%|██████████| 37/37 [00:07<00:00,  5.13it/s]

2022-12-12 00:08:55,060 Evaluating as a multi-label problem: False
2022-12-12 00:08:55,080 DEV : loss 0.08396030217409134 - f1-score (micro avg)  0.6781





2022-12-12 00:08:55,164 BAD EPOCHS (no improvement): 0
2022-12-12 00:08:55,166 saving best model
2022-12-12 00:08:57,887 ----------------------------------------------------------------------------------------------------
2022-12-12 00:09:01,150 epoch 8 - iter 24/245 - loss 0.07088550 - samples/sec: 236.64 - lr: 0.100000
2022-12-12 00:09:05,236 epoch 8 - iter 48/245 - loss 0.06677235 - samples/sec: 188.16 - lr: 0.100000
2022-12-12 00:09:09,372 epoch 8 - iter 72/245 - loss 0.06364550 - samples/sec: 185.87 - lr: 0.100000
2022-12-12 00:09:13,819 epoch 8 - iter 96/245 - loss 0.06406538 - samples/sec: 172.86 - lr: 0.100000
2022-12-12 00:09:17,073 epoch 8 - iter 120/245 - loss 0.06413275 - samples/sec: 236.30 - lr: 0.100000
2022-12-12 00:09:21,464 epoch 8 - iter 144/245 - loss 0.06318637 - samples/sec: 175.07 - lr: 0.100000
2022-12-12 00:09:25,492 epoch 8 - iter 168/245 - loss 0.06405435 - samples/sec: 190.87 - lr: 0.100000
2022-12-12 00:09:28,875 epoch 8 - iter 192/245 - loss 0.06513528 - s

100%|██████████| 37/37 [00:07<00:00,  4.64it/s]

2022-12-12 00:09:44,385 Evaluating as a multi-label problem: False
2022-12-12 00:09:44,406 DEV : loss 0.0750160664319992 - f1-score (micro avg)  0.7149





2022-12-12 00:09:44,493 BAD EPOCHS (no improvement): 0
2022-12-12 00:09:44,495 saving best model
2022-12-12 00:09:47,284 ----------------------------------------------------------------------------------------------------
2022-12-12 00:09:51,400 epoch 9 - iter 24/245 - loss 0.06027273 - samples/sec: 187.70 - lr: 0.100000
2022-12-12 00:09:55,108 epoch 9 - iter 48/245 - loss 0.06026652 - samples/sec: 207.36 - lr: 0.100000
2022-12-12 00:09:58,337 epoch 9 - iter 72/245 - loss 0.05974409 - samples/sec: 238.14 - lr: 0.100000
2022-12-12 00:10:02,219 epoch 9 - iter 96/245 - loss 0.06023450 - samples/sec: 198.07 - lr: 0.100000
2022-12-12 00:10:05,453 epoch 9 - iter 120/245 - loss 0.06163003 - samples/sec: 237.70 - lr: 0.100000
2022-12-12 00:10:10,414 epoch 9 - iter 144/245 - loss 0.06431734 - samples/sec: 154.97 - lr: 0.100000
2022-12-12 00:10:14,539 epoch 9 - iter 168/245 - loss 0.06415749 - samples/sec: 186.34 - lr: 0.100000
2022-12-12 00:10:17,825 epoch 9 - iter 192/245 - loss 0.06332840 - s

100%|██████████| 37/37 [00:07<00:00,  5.04it/s]

2022-12-12 00:10:33,251 Evaluating as a multi-label problem: False
2022-12-12 00:10:33,274 DEV : loss 0.07586988061666489 - f1-score (micro avg)  0.7015





2022-12-12 00:10:33,361 BAD EPOCHS (no improvement): 1
2022-12-12 00:10:33,364 ----------------------------------------------------------------------------------------------------
2022-12-12 00:10:36,517 epoch 10 - iter 24/245 - loss 0.05059169 - samples/sec: 244.05 - lr: 0.100000
2022-12-12 00:10:39,892 epoch 10 - iter 48/245 - loss 0.05400193 - samples/sec: 227.85 - lr: 0.100000
2022-12-12 00:10:43,450 epoch 10 - iter 72/245 - loss 0.05778809 - samples/sec: 216.11 - lr: 0.100000
2022-12-12 00:10:47,192 epoch 10 - iter 96/245 - loss 0.05886440 - samples/sec: 205.53 - lr: 0.100000
2022-12-12 00:10:51,139 epoch 10 - iter 120/245 - loss 0.05718147 - samples/sec: 194.80 - lr: 0.100000
2022-12-12 00:10:55,256 epoch 10 - iter 144/245 - loss 0.06050747 - samples/sec: 186.71 - lr: 0.100000
2022-12-12 00:11:00,465 epoch 10 - iter 168/245 - loss 0.05913320 - samples/sec: 147.57 - lr: 0.100000
2022-12-12 00:11:04,330 epoch 10 - iter 192/245 - loss 0.05906193 - samples/sec: 198.85 - lr: 0.100000


100%|██████████| 37/37 [00:07<00:00,  5.04it/s]

2022-12-12 00:11:19,900 Evaluating as a multi-label problem: False
2022-12-12 00:11:19,921 DEV : loss 0.0714409276843071 - f1-score (micro avg)  0.7322





2022-12-12 00:11:20,006 BAD EPOCHS (no improvement): 0
2022-12-12 00:11:20,008 saving best model
2022-12-12 00:11:22,882 ----------------------------------------------------------------------------------------------------
2022-12-12 00:11:26,425 epoch 11 - iter 24/245 - loss 0.04951035 - samples/sec: 217.16 - lr: 0.100000
2022-12-12 00:11:29,933 epoch 11 - iter 48/245 - loss 0.05543702 - samples/sec: 219.17 - lr: 0.100000
2022-12-12 00:11:33,674 epoch 11 - iter 72/245 - loss 0.05252121 - samples/sec: 205.49 - lr: 0.100000
2022-12-12 00:11:37,159 epoch 11 - iter 96/245 - loss 0.05369661 - samples/sec: 220.65 - lr: 0.100000
2022-12-12 00:11:40,053 epoch 11 - iter 120/245 - loss 0.05412239 - samples/sec: 265.72 - lr: 0.100000
2022-12-12 00:11:43,767 epoch 11 - iter 144/245 - loss 0.05243791 - samples/sec: 206.98 - lr: 0.100000
2022-12-12 00:11:48,419 epoch 11 - iter 168/245 - loss 0.05275741 - samples/sec: 165.25 - lr: 0.100000
2022-12-12 00:11:52,978 epoch 11 - iter 192/245 - loss 0.0535

100%|██████████| 37/37 [00:08<00:00,  4.56it/s]

2022-12-12 00:12:09,541 Evaluating as a multi-label problem: False
2022-12-12 00:12:09,562 DEV : loss 0.06436680257320404 - f1-score (micro avg)  0.7623





2022-12-12 00:12:09,645 BAD EPOCHS (no improvement): 0
2022-12-12 00:12:09,647 saving best model
2022-12-12 00:12:12,490 ----------------------------------------------------------------------------------------------------
2022-12-12 00:12:15,536 epoch 12 - iter 24/245 - loss 0.05081558 - samples/sec: 252.88 - lr: 0.100000
2022-12-12 00:12:19,518 epoch 12 - iter 48/245 - loss 0.04639467 - samples/sec: 193.07 - lr: 0.100000
2022-12-12 00:12:23,739 epoch 12 - iter 72/245 - loss 0.04612579 - samples/sec: 182.12 - lr: 0.100000
2022-12-12 00:12:28,754 epoch 12 - iter 96/245 - loss 0.05033583 - samples/sec: 153.26 - lr: 0.100000
2022-12-12 00:12:31,699 epoch 12 - iter 120/245 - loss 0.05151383 - samples/sec: 261.09 - lr: 0.100000
2022-12-12 00:12:35,300 epoch 12 - iter 144/245 - loss 0.05139241 - samples/sec: 213.55 - lr: 0.100000
2022-12-12 00:12:39,366 epoch 12 - iter 168/245 - loss 0.05280428 - samples/sec: 189.10 - lr: 0.100000
2022-12-12 00:12:42,916 epoch 12 - iter 192/245 - loss 0.0532

100%|██████████| 37/37 [00:07<00:00,  5.06it/s]

2022-12-12 00:12:58,325 Evaluating as a multi-label problem: False
2022-12-12 00:12:58,347 DEV : loss 0.06989233940839767 - f1-score (micro avg)  0.7377





2022-12-12 00:12:58,444 BAD EPOCHS (no improvement): 1
2022-12-12 00:12:58,446 ----------------------------------------------------------------------------------------------------
2022-12-12 00:13:02,709 epoch 13 - iter 24/245 - loss 0.05501558 - samples/sec: 180.39 - lr: 0.100000
2022-12-12 00:13:05,744 epoch 13 - iter 48/245 - loss 0.05742985 - samples/sec: 253.36 - lr: 0.100000
2022-12-12 00:13:08,919 epoch 13 - iter 72/245 - loss 0.05441051 - samples/sec: 242.15 - lr: 0.100000
2022-12-12 00:13:12,688 epoch 13 - iter 96/245 - loss 0.05155758 - samples/sec: 203.95 - lr: 0.100000
2022-12-12 00:13:15,953 epoch 13 - iter 120/245 - loss 0.05040607 - samples/sec: 235.53 - lr: 0.100000
2022-12-12 00:13:20,044 epoch 13 - iter 144/245 - loss 0.05013262 - samples/sec: 187.93 - lr: 0.100000
2022-12-12 00:13:23,757 epoch 13 - iter 168/245 - loss 0.04962759 - samples/sec: 207.10 - lr: 0.100000
2022-12-12 00:13:28,452 epoch 13 - iter 192/245 - loss 0.04950799 - samples/sec: 163.70 - lr: 0.100000


100%|██████████| 37/37 [00:08<00:00,  4.62it/s]

2022-12-12 00:13:45,302 Evaluating as a multi-label problem: False
2022-12-12 00:13:45,322 DEV : loss 0.0637994259595871 - f1-score (micro avg)  0.7336





2022-12-12 00:13:45,406 BAD EPOCHS (no improvement): 2
2022-12-12 00:13:45,407 ----------------------------------------------------------------------------------------------------
2022-12-12 00:13:49,338 epoch 14 - iter 24/245 - loss 0.05102853 - samples/sec: 195.64 - lr: 0.100000
2022-12-12 00:13:53,874 epoch 14 - iter 48/245 - loss 0.04761547 - samples/sec: 169.46 - lr: 0.100000
2022-12-12 00:13:57,333 epoch 14 - iter 72/245 - loss 0.04798379 - samples/sec: 222.26 - lr: 0.100000
2022-12-12 00:14:00,692 epoch 14 - iter 96/245 - loss 0.04870307 - samples/sec: 228.97 - lr: 0.100000
2022-12-12 00:14:04,417 epoch 14 - iter 120/245 - loss 0.04901188 - samples/sec: 206.41 - lr: 0.100000
2022-12-12 00:14:07,517 epoch 14 - iter 144/245 - loss 0.04770171 - samples/sec: 248.07 - lr: 0.100000
2022-12-12 00:14:11,042 epoch 14 - iter 168/245 - loss 0.04795038 - samples/sec: 218.17 - lr: 0.100000
2022-12-12 00:14:14,632 epoch 14 - iter 192/245 - loss 0.04828727 - samples/sec: 214.20 - lr: 0.100000


100%|██████████| 37/37 [00:07<00:00,  5.08it/s]

2022-12-12 00:14:31,062 Evaluating as a multi-label problem: False
2022-12-12 00:14:31,082 DEV : loss 0.06944262236356735 - f1-score (micro avg)  0.7472





2022-12-12 00:14:31,164 BAD EPOCHS (no improvement): 3
2022-12-12 00:14:31,166 ----------------------------------------------------------------------------------------------------
2022-12-12 00:14:35,317 epoch 15 - iter 24/245 - loss 0.05003095 - samples/sec: 185.21 - lr: 0.100000
2022-12-12 00:14:39,046 epoch 15 - iter 48/245 - loss 0.04653491 - samples/sec: 206.17 - lr: 0.100000
2022-12-12 00:14:42,525 epoch 15 - iter 72/245 - loss 0.04720241 - samples/sec: 220.97 - lr: 0.100000
2022-12-12 00:14:46,276 epoch 15 - iter 96/245 - loss 0.04636203 - samples/sec: 204.98 - lr: 0.100000
2022-12-12 00:14:50,370 epoch 15 - iter 120/245 - loss 0.04724298 - samples/sec: 187.80 - lr: 0.100000
2022-12-12 00:14:54,117 epoch 15 - iter 144/245 - loss 0.04738967 - samples/sec: 205.18 - lr: 0.100000
2022-12-12 00:14:57,395 epoch 15 - iter 168/245 - loss 0.04731568 - samples/sec: 234.61 - lr: 0.100000
2022-12-12 00:15:01,566 epoch 15 - iter 192/245 - loss 0.04718094 - samples/sec: 184.31 - lr: 0.100000


100%|██████████| 37/37 [00:07<00:00,  4.65it/s]

2022-12-12 00:15:17,178 Evaluating as a multi-label problem: False
2022-12-12 00:15:17,198 DEV : loss 0.075251005589962 - f1-score (micro avg)  0.7271





2022-12-12 00:15:17,280 Epoch    15: reducing learning rate of group 0 to 5.0000e-02.
2022-12-12 00:15:17,281 BAD EPOCHS (no improvement): 4
2022-12-12 00:15:17,286 ----------------------------------------------------------------------------------------------------
2022-12-12 00:15:20,419 epoch 16 - iter 24/245 - loss 0.03592490 - samples/sec: 245.47 - lr: 0.050000
2022-12-12 00:15:23,900 epoch 16 - iter 48/245 - loss 0.03698879 - samples/sec: 220.89 - lr: 0.050000
2022-12-12 00:15:28,288 epoch 16 - iter 72/245 - loss 0.03669612 - samples/sec: 175.16 - lr: 0.050000
2022-12-12 00:15:31,390 epoch 16 - iter 96/245 - loss 0.03703881 - samples/sec: 247.96 - lr: 0.050000
2022-12-12 00:15:35,365 epoch 16 - iter 120/245 - loss 0.03755946 - samples/sec: 193.37 - lr: 0.050000
2022-12-12 00:15:38,679 epoch 16 - iter 144/245 - loss 0.03805249 - samples/sec: 232.07 - lr: 0.050000
2022-12-12 00:15:42,353 epoch 16 - iter 168/245 - loss 0.03926307 - samples/sec: 209.27 - lr: 0.050000
2022-12-12 00:15:

100%|██████████| 37/37 [00:08<00:00,  4.55it/s]

2022-12-12 00:16:03,796 Evaluating as a multi-label problem: False
2022-12-12 00:16:03,819 DEV : loss 0.06169668585062027 - f1-score (micro avg)  0.7724





2022-12-12 00:16:03,906 BAD EPOCHS (no improvement): 0
2022-12-12 00:16:03,908 saving best model
2022-12-12 00:16:06,615 ----------------------------------------------------------------------------------------------------
2022-12-12 00:16:10,188 epoch 17 - iter 24/245 - loss 0.04317580 - samples/sec: 215.34 - lr: 0.050000
2022-12-12 00:16:13,761 epoch 17 - iter 48/245 - loss 0.04020211 - samples/sec: 215.14 - lr: 0.050000
2022-12-12 00:16:18,305 epoch 17 - iter 72/245 - loss 0.04017417 - samples/sec: 169.17 - lr: 0.050000
2022-12-12 00:16:22,441 epoch 17 - iter 96/245 - loss 0.03800439 - samples/sec: 185.90 - lr: 0.050000
2022-12-12 00:16:25,589 epoch 17 - iter 120/245 - loss 0.03761091 - samples/sec: 244.28 - lr: 0.050000
2022-12-12 00:16:28,958 epoch 17 - iter 144/245 - loss 0.03804841 - samples/sec: 228.22 - lr: 0.050000
2022-12-12 00:16:32,522 epoch 17 - iter 168/245 - loss 0.03836792 - samples/sec: 215.74 - lr: 0.050000
2022-12-12 00:16:35,912 epoch 17 - iter 192/245 - loss 0.0386

100%|██████████| 37/37 [00:07<00:00,  5.03it/s]

2022-12-12 00:16:52,475 Evaluating as a multi-label problem: False
2022-12-12 00:16:52,499 DEV : loss 0.05496588721871376 - f1-score (micro avg)  0.7981





2022-12-12 00:16:52,587 BAD EPOCHS (no improvement): 0
2022-12-12 00:16:52,589 saving best model
2022-12-12 00:16:55,338 ----------------------------------------------------------------------------------------------------
2022-12-12 00:16:59,288 epoch 18 - iter 24/245 - loss 0.04294363 - samples/sec: 195.61 - lr: 0.050000
2022-12-12 00:17:02,716 epoch 18 - iter 48/245 - loss 0.04090553 - samples/sec: 224.31 - lr: 0.050000
2022-12-12 00:17:08,282 epoch 18 - iter 72/245 - loss 0.03837357 - samples/sec: 138.10 - lr: 0.050000
2022-12-12 00:17:11,341 epoch 18 - iter 96/245 - loss 0.03797434 - samples/sec: 251.37 - lr: 0.050000
2022-12-12 00:17:15,249 epoch 18 - iter 120/245 - loss 0.03818410 - samples/sec: 196.70 - lr: 0.050000
2022-12-12 00:17:18,933 epoch 18 - iter 144/245 - loss 0.03748600 - samples/sec: 208.73 - lr: 0.050000
2022-12-12 00:17:22,150 epoch 18 - iter 168/245 - loss 0.03723495 - samples/sec: 239.03 - lr: 0.050000
2022-12-12 00:17:25,378 epoch 18 - iter 192/245 - loss 0.0374

100%|██████████| 37/37 [00:08<00:00,  4.59it/s]

2022-12-12 00:17:41,753 Evaluating as a multi-label problem: False
2022-12-12 00:17:41,775 DEV : loss 0.05796543508768082 - f1-score (micro avg)  0.7874





2022-12-12 00:17:41,865 BAD EPOCHS (no improvement): 1
2022-12-12 00:17:41,868 ----------------------------------------------------------------------------------------------------
2022-12-12 00:17:45,497 epoch 19 - iter 24/245 - loss 0.03614377 - samples/sec: 212.10 - lr: 0.050000
2022-12-12 00:17:49,167 epoch 19 - iter 48/245 - loss 0.03512100 - samples/sec: 209.53 - lr: 0.050000
2022-12-12 00:17:53,374 epoch 19 - iter 72/245 - loss 0.03422527 - samples/sec: 182.71 - lr: 0.050000
2022-12-12 00:17:56,954 epoch 19 - iter 96/245 - loss 0.03368849 - samples/sec: 214.81 - lr: 0.050000
2022-12-12 00:18:01,248 epoch 19 - iter 120/245 - loss 0.03495017 - samples/sec: 178.99 - lr: 0.050000
2022-12-12 00:18:03,847 epoch 19 - iter 144/245 - loss 0.03527527 - samples/sec: 295.98 - lr: 0.050000
2022-12-12 00:18:08,695 epoch 19 - iter 168/245 - loss 0.03620585 - samples/sec: 158.54 - lr: 0.050000
2022-12-12 00:18:12,382 epoch 19 - iter 192/245 - loss 0.03559307 - samples/sec: 208.51 - lr: 0.050000


100%|██████████| 37/37 [00:07<00:00,  5.02it/s]

2022-12-12 00:18:27,901 Evaluating as a multi-label problem: False
2022-12-12 00:18:27,922 DEV : loss 0.058128539472818375 - f1-score (micro avg)  0.7912





2022-12-12 00:18:28,007 BAD EPOCHS (no improvement): 2
2022-12-12 00:18:28,010 ----------------------------------------------------------------------------------------------------
2022-12-12 00:18:31,817 epoch 20 - iter 24/245 - loss 0.03813562 - samples/sec: 202.02 - lr: 0.050000
2022-12-12 00:18:37,863 epoch 20 - iter 48/245 - loss 0.03679526 - samples/sec: 127.10 - lr: 0.050000
2022-12-12 00:18:41,554 epoch 20 - iter 72/245 - loss 0.03644457 - samples/sec: 208.35 - lr: 0.050000
2022-12-12 00:18:44,802 epoch 20 - iter 96/245 - loss 0.03773249 - samples/sec: 236.68 - lr: 0.050000
2022-12-12 00:18:47,997 epoch 20 - iter 120/245 - loss 0.03697183 - samples/sec: 240.73 - lr: 0.050000
2022-12-12 00:18:51,452 epoch 20 - iter 144/245 - loss 0.03659627 - samples/sec: 222.54 - lr: 0.050000
2022-12-12 00:18:54,856 epoch 20 - iter 168/245 - loss 0.03615226 - samples/sec: 225.91 - lr: 0.050000
2022-12-12 00:18:58,040 epoch 20 - iter 192/245 - loss 0.03558518 - samples/sec: 241.49 - lr: 0.050000


100%|██████████| 37/37 [00:08<00:00,  4.58it/s]

2022-12-12 00:19:13,881 Evaluating as a multi-label problem: False
2022-12-12 00:19:13,903 DEV : loss 0.05565234646201134 - f1-score (micro avg)  0.7877





2022-12-12 00:19:13,993 BAD EPOCHS (no improvement): 3
2022-12-12 00:19:13,996 ----------------------------------------------------------------------------------------------------
2022-12-12 00:19:17,321 epoch 21 - iter 24/245 - loss 0.03648111 - samples/sec: 231.43 - lr: 0.050000
2022-12-12 00:19:20,321 epoch 21 - iter 48/245 - loss 0.03576872 - samples/sec: 256.27 - lr: 0.050000
2022-12-12 00:19:23,200 epoch 21 - iter 72/245 - loss 0.03668871 - samples/sec: 267.13 - lr: 0.050000
2022-12-12 00:19:28,705 epoch 21 - iter 96/245 - loss 0.03603880 - samples/sec: 139.62 - lr: 0.050000
2022-12-12 00:19:31,670 epoch 21 - iter 120/245 - loss 0.03539313 - samples/sec: 259.28 - lr: 0.050000
2022-12-12 00:19:35,026 epoch 21 - iter 144/245 - loss 0.03560013 - samples/sec: 229.14 - lr: 0.050000
2022-12-12 00:19:39,503 epoch 21 - iter 168/245 - loss 0.03500558 - samples/sec: 171.68 - lr: 0.050000
2022-12-12 00:19:43,229 epoch 21 - iter 192/245 - loss 0.03490618 - samples/sec: 206.34 - lr: 0.050000


100%|██████████| 37/37 [00:07<00:00,  5.04it/s]

2022-12-12 00:19:59,294 Evaluating as a multi-label problem: False
2022-12-12 00:19:59,315 DEV : loss 0.05434331297874451 - f1-score (micro avg)  0.7924





2022-12-12 00:19:59,399 Epoch    21: reducing learning rate of group 0 to 2.5000e-02.
2022-12-12 00:19:59,401 BAD EPOCHS (no improvement): 4
2022-12-12 00:19:59,405 ----------------------------------------------------------------------------------------------------
2022-12-12 00:20:03,767 epoch 22 - iter 24/245 - loss 0.02951122 - samples/sec: 176.29 - lr: 0.025000
2022-12-12 00:20:07,912 epoch 22 - iter 48/245 - loss 0.03023044 - samples/sec: 185.46 - lr: 0.025000
2022-12-12 00:20:13,525 epoch 22 - iter 72/245 - loss 0.03334559 - samples/sec: 136.92 - lr: 0.025000
2022-12-12 00:20:17,256 epoch 22 - iter 96/245 - loss 0.03223842 - samples/sec: 206.05 - lr: 0.025000
2022-12-12 00:20:20,553 epoch 22 - iter 120/245 - loss 0.03152971 - samples/sec: 233.29 - lr: 0.025000
2022-12-12 00:20:24,072 epoch 22 - iter 144/245 - loss 0.03137732 - samples/sec: 218.55 - lr: 0.025000
2022-12-12 00:20:27,465 epoch 22 - iter 168/245 - loss 0.03190462 - samples/sec: 226.60 - lr: 0.025000
2022-12-12 00:20:

100%|██████████| 37/37 [00:07<00:00,  5.06it/s]

2022-12-12 00:20:45,822 Evaluating as a multi-label problem: False
2022-12-12 00:20:45,842 DEV : loss 0.05765221640467644 - f1-score (micro avg)  0.7894





2022-12-12 00:20:45,925 BAD EPOCHS (no improvement): 1
2022-12-12 00:20:45,927 ----------------------------------------------------------------------------------------------------
2022-12-12 00:20:49,383 epoch 23 - iter 24/245 - loss 0.03350781 - samples/sec: 222.56 - lr: 0.025000
2022-12-12 00:20:54,494 epoch 23 - iter 48/245 - loss 0.03703544 - samples/sec: 150.38 - lr: 0.025000
2022-12-12 00:20:57,443 epoch 23 - iter 72/245 - loss 0.03414646 - samples/sec: 260.81 - lr: 0.025000
2022-12-12 00:21:00,600 epoch 23 - iter 96/245 - loss 0.03261156 - samples/sec: 243.54 - lr: 0.025000
2022-12-12 00:21:04,283 epoch 23 - iter 120/245 - loss 0.03175288 - samples/sec: 208.74 - lr: 0.025000
2022-12-12 00:21:08,424 epoch 23 - iter 144/245 - loss 0.03250808 - samples/sec: 185.67 - lr: 0.025000
2022-12-12 00:21:13,620 epoch 23 - iter 168/245 - loss 0.03235882 - samples/sec: 147.94 - lr: 0.025000
2022-12-12 00:21:16,459 epoch 23 - iter 192/245 - loss 0.03204140 - samples/sec: 270.91 - lr: 0.025000


100%|██████████| 37/37 [00:08<00:00,  4.61it/s]

2022-12-12 00:21:32,260 Evaluating as a multi-label problem: False
2022-12-12 00:21:32,281 DEV : loss 0.05738389119505882 - f1-score (micro avg)  0.7911





2022-12-12 00:21:32,366 BAD EPOCHS (no improvement): 2
2022-12-12 00:21:32,368 ----------------------------------------------------------------------------------------------------
2022-12-12 00:21:35,772 epoch 24 - iter 24/245 - loss 0.02879268 - samples/sec: 225.97 - lr: 0.025000
2022-12-12 00:21:39,855 epoch 24 - iter 48/245 - loss 0.03036666 - samples/sec: 188.25 - lr: 0.025000
2022-12-12 00:21:43,456 epoch 24 - iter 72/245 - loss 0.03021611 - samples/sec: 213.49 - lr: 0.025000
2022-12-12 00:21:47,813 epoch 24 - iter 96/245 - loss 0.02964171 - samples/sec: 176.43 - lr: 0.025000
2022-12-12 00:21:51,353 epoch 24 - iter 120/245 - loss 0.02934928 - samples/sec: 217.24 - lr: 0.025000
2022-12-12 00:21:54,260 epoch 24 - iter 144/245 - loss 0.02889815 - samples/sec: 264.58 - lr: 0.025000
2022-12-12 00:21:57,360 epoch 24 - iter 168/245 - loss 0.02964090 - samples/sec: 248.04 - lr: 0.025000
2022-12-12 00:22:02,132 epoch 24 - iter 192/245 - loss 0.03042136 - samples/sec: 161.08 - lr: 0.025000


100%|██████████| 37/37 [00:07<00:00,  5.04it/s]

2022-12-12 00:22:18,030 Evaluating as a multi-label problem: False
2022-12-12 00:22:18,051 DEV : loss 0.055567171424627304 - f1-score (micro avg)  0.7792





2022-12-12 00:22:18,135 BAD EPOCHS (no improvement): 3
2022-12-12 00:22:18,137 ----------------------------------------------------------------------------------------------------
2022-12-12 00:22:21,690 epoch 25 - iter 24/245 - loss 0.03248474 - samples/sec: 216.44 - lr: 0.025000
2022-12-12 00:22:25,424 epoch 25 - iter 48/245 - loss 0.02903011 - samples/sec: 205.91 - lr: 0.025000
2022-12-12 00:22:29,098 epoch 25 - iter 72/245 - loss 0.03029577 - samples/sec: 209.25 - lr: 0.025000
2022-12-12 00:22:32,453 epoch 25 - iter 96/245 - loss 0.02966817 - samples/sec: 229.17 - lr: 0.025000
2022-12-12 00:22:36,011 epoch 25 - iter 120/245 - loss 0.02929285 - samples/sec: 216.11 - lr: 0.025000
2022-12-12 00:22:39,047 epoch 25 - iter 144/245 - loss 0.02956586 - samples/sec: 253.33 - lr: 0.025000
2022-12-12 00:22:42,349 epoch 25 - iter 168/245 - loss 0.02943033 - samples/sec: 232.86 - lr: 0.025000
2022-12-12 00:22:47,140 epoch 25 - iter 192/245 - loss 0.03062265 - samples/sec: 160.45 - lr: 0.025000


100%|██████████| 37/37 [00:07<00:00,  4.67it/s]

2022-12-12 00:23:04,092 Evaluating as a multi-label problem: False
2022-12-12 00:23:04,113 DEV : loss 0.05524937063455582 - f1-score (micro avg)  0.792





2022-12-12 00:23:04,198 Epoch    25: reducing learning rate of group 0 to 1.2500e-02.
2022-12-12 00:23:04,198 BAD EPOCHS (no improvement): 4
2022-12-12 00:23:04,200 ----------------------------------------------------------------------------------------------------
2022-12-12 00:23:08,440 epoch 26 - iter 24/245 - loss 0.02685290 - samples/sec: 181.41 - lr: 0.012500
2022-12-12 00:23:12,737 epoch 26 - iter 48/245 - loss 0.03149097 - samples/sec: 178.93 - lr: 0.012500
2022-12-12 00:23:16,667 epoch 26 - iter 72/245 - loss 0.03166157 - samples/sec: 195.63 - lr: 0.012500
2022-12-12 00:23:19,710 epoch 26 - iter 96/245 - loss 0.03117864 - samples/sec: 252.65 - lr: 0.012500
2022-12-12 00:23:24,523 epoch 26 - iter 120/245 - loss 0.03117928 - samples/sec: 159.72 - lr: 0.012500
2022-12-12 00:23:28,308 epoch 26 - iter 144/245 - loss 0.03076256 - samples/sec: 203.09 - lr: 0.012500
2022-12-12 00:23:31,887 epoch 26 - iter 168/245 - loss 0.03015995 - samples/sec: 214.87 - lr: 0.012500
2022-12-12 00:23:

100%|██████████| 37/37 [00:07<00:00,  5.09it/s]

2022-12-12 00:23:49,862 Evaluating as a multi-label problem: False
2022-12-12 00:23:49,883 DEV : loss 0.05366700515151024 - f1-score (micro avg)  0.7997





2022-12-12 00:23:49,966 BAD EPOCHS (no improvement): 0
2022-12-12 00:23:49,968 saving best model
2022-12-12 00:23:52,775 ----------------------------------------------------------------------------------------------------
2022-12-12 00:23:56,274 epoch 27 - iter 24/245 - loss 0.02979833 - samples/sec: 219.88 - lr: 0.012500
2022-12-12 00:23:59,575 epoch 27 - iter 48/245 - loss 0.02764379 - samples/sec: 232.90 - lr: 0.012500
2022-12-12 00:24:02,810 epoch 27 - iter 72/245 - loss 0.02808562 - samples/sec: 237.78 - lr: 0.012500
2022-12-12 00:24:06,077 epoch 27 - iter 96/245 - loss 0.02759860 - samples/sec: 235.39 - lr: 0.012500
2022-12-12 00:24:10,789 epoch 27 - iter 120/245 - loss 0.02778734 - samples/sec: 163.14 - lr: 0.012500
2022-12-12 00:24:15,309 epoch 27 - iter 144/245 - loss 0.02916735 - samples/sec: 170.07 - lr: 0.012500
2022-12-12 00:24:20,572 epoch 27 - iter 168/245 - loss 0.02887917 - samples/sec: 146.03 - lr: 0.012500
2022-12-12 00:24:24,200 epoch 27 - iter 192/245 - loss 0.0288

100%|██████████| 37/37 [00:07<00:00,  4.98it/s]

2022-12-12 00:24:39,552 Evaluating as a multi-label problem: False
2022-12-12 00:24:39,574 DEV : loss 0.05467750132083893 - f1-score (micro avg)  0.7953





2022-12-12 00:24:39,658 BAD EPOCHS (no improvement): 1
2022-12-12 00:24:39,660 ----------------------------------------------------------------------------------------------------
2022-12-12 00:24:43,205 epoch 28 - iter 24/245 - loss 0.02774837 - samples/sec: 216.95 - lr: 0.012500
2022-12-12 00:24:46,299 epoch 28 - iter 48/245 - loss 0.02861203 - samples/sec: 248.54 - lr: 0.012500
2022-12-12 00:24:50,166 epoch 28 - iter 72/245 - loss 0.02785741 - samples/sec: 198.81 - lr: 0.012500
2022-12-12 00:24:54,033 epoch 28 - iter 96/245 - loss 0.02745632 - samples/sec: 198.80 - lr: 0.012500
2022-12-12 00:24:57,790 epoch 28 - iter 120/245 - loss 0.02808065 - samples/sec: 204.68 - lr: 0.012500
2022-12-12 00:25:00,618 epoch 28 - iter 144/245 - loss 0.02800248 - samples/sec: 271.94 - lr: 0.012500
2022-12-12 00:25:05,213 epoch 28 - iter 168/245 - loss 0.02854134 - samples/sec: 167.26 - lr: 0.012500
2022-12-12 00:25:08,380 epoch 28 - iter 192/245 - loss 0.02913000 - samples/sec: 242.79 - lr: 0.012500


100%|██████████| 37/37 [00:08<00:00,  4.59it/s]

2022-12-12 00:25:26,025 Evaluating as a multi-label problem: False
2022-12-12 00:25:26,047 DEV : loss 0.053843773901462555 - f1-score (micro avg)  0.7968





2022-12-12 00:25:26,134 BAD EPOCHS (no improvement): 2
2022-12-12 00:25:26,137 ----------------------------------------------------------------------------------------------------
2022-12-12 00:25:29,275 epoch 29 - iter 24/245 - loss 0.03509223 - samples/sec: 245.17 - lr: 0.012500
2022-12-12 00:25:34,027 epoch 29 - iter 48/245 - loss 0.03284114 - samples/sec: 161.76 - lr: 0.012500
2022-12-12 00:25:38,630 epoch 29 - iter 72/245 - loss 0.03225266 - samples/sec: 167.00 - lr: 0.012500
2022-12-12 00:25:41,520 epoch 29 - iter 96/245 - loss 0.03089442 - samples/sec: 266.09 - lr: 0.012500
2022-12-12 00:25:45,109 epoch 29 - iter 120/245 - loss 0.03050364 - samples/sec: 214.20 - lr: 0.012500
2022-12-12 00:25:48,710 epoch 29 - iter 144/245 - loss 0.02975153 - samples/sec: 213.52 - lr: 0.012500
2022-12-12 00:25:51,774 epoch 29 - iter 168/245 - loss 0.02921907 - samples/sec: 251.02 - lr: 0.012500
2022-12-12 00:25:55,369 epoch 29 - iter 192/245 - loss 0.02952837 - samples/sec: 213.88 - lr: 0.012500


100%|██████████| 37/37 [00:07<00:00,  4.98it/s]

2022-12-12 00:26:11,867 Evaluating as a multi-label problem: False
2022-12-12 00:26:11,889 DEV : loss 0.053881824016571045 - f1-score (micro avg)  0.796





2022-12-12 00:26:11,977 BAD EPOCHS (no improvement): 3
2022-12-12 00:26:11,979 ----------------------------------------------------------------------------------------------------
2022-12-12 00:26:16,559 epoch 30 - iter 24/245 - loss 0.02871823 - samples/sec: 167.92 - lr: 0.012500
2022-12-12 00:26:20,070 epoch 30 - iter 48/245 - loss 0.02718697 - samples/sec: 219.04 - lr: 0.012500
2022-12-12 00:26:23,471 epoch 30 - iter 72/245 - loss 0.02944228 - samples/sec: 226.03 - lr: 0.012500
2022-12-12 00:26:26,984 epoch 30 - iter 96/245 - loss 0.02875363 - samples/sec: 218.87 - lr: 0.012500
2022-12-12 00:26:30,775 epoch 30 - iter 120/245 - loss 0.02813593 - samples/sec: 202.81 - lr: 0.012500
2022-12-12 00:26:35,741 epoch 30 - iter 144/245 - loss 0.02903299 - samples/sec: 154.77 - lr: 0.012500
2022-12-12 00:26:39,124 epoch 30 - iter 168/245 - loss 0.02982778 - samples/sec: 227.31 - lr: 0.012500
2022-12-12 00:26:42,373 epoch 30 - iter 192/245 - loss 0.02946697 - samples/sec: 236.66 - lr: 0.012500


100%|██████████| 37/37 [00:08<00:00,  4.61it/s]

2022-12-12 00:26:59,242 Evaluating as a multi-label problem: False
2022-12-12 00:26:59,263 DEV : loss 0.053141385316848755 - f1-score (micro avg)  0.7991





2022-12-12 00:26:59,347 Epoch    30: reducing learning rate of group 0 to 6.2500e-03.
2022-12-12 00:26:59,349 BAD EPOCHS (no improvement): 4
2022-12-12 00:26:59,352 ----------------------------------------------------------------------------------------------------
2022-12-12 00:27:02,527 epoch 31 - iter 24/245 - loss 0.02859614 - samples/sec: 242.23 - lr: 0.006250
2022-12-12 00:27:05,693 epoch 31 - iter 48/245 - loss 0.02819758 - samples/sec: 242.88 - lr: 0.006250
2022-12-12 00:27:10,228 epoch 31 - iter 72/245 - loss 0.02694984 - samples/sec: 169.50 - lr: 0.006250
2022-12-12 00:27:14,747 epoch 31 - iter 96/245 - loss 0.02701232 - samples/sec: 170.10 - lr: 0.006250
2022-12-12 00:27:18,493 epoch 31 - iter 120/245 - loss 0.02739739 - samples/sec: 205.24 - lr: 0.006250
2022-12-12 00:27:22,317 epoch 31 - iter 144/245 - loss 0.02671338 - samples/sec: 201.06 - lr: 0.006250
2022-12-12 00:27:25,497 epoch 31 - iter 168/245 - loss 0.02627026 - samples/sec: 241.84 - lr: 0.006250
2022-12-12 00:27:

100%|██████████| 37/37 [00:07<00:00,  5.00it/s]

2022-12-12 00:27:45,031 Evaluating as a multi-label problem: False
2022-12-12 00:27:45,052 DEV : loss 0.05262794718146324 - f1-score (micro avg)  0.8013





2022-12-12 00:27:45,135 BAD EPOCHS (no improvement): 0
2022-12-12 00:27:45,137 saving best model
2022-12-12 00:27:47,895 ----------------------------------------------------------------------------------------------------
2022-12-12 00:27:51,190 epoch 32 - iter 24/245 - loss 0.02656297 - samples/sec: 234.92 - lr: 0.006250
2022-12-12 00:27:55,150 epoch 32 - iter 48/245 - loss 0.02729281 - samples/sec: 194.16 - lr: 0.006250
2022-12-12 00:27:58,177 epoch 32 - iter 72/245 - loss 0.02665917 - samples/sec: 254.04 - lr: 0.006250
2022-12-12 00:28:01,725 epoch 32 - iter 96/245 - loss 0.02670544 - samples/sec: 216.70 - lr: 0.006250
2022-12-12 00:28:05,232 epoch 32 - iter 120/245 - loss 0.02667180 - samples/sec: 219.31 - lr: 0.006250
2022-12-12 00:28:09,391 epoch 32 - iter 144/245 - loss 0.02701764 - samples/sec: 184.84 - lr: 0.006250
2022-12-12 00:28:15,228 epoch 32 - iter 168/245 - loss 0.02696302 - samples/sec: 131.64 - lr: 0.006250
2022-12-12 00:28:18,178 epoch 32 - iter 192/245 - loss 0.0269

100%|██████████| 37/37 [00:07<00:00,  5.05it/s]

2022-12-12 00:28:34,274 Evaluating as a multi-label problem: False
2022-12-12 00:28:34,296 DEV : loss 0.053773507475852966 - f1-score (micro avg)  0.8029





2022-12-12 00:28:34,380 BAD EPOCHS (no improvement): 0
2022-12-12 00:28:34,382 saving best model
2022-12-12 00:28:37,127 ----------------------------------------------------------------------------------------------------
2022-12-12 00:28:40,692 epoch 33 - iter 24/245 - loss 0.02607784 - samples/sec: 216.94 - lr: 0.006250
2022-12-12 00:28:44,145 epoch 33 - iter 48/245 - loss 0.02688184 - samples/sec: 222.69 - lr: 0.006250
2022-12-12 00:28:47,998 epoch 33 - iter 72/245 - loss 0.02729285 - samples/sec: 199.51 - lr: 0.006250
2022-12-12 00:28:53,101 epoch 33 - iter 96/245 - loss 0.02692850 - samples/sec: 150.63 - lr: 0.006250
2022-12-12 00:28:56,203 epoch 33 - iter 120/245 - loss 0.02674202 - samples/sec: 247.86 - lr: 0.006250
2022-12-12 00:28:59,920 epoch 33 - iter 144/245 - loss 0.02632017 - samples/sec: 206.87 - lr: 0.006250
2022-12-12 00:29:03,681 epoch 33 - iter 168/245 - loss 0.02798797 - samples/sec: 204.41 - lr: 0.006250
2022-12-12 00:29:08,362 epoch 33 - iter 192/245 - loss 0.0281

100%|██████████| 37/37 [00:07<00:00,  4.64it/s]

2022-12-12 00:29:23,778 Evaluating as a multi-label problem: False
2022-12-12 00:29:23,799 DEV : loss 0.053234782069921494 - f1-score (micro avg)  0.8001





2022-12-12 00:29:23,882 BAD EPOCHS (no improvement): 1
2022-12-12 00:29:23,884 ----------------------------------------------------------------------------------------------------
2022-12-12 00:29:27,163 epoch 34 - iter 24/245 - loss 0.02862711 - samples/sec: 234.59 - lr: 0.006250
2022-12-12 00:29:30,480 epoch 34 - iter 48/245 - loss 0.02977449 - samples/sec: 231.83 - lr: 0.006250
2022-12-12 00:29:33,504 epoch 34 - iter 72/245 - loss 0.02967465 - samples/sec: 254.32 - lr: 0.006250
2022-12-12 00:29:36,775 epoch 34 - iter 96/245 - loss 0.02824383 - samples/sec: 235.06 - lr: 0.006250
2022-12-12 00:29:40,120 epoch 34 - iter 120/245 - loss 0.02770536 - samples/sec: 229.88 - lr: 0.006250
2022-12-12 00:29:43,391 epoch 34 - iter 144/245 - loss 0.02642784 - samples/sec: 235.10 - lr: 0.006250
2022-12-12 00:29:50,571 epoch 34 - iter 168/245 - loss 0.02724332 - samples/sec: 107.03 - lr: 0.006250
2022-12-12 00:29:54,511 epoch 34 - iter 192/245 - loss 0.02692574 - samples/sec: 195.15 - lr: 0.006250


100%|██████████| 37/37 [00:07<00:00,  5.10it/s]

2022-12-12 00:30:09,205 Evaluating as a multi-label problem: False
2022-12-12 00:30:09,225 DEV : loss 0.054175663739442825 - f1-score (micro avg)  0.8038





2022-12-12 00:30:09,309 BAD EPOCHS (no improvement): 0
2022-12-12 00:30:09,311 saving best model
2022-12-12 00:30:12,109 ----------------------------------------------------------------------------------------------------
2022-12-12 00:30:15,794 epoch 35 - iter 24/245 - loss 0.02540975 - samples/sec: 209.79 - lr: 0.006250
2022-12-12 00:30:19,473 epoch 35 - iter 48/245 - loss 0.02838032 - samples/sec: 208.95 - lr: 0.006250
2022-12-12 00:30:24,446 epoch 35 - iter 72/245 - loss 0.02938859 - samples/sec: 154.58 - lr: 0.006250
2022-12-12 00:30:27,976 epoch 35 - iter 96/245 - loss 0.02937099 - samples/sec: 217.83 - lr: 0.006250
2022-12-12 00:30:31,224 epoch 35 - iter 120/245 - loss 0.02902425 - samples/sec: 236.76 - lr: 0.006250
2022-12-12 00:30:35,046 epoch 35 - iter 144/245 - loss 0.02847592 - samples/sec: 201.12 - lr: 0.006250
2022-12-12 00:30:38,506 epoch 35 - iter 168/245 - loss 0.02830928 - samples/sec: 222.20 - lr: 0.006250
2022-12-12 00:30:42,663 epoch 35 - iter 192/245 - loss 0.0281

100%|██████████| 37/37 [00:08<00:00,  4.60it/s]

2022-12-12 00:30:58,131 Evaluating as a multi-label problem: False
2022-12-12 00:30:58,152 DEV : loss 0.05312367528676987 - f1-score (micro avg)  0.8032





2022-12-12 00:30:58,235 BAD EPOCHS (no improvement): 1
2022-12-12 00:30:58,237 ----------------------------------------------------------------------------------------------------
2022-12-12 00:31:01,636 epoch 36 - iter 24/245 - loss 0.03470156 - samples/sec: 226.32 - lr: 0.006250
2022-12-12 00:31:05,323 epoch 36 - iter 48/245 - loss 0.03025361 - samples/sec: 208.56 - lr: 0.006250
2022-12-12 00:31:09,548 epoch 36 - iter 72/245 - loss 0.02739179 - samples/sec: 181.93 - lr: 0.006250
2022-12-12 00:31:13,194 epoch 36 - iter 96/245 - loss 0.02651195 - samples/sec: 210.88 - lr: 0.006250
2022-12-12 00:31:17,555 epoch 36 - iter 120/245 - loss 0.02670512 - samples/sec: 176.26 - lr: 0.006250
2022-12-12 00:31:20,983 epoch 36 - iter 144/245 - loss 0.02671356 - samples/sec: 224.34 - lr: 0.006250
2022-12-12 00:31:25,123 epoch 36 - iter 168/245 - loss 0.02680747 - samples/sec: 185.67 - lr: 0.006250
2022-12-12 00:31:28,360 epoch 36 - iter 192/245 - loss 0.02628512 - samples/sec: 237.54 - lr: 0.006250


100%|██████████| 37/37 [00:07<00:00,  5.02it/s]

2022-12-12 00:31:44,583 Evaluating as a multi-label problem: False
2022-12-12 00:31:44,605 DEV : loss 0.05461762845516205 - f1-score (micro avg)  0.8012





2022-12-12 00:31:44,691 BAD EPOCHS (no improvement): 2
2022-12-12 00:31:44,693 ----------------------------------------------------------------------------------------------------
2022-12-12 00:31:49,155 epoch 37 - iter 24/245 - loss 0.02795666 - samples/sec: 172.33 - lr: 0.006250
2022-12-12 00:31:53,278 epoch 37 - iter 48/245 - loss 0.02583506 - samples/sec: 186.47 - lr: 0.006250
2022-12-12 00:31:58,064 epoch 37 - iter 72/245 - loss 0.02509968 - samples/sec: 160.63 - lr: 0.006250
2022-12-12 00:32:01,404 epoch 37 - iter 96/245 - loss 0.02526318 - samples/sec: 230.21 - lr: 0.006250
2022-12-12 00:32:04,324 epoch 37 - iter 120/245 - loss 0.02595263 - samples/sec: 263.37 - lr: 0.006250
2022-12-12 00:32:07,238 epoch 37 - iter 144/245 - loss 0.02639000 - samples/sec: 263.88 - lr: 0.006250
2022-12-12 00:32:10,557 epoch 37 - iter 168/245 - loss 0.02663034 - samples/sec: 231.70 - lr: 0.006250
2022-12-12 00:32:13,578 epoch 37 - iter 192/245 - loss 0.02649797 - samples/sec: 254.54 - lr: 0.006250


100%|██████████| 37/37 [00:07<00:00,  5.01it/s]

2022-12-12 00:32:31,324 Evaluating as a multi-label problem: False
2022-12-12 00:32:31,346 DEV : loss 0.05320560187101364 - f1-score (micro avg)  0.8007





2022-12-12 00:32:31,435 BAD EPOCHS (no improvement): 3
2022-12-12 00:32:31,437 ----------------------------------------------------------------------------------------------------
2022-12-12 00:32:35,096 epoch 38 - iter 24/245 - loss 0.03075486 - samples/sec: 210.21 - lr: 0.006250
2022-12-12 00:32:38,354 epoch 38 - iter 48/245 - loss 0.02809421 - samples/sec: 236.07 - lr: 0.006250
2022-12-12 00:32:42,117 epoch 38 - iter 72/245 - loss 0.02697999 - samples/sec: 204.27 - lr: 0.006250
2022-12-12 00:32:45,152 epoch 38 - iter 96/245 - loss 0.02573055 - samples/sec: 253.45 - lr: 0.006250
2022-12-12 00:32:49,317 epoch 38 - iter 120/245 - loss 0.02643506 - samples/sec: 184.50 - lr: 0.006250
2022-12-12 00:32:54,180 epoch 38 - iter 144/245 - loss 0.02620568 - samples/sec: 158.07 - lr: 0.006250
2022-12-12 00:32:57,583 epoch 38 - iter 168/245 - loss 0.02661031 - samples/sec: 225.87 - lr: 0.006250
2022-12-12 00:33:01,686 epoch 38 - iter 192/245 - loss 0.02669671 - samples/sec: 187.41 - lr: 0.006250


100%|██████████| 37/37 [00:08<00:00,  4.62it/s]

2022-12-12 00:33:17,740 Evaluating as a multi-label problem: False
2022-12-12 00:33:17,761 DEV : loss 0.05393466725945473 - f1-score (micro avg)  0.8034





2022-12-12 00:33:17,845 Epoch    38: reducing learning rate of group 0 to 3.1250e-03.
2022-12-12 00:33:17,846 BAD EPOCHS (no improvement): 4
2022-12-12 00:33:17,849 ----------------------------------------------------------------------------------------------------
2022-12-12 00:33:21,883 epoch 39 - iter 24/245 - loss 0.02316003 - samples/sec: 190.66 - lr: 0.003125
2022-12-12 00:33:26,259 epoch 39 - iter 48/245 - loss 0.02206484 - samples/sec: 175.64 - lr: 0.003125
2022-12-12 00:33:30,814 epoch 39 - iter 72/245 - loss 0.02385982 - samples/sec: 168.77 - lr: 0.003125
2022-12-12 00:33:34,264 epoch 39 - iter 96/245 - loss 0.02455100 - samples/sec: 222.85 - lr: 0.003125
2022-12-12 00:33:37,499 epoch 39 - iter 120/245 - loss 0.02514075 - samples/sec: 237.69 - lr: 0.003125
2022-12-12 00:33:41,609 epoch 39 - iter 144/245 - loss 0.02489528 - samples/sec: 187.02 - lr: 0.003125
2022-12-12 00:33:44,641 epoch 39 - iter 168/245 - loss 0.02512818 - samples/sec: 253.67 - lr: 0.003125
2022-12-12 00:33:

100%|██████████| 37/37 [00:07<00:00,  4.92it/s]

2022-12-12 00:34:03,616 Evaluating as a multi-label problem: False





2022-12-12 00:34:03,641 DEV : loss 0.05306617170572281 - f1-score (micro avg)  0.8016
2022-12-12 00:34:03,727 BAD EPOCHS (no improvement): 1
2022-12-12 00:34:03,731 ----------------------------------------------------------------------------------------------------
2022-12-12 00:34:08,584 epoch 40 - iter 24/245 - loss 0.03402282 - samples/sec: 158.45 - lr: 0.003125
2022-12-12 00:34:11,380 epoch 40 - iter 48/245 - loss 0.02866341 - samples/sec: 275.05 - lr: 0.003125
2022-12-12 00:34:14,485 epoch 40 - iter 72/245 - loss 0.02635898 - samples/sec: 247.64 - lr: 0.003125
2022-12-12 00:34:17,511 epoch 40 - iter 96/245 - loss 0.02575466 - samples/sec: 254.10 - lr: 0.003125
2022-12-12 00:34:20,595 epoch 40 - iter 120/245 - loss 0.02528801 - samples/sec: 249.30 - lr: 0.003125
2022-12-12 00:34:24,286 epoch 40 - iter 144/245 - loss 0.02536216 - samples/sec: 208.28 - lr: 0.003125
2022-12-12 00:34:29,053 epoch 40 - iter 168/245 - loss 0.02637214 - samples/sec: 161.26 - lr: 0.003125
2022-12-12 00:34:

100%|██████████| 37/37 [00:07<00:00,  4.63it/s]

2022-12-12 00:34:49,822 Evaluating as a multi-label problem: False
2022-12-12 00:34:49,843 DEV : loss 0.0536733977496624 - f1-score (micro avg)  0.803





2022-12-12 00:34:49,925 BAD EPOCHS (no improvement): 2
2022-12-12 00:34:49,927 ----------------------------------------------------------------------------------------------------
2022-12-12 00:34:54,118 epoch 41 - iter 24/245 - loss 0.02560113 - samples/sec: 183.49 - lr: 0.003125
2022-12-12 00:34:57,978 epoch 41 - iter 48/245 - loss 0.02715970 - samples/sec: 199.16 - lr: 0.003125
2022-12-12 00:35:01,987 epoch 41 - iter 72/245 - loss 0.02771466 - samples/sec: 191.75 - lr: 0.003125
2022-12-12 00:35:06,158 epoch 41 - iter 96/245 - loss 0.02774666 - samples/sec: 184.31 - lr: 0.003125
2022-12-12 00:35:10,542 epoch 41 - iter 120/245 - loss 0.02766871 - samples/sec: 175.35 - lr: 0.003125
2022-12-12 00:35:14,695 epoch 41 - iter 144/245 - loss 0.02717703 - samples/sec: 185.11 - lr: 0.003125
2022-12-12 00:35:18,021 epoch 41 - iter 168/245 - loss 0.02659295 - samples/sec: 231.20 - lr: 0.003125
2022-12-12 00:35:21,741 epoch 41 - iter 192/245 - loss 0.02690659 - samples/sec: 206.62 - lr: 0.003125


100%|██████████| 37/37 [00:07<00:00,  5.11it/s]

2022-12-12 00:35:35,868 Evaluating as a multi-label problem: False
2022-12-12 00:35:35,888 DEV : loss 0.05328686162829399 - f1-score (micro avg)  0.803





2022-12-12 00:35:35,972 BAD EPOCHS (no improvement): 3
2022-12-12 00:35:35,974 ----------------------------------------------------------------------------------------------------
2022-12-12 00:35:39,084 epoch 42 - iter 24/245 - loss 0.02198495 - samples/sec: 247.22 - lr: 0.003125
2022-12-12 00:35:41,931 epoch 42 - iter 48/245 - loss 0.02390467 - samples/sec: 270.17 - lr: 0.003125
2022-12-12 00:35:45,664 epoch 42 - iter 72/245 - loss 0.02394672 - samples/sec: 205.96 - lr: 0.003125
2022-12-12 00:35:49,546 epoch 42 - iter 96/245 - loss 0.02454406 - samples/sec: 198.04 - lr: 0.003125
2022-12-12 00:35:54,171 epoch 42 - iter 120/245 - loss 0.02515285 - samples/sec: 166.21 - lr: 0.003125
2022-12-12 00:35:59,075 epoch 42 - iter 144/245 - loss 0.02601843 - samples/sec: 156.73 - lr: 0.003125
2022-12-12 00:36:03,026 epoch 42 - iter 168/245 - loss 0.02652777 - samples/sec: 194.58 - lr: 0.003125
2022-12-12 00:36:06,039 epoch 42 - iter 192/245 - loss 0.02650944 - samples/sec: 255.26 - lr: 0.003125


100%|██████████| 37/37 [00:07<00:00,  5.15it/s]

2022-12-12 00:36:22,615 Evaluating as a multi-label problem: False
2022-12-12 00:36:22,634 DEV : loss 0.05319751054048538 - f1-score (micro avg)  0.8032





2022-12-12 00:36:22,718 Epoch    42: reducing learning rate of group 0 to 1.5625e-03.
2022-12-12 00:36:22,719 BAD EPOCHS (no improvement): 4
2022-12-12 00:36:22,722 ----------------------------------------------------------------------------------------------------
2022-12-12 00:36:26,119 epoch 43 - iter 24/245 - loss 0.02508161 - samples/sec: 226.46 - lr: 0.001563
2022-12-12 00:36:31,311 epoch 43 - iter 48/245 - loss 0.02857420 - samples/sec: 148.03 - lr: 0.001563
2022-12-12 00:36:34,806 epoch 43 - iter 72/245 - loss 0.02882846 - samples/sec: 220.02 - lr: 0.001563
2022-12-12 00:36:38,773 epoch 43 - iter 96/245 - loss 0.02738163 - samples/sec: 193.85 - lr: 0.001563
2022-12-12 00:36:42,356 epoch 43 - iter 120/245 - loss 0.02730848 - samples/sec: 214.56 - lr: 0.001563
2022-12-12 00:36:46,515 epoch 43 - iter 144/245 - loss 0.02665134 - samples/sec: 184.82 - lr: 0.001563
2022-12-12 00:36:50,267 epoch 43 - iter 168/245 - loss 0.02702321 - samples/sec: 204.96 - lr: 0.001563
2022-12-12 00:36:

100%|██████████| 37/37 [00:08<00:00,  4.60it/s]

2022-12-12 00:37:09,284 Evaluating as a multi-label problem: False
2022-12-12 00:37:09,305 DEV : loss 0.053099218755960464 - f1-score (micro avg)  0.8026





2022-12-12 00:37:09,393 BAD EPOCHS (no improvement): 1
2022-12-12 00:37:09,395 ----------------------------------------------------------------------------------------------------
2022-12-12 00:37:12,432 epoch 44 - iter 24/245 - loss 0.02273614 - samples/sec: 253.27 - lr: 0.001563
2022-12-12 00:37:15,963 epoch 44 - iter 48/245 - loss 0.02636880 - samples/sec: 217.73 - lr: 0.001563
2022-12-12 00:37:20,457 epoch 44 - iter 72/245 - loss 0.02612038 - samples/sec: 171.08 - lr: 0.001563
2022-12-12 00:37:23,882 epoch 44 - iter 96/245 - loss 0.02619903 - samples/sec: 224.44 - lr: 0.001563
2022-12-12 00:37:27,845 epoch 44 - iter 120/245 - loss 0.02576437 - samples/sec: 194.03 - lr: 0.001563
2022-12-12 00:37:32,501 epoch 44 - iter 144/245 - loss 0.02560221 - samples/sec: 165.11 - lr: 0.001563
2022-12-12 00:37:35,658 epoch 44 - iter 168/245 - loss 0.02560387 - samples/sec: 243.55 - lr: 0.001563
2022-12-12 00:37:39,145 epoch 44 - iter 192/245 - loss 0.02553265 - samples/sec: 220.50 - lr: 0.001563


100%|██████████| 37/37 [00:07<00:00,  5.10it/s]

2022-12-12 00:37:54,758 Evaluating as a multi-label problem: False
2022-12-12 00:37:54,780 DEV : loss 0.05307067930698395 - f1-score (micro avg)  0.8022





2022-12-12 00:37:54,864 BAD EPOCHS (no improvement): 2
2022-12-12 00:37:54,866 ----------------------------------------------------------------------------------------------------
2022-12-12 00:37:57,949 epoch 45 - iter 24/245 - loss 0.02540571 - samples/sec: 249.55 - lr: 0.001563
2022-12-12 00:38:01,101 epoch 45 - iter 48/245 - loss 0.02309963 - samples/sec: 244.01 - lr: 0.001563
2022-12-12 00:38:04,708 epoch 45 - iter 72/245 - loss 0.02400251 - samples/sec: 213.17 - lr: 0.001563
2022-12-12 00:38:09,579 epoch 45 - iter 96/245 - loss 0.02385688 - samples/sec: 157.80 - lr: 0.001563
2022-12-12 00:38:14,134 epoch 45 - iter 120/245 - loss 0.02593165 - samples/sec: 168.76 - lr: 0.001563
2022-12-12 00:38:17,602 epoch 45 - iter 144/245 - loss 0.02579112 - samples/sec: 221.74 - lr: 0.001563
2022-12-12 00:38:22,609 epoch 45 - iter 168/245 - loss 0.02572135 - samples/sec: 153.50 - lr: 0.001563
2022-12-12 00:38:25,290 epoch 45 - iter 192/245 - loss 0.02620190 - samples/sec: 286.88 - lr: 0.001563


100%|██████████| 37/37 [00:08<00:00,  4.56it/s]

2022-12-12 00:38:41,867 Evaluating as a multi-label problem: False
2022-12-12 00:38:41,893 DEV : loss 0.05302637815475464 - f1-score (micro avg)  0.8022





2022-12-12 00:38:41,985 BAD EPOCHS (no improvement): 3
2022-12-12 00:38:41,988 ----------------------------------------------------------------------------------------------------
2022-12-12 00:38:45,754 epoch 46 - iter 24/245 - loss 0.02647346 - samples/sec: 204.23 - lr: 0.001563
2022-12-12 00:38:48,979 epoch 46 - iter 48/245 - loss 0.02588446 - samples/sec: 238.41 - lr: 0.001563
2022-12-12 00:38:51,982 epoch 46 - iter 72/245 - loss 0.02559113 - samples/sec: 256.15 - lr: 0.001563
2022-12-12 00:38:56,233 epoch 46 - iter 96/245 - loss 0.02588637 - samples/sec: 180.80 - lr: 0.001563
2022-12-12 00:38:59,836 epoch 46 - iter 120/245 - loss 0.02543800 - samples/sec: 213.41 - lr: 0.001563
2022-12-12 00:39:03,396 epoch 46 - iter 144/245 - loss 0.02595242 - samples/sec: 215.95 - lr: 0.001563
2022-12-12 00:39:07,810 epoch 46 - iter 168/245 - loss 0.02635067 - samples/sec: 174.14 - lr: 0.001563
2022-12-12 00:39:12,820 epoch 46 - iter 192/245 - loss 0.02669359 - samples/sec: 153.44 - lr: 0.001563


100%|██████████| 37/37 [00:07<00:00,  4.84it/s]

2022-12-12 00:39:27,707 Evaluating as a multi-label problem: False
2022-12-12 00:39:27,731 DEV : loss 0.052497994154691696 - f1-score (micro avg)  0.801





2022-12-12 00:39:27,819 Epoch    46: reducing learning rate of group 0 to 7.8125e-04.
2022-12-12 00:39:27,821 BAD EPOCHS (no improvement): 4
2022-12-12 00:39:27,823 ----------------------------------------------------------------------------------------------------
2022-12-12 00:39:30,793 epoch 47 - iter 24/245 - loss 0.02510992 - samples/sec: 259.06 - lr: 0.000781
2022-12-12 00:39:34,428 epoch 47 - iter 48/245 - loss 0.02715738 - samples/sec: 211.52 - lr: 0.000781
2022-12-12 00:39:37,751 epoch 47 - iter 72/245 - loss 0.02623227 - samples/sec: 231.45 - lr: 0.000781
2022-12-12 00:39:42,805 epoch 47 - iter 96/245 - loss 0.02647805 - samples/sec: 152.06 - lr: 0.000781
2022-12-12 00:39:47,412 epoch 47 - iter 120/245 - loss 0.02622758 - samples/sec: 166.85 - lr: 0.000781
2022-12-12 00:39:51,059 epoch 47 - iter 144/245 - loss 0.02687353 - samples/sec: 210.82 - lr: 0.000781
2022-12-12 00:39:55,635 epoch 47 - iter 168/245 - loss 0.02679963 - samples/sec: 167.97 - lr: 0.000781
2022-12-12 00:39:

100%|██████████| 37/37 [00:07<00:00,  5.01it/s]

2022-12-12 00:40:14,581 Evaluating as a multi-label problem: False
2022-12-12 00:40:14,602 DEV : loss 0.05272059515118599 - f1-score (micro avg)  0.8008





2022-12-12 00:40:14,687 BAD EPOCHS (no improvement): 1
2022-12-12 00:40:14,690 ----------------------------------------------------------------------------------------------------
2022-12-12 00:40:19,003 epoch 48 - iter 24/245 - loss 0.02377653 - samples/sec: 178.26 - lr: 0.000781
2022-12-12 00:40:22,398 epoch 48 - iter 48/245 - loss 0.02406732 - samples/sec: 226.48 - lr: 0.000781
2022-12-12 00:40:26,658 epoch 48 - iter 72/245 - loss 0.02486197 - samples/sec: 180.45 - lr: 0.000781
2022-12-12 00:40:31,389 epoch 48 - iter 96/245 - loss 0.02452774 - samples/sec: 162.45 - lr: 0.000781
2022-12-12 00:40:34,739 epoch 48 - iter 120/245 - loss 0.02472448 - samples/sec: 229.52 - lr: 0.000781
2022-12-12 00:40:38,205 epoch 48 - iter 144/245 - loss 0.02604778 - samples/sec: 221.83 - lr: 0.000781
2022-12-12 00:40:41,924 epoch 48 - iter 168/245 - loss 0.02613275 - samples/sec: 206.74 - lr: 0.000781
2022-12-12 00:40:45,742 epoch 48 - iter 192/245 - loss 0.02644681 - samples/sec: 201.36 - lr: 0.000781


100%|██████████| 37/37 [00:08<00:00,  4.60it/s]

2022-12-12 00:41:00,403 Evaluating as a multi-label problem: False
2022-12-12 00:41:00,424 DEV : loss 0.05287087708711624 - f1-score (micro avg)  0.8032





2022-12-12 00:41:00,509 BAD EPOCHS (no improvement): 2
2022-12-12 00:41:00,513 ----------------------------------------------------------------------------------------------------
2022-12-12 00:41:04,108 epoch 49 - iter 24/245 - loss 0.02400593 - samples/sec: 213.96 - lr: 0.000781
2022-12-12 00:41:08,034 epoch 49 - iter 48/245 - loss 0.02513051 - samples/sec: 195.83 - lr: 0.000781
2022-12-12 00:41:11,881 epoch 49 - iter 72/245 - loss 0.02618339 - samples/sec: 199.85 - lr: 0.000781
2022-12-12 00:41:15,401 epoch 49 - iter 96/245 - loss 0.02540211 - samples/sec: 218.39 - lr: 0.000781
2022-12-12 00:41:19,545 epoch 49 - iter 120/245 - loss 0.02567124 - samples/sec: 185.47 - lr: 0.000781
2022-12-12 00:41:23,152 epoch 49 - iter 144/245 - loss 0.02528351 - samples/sec: 213.17 - lr: 0.000781
2022-12-12 00:41:27,629 epoch 49 - iter 168/245 - loss 0.02557259 - samples/sec: 171.68 - lr: 0.000781
2022-12-12 00:41:31,082 epoch 49 - iter 192/245 - loss 0.02524521 - samples/sec: 222.70 - lr: 0.000781


100%|██████████| 37/37 [00:07<00:00,  5.06it/s]

2022-12-12 00:41:45,682 Evaluating as a multi-label problem: False
2022-12-12 00:41:45,702 DEV : loss 0.0527239628136158 - f1-score (micro avg)  0.8008





2022-12-12 00:41:45,787 BAD EPOCHS (no improvement): 3
2022-12-12 00:41:45,789 ----------------------------------------------------------------------------------------------------
2022-12-12 00:41:49,916 epoch 50 - iter 24/245 - loss 0.02354179 - samples/sec: 186.31 - lr: 0.000781
2022-12-12 00:41:53,165 epoch 50 - iter 48/245 - loss 0.02304942 - samples/sec: 236.68 - lr: 0.000781
2022-12-12 00:41:58,055 epoch 50 - iter 72/245 - loss 0.02683802 - samples/sec: 157.17 - lr: 0.000781
2022-12-12 00:42:01,006 epoch 50 - iter 96/245 - loss 0.02633873 - samples/sec: 260.55 - lr: 0.000781
2022-12-12 00:42:04,451 epoch 50 - iter 120/245 - loss 0.02602177 - samples/sec: 223.18 - lr: 0.000781
2022-12-12 00:42:07,836 epoch 50 - iter 144/245 - loss 0.02596853 - samples/sec: 227.16 - lr: 0.000781
2022-12-12 00:42:11,429 epoch 50 - iter 168/245 - loss 0.02633474 - samples/sec: 214.00 - lr: 0.000781
2022-12-12 00:42:14,314 epoch 50 - iter 192/245 - loss 0.02611443 - samples/sec: 266.59 - lr: 0.000781


100%|██████████| 37/37 [00:07<00:00,  4.63it/s]

2022-12-12 00:42:31,485 Evaluating as a multi-label problem: False
2022-12-12 00:42:31,506 DEV : loss 0.05290211737155914 - f1-score (micro avg)  0.8016





2022-12-12 00:42:31,588 Epoch    50: reducing learning rate of group 0 to 3.9063e-04.
2022-12-12 00:42:31,589 BAD EPOCHS (no improvement): 4
2022-12-12 00:42:31,591 ----------------------------------------------------------------------------------------------------
2022-12-12 00:42:34,857 epoch 51 - iter 24/245 - loss 0.02606544 - samples/sec: 235.55 - lr: 0.000391
2022-12-12 00:42:39,012 epoch 51 - iter 48/245 - loss 0.02824679 - samples/sec: 185.00 - lr: 0.000391
2022-12-12 00:42:42,801 epoch 51 - iter 72/245 - loss 0.02721593 - samples/sec: 202.91 - lr: 0.000391
2022-12-12 00:42:46,300 epoch 51 - iter 96/245 - loss 0.02702886 - samples/sec: 219.70 - lr: 0.000391
2022-12-12 00:42:49,888 epoch 51 - iter 120/245 - loss 0.02681326 - samples/sec: 214.27 - lr: 0.000391
2022-12-12 00:42:54,163 epoch 51 - iter 144/245 - loss 0.02653610 - samples/sec: 179.83 - lr: 0.000391
2022-12-12 00:42:58,732 epoch 51 - iter 168/245 - loss 0.02672649 - samples/sec: 168.24 - lr: 0.000391
2022-12-12 00:43:

100%|██████████| 37/37 [00:07<00:00,  5.02it/s]

2022-12-12 00:43:17,019 Evaluating as a multi-label problem: False
2022-12-12 00:43:17,041 DEV : loss 0.052991803735494614 - f1-score (micro avg)  0.8013





2022-12-12 00:43:17,124 BAD EPOCHS (no improvement): 1
2022-12-12 00:43:17,126 ----------------------------------------------------------------------------------------------------
2022-12-12 00:43:20,062 epoch 52 - iter 24/245 - loss 0.02661215 - samples/sec: 262.07 - lr: 0.000391
2022-12-12 00:43:23,842 epoch 52 - iter 48/245 - loss 0.02616719 - samples/sec: 203.36 - lr: 0.000391
2022-12-12 00:43:27,017 epoch 52 - iter 72/245 - loss 0.02665110 - samples/sec: 242.19 - lr: 0.000391
2022-12-12 00:43:32,451 epoch 52 - iter 96/245 - loss 0.02627430 - samples/sec: 141.44 - lr: 0.000391
2022-12-12 00:43:36,300 epoch 52 - iter 120/245 - loss 0.02621390 - samples/sec: 199.76 - lr: 0.000391
2022-12-12 00:43:40,349 epoch 52 - iter 144/245 - loss 0.02565733 - samples/sec: 189.82 - lr: 0.000391
2022-12-12 00:43:44,580 epoch 52 - iter 168/245 - loss 0.02616870 - samples/sec: 181.72 - lr: 0.000391
2022-12-12 00:43:47,726 epoch 52 - iter 192/245 - loss 0.02583370 - samples/sec: 244.39 - lr: 0.000391


100%|██████████| 37/37 [00:07<00:00,  5.08it/s]

2022-12-12 00:44:02,632 Evaluating as a multi-label problem: False
2022-12-12 00:44:02,653 DEV : loss 0.05290910601615906 - f1-score (micro avg)  0.8016





2022-12-12 00:44:02,738 BAD EPOCHS (no improvement): 2
2022-12-12 00:44:02,740 ----------------------------------------------------------------------------------------------------
2022-12-12 00:44:06,961 epoch 53 - iter 24/245 - loss 0.02839747 - samples/sec: 182.16 - lr: 0.000391
2022-12-12 00:44:10,372 epoch 53 - iter 48/245 - loss 0.02782537 - samples/sec: 225.45 - lr: 0.000391
2022-12-12 00:44:13,421 epoch 53 - iter 72/245 - loss 0.02663294 - samples/sec: 252.22 - lr: 0.000391
2022-12-12 00:44:16,788 epoch 53 - iter 96/245 - loss 0.02628096 - samples/sec: 228.34 - lr: 0.000391
2022-12-12 00:44:20,485 epoch 53 - iter 120/245 - loss 0.02626413 - samples/sec: 207.96 - lr: 0.000391
2022-12-12 00:44:24,134 epoch 53 - iter 144/245 - loss 0.02582245 - samples/sec: 210.70 - lr: 0.000391
2022-12-12 00:44:27,370 epoch 53 - iter 168/245 - loss 0.02618170 - samples/sec: 237.60 - lr: 0.000391
2022-12-12 00:44:31,748 epoch 53 - iter 192/245 - loss 0.02659458 - samples/sec: 175.58 - lr: 0.000391


100%|██████████| 37/37 [00:08<00:00,  4.51it/s]

2022-12-12 00:44:48,883 Evaluating as a multi-label problem: False
2022-12-12 00:44:48,903 DEV : loss 0.05285537242889404 - f1-score (micro avg)  0.8014





2022-12-12 00:44:48,987 BAD EPOCHS (no improvement): 3
2022-12-12 00:44:48,990 ----------------------------------------------------------------------------------------------------
2022-12-12 00:44:52,527 epoch 54 - iter 24/245 - loss 0.02629050 - samples/sec: 217.52 - lr: 0.000391
2022-12-12 00:44:56,444 epoch 54 - iter 48/245 - loss 0.02723530 - samples/sec: 196.32 - lr: 0.000391
2022-12-12 00:44:59,340 epoch 54 - iter 72/245 - loss 0.02534500 - samples/sec: 265.52 - lr: 0.000391
2022-12-12 00:45:03,014 epoch 54 - iter 96/245 - loss 0.02516725 - samples/sec: 209.23 - lr: 0.000391
2022-12-12 00:45:07,410 epoch 54 - iter 120/245 - loss 0.02553682 - samples/sec: 174.88 - lr: 0.000391
2022-12-12 00:45:10,519 epoch 54 - iter 144/245 - loss 0.02547046 - samples/sec: 247.34 - lr: 0.000391
2022-12-12 00:45:14,896 epoch 54 - iter 168/245 - loss 0.02541109 - samples/sec: 175.62 - lr: 0.000391
2022-12-12 00:45:18,950 epoch 54 - iter 192/245 - loss 0.02581585 - samples/sec: 189.65 - lr: 0.000391


100%|██████████| 37/37 [00:07<00:00,  5.05it/s]

2022-12-12 00:45:34,482 Evaluating as a multi-label problem: False
2022-12-12 00:45:34,503 DEV : loss 0.05279255658388138 - f1-score (micro avg)  0.8034





2022-12-12 00:45:34,587 Epoch    54: reducing learning rate of group 0 to 1.9531e-04.
2022-12-12 00:45:34,589 BAD EPOCHS (no improvement): 4
2022-12-12 00:45:34,592 ----------------------------------------------------------------------------------------------------
2022-12-12 00:45:38,507 epoch 55 - iter 24/245 - loss 0.02436786 - samples/sec: 196.38 - lr: 0.000195
2022-12-12 00:45:42,734 epoch 55 - iter 48/245 - loss 0.02560804 - samples/sec: 181.84 - lr: 0.000195
2022-12-12 00:45:46,019 epoch 55 - iter 72/245 - loss 0.02491226 - samples/sec: 234.11 - lr: 0.000195
2022-12-12 00:45:49,723 epoch 55 - iter 96/245 - loss 0.02494316 - samples/sec: 207.52 - lr: 0.000195
2022-12-12 00:45:52,869 epoch 55 - iter 120/245 - loss 0.02510293 - samples/sec: 244.50 - lr: 0.000195
2022-12-12 00:45:56,793 epoch 55 - iter 144/245 - loss 0.02511961 - samples/sec: 195.91 - lr: 0.000195
2022-12-12 00:45:59,489 epoch 55 - iter 168/245 - loss 0.02535129 - samples/sec: 285.29 - lr: 0.000195
2022-12-12 00:46:

100%|██████████| 37/37 [00:07<00:00,  4.64it/s]

2022-12-12 00:46:20,770 Evaluating as a multi-label problem: False
2022-12-12 00:46:20,790 DEV : loss 0.05284939706325531 - f1-score (micro avg)  0.8034





2022-12-12 00:46:20,874 BAD EPOCHS (no improvement): 1
2022-12-12 00:46:20,876 ----------------------------------------------------------------------------------------------------
2022-12-12 00:46:25,824 epoch 56 - iter 24/245 - loss 0.02647208 - samples/sec: 155.34 - lr: 0.000195
2022-12-12 00:46:30,102 epoch 56 - iter 48/245 - loss 0.02732099 - samples/sec: 179.69 - lr: 0.000195
2022-12-12 00:46:33,190 epoch 56 - iter 72/245 - loss 0.02791748 - samples/sec: 249.07 - lr: 0.000195
2022-12-12 00:46:37,168 epoch 56 - iter 96/245 - loss 0.02725817 - samples/sec: 193.24 - lr: 0.000195
2022-12-12 00:46:40,268 epoch 56 - iter 120/245 - loss 0.02617114 - samples/sec: 248.07 - lr: 0.000195
2022-12-12 00:46:43,282 epoch 56 - iter 144/245 - loss 0.02656387 - samples/sec: 255.11 - lr: 0.000195
2022-12-12 00:46:47,127 epoch 56 - iter 168/245 - loss 0.02647422 - samples/sec: 199.99 - lr: 0.000195
2022-12-12 00:46:50,375 epoch 56 - iter 192/245 - loss 0.02606114 - samples/sec: 236.72 - lr: 0.000195


100%|██████████| 37/37 [00:07<00:00,  5.10it/s]

2022-12-12 00:47:06,044 Evaluating as a multi-label problem: False
2022-12-12 00:47:06,068 DEV : loss 0.05289311707019806 - f1-score (micro avg)  0.8027





2022-12-12 00:47:06,151 BAD EPOCHS (no improvement): 2
2022-12-12 00:47:06,153 ----------------------------------------------------------------------------------------------------
2022-12-12 00:47:09,719 epoch 57 - iter 24/245 - loss 0.02778393 - samples/sec: 215.67 - lr: 0.000195
2022-12-12 00:47:12,766 epoch 57 - iter 48/245 - loss 0.02427305 - samples/sec: 252.41 - lr: 0.000195
2022-12-12 00:47:16,024 epoch 57 - iter 72/245 - loss 0.02513660 - samples/sec: 235.99 - lr: 0.000195
2022-12-12 00:47:19,166 epoch 57 - iter 96/245 - loss 0.02426736 - samples/sec: 244.74 - lr: 0.000195
2022-12-12 00:47:23,074 epoch 57 - iter 120/245 - loss 0.02610872 - samples/sec: 196.72 - lr: 0.000195
2022-12-12 00:47:27,246 epoch 57 - iter 144/245 - loss 0.02659660 - samples/sec: 184.28 - lr: 0.000195
2022-12-12 00:47:30,735 epoch 57 - iter 168/245 - loss 0.02621336 - samples/sec: 220.39 - lr: 0.000195
2022-12-12 00:47:34,432 epoch 57 - iter 192/245 - loss 0.02651921 - samples/sec: 207.96 - lr: 0.000195


100%|██████████| 37/37 [00:08<00:00,  4.58it/s]

2022-12-12 00:47:51,541 Evaluating as a multi-label problem: False
2022-12-12 00:47:51,564 DEV : loss 0.05288544297218323 - f1-score (micro avg)  0.8021





2022-12-12 00:47:51,649 BAD EPOCHS (no improvement): 3
2022-12-12 00:47:51,651 ----------------------------------------------------------------------------------------------------
2022-12-12 00:47:55,006 epoch 58 - iter 24/245 - loss 0.02627087 - samples/sec: 229.29 - lr: 0.000195
2022-12-12 00:47:59,413 epoch 58 - iter 48/245 - loss 0.02629019 - samples/sec: 174.43 - lr: 0.000195
2022-12-12 00:48:04,575 epoch 58 - iter 72/245 - loss 0.02764349 - samples/sec: 148.90 - lr: 0.000195
2022-12-12 00:48:08,237 epoch 58 - iter 96/245 - loss 0.02717192 - samples/sec: 209.93 - lr: 0.000195
2022-12-12 00:48:11,974 epoch 58 - iter 120/245 - loss 0.02600061 - samples/sec: 205.72 - lr: 0.000195
2022-12-12 00:48:15,501 epoch 58 - iter 144/245 - loss 0.02559039 - samples/sec: 218.05 - lr: 0.000195
2022-12-12 00:48:18,840 epoch 58 - iter 168/245 - loss 0.02549325 - samples/sec: 230.23 - lr: 0.000195
2022-12-12 00:48:22,401 epoch 58 - iter 192/245 - loss 0.02565497 - samples/sec: 215.93 - lr: 0.000195


100%|██████████| 37/37 [00:07<00:00,  4.64it/s]

2022-12-12 00:48:37,597 Evaluating as a multi-label problem: False
2022-12-12 00:48:37,618 DEV : loss 0.052935414016246796 - f1-score (micro avg)  0.8009
2022-12-12 00:48:37,699 Epoch    58: reducing learning rate of group 0 to 9.7656e-05.
2022-12-12 00:48:37,701 BAD EPOCHS (no improvement): 4
2022-12-12 00:48:37,703 ----------------------------------------------------------------------------------------------------
2022-12-12 00:48:37,704 ----------------------------------------------------------------------------------------------------
2022-12-12 00:48:37,707 learning rate too small - quitting training!
2022-12-12 00:48:37,708 ----------------------------------------------------------------------------------------------------





2022-12-12 00:48:39,984 ----------------------------------------------------------------------------------------------------
2022-12-12 00:48:39,987 loading file resources/taggers/sota-ner-flair/best-model.pt
2022-12-12 00:48:41,901 SequenceTagger predicts: Dictionary with 27 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-TEMPO, B-TEMPO, E-TEMPO, I-TEMPO, S-JURISPRUDENCIA, B-JURISPRUDENCIA, E-JURISPRUDENCIA, I-JURISPRUDENCIA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, <START>, <STOP>


100%|██████████| 44/44 [00:08<00:00,  5.31it/s]

2022-12-12 00:48:50,382 Evaluating as a multi-label problem: False
2022-12-12 00:48:50,402 0.8704	0.8086	0.8383	0.728
2022-12-12 00:48:50,403 
Results:
- F-score (micro) 0.8383
- F-score (macro) 0.8219
- Accuracy 0.728

By class:
                precision    recall  f1-score   support

   ORGANIZACAO     0.8529    0.7525    0.7996       501
    LEGISLACAO     0.9054    0.8862    0.8957       378
        PESSOA     0.8991    0.8798    0.8894       233
         TEMPO     0.8750    0.8385    0.8564       192
JURISPRUDENCIA     0.8377    0.6973    0.7611       185
         LOCAL     0.7143    0.7447    0.7292        47

     micro avg     0.8704    0.8086    0.8383      1536
     macro avg     0.8474    0.7998    0.8219      1536
  weighted avg     0.8695    0.8086    0.8372      1536

2022-12-12 00:48:50,405 ----------------------------------------------------------------------------------------------------





{'test_score': 0.8383395207559906,
 'dev_score_history': [0.32655737704918036,
  0.4511825348696179,
  0.567515923566879,
  0.599316133043208,
  0.636336245857186,
  0.6696750902527077,
  0.678129713423831,
  0.7148760330578512,
  0.7014542343883661,
  0.7322274881516588,
  0.7622619734564338,
  0.7377238590410167,
  0.7336368810472397,
  0.7471763683753258,
  0.727056727056727,
  0.7724377533294731,
  0.7980714690867838,
  0.787369640787949,
  0.7912467607255974,
  0.7876598110061145,
  0.7924421883812747,
  0.789443488238669,
  0.7911392405063291,
  0.7792059411596687,
  0.7920392269974043,
  0.7996580222285552,
  0.7952981651376148,
  0.7967990854529865,
  0.7959942775393419,
  0.7990828317569504,
  0.801254633589963,
  0.8028612303290414,
  0.8001142204454598,
  0.8037865748709122,
  0.8031990859754357,
  0.8011510791366907,
  0.8006853226727585,
  0.8034433285509326,
  0.8015940791346428,
  0.8029697315819532,
  0.8029697315819532,
  0.8032082497851618,
  0.8026353480378114,
  0.8

## Vetor de Contexto Flair Embeddings


### Imports

In [None]:
## Imports
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import FlairEmbeddings, StackedEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mounting Google Drive
# Mounts the Drive at /content/drive; the corpus files used by this
# experiment are read from a folder under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Loading the corpus and declaring its column layout
# Column 0 of each data file holds the token text, column 1 the NER tag.
columns = {0: 'text', 1: 'ner'}

# Folder that contains the train / dev / test splits.
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig'

# Build the corpus from the column-formatted files of the three splits.
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='train.txt',
    dev_file='dev.txt',
    test_file='test.txt',
)

## Task
# Label type to predict; reused for the label dictionary and the tagger.
label_type = 'ner'

2022-12-15 22:36:41,670 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig
2022-12-15 22:36:41,674 Train: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/train.txt
2022-12-15 22:36:41,675 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/dev.txt
2022-12-15 22:36:41,677 Test: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/test.txt


In [None]:
## Label dictionary
# Build the dictionary of `label_type` labels found in the corpus and print it
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

2022-12-15 22:36:49,338 Computing label dictionary. Progress:


7827it [00:00, 49652.16it/s]

2022-12-15 22:36:49,548 Dictionary created for label 'ner' with 7 values: ORGANIZACAO (seen 2400 times), LEGISLACAO (seen 1920 times), PESSOA (seen 1525 times), TEMPO (seen 1334 times), JURISPRUDENCIA (seen 1104 times), LOCAL (seen 611 times)
Dictionary with 7 tags: <unk>, ORGANIZACAO, LEGISLACAO, PESSOA, TEMPO, JURISPRUDENCIA, LOCAL





### Embeddings

In [None]:
## Stacking the embeddings
# Portuguese Flair contextual string embeddings, one model per direction.
flair_embedding_forward = FlairEmbeddings('pt-forward')
flair_embedding_backward = FlairEmbeddings('pt-backward')

# Combine only the forward and backward Flair embeddings into one stack
# (no static word vectors are included in this experiment).
embeddings = StackedEmbeddings(
    [flair_embedding_forward, flair_embedding_backward],
)

2022-12-15 22:37:16,551 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-forward.pt not found in cache, downloading to /tmp/tmpj6hq3w1h


100%|██████████| 72819080/72819080 [01:36<00:00, 758206.02B/s]

2022-12-15 22:39:09,224 copying /tmp/tmpj6hq3w1h to cache at /root/.flair/embeddings/lm-pt-forward.pt
2022-12-15 22:39:09,280 removing temp file /tmp/tmpj6hq3w1h





2022-12-15 22:39:28,913 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-backward.pt not found in cache, downloading to /tmp/tmpfd8qtr6v


100%|██████████| 72819080/72819080 [01:41<00:00, 717468.69B/s]

2022-12-15 22:41:21,566 copying /tmp/tmpfd8qtr6v to cache at /root/.flair/embeddings/lm-pt-backward.pt
2022-12-15 22:41:21,624 removing temp file /tmp/tmpfd8qtr6v





### Treino

In [None]:
## Initializing the model
# Sequence tagger over the stacked embeddings: hidden size 256, CRF enabled,
# predicting the `label_type` labels listed in `label_dict`.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,
)

2022-12-15 22:41:21,854 SequenceTagger predicts: Dictionary with 25 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-TEMPO, B-TEMPO, E-TEMPO, I-TEMPO, S-JURISPRUDENCIA, B-JURISPRUDENCIA, E-JURISPRUDENCIA, I-JURISPRUDENCIA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL


In [None]:
## Training the model
# Trainer bound to the tagger and the corpus defined in the cells above.
trainer = ModelTrainer(tagger, corpus)

# Run training for at most 150 epochs, starting from a learning rate of 0.1.
# NOTE(review): the other experiments in this notebook pass this same output
# directory, so the artifacts written here appear to overwrite theirs --
# consider a per-experiment folder; confirm no later cell depends on this path.
trainer.train(
    'resources/taggers/sota-ner-flair',
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=150,
)

2022-12-15 22:41:22,092 ----------------------------------------------------------------------------------------------------
2022-12-15 22:41:22,093 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True)
  (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=

100%|██████████| 37/37 [00:42<00:00,  1.16s/it]

2022-12-15 22:46:30,313 Evaluating as a multi-label problem: False
2022-12-15 22:46:30,341 DEV : loss 0.17013703286647797 - f1-score (micro avg)  0.596
2022-12-15 22:46:30,469 BAD EPOCHS (no improvement): 0
2022-12-15 22:46:30,471 saving best model





2022-12-15 22:46:30,866 ----------------------------------------------------------------------------------------------------
2022-12-15 22:46:37,990 epoch 2 - iter 24/245 - loss 0.15698277 - samples/sec: 107.91 - lr: 0.100000
2022-12-15 22:46:44,692 epoch 2 - iter 48/245 - loss 0.15709476 - samples/sec: 114.67 - lr: 0.100000
2022-12-15 22:46:49,519 epoch 2 - iter 72/245 - loss 0.14674881 - samples/sec: 159.24 - lr: 0.100000
2022-12-15 22:46:55,200 epoch 2 - iter 96/245 - loss 0.14350264 - samples/sec: 135.31 - lr: 0.100000
2022-12-15 22:47:00,922 epoch 2 - iter 120/245 - loss 0.13685348 - samples/sec: 134.32 - lr: 0.100000
2022-12-15 22:47:06,815 epoch 2 - iter 144/245 - loss 0.13138519 - samples/sec: 130.42 - lr: 0.100000
2022-12-15 22:47:13,332 epoch 2 - iter 168/245 - loss 0.12520622 - samples/sec: 117.93 - lr: 0.100000
2022-12-15 22:47:18,506 epoch 2 - iter 192/245 - loss 0.12164040 - samples/sec: 148.57 - lr: 0.100000
2022-12-15 22:47:23,814 epoch 2 - iter 216/245 - loss 0.1185606

100%|██████████| 37/37 [00:10<00:00,  3.42it/s]

2022-12-15 22:47:41,017 Evaluating as a multi-label problem: False
2022-12-15 22:47:41,040 DEV : loss 0.08818749338388443 - f1-score (micro avg)  0.7386
2022-12-15 22:47:41,170 BAD EPOCHS (no improvement): 0
2022-12-15 22:47:41,173 saving best model





2022-12-15 22:47:41,638 ----------------------------------------------------------------------------------------------------
2022-12-15 22:47:47,959 epoch 3 - iter 24/245 - loss 0.07915326 - samples/sec: 121.62 - lr: 0.100000
2022-12-15 22:47:54,804 epoch 3 - iter 48/245 - loss 0.07974518 - samples/sec: 112.27 - lr: 0.100000
2022-12-15 22:48:02,505 epoch 3 - iter 72/245 - loss 0.08251373 - samples/sec: 99.78 - lr: 0.100000
2022-12-15 22:48:07,459 epoch 3 - iter 96/245 - loss 0.08165152 - samples/sec: 155.15 - lr: 0.100000
2022-12-15 22:48:13,199 epoch 3 - iter 120/245 - loss 0.08041906 - samples/sec: 133.91 - lr: 0.100000
2022-12-15 22:48:18,656 epoch 3 - iter 144/245 - loss 0.08086948 - samples/sec: 140.83 - lr: 0.100000
2022-12-15 22:48:23,152 epoch 3 - iter 168/245 - loss 0.07787205 - samples/sec: 170.99 - lr: 0.100000
2022-12-15 22:48:28,830 epoch 3 - iter 192/245 - loss 0.07684415 - samples/sec: 135.38 - lr: 0.100000
2022-12-15 22:48:34,574 epoch 3 - iter 216/245 - loss 0.07527850

100%|██████████| 37/37 [00:09<00:00,  3.70it/s]

2022-12-15 22:48:51,039 Evaluating as a multi-label problem: False
2022-12-15 22:48:51,062 DEV : loss 0.060143593698740005 - f1-score (micro avg)  0.7974
2022-12-15 22:48:51,199 BAD EPOCHS (no improvement): 0
2022-12-15 22:48:51,201 saving best model





2022-12-15 22:48:51,662 ----------------------------------------------------------------------------------------------------
2022-12-15 22:48:57,101 epoch 4 - iter 24/245 - loss 0.05479600 - samples/sec: 141.37 - lr: 0.100000
2022-12-15 22:49:02,918 epoch 4 - iter 48/245 - loss 0.06093690 - samples/sec: 132.14 - lr: 0.100000
2022-12-15 22:49:09,927 epoch 4 - iter 72/245 - loss 0.06155248 - samples/sec: 109.65 - lr: 0.100000
2022-12-15 22:49:16,444 epoch 4 - iter 96/245 - loss 0.06063725 - samples/sec: 117.95 - lr: 0.100000
2022-12-15 22:49:22,470 epoch 4 - iter 120/245 - loss 0.06099043 - samples/sec: 127.54 - lr: 0.100000
2022-12-15 22:49:27,655 epoch 4 - iter 144/245 - loss 0.06099086 - samples/sec: 148.26 - lr: 0.100000
2022-12-15 22:49:34,415 epoch 4 - iter 168/245 - loss 0.06037620 - samples/sec: 113.68 - lr: 0.100000
2022-12-15 22:49:41,413 epoch 4 - iter 192/245 - loss 0.06035901 - samples/sec: 109.82 - lr: 0.100000
2022-12-15 22:49:47,522 epoch 4 - iter 216/245 - loss 0.0595075

100%|██████████| 37/37 [00:10<00:00,  3.61it/s]

2022-12-15 22:50:03,159 Evaluating as a multi-label problem: False
2022-12-15 22:50:03,188 DEV : loss 0.05808359757065773 - f1-score (micro avg)  0.7867
2022-12-15 22:50:03,320 BAD EPOCHS (no improvement): 1
2022-12-15 22:50:03,322 ----------------------------------------------------------------------------------------------------





2022-12-15 22:50:09,570 epoch 5 - iter 24/245 - loss 0.04502739 - samples/sec: 123.03 - lr: 0.100000
2022-12-15 22:50:14,998 epoch 5 - iter 48/245 - loss 0.04593921 - samples/sec: 141.59 - lr: 0.100000
2022-12-15 22:50:20,869 epoch 5 - iter 72/245 - loss 0.04871098 - samples/sec: 130.92 - lr: 0.100000
2022-12-15 22:50:26,547 epoch 5 - iter 96/245 - loss 0.04831732 - samples/sec: 135.38 - lr: 0.100000
2022-12-15 22:50:33,610 epoch 5 - iter 120/245 - loss 0.04716250 - samples/sec: 108.80 - lr: 0.100000
2022-12-15 22:50:38,610 epoch 5 - iter 144/245 - loss 0.04772626 - samples/sec: 153.75 - lr: 0.100000
2022-12-15 22:50:43,743 epoch 5 - iter 168/245 - loss 0.05018310 - samples/sec: 149.76 - lr: 0.100000
2022-12-15 22:50:50,294 epoch 5 - iter 192/245 - loss 0.04912082 - samples/sec: 117.31 - lr: 0.100000
2022-12-15 22:50:54,937 epoch 5 - iter 216/245 - loss 0.04820535 - samples/sec: 165.57 - lr: 0.100000
2022-12-15 22:51:01,857 epoch 5 - iter 240/245 - loss 0.04782338 - samples/sec: 111.05

100%|██████████| 37/37 [00:10<00:00,  3.39it/s]

2022-12-15 22:51:14,014 Evaluating as a multi-label problem: False
2022-12-15 22:51:14,039 DEV : loss 0.048343438655138016 - f1-score (micro avg)  0.8264
2022-12-15 22:51:14,182 BAD EPOCHS (no improvement): 0
2022-12-15 22:51:14,185 saving best model





2022-12-15 22:51:14,638 ----------------------------------------------------------------------------------------------------
2022-12-15 22:51:20,720 epoch 6 - iter 24/245 - loss 0.04739896 - samples/sec: 126.40 - lr: 0.100000
2022-12-15 22:51:26,350 epoch 6 - iter 48/245 - loss 0.04189560 - samples/sec: 136.51 - lr: 0.100000
2022-12-15 22:51:31,285 epoch 6 - iter 72/245 - loss 0.04045147 - samples/sec: 155.76 - lr: 0.100000
2022-12-15 22:51:35,947 epoch 6 - iter 96/245 - loss 0.04174507 - samples/sec: 164.92 - lr: 0.100000
2022-12-15 22:51:40,844 epoch 6 - iter 120/245 - loss 0.04124404 - samples/sec: 156.96 - lr: 0.100000
2022-12-15 22:51:47,127 epoch 6 - iter 144/245 - loss 0.04143596 - samples/sec: 122.31 - lr: 0.100000
2022-12-15 22:51:52,499 epoch 6 - iter 168/245 - loss 0.04092975 - samples/sec: 143.07 - lr: 0.100000
2022-12-15 22:52:00,166 epoch 6 - iter 192/245 - loss 0.04086186 - samples/sec: 100.22 - lr: 0.100000
2022-12-15 22:52:05,690 epoch 6 - iter 216/245 - loss 0.0402615

100%|██████████| 37/37 [00:10<00:00,  3.56it/s]

2022-12-15 22:52:24,454 Evaluating as a multi-label problem: False
2022-12-15 22:52:24,480 DEV : loss 0.05134847015142441 - f1-score (micro avg)  0.8141
2022-12-15 22:52:24,615 BAD EPOCHS (no improvement): 1
2022-12-15 22:52:24,617 ----------------------------------------------------------------------------------------------------





2022-12-15 22:52:30,041 epoch 7 - iter 24/245 - loss 0.03685288 - samples/sec: 141.75 - lr: 0.100000
2022-12-15 22:52:35,383 epoch 7 - iter 48/245 - loss 0.03557998 - samples/sec: 143.90 - lr: 0.100000
2022-12-15 22:52:41,874 epoch 7 - iter 72/245 - loss 0.03687271 - samples/sec: 118.39 - lr: 0.100000
2022-12-15 22:52:49,049 epoch 7 - iter 96/245 - loss 0.03617751 - samples/sec: 107.11 - lr: 0.100000
2022-12-15 22:52:54,020 epoch 7 - iter 120/245 - loss 0.03592187 - samples/sec: 154.63 - lr: 0.100000
2022-12-15 22:53:00,095 epoch 7 - iter 144/245 - loss 0.03567195 - samples/sec: 126.53 - lr: 0.100000
2022-12-15 22:53:06,214 epoch 7 - iter 168/245 - loss 0.03519841 - samples/sec: 125.60 - lr: 0.100000
2022-12-15 22:53:13,048 epoch 7 - iter 192/245 - loss 0.03646793 - samples/sec: 112.46 - lr: 0.100000
2022-12-15 22:53:18,514 epoch 7 - iter 216/245 - loss 0.03665363 - samples/sec: 140.59 - lr: 0.100000
2022-12-15 22:53:23,371 epoch 7 - iter 240/245 - loss 0.03641915 - samples/sec: 158.29

100%|██████████| 37/37 [00:11<00:00,  3.33it/s]

2022-12-15 22:53:36,066 Evaluating as a multi-label problem: False
2022-12-15 22:53:36,090 DEV : loss 0.0450560636818409 - f1-score (micro avg)  0.8288
2022-12-15 22:53:36,226 BAD EPOCHS (no improvement): 0
2022-12-15 22:53:36,228 saving best model





2022-12-15 22:53:36,697 ----------------------------------------------------------------------------------------------------
2022-12-15 22:53:42,257 epoch 8 - iter 24/245 - loss 0.03232879 - samples/sec: 138.31 - lr: 0.100000
2022-12-15 22:53:48,001 epoch 8 - iter 48/245 - loss 0.03555335 - samples/sec: 133.79 - lr: 0.100000
2022-12-15 22:53:53,404 epoch 8 - iter 72/245 - loss 0.03278374 - samples/sec: 142.27 - lr: 0.100000
2022-12-15 22:53:58,333 epoch 8 - iter 96/245 - loss 0.03278495 - samples/sec: 155.96 - lr: 0.100000
2022-12-15 22:54:04,044 epoch 8 - iter 120/245 - loss 0.03295208 - samples/sec: 134.59 - lr: 0.100000
2022-12-15 22:54:10,611 epoch 8 - iter 144/245 - loss 0.03283708 - samples/sec: 117.03 - lr: 0.100000
2022-12-15 22:54:17,679 epoch 8 - iter 168/245 - loss 0.03307726 - samples/sec: 108.73 - lr: 0.100000
2022-12-15 22:54:24,958 epoch 8 - iter 192/245 - loss 0.03254179 - samples/sec: 105.58 - lr: 0.100000
2022-12-15 22:54:29,843 epoch 8 - iter 216/245 - loss 0.0319035

100%|██████████| 37/37 [00:11<00:00,  3.34it/s]

2022-12-15 22:54:48,820 Evaluating as a multi-label problem: False
2022-12-15 22:54:48,843 DEV : loss 0.04213937744498253 - f1-score (micro avg)  0.8443
2022-12-15 22:54:48,978 BAD EPOCHS (no improvement): 0
2022-12-15 22:54:48,981 saving best model





2022-12-15 22:54:49,447 ----------------------------------------------------------------------------------------------------
2022-12-15 22:54:55,356 epoch 9 - iter 24/245 - loss 0.03133572 - samples/sec: 130.15 - lr: 0.100000
2022-12-15 22:55:02,604 epoch 9 - iter 48/245 - loss 0.03046789 - samples/sec: 106.01 - lr: 0.100000
2022-12-15 22:55:07,548 epoch 9 - iter 72/245 - loss 0.02959853 - samples/sec: 155.49 - lr: 0.100000
2022-12-15 22:55:12,833 epoch 9 - iter 96/245 - loss 0.03006850 - samples/sec: 145.46 - lr: 0.100000
2022-12-15 22:55:17,666 epoch 9 - iter 120/245 - loss 0.03038277 - samples/sec: 159.05 - lr: 0.100000
2022-12-15 22:55:23,128 epoch 9 - iter 144/245 - loss 0.02945554 - samples/sec: 140.72 - lr: 0.100000
2022-12-15 22:55:31,365 epoch 9 - iter 168/245 - loss 0.03000327 - samples/sec: 93.29 - lr: 0.100000
2022-12-15 22:55:37,048 epoch 9 - iter 192/245 - loss 0.02983885 - samples/sec: 135.25 - lr: 0.100000
2022-12-15 22:55:42,533 epoch 9 - iter 216/245 - loss 0.02967886

100%|██████████| 37/37 [00:10<00:00,  3.61it/s]

2022-12-15 22:55:59,298 Evaluating as a multi-label problem: False
2022-12-15 22:55:59,320 DEV : loss 0.03852430731058121 - f1-score (micro avg)  0.8573
2022-12-15 22:55:59,448 BAD EPOCHS (no improvement): 0
2022-12-15 22:55:59,450 saving best model





2022-12-15 22:55:59,924 ----------------------------------------------------------------------------------------------------
2022-12-15 22:56:05,115 epoch 10 - iter 24/245 - loss 0.02552663 - samples/sec: 148.17 - lr: 0.100000
2022-12-15 22:56:11,835 epoch 10 - iter 48/245 - loss 0.02779606 - samples/sec: 114.36 - lr: 0.100000
2022-12-15 22:56:17,986 epoch 10 - iter 72/245 - loss 0.02717635 - samples/sec: 124.93 - lr: 0.100000
2022-12-15 22:56:24,001 epoch 10 - iter 96/245 - loss 0.02771204 - samples/sec: 127.77 - lr: 0.100000
2022-12-15 22:56:30,259 epoch 10 - iter 120/245 - loss 0.02773326 - samples/sec: 122.81 - lr: 0.100000
2022-12-15 22:56:35,768 epoch 10 - iter 144/245 - loss 0.02674901 - samples/sec: 139.52 - lr: 0.100000
2022-12-15 22:56:43,479 epoch 10 - iter 168/245 - loss 0.02695934 - samples/sec: 99.66 - lr: 0.100000
2022-12-15 22:56:49,016 epoch 10 - iter 192/245 - loss 0.02727299 - samples/sec: 138.82 - lr: 0.100000
2022-12-15 22:56:54,614 epoch 10 - iter 216/245 - loss 0

100%|██████████| 37/37 [00:10<00:00,  3.39it/s]

2022-12-15 22:57:11,090 Evaluating as a multi-label problem: False
2022-12-15 22:57:11,114 DEV : loss 0.040289729833602905 - f1-score (micro avg)  0.8403
2022-12-15 22:57:11,254 BAD EPOCHS (no improvement): 1
2022-12-15 22:57:11,258 ----------------------------------------------------------------------------------------------------





2022-12-15 22:57:16,772 epoch 11 - iter 24/245 - loss 0.02588168 - samples/sec: 139.44 - lr: 0.100000
2022-12-15 22:57:22,770 epoch 11 - iter 48/245 - loss 0.02721431 - samples/sec: 128.14 - lr: 0.100000
2022-12-15 22:57:29,142 epoch 11 - iter 72/245 - loss 0.02721008 - samples/sec: 120.63 - lr: 0.100000
2022-12-15 22:57:36,102 epoch 11 - iter 96/245 - loss 0.02699674 - samples/sec: 110.41 - lr: 0.100000
2022-12-15 22:57:41,864 epoch 11 - iter 120/245 - loss 0.02608022 - samples/sec: 133.38 - lr: 0.100000
2022-12-15 22:57:47,334 epoch 11 - iter 144/245 - loss 0.02575935 - samples/sec: 140.53 - lr: 0.100000
2022-12-15 22:57:53,313 epoch 11 - iter 168/245 - loss 0.02670625 - samples/sec: 128.54 - lr: 0.100000
2022-12-15 22:57:58,255 epoch 11 - iter 192/245 - loss 0.02638669 - samples/sec: 155.56 - lr: 0.100000
2022-12-15 22:58:03,358 epoch 11 - iter 216/245 - loss 0.02604832 - samples/sec: 150.63 - lr: 0.100000
2022-12-15 22:58:08,492 epoch 11 - iter 240/245 - loss 0.02600988 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.63it/s]

2022-12-15 22:58:21,343 Evaluating as a multi-label problem: False
2022-12-15 22:58:21,366 DEV : loss 0.038068242371082306 - f1-score (micro avg)  0.8523
2022-12-15 22:58:21,494 BAD EPOCHS (no improvement): 2
2022-12-15 22:58:21,496 ----------------------------------------------------------------------------------------------------





2022-12-15 22:58:27,441 epoch 12 - iter 24/245 - loss 0.02270496 - samples/sec: 129.29 - lr: 0.100000
2022-12-15 22:58:34,879 epoch 12 - iter 48/245 - loss 0.02515265 - samples/sec: 103.32 - lr: 0.100000
2022-12-15 22:58:39,691 epoch 12 - iter 72/245 - loss 0.02404308 - samples/sec: 159.76 - lr: 0.100000
2022-12-15 22:58:45,811 epoch 12 - iter 96/245 - loss 0.02500026 - samples/sec: 125.57 - lr: 0.100000
2022-12-15 22:58:50,919 epoch 12 - iter 120/245 - loss 0.02442879 - samples/sec: 150.49 - lr: 0.100000
2022-12-15 22:58:57,255 epoch 12 - iter 144/245 - loss 0.02430141 - samples/sec: 121.31 - lr: 0.100000
2022-12-15 22:59:03,157 epoch 12 - iter 168/245 - loss 0.02403574 - samples/sec: 130.23 - lr: 0.100000
2022-12-15 22:59:09,262 epoch 12 - iter 192/245 - loss 0.02453010 - samples/sec: 125.88 - lr: 0.100000
2022-12-15 22:59:14,162 epoch 12 - iter 216/245 - loss 0.02433856 - samples/sec: 156.86 - lr: 0.100000
2022-12-15 22:59:20,515 epoch 12 - iter 240/245 - loss 0.02490983 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.67it/s]

2022-12-15 22:59:32,235 Evaluating as a multi-label problem: False
2022-12-15 22:59:32,257 DEV : loss 0.03655008226633072 - f1-score (micro avg)  0.873
2022-12-15 22:59:32,394 BAD EPOCHS (no improvement): 0
2022-12-15 22:59:32,397 saving best model





2022-12-15 22:59:32,866 ----------------------------------------------------------------------------------------------------
2022-12-15 22:59:39,345 epoch 13 - iter 24/245 - loss 0.02182342 - samples/sec: 118.67 - lr: 0.100000
2022-12-15 22:59:44,653 epoch 13 - iter 48/245 - loss 0.02332172 - samples/sec: 144.80 - lr: 0.100000
2022-12-15 22:59:50,187 epoch 13 - iter 72/245 - loss 0.02167305 - samples/sec: 138.89 - lr: 0.100000
2022-12-15 22:59:55,584 epoch 13 - iter 96/245 - loss 0.02217682 - samples/sec: 142.42 - lr: 0.100000
2022-12-15 23:00:02,317 epoch 13 - iter 120/245 - loss 0.02166204 - samples/sec: 114.15 - lr: 0.100000
2022-12-15 23:00:07,789 epoch 13 - iter 144/245 - loss 0.02233704 - samples/sec: 140.47 - lr: 0.100000
2022-12-15 23:00:12,847 epoch 13 - iter 168/245 - loss 0.02245241 - samples/sec: 151.98 - lr: 0.100000
2022-12-15 23:00:18,877 epoch 13 - iter 192/245 - loss 0.02307469 - samples/sec: 127.45 - lr: 0.100000
2022-12-15 23:00:24,189 epoch 13 - iter 216/245 - loss 

100%|██████████| 37/37 [00:10<00:00,  3.38it/s]

2022-12-15 23:00:44,409 Evaluating as a multi-label problem: False
2022-12-15 23:00:44,433 DEV : loss 0.03754721209406853 - f1-score (micro avg)  0.879
2022-12-15 23:00:44,567 BAD EPOCHS (no improvement): 0
2022-12-15 23:00:44,569 saving best model





2022-12-15 23:00:45,031 ----------------------------------------------------------------------------------------------------
2022-12-15 23:00:51,752 epoch 14 - iter 24/245 - loss 0.01746260 - samples/sec: 114.35 - lr: 0.100000
2022-12-15 23:00:58,748 epoch 14 - iter 48/245 - loss 0.02054311 - samples/sec: 109.85 - lr: 0.100000
2022-12-15 23:01:04,493 epoch 14 - iter 72/245 - loss 0.02179114 - samples/sec: 133.80 - lr: 0.100000
2022-12-15 23:01:09,906 epoch 14 - iter 96/245 - loss 0.02176379 - samples/sec: 141.98 - lr: 0.100000
2022-12-15 23:01:15,375 epoch 14 - iter 120/245 - loss 0.02128218 - samples/sec: 140.56 - lr: 0.100000
2022-12-15 23:01:20,569 epoch 14 - iter 144/245 - loss 0.02108804 - samples/sec: 147.99 - lr: 0.100000
2022-12-15 23:01:26,726 epoch 14 - iter 168/245 - loss 0.02082528 - samples/sec: 124.82 - lr: 0.100000
2022-12-15 23:01:31,497 epoch 14 - iter 192/245 - loss 0.02051729 - samples/sec: 161.13 - lr: 0.100000
2022-12-15 23:01:37,822 epoch 14 - iter 216/245 - loss 

100%|██████████| 37/37 [00:10<00:00,  3.68it/s]

2022-12-15 23:01:54,998 Evaluating as a multi-label problem: False
2022-12-15 23:01:55,021 DEV : loss 0.04365716874599457 - f1-score (micro avg)  0.8575
2022-12-15 23:01:55,149 BAD EPOCHS (no improvement): 1
2022-12-15 23:01:55,151 ----------------------------------------------------------------------------------------------------





2022-12-15 23:02:00,165 epoch 15 - iter 24/245 - loss 0.02104026 - samples/sec: 153.35 - lr: 0.100000
2022-12-15 23:02:05,131 epoch 15 - iter 48/245 - loss 0.01907400 - samples/sec: 154.81 - lr: 0.100000
2022-12-15 23:02:12,009 epoch 15 - iter 72/245 - loss 0.02021537 - samples/sec: 111.72 - lr: 0.100000
2022-12-15 23:02:19,049 epoch 15 - iter 96/245 - loss 0.02027062 - samples/sec: 109.16 - lr: 0.100000
2022-12-15 23:02:24,339 epoch 15 - iter 120/245 - loss 0.02028997 - samples/sec: 145.32 - lr: 0.100000
2022-12-15 23:02:31,409 epoch 15 - iter 144/245 - loss 0.02018605 - samples/sec: 108.70 - lr: 0.100000
2022-12-15 23:02:36,831 epoch 15 - iter 168/245 - loss 0.02027218 - samples/sec: 141.75 - lr: 0.100000
2022-12-15 23:02:43,288 epoch 15 - iter 192/245 - loss 0.02068665 - samples/sec: 119.01 - lr: 0.100000
2022-12-15 23:02:48,082 epoch 15 - iter 216/245 - loss 0.02028542 - samples/sec: 160.36 - lr: 0.100000
2022-12-15 23:02:54,052 epoch 15 - iter 240/245 - loss 0.02026813 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.38it/s]

2022-12-15 23:03:06,124 Evaluating as a multi-label problem: False
2022-12-15 23:03:06,150 DEV : loss 0.03462392836809158 - f1-score (micro avg)  0.8776
2022-12-15 23:03:06,290 BAD EPOCHS (no improvement): 2
2022-12-15 23:03:06,292 ----------------------------------------------------------------------------------------------------





2022-12-15 23:03:12,267 epoch 16 - iter 24/245 - loss 0.02112419 - samples/sec: 128.67 - lr: 0.100000
2022-12-15 23:03:17,582 epoch 16 - iter 48/245 - loss 0.01938359 - samples/sec: 144.62 - lr: 0.100000
2022-12-15 23:03:22,266 epoch 16 - iter 72/245 - loss 0.01893187 - samples/sec: 164.10 - lr: 0.100000
2022-12-15 23:03:27,511 epoch 16 - iter 96/245 - loss 0.01825479 - samples/sec: 146.56 - lr: 0.100000
2022-12-15 23:03:33,663 epoch 16 - iter 120/245 - loss 0.01954917 - samples/sec: 124.94 - lr: 0.100000
2022-12-15 23:03:39,157 epoch 16 - iter 144/245 - loss 0.01941626 - samples/sec: 139.91 - lr: 0.100000
2022-12-15 23:03:45,245 epoch 16 - iter 168/245 - loss 0.01998785 - samples/sec: 126.24 - lr: 0.100000
2022-12-15 23:03:53,199 epoch 16 - iter 192/245 - loss 0.01929207 - samples/sec: 96.60 - lr: 0.100000
2022-12-15 23:04:00,560 epoch 16 - iter 216/245 - loss 0.01947257 - samples/sec: 104.39 - lr: 0.100000
2022-12-15 23:04:05,907 epoch 16 - iter 240/245 - loss 0.01954070 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.64it/s]

2022-12-15 23:04:17,421 Evaluating as a multi-label problem: False
2022-12-15 23:04:17,443 DEV : loss 0.03850279003381729 - f1-score (micro avg)  0.8681
2022-12-15 23:04:17,580 BAD EPOCHS (no improvement): 3
2022-12-15 23:04:17,582 ----------------------------------------------------------------------------------------------------





2022-12-15 23:04:24,311 epoch 17 - iter 24/245 - loss 0.01971782 - samples/sec: 114.23 - lr: 0.100000
2022-12-15 23:04:29,694 epoch 17 - iter 48/245 - loss 0.01804821 - samples/sec: 142.81 - lr: 0.100000
2022-12-15 23:04:34,993 epoch 17 - iter 72/245 - loss 0.01817479 - samples/sec: 145.04 - lr: 0.100000
2022-12-15 23:04:41,375 epoch 17 - iter 96/245 - loss 0.01751053 - samples/sec: 120.41 - lr: 0.100000
2022-12-15 23:04:47,947 epoch 17 - iter 120/245 - loss 0.01696950 - samples/sec: 116.95 - lr: 0.100000
2022-12-15 23:04:55,871 epoch 17 - iter 144/245 - loss 0.01733691 - samples/sec: 96.97 - lr: 0.100000
2022-12-15 23:05:02,230 epoch 17 - iter 168/245 - loss 0.01699515 - samples/sec: 120.86 - lr: 0.100000
2022-12-15 23:05:07,501 epoch 17 - iter 192/245 - loss 0.01731574 - samples/sec: 145.81 - lr: 0.100000
2022-12-15 23:05:12,686 epoch 17 - iter 216/245 - loss 0.01730638 - samples/sec: 148.24 - lr: 0.100000
2022-12-15 23:05:17,730 epoch 17 - iter 240/245 - loss 0.01748616 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.57it/s]


2022-12-15 23:05:28,934 Evaluating as a multi-label problem: False
2022-12-15 23:05:28,958 DEV : loss 0.03672116994857788 - f1-score (micro avg)  0.8758
2022-12-15 23:05:29,092 Epoch    17: reducing learning rate of group 0 to 5.0000e-02.
2022-12-15 23:05:29,093 BAD EPOCHS (no improvement): 4
2022-12-15 23:05:29,096 ----------------------------------------------------------------------------------------------------
2022-12-15 23:05:34,196 epoch 18 - iter 24/245 - loss 0.01324520 - samples/sec: 150.76 - lr: 0.050000
2022-12-15 23:05:40,823 epoch 18 - iter 48/245 - loss 0.01555244 - samples/sec: 115.97 - lr: 0.050000
2022-12-15 23:05:48,044 epoch 18 - iter 72/245 - loss 0.01539623 - samples/sec: 106.43 - lr: 0.050000
2022-12-15 23:05:53,530 epoch 18 - iter 96/245 - loss 0.01542566 - samples/sec: 140.09 - lr: 0.050000
2022-12-15 23:05:59,053 epoch 18 - iter 120/245 - loss 0.01557728 - samples/sec: 139.18 - lr: 0.050000
2022-12-15 23:06:05,300 epoch 18 - iter 144/245 - loss 0.01514302 - sa

100%|██████████| 37/37 [00:10<00:00,  3.43it/s]


2022-12-15 23:06:39,643 Evaluating as a multi-label problem: False
2022-12-15 23:06:39,666 DEV : loss 0.03697904571890831 - f1-score (micro avg)  0.8761
2022-12-15 23:06:39,794 BAD EPOCHS (no improvement): 1
2022-12-15 23:06:39,797 ----------------------------------------------------------------------------------------------------
2022-12-15 23:06:45,355 epoch 19 - iter 24/245 - loss 0.01521576 - samples/sec: 138.32 - lr: 0.050000
2022-12-15 23:06:53,522 epoch 19 - iter 48/245 - loss 0.01479439 - samples/sec: 94.09 - lr: 0.050000
2022-12-15 23:06:59,203 epoch 19 - iter 72/245 - loss 0.01453195 - samples/sec: 135.26 - lr: 0.050000
2022-12-15 23:07:05,931 epoch 19 - iter 96/245 - loss 0.01466325 - samples/sec: 114.24 - lr: 0.050000
2022-12-15 23:07:11,016 epoch 19 - iter 120/245 - loss 0.01450738 - samples/sec: 151.17 - lr: 0.050000
2022-12-15 23:07:16,048 epoch 19 - iter 144/245 - loss 0.01397157 - samples/sec: 152.74 - lr: 0.050000
2022-12-15 23:07:21,423 epoch 19 - iter 168/245 - loss

100%|██████████| 37/37 [00:10<00:00,  3.68it/s]

2022-12-15 23:07:49,447 Evaluating as a multi-label problem: False
2022-12-15 23:07:49,469 DEV : loss 0.03511109575629234 - f1-score (micro avg)  0.8907
2022-12-15 23:07:49,600 BAD EPOCHS (no improvement): 0
2022-12-15 23:07:49,602 saving best model





2022-12-15 23:07:50,066 ----------------------------------------------------------------------------------------------------
2022-12-15 23:07:56,825 epoch 20 - iter 24/245 - loss 0.01512915 - samples/sec: 113.79 - lr: 0.050000
2022-12-15 23:08:02,175 epoch 20 - iter 48/245 - loss 0.01397794 - samples/sec: 143.65 - lr: 0.050000
2022-12-15 23:08:07,686 epoch 20 - iter 72/245 - loss 0.01361242 - samples/sec: 139.48 - lr: 0.050000
2022-12-15 23:08:13,035 epoch 20 - iter 96/245 - loss 0.01381715 - samples/sec: 143.71 - lr: 0.050000
2022-12-15 23:08:20,563 epoch 20 - iter 120/245 - loss 0.01383550 - samples/sec: 102.07 - lr: 0.050000
2022-12-15 23:08:26,546 epoch 20 - iter 144/245 - loss 0.01357807 - samples/sec: 128.44 - lr: 0.050000
2022-12-15 23:08:31,985 epoch 20 - iter 168/245 - loss 0.01325687 - samples/sec: 141.33 - lr: 0.050000
2022-12-15 23:08:37,192 epoch 20 - iter 192/245 - loss 0.01288673 - samples/sec: 147.61 - lr: 0.050000
2022-12-15 23:08:42,885 epoch 20 - iter 216/245 - loss 

100%|██████████| 37/37 [00:10<00:00,  3.41it/s]

2022-12-15 23:09:00,789 Evaluating as a multi-label problem: False
2022-12-15 23:09:00,811 DEV : loss 0.03325570747256279 - f1-score (micro avg)  0.8786
2022-12-15 23:09:00,940 BAD EPOCHS (no improvement): 1
2022-12-15 23:09:00,942 ----------------------------------------------------------------------------------------------------





2022-12-15 23:09:06,552 epoch 21 - iter 24/245 - loss 0.01303465 - samples/sec: 137.04 - lr: 0.050000
2022-12-15 23:09:12,440 epoch 21 - iter 48/245 - loss 0.01345655 - samples/sec: 130.53 - lr: 0.050000
2022-12-15 23:09:19,192 epoch 21 - iter 72/245 - loss 0.01260666 - samples/sec: 113.83 - lr: 0.050000
2022-12-15 23:09:25,212 epoch 21 - iter 96/245 - loss 0.01292987 - samples/sec: 127.65 - lr: 0.050000
2022-12-15 23:09:31,597 epoch 21 - iter 120/245 - loss 0.01343948 - samples/sec: 120.35 - lr: 0.050000
2022-12-15 23:09:37,341 epoch 21 - iter 144/245 - loss 0.01274685 - samples/sec: 133.82 - lr: 0.050000
2022-12-15 23:09:43,661 epoch 21 - iter 168/245 - loss 0.01261731 - samples/sec: 121.61 - lr: 0.050000
2022-12-15 23:09:49,031 epoch 21 - iter 192/245 - loss 0.01233957 - samples/sec: 143.13 - lr: 0.050000
2022-12-15 23:09:54,266 epoch 21 - iter 216/245 - loss 0.01244585 - samples/sec: 146.82 - lr: 0.050000
2022-12-15 23:09:59,689 epoch 21 - iter 240/245 - loss 0.01241034 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.42it/s]

2022-12-15 23:10:11,474 Evaluating as a multi-label problem: False
2022-12-15 23:10:11,496 DEV : loss 0.03420386090874672 - f1-score (micro avg)  0.8827
2022-12-15 23:10:11,633 BAD EPOCHS (no improvement): 2
2022-12-15 23:10:11,635 ----------------------------------------------------------------------------------------------------





2022-12-15 23:10:16,912 epoch 22 - iter 24/245 - loss 0.01098540 - samples/sec: 145.69 - lr: 0.050000
2022-12-15 23:10:22,722 epoch 22 - iter 48/245 - loss 0.01104128 - samples/sec: 132.27 - lr: 0.050000
2022-12-15 23:10:28,378 epoch 22 - iter 72/245 - loss 0.01124632 - samples/sec: 135.88 - lr: 0.050000
2022-12-15 23:10:35,004 epoch 22 - iter 96/245 - loss 0.01151679 - samples/sec: 115.99 - lr: 0.050000
2022-12-15 23:10:40,570 epoch 22 - iter 120/245 - loss 0.01201699 - samples/sec: 138.08 - lr: 0.050000
2022-12-15 23:10:46,319 epoch 22 - iter 144/245 - loss 0.01243386 - samples/sec: 133.68 - lr: 0.050000
2022-12-15 23:10:51,891 epoch 22 - iter 168/245 - loss 0.01225797 - samples/sec: 137.94 - lr: 0.050000
2022-12-15 23:10:58,902 epoch 22 - iter 192/245 - loss 0.01232341 - samples/sec: 109.61 - lr: 0.050000
2022-12-15 23:11:04,500 epoch 22 - iter 216/245 - loss 0.01272064 - samples/sec: 137.30 - lr: 0.050000
2022-12-15 23:11:10,014 epoch 22 - iter 240/245 - loss 0.01263408 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.76it/s]

2022-12-15 23:11:20,792 Evaluating as a multi-label problem: False
2022-12-15 23:11:20,814 DEV : loss 0.03575759381055832 - f1-score (micro avg)  0.8869
2022-12-15 23:11:20,944 BAD EPOCHS (no improvement): 3
2022-12-15 23:11:20,946 ----------------------------------------------------------------------------------------------------





2022-12-15 23:11:25,936 epoch 23 - iter 24/245 - loss 0.00966276 - samples/sec: 154.09 - lr: 0.050000
2022-12-15 23:11:32,822 epoch 23 - iter 48/245 - loss 0.01111512 - samples/sec: 111.61 - lr: 0.050000
2022-12-15 23:11:38,275 epoch 23 - iter 72/245 - loss 0.01112601 - samples/sec: 140.95 - lr: 0.050000
2022-12-15 23:11:43,886 epoch 23 - iter 96/245 - loss 0.01102208 - samples/sec: 136.96 - lr: 0.050000
2022-12-15 23:11:49,346 epoch 23 - iter 120/245 - loss 0.01039649 - samples/sec: 140.78 - lr: 0.050000
2022-12-15 23:11:55,458 epoch 23 - iter 144/245 - loss 0.01078584 - samples/sec: 125.75 - lr: 0.050000
2022-12-15 23:12:00,262 epoch 23 - iter 168/245 - loss 0.01087833 - samples/sec: 160.01 - lr: 0.050000
2022-12-15 23:12:06,069 epoch 23 - iter 192/245 - loss 0.01087505 - samples/sec: 132.36 - lr: 0.050000
2022-12-15 23:12:13,254 epoch 23 - iter 216/245 - loss 0.01134550 - samples/sec: 106.95 - lr: 0.050000
2022-12-15 23:12:18,958 epoch 23 - iter 240/245 - loss 0.01182049 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.40it/s]

2022-12-15 23:12:31,014 Evaluating as a multi-label problem: False
2022-12-15 23:12:31,039 DEV : loss 0.034832391887903214 - f1-score (micro avg)  0.8853
2022-12-15 23:12:31,164 Epoch    23: reducing learning rate of group 0 to 2.5000e-02.
2022-12-15 23:12:31,165 BAD EPOCHS (no improvement): 4
2022-12-15 23:12:31,168 ----------------------------------------------------------------------------------------------------





2022-12-15 23:12:37,628 epoch 24 - iter 24/245 - loss 0.01240101 - samples/sec: 118.98 - lr: 0.025000
2022-12-15 23:12:42,761 epoch 24 - iter 48/245 - loss 0.01213300 - samples/sec: 149.75 - lr: 0.025000
2022-12-15 23:12:47,360 epoch 24 - iter 72/245 - loss 0.01182752 - samples/sec: 167.16 - lr: 0.025000
2022-12-15 23:12:52,241 epoch 24 - iter 96/245 - loss 0.01132326 - samples/sec: 157.49 - lr: 0.025000
2022-12-15 23:12:58,120 epoch 24 - iter 120/245 - loss 0.01141803 - samples/sec: 130.72 - lr: 0.025000
2022-12-15 23:13:04,263 epoch 24 - iter 144/245 - loss 0.01110918 - samples/sec: 125.12 - lr: 0.025000
2022-12-15 23:13:10,308 epoch 24 - iter 168/245 - loss 0.01105549 - samples/sec: 127.14 - lr: 0.025000
2022-12-15 23:13:17,457 epoch 24 - iter 192/245 - loss 0.01083175 - samples/sec: 107.48 - lr: 0.025000
2022-12-15 23:13:23,427 epoch 24 - iter 216/245 - loss 0.01061389 - samples/sec: 128.74 - lr: 0.025000
2022-12-15 23:13:28,802 epoch 24 - iter 240/245 - loss 0.01072514 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.71it/s]

2022-12-15 23:13:39,690 Evaluating as a multi-label problem: False





2022-12-15 23:13:39,715 DEV : loss 0.035583849996328354 - f1-score (micro avg)  0.8828
2022-12-15 23:13:39,844 BAD EPOCHS (no improvement): 1
2022-12-15 23:13:39,846 ----------------------------------------------------------------------------------------------------
2022-12-15 23:13:45,757 epoch 25 - iter 24/245 - loss 0.00778649 - samples/sec: 130.05 - lr: 0.025000
2022-12-15 23:13:50,952 epoch 25 - iter 48/245 - loss 0.00809361 - samples/sec: 147.96 - lr: 0.025000
2022-12-15 23:13:58,528 epoch 25 - iter 72/245 - loss 0.00969816 - samples/sec: 101.43 - lr: 0.025000
2022-12-15 23:14:03,677 epoch 25 - iter 96/245 - loss 0.01059929 - samples/sec: 149.27 - lr: 0.025000
2022-12-15 23:14:10,842 epoch 25 - iter 120/245 - loss 0.01068352 - samples/sec: 107.27 - lr: 0.025000
2022-12-15 23:14:16,272 epoch 25 - iter 144/245 - loss 0.01024095 - samples/sec: 141.53 - lr: 0.025000
2022-12-15 23:14:22,451 epoch 25 - iter 168/245 - loss 0.01039740 - samples/sec: 124.38 - lr: 0.025000
2022-12-15 23:14

100%|██████████| 37/37 [00:09<00:00,  3.74it/s]

2022-12-15 23:14:49,685 Evaluating as a multi-label problem: False
2022-12-15 23:14:49,707 DEV : loss 0.03561178222298622 - f1-score (micro avg)  0.8861
2022-12-15 23:14:49,838 BAD EPOCHS (no improvement): 2
2022-12-15 23:14:49,840 ----------------------------------------------------------------------------------------------------





2022-12-15 23:14:56,620 epoch 26 - iter 24/245 - loss 0.01183768 - samples/sec: 113.36 - lr: 0.025000
2022-12-15 23:15:01,992 epoch 26 - iter 48/245 - loss 0.01130369 - samples/sec: 143.07 - lr: 0.025000
2022-12-15 23:15:07,037 epoch 26 - iter 72/245 - loss 0.01110815 - samples/sec: 152.37 - lr: 0.025000
2022-12-15 23:15:12,030 epoch 26 - iter 96/245 - loss 0.01048987 - samples/sec: 153.97 - lr: 0.025000
2022-12-15 23:15:17,878 epoch 26 - iter 120/245 - loss 0.00992621 - samples/sec: 131.42 - lr: 0.025000
2022-12-15 23:15:23,591 epoch 26 - iter 144/245 - loss 0.00999739 - samples/sec: 134.54 - lr: 0.025000
2022-12-15 23:15:29,099 epoch 26 - iter 168/245 - loss 0.00979949 - samples/sec: 139.55 - lr: 0.025000
2022-12-15 23:15:36,867 epoch 26 - iter 192/245 - loss 0.00971587 - samples/sec: 98.92 - lr: 0.025000
2022-12-15 23:15:42,389 epoch 26 - iter 216/245 - loss 0.01001906 - samples/sec: 139.19 - lr: 0.025000
2022-12-15 23:15:48,100 epoch 26 - iter 240/245 - loss 0.00974754 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.51it/s]

2022-12-15 23:15:59,476 Evaluating as a multi-label problem: False
2022-12-15 23:15:59,500 DEV : loss 0.03284444287419319 - f1-score (micro avg)  0.8936
2022-12-15 23:15:59,629 BAD EPOCHS (no improvement): 0
2022-12-15 23:15:59,631 saving best model





2022-12-15 23:16:00,090 ----------------------------------------------------------------------------------------------------
2022-12-15 23:16:06,651 epoch 27 - iter 24/245 - loss 0.00811691 - samples/sec: 117.20 - lr: 0.025000
2022-12-15 23:16:11,785 epoch 27 - iter 48/245 - loss 0.00867090 - samples/sec: 149.72 - lr: 0.025000
2022-12-15 23:16:16,884 epoch 27 - iter 72/245 - loss 0.00992008 - samples/sec: 150.75 - lr: 0.025000
2022-12-15 23:16:22,095 epoch 27 - iter 96/245 - loss 0.00964465 - samples/sec: 147.51 - lr: 0.025000
2022-12-15 23:16:28,345 epoch 27 - iter 120/245 - loss 0.00972894 - samples/sec: 122.97 - lr: 0.025000
2022-12-15 23:16:34,153 epoch 27 - iter 144/245 - loss 0.00944300 - samples/sec: 132.32 - lr: 0.025000
2022-12-15 23:16:39,880 epoch 27 - iter 168/245 - loss 0.00911140 - samples/sec: 134.19 - lr: 0.025000
2022-12-15 23:16:44,763 epoch 27 - iter 192/245 - loss 0.00908719 - samples/sec: 157.41 - lr: 0.025000
2022-12-15 23:16:51,609 epoch 27 - iter 216/245 - loss 

100%|██████████| 37/37 [00:09<00:00,  3.80it/s]

2022-12-15 23:17:08,654 Evaluating as a multi-label problem: False
2022-12-15 23:17:08,676 DEV : loss 0.034081846475601196 - f1-score (micro avg)  0.8882
2022-12-15 23:17:08,802 BAD EPOCHS (no improvement): 1
2022-12-15 23:17:08,804 ----------------------------------------------------------------------------------------------------





2022-12-15 23:17:14,288 epoch 28 - iter 24/245 - loss 0.00992644 - samples/sec: 140.18 - lr: 0.025000
2022-12-15 23:17:20,070 epoch 28 - iter 48/245 - loss 0.00925463 - samples/sec: 132.93 - lr: 0.025000
2022-12-15 23:17:26,870 epoch 28 - iter 72/245 - loss 0.00977202 - samples/sec: 113.01 - lr: 0.025000
2022-12-15 23:17:31,632 epoch 28 - iter 96/245 - loss 0.00913838 - samples/sec: 161.40 - lr: 0.025000
2022-12-15 23:17:37,156 epoch 28 - iter 120/245 - loss 0.00893151 - samples/sec: 139.15 - lr: 0.025000
2022-12-15 23:17:44,481 epoch 28 - iter 144/245 - loss 0.00978836 - samples/sec: 104.90 - lr: 0.025000
2022-12-15 23:17:49,577 epoch 28 - iter 168/245 - loss 0.00980911 - samples/sec: 150.82 - lr: 0.025000
2022-12-15 23:17:54,244 epoch 28 - iter 192/245 - loss 0.00990503 - samples/sec: 164.75 - lr: 0.025000
2022-12-15 23:18:00,652 epoch 28 - iter 216/245 - loss 0.01007673 - samples/sec: 119.91 - lr: 0.025000
2022-12-15 23:18:06,118 epoch 28 - iter 240/245 - loss 0.01015355 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.50it/s]

2022-12-15 23:18:17,618 Evaluating as a multi-label problem: False
2022-12-15 23:18:17,639 DEV : loss 0.03136078640818596 - f1-score (micro avg)  0.8981
2022-12-15 23:18:17,764 BAD EPOCHS (no improvement): 0
2022-12-15 23:18:17,765 saving best model





2022-12-15 23:18:18,212 ----------------------------------------------------------------------------------------------------
2022-12-15 23:18:23,818 epoch 29 - iter 24/245 - loss 0.00941908 - samples/sec: 137.19 - lr: 0.025000
2022-12-15 23:18:30,481 epoch 29 - iter 48/245 - loss 0.00996259 - samples/sec: 115.32 - lr: 0.025000
2022-12-15 23:18:36,321 epoch 29 - iter 72/245 - loss 0.00969673 - samples/sec: 131.61 - lr: 0.025000
2022-12-15 23:18:43,709 epoch 29 - iter 96/245 - loss 0.00925748 - samples/sec: 104.00 - lr: 0.025000
2022-12-15 23:18:49,187 epoch 29 - iter 120/245 - loss 0.00908041 - samples/sec: 140.33 - lr: 0.025000
2022-12-15 23:18:54,218 epoch 29 - iter 144/245 - loss 0.00874128 - samples/sec: 152.76 - lr: 0.025000
2022-12-15 23:19:00,233 epoch 29 - iter 168/245 - loss 0.00943298 - samples/sec: 127.78 - lr: 0.025000
2022-12-15 23:19:05,685 epoch 29 - iter 192/245 - loss 0.00928370 - samples/sec: 140.96 - lr: 0.025000
2022-12-15 23:19:10,970 epoch 29 - iter 216/245 - loss 

100%|██████████| 37/37 [00:09<00:00,  3.77it/s]


2022-12-15 23:19:26,861 Evaluating as a multi-label problem: False
2022-12-15 23:19:26,883 DEV : loss 0.03398556634783745 - f1-score (micro avg)  0.8929
2022-12-15 23:19:27,014 BAD EPOCHS (no improvement): 1
2022-12-15 23:19:27,016 ----------------------------------------------------------------------------------------------------
2022-12-15 23:19:36,208 epoch 30 - iter 24/245 - loss 0.00904317 - samples/sec: 83.60 - lr: 0.025000
2022-12-15 23:19:42,138 epoch 30 - iter 48/245 - loss 0.00757147 - samples/sec: 129.61 - lr: 0.025000
2022-12-15 23:19:46,967 epoch 30 - iter 72/245 - loss 0.00798363 - samples/sec: 159.17 - lr: 0.025000
2022-12-15 23:19:51,552 epoch 30 - iter 96/245 - loss 0.00809754 - samples/sec: 167.66 - lr: 0.025000
2022-12-15 23:19:57,296 epoch 30 - iter 120/245 - loss 0.00791459 - samples/sec: 133.80 - lr: 0.025000
2022-12-15 23:20:02,467 epoch 30 - iter 144/245 - loss 0.00829111 - samples/sec: 148.66 - lr: 0.025000
2022-12-15 23:20:07,855 epoch 30 - iter 168/245 - loss

100%|██████████| 37/37 [00:09<00:00,  3.79it/s]

2022-12-15 23:20:35,696 Evaluating as a multi-label problem: False
2022-12-15 23:20:35,719 DEV : loss 0.033835023641586304 - f1-score (micro avg)  0.8974
2022-12-15 23:20:35,847 BAD EPOCHS (no improvement): 2
2022-12-15 23:20:35,849 ----------------------------------------------------------------------------------------------------





2022-12-15 23:20:41,626 epoch 31 - iter 24/245 - loss 0.00954704 - samples/sec: 133.06 - lr: 0.025000
2022-12-15 23:20:46,742 epoch 31 - iter 48/245 - loss 0.00933156 - samples/sec: 150.25 - lr: 0.025000
2022-12-15 23:20:52,756 epoch 31 - iter 72/245 - loss 0.00905828 - samples/sec: 127.78 - lr: 0.025000
2022-12-15 23:20:58,540 epoch 31 - iter 96/245 - loss 0.00871444 - samples/sec: 132.88 - lr: 0.025000
2022-12-15 23:21:05,626 epoch 31 - iter 120/245 - loss 0.00864675 - samples/sec: 108.44 - lr: 0.025000
2022-12-15 23:21:11,764 epoch 31 - iter 144/245 - loss 0.00844951 - samples/sec: 125.20 - lr: 0.025000
2022-12-15 23:21:17,037 epoch 31 - iter 168/245 - loss 0.00865813 - samples/sec: 145.76 - lr: 0.025000
2022-12-15 23:21:22,427 epoch 31 - iter 192/245 - loss 0.00878536 - samples/sec: 142.60 - lr: 0.025000
2022-12-15 23:21:28,215 epoch 31 - iter 216/245 - loss 0.00859897 - samples/sec: 132.78 - lr: 0.025000
2022-12-15 23:21:33,532 epoch 31 - iter 240/245 - loss 0.00861498 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.55it/s]

2022-12-15 23:21:45,031 Evaluating as a multi-label problem: False
2022-12-15 23:21:45,052 DEV : loss 0.03534458577632904 - f1-score (micro avg)  0.8885
2022-12-15 23:21:45,178 BAD EPOCHS (no improvement): 3
2022-12-15 23:21:45,180 ----------------------------------------------------------------------------------------------------





2022-12-15 23:21:50,654 epoch 32 - iter 24/245 - loss 0.00749384 - samples/sec: 140.44 - lr: 0.025000
2022-12-15 23:21:57,089 epoch 32 - iter 48/245 - loss 0.00723466 - samples/sec: 119.44 - lr: 0.025000
2022-12-15 23:22:03,223 epoch 32 - iter 72/245 - loss 0.00777014 - samples/sec: 125.27 - lr: 0.025000
2022-12-15 23:22:09,550 epoch 32 - iter 96/245 - loss 0.00783861 - samples/sec: 121.48 - lr: 0.025000
2022-12-15 23:22:15,906 epoch 32 - iter 120/245 - loss 0.00831365 - samples/sec: 120.90 - lr: 0.025000
2022-12-15 23:22:21,290 epoch 32 - iter 144/245 - loss 0.00814228 - samples/sec: 142.76 - lr: 0.025000
2022-12-15 23:22:26,797 epoch 32 - iter 168/245 - loss 0.00840159 - samples/sec: 139.56 - lr: 0.025000
2022-12-15 23:22:31,653 epoch 32 - iter 192/245 - loss 0.00834352 - samples/sec: 158.30 - lr: 0.025000
2022-12-15 23:22:37,605 epoch 32 - iter 216/245 - loss 0.00838621 - samples/sec: 129.14 - lr: 0.025000
2022-12-15 23:22:42,368 epoch 32 - iter 240/245 - loss 0.00845182 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.74it/s]


2022-12-15 23:22:53,394 Evaluating as a multi-label problem: False
2022-12-15 23:22:53,417 DEV : loss 0.03412977606058121 - f1-score (micro avg)  0.89
2022-12-15 23:22:53,545 Epoch    32: reducing learning rate of group 0 to 1.2500e-02.
2022-12-15 23:22:53,547 BAD EPOCHS (no improvement): 4
2022-12-15 23:22:53,550 ----------------------------------------------------------------------------------------------------
2022-12-15 23:22:58,430 epoch 33 - iter 24/245 - loss 0.00655633 - samples/sec: 157.54 - lr: 0.012500
2022-12-15 23:23:04,666 epoch 33 - iter 48/245 - loss 0.00692137 - samples/sec: 123.24 - lr: 0.012500
2022-12-15 23:23:10,342 epoch 33 - iter 72/245 - loss 0.00711720 - samples/sec: 135.40 - lr: 0.012500
2022-12-15 23:23:16,221 epoch 33 - iter 96/245 - loss 0.00732810 - samples/sec: 130.72 - lr: 0.012500
2022-12-15 23:23:21,889 epoch 33 - iter 120/245 - loss 0.00762800 - samples/sec: 135.59 - lr: 0.012500
2022-12-15 23:23:26,851 epoch 33 - iter 144/245 - loss 0.00744406 - samp

100%|██████████| 37/37 [00:10<00:00,  3.45it/s]

2022-12-15 23:24:02,501 Evaluating as a multi-label problem: False
2022-12-15 23:24:02,521 DEV : loss 0.036615852266550064 - f1-score (micro avg)  0.8893
2022-12-15 23:24:02,650 BAD EPOCHS (no improvement): 1
2022-12-15 23:24:02,652 ----------------------------------------------------------------------------------------------------





2022-12-15 23:24:08,084 epoch 34 - iter 24/245 - loss 0.00669907 - samples/sec: 141.53 - lr: 0.012500
2022-12-15 23:24:13,793 epoch 34 - iter 48/245 - loss 0.00739704 - samples/sec: 134.63 - lr: 0.012500
2022-12-15 23:24:18,692 epoch 34 - iter 72/245 - loss 0.00790446 - samples/sec: 156.90 - lr: 0.012500
2022-12-15 23:24:24,187 epoch 34 - iter 96/245 - loss 0.00805655 - samples/sec: 139.85 - lr: 0.012500
2022-12-15 23:24:31,093 epoch 34 - iter 120/245 - loss 0.00788679 - samples/sec: 111.28 - lr: 0.012500
2022-12-15 23:24:38,205 epoch 34 - iter 144/245 - loss 0.00839979 - samples/sec: 108.05 - lr: 0.012500
2022-12-15 23:24:43,387 epoch 34 - iter 168/245 - loss 0.00818045 - samples/sec: 148.31 - lr: 0.012500
2022-12-15 23:24:48,654 epoch 34 - iter 192/245 - loss 0.00797692 - samples/sec: 145.94 - lr: 0.012500
2022-12-15 23:24:53,972 epoch 34 - iter 216/245 - loss 0.00801003 - samples/sec: 144.51 - lr: 0.012500
2022-12-15 23:24:59,995 epoch 34 - iter 240/245 - loss 0.00812817 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.51it/s]

2022-12-15 23:25:11,465 Evaluating as a multi-label problem: False
2022-12-15 23:25:11,487 DEV : loss 0.034274082630872726 - f1-score (micro avg)  0.8905
2022-12-15 23:25:11,613 BAD EPOCHS (no improvement): 2
2022-12-15 23:25:11,615 ----------------------------------------------------------------------------------------------------





2022-12-15 23:25:16,542 epoch 35 - iter 24/245 - loss 0.00668325 - samples/sec: 156.06 - lr: 0.012500
2022-12-15 23:25:22,287 epoch 35 - iter 48/245 - loss 0.00665956 - samples/sec: 133.77 - lr: 0.012500
2022-12-15 23:25:29,208 epoch 35 - iter 72/245 - loss 0.00698320 - samples/sec: 111.03 - lr: 0.012500
2022-12-15 23:25:34,571 epoch 35 - iter 96/245 - loss 0.00723871 - samples/sec: 143.32 - lr: 0.012500
2022-12-15 23:25:39,967 epoch 35 - iter 120/245 - loss 0.00746848 - samples/sec: 142.46 - lr: 0.012500
2022-12-15 23:25:46,381 epoch 35 - iter 144/245 - loss 0.00747135 - samples/sec: 119.82 - lr: 0.012500
2022-12-15 23:25:51,108 epoch 35 - iter 168/245 - loss 0.00751551 - samples/sec: 162.62 - lr: 0.012500
2022-12-15 23:25:57,551 epoch 35 - iter 192/245 - loss 0.00749425 - samples/sec: 119.26 - lr: 0.012500
2022-12-15 23:26:04,439 epoch 35 - iter 216/245 - loss 0.00761419 - samples/sec: 111.56 - lr: 0.012500
2022-12-15 23:26:10,079 epoch 35 - iter 240/245 - loss 0.00775824 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.72it/s]

2022-12-15 23:26:20,895 Evaluating as a multi-label problem: False
2022-12-15 23:26:20,918 DEV : loss 0.034948382526636124 - f1-score (micro avg)  0.8975
2022-12-15 23:26:21,057 BAD EPOCHS (no improvement): 3
2022-12-15 23:26:21,059 ----------------------------------------------------------------------------------------------------





2022-12-15 23:26:29,436 epoch 36 - iter 24/245 - loss 0.00810446 - samples/sec: 91.77 - lr: 0.012500
2022-12-15 23:26:35,150 epoch 36 - iter 48/245 - loss 0.00786966 - samples/sec: 134.49 - lr: 0.012500
2022-12-15 23:26:40,461 epoch 36 - iter 72/245 - loss 0.00738554 - samples/sec: 144.73 - lr: 0.012500
2022-12-15 23:26:45,046 epoch 36 - iter 96/245 - loss 0.00755897 - samples/sec: 167.63 - lr: 0.012500
2022-12-15 23:26:49,818 epoch 36 - iter 120/245 - loss 0.00728690 - samples/sec: 161.09 - lr: 0.012500
2022-12-15 23:26:57,127 epoch 36 - iter 144/245 - loss 0.00827208 - samples/sec: 105.14 - lr: 0.012500
2022-12-15 23:27:02,724 epoch 36 - iter 168/245 - loss 0.00826247 - samples/sec: 137.33 - lr: 0.012500
2022-12-15 23:27:08,112 epoch 36 - iter 192/245 - loss 0.00811300 - samples/sec: 142.66 - lr: 0.012500
2022-12-15 23:27:12,907 epoch 36 - iter 216/245 - loss 0.00809177 - samples/sec: 160.28 - lr: 0.012500
2022-12-15 23:27:19,467 epoch 36 - iter 240/245 - loss 0.00807160 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.54it/s]


2022-12-15 23:27:30,893 Evaluating as a multi-label problem: False
2022-12-15 23:27:30,915 DEV : loss 0.03463789075613022 - f1-score (micro avg)  0.8922
2022-12-15 23:27:31,042 Epoch    36: reducing learning rate of group 0 to 6.2500e-03.
2022-12-15 23:27:31,045 BAD EPOCHS (no improvement): 4
2022-12-15 23:27:31,047 ----------------------------------------------------------------------------------------------------
2022-12-15 23:27:37,185 epoch 37 - iter 24/245 - loss 0.00895508 - samples/sec: 125.23 - lr: 0.006250
2022-12-15 23:27:42,871 epoch 37 - iter 48/245 - loss 0.00753025 - samples/sec: 135.17 - lr: 0.006250
2022-12-15 23:27:48,658 epoch 37 - iter 72/245 - loss 0.00828203 - samples/sec: 132.80 - lr: 0.006250
2022-12-15 23:27:53,828 epoch 37 - iter 96/245 - loss 0.00794043 - samples/sec: 148.66 - lr: 0.006250
2022-12-15 23:28:00,661 epoch 37 - iter 120/245 - loss 0.00755870 - samples/sec: 112.46 - lr: 0.006250
2022-12-15 23:28:06,219 epoch 37 - iter 144/245 - loss 0.00745279 - sa

100%|██████████| 37/37 [00:10<00:00,  3.67it/s]

2022-12-15 23:28:39,216 Evaluating as a multi-label problem: False
2022-12-15 23:28:39,237 DEV : loss 0.03494878485798836 - f1-score (micro avg)  0.8923
2022-12-15 23:28:39,367 BAD EPOCHS (no improvement): 1
2022-12-15 23:28:39,369 ----------------------------------------------------------------------------------------------------





2022-12-15 23:28:45,345 epoch 38 - iter 24/245 - loss 0.00663739 - samples/sec: 128.64 - lr: 0.006250
2022-12-15 23:28:52,420 epoch 38 - iter 48/245 - loss 0.00609812 - samples/sec: 108.61 - lr: 0.006250
2022-12-15 23:28:58,460 epoch 38 - iter 72/245 - loss 0.00726966 - samples/sec: 127.25 - lr: 0.006250
2022-12-15 23:29:04,726 epoch 38 - iter 96/245 - loss 0.00743411 - samples/sec: 122.65 - lr: 0.006250
2022-12-15 23:29:11,543 epoch 38 - iter 120/245 - loss 0.00762640 - samples/sec: 112.72 - lr: 0.006250
2022-12-15 23:29:16,868 epoch 38 - iter 144/245 - loss 0.00776595 - samples/sec: 144.33 - lr: 0.006250
2022-12-15 23:29:21,754 epoch 38 - iter 168/245 - loss 0.00758409 - samples/sec: 157.34 - lr: 0.006250
2022-12-15 23:29:26,713 epoch 38 - iter 192/245 - loss 0.00733328 - samples/sec: 155.00 - lr: 0.006250
2022-12-15 23:29:32,378 epoch 38 - iter 216/245 - loss 0.00748264 - samples/sec: 135.68 - lr: 0.006250
2022-12-15 23:29:37,543 epoch 38 - iter 240/245 - loss 0.00748369 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.72it/s]

2022-12-15 23:29:48,849 Evaluating as a multi-label problem: False
2022-12-15 23:29:48,872 DEV : loss 0.03536289930343628 - f1-score (micro avg)  0.8934
2022-12-15 23:29:49,008 BAD EPOCHS (no improvement): 2
2022-12-15 23:29:49,010 ----------------------------------------------------------------------------------------------------





2022-12-15 23:29:54,614 epoch 39 - iter 24/245 - loss 0.00800882 - samples/sec: 137.17 - lr: 0.006250
2022-12-15 23:29:59,721 epoch 39 - iter 48/245 - loss 0.00750602 - samples/sec: 150.52 - lr: 0.006250
2022-12-15 23:30:07,174 epoch 39 - iter 72/245 - loss 0.00748605 - samples/sec: 103.10 - lr: 0.006250
2022-12-15 23:30:12,717 epoch 39 - iter 96/245 - loss 0.00757584 - samples/sec: 138.66 - lr: 0.006250
2022-12-15 23:30:19,296 epoch 39 - iter 120/245 - loss 0.00732091 - samples/sec: 116.82 - lr: 0.006250
2022-12-15 23:30:24,936 epoch 39 - iter 144/245 - loss 0.00773654 - samples/sec: 136.27 - lr: 0.006250
2022-12-15 23:30:30,143 epoch 39 - iter 168/245 - loss 0.00767398 - samples/sec: 147.62 - lr: 0.006250
2022-12-15 23:30:36,109 epoch 39 - iter 192/245 - loss 0.00773977 - samples/sec: 128.82 - lr: 0.006250
2022-12-15 23:30:42,020 epoch 39 - iter 216/245 - loss 0.00766172 - samples/sec: 130.02 - lr: 0.006250
2022-12-15 23:30:47,447 epoch 39 - iter 240/245 - loss 0.00773529 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.40it/s]

2022-12-15 23:30:59,216 Evaluating as a multi-label problem: False
2022-12-15 23:30:59,237 DEV : loss 0.03524079918861389 - f1-score (micro avg)  0.8933
2022-12-15 23:30:59,363 BAD EPOCHS (no improvement): 3
2022-12-15 23:30:59,365 ----------------------------------------------------------------------------------------------------





2022-12-15 23:31:04,391 epoch 40 - iter 24/245 - loss 0.00570720 - samples/sec: 153.00 - lr: 0.006250
2022-12-15 23:31:10,926 epoch 40 - iter 48/245 - loss 0.00682666 - samples/sec: 117.59 - lr: 0.006250
2022-12-15 23:31:19,292 epoch 40 - iter 72/245 - loss 0.00749827 - samples/sec: 91.84 - lr: 0.006250
2022-12-15 23:31:24,217 epoch 40 - iter 96/245 - loss 0.00704808 - samples/sec: 156.09 - lr: 0.006250
2022-12-15 23:31:29,190 epoch 40 - iter 120/245 - loss 0.00692509 - samples/sec: 154.55 - lr: 0.006250
2022-12-15 23:31:35,298 epoch 40 - iter 144/245 - loss 0.00683587 - samples/sec: 125.82 - lr: 0.006250
2022-12-15 23:31:41,122 epoch 40 - iter 168/245 - loss 0.00703633 - samples/sec: 131.94 - lr: 0.006250
2022-12-15 23:31:46,521 epoch 40 - iter 192/245 - loss 0.00703547 - samples/sec: 142.37 - lr: 0.006250
2022-12-15 23:31:51,332 epoch 40 - iter 216/245 - loss 0.00724106 - samples/sec: 159.79 - lr: 0.006250
2022-12-15 23:31:56,160 epoch 40 - iter 240/245 - loss 0.00738637 - samples/se

100%|██████████| 37/37 [00:09<00:00,  3.79it/s]

2022-12-15 23:32:06,747 Evaluating as a multi-label problem: False
2022-12-15 23:32:06,770 DEV : loss 0.035714857280254364 - f1-score (micro avg)  0.8863
2022-12-15 23:32:06,894 Epoch    40: reducing learning rate of group 0 to 3.1250e-03.
2022-12-15 23:32:06,896 BAD EPOCHS (no improvement): 4
2022-12-15 23:32:06,900 ----------------------------------------------------------------------------------------------------





2022-12-15 23:32:12,760 epoch 41 - iter 24/245 - loss 0.00668577 - samples/sec: 131.16 - lr: 0.003125
2022-12-15 23:32:19,523 epoch 41 - iter 48/245 - loss 0.00585152 - samples/sec: 113.62 - lr: 0.003125
2022-12-15 23:32:23,844 epoch 41 - iter 72/245 - loss 0.00620706 - samples/sec: 177.91 - lr: 0.003125
2022-12-15 23:32:30,539 epoch 41 - iter 96/245 - loss 0.00714716 - samples/sec: 114.79 - lr: 0.003125
2022-12-15 23:32:36,228 epoch 41 - iter 120/245 - loss 0.00732952 - samples/sec: 135.09 - lr: 0.003125
2022-12-15 23:32:41,330 epoch 41 - iter 144/245 - loss 0.00727813 - samples/sec: 150.67 - lr: 0.003125
2022-12-15 23:32:47,516 epoch 41 - iter 168/245 - loss 0.00728558 - samples/sec: 124.25 - lr: 0.003125
2022-12-15 23:32:52,705 epoch 41 - iter 192/245 - loss 0.00726879 - samples/sec: 148.11 - lr: 0.003125
2022-12-15 23:32:58,766 epoch 41 - iter 216/245 - loss 0.00734295 - samples/sec: 126.81 - lr: 0.003125
2022-12-15 23:33:03,392 epoch 41 - iter 240/245 - loss 0.00747417 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.41it/s]


2022-12-15 23:33:15,125 Evaluating as a multi-label problem: False
2022-12-15 23:33:15,150 DEV : loss 0.03561308979988098 - f1-score (micro avg)  0.89
2022-12-15 23:33:15,284 BAD EPOCHS (no improvement): 1
2022-12-15 23:33:15,287 ----------------------------------------------------------------------------------------------------
2022-12-15 23:33:21,603 epoch 42 - iter 24/245 - loss 0.00691721 - samples/sec: 121.70 - lr: 0.003125
2022-12-15 23:33:26,957 epoch 42 - iter 48/245 - loss 0.00726528 - samples/sec: 143.55 - lr: 0.003125
2022-12-15 23:33:33,881 epoch 42 - iter 72/245 - loss 0.00810640 - samples/sec: 110.98 - lr: 0.003125
2022-12-15 23:33:39,272 epoch 42 - iter 96/245 - loss 0.00836689 - samples/sec: 142.58 - lr: 0.003125
2022-12-15 23:33:44,224 epoch 42 - iter 120/245 - loss 0.00818378 - samples/sec: 155.22 - lr: 0.003125
2022-12-15 23:33:51,105 epoch 42 - iter 144/245 - loss 0.00785820 - samples/sec: 111.68 - lr: 0.003125
2022-12-15 23:33:56,285 epoch 42 - iter 168/245 - loss 

100%|██████████| 37/37 [00:09<00:00,  3.72it/s]

2022-12-15 23:34:23,330 Evaluating as a multi-label problem: False
2022-12-15 23:34:23,352 DEV : loss 0.03502008318901062 - f1-score (micro avg)  0.8922
2022-12-15 23:34:23,481 BAD EPOCHS (no improvement): 2
2022-12-15 23:34:23,483 ----------------------------------------------------------------------------------------------------





2022-12-15 23:34:28,555 epoch 43 - iter 24/245 - loss 0.00769678 - samples/sec: 151.59 - lr: 0.003125
2022-12-15 23:34:35,129 epoch 43 - iter 48/245 - loss 0.00774329 - samples/sec: 116.90 - lr: 0.003125
2022-12-15 23:34:41,211 epoch 43 - iter 72/245 - loss 0.00762484 - samples/sec: 126.35 - lr: 0.003125
2022-12-15 23:34:46,430 epoch 43 - iter 96/245 - loss 0.00777302 - samples/sec: 147.27 - lr: 0.003125
2022-12-15 23:34:50,938 epoch 43 - iter 120/245 - loss 0.00752931 - samples/sec: 170.53 - lr: 0.003125
2022-12-15 23:34:58,507 epoch 43 - iter 144/245 - loss 0.00791385 - samples/sec: 101.51 - lr: 0.003125
2022-12-15 23:35:04,075 epoch 43 - iter 168/245 - loss 0.00775537 - samples/sec: 138.04 - lr: 0.003125
2022-12-15 23:35:08,712 epoch 43 - iter 192/245 - loss 0.00779415 - samples/sec: 165.77 - lr: 0.003125
2022-12-15 23:35:16,046 epoch 43 - iter 216/245 - loss 0.00789956 - samples/sec: 104.78 - lr: 0.003125
2022-12-15 23:35:21,668 epoch 43 - iter 240/245 - loss 0.00789342 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.84it/s]


2022-12-15 23:35:32,407 Evaluating as a multi-label problem: False
2022-12-15 23:35:32,432 DEV : loss 0.03572358936071396 - f1-score (micro avg)  0.8931
2022-12-15 23:35:32,568 BAD EPOCHS (no improvement): 3
2022-12-15 23:35:32,570 ----------------------------------------------------------------------------------------------------
2022-12-15 23:35:37,616 epoch 44 - iter 24/245 - loss 0.00714540 - samples/sec: 152.32 - lr: 0.003125
2022-12-15 23:35:44,215 epoch 44 - iter 48/245 - loss 0.00701974 - samples/sec: 116.47 - lr: 0.003125
2022-12-15 23:35:49,385 epoch 44 - iter 72/245 - loss 0.00744287 - samples/sec: 148.65 - lr: 0.003125
2022-12-15 23:35:54,582 epoch 44 - iter 96/245 - loss 0.00711496 - samples/sec: 147.92 - lr: 0.003125
2022-12-15 23:36:01,214 epoch 44 - iter 120/245 - loss 0.00715829 - samples/sec: 115.88 - lr: 0.003125
2022-12-15 23:36:06,139 epoch 44 - iter 144/245 - loss 0.00741338 - samples/sec: 156.07 - lr: 0.003125
2022-12-15 23:36:12,354 epoch 44 - iter 168/245 - los

100%|██████████| 37/37 [00:10<00:00,  3.48it/s]

2022-12-15 23:36:41,392 Evaluating as a multi-label problem: False
2022-12-15 23:36:41,417 DEV : loss 0.035541433840990067 - f1-score (micro avg)  0.8903
2022-12-15 23:36:41,551 Epoch    44: reducing learning rate of group 0 to 1.5625e-03.
2022-12-15 23:36:41,553 BAD EPOCHS (no improvement): 4
2022-12-15 23:36:41,556 ----------------------------------------------------------------------------------------------------





2022-12-15 23:36:47,570 epoch 45 - iter 24/245 - loss 0.00834227 - samples/sec: 127.90 - lr: 0.001563
2022-12-15 23:36:52,592 epoch 45 - iter 48/245 - loss 0.00710847 - samples/sec: 153.08 - lr: 0.001563
2022-12-15 23:36:59,307 epoch 45 - iter 72/245 - loss 0.00743390 - samples/sec: 114.43 - lr: 0.001563
2022-12-15 23:37:05,610 epoch 45 - iter 96/245 - loss 0.00733177 - samples/sec: 121.94 - lr: 0.001563
2022-12-15 23:37:12,628 epoch 45 - iter 120/245 - loss 0.00811917 - samples/sec: 109.51 - lr: 0.001563
2022-12-15 23:37:18,156 epoch 45 - iter 144/245 - loss 0.00783678 - samples/sec: 139.04 - lr: 0.001563
2022-12-15 23:37:24,165 epoch 45 - iter 168/245 - loss 0.00746653 - samples/sec: 127.88 - lr: 0.001563
2022-12-15 23:37:28,842 epoch 45 - iter 192/245 - loss 0.00738708 - samples/sec: 164.36 - lr: 0.001563
2022-12-15 23:37:33,971 epoch 45 - iter 216/245 - loss 0.00750279 - samples/sec: 149.85 - lr: 0.001563
2022-12-15 23:37:40,404 epoch 45 - iter 240/245 - loss 0.00747667 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.69it/s]

2022-12-15 23:37:51,350 Evaluating as a multi-label problem: False
2022-12-15 23:37:51,374 DEV : loss 0.03545683994889259 - f1-score (micro avg)  0.8905
2022-12-15 23:37:51,507 BAD EPOCHS (no improvement): 1
2022-12-15 23:37:51,509 ----------------------------------------------------------------------------------------------------





2022-12-15 23:37:58,151 epoch 46 - iter 24/245 - loss 0.00617512 - samples/sec: 115.75 - lr: 0.001563
2022-12-15 23:38:04,274 epoch 46 - iter 48/245 - loss 0.00824552 - samples/sec: 125.51 - lr: 0.001563
2022-12-15 23:38:10,009 epoch 46 - iter 72/245 - loss 0.00761188 - samples/sec: 134.01 - lr: 0.001563
2022-12-15 23:38:15,921 epoch 46 - iter 96/245 - loss 0.00782098 - samples/sec: 130.00 - lr: 0.001563
2022-12-15 23:38:21,423 epoch 46 - iter 120/245 - loss 0.00807661 - samples/sec: 139.69 - lr: 0.001563
2022-12-15 23:38:26,880 epoch 46 - iter 144/245 - loss 0.00770473 - samples/sec: 140.84 - lr: 0.001563
2022-12-15 23:38:33,333 epoch 46 - iter 168/245 - loss 0.00748593 - samples/sec: 119.11 - lr: 0.001563
2022-12-15 23:38:38,953 epoch 46 - iter 192/245 - loss 0.00765638 - samples/sec: 136.76 - lr: 0.001563
2022-12-15 23:38:45,370 epoch 46 - iter 216/245 - loss 0.00749687 - samples/sec: 119.78 - lr: 0.001563
2022-12-15 23:38:50,810 epoch 46 - iter 240/245 - loss 0.00743430 - samples/s

100%|██████████| 37/37 [00:11<00:00,  3.35it/s]

2022-12-15 23:39:02,970 Evaluating as a multi-label problem: False
2022-12-15 23:39:02,996 DEV : loss 0.03555715084075928 - f1-score (micro avg)  0.8917
2022-12-15 23:39:03,140 BAD EPOCHS (no improvement): 2
2022-12-15 23:39:03,142 ----------------------------------------------------------------------------------------------------





2022-12-15 23:39:08,954 epoch 47 - iter 24/245 - loss 0.00610444 - samples/sec: 132.29 - lr: 0.001563
2022-12-15 23:39:15,017 epoch 47 - iter 48/245 - loss 0.00703853 - samples/sec: 126.77 - lr: 0.001563
2022-12-15 23:39:20,872 epoch 47 - iter 72/245 - loss 0.00668319 - samples/sec: 131.28 - lr: 0.001563
2022-12-15 23:39:27,810 epoch 47 - iter 96/245 - loss 0.00647034 - samples/sec: 110.76 - lr: 0.001563
2022-12-15 23:39:34,211 epoch 47 - iter 120/245 - loss 0.00629132 - samples/sec: 120.04 - lr: 0.001563
2022-12-15 23:39:42,225 epoch 47 - iter 144/245 - loss 0.00677417 - samples/sec: 95.89 - lr: 0.001563
2022-12-15 23:39:47,633 epoch 47 - iter 168/245 - loss 0.00690644 - samples/sec: 142.10 - lr: 0.001563
2022-12-15 23:39:52,638 epoch 47 - iter 192/245 - loss 0.00685584 - samples/sec: 153.60 - lr: 0.001563
2022-12-15 23:39:57,859 epoch 47 - iter 216/245 - loss 0.00711158 - samples/sec: 147.23 - lr: 0.001563
2022-12-15 23:40:02,460 epoch 47 - iter 240/245 - loss 0.00703365 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.44it/s]

2022-12-15 23:40:14,269 Evaluating as a multi-label problem: False
2022-12-15 23:40:14,294 DEV : loss 0.035418882966041565 - f1-score (micro avg)  0.8897
2022-12-15 23:40:14,431 BAD EPOCHS (no improvement): 3
2022-12-15 23:40:14,433 ----------------------------------------------------------------------------------------------------





2022-12-15 23:40:19,598 epoch 48 - iter 24/245 - loss 0.00844975 - samples/sec: 148.85 - lr: 0.001563
2022-12-15 23:40:24,154 epoch 48 - iter 48/245 - loss 0.00809639 - samples/sec: 168.74 - lr: 0.001563
2022-12-15 23:40:30,553 epoch 48 - iter 72/245 - loss 0.00757812 - samples/sec: 120.10 - lr: 0.001563
2022-12-15 23:40:35,801 epoch 48 - iter 96/245 - loss 0.00732206 - samples/sec: 146.47 - lr: 0.001563
2022-12-15 23:40:42,276 epoch 48 - iter 120/245 - loss 0.00732192 - samples/sec: 118.68 - lr: 0.001563
2022-12-15 23:40:48,642 epoch 48 - iter 144/245 - loss 0.00764993 - samples/sec: 120.74 - lr: 0.001563
2022-12-15 23:40:53,966 epoch 48 - iter 168/245 - loss 0.00729898 - samples/sec: 144.37 - lr: 0.001563
2022-12-15 23:41:00,132 epoch 48 - iter 192/245 - loss 0.00732454 - samples/sec: 124.63 - lr: 0.001563
2022-12-15 23:41:05,316 epoch 48 - iter 216/245 - loss 0.00721489 - samples/sec: 148.29 - lr: 0.001563
2022-12-15 23:41:12,699 epoch 48 - iter 240/245 - loss 0.00709498 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.71it/s]

2022-12-15 23:41:23,648 Evaluating as a multi-label problem: False
2022-12-15 23:41:23,671 DEV : loss 0.035394567996263504 - f1-score (micro avg)  0.8916
2022-12-15 23:41:23,805 Epoch    48: reducing learning rate of group 0 to 7.8125e-04.
2022-12-15 23:41:23,807 BAD EPOCHS (no improvement): 4
2022-12-15 23:41:23,808 ----------------------------------------------------------------------------------------------------





2022-12-15 23:41:28,635 epoch 49 - iter 24/245 - loss 0.00686675 - samples/sec: 159.35 - lr: 0.000781
2022-12-15 23:41:34,622 epoch 49 - iter 48/245 - loss 0.00632789 - samples/sec: 128.36 - lr: 0.000781
2022-12-15 23:41:40,532 epoch 49 - iter 72/245 - loss 0.00609964 - samples/sec: 130.03 - lr: 0.000781
2022-12-15 23:41:46,941 epoch 49 - iter 96/245 - loss 0.00658742 - samples/sec: 119.91 - lr: 0.000781
2022-12-15 23:41:54,330 epoch 49 - iter 120/245 - loss 0.00714936 - samples/sec: 104.00 - lr: 0.000781
2022-12-15 23:41:59,843 epoch 49 - iter 144/245 - loss 0.00741282 - samples/sec: 139.41 - lr: 0.000781
2022-12-15 23:42:05,507 epoch 49 - iter 168/245 - loss 0.00725508 - samples/sec: 135.73 - lr: 0.000781
2022-12-15 23:42:11,104 epoch 49 - iter 192/245 - loss 0.00752202 - samples/sec: 137.33 - lr: 0.000781
2022-12-15 23:42:17,555 epoch 49 - iter 216/245 - loss 0.00748266 - samples/sec: 119.14 - lr: 0.000781
2022-12-15 23:42:22,682 epoch 49 - iter 240/245 - loss 0.00748094 - samples/s

100%|██████████| 37/37 [00:11<00:00,  3.36it/s]

2022-12-15 23:42:34,516 Evaluating as a multi-label problem: False
2022-12-15 23:42:34,540 DEV : loss 0.03555220365524292 - f1-score (micro avg)  0.889
2022-12-15 23:42:34,669 BAD EPOCHS (no improvement): 1
2022-12-15 23:42:34,671 ----------------------------------------------------------------------------------------------------





2022-12-15 23:42:39,765 epoch 50 - iter 24/245 - loss 0.00679563 - samples/sec: 150.91 - lr: 0.000781
2022-12-15 23:42:44,679 epoch 50 - iter 48/245 - loss 0.00665822 - samples/sec: 156.42 - lr: 0.000781
2022-12-15 23:42:49,594 epoch 50 - iter 72/245 - loss 0.00755486 - samples/sec: 156.39 - lr: 0.000781
2022-12-15 23:42:55,610 epoch 50 - iter 96/245 - loss 0.00739975 - samples/sec: 127.75 - lr: 0.000781
2022-12-15 23:43:01,263 epoch 50 - iter 120/245 - loss 0.00743119 - samples/sec: 135.97 - lr: 0.000781
2022-12-15 23:43:09,440 epoch 50 - iter 144/245 - loss 0.00762778 - samples/sec: 93.97 - lr: 0.000781
2022-12-15 23:43:14,764 epoch 50 - iter 168/245 - loss 0.00754695 - samples/sec: 144.36 - lr: 0.000781
2022-12-15 23:43:20,656 epoch 50 - iter 192/245 - loss 0.00735202 - samples/sec: 130.44 - lr: 0.000781
2022-12-15 23:43:27,511 epoch 50 - iter 216/245 - loss 0.00725760 - samples/sec: 112.11 - lr: 0.000781
2022-12-15 23:43:33,471 epoch 50 - iter 240/245 - loss 0.00722071 - samples/se

100%|██████████| 37/37 [00:09<00:00,  3.74it/s]

2022-12-15 23:43:44,229 Evaluating as a multi-label problem: False
2022-12-15 23:43:44,251 DEV : loss 0.035571396350860596 - f1-score (micro avg)  0.8882
2022-12-15 23:43:44,385 BAD EPOCHS (no improvement): 2
2022-12-15 23:43:44,386 ----------------------------------------------------------------------------------------------------





2022-12-15 23:43:50,318 epoch 51 - iter 24/245 - loss 0.00658672 - samples/sec: 129.61 - lr: 0.000781
2022-12-15 23:43:56,335 epoch 51 - iter 48/245 - loss 0.00631476 - samples/sec: 127.73 - lr: 0.000781
2022-12-15 23:44:02,265 epoch 51 - iter 72/245 - loss 0.00624902 - samples/sec: 129.61 - lr: 0.000781
2022-12-15 23:44:07,055 epoch 51 - iter 96/245 - loss 0.00631890 - samples/sec: 160.49 - lr: 0.000781
2022-12-15 23:44:12,907 epoch 51 - iter 120/245 - loss 0.00704744 - samples/sec: 131.33 - lr: 0.000781
2022-12-15 23:44:18,948 epoch 51 - iter 144/245 - loss 0.00689887 - samples/sec: 127.22 - lr: 0.000781
2022-12-15 23:44:24,465 epoch 51 - iter 168/245 - loss 0.00673511 - samples/sec: 139.32 - lr: 0.000781
2022-12-15 23:44:32,727 epoch 51 - iter 192/245 - loss 0.00671438 - samples/sec: 93.01 - lr: 0.000781
2022-12-15 23:44:37,975 epoch 51 - iter 216/245 - loss 0.00695038 - samples/sec: 146.45 - lr: 0.000781
2022-12-15 23:44:43,426 epoch 51 - iter 240/245 - loss 0.00683220 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.67it/s]

2022-12-15 23:44:54,865 Evaluating as a multi-label problem: False
2022-12-15 23:44:54,888 DEV : loss 0.03548908978700638 - f1-score (micro avg)  0.89
2022-12-15 23:44:55,023 BAD EPOCHS (no improvement): 3
2022-12-15 23:44:55,027 ----------------------------------------------------------------------------------------------------





2022-12-15 23:45:00,606 epoch 52 - iter 24/245 - loss 0.00710273 - samples/sec: 137.78 - lr: 0.000781
2022-12-15 23:45:05,640 epoch 52 - iter 48/245 - loss 0.00736617 - samples/sec: 152.67 - lr: 0.000781
2022-12-15 23:45:11,280 epoch 52 - iter 72/245 - loss 0.00723066 - samples/sec: 136.30 - lr: 0.000781
2022-12-15 23:45:20,973 epoch 52 - iter 96/245 - loss 0.00713478 - samples/sec: 79.26 - lr: 0.000781
2022-12-15 23:45:26,672 epoch 52 - iter 120/245 - loss 0.00729291 - samples/sec: 134.88 - lr: 0.000781
2022-12-15 23:45:32,732 epoch 52 - iter 144/245 - loss 0.00726305 - samples/sec: 126.81 - lr: 0.000781
2022-12-15 23:45:37,592 epoch 52 - iter 168/245 - loss 0.00724526 - samples/sec: 158.16 - lr: 0.000781
2022-12-15 23:45:42,099 epoch 52 - iter 192/245 - loss 0.00715929 - samples/sec: 170.58 - lr: 0.000781
2022-12-15 23:45:47,032 epoch 52 - iter 216/245 - loss 0.00701965 - samples/sec: 155.80 - lr: 0.000781
2022-12-15 23:45:53,324 epoch 52 - iter 240/245 - loss 0.00701068 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.46it/s]

2022-12-15 23:46:05,179 Evaluating as a multi-label problem: False
2022-12-15 23:46:05,202 DEV : loss 0.035630691796541214 - f1-score (micro avg)  0.8901
2022-12-15 23:46:05,339 Epoch    52: reducing learning rate of group 0 to 3.9063e-04.
2022-12-15 23:46:05,340 BAD EPOCHS (no improvement): 4
2022-12-15 23:46:05,346 ----------------------------------------------------------------------------------------------------





2022-12-15 23:46:11,967 epoch 53 - iter 24/245 - loss 0.00673890 - samples/sec: 116.09 - lr: 0.000391
2022-12-15 23:46:17,109 epoch 53 - iter 48/245 - loss 0.00596844 - samples/sec: 149.51 - lr: 0.000391
2022-12-15 23:46:22,703 epoch 53 - iter 72/245 - loss 0.00668473 - samples/sec: 137.40 - lr: 0.000391
2022-12-15 23:46:27,481 epoch 53 - iter 96/245 - loss 0.00662421 - samples/sec: 160.87 - lr: 0.000391
2022-12-15 23:46:33,403 epoch 53 - iter 120/245 - loss 0.00685039 - samples/sec: 129.78 - lr: 0.000391
2022-12-15 23:46:38,463 epoch 53 - iter 144/245 - loss 0.00662366 - samples/sec: 151.90 - lr: 0.000391
2022-12-15 23:46:43,459 epoch 53 - iter 168/245 - loss 0.00692946 - samples/sec: 153.82 - lr: 0.000391
2022-12-15 23:46:48,973 epoch 53 - iter 192/245 - loss 0.00666774 - samples/sec: 139.39 - lr: 0.000391
2022-12-15 23:46:55,481 epoch 53 - iter 216/245 - loss 0.00671520 - samples/sec: 118.09 - lr: 0.000391
2022-12-15 23:47:03,748 epoch 53 - iter 240/245 - loss 0.00677187 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.67it/s]

2022-12-15 23:47:14,994 Evaluating as a multi-label problem: False
2022-12-15 23:47:15,016 DEV : loss 0.035685762763023376 - f1-score (micro avg)  0.8898
2022-12-15 23:47:15,149 BAD EPOCHS (no improvement): 1
2022-12-15 23:47:15,152 ----------------------------------------------------------------------------------------------------





2022-12-15 23:47:22,743 epoch 54 - iter 24/245 - loss 0.00529054 - samples/sec: 101.26 - lr: 0.000391
2022-12-15 23:47:29,974 epoch 54 - iter 48/245 - loss 0.00576506 - samples/sec: 106.27 - lr: 0.000391
2022-12-15 23:47:34,982 epoch 54 - iter 72/245 - loss 0.00612709 - samples/sec: 153.47 - lr: 0.000391
2022-12-15 23:47:39,730 epoch 54 - iter 96/245 - loss 0.00619416 - samples/sec: 161.92 - lr: 0.000391
2022-12-15 23:47:45,244 epoch 54 - iter 120/245 - loss 0.00664890 - samples/sec: 139.37 - lr: 0.000391
2022-12-15 23:47:50,273 epoch 54 - iter 144/245 - loss 0.00706668 - samples/sec: 152.86 - lr: 0.000391
2022-12-15 23:47:57,463 epoch 54 - iter 168/245 - loss 0.00702117 - samples/sec: 106.88 - lr: 0.000391
2022-12-15 23:48:02,638 epoch 54 - iter 192/245 - loss 0.00706326 - samples/sec: 148.50 - lr: 0.000391
2022-12-15 23:48:08,206 epoch 54 - iter 216/245 - loss 0.00720293 - samples/sec: 138.05 - lr: 0.000391
2022-12-15 23:48:14,358 epoch 54 - iter 240/245 - loss 0.00705461 - samples/s

100%|██████████| 37/37 [00:11<00:00,  3.34it/s]

2022-12-15 23:48:26,423 Evaluating as a multi-label problem: False
2022-12-15 23:48:26,447 DEV : loss 0.035681385546922684 - f1-score (micro avg)  0.8913
2022-12-15 23:48:26,584 BAD EPOCHS (no improvement): 2
2022-12-15 23:48:26,586 ----------------------------------------------------------------------------------------------------





2022-12-15 23:48:32,004 epoch 55 - iter 24/245 - loss 0.00818410 - samples/sec: 141.93 - lr: 0.000391
2022-12-15 23:48:37,976 epoch 55 - iter 48/245 - loss 0.00770026 - samples/sec: 128.68 - lr: 0.000391
2022-12-15 23:48:43,290 epoch 55 - iter 72/245 - loss 0.00783347 - samples/sec: 144.66 - lr: 0.000391
2022-12-15 23:48:50,192 epoch 55 - iter 96/245 - loss 0.00705417 - samples/sec: 111.35 - lr: 0.000391
2022-12-15 23:48:56,636 epoch 55 - iter 120/245 - loss 0.00759185 - samples/sec: 119.27 - lr: 0.000391
2022-12-15 23:49:02,022 epoch 55 - iter 144/245 - loss 0.00729633 - samples/sec: 142.71 - lr: 0.000391
2022-12-15 23:49:07,302 epoch 55 - iter 168/245 - loss 0.00704415 - samples/sec: 145.56 - lr: 0.000391
2022-12-15 23:49:13,733 epoch 55 - iter 192/245 - loss 0.00710268 - samples/sec: 119.51 - lr: 0.000391
2022-12-15 23:49:18,384 epoch 55 - iter 216/245 - loss 0.00710597 - samples/sec: 165.27 - lr: 0.000391
2022-12-15 23:49:24,810 epoch 55 - iter 240/245 - loss 0.00726156 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.66it/s]

2022-12-15 23:49:35,919 Evaluating as a multi-label problem: False
2022-12-15 23:49:35,943 DEV : loss 0.03565187379717827 - f1-score (micro avg)  0.8913
2022-12-15 23:49:36,082 BAD EPOCHS (no improvement): 3
2022-12-15 23:49:36,086 ----------------------------------------------------------------------------------------------------





2022-12-15 23:49:42,944 epoch 56 - iter 24/245 - loss 0.00878279 - samples/sec: 112.06 - lr: 0.000391
2022-12-15 23:49:47,862 epoch 56 - iter 48/245 - loss 0.00979479 - samples/sec: 156.28 - lr: 0.000391
2022-12-15 23:49:53,090 epoch 56 - iter 72/245 - loss 0.00825337 - samples/sec: 147.04 - lr: 0.000391
2022-12-15 23:49:59,017 epoch 56 - iter 96/245 - loss 0.00737849 - samples/sec: 129.67 - lr: 0.000391
2022-12-15 23:50:06,100 epoch 56 - iter 120/245 - loss 0.00726007 - samples/sec: 108.49 - lr: 0.000391
2022-12-15 23:50:12,461 epoch 56 - iter 144/245 - loss 0.00731062 - samples/sec: 120.82 - lr: 0.000391
2022-12-15 23:50:17,392 epoch 56 - iter 168/245 - loss 0.00722211 - samples/sec: 155.88 - lr: 0.000391
2022-12-15 23:50:22,912 epoch 56 - iter 192/245 - loss 0.00706391 - samples/sec: 139.27 - lr: 0.000391
2022-12-15 23:50:28,698 epoch 56 - iter 216/245 - loss 0.00705743 - samples/sec: 132.83 - lr: 0.000391
2022-12-15 23:50:35,121 epoch 56 - iter 240/245 - loss 0.00690463 - samples/s

100%|██████████| 37/37 [00:09<00:00,  3.70it/s]

2022-12-15 23:50:45,910 Evaluating as a multi-label problem: False
2022-12-15 23:50:45,932 DEV : loss 0.035618700087070465 - f1-score (micro avg)  0.8902
2022-12-15 23:50:46,066 Epoch    56: reducing learning rate of group 0 to 1.9531e-04.
2022-12-15 23:50:46,068 BAD EPOCHS (no improvement): 4
2022-12-15 23:50:46,071 ----------------------------------------------------------------------------------------------------





2022-12-15 23:50:52,522 epoch 57 - iter 24/245 - loss 0.00819368 - samples/sec: 119.15 - lr: 0.000195
2022-12-15 23:50:58,909 epoch 57 - iter 48/245 - loss 0.00726786 - samples/sec: 120.32 - lr: 0.000195
2022-12-15 23:51:05,382 epoch 57 - iter 72/245 - loss 0.00714264 - samples/sec: 118.73 - lr: 0.000195
2022-12-15 23:51:10,683 epoch 57 - iter 96/245 - loss 0.00661354 - samples/sec: 144.97 - lr: 0.000195
2022-12-15 23:51:17,845 epoch 57 - iter 120/245 - loss 0.00702420 - samples/sec: 107.30 - lr: 0.000195
2022-12-15 23:51:23,048 epoch 57 - iter 144/245 - loss 0.00702116 - samples/sec: 147.75 - lr: 0.000195
2022-12-15 23:51:27,920 epoch 57 - iter 168/245 - loss 0.00698959 - samples/sec: 157.76 - lr: 0.000195
2022-12-15 23:51:33,350 epoch 57 - iter 192/245 - loss 0.00677020 - samples/sec: 141.54 - lr: 0.000195
2022-12-15 23:51:39,827 epoch 57 - iter 216/245 - loss 0.00695834 - samples/sec: 118.65 - lr: 0.000195
2022-12-15 23:51:45,503 epoch 57 - iter 240/245 - loss 0.00714746 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.46it/s]

2022-12-15 23:51:57,394 Evaluating as a multi-label problem: False
2022-12-15 23:51:57,418 DEV : loss 0.035603173077106476 - f1-score (micro avg)  0.8898
2022-12-15 23:51:57,552 BAD EPOCHS (no improvement): 1
2022-12-15 23:51:57,554 ----------------------------------------------------------------------------------------------------





2022-12-15 23:52:04,605 epoch 58 - iter 24/245 - loss 0.00610341 - samples/sec: 109.02 - lr: 0.000195
2022-12-15 23:52:09,785 epoch 58 - iter 48/245 - loss 0.00642826 - samples/sec: 148.38 - lr: 0.000195
2022-12-15 23:52:14,970 epoch 58 - iter 72/245 - loss 0.00714228 - samples/sec: 148.25 - lr: 0.000195
2022-12-15 23:52:21,091 epoch 58 - iter 96/245 - loss 0.00697135 - samples/sec: 125.56 - lr: 0.000195
2022-12-15 23:52:27,048 epoch 58 - iter 120/245 - loss 0.00654548 - samples/sec: 129.03 - lr: 0.000195
2022-12-15 23:52:31,834 epoch 58 - iter 144/245 - loss 0.00637312 - samples/sec: 160.62 - lr: 0.000195
2022-12-15 23:52:38,273 epoch 58 - iter 168/245 - loss 0.00648585 - samples/sec: 119.35 - lr: 0.000195
2022-12-15 23:52:45,232 epoch 58 - iter 192/245 - loss 0.00682250 - samples/sec: 110.43 - lr: 0.000195
2022-12-15 23:52:50,323 epoch 58 - iter 216/245 - loss 0.00702892 - samples/sec: 150.97 - lr: 0.000195
2022-12-15 23:52:57,037 epoch 58 - iter 240/245 - loss 0.00696824 - samples/s

100%|██████████| 37/37 [00:10<00:00,  3.67it/s]

2022-12-15 23:53:08,121 Evaluating as a multi-label problem: False
2022-12-15 23:53:08,146 DEV : loss 0.03560180962085724 - f1-score (micro avg)  0.8896
2022-12-15 23:53:08,279 BAD EPOCHS (no improvement): 2
2022-12-15 23:53:08,282 ----------------------------------------------------------------------------------------------------





2022-12-15 23:53:14,249 epoch 59 - iter 24/245 - loss 0.00641920 - samples/sec: 128.81 - lr: 0.000195
2022-12-15 23:53:19,689 epoch 59 - iter 48/245 - loss 0.00735354 - samples/sec: 141.29 - lr: 0.000195
2022-12-15 23:53:26,880 epoch 59 - iter 72/245 - loss 0.00723171 - samples/sec: 106.86 - lr: 0.000195
2022-12-15 23:53:32,287 epoch 59 - iter 96/245 - loss 0.00728061 - samples/sec: 142.15 - lr: 0.000195
2022-12-15 23:53:38,058 epoch 59 - iter 120/245 - loss 0.00755098 - samples/sec: 133.17 - lr: 0.000195
2022-12-15 23:53:43,418 epoch 59 - iter 144/245 - loss 0.00751893 - samples/sec: 143.40 - lr: 0.000195
2022-12-15 23:53:48,493 epoch 59 - iter 168/245 - loss 0.00747900 - samples/sec: 151.45 - lr: 0.000195
2022-12-15 23:53:54,247 epoch 59 - iter 192/245 - loss 0.00728050 - samples/sec: 133.58 - lr: 0.000195
2022-12-15 23:54:02,487 epoch 59 - iter 216/245 - loss 0.00707707 - samples/sec: 93.25 - lr: 0.000195
2022-12-15 23:54:08,726 epoch 59 - iter 240/245 - loss 0.00719397 - samples/se

100%|██████████| 37/37 [00:10<00:00,  3.65it/s]

2022-12-15 23:54:19,720 Evaluating as a multi-label problem: False
2022-12-15 23:54:19,745 DEV : loss 0.03561580553650856 - f1-score (micro avg)  0.8899
2022-12-15 23:54:19,876 BAD EPOCHS (no improvement): 3
2022-12-15 23:54:19,879 ----------------------------------------------------------------------------------------------------





2022-12-15 23:54:25,136 epoch 60 - iter 24/245 - loss 0.00832094 - samples/sec: 146.22 - lr: 0.000195
2022-12-15 23:54:31,173 epoch 60 - iter 48/245 - loss 0.00701704 - samples/sec: 127.31 - lr: 0.000195
2022-12-15 23:54:38,188 epoch 60 - iter 72/245 - loss 0.00663483 - samples/sec: 109.54 - lr: 0.000195
2022-12-15 23:54:43,957 epoch 60 - iter 96/245 - loss 0.00665982 - samples/sec: 133.23 - lr: 0.000195
2022-12-15 23:54:48,994 epoch 60 - iter 120/245 - loss 0.00692155 - samples/sec: 152.59 - lr: 0.000195
2022-12-15 23:54:53,815 epoch 60 - iter 144/245 - loss 0.00710772 - samples/sec: 159.44 - lr: 0.000195
2022-12-15 23:55:00,988 epoch 60 - iter 168/245 - loss 0.00737087 - samples/sec: 107.14 - lr: 0.000195
2022-12-15 23:55:06,988 epoch 60 - iter 192/245 - loss 0.00725396 - samples/sec: 128.08 - lr: 0.000195
2022-12-15 23:55:12,053 epoch 60 - iter 216/245 - loss 0.00724795 - samples/sec: 151.77 - lr: 0.000195
2022-12-15 23:55:19,060 epoch 60 - iter 240/245 - loss 0.00730834 - samples/s

100%|██████████| 37/37 [00:11<00:00,  3.31it/s]

2022-12-15 23:55:31,103 Evaluating as a multi-label problem: False
2022-12-15 23:55:31,126 DEV : loss 0.03563614562153816 - f1-score (micro avg)  0.891
2022-12-15 23:55:31,257 Epoch    60: reducing learning rate of group 0 to 9.7656e-05.
2022-12-15 23:55:31,259 BAD EPOCHS (no improvement): 4
2022-12-15 23:55:31,263 ----------------------------------------------------------------------------------------------------
2022-12-15 23:55:31,265 ----------------------------------------------------------------------------------------------------
2022-12-15 23:55:31,267 learning rate too small - quitting training!
2022-12-15 23:55:31,269 ----------------------------------------------------------------------------------------------------





2022-12-15 23:55:31,637 ----------------------------------------------------------------------------------------------------
2022-12-15 23:55:31,639 loading file resources/taggers/sota-ner-flair/best-model.pt
2022-12-15 23:55:32,304 SequenceTagger predicts: Dictionary with 27 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-TEMPO, B-TEMPO, E-TEMPO, I-TEMPO, S-JURISPRUDENCIA, B-JURISPRUDENCIA, E-JURISPRUDENCIA, I-JURISPRUDENCIA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, <START>, <STOP>


100%|██████████| 44/44 [00:50<00:00,  1.15s/it]

2022-12-15 23:56:23,221 Evaluating as a multi-label problem: False
2022-12-15 23:56:23,241 0.8899	0.8997	0.8948	0.8187
2022-12-15 23:56:23,242 
Results:
- F-score (micro) 0.8948
- F-score (macro) 0.8802
- Accuracy 0.8187

By class:
                precision    recall  f1-score   support

   ORGANIZACAO     0.8595    0.8423    0.8508       501
    LEGISLACAO     0.9409    0.9683    0.9544       378
        PESSOA     0.9170    0.9485    0.9325       233
JURISPRUDENCIA     0.8571    0.9081    0.8819       185
         TEMPO     0.9027    0.8698    0.8859       192
         LOCAL     0.7451    0.8085    0.7755        47

     micro avg     0.8899    0.8997    0.8948      1536
     macro avg     0.8704    0.8909    0.8802      1536
  weighted avg     0.8899    0.8997    0.8945      1536

2022-12-15 23:56:23,244 ----------------------------------------------------------------------------------------------------





{'test_score': 0.8947879572677242,
 'dev_score_history': [0.5959595959595959,
  0.7385892116182572,
  0.7973967176004527,
  0.7867378477100309,
  0.8264417845484223,
  0.8140837556681781,
  0.8287800919664593,
  0.8443213296398894,
  0.8572987721691678,
  0.8402893711741792,
  0.8522975929978118,
  0.8730245231607631,
  0.8789740849585893,
  0.8574502420656267,
  0.8776019983347211,
  0.8681408681408682,
  0.8757814623539005,
  0.8761230601687994,
  0.8906549739654699,
  0.878550175818231,
  0.8827361563517915,
  0.8868999186330351,
  0.8852997536271558,
  0.8827738029719318,
  0.8861171366594359,
  0.8935936211163046,
  0.8882224645583425,
  0.8981481481481481,
  0.8929447017161536,
  0.8973591069970052,
  0.8885236921391401,
  0.88998088998089,
  0.8892525913802509,
  0.8905228758169934,
  0.8974707642099539,
  0.8922237380627558,
  0.8922659430122115,
  0.8933623503808488,
  0.893336989306279,
  0.8863325116406463,
  0.8900409276944066,
  0.8921729611384783,
  0.8930714675395527,
  

## Vetores Estático e de Contexto concatenados (Pt-Wiki-Fasttext e Flair Embeddings)


### Imports

In [None]:
## Imports
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mount Google Drive at /content/drive (requires the Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Load the column-formatted corpus from Drive
# Folder in which the train, dev and test files reside
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig'

# Column index -> annotation layer (column 0 is the text, column 1 the NER tag)
columns = {0: 'text', 1: 'ner'}

# File name of each split inside data_folder
split_files = {
    'train_file': 'train.txt',
    'test_file': 'test.txt',
    'dev_file': 'dev.txt',
}

# Build the corpus from the data folder, the column layout and the split files
corpus: Corpus = ColumnCorpus(data_folder, columns, **split_files)

## Task
# Label type used for the label dictionary and the tagger
label_type = 'ner'

2022-09-11 03:40:03,613 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig
2022-09-11 03:40:03,619 Train: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/train.txt
2022-09-11 03:40:03,621 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/dev.txt
2022-09-11 03:40:03,624 Test: /content/drive/MyDrive/Flair_NLP/Corpus/Lener_br/Orig/test.txt


In [None]:
## Label dictionary
# Build the label dictionary for `label_type` from the corpus and print it
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

2022-09-11 03:40:13,242 Computing label dictionary. Progress:


7827it [00:00, 28374.72it/s]

2022-09-11 03:40:13,572 Dictionary created for label 'ner' with 7 values: ORGANIZACAO (seen 2400 times), LEGISLACAO (seen 1920 times), PESSOA (seen 1525 times), TEMPO (seen 1334 times), JURISPRUDENCIA (seen 1104 times), LOCAL (seen 611 times)
Dictionary with 7 tags: <unk>, ORGANIZACAO, LEGISLACAO, PESSOA, TEMPO, JURISPRUDENCIA, LOCAL





### Embeddings

In [None]:
## Stacked embeddings
# Static word vectors for Portuguese
static_pt = WordEmbeddings('pt')

# Contextual Flair embeddings for Portuguese, one per direction
flair_forward = FlairEmbeddings('pt-forward')
flair_backward = FlairEmbeddings('pt-backward')

# Stack the static and contextual embeddings into a single embedding
embedding_types = [static_pt, flair_forward, flair_backward]
embeddings = StackedEmbeddings(embeddings=embedding_types)

2022-09-11 03:40:14,711 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M.vectors.npy not found in cache, downloading to /tmp/tmpqoqis1c5


100%|██████████| 710528528/710528528 [01:06<00:00, 10706795.83B/s]

2022-09-11 03:41:21,924 copying /tmp/tmpqoqis1c5 to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M.vectors.npy





2022-09-11 03:41:23,977 removing temp file /tmp/tmpqoqis1c5
2022-09-11 03:41:25,214 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M not found in cache, downloading to /tmp/tmpdn1ngcky


100%|██████████| 23541010/23541010 [00:03<00:00, 6159174.95B/s] 

2022-09-11 03:41:29,885 copying /tmp/tmpdn1ngcky to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M
2022-09-11 03:41:29,918 removing temp file /tmp/tmpdn1ngcky





2022-09-11 03:41:33,958 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-forward.pt not found in cache, downloading to /tmp/tmpohy5wift


100%|██████████| 72819080/72819080 [00:08<00:00, 8544064.06B/s] 

2022-09-11 03:41:43,315 copying /tmp/tmpohy5wift to cache at /root/.flair/embeddings/lm-pt-forward.pt
2022-09-11 03:41:43,411 removing temp file /tmp/tmpohy5wift





2022-09-11 03:41:54,449 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-backward.pt not found in cache, downloading to /tmp/tmpprh54y5r


100%|██████████| 72819080/72819080 [00:08<00:00, 8713097.57B/s] 

2022-09-11 03:42:03,885 copying /tmp/tmpprh54y5r to cache at /root/.flair/embeddings/lm-pt-backward.pt
2022-09-11 03:42:03,981 removing temp file /tmp/tmpprh54y5r





### Treino

In [None]:
## Initialize the model
# Sequence tagger settings: CRF enabled, using the stacked embeddings
# and the label dictionary built from the corpus
tagger_config = {
    'hidden_size': 256,
    'embeddings': embeddings,
    'tag_dictionary': label_dict,
    'tag_type': label_type,
    'use_crf': True,
}

tagger = SequenceTagger(**tagger_config)

2022-09-11 03:42:04,208 SequenceTagger predicts: Dictionary with 25 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-TEMPO, B-TEMPO, E-TEMPO, I-TEMPO, S-JURISPRUDENCIA, B-JURISPRUDENCIA, E-JURISPRUDENCIA, I-JURISPRUDENCIA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL


In [None]:
## Train the model
# Output folder passed to the trainer.
# NOTE(review): other experiments in this notebook use this same folder, so
# their saved files may be overwritten - consider one folder per experiment.
output_dir = 'resources/taggers/sota-ner-flair'

# Training hyperparameters
train_params = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 150,
}

# Bind the tagger to the corpus
trainer = ModelTrainer(tagger, corpus)

# Start training; kept as the last expression so the notebook shows the result
trainer.train(output_dir, **train_params)

2022-09-11 06:45:31,812 ----------------------------------------------------------------------------------------------------
2022-09-11 06:45:31,815 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'pt'
      (embedding): Embedding(592108, 300)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4396, out_features=4396, bias=T

  "There should be no best model saved at epoch 1 except there "


2022-09-11 06:45:39,867 epoch 1 - iter 24/245 - loss 0.00537267 - samples/sec: 95.97 - lr: 0.100000
2022-09-11 06:45:46,894 epoch 1 - iter 48/245 - loss 0.00634968 - samples/sec: 109.38 - lr: 0.100000
2022-09-11 06:45:53,297 epoch 1 - iter 72/245 - loss 0.00599587 - samples/sec: 120.04 - lr: 0.100000
2022-09-11 06:46:00,257 epoch 1 - iter 96/245 - loss 0.00604685 - samples/sec: 110.43 - lr: 0.100000
2022-09-11 06:46:05,258 epoch 1 - iter 120/245 - loss 0.00586165 - samples/sec: 153.76 - lr: 0.100000
2022-09-11 06:46:10,997 epoch 1 - iter 144/245 - loss 0.00646426 - samples/sec: 133.95 - lr: 0.100000
2022-09-11 06:46:17,571 epoch 1 - iter 168/245 - loss 0.00642237 - samples/sec: 116.89 - lr: 0.100000
2022-09-11 06:46:25,013 epoch 1 - iter 192/245 - loss 0.00674661 - samples/sec: 103.28 - lr: 0.100000
2022-09-11 06:46:32,381 epoch 1 - iter 216/245 - loss 0.00690267 - samples/sec: 104.32 - lr: 0.100000
2022-09-11 06:46:39,770 epoch 1 - iter 240/245 - loss 0.00699894 - samples/sec: 104.02 

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 06:46:55,478 Evaluating as a multi-label problem: False
2022-09-11 06:46:55,508 DEV : loss 0.04608402028679848 - f1-score (micro avg)  0.8865





2022-09-11 06:46:55,746 BAD EPOCHS (no improvement): 0
2022-09-11 06:46:55,749 saving best model
2022-09-11 06:46:59,932 ----------------------------------------------------------------------------------------------------
2022-09-11 06:47:06,361 epoch 2 - iter 24/245 - loss 0.00673832 - samples/sec: 119.58 - lr: 0.100000
2022-09-11 06:47:13,589 epoch 2 - iter 48/245 - loss 0.00568705 - samples/sec: 106.34 - lr: 0.100000
2022-09-11 06:47:22,069 epoch 2 - iter 72/245 - loss 0.00582133 - samples/sec: 90.62 - lr: 0.100000
2022-09-11 06:47:29,196 epoch 2 - iter 96/245 - loss 0.00639223 - samples/sec: 107.85 - lr: 0.100000
2022-09-11 06:47:37,077 epoch 2 - iter 120/245 - loss 0.00676326 - samples/sec: 97.52 - lr: 0.100000
2022-09-11 06:47:44,135 epoch 2 - iter 144/245 - loss 0.00724198 - samples/sec: 108.90 - lr: 0.100000
2022-09-11 06:47:50,857 epoch 2 - iter 168/245 - loss 0.00748454 - samples/sec: 114.35 - lr: 0.100000
2022-09-11 06:47:57,104 epoch 2 - iter 192/245 - loss 0.00742090 - sam

100%|██████████| 37/37 [00:13<00:00,  2.78it/s]

2022-09-11 06:48:26,862 Evaluating as a multi-label problem: False
2022-09-11 06:48:26,894 DEV : loss 0.039582714438438416 - f1-score (micro avg)  0.8829





2022-09-11 06:48:27,117 BAD EPOCHS (no improvement): 1
2022-09-11 06:48:27,120 ----------------------------------------------------------------------------------------------------
2022-09-11 06:48:35,207 epoch 3 - iter 24/245 - loss 0.01029669 - samples/sec: 95.06 - lr: 0.100000
2022-09-11 06:48:41,747 epoch 3 - iter 48/245 - loss 0.00816216 - samples/sec: 117.52 - lr: 0.100000
2022-09-11 06:48:48,682 epoch 3 - iter 72/245 - loss 0.00777300 - samples/sec: 110.83 - lr: 0.100000
2022-09-11 06:48:55,626 epoch 3 - iter 96/245 - loss 0.00804909 - samples/sec: 110.68 - lr: 0.100000
2022-09-11 06:49:03,925 epoch 3 - iter 120/245 - loss 0.00784910 - samples/sec: 92.61 - lr: 0.100000
2022-09-11 06:49:11,025 epoch 3 - iter 144/245 - loss 0.00785120 - samples/sec: 108.26 - lr: 0.100000
2022-09-11 06:49:19,189 epoch 3 - iter 168/245 - loss 0.00810683 - samples/sec: 94.13 - lr: 0.100000
2022-09-11 06:49:25,788 epoch 3 - iter 192/245 - loss 0.00809338 - samples/sec: 116.50 - lr: 0.100000
2022-09-11 

100%|██████████| 37/37 [00:14<00:00,  2.51it/s]

2022-09-11 06:49:55,080 Evaluating as a multi-label problem: False
2022-09-11 06:49:55,108 DEV : loss 0.03870369866490364 - f1-score (micro avg)  0.8812





2022-09-11 06:49:55,352 BAD EPOCHS (no improvement): 2
2022-09-11 06:49:55,356 ----------------------------------------------------------------------------------------------------
2022-09-11 06:50:02,371 epoch 4 - iter 24/245 - loss 0.00691332 - samples/sec: 109.60 - lr: 0.100000
2022-09-11 06:50:09,874 epoch 4 - iter 48/245 - loss 0.00754436 - samples/sec: 102.44 - lr: 0.100000
2022-09-11 06:50:17,907 epoch 4 - iter 72/245 - loss 0.00765944 - samples/sec: 95.69 - lr: 0.100000
2022-09-11 06:50:25,609 epoch 4 - iter 96/245 - loss 0.00767813 - samples/sec: 99.80 - lr: 0.100000
2022-09-11 06:50:32,441 epoch 4 - iter 120/245 - loss 0.00737611 - samples/sec: 112.49 - lr: 0.100000
2022-09-11 06:50:39,636 epoch 4 - iter 144/245 - loss 0.00730054 - samples/sec: 106.83 - lr: 0.100000
2022-09-11 06:50:47,471 epoch 4 - iter 168/245 - loss 0.00733246 - samples/sec: 98.09 - lr: 0.100000
2022-09-11 06:50:54,002 epoch 4 - iter 192/245 - loss 0.00740800 - samples/sec: 117.70 - lr: 0.100000
2022-09-11 

100%|██████████| 37/37 [00:14<00:00,  2.53it/s]

2022-09-11 06:51:24,261 Evaluating as a multi-label problem: False
2022-09-11 06:51:24,289 DEV : loss 0.04017338156700134 - f1-score (micro avg)  0.8873





2022-09-11 06:51:24,521 BAD EPOCHS (no improvement): 0
2022-09-11 06:51:24,524 saving best model
2022-09-11 06:51:29,111 ----------------------------------------------------------------------------------------------------
2022-09-11 06:51:37,076 epoch 5 - iter 24/245 - loss 0.00612529 - samples/sec: 96.50 - lr: 0.100000
2022-09-11 06:51:44,742 epoch 5 - iter 48/245 - loss 0.00663136 - samples/sec: 100.26 - lr: 0.100000
2022-09-11 06:51:52,400 epoch 5 - iter 72/245 - loss 0.00673999 - samples/sec: 100.35 - lr: 0.100000
2022-09-11 06:51:59,897 epoch 5 - iter 96/245 - loss 0.00777118 - samples/sec: 102.52 - lr: 0.100000
2022-09-11 06:52:06,925 epoch 5 - iter 120/245 - loss 0.00789020 - samples/sec: 109.36 - lr: 0.100000
2022-09-11 06:52:13,470 epoch 5 - iter 144/245 - loss 0.00789962 - samples/sec: 117.46 - lr: 0.100000
2022-09-11 06:52:19,662 epoch 5 - iter 168/245 - loss 0.00788581 - samples/sec: 124.12 - lr: 0.100000
2022-09-11 06:52:27,930 epoch 5 - iter 192/245 - loss 0.00792509 - sa

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 06:52:57,115 Evaluating as a multi-label problem: False
2022-09-11 06:52:57,143 DEV : loss 0.0396902933716774 - f1-score (micro avg)  0.884





2022-09-11 06:52:57,379 BAD EPOCHS (no improvement): 1
2022-09-11 06:52:57,381 ----------------------------------------------------------------------------------------------------
2022-09-11 06:53:05,096 epoch 6 - iter 24/245 - loss 0.00577606 - samples/sec: 99.65 - lr: 0.100000
2022-09-11 06:53:12,573 epoch 6 - iter 48/245 - loss 0.00643076 - samples/sec: 102.80 - lr: 0.100000
2022-09-11 06:53:19,231 epoch 6 - iter 72/245 - loss 0.00619170 - samples/sec: 115.44 - lr: 0.100000
2022-09-11 06:53:25,941 epoch 6 - iter 96/245 - loss 0.00664313 - samples/sec: 114.56 - lr: 0.100000
2022-09-11 06:53:32,716 epoch 6 - iter 120/245 - loss 0.00679414 - samples/sec: 113.46 - lr: 0.100000
2022-09-11 06:53:39,740 epoch 6 - iter 144/245 - loss 0.00725570 - samples/sec: 109.42 - lr: 0.100000
2022-09-11 06:53:47,184 epoch 6 - iter 168/245 - loss 0.00763300 - samples/sec: 103.24 - lr: 0.100000
2022-09-11 06:53:53,872 epoch 6 - iter 192/245 - loss 0.00789568 - samples/sec: 114.95 - lr: 0.100000
2022-09-1

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 06:54:26,168 Evaluating as a multi-label problem: False
2022-09-11 06:54:26,195 DEV : loss 0.042866069823503494 - f1-score (micro avg)  0.8803





2022-09-11 06:54:26,425 BAD EPOCHS (no improvement): 2
2022-09-11 06:54:26,428 ----------------------------------------------------------------------------------------------------
2022-09-11 06:54:33,534 epoch 7 - iter 24/245 - loss 0.00765168 - samples/sec: 108.20 - lr: 0.100000
2022-09-11 06:54:41,443 epoch 7 - iter 48/245 - loss 0.00743981 - samples/sec: 97.17 - lr: 0.100000
2022-09-11 06:54:47,856 epoch 7 - iter 72/245 - loss 0.00709278 - samples/sec: 119.86 - lr: 0.100000
2022-09-11 06:54:56,586 epoch 7 - iter 96/245 - loss 0.00687619 - samples/sec: 88.03 - lr: 0.100000
2022-09-11 06:55:03,523 epoch 7 - iter 120/245 - loss 0.00656287 - samples/sec: 110.80 - lr: 0.100000
2022-09-11 06:55:11,494 epoch 7 - iter 144/245 - loss 0.00670800 - samples/sec: 96.42 - lr: 0.100000
2022-09-11 06:55:17,344 epoch 7 - iter 168/245 - loss 0.00659667 - samples/sec: 131.40 - lr: 0.100000
2022-09-11 06:55:24,640 epoch 7 - iter 192/245 - loss 0.00642141 - samples/sec: 105.35 - lr: 0.100000
2022-09-11 

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 06:55:53,853 Evaluating as a multi-label problem: False
2022-09-11 06:55:53,879 DEV : loss 0.0430731326341629 - f1-score (micro avg)  0.8768





2022-09-11 06:55:54,113 BAD EPOCHS (no improvement): 3
2022-09-11 06:55:54,116 ----------------------------------------------------------------------------------------------------
2022-09-11 06:56:00,894 epoch 8 - iter 24/245 - loss 0.00508945 - samples/sec: 113.45 - lr: 0.100000
2022-09-11 06:56:07,624 epoch 8 - iter 48/245 - loss 0.00684875 - samples/sec: 114.20 - lr: 0.100000
2022-09-11 06:56:15,740 epoch 8 - iter 72/245 - loss 0.00736121 - samples/sec: 94.69 - lr: 0.100000
2022-09-11 06:56:22,024 epoch 8 - iter 96/245 - loss 0.00734199 - samples/sec: 122.31 - lr: 0.100000
2022-09-11 06:56:28,447 epoch 8 - iter 120/245 - loss 0.00772750 - samples/sec: 119.68 - lr: 0.100000
2022-09-11 06:56:36,280 epoch 8 - iter 144/245 - loss 0.00750897 - samples/sec: 98.11 - lr: 0.100000
2022-09-11 06:56:43,797 epoch 8 - iter 168/245 - loss 0.00769892 - samples/sec: 102.25 - lr: 0.100000
2022-09-11 06:56:52,031 epoch 8 - iter 192/245 - loss 0.00795690 - samples/sec: 93.33 - lr: 0.100000
2022-09-11 

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 06:57:21,871 Evaluating as a multi-label problem: False
2022-09-11 06:57:21,898 DEV : loss 0.03850177302956581 - f1-score (micro avg)  0.888





2022-09-11 06:57:22,131 BAD EPOCHS (no improvement): 0
2022-09-11 06:57:22,135 saving best model
2022-09-11 06:57:26,682 ----------------------------------------------------------------------------------------------------
2022-09-11 06:57:34,744 epoch 9 - iter 24/245 - loss 0.00724780 - samples/sec: 95.37 - lr: 0.100000
2022-09-11 06:57:43,103 epoch 9 - iter 48/245 - loss 0.00674161 - samples/sec: 91.94 - lr: 0.100000
2022-09-11 06:57:49,782 epoch 9 - iter 72/245 - loss 0.00686786 - samples/sec: 115.08 - lr: 0.100000
2022-09-11 06:57:57,112 epoch 9 - iter 96/245 - loss 0.00668718 - samples/sec: 104.85 - lr: 0.100000
2022-09-11 06:58:03,771 epoch 9 - iter 120/245 - loss 0.00693210 - samples/sec: 115.44 - lr: 0.100000
2022-09-11 06:58:10,999 epoch 9 - iter 144/245 - loss 0.00689589 - samples/sec: 106.34 - lr: 0.100000
2022-09-11 06:58:18,006 epoch 9 - iter 168/245 - loss 0.00701067 - samples/sec: 109.68 - lr: 0.100000
2022-09-11 06:58:25,152 epoch 9 - iter 192/245 - loss 0.00728944 - sam

100%|██████████| 37/37 [00:14<00:00,  2.55it/s]

2022-09-11 06:58:54,954 Evaluating as a multi-label problem: False
2022-09-11 06:58:54,980 DEV : loss 0.04363260418176651 - f1-score (micro avg)  0.9006





2022-09-11 06:58:55,215 BAD EPOCHS (no improvement): 0
2022-09-11 06:58:55,218 saving best model
2022-09-11 06:58:59,742 ----------------------------------------------------------------------------------------------------
2022-09-11 06:59:06,249 epoch 10 - iter 24/245 - loss 0.00661824 - samples/sec: 118.18 - lr: 0.100000
2022-09-11 06:59:13,436 epoch 10 - iter 48/245 - loss 0.00545514 - samples/sec: 106.94 - lr: 0.100000
2022-09-11 06:59:21,620 epoch 10 - iter 72/245 - loss 0.00572943 - samples/sec: 93.89 - lr: 0.100000
2022-09-11 06:59:27,702 epoch 10 - iter 96/245 - loss 0.00599443 - samples/sec: 126.40 - lr: 0.100000
2022-09-11 06:59:35,089 epoch 10 - iter 120/245 - loss 0.00643794 - samples/sec: 104.04 - lr: 0.100000
2022-09-11 06:59:42,055 epoch 10 - iter 144/245 - loss 0.00683349 - samples/sec: 110.33 - lr: 0.100000
2022-09-11 06:59:49,199 epoch 10 - iter 168/245 - loss 0.00692005 - samples/sec: 107.60 - lr: 0.100000
2022-09-11 06:59:57,705 epoch 10 - iter 192/245 - loss 0.00695

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:00:26,683 Evaluating as a multi-label problem: False
2022-09-11 07:00:26,712 DEV : loss 0.04165041819214821 - f1-score (micro avg)  0.8845





2022-09-11 07:00:26,943 BAD EPOCHS (no improvement): 1
2022-09-11 07:00:26,947 ----------------------------------------------------------------------------------------------------
2022-09-11 07:00:33,563 epoch 11 - iter 24/245 - loss 0.00579247 - samples/sec: 116.21 - lr: 0.100000
2022-09-11 07:00:41,834 epoch 11 - iter 48/245 - loss 0.00641276 - samples/sec: 92.92 - lr: 0.100000
2022-09-11 07:00:51,051 epoch 11 - iter 72/245 - loss 0.00673719 - samples/sec: 83.38 - lr: 0.100000
2022-09-11 07:00:57,842 epoch 11 - iter 96/245 - loss 0.00754211 - samples/sec: 113.18 - lr: 0.100000
2022-09-11 07:01:04,982 epoch 11 - iter 120/245 - loss 0.00740787 - samples/sec: 107.65 - lr: 0.100000
2022-09-11 07:01:11,843 epoch 11 - iter 144/245 - loss 0.00753686 - samples/sec: 112.04 - lr: 0.100000
2022-09-11 07:01:18,242 epoch 11 - iter 168/245 - loss 0.00742432 - samples/sec: 120.11 - lr: 0.100000
2022-09-11 07:01:26,005 epoch 11 - iter 192/245 - loss 0.00760879 - samples/sec: 99.00 - lr: 0.100000
202

100%|██████████| 37/37 [00:14<00:00,  2.56it/s]

2022-09-11 07:01:55,372 Evaluating as a multi-label problem: False
2022-09-11 07:01:55,399 DEV : loss 0.04661846533417702 - f1-score (micro avg)  0.8867





2022-09-11 07:01:55,633 BAD EPOCHS (no improvement): 2
2022-09-11 07:01:55,636 ----------------------------------------------------------------------------------------------------
2022-09-11 07:02:03,084 epoch 12 - iter 24/245 - loss 0.00722058 - samples/sec: 103.21 - lr: 0.100000
2022-09-11 07:02:11,142 epoch 12 - iter 48/245 - loss 0.00699144 - samples/sec: 95.37 - lr: 0.100000
2022-09-11 07:02:18,075 epoch 12 - iter 72/245 - loss 0.00676439 - samples/sec: 110.86 - lr: 0.100000
2022-09-11 07:02:25,072 epoch 12 - iter 96/245 - loss 0.00671927 - samples/sec: 109.86 - lr: 0.100000
2022-09-11 07:02:31,815 epoch 12 - iter 120/245 - loss 0.00667223 - samples/sec: 113.98 - lr: 0.100000
2022-09-11 07:02:39,127 epoch 12 - iter 144/245 - loss 0.00685470 - samples/sec: 105.10 - lr: 0.100000
2022-09-11 07:02:47,178 epoch 12 - iter 168/245 - loss 0.00718337 - samples/sec: 95.47 - lr: 0.100000
2022-09-11 07:02:54,290 epoch 12 - iter 192/245 - loss 0.00703002 - samples/sec: 108.05 - lr: 0.100000
20

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 07:03:22,937 Evaluating as a multi-label problem: False
2022-09-11 07:03:22,965 DEV : loss 0.04888354241847992 - f1-score (micro avg)  0.8794





2022-09-11 07:03:23,201 BAD EPOCHS (no improvement): 3
2022-09-11 07:03:23,204 ----------------------------------------------------------------------------------------------------
2022-09-11 07:03:30,357 epoch 13 - iter 24/245 - loss 0.00628936 - samples/sec: 107.48 - lr: 0.100000
2022-09-11 07:03:37,797 epoch 13 - iter 48/245 - loss 0.00656516 - samples/sec: 103.31 - lr: 0.100000
2022-09-11 07:03:44,598 epoch 13 - iter 72/245 - loss 0.00622849 - samples/sec: 113.01 - lr: 0.100000
2022-09-11 07:03:51,544 epoch 13 - iter 96/245 - loss 0.00621040 - samples/sec: 110.64 - lr: 0.100000
2022-09-11 07:03:59,120 epoch 13 - iter 120/245 - loss 0.00647866 - samples/sec: 101.46 - lr: 0.100000
2022-09-11 07:04:07,025 epoch 13 - iter 144/245 - loss 0.00684050 - samples/sec: 97.22 - lr: 0.100000
2022-09-11 07:04:13,886 epoch 13 - iter 168/245 - loss 0.00707213 - samples/sec: 112.03 - lr: 0.100000
2022-09-11 07:04:23,236 epoch 13 - iter 192/245 - loss 0.00688581 - samples/sec: 82.19 - lr: 0.100000
20

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:04:52,029 Evaluating as a multi-label problem: False
2022-09-11 07:04:52,057 DEV : loss 0.04375133663415909 - f1-score (micro avg)  0.8889





2022-09-11 07:04:52,288 Epoch    13: reducing learning rate of group 0 to 5.0000e-02.
2022-09-11 07:04:52,289 BAD EPOCHS (no improvement): 4
2022-09-11 07:04:52,292 ----------------------------------------------------------------------------------------------------
2022-09-11 07:04:59,876 epoch 14 - iter 24/245 - loss 0.00602066 - samples/sec: 101.40 - lr: 0.050000
2022-09-11 07:05:06,885 epoch 14 - iter 48/245 - loss 0.00569052 - samples/sec: 109.65 - lr: 0.050000
2022-09-11 07:05:14,672 epoch 14 - iter 72/245 - loss 0.00562634 - samples/sec: 98.69 - lr: 0.050000
2022-09-11 07:05:21,246 epoch 14 - iter 96/245 - loss 0.00562009 - samples/sec: 116.93 - lr: 0.050000
2022-09-11 07:05:27,866 epoch 14 - iter 120/245 - loss 0.00540252 - samples/sec: 116.11 - lr: 0.050000
2022-09-11 07:05:35,002 epoch 14 - iter 144/245 - loss 0.00571646 - samples/sec: 107.70 - lr: 0.050000
2022-09-11 07:05:41,401 epoch 14 - iter 168/245 - loss 0.00553306 - samples/sec: 120.13 - lr: 0.050000
2022-09-11 07:05:4

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 07:06:20,482 Evaluating as a multi-label problem: False
2022-09-11 07:06:20,509 DEV : loss 0.040513183921575546 - f1-score (micro avg)  0.8993





2022-09-11 07:06:20,739 BAD EPOCHS (no improvement): 1
2022-09-11 07:06:20,741 ----------------------------------------------------------------------------------------------------
2022-09-11 07:06:28,675 epoch 15 - iter 24/245 - loss 0.00584684 - samples/sec: 96.90 - lr: 0.050000
2022-09-11 07:06:35,795 epoch 15 - iter 48/245 - loss 0.00672858 - samples/sec: 107.97 - lr: 0.050000
2022-09-11 07:06:42,553 epoch 15 - iter 72/245 - loss 0.00627825 - samples/sec: 113.71 - lr: 0.050000
2022-09-11 07:06:51,772 epoch 15 - iter 96/245 - loss 0.00605102 - samples/sec: 83.36 - lr: 0.050000
2022-09-11 07:06:57,962 epoch 15 - iter 120/245 - loss 0.00546245 - samples/sec: 124.19 - lr: 0.050000
2022-09-11 07:07:05,252 epoch 15 - iter 144/245 - loss 0.00533745 - samples/sec: 105.43 - lr: 0.050000
2022-09-11 07:07:12,460 epoch 15 - iter 168/245 - loss 0.00556317 - samples/sec: 106.63 - lr: 0.050000
2022-09-11 07:07:19,415 epoch 15 - iter 192/245 - loss 0.00561121 - samples/sec: 110.49 - lr: 0.050000
20

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:07:48,351 Evaluating as a multi-label problem: False
2022-09-11 07:07:48,378 DEV : loss 0.04207519441843033 - f1-score (micro avg)  0.903





2022-09-11 07:07:48,610 BAD EPOCHS (no improvement): 0
2022-09-11 07:07:48,613 saving best model
2022-09-11 07:07:53,176 ----------------------------------------------------------------------------------------------------
2022-09-11 07:08:00,887 epoch 16 - iter 24/245 - loss 0.00492307 - samples/sec: 99.71 - lr: 0.050000
2022-09-11 07:08:08,690 epoch 16 - iter 48/245 - loss 0.00526985 - samples/sec: 98.48 - lr: 0.050000
2022-09-11 07:08:15,826 epoch 16 - iter 72/245 - loss 0.00515836 - samples/sec: 107.71 - lr: 0.050000
2022-09-11 07:08:22,024 epoch 16 - iter 96/245 - loss 0.00512663 - samples/sec: 124.04 - lr: 0.050000
2022-09-11 07:08:30,379 epoch 16 - iter 120/245 - loss 0.00512718 - samples/sec: 91.97 - lr: 0.050000
2022-09-11 07:08:37,579 epoch 16 - iter 144/245 - loss 0.00518206 - samples/sec: 106.75 - lr: 0.050000
2022-09-11 07:08:44,416 epoch 16 - iter 168/245 - loss 0.00508120 - samples/sec: 112.43 - lr: 0.050000
2022-09-11 07:08:52,067 epoch 16 - iter 192/245 - loss 0.0050006

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 07:09:20,985 Evaluating as a multi-label problem: False
2022-09-11 07:09:21,009 DEV : loss 0.042186904698610306 - f1-score (micro avg)  0.8981





2022-09-11 07:09:21,236 BAD EPOCHS (no improvement): 1
2022-09-11 07:09:21,239 ----------------------------------------------------------------------------------------------------
2022-09-11 07:09:28,518 epoch 17 - iter 24/245 - loss 0.00573337 - samples/sec: 105.62 - lr: 0.050000
2022-09-11 07:09:36,881 epoch 17 - iter 48/245 - loss 0.00505809 - samples/sec: 91.89 - lr: 0.050000
2022-09-11 07:09:43,856 epoch 17 - iter 72/245 - loss 0.00493804 - samples/sec: 110.19 - lr: 0.050000
2022-09-11 07:09:50,804 epoch 17 - iter 96/245 - loss 0.00460201 - samples/sec: 110.63 - lr: 0.050000
2022-09-11 07:09:58,170 epoch 17 - iter 120/245 - loss 0.00506209 - samples/sec: 104.35 - lr: 0.050000
2022-09-11 07:10:04,989 epoch 17 - iter 144/245 - loss 0.00506827 - samples/sec: 112.71 - lr: 0.050000
2022-09-11 07:10:11,162 epoch 17 - iter 168/245 - loss 0.00481242 - samples/sec: 124.53 - lr: 0.050000
2022-09-11 07:10:18,453 epoch 17 - iter 192/245 - loss 0.00492562 - samples/sec: 105.40 - lr: 0.050000
2

100%|██████████| 37/37 [00:13<00:00,  2.75it/s]

2022-09-11 07:10:47,729 Evaluating as a multi-label problem: False
2022-09-11 07:10:47,759 DEV : loss 0.04267817363142967 - f1-score (micro avg)  0.8933





2022-09-11 07:10:47,992 BAD EPOCHS (no improvement): 2
2022-09-11 07:10:47,995 ----------------------------------------------------------------------------------------------------
2022-09-11 07:10:55,895 epoch 18 - iter 24/245 - loss 0.00482172 - samples/sec: 97.31 - lr: 0.050000
2022-09-11 07:11:01,708 epoch 18 - iter 48/245 - loss 0.00504459 - samples/sec: 132.25 - lr: 0.050000
2022-09-11 07:11:08,945 epoch 18 - iter 72/245 - loss 0.00516035 - samples/sec: 106.20 - lr: 0.050000
2022-09-11 07:11:16,036 epoch 18 - iter 96/245 - loss 0.00475880 - samples/sec: 108.39 - lr: 0.050000
2022-09-11 07:11:24,009 epoch 18 - iter 120/245 - loss 0.00445617 - samples/sec: 96.39 - lr: 0.050000
2022-09-11 07:11:32,023 epoch 18 - iter 144/245 - loss 0.00448985 - samples/sec: 95.90 - lr: 0.050000
2022-09-11 07:11:39,142 epoch 18 - iter 168/245 - loss 0.00463770 - samples/sec: 107.96 - lr: 0.050000
2022-09-11 07:11:47,147 epoch 18 - iter 192/245 - loss 0.00455480 - samples/sec: 96.01 - lr: 0.050000
2022

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:12:16,329 Evaluating as a multi-label problem: False
2022-09-11 07:12:16,356 DEV : loss 0.04357725754380226 - f1-score (micro avg)  0.8843





2022-09-11 07:12:16,592 BAD EPOCHS (no improvement): 3
2022-09-11 07:12:16,595 ----------------------------------------------------------------------------------------------------
2022-09-11 07:12:23,672 epoch 19 - iter 24/245 - loss 0.00363628 - samples/sec: 108.64 - lr: 0.050000
2022-09-11 07:12:30,981 epoch 19 - iter 48/245 - loss 0.00315081 - samples/sec: 105.15 - lr: 0.050000
2022-09-11 07:12:40,366 epoch 19 - iter 72/245 - loss 0.00406087 - samples/sec: 81.88 - lr: 0.050000
2022-09-11 07:12:47,326 epoch 19 - iter 96/245 - loss 0.00423151 - samples/sec: 110.43 - lr: 0.050000
2022-09-11 07:12:55,840 epoch 19 - iter 120/245 - loss 0.00386203 - samples/sec: 90.26 - lr: 0.050000
2022-09-11 07:13:02,431 epoch 19 - iter 144/245 - loss 0.00418073 - samples/sec: 116.62 - lr: 0.050000
2022-09-11 07:13:09,618 epoch 19 - iter 168/245 - loss 0.00428714 - samples/sec: 106.93 - lr: 0.050000
2022-09-11 07:13:16,220 epoch 19 - iter 192/245 - loss 0.00431804 - samples/sec: 116.43 - lr: 0.050000
20

100%|██████████| 37/37 [00:14<00:00,  2.54it/s]

2022-09-11 07:13:45,223 Evaluating as a multi-label problem: False
2022-09-11 07:13:45,250 DEV : loss 0.04396483674645424 - f1-score (micro avg)  0.8935





2022-09-11 07:13:45,487 Epoch    19: reducing learning rate of group 0 to 2.5000e-02.
2022-09-11 07:13:45,488 BAD EPOCHS (no improvement): 4
2022-09-11 07:13:45,492 ----------------------------------------------------------------------------------------------------
2022-09-11 07:13:52,441 epoch 20 - iter 24/245 - loss 0.00547099 - samples/sec: 110.66 - lr: 0.025000
2022-09-11 07:13:58,943 epoch 20 - iter 48/245 - loss 0.00532268 - samples/sec: 118.23 - lr: 0.025000
2022-09-11 07:14:06,598 epoch 20 - iter 72/245 - loss 0.00490106 - samples/sec: 100.41 - lr: 0.025000
2022-09-11 07:14:14,233 epoch 20 - iter 96/245 - loss 0.00480678 - samples/sec: 100.66 - lr: 0.025000
2022-09-11 07:14:20,463 epoch 20 - iter 120/245 - loss 0.00453319 - samples/sec: 123.39 - lr: 0.025000
2022-09-11 07:14:27,490 epoch 20 - iter 144/245 - loss 0.00430751 - samples/sec: 109.39 - lr: 0.025000
2022-09-11 07:14:36,150 epoch 20 - iter 168/245 - loss 0.00418997 - samples/sec: 88.74 - lr: 0.025000
2022-09-11 07:14:4

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 07:15:12,697 Evaluating as a multi-label problem: False
2022-09-11 07:15:12,727 DEV : loss 0.04220595210790634 - f1-score (micro avg)  0.8933





2022-09-11 07:15:12,961 BAD EPOCHS (no improvement): 1
2022-09-11 07:15:12,965 ----------------------------------------------------------------------------------------------------
2022-09-11 07:15:20,090 epoch 21 - iter 24/245 - loss 0.00530895 - samples/sec: 107.89 - lr: 0.025000
2022-09-11 07:15:27,799 epoch 21 - iter 48/245 - loss 0.00506245 - samples/sec: 99.71 - lr: 0.025000
2022-09-11 07:15:34,653 epoch 21 - iter 72/245 - loss 0.00526058 - samples/sec: 112.15 - lr: 0.025000
2022-09-11 07:15:42,031 epoch 21 - iter 96/245 - loss 0.00505237 - samples/sec: 104.17 - lr: 0.025000
2022-09-11 07:15:48,448 epoch 21 - iter 120/245 - loss 0.00510597 - samples/sec: 119.80 - lr: 0.025000
2022-09-11 07:15:57,481 epoch 21 - iter 144/245 - loss 0.00457259 - samples/sec: 85.07 - lr: 0.025000
2022-09-11 07:16:03,637 epoch 21 - iter 168/245 - loss 0.00454423 - samples/sec: 124.88 - lr: 0.025000
2022-09-11 07:16:11,601 epoch 21 - iter 192/245 - loss 0.00467789 - samples/sec: 96.51 - lr: 0.025000
202

100%|██████████| 37/37 [00:14<00:00,  2.50it/s]

2022-09-11 07:16:41,600 Evaluating as a multi-label problem: False
2022-09-11 07:16:41,629 DEV : loss 0.042547840625047684 - f1-score (micro avg)  0.8933





2022-09-11 07:16:41,865 BAD EPOCHS (no improvement): 2
2022-09-11 07:16:41,869 ----------------------------------------------------------------------------------------------------
2022-09-11 07:16:50,256 epoch 22 - iter 24/245 - loss 0.00381541 - samples/sec: 91.66 - lr: 0.025000
2022-09-11 07:16:57,483 epoch 22 - iter 48/245 - loss 0.00426409 - samples/sec: 106.34 - lr: 0.025000
2022-09-11 07:17:05,627 epoch 22 - iter 72/245 - loss 0.00446020 - samples/sec: 94.37 - lr: 0.025000
2022-09-11 07:17:12,171 epoch 22 - iter 96/245 - loss 0.00407161 - samples/sec: 117.44 - lr: 0.025000
2022-09-11 07:17:18,984 epoch 22 - iter 120/245 - loss 0.00427092 - samples/sec: 112.82 - lr: 0.025000
2022-09-11 07:17:26,114 epoch 22 - iter 144/245 - loss 0.00427844 - samples/sec: 107.81 - lr: 0.025000
2022-09-11 07:17:33,071 epoch 22 - iter 168/245 - loss 0.00411637 - samples/sec: 110.48 - lr: 0.025000
2022-09-11 07:17:39,828 epoch 22 - iter 192/245 - loss 0.00416575 - samples/sec: 113.76 - lr: 0.025000
20

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:18:09,422 Evaluating as a multi-label problem: False
2022-09-11 07:18:09,448 DEV : loss 0.042904652655124664 - f1-score (micro avg)  0.8966





2022-09-11 07:18:09,677 BAD EPOCHS (no improvement): 3
2022-09-11 07:18:09,681 ----------------------------------------------------------------------------------------------------
2022-09-11 07:18:17,584 epoch 23 - iter 24/245 - loss 0.00266614 - samples/sec: 97.28 - lr: 0.025000
2022-09-11 07:18:24,886 epoch 23 - iter 48/245 - loss 0.00353906 - samples/sec: 105.25 - lr: 0.025000
2022-09-11 07:18:32,168 epoch 23 - iter 72/245 - loss 0.00340270 - samples/sec: 105.53 - lr: 0.025000
2022-09-11 07:18:38,343 epoch 23 - iter 96/245 - loss 0.00368231 - samples/sec: 124.48 - lr: 0.025000
2022-09-11 07:18:46,169 epoch 23 - iter 120/245 - loss 0.00352236 - samples/sec: 98.19 - lr: 0.025000
2022-09-11 07:18:53,038 epoch 23 - iter 144/245 - loss 0.00377715 - samples/sec: 111.88 - lr: 0.025000
2022-09-11 07:18:59,911 epoch 23 - iter 168/245 - loss 0.00364895 - samples/sec: 111.83 - lr: 0.025000
2022-09-11 07:19:08,682 epoch 23 - iter 192/245 - loss 0.00379788 - samples/sec: 87.62 - lr: 0.025000
202

100%|██████████| 37/37 [00:13<00:00,  2.75it/s]

2022-09-11 07:19:38,461 Evaluating as a multi-label problem: False
2022-09-11 07:19:38,489 DEV : loss 0.04290531575679779 - f1-score (micro avg)  0.8921





2022-09-11 07:19:38,729 Epoch    23: reducing learning rate of group 0 to 1.2500e-02.
2022-09-11 07:19:38,731 BAD EPOCHS (no improvement): 4
2022-09-11 07:19:38,734 ----------------------------------------------------------------------------------------------------
2022-09-11 07:19:46,820 epoch 24 - iter 24/245 - loss 0.00403963 - samples/sec: 95.09 - lr: 0.012500
2022-09-11 07:19:54,626 epoch 24 - iter 48/245 - loss 0.00380496 - samples/sec: 98.44 - lr: 0.012500
2022-09-11 07:20:02,816 epoch 24 - iter 72/245 - loss 0.00400043 - samples/sec: 93.85 - lr: 0.012500
2022-09-11 07:20:10,571 epoch 24 - iter 96/245 - loss 0.00427338 - samples/sec: 99.11 - lr: 0.012500
2022-09-11 07:20:17,605 epoch 24 - iter 120/245 - loss 0.00389660 - samples/sec: 109.27 - lr: 0.012500
2022-09-11 07:20:24,470 epoch 24 - iter 144/245 - loss 0.00388098 - samples/sec: 111.97 - lr: 0.012500
2022-09-11 07:20:30,861 epoch 24 - iter 168/245 - loss 0.00372307 - samples/sec: 120.27 - lr: 0.012500
2022-09-11 07:20:38,6

100%|██████████| 37/37 [00:14<00:00,  2.53it/s]

2022-09-11 07:21:07,984 Evaluating as a multi-label problem: False
2022-09-11 07:21:08,013 DEV : loss 0.04375961422920227 - f1-score (micro avg)  0.8959





2022-09-11 07:21:08,250 BAD EPOCHS (no improvement): 1
2022-09-11 07:21:08,254 ----------------------------------------------------------------------------------------------------
2022-09-11 07:21:14,814 epoch 25 - iter 24/245 - loss 0.00449195 - samples/sec: 117.20 - lr: 0.012500
2022-09-11 07:21:21,371 epoch 25 - iter 48/245 - loss 0.00452405 - samples/sec: 117.23 - lr: 0.012500
2022-09-11 07:21:28,641 epoch 25 - iter 72/245 - loss 0.00381095 - samples/sec: 105.72 - lr: 0.012500
2022-09-11 07:21:36,714 epoch 25 - iter 96/245 - loss 0.00381035 - samples/sec: 95.19 - lr: 0.012500
2022-09-11 07:21:43,701 epoch 25 - iter 120/245 - loss 0.00379497 - samples/sec: 110.02 - lr: 0.012500
2022-09-11 07:21:50,031 epoch 25 - iter 144/245 - loss 0.00401150 - samples/sec: 121.47 - lr: 0.012500
2022-09-11 07:21:57,630 epoch 25 - iter 168/245 - loss 0.00411889 - samples/sec: 101.15 - lr: 0.012500
2022-09-11 07:22:05,675 epoch 25 - iter 192/245 - loss 0.00422707 - samples/sec: 95.54 - lr: 0.012500
20

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:22:35,637 Evaluating as a multi-label problem: False
2022-09-11 07:22:35,665 DEV : loss 0.0437433160841465 - f1-score (micro avg)  0.895





2022-09-11 07:22:35,902 BAD EPOCHS (no improvement): 2
2022-09-11 07:22:35,905 ----------------------------------------------------------------------------------------------------
2022-09-11 07:22:44,112 epoch 26 - iter 24/245 - loss 0.00286475 - samples/sec: 93.67 - lr: 0.012500
2022-09-11 07:22:51,319 epoch 26 - iter 48/245 - loss 0.00288282 - samples/sec: 106.65 - lr: 0.012500
2022-09-11 07:22:58,553 epoch 26 - iter 72/245 - loss 0.00295247 - samples/sec: 106.27 - lr: 0.012500
2022-09-11 07:23:05,330 epoch 26 - iter 96/245 - loss 0.00302275 - samples/sec: 113.42 - lr: 0.012500
2022-09-11 07:23:11,818 epoch 26 - iter 120/245 - loss 0.00298220 - samples/sec: 118.48 - lr: 0.012500
2022-09-11 07:23:18,935 epoch 26 - iter 144/245 - loss 0.00301745 - samples/sec: 108.00 - lr: 0.012500
2022-09-11 07:23:25,153 epoch 26 - iter 168/245 - loss 0.00302503 - samples/sec: 123.60 - lr: 0.012500
2022-09-11 07:23:31,824 epoch 26 - iter 192/245 - loss 0.00295289 - samples/sec: 115.24 - lr: 0.012500
2

100%|██████████| 37/37 [00:14<00:00,  2.55it/s]

2022-09-11 07:24:03,862 Evaluating as a multi-label problem: False
2022-09-11 07:24:03,889 DEV : loss 0.04366707056760788 - f1-score (micro avg)  0.8951





2022-09-11 07:24:04,121 BAD EPOCHS (no improvement): 3
2022-09-11 07:24:04,125 ----------------------------------------------------------------------------------------------------
2022-09-11 07:24:11,766 epoch 27 - iter 24/245 - loss 0.00347305 - samples/sec: 100.64 - lr: 0.012500
2022-09-11 07:24:18,992 epoch 27 - iter 48/245 - loss 0.00339958 - samples/sec: 106.37 - lr: 0.012500
2022-09-11 07:24:26,136 epoch 27 - iter 72/245 - loss 0.00348676 - samples/sec: 107.58 - lr: 0.012500
2022-09-11 07:24:34,983 epoch 27 - iter 96/245 - loss 0.00385107 - samples/sec: 86.87 - lr: 0.012500
2022-09-11 07:24:41,678 epoch 27 - iter 120/245 - loss 0.00366369 - samples/sec: 114.81 - lr: 0.012500
2022-09-11 07:24:48,429 epoch 27 - iter 144/245 - loss 0.00364732 - samples/sec: 113.87 - lr: 0.012500
2022-09-11 07:24:55,611 epoch 27 - iter 168/245 - loss 0.00367992 - samples/sec: 107.01 - lr: 0.012500
2022-09-11 07:25:02,282 epoch 27 - iter 192/245 - loss 0.00372378 - samples/sec: 115.23 - lr: 0.012500
2

100%|██████████| 37/37 [00:13<00:00,  2.76it/s]

2022-09-11 07:25:31,140 Evaluating as a multi-label problem: False
2022-09-11 07:25:31,168 DEV : loss 0.04408752918243408 - f1-score (micro avg)  0.8937





2022-09-11 07:25:31,390 Epoch    27: reducing learning rate of group 0 to 6.2500e-03.
2022-09-11 07:25:31,391 BAD EPOCHS (no improvement): 4
2022-09-11 07:25:31,394 ----------------------------------------------------------------------------------------------------
2022-09-11 07:25:39,878 epoch 28 - iter 24/245 - loss 0.00404642 - samples/sec: 90.62 - lr: 0.006250
2022-09-11 07:25:47,204 epoch 28 - iter 48/245 - loss 0.00370386 - samples/sec: 104.92 - lr: 0.006250
2022-09-11 07:25:53,963 epoch 28 - iter 72/245 - loss 0.00351283 - samples/sec: 113.73 - lr: 0.006250
2022-09-11 07:26:00,933 epoch 28 - iter 96/245 - loss 0.00360662 - samples/sec: 110.28 - lr: 0.006250
2022-09-11 07:26:07,980 epoch 28 - iter 120/245 - loss 0.00372274 - samples/sec: 109.06 - lr: 0.006250
2022-09-11 07:26:15,147 epoch 28 - iter 144/245 - loss 0.00371578 - samples/sec: 107.25 - lr: 0.006250
2022-09-11 07:26:22,801 epoch 28 - iter 168/245 - loss 0.00341560 - samples/sec: 100.42 - lr: 0.006250
2022-09-11 07:26:3

100%|██████████| 37/37 [00:13<00:00,  2.77it/s]

2022-09-11 07:26:59,322 Evaluating as a multi-label problem: False
2022-09-11 07:26:59,349 DEV : loss 0.04408428072929382 - f1-score (micro avg)  0.8955





2022-09-11 07:26:59,570 BAD EPOCHS (no improvement): 1
2022-09-11 07:26:59,573 ----------------------------------------------------------------------------------------------------
2022-09-11 07:27:06,082 epoch 29 - iter 24/245 - loss 0.00405447 - samples/sec: 118.13 - lr: 0.006250
2022-09-11 07:27:13,650 epoch 29 - iter 48/245 - loss 0.00405224 - samples/sec: 101.56 - lr: 0.006250
2022-09-11 07:27:20,152 epoch 29 - iter 72/245 - loss 0.00314415 - samples/sec: 118.23 - lr: 0.006250
2022-09-11 07:27:26,332 epoch 29 - iter 96/245 - loss 0.00327527 - samples/sec: 124.37 - lr: 0.006250
2022-09-11 07:27:33,022 epoch 29 - iter 120/245 - loss 0.00325955 - samples/sec: 114.89 - lr: 0.006250
2022-09-11 07:27:40,299 epoch 29 - iter 144/245 - loss 0.00318890 - samples/sec: 105.63 - lr: 0.006250
2022-09-11 07:27:47,612 epoch 29 - iter 168/245 - loss 0.00322812 - samples/sec: 105.10 - lr: 0.006250
2022-09-11 07:27:56,031 epoch 29 - iter 192/245 - loss 0.00323334 - samples/sec: 91.28 - lr: 0.006250
2

100%|██████████| 37/37 [00:14<00:00,  2.60it/s]

2022-09-11 07:28:26,463 Evaluating as a multi-label problem: False
2022-09-11 07:28:26,488 DEV : loss 0.043866127729415894 - f1-score (micro avg)  0.8946





2022-09-11 07:28:26,710 BAD EPOCHS (no improvement): 2
2022-09-11 07:28:26,713 ----------------------------------------------------------------------------------------------------
2022-09-11 07:28:33,375 epoch 30 - iter 24/245 - loss 0.00242523 - samples/sec: 115.43 - lr: 0.006250
2022-09-11 07:28:40,408 epoch 30 - iter 48/245 - loss 0.00273201 - samples/sec: 109.29 - lr: 0.006250
2022-09-11 07:28:46,701 epoch 30 - iter 72/245 - loss 0.00299400 - samples/sec: 122.13 - lr: 0.006250
2022-09-11 07:28:53,835 epoch 30 - iter 96/245 - loss 0.00300725 - samples/sec: 107.74 - lr: 0.006250
2022-09-11 07:29:01,232 epoch 30 - iter 120/245 - loss 0.00289616 - samples/sec: 103.90 - lr: 0.006250
2022-09-11 07:29:07,889 epoch 30 - iter 144/245 - loss 0.00302854 - samples/sec: 115.46 - lr: 0.006250
2022-09-11 07:29:14,254 epoch 30 - iter 168/245 - loss 0.00295420 - samples/sec: 120.77 - lr: 0.006250
2022-09-11 07:29:22,502 epoch 30 - iter 192/245 - loss 0.00330713 - samples/sec: 93.18 - lr: 0.006250
2

100%|██████████| 37/37 [00:13<00:00,  2.79it/s]

2022-09-11 07:29:52,149 Evaluating as a multi-label problem: False
2022-09-11 07:29:52,176 DEV : loss 0.043949324637651443 - f1-score (micro avg)  0.8955





2022-09-11 07:29:52,405 BAD EPOCHS (no improvement): 3
2022-09-11 07:29:52,411 ----------------------------------------------------------------------------------------------------
2022-09-11 07:29:59,040 epoch 31 - iter 24/245 - loss 0.00454805 - samples/sec: 116.01 - lr: 0.006250
2022-09-11 07:30:06,236 epoch 31 - iter 48/245 - loss 0.00560938 - samples/sec: 106.80 - lr: 0.006250
2022-09-11 07:30:13,017 epoch 31 - iter 72/245 - loss 0.00504959 - samples/sec: 113.35 - lr: 0.006250
2022-09-11 07:30:22,086 epoch 31 - iter 96/245 - loss 0.00439140 - samples/sec: 84.74 - lr: 0.006250
2022-09-11 07:30:28,657 epoch 31 - iter 120/245 - loss 0.00388366 - samples/sec: 116.99 - lr: 0.006250
2022-09-11 07:30:36,069 epoch 31 - iter 144/245 - loss 0.00369791 - samples/sec: 103.69 - lr: 0.006250
2022-09-11 07:30:43,280 epoch 31 - iter 168/245 - loss 0.00360715 - samples/sec: 106.57 - lr: 0.006250
2022-09-11 07:30:49,828 epoch 31 - iter 192/245 - loss 0.00350980 - samples/sec: 117.38 - lr: 0.006250
2

100%|██████████| 37/37 [00:13<00:00,  2.77it/s]

2022-09-11 07:31:18,846 Evaluating as a multi-label problem: False
2022-09-11 07:31:18,873 DEV : loss 0.04438399150967598 - f1-score (micro avg)  0.8971





2022-09-11 07:31:19,099 Epoch    31: reducing learning rate of group 0 to 3.1250e-03.
2022-09-11 07:31:19,101 BAD EPOCHS (no improvement): 4
2022-09-11 07:31:19,105 ----------------------------------------------------------------------------------------------------
2022-09-11 07:31:26,827 epoch 32 - iter 24/245 - loss 0.00429142 - samples/sec: 99.56 - lr: 0.003125
2022-09-11 07:31:34,759 epoch 32 - iter 48/245 - loss 0.00340359 - samples/sec: 96.90 - lr: 0.003125
2022-09-11 07:31:41,577 epoch 32 - iter 72/245 - loss 0.00363354 - samples/sec: 112.75 - lr: 0.003125
2022-09-11 07:31:48,515 epoch 32 - iter 96/245 - loss 0.00363110 - samples/sec: 110.78 - lr: 0.003125
2022-09-11 07:31:55,943 epoch 32 - iter 120/245 - loss 0.00357428 - samples/sec: 103.48 - lr: 0.003125
2022-09-11 07:32:02,801 epoch 32 - iter 144/245 - loss 0.00366022 - samples/sec: 112.07 - lr: 0.003125
2022-09-11 07:32:10,416 epoch 32 - iter 168/245 - loss 0.00362102 - samples/sec: 100.95 - lr: 0.003125
2022-09-11 07:32:17

100%|██████████| 37/37 [00:14<00:00,  2.51it/s]

2022-09-11 07:32:48,470 Evaluating as a multi-label problem: False
2022-09-11 07:32:48,496 DEV : loss 0.043678104877471924 - f1-score (micro avg)  0.8933





2022-09-11 07:32:48,733 BAD EPOCHS (no improvement): 1
2022-09-11 07:32:48,736 ----------------------------------------------------------------------------------------------------
2022-09-11 07:32:56,215 epoch 33 - iter 24/245 - loss 0.00243913 - samples/sec: 102.81 - lr: 0.003125
2022-09-11 07:33:03,961 epoch 33 - iter 48/245 - loss 0.00257112 - samples/sec: 99.23 - lr: 0.003125
2022-09-11 07:33:10,262 epoch 33 - iter 72/245 - loss 0.00248630 - samples/sec: 121.98 - lr: 0.003125
2022-09-11 07:33:16,800 epoch 33 - iter 96/245 - loss 0.00246568 - samples/sec: 117.58 - lr: 0.003125
2022-09-11 07:33:24,300 epoch 33 - iter 120/245 - loss 0.00258169 - samples/sec: 102.47 - lr: 0.003125
2022-09-11 07:33:33,703 epoch 33 - iter 144/245 - loss 0.00263748 - samples/sec: 81.73 - lr: 0.003125
2022-09-11 07:33:41,361 epoch 33 - iter 168/245 - loss 0.00285047 - samples/sec: 100.38 - lr: 0.003125
2022-09-11 07:33:48,283 epoch 33 - iter 192/245 - loss 0.00301002 - samples/sec: 111.04 - lr: 0.003125
20

100%|██████████| 37/37 [00:13<00:00,  2.71it/s]

2022-09-11 07:34:16,587 Evaluating as a multi-label problem: False
2022-09-11 07:34:16,613 DEV : loss 0.04361075535416603 - f1-score (micro avg)  0.8954





2022-09-11 07:34:16,846 BAD EPOCHS (no improvement): 2
2022-09-11 07:34:16,850 ----------------------------------------------------------------------------------------------------
2022-09-11 07:34:24,716 epoch 34 - iter 24/245 - loss 0.00260229 - samples/sec: 97.74 - lr: 0.003125
2022-09-11 07:34:32,704 epoch 34 - iter 48/245 - loss 0.00296325 - samples/sec: 96.20 - lr: 0.003125
2022-09-11 07:34:38,990 epoch 34 - iter 72/245 - loss 0.00358775 - samples/sec: 122.30 - lr: 0.003125
2022-09-11 07:34:46,589 epoch 34 - iter 96/245 - loss 0.00367484 - samples/sec: 101.15 - lr: 0.003125
2022-09-11 07:34:53,518 epoch 34 - iter 120/245 - loss 0.00366027 - samples/sec: 110.93 - lr: 0.003125
2022-09-11 07:35:00,197 epoch 34 - iter 144/245 - loss 0.00359607 - samples/sec: 115.08 - lr: 0.003125
2022-09-11 07:35:07,052 epoch 34 - iter 168/245 - loss 0.00351314 - samples/sec: 112.12 - lr: 0.003125
2022-09-11 07:35:14,421 epoch 34 - iter 192/245 - loss 0.00343708 - samples/sec: 104.32 - lr: 0.003125
20

100%|██████████| 37/37 [00:14<00:00,  2.51it/s]

2022-09-11 07:35:45,841 Evaluating as a multi-label problem: False
2022-09-11 07:35:45,869 DEV : loss 0.04416164010763168 - f1-score (micro avg)  0.8931





2022-09-11 07:35:46,104 BAD EPOCHS (no improvement): 3
2022-09-11 07:35:46,107 ----------------------------------------------------------------------------------------------------
2022-09-11 07:35:53,321 epoch 35 - iter 24/245 - loss 0.00361474 - samples/sec: 106.57 - lr: 0.003125
2022-09-11 07:35:59,961 epoch 35 - iter 48/245 - loss 0.00327079 - samples/sec: 115.76 - lr: 0.003125
2022-09-11 07:36:07,085 epoch 35 - iter 72/245 - loss 0.00346754 - samples/sec: 107.90 - lr: 0.003125
2022-09-11 07:36:14,815 epoch 35 - iter 96/245 - loss 0.00318446 - samples/sec: 99.42 - lr: 0.003125
2022-09-11 07:36:22,201 epoch 35 - iter 120/245 - loss 0.00345268 - samples/sec: 104.07 - lr: 0.003125
2022-09-11 07:36:29,413 epoch 35 - iter 144/245 - loss 0.00333674 - samples/sec: 106.58 - lr: 0.003125
2022-09-11 07:36:37,928 epoch 35 - iter 168/245 - loss 0.00316901 - samples/sec: 90.25 - lr: 0.003125
2022-09-11 07:36:45,746 epoch 35 - iter 192/245 - loss 0.00302181 - samples/sec: 98.30 - lr: 0.003125
202

100%|██████████| 37/37 [00:13<00:00,  2.71it/s]

2022-09-11 07:37:14,979 Evaluating as a multi-label problem: False
2022-09-11 07:37:15,008 DEV : loss 0.04418227821588516 - f1-score (micro avg)  0.8946





2022-09-11 07:37:15,240 Epoch    35: reducing learning rate of group 0 to 1.5625e-03.
2022-09-11 07:37:15,242 BAD EPOCHS (no improvement): 4
2022-09-11 07:37:15,246 ----------------------------------------------------------------------------------------------------
2022-09-11 07:37:21,794 epoch 36 - iter 24/245 - loss 0.00393147 - samples/sec: 117.45 - lr: 0.001563
2022-09-11 07:37:28,499 epoch 36 - iter 48/245 - loss 0.00365323 - samples/sec: 114.63 - lr: 0.001563
2022-09-11 07:37:36,346 epoch 36 - iter 72/245 - loss 0.00346865 - samples/sec: 97.94 - lr: 0.001563
2022-09-11 07:37:44,070 epoch 36 - iter 96/245 - loss 0.00314774 - samples/sec: 99.50 - lr: 0.001563
2022-09-11 07:37:50,694 epoch 36 - iter 120/245 - loss 0.00314635 - samples/sec: 116.05 - lr: 0.001563
2022-09-11 07:37:58,699 epoch 36 - iter 144/245 - loss 0.00309456 - samples/sec: 96.02 - lr: 0.001563
2022-09-11 07:38:07,208 epoch 36 - iter 168/245 - loss 0.00314451 - samples/sec: 90.32 - lr: 0.001563
2022-09-11 07:38:14,2

100%|██████████| 37/37 [00:13<00:00,  2.72it/s]

2022-09-11 07:38:44,616 Evaluating as a multi-label problem: False
2022-09-11 07:38:44,643 DEV : loss 0.04426480829715729 - f1-score (micro avg)  0.8938





2022-09-11 07:38:44,879 BAD EPOCHS (no improvement): 1
2022-09-11 07:38:44,882 ----------------------------------------------------------------------------------------------------
2022-09-11 07:38:52,155 epoch 37 - iter 24/245 - loss 0.00308358 - samples/sec: 105.70 - lr: 0.001563
2022-09-11 07:38:58,634 epoch 37 - iter 48/245 - loss 0.00295320 - samples/sec: 118.63 - lr: 0.001563
2022-09-11 07:39:06,150 epoch 37 - iter 72/245 - loss 0.00342832 - samples/sec: 102.25 - lr: 0.001563
2022-09-11 07:39:13,546 epoch 37 - iter 96/245 - loss 0.00344793 - samples/sec: 103.92 - lr: 0.001563
2022-09-11 07:39:20,535 epoch 37 - iter 120/245 - loss 0.00344344 - samples/sec: 110.00 - lr: 0.001563
2022-09-11 07:39:27,854 epoch 37 - iter 144/245 - loss 0.00323409 - samples/sec: 105.01 - lr: 0.001563
2022-09-11 07:39:34,831 epoch 37 - iter 168/245 - loss 0.00313559 - samples/sec: 110.17 - lr: 0.001563
2022-09-11 07:39:42,315 epoch 37 - iter 192/245 - loss 0.00322865 - samples/sec: 102.70 - lr: 0.001563


100%|██████████| 37/37 [00:14<00:00,  2.52it/s]

2022-09-11 07:40:14,728 Evaluating as a multi-label problem: False
2022-09-11 07:40:14,759 DEV : loss 0.044284120202064514 - f1-score (micro avg)  0.8943





2022-09-11 07:40:14,997 BAD EPOCHS (no improvement): 2
2022-09-11 07:40:15,001 ----------------------------------------------------------------------------------------------------
2022-09-11 07:40:22,045 epoch 38 - iter 24/245 - loss 0.00211057 - samples/sec: 109.15 - lr: 0.001563
2022-09-11 07:40:28,813 epoch 38 - iter 48/245 - loss 0.00284537 - samples/sec: 113.58 - lr: 0.001563
2022-09-11 07:40:37,124 epoch 38 - iter 72/245 - loss 0.00296843 - samples/sec: 92.48 - lr: 0.001563
2022-09-11 07:40:44,708 epoch 38 - iter 96/245 - loss 0.00280432 - samples/sec: 101.35 - lr: 0.001563
2022-09-11 07:40:51,841 epoch 38 - iter 120/245 - loss 0.00299036 - samples/sec: 107.76 - lr: 0.001563
2022-09-11 07:40:58,741 epoch 38 - iter 144/245 - loss 0.00326511 - samples/sec: 111.39 - lr: 0.001563
2022-09-11 07:41:06,351 epoch 38 - iter 168/245 - loss 0.00319926 - samples/sec: 100.99 - lr: 0.001563
2022-09-11 07:41:12,208 epoch 38 - iter 192/245 - loss 0.00319481 - samples/sec: 131.26 - lr: 0.001563
2

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 07:41:42,479 Evaluating as a multi-label problem: False
2022-09-11 07:41:42,511 DEV : loss 0.044395118951797485 - f1-score (micro avg)  0.8937





2022-09-11 07:41:42,744 BAD EPOCHS (no improvement): 3
2022-09-11 07:41:42,747 ----------------------------------------------------------------------------------------------------
2022-09-11 07:41:49,808 epoch 39 - iter 24/245 - loss 0.00196330 - samples/sec: 108.89 - lr: 0.001563
2022-09-11 07:41:55,727 epoch 39 - iter 48/245 - loss 0.00233870 - samples/sec: 129.87 - lr: 0.001563
2022-09-11 07:42:02,903 epoch 39 - iter 72/245 - loss 0.00257846 - samples/sec: 107.11 - lr: 0.001563
2022-09-11 07:42:10,152 epoch 39 - iter 96/245 - loss 0.00250408 - samples/sec: 106.03 - lr: 0.001563
2022-09-11 07:42:17,954 epoch 39 - iter 120/245 - loss 0.00255778 - samples/sec: 98.50 - lr: 0.001563
2022-09-11 07:42:25,732 epoch 39 - iter 144/245 - loss 0.00278644 - samples/sec: 98.82 - lr: 0.001563
2022-09-11 07:42:32,578 epoch 39 - iter 168/245 - loss 0.00267218 - samples/sec: 112.27 - lr: 0.001563
2022-09-11 07:42:39,530 epoch 39 - iter 192/245 - loss 0.00271845 - samples/sec: 110.56 - lr: 0.001563
20

100%|██████████| 37/37 [00:14<00:00,  2.53it/s]

2022-09-11 07:43:10,733 Evaluating as a multi-label problem: False
2022-09-11 07:43:10,759 DEV : loss 0.044437699019908905 - f1-score (micro avg)  0.8951





2022-09-11 07:43:10,987 Epoch    39: reducing learning rate of group 0 to 7.8125e-04.
2022-09-11 07:43:10,988 BAD EPOCHS (no improvement): 4
2022-09-11 07:43:10,992 ----------------------------------------------------------------------------------------------------
2022-09-11 07:43:18,708 epoch 40 - iter 24/245 - loss 0.00512617 - samples/sec: 99.66 - lr: 0.000781
2022-09-11 07:43:26,374 epoch 40 - iter 48/245 - loss 0.00446527 - samples/sec: 100.25 - lr: 0.000781
2022-09-11 07:43:32,759 epoch 40 - iter 72/245 - loss 0.00416410 - samples/sec: 120.37 - lr: 0.000781
2022-09-11 07:43:40,726 epoch 40 - iter 96/245 - loss 0.00405825 - samples/sec: 96.47 - lr: 0.000781
2022-09-11 07:43:48,015 epoch 40 - iter 120/245 - loss 0.00397989 - samples/sec: 105.45 - lr: 0.000781
2022-09-11 07:43:55,971 epoch 40 - iter 144/245 - loss 0.00388383 - samples/sec: 96.60 - lr: 0.000781
2022-09-11 07:44:04,032 epoch 40 - iter 168/245 - loss 0.00362526 - samples/sec: 95.34 - lr: 0.000781
2022-09-11 07:44:10,9

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 07:44:38,790 Evaluating as a multi-label problem: False
2022-09-11 07:44:38,816 DEV : loss 0.0444183424115181 - f1-score (micro avg)  0.8942





2022-09-11 07:44:39,046 BAD EPOCHS (no improvement): 1
2022-09-11 07:44:39,049 ----------------------------------------------------------------------------------------------------
2022-09-11 07:44:45,449 epoch 41 - iter 24/245 - loss 0.00261871 - samples/sec: 120.15 - lr: 0.000781
2022-09-11 07:44:53,112 epoch 41 - iter 48/245 - loss 0.00244484 - samples/sec: 100.30 - lr: 0.000781
2022-09-11 07:44:59,838 epoch 41 - iter 72/245 - loss 0.00276735 - samples/sec: 114.26 - lr: 0.000781
2022-09-11 07:45:07,665 epoch 41 - iter 96/245 - loss 0.00318430 - samples/sec: 98.20 - lr: 0.000781
2022-09-11 07:45:14,600 epoch 41 - iter 120/245 - loss 0.00311658 - samples/sec: 110.84 - lr: 0.000781
2022-09-11 07:45:22,359 epoch 41 - iter 144/245 - loss 0.00310530 - samples/sec: 99.04 - lr: 0.000781
2022-09-11 07:45:30,963 epoch 41 - iter 168/245 - loss 0.00312851 - samples/sec: 89.32 - lr: 0.000781
2022-09-11 07:45:39,392 epoch 41 - iter 192/245 - loss 0.00328040 - samples/sec: 91.18 - lr: 0.000781
2022

100%|██████████| 37/37 [00:13<00:00,  2.73it/s]

2022-09-11 07:46:08,085 Evaluating as a multi-label problem: False
2022-09-11 07:46:08,111 DEV : loss 0.04442427307367325 - f1-score (micro avg)  0.8951





2022-09-11 07:46:08,344 BAD EPOCHS (no improvement): 2
2022-09-11 07:46:08,348 ----------------------------------------------------------------------------------------------------
2022-09-11 07:46:15,558 epoch 42 - iter 24/245 - loss 0.00319939 - samples/sec: 106.64 - lr: 0.000781
2022-09-11 07:46:22,273 epoch 42 - iter 48/245 - loss 0.00305363 - samples/sec: 114.48 - lr: 0.000781
2022-09-11 07:46:29,794 epoch 42 - iter 72/245 - loss 0.00352813 - samples/sec: 102.19 - lr: 0.000781
2022-09-11 07:46:36,989 epoch 42 - iter 96/245 - loss 0.00326044 - samples/sec: 106.82 - lr: 0.000781
2022-09-11 07:46:44,089 epoch 42 - iter 120/245 - loss 0.00327930 - samples/sec: 108.25 - lr: 0.000781
2022-09-11 07:46:50,673 epoch 42 - iter 144/245 - loss 0.00329026 - samples/sec: 116.77 - lr: 0.000781
2022-09-11 07:46:57,032 epoch 42 - iter 168/245 - loss 0.00329463 - samples/sec: 120.88 - lr: 0.000781
2022-09-11 07:47:05,727 epoch 42 - iter 192/245 - loss 0.00324120 - samples/sec: 88.38 - lr: 0.000781
2

100%|██████████| 37/37 [00:14<00:00,  2.53it/s]

2022-09-11 07:47:37,136 Evaluating as a multi-label problem: False
2022-09-11 07:47:37,163 DEV : loss 0.04427662491798401 - f1-score (micro avg)  0.8951





2022-09-11 07:47:37,401 BAD EPOCHS (no improvement): 3
2022-09-11 07:47:37,405 ----------------------------------------------------------------------------------------------------
2022-09-11 07:47:44,992 epoch 43 - iter 24/245 - loss 0.00289363 - samples/sec: 101.34 - lr: 0.000781
2022-09-11 07:47:52,895 epoch 43 - iter 48/245 - loss 0.00314890 - samples/sec: 97.24 - lr: 0.000781
2022-09-11 07:47:59,949 epoch 43 - iter 72/245 - loss 0.00300594 - samples/sec: 108.96 - lr: 0.000781
2022-09-11 07:48:06,519 epoch 43 - iter 96/245 - loss 0.00316608 - samples/sec: 117.02 - lr: 0.000781
2022-09-11 07:48:14,841 epoch 43 - iter 120/245 - loss 0.00325343 - samples/sec: 92.35 - lr: 0.000781
2022-09-11 07:48:23,105 epoch 43 - iter 144/245 - loss 0.00325899 - samples/sec: 93.00 - lr: 0.000781
2022-09-11 07:48:30,358 epoch 43 - iter 168/245 - loss 0.00336237 - samples/sec: 105.97 - lr: 0.000781
2022-09-11 07:48:36,935 epoch 43 - iter 192/245 - loss 0.00331150 - samples/sec: 116.85 - lr: 0.000781
202

100%|██████████| 37/37 [00:13<00:00,  2.74it/s]

2022-09-11 07:49:05,264 Evaluating as a multi-label problem: False
2022-09-11 07:49:05,290 DEV : loss 0.04427645727992058 - f1-score (micro avg)  0.896





2022-09-11 07:49:05,520 Epoch    43: reducing learning rate of group 0 to 3.9063e-04.
2022-09-11 07:49:05,522 BAD EPOCHS (no improvement): 4
2022-09-11 07:49:05,524 ----------------------------------------------------------------------------------------------------
2022-09-11 07:49:12,350 epoch 44 - iter 24/245 - loss 0.00181734 - samples/sec: 112.68 - lr: 0.000391
2022-09-11 07:49:18,518 epoch 44 - iter 48/245 - loss 0.00216948 - samples/sec: 124.64 - lr: 0.000391
2022-09-11 07:49:25,656 epoch 44 - iter 72/245 - loss 0.00286764 - samples/sec: 107.68 - lr: 0.000391
2022-09-11 07:49:32,994 epoch 44 - iter 96/245 - loss 0.00294446 - samples/sec: 104.74 - lr: 0.000391
2022-09-11 07:49:40,906 epoch 44 - iter 120/245 - loss 0.00326432 - samples/sec: 97.15 - lr: 0.000391
2022-09-11 07:49:48,631 epoch 44 - iter 144/245 - loss 0.00335884 - samples/sec: 99.49 - lr: 0.000391
2022-09-11 07:49:56,283 epoch 44 - iter 168/245 - loss 0.00323294 - samples/sec: 100.44 - lr: 0.000391
2022-09-11 07:50:03

100%|██████████| 37/37 [00:14<00:00,  2.51it/s]

2022-09-11 07:50:35,044 Evaluating as a multi-label problem: False
2022-09-11 07:50:35,072 DEV : loss 0.04428698867559433 - f1-score (micro avg)  0.8957





2022-09-11 07:50:35,308 BAD EPOCHS (no improvement): 1
2022-09-11 07:50:35,311 ----------------------------------------------------------------------------------------------------
2022-09-11 07:50:43,191 epoch 45 - iter 24/245 - loss 0.00387270 - samples/sec: 97.57 - lr: 0.000391
2022-09-11 07:50:52,297 epoch 45 - iter 48/245 - loss 0.00392892 - samples/sec: 84.40 - lr: 0.000391
2022-09-11 07:50:59,124 epoch 45 - iter 72/245 - loss 0.00408650 - samples/sec: 112.59 - lr: 0.000391
2022-09-11 07:51:06,020 epoch 45 - iter 96/245 - loss 0.00387405 - samples/sec: 111.46 - lr: 0.000391
2022-09-11 07:51:13,210 epoch 45 - iter 120/245 - loss 0.00367962 - samples/sec: 106.90 - lr: 0.000391
2022-09-11 07:51:20,230 epoch 45 - iter 144/245 - loss 0.00348704 - samples/sec: 109.51 - lr: 0.000391
2022-09-11 07:51:27,182 epoch 45 - iter 168/245 - loss 0.00345561 - samples/sec: 110.56 - lr: 0.000391
2022-09-11 07:51:34,906 epoch 45 - iter 192/245 - loss 0.00337509 - samples/sec: 99.49 - lr: 0.000391
202

100%|██████████| 37/37 [00:13<00:00,  2.71it/s]

2022-09-11 07:52:03,527 Evaluating as a multi-label problem: False
2022-09-11 07:52:03,558 DEV : loss 0.04430179297924042 - f1-score (micro avg)  0.8957





2022-09-11 07:52:03,802 BAD EPOCHS (no improvement): 2
2022-09-11 07:52:03,805 ----------------------------------------------------------------------------------------------------
2022-09-11 07:52:11,845 epoch 46 - iter 24/245 - loss 0.00411291 - samples/sec: 95.63 - lr: 0.000391
2022-09-11 07:52:19,544 epoch 46 - iter 48/245 - loss 0.00416715 - samples/sec: 99.81 - lr: 0.000391
2022-09-11 07:52:27,342 epoch 46 - iter 72/245 - loss 0.00439639 - samples/sec: 98.56 - lr: 0.000391
2022-09-11 07:52:34,473 epoch 46 - iter 96/245 - loss 0.00399346 - samples/sec: 107.78 - lr: 0.000391
2022-09-11 07:52:42,110 epoch 46 - iter 120/245 - loss 0.00380030 - samples/sec: 100.65 - lr: 0.000391
2022-09-11 07:52:49,491 epoch 46 - iter 144/245 - loss 0.00347998 - samples/sec: 104.13 - lr: 0.000391
2022-09-11 07:52:57,674 epoch 46 - iter 168/245 - loss 0.00349105 - samples/sec: 93.92 - lr: 0.000391
2022-09-11 07:53:04,703 epoch 46 - iter 192/245 - loss 0.00345159 - samples/sec: 109.35 - lr: 0.000391
2022

100%|██████████| 37/37 [00:13<00:00,  2.72it/s]

2022-09-11 07:53:33,852 Evaluating as a multi-label problem: False
2022-09-11 07:53:33,879 DEV : loss 0.04433118551969528 - f1-score (micro avg)  0.896





2022-09-11 07:53:34,111 BAD EPOCHS (no improvement): 3
2022-09-11 07:53:34,114 ----------------------------------------------------------------------------------------------------
2022-09-11 07:53:41,708 epoch 47 - iter 24/245 - loss 0.00340322 - samples/sec: 101.26 - lr: 0.000391
2022-09-11 07:53:49,823 epoch 47 - iter 48/245 - loss 0.00357361 - samples/sec: 94.70 - lr: 0.000391
2022-09-11 07:53:57,244 epoch 47 - iter 72/245 - loss 0.00314236 - samples/sec: 103.59 - lr: 0.000391
2022-09-11 07:54:04,262 epoch 47 - iter 96/245 - loss 0.00306778 - samples/sec: 109.54 - lr: 0.000391
2022-09-11 07:54:11,884 epoch 47 - iter 120/245 - loss 0.00306203 - samples/sec: 100.84 - lr: 0.000391
2022-09-11 07:54:18,847 epoch 47 - iter 144/245 - loss 0.00314432 - samples/sec: 110.40 - lr: 0.000391
2022-09-11 07:54:26,520 epoch 47 - iter 168/245 - loss 0.00323129 - samples/sec: 100.15 - lr: 0.000391
2022-09-11 07:54:34,083 epoch 47 - iter 192/245 - loss 0.00319975 - samples/sec: 101.64 - lr: 0.000391
2

100%|██████████| 37/37 [00:14<00:00,  2.51it/s]

2022-09-11 07:55:02,994 Evaluating as a multi-label problem: False
2022-09-11 07:55:03,020 DEV : loss 0.04430649057030678 - f1-score (micro avg)  0.896





2022-09-11 07:55:03,261 Epoch    47: reducing learning rate of group 0 to 1.9531e-04.
2022-09-11 07:55:03,263 BAD EPOCHS (no improvement): 4
2022-09-11 07:55:03,265 ----------------------------------------------------------------------------------------------------
2022-09-11 07:55:11,122 epoch 48 - iter 24/245 - loss 0.00254217 - samples/sec: 97.87 - lr: 0.000195
2022-09-11 07:55:17,510 epoch 48 - iter 48/245 - loss 0.00252657 - samples/sec: 120.35 - lr: 0.000195
2022-09-11 07:55:26,192 epoch 48 - iter 72/245 - loss 0.00309463 - samples/sec: 88.51 - lr: 0.000195
2022-09-11 07:55:32,341 epoch 48 - iter 96/245 - loss 0.00356310 - samples/sec: 125.04 - lr: 0.000195
2022-09-11 07:55:39,901 epoch 48 - iter 120/245 - loss 0.00335675 - samples/sec: 101.67 - lr: 0.000195
2022-09-11 07:55:47,219 epoch 48 - iter 144/245 - loss 0.00339791 - samples/sec: 105.02 - lr: 0.000195
2022-09-11 07:55:54,299 epoch 48 - iter 168/245 - loss 0.00325185 - samples/sec: 108.56 - lr: 0.000195
2022-09-11 07:56:01

100%|██████████| 37/37 [00:13<00:00,  2.71it/s]

2022-09-11 07:56:31,304 Evaluating as a multi-label problem: False
2022-09-11 07:56:31,335 DEV : loss 0.04428604245185852 - f1-score (micro avg)  0.8957





2022-09-11 07:56:31,566 BAD EPOCHS (no improvement): 1
2022-09-11 07:56:31,569 ----------------------------------------------------------------------------------------------------
2022-09-11 07:56:39,115 epoch 49 - iter 24/245 - loss 0.00535039 - samples/sec: 101.90 - lr: 0.000195
2022-09-11 07:56:45,727 epoch 49 - iter 48/245 - loss 0.00411403 - samples/sec: 116.24 - lr: 0.000195
2022-09-11 07:56:52,156 epoch 49 - iter 72/245 - loss 0.00352356 - samples/sec: 119.58 - lr: 0.000195
2022-09-11 07:57:00,127 epoch 49 - iter 96/245 - loss 0.00314097 - samples/sec: 96.42 - lr: 0.000195
2022-09-11 07:57:06,706 epoch 49 - iter 120/245 - loss 0.00300373 - samples/sec: 116.85 - lr: 0.000195
2022-09-11 07:57:13,578 epoch 49 - iter 144/245 - loss 0.00311646 - samples/sec: 111.85 - lr: 0.000195
2022-09-11 07:57:21,545 epoch 49 - iter 168/245 - loss 0.00334224 - samples/sec: 96.46 - lr: 0.000195
2022-09-11 07:57:28,291 epoch 49 - iter 192/245 - loss 0.00321776 - samples/sec: 113.95 - lr: 0.000195
20

100%|██████████| 37/37 [00:14<00:00,  2.53it/s]

2022-09-11 07:58:01,049 Evaluating as a multi-label problem: False
2022-09-11 07:58:01,075 DEV : loss 0.044288791716098785 - f1-score (micro avg)  0.8963





2022-09-11 07:58:01,312 BAD EPOCHS (no improvement): 2
2022-09-11 07:58:01,316 ----------------------------------------------------------------------------------------------------
2022-09-11 07:58:08,246 epoch 50 - iter 24/245 - loss 0.00314183 - samples/sec: 110.96 - lr: 0.000195
2022-09-11 07:58:15,843 epoch 50 - iter 48/245 - loss 0.00322227 - samples/sec: 101.16 - lr: 0.000195
2022-09-11 07:58:22,925 epoch 50 - iter 72/245 - loss 0.00317290 - samples/sec: 108.54 - lr: 0.000195
2022-09-11 07:58:29,465 epoch 50 - iter 96/245 - loss 0.00325495 - samples/sec: 117.55 - lr: 0.000195
2022-09-11 07:58:36,578 epoch 50 - iter 120/245 - loss 0.00311008 - samples/sec: 108.04 - lr: 0.000195
2022-09-11 07:58:42,880 epoch 50 - iter 144/245 - loss 0.00306380 - samples/sec: 121.96 - lr: 0.000195
2022-09-11 07:58:50,828 epoch 50 - iter 168/245 - loss 0.00322382 - samples/sec: 96.70 - lr: 0.000195
2022-09-11 07:58:58,106 epoch 50 - iter 192/245 - loss 0.00317520 - samples/sec: 105.61 - lr: 0.000195
2

100%|██████████| 37/37 [00:13<00:00,  2.72it/s]

2022-09-11 07:59:29,547 Evaluating as a multi-label problem: False
2022-09-11 07:59:29,574 DEV : loss 0.04428573325276375 - f1-score (micro avg)  0.8957





2022-09-11 07:59:29,811 BAD EPOCHS (no improvement): 3
2022-09-11 07:59:29,814 ----------------------------------------------------------------------------------------------------
2022-09-11 07:59:37,381 epoch 51 - iter 24/245 - loss 0.00307417 - samples/sec: 101.61 - lr: 0.000195
2022-09-11 07:59:44,774 epoch 51 - iter 48/245 - loss 0.00342369 - samples/sec: 103.97 - lr: 0.000195
2022-09-11 07:59:53,436 epoch 51 - iter 72/245 - loss 0.00319220 - samples/sec: 88.72 - lr: 0.000195
2022-09-11 08:00:00,039 epoch 51 - iter 96/245 - loss 0.00326406 - samples/sec: 116.40 - lr: 0.000195
2022-09-11 08:00:06,998 epoch 51 - iter 120/245 - loss 0.00315743 - samples/sec: 110.45 - lr: 0.000195
2022-09-11 08:00:13,887 epoch 51 - iter 144/245 - loss 0.00292692 - samples/sec: 111.58 - lr: 0.000195
2022-09-11 08:00:21,026 epoch 51 - iter 168/245 - loss 0.00315130 - samples/sec: 107.66 - lr: 0.000195
2022-09-11 08:00:28,891 epoch 51 - iter 192/245 - loss 0.00340183 - samples/sec: 97.71 - lr: 0.000195
20

100%|██████████| 37/37 [00:13<00:00,  2.72it/s]

2022-09-11 08:00:59,277 Evaluating as a multi-label problem: False
2022-09-11 08:00:59,306 DEV : loss 0.04428378865122795 - f1-score (micro avg)  0.8966





2022-09-11 08:00:59,543 Epoch    51: reducing learning rate of group 0 to 9.7656e-05.
2022-09-11 08:00:59,545 BAD EPOCHS (no improvement): 4
2022-09-11 08:00:59,549 ----------------------------------------------------------------------------------------------------
2022-09-11 08:00:59,552 ----------------------------------------------------------------------------------------------------
2022-09-11 08:00:59,555 learning rate too small - quitting training!
2022-09-11 08:00:59,556 ----------------------------------------------------------------------------------------------------
2022-09-11 08:01:04,238 ----------------------------------------------------------------------------------------------------
2022-09-11 08:01:04,241 loading file resources/taggers/sota-ner-flair/best-model.pt
2022-09-11 08:01:06,203 SequenceTagger predicts: Dictionary with 27 tags: O, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-LEGISLACAO, B-LEGISLACAO, E-LEGISLACAO, I-LEGISLACAO, S-PESSOA, B-P

100%|██████████| 44/44 [00:29<00:00,  1.47it/s]

2022-09-11 08:01:36,609 Evaluating as a multi-label problem: False
2022-09-11 08:01:36,636 0.8976	0.9134	0.9055	0.8341
2022-09-11 08:01:36,637 
Results:
- F-score (micro) 0.9055
- F-score (macro) 0.8873
- Accuracy 0.8341

By class:
                precision    recall  f1-score   support

   ORGANIZACAO     0.8605    0.8743    0.8673       501
    LEGISLACAO     0.9479    0.9630    0.9554       378
        PESSOA     0.9174    0.9528    0.9347       233
JURISPRUDENCIA     0.8342    0.8973    0.8646       185
         TEMPO     0.9727    0.9271    0.9493       192
         LOCAL     0.7609    0.7447    0.7527        47

     micro avg     0.8976    0.9134    0.9055      1536
     macro avg     0.8823    0.8932    0.8873      1536
  weighted avg     0.8984    0.9134    0.9056      1536

2022-09-11 08:01:36,641 ----------------------------------------------------------------------------------------------------





{'test_score': 0.9054533720555019,
 'dev_score_history': [0.8864688265722843,
  0.8829026937877955,
  0.8812071330589849,
  0.8873007146783947,
  0.8839900799118214,
  0.8802797955340329,
  0.8768175582990398,
  0.8880351262349067,
  0.9005975013579577,
  0.8845303867403315,
  0.8867048867048868,
  0.8793817278498481,
  0.8889496717724289,
  0.8993178717598909,
  0.902981029810298,
  0.8981152690521715,
  0.8932676518883416,
  0.884308876064853,
  0.893467064245053,
  0.8933260393873086,
  0.8932676518883416,
  0.8965517241379312,
  0.8921139101861995,
  0.895856052344602,
  0.8950095445868557,
  0.8951239444293109,
  0.8936750272628134,
  0.8955142231947483,
  0.8945791337510215,
  0.8954979536152796,
  0.8970588235294118,
  0.8932568932568933,
  0.8954408954408954,
  0.8930817610062892,
  0.8945931185144729,
  0.8938028938028939,
  0.8943488943488944,
  0.8937448784485114,
  0.8950819672131148,
  0.8941755537325676,
  0.8950819672131148,
  0.8951392681594756,
  0.896043656207367,
  0

Quero testar os corpora LeNER_br, Ulisses e HAREM. Também gostaria de testar uma rede treinada em um corpus diferente e quero lidar com as entidades.

# Corpus PL-Corpus

## Vetor Estático Pt-Wiki-Fasttext


### Imports

In [None]:
## Imports
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import WordEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mounting Google Drive
# Mount the Colab user's Drive at /content/drive; the corpus folder and the
# model output path used by later cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Loading the corpus from column-formatted files
# Folder in which the train, dev and test files reside
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria'

# Column layout handed to ColumnCorpus: column 0 is mapped to 'text', column 1 to 'ner'
columns = {0: 'text', 1: 'ner'}

# Build the corpus from the data folder, the column layout and the three split files
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='train.txt',
    dev_file='valid.txt',
    test_file='test.txt',
)

## Task
# Label type to predict; reused by the label-dictionary and tagger cells
label_type = 'ner'

2022-10-03 18:40:31,812 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria
2022-10-03 18:40:31,814 Train: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/train.txt
2022-10-03 18:40:31,816 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/valid.txt
2022-10-03 18:40:31,817 Test: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/test.txt


In [None]:
## Label dictionary
# Build the dictionary of `label_type` labels from the corpus and print it
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

2022-10-03 18:40:37,873 Computing label dictionary. Progress:


6667it [00:00, 50770.50it/s]

2022-10-03 18:40:38,055 Dictionary created for label 'ner' with 8 values: PESSOA (seen 628 times), FUNDAMENTO (seen 490 times), ORGANIZACAO (seen 435 times), DATA (seen 433 times), LOCAL (seen 369 times), PRODUTODELEI (seen 230 times), EVENTO (seen 9 times)
Dictionary with 8 tags: <unk>, PESSOA, FUNDAMENTO, ORGANIZACAO, DATA, LOCAL, PRODUTODELEI, EVENTO





### Embeddings

In [None]:
## Embeddings
# Initialize the 'pt' word embeddings (the static Pt-Wiki-Fasttext vectors
# named in this section's heading)
embeddings = WordEmbeddings('pt')

2022-10-03 18:40:38,193 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M.vectors.npy not found in cache, downloading to /tmp/tmpw2ulevpc


100%|██████████| 710528528/710528528 [00:18<00:00, 38731417.43B/s]

2022-10-03 18:40:56,606 copying /tmp/tmpw2ulevpc to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M.vectors.npy





2022-10-03 18:40:58,842 removing temp file /tmp/tmpw2ulevpc
2022-10-03 18:40:59,010 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M not found in cache, downloading to /tmp/tmplmp3y9p2


100%|██████████| 23541010/23541010 [00:00<00:00, 36033178.74B/s]

2022-10-03 18:40:59,731 copying /tmp/tmplmp3y9p2 to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M
2022-10-03 18:40:59,760 removing temp file /tmp/tmplmp3y9p2





### Treino

In [None]:
## Initializing the model
# Sequence tagger configuration: the embeddings, label dictionary and label
# type come from the previous cells; a CRF layer is enabled.
tagger_options = dict(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,
)
tagger = SequenceTagger(**tagger_options)

2022-10-03 18:41:02,599 SequenceTagger predicts: Dictionary with 29 tags: O, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-FUNDAMENTO, B-FUNDAMENTO, E-FUNDAMENTO, I-FUNDAMENTO, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-DATA, B-DATA, E-DATA, I-DATA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, S-PRODUTODELEI, B-PRODUTODELEI, E-PRODUTODELEI, I-PRODUTODELEI, S-EVENTO, B-EVENTO, E-EVENTO, I-EVENTO


In [None]:
## Training the model
# Base path on the mounted Drive that is handed to trainer.train
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

# Hyperparameters for this run, grouped in one place
train_options = {
    'learning_rate': 0.1,
    'mini_batch_size': 32,
    'max_epochs': 100,
}

# Pair the tagger with the corpus and start training; the call stays the last
# expression of the cell so the notebook still displays its return value
trainer = ModelTrainer(tagger, corpus)
trainer.train(path, **train_options)

2022-10-03 18:41:12,330 ----------------------------------------------------------------------------------------------------
2022-10-03 18:41:12,331 Model: "SequenceTagger(
  (embeddings): WordEmbeddings(
    'pt'
    (embedding): Embedding(592108, 300)
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=300, out_features=300, bias=True)
  (rnn): LSTM(300, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=512, out_features=31, bias=True)
  (loss_function): ViterbiLoss()
  (crf): CRF()
)"
2022-10-03 18:41:12,335 ----------------------------------------------------------------------------------------------------
2022-10-03 18:41:12,336 Corpus: "Corpus: 6667 train + 1429 dev + 1430 test sentences"
2022-10-03 18:41:12,338 ----------------------------------------------------------------------------------------------------
2022-10-03 18:41:12,339 Parameters:
2022-10-03 18:41:12,341  - learning_rate: "

100%|██████████| 45/45 [00:06<00:00,  6.64it/s]

2022-10-03 18:41:46,276 Evaluating as a multi-label problem: False
2022-10-03 18:41:46,290 DEV : loss 0.24962784349918365 - f1-score (micro avg)  0.1874





2022-10-03 18:41:46,353 BAD EPOCHS (no improvement): 0
2022-10-03 18:41:46,357 saving best model
2022-10-03 18:41:49,663 ----------------------------------------------------------------------------------------------------
2022-10-03 18:41:52,203 epoch 2 - iter 20/209 - loss 0.24736338 - samples/sec: 254.61 - lr: 0.100000
2022-10-03 18:41:54,315 epoch 2 - iter 40/209 - loss 0.24630332 - samples/sec: 303.57 - lr: 0.100000
2022-10-03 18:41:56,747 epoch 2 - iter 60/209 - loss 0.24346197 - samples/sec: 263.62 - lr: 0.100000
2022-10-03 18:41:59,049 epoch 2 - iter 80/209 - loss 0.23695731 - samples/sec: 278.51 - lr: 0.100000
2022-10-03 18:42:01,515 epoch 2 - iter 100/209 - loss 0.23573205 - samples/sec: 259.97 - lr: 0.100000
2022-10-03 18:42:04,165 epoch 2 - iter 120/209 - loss 0.23944936 - samples/sec: 241.86 - lr: 0.100000
2022-10-03 18:42:06,665 epoch 2 - iter 140/209 - loss 0.23104741 - samples/sec: 256.42 - lr: 0.100000
2022-10-03 18:42:09,093 epoch 2 - iter 160/209 - loss 0.23120208 - s

100%|██████████| 45/45 [00:06<00:00,  6.76it/s]

2022-10-03 18:42:22,172 Evaluating as a multi-label problem: False
2022-10-03 18:42:22,185 DEV : loss 0.1606435775756836 - f1-score (micro avg)  0.3552





2022-10-03 18:42:22,247 BAD EPOCHS (no improvement): 0
2022-10-03 18:42:22,251 saving best model
2022-10-03 18:42:25,291 ----------------------------------------------------------------------------------------------------
2022-10-03 18:42:27,726 epoch 3 - iter 20/209 - loss 0.20009008 - samples/sec: 263.45 - lr: 0.100000
2022-10-03 18:42:29,839 epoch 3 - iter 40/209 - loss 0.17856804 - samples/sec: 303.41 - lr: 0.100000
2022-10-03 18:42:32,794 epoch 3 - iter 60/209 - loss 0.17350678 - samples/sec: 216.89 - lr: 0.100000
2022-10-03 18:42:35,040 epoch 3 - iter 80/209 - loss 0.17399075 - samples/sec: 285.55 - lr: 0.100000
2022-10-03 18:42:37,325 epoch 3 - iter 100/209 - loss 0.16954490 - samples/sec: 280.64 - lr: 0.100000
2022-10-03 18:42:39,957 epoch 3 - iter 120/209 - loss 0.16822763 - samples/sec: 243.56 - lr: 0.100000
2022-10-03 18:42:42,536 epoch 3 - iter 140/209 - loss 0.16445022 - samples/sec: 248.56 - lr: 0.100000
2022-10-03 18:42:46,039 epoch 3 - iter 160/209 - loss 0.16189731 - s

100%|██████████| 45/45 [00:06<00:00,  6.82it/s]

2022-10-03 18:42:57,672 Evaluating as a multi-label problem: False
2022-10-03 18:42:57,684 DEV : loss 0.14665403962135315 - f1-score (micro avg)  0.4476





2022-10-03 18:42:57,745 BAD EPOCHS (no improvement): 0
2022-10-03 18:42:57,750 saving best model
2022-10-03 18:43:00,664 ----------------------------------------------------------------------------------------------------
2022-10-03 18:43:03,278 epoch 4 - iter 20/209 - loss 0.15744090 - samples/sec: 245.42 - lr: 0.100000
2022-10-03 18:43:05,179 epoch 4 - iter 40/209 - loss 0.15558899 - samples/sec: 337.28 - lr: 0.100000
2022-10-03 18:43:07,429 epoch 4 - iter 60/209 - loss 0.15465018 - samples/sec: 285.05 - lr: 0.100000
2022-10-03 18:43:10,228 epoch 4 - iter 80/209 - loss 0.14895647 - samples/sec: 228.98 - lr: 0.100000
2022-10-03 18:43:12,708 epoch 4 - iter 100/209 - loss 0.14668958 - samples/sec: 258.66 - lr: 0.100000
2022-10-03 18:43:14,831 epoch 4 - iter 120/209 - loss 0.14223313 - samples/sec: 302.42 - lr: 0.100000
2022-10-03 18:43:17,404 epoch 4 - iter 140/209 - loss 0.13997978 - samples/sec: 249.23 - lr: 0.100000
2022-10-03 18:43:20,467 epoch 4 - iter 160/209 - loss 0.13497334 - s

100%|██████████| 45/45 [00:07<00:00,  6.37it/s]

2022-10-03 18:43:34,045 Evaluating as a multi-label problem: False
2022-10-03 18:43:34,059 DEV : loss 0.1001163050532341 - f1-score (micro avg)  0.5943





2022-10-03 18:43:34,119 BAD EPOCHS (no improvement): 0
2022-10-03 18:43:34,123 saving best model
2022-10-03 18:43:36,983 ----------------------------------------------------------------------------------------------------
2022-10-03 18:43:39,342 epoch 5 - iter 20/209 - loss 0.10305595 - samples/sec: 272.04 - lr: 0.100000
2022-10-03 18:43:41,180 epoch 5 - iter 40/209 - loss 0.09758076 - samples/sec: 349.12 - lr: 0.100000
2022-10-03 18:43:43,898 epoch 5 - iter 60/209 - loss 0.10228892 - samples/sec: 235.81 - lr: 0.100000
2022-10-03 18:43:46,853 epoch 5 - iter 80/209 - loss 0.09961444 - samples/sec: 216.97 - lr: 0.100000
2022-10-03 18:43:49,238 epoch 5 - iter 100/209 - loss 0.09951898 - samples/sec: 268.86 - lr: 0.100000
2022-10-03 18:43:51,799 epoch 5 - iter 120/209 - loss 0.10366967 - samples/sec: 250.50 - lr: 0.100000
2022-10-03 18:43:55,212 epoch 5 - iter 140/209 - loss 0.10452159 - samples/sec: 187.76 - lr: 0.100000
2022-10-03 18:43:57,851 epoch 5 - iter 160/209 - loss 0.10145469 - s

100%|██████████| 45/45 [00:06<00:00,  6.55it/s]

2022-10-03 18:44:10,362 Evaluating as a multi-label problem: False
2022-10-03 18:44:10,376 DEV : loss 0.08253118395805359 - f1-score (micro avg)  0.6818





2022-10-03 18:44:10,437 BAD EPOCHS (no improvement): 0
2022-10-03 18:44:10,441 saving best model
2022-10-03 18:44:13,361 ----------------------------------------------------------------------------------------------------
2022-10-03 18:44:15,654 epoch 6 - iter 20/209 - loss 0.08688902 - samples/sec: 279.72 - lr: 0.100000
2022-10-03 18:44:17,825 epoch 6 - iter 40/209 - loss 0.10515545 - samples/sec: 295.32 - lr: 0.100000
2022-10-03 18:44:20,029 epoch 6 - iter 60/209 - loss 0.10382327 - samples/sec: 291.12 - lr: 0.100000
2022-10-03 18:44:22,740 epoch 6 - iter 80/209 - loss 0.09891535 - samples/sec: 236.61 - lr: 0.100000
2022-10-03 18:44:25,290 epoch 6 - iter 100/209 - loss 0.10043741 - samples/sec: 251.46 - lr: 0.100000
2022-10-03 18:44:28,293 epoch 6 - iter 120/209 - loss 0.09609621 - samples/sec: 213.47 - lr: 0.100000
2022-10-03 18:44:30,894 epoch 6 - iter 140/209 - loss 0.09879681 - samples/sec: 246.53 - lr: 0.100000
2022-10-03 18:44:33,629 epoch 6 - iter 160/209 - loss 0.09772805 - s

100%|██████████| 45/45 [00:07<00:00,  6.32it/s]

2022-10-03 18:44:46,962 Evaluating as a multi-label problem: False
2022-10-03 18:44:46,978 DEV : loss 0.0795944556593895 - f1-score (micro avg)  0.666





2022-10-03 18:44:47,040 BAD EPOCHS (no improvement): 1
2022-10-03 18:44:47,045 ----------------------------------------------------------------------------------------------------
2022-10-03 18:44:49,674 epoch 7 - iter 20/209 - loss 0.08401653 - samples/sec: 243.87 - lr: 0.100000
2022-10-03 18:44:52,150 epoch 7 - iter 40/209 - loss 0.08378110 - samples/sec: 258.96 - lr: 0.100000
2022-10-03 18:44:54,141 epoch 7 - iter 60/209 - loss 0.08726679 - samples/sec: 322.15 - lr: 0.100000
2022-10-03 18:44:56,412 epoch 7 - iter 80/209 - loss 0.08403209 - samples/sec: 282.31 - lr: 0.100000
2022-10-03 18:44:59,551 epoch 7 - iter 100/209 - loss 0.08381965 - samples/sec: 204.15 - lr: 0.100000
2022-10-03 18:45:01,626 epoch 7 - iter 120/209 - loss 0.08758004 - samples/sec: 309.16 - lr: 0.100000
2022-10-03 18:45:04,256 epoch 7 - iter 140/209 - loss 0.08665601 - samples/sec: 243.65 - lr: 0.100000
2022-10-03 18:45:06,593 epoch 7 - iter 160/209 - loss 0.08763578 - samples/sec: 274.34 - lr: 0.100000
2022-10-

100%|██████████| 45/45 [00:06<00:00,  6.78it/s]

2022-10-03 18:45:18,776 Evaluating as a multi-label problem: False
2022-10-03 18:45:18,791 DEV : loss 0.0680016428232193 - f1-score (micro avg)  0.7221





2022-10-03 18:45:18,858 BAD EPOCHS (no improvement): 0
2022-10-03 18:45:18,862 saving best model
2022-10-03 18:45:21,664 ----------------------------------------------------------------------------------------------------
2022-10-03 18:45:23,858 epoch 8 - iter 20/209 - loss 0.10493936 - samples/sec: 292.51 - lr: 0.100000
2022-10-03 18:45:26,041 epoch 8 - iter 40/209 - loss 0.09432833 - samples/sec: 293.82 - lr: 0.100000
2022-10-03 18:45:28,474 epoch 8 - iter 60/209 - loss 0.10157386 - samples/sec: 263.42 - lr: 0.100000
2022-10-03 18:45:31,567 epoch 8 - iter 80/209 - loss 0.09912179 - samples/sec: 207.28 - lr: 0.100000
2022-10-03 18:45:34,127 epoch 8 - iter 100/209 - loss 0.09568934 - samples/sec: 250.43 - lr: 0.100000
2022-10-03 18:45:36,618 epoch 8 - iter 120/209 - loss 0.09353830 - samples/sec: 257.44 - lr: 0.100000
2022-10-03 18:45:39,838 epoch 8 - iter 140/209 - loss 0.09134120 - samples/sec: 199.17 - lr: 0.100000
2022-10-03 18:45:42,554 epoch 8 - iter 160/209 - loss 0.08962821 - s

100%|██████████| 45/45 [00:07<00:00,  6.38it/s]

2022-10-03 18:45:55,118 Evaluating as a multi-label problem: False
2022-10-03 18:45:55,134 DEV : loss 0.0680040791630745 - f1-score (micro avg)  0.71





2022-10-03 18:45:55,194 BAD EPOCHS (no improvement): 1
2022-10-03 18:45:55,199 ----------------------------------------------------------------------------------------------------
2022-10-03 18:45:57,592 epoch 9 - iter 20/209 - loss 0.09681647 - samples/sec: 268.07 - lr: 0.100000
2022-10-03 18:45:59,948 epoch 9 - iter 40/209 - loss 0.08881046 - samples/sec: 272.12 - lr: 0.100000
2022-10-03 18:46:02,109 epoch 9 - iter 60/209 - loss 0.08340358 - samples/sec: 296.75 - lr: 0.100000
2022-10-03 18:46:04,464 epoch 9 - iter 80/209 - loss 0.08372878 - samples/sec: 272.20 - lr: 0.100000
2022-10-03 18:46:06,736 epoch 9 - iter 100/209 - loss 0.08179172 - samples/sec: 282.22 - lr: 0.100000
2022-10-03 18:46:09,281 epoch 9 - iter 120/209 - loss 0.07877386 - samples/sec: 251.88 - lr: 0.100000
2022-10-03 18:46:12,108 epoch 9 - iter 140/209 - loss 0.07729495 - samples/sec: 226.68 - lr: 0.100000
2022-10-03 18:46:14,464 epoch 9 - iter 160/209 - loss 0.07978013 - samples/sec: 272.18 - lr: 0.100000
2022-10-

100%|██████████| 45/45 [00:06<00:00,  6.61it/s]

2022-10-03 18:46:27,236 Evaluating as a multi-label problem: False
2022-10-03 18:46:27,252 DEV : loss 0.060828354209661484 - f1-score (micro avg)  0.7586





2022-10-03 18:46:27,312 BAD EPOCHS (no improvement): 0
2022-10-03 18:46:27,317 saving best model
2022-10-03 18:46:30,197 ----------------------------------------------------------------------------------------------------
2022-10-03 18:46:32,510 epoch 10 - iter 20/209 - loss 0.09725881 - samples/sec: 277.42 - lr: 0.100000
2022-10-03 18:46:35,720 epoch 10 - iter 40/209 - loss 0.08603662 - samples/sec: 199.64 - lr: 0.100000
2022-10-03 18:46:37,765 epoch 10 - iter 60/209 - loss 0.08038356 - samples/sec: 313.50 - lr: 0.100000
2022-10-03 18:46:40,044 epoch 10 - iter 80/209 - loss 0.07767378 - samples/sec: 281.50 - lr: 0.100000
2022-10-03 18:46:42,407 epoch 10 - iter 100/209 - loss 0.07848740 - samples/sec: 271.28 - lr: 0.100000
2022-10-03 18:46:44,573 epoch 10 - iter 120/209 - loss 0.07484399 - samples/sec: 296.27 - lr: 0.100000
2022-10-03 18:46:47,284 epoch 10 - iter 140/209 - loss 0.07309283 - samples/sec: 236.40 - lr: 0.100000
2022-10-03 18:46:49,977 epoch 10 - iter 160/209 - loss 0.0720

100%|██████████| 45/45 [00:06<00:00,  6.66it/s]


2022-10-03 18:47:03,182 Evaluating as a multi-label problem: False
2022-10-03 18:47:03,200 DEV : loss 0.06503196060657501 - f1-score (micro avg)  0.7529
2022-10-03 18:47:03,259 BAD EPOCHS (no improvement): 1
2022-10-03 18:47:03,264 ----------------------------------------------------------------------------------------------------
2022-10-03 18:47:06,422 epoch 11 - iter 20/209 - loss 0.06452681 - samples/sec: 203.04 - lr: 0.100000
2022-10-03 18:47:08,828 epoch 11 - iter 40/209 - loss 0.06678285 - samples/sec: 266.49 - lr: 0.100000
2022-10-03 18:47:10,874 epoch 11 - iter 60/209 - loss 0.06875308 - samples/sec: 313.44 - lr: 0.100000
2022-10-03 18:47:12,953 epoch 11 - iter 80/209 - loss 0.06696340 - samples/sec: 308.45 - lr: 0.100000
2022-10-03 18:47:15,470 epoch 11 - iter 100/209 - loss 0.06618400 - samples/sec: 254.74 - lr: 0.100000
2022-10-03 18:47:17,757 epoch 11 - iter 120/209 - loss 0.06667697 - samples/sec: 280.28 - lr: 0.100000
2022-10-03 18:47:20,453 epoch 11 - iter 140/209 - los

100%|██████████| 45/45 [00:06<00:00,  6.68it/s]

2022-10-03 18:47:35,484 Evaluating as a multi-label problem: False
2022-10-03 18:47:35,497 DEV : loss 0.05805245786905289 - f1-score (micro avg)  0.7755





2022-10-03 18:47:35,558 BAD EPOCHS (no improvement): 0
2022-10-03 18:47:35,562 saving best model
2022-10-03 18:47:38,361 ----------------------------------------------------------------------------------------------------
2022-10-03 18:47:40,650 epoch 12 - iter 20/209 - loss 0.06850431 - samples/sec: 280.25 - lr: 0.100000
2022-10-03 18:47:42,928 epoch 12 - iter 40/209 - loss 0.06560530 - samples/sec: 281.49 - lr: 0.100000
2022-10-03 18:47:44,791 epoch 12 - iter 60/209 - loss 0.06556213 - samples/sec: 344.36 - lr: 0.100000
2022-10-03 18:47:47,940 epoch 12 - iter 80/209 - loss 0.06393704 - samples/sec: 203.48 - lr: 0.100000
2022-10-03 18:47:50,463 epoch 12 - iter 100/209 - loss 0.06121776 - samples/sec: 254.26 - lr: 0.100000
2022-10-03 18:47:53,051 epoch 12 - iter 120/209 - loss 0.05933833 - samples/sec: 247.72 - lr: 0.100000
2022-10-03 18:47:55,178 epoch 12 - iter 140/209 - loss 0.06262313 - samples/sec: 301.59 - lr: 0.100000
2022-10-03 18:47:58,136 epoch 12 - iter 160/209 - loss 0.0650

100%|██████████| 45/45 [00:06<00:00,  6.87it/s]

2022-10-03 18:48:10,988 Evaluating as a multi-label problem: False
2022-10-03 18:48:11,001 DEV : loss 0.057785019278526306 - f1-score (micro avg)  0.7593





2022-10-03 18:48:11,063 BAD EPOCHS (no improvement): 1
2022-10-03 18:48:11,068 ----------------------------------------------------------------------------------------------------
2022-10-03 18:48:13,400 epoch 13 - iter 20/209 - loss 0.04475673 - samples/sec: 275.03 - lr: 0.100000
2022-10-03 18:48:15,393 epoch 13 - iter 40/209 - loss 0.05227686 - samples/sec: 321.86 - lr: 0.100000
2022-10-03 18:48:18,197 epoch 13 - iter 60/209 - loss 0.05574324 - samples/sec: 228.60 - lr: 0.100000
2022-10-03 18:48:20,124 epoch 13 - iter 80/209 - loss 0.05706818 - samples/sec: 332.72 - lr: 0.100000
2022-10-03 18:48:22,327 epoch 13 - iter 100/209 - loss 0.05711975 - samples/sec: 291.21 - lr: 0.100000
2022-10-03 18:48:24,690 epoch 13 - iter 120/209 - loss 0.05729657 - samples/sec: 271.33 - lr: 0.100000
2022-10-03 18:48:26,726 epoch 13 - iter 140/209 - loss 0.05831103 - samples/sec: 315.02 - lr: 0.100000
2022-10-03 18:48:29,486 epoch 13 - iter 160/209 - loss 0.05802068 - samples/sec: 232.30 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.80it/s]

2022-10-03 18:48:42,760 Evaluating as a multi-label problem: False
2022-10-03 18:48:42,774 DEV : loss 0.05335738882422447 - f1-score (micro avg)  0.8049





2022-10-03 18:48:42,834 BAD EPOCHS (no improvement): 0
2022-10-03 18:48:42,838 saving best model
2022-10-03 18:48:45,605 ----------------------------------------------------------------------------------------------------
2022-10-03 18:48:48,194 epoch 14 - iter 20/209 - loss 0.05677778 - samples/sec: 247.85 - lr: 0.100000
2022-10-03 18:48:50,405 epoch 14 - iter 40/209 - loss 0.05327978 - samples/sec: 290.01 - lr: 0.100000
2022-10-03 18:48:52,855 epoch 14 - iter 60/209 - loss 0.05756637 - samples/sec: 261.67 - lr: 0.100000
2022-10-03 18:48:55,476 epoch 14 - iter 80/209 - loss 0.05683522 - samples/sec: 244.69 - lr: 0.100000
2022-10-03 18:48:57,713 epoch 14 - iter 100/209 - loss 0.05683143 - samples/sec: 286.56 - lr: 0.100000
2022-10-03 18:48:59,703 epoch 14 - iter 120/209 - loss 0.05793003 - samples/sec: 322.37 - lr: 0.100000
2022-10-03 18:49:02,275 epoch 14 - iter 140/209 - loss 0.05719444 - samples/sec: 249.34 - lr: 0.100000
2022-10-03 18:49:04,776 epoch 14 - iter 160/209 - loss 0.0579

100%|██████████| 45/45 [00:06<00:00,  6.78it/s]

2022-10-03 18:49:18,314 Evaluating as a multi-label problem: False
2022-10-03 18:49:18,336 DEV : loss 0.05508466437458992 - f1-score (micro avg)  0.7892





2022-10-03 18:49:18,397 BAD EPOCHS (no improvement): 1
2022-10-03 18:49:18,401 ----------------------------------------------------------------------------------------------------
2022-10-03 18:49:20,882 epoch 15 - iter 20/209 - loss 0.05638990 - samples/sec: 258.45 - lr: 0.100000
2022-10-03 18:49:23,232 epoch 15 - iter 40/209 - loss 0.06361697 - samples/sec: 272.91 - lr: 0.100000
2022-10-03 18:49:25,500 epoch 15 - iter 60/209 - loss 0.06034399 - samples/sec: 282.75 - lr: 0.100000
2022-10-03 18:49:27,692 epoch 15 - iter 80/209 - loss 0.05990189 - samples/sec: 292.48 - lr: 0.100000
2022-10-03 18:49:30,455 epoch 15 - iter 100/209 - loss 0.05913335 - samples/sec: 232.01 - lr: 0.100000
2022-10-03 18:49:32,691 epoch 15 - iter 120/209 - loss 0.05894373 - samples/sec: 286.71 - lr: 0.100000
2022-10-03 18:49:35,200 epoch 15 - iter 140/209 - loss 0.05997174 - samples/sec: 255.60 - lr: 0.100000
2022-10-03 18:49:37,394 epoch 15 - iter 160/209 - loss 0.05885711 - samples/sec: 292.31 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.43it/s]

2022-10-03 18:49:49,941 Evaluating as a multi-label problem: False
2022-10-03 18:49:49,956 DEV : loss 0.05065612494945526 - f1-score (micro avg)  0.8052





2022-10-03 18:49:50,018 BAD EPOCHS (no improvement): 0
2022-10-03 18:49:50,023 saving best model
2022-10-03 18:49:52,820 ----------------------------------------------------------------------------------------------------
2022-10-03 18:49:55,412 epoch 16 - iter 20/209 - loss 0.06611578 - samples/sec: 247.60 - lr: 0.100000
2022-10-03 18:49:57,872 epoch 16 - iter 40/209 - loss 0.05526306 - samples/sec: 260.54 - lr: 0.100000
2022-10-03 18:50:00,128 epoch 16 - iter 60/209 - loss 0.05897922 - samples/sec: 284.20 - lr: 0.100000
2022-10-03 18:50:03,099 epoch 16 - iter 80/209 - loss 0.05748477 - samples/sec: 215.75 - lr: 0.100000
2022-10-03 18:50:05,836 epoch 16 - iter 100/209 - loss 0.05541908 - samples/sec: 234.41 - lr: 0.100000
2022-10-03 18:50:08,133 epoch 16 - iter 120/209 - loss 0.05354911 - samples/sec: 279.13 - lr: 0.100000
2022-10-03 18:50:10,347 epoch 16 - iter 140/209 - loss 0.05534813 - samples/sec: 289.57 - lr: 0.100000
2022-10-03 18:50:13,033 epoch 16 - iter 160/209 - loss 0.0550

100%|██████████| 45/45 [00:06<00:00,  6.77it/s]

2022-10-03 18:50:25,724 Evaluating as a multi-label problem: False
2022-10-03 18:50:25,738 DEV : loss 0.048684261739254 - f1-score (micro avg)  0.8129





2022-10-03 18:50:25,800 BAD EPOCHS (no improvement): 0
2022-10-03 18:50:25,805 saving best model
2022-10-03 18:50:28,933 ----------------------------------------------------------------------------------------------------
2022-10-03 18:50:31,509 epoch 17 - iter 20/209 - loss 0.06389245 - samples/sec: 249.04 - lr: 0.100000
2022-10-03 18:50:33,906 epoch 17 - iter 40/209 - loss 0.05413621 - samples/sec: 267.48 - lr: 0.100000
2022-10-03 18:50:36,040 epoch 17 - iter 60/209 - loss 0.05422989 - samples/sec: 300.60 - lr: 0.100000
2022-10-03 18:50:38,383 epoch 17 - iter 80/209 - loss 0.05406084 - samples/sec: 273.61 - lr: 0.100000
2022-10-03 18:50:40,730 epoch 17 - iter 100/209 - loss 0.05342149 - samples/sec: 273.14 - lr: 0.100000
2022-10-03 18:50:43,591 epoch 17 - iter 120/209 - loss 0.05536332 - samples/sec: 224.00 - lr: 0.100000
2022-10-03 18:50:46,277 epoch 17 - iter 140/209 - loss 0.05497211 - samples/sec: 238.71 - lr: 0.100000
2022-10-03 18:50:49,424 epoch 17 - iter 160/209 - loss 0.0553

100%|██████████| 45/45 [00:07<00:00,  6.40it/s]

2022-10-03 18:51:02,182 Evaluating as a multi-label problem: False
2022-10-03 18:51:02,196 DEV : loss 0.05065369978547096 - f1-score (micro avg)  0.794





2022-10-03 18:51:02,258 BAD EPOCHS (no improvement): 1
2022-10-03 18:51:02,262 ----------------------------------------------------------------------------------------------------
2022-10-03 18:51:04,376 epoch 18 - iter 20/209 - loss 0.04804430 - samples/sec: 303.63 - lr: 0.100000
2022-10-03 18:51:06,242 epoch 18 - iter 40/209 - loss 0.05200816 - samples/sec: 343.60 - lr: 0.100000
2022-10-03 18:51:08,434 epoch 18 - iter 60/209 - loss 0.05164329 - samples/sec: 292.50 - lr: 0.100000
2022-10-03 18:51:10,505 epoch 18 - iter 80/209 - loss 0.05380102 - samples/sec: 309.69 - lr: 0.100000
2022-10-03 18:51:13,061 epoch 18 - iter 100/209 - loss 0.05356022 - samples/sec: 250.80 - lr: 0.100000
2022-10-03 18:51:15,430 epoch 18 - iter 120/209 - loss 0.05247331 - samples/sec: 270.59 - lr: 0.100000
2022-10-03 18:51:17,660 epoch 18 - iter 140/209 - loss 0.05200027 - samples/sec: 287.55 - lr: 0.100000
2022-10-03 18:51:20,506 epoch 18 - iter 160/209 - loss 0.05149652 - samples/sec: 225.15 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.73it/s]

2022-10-03 18:51:33,217 Evaluating as a multi-label problem: False
2022-10-03 18:51:33,232 DEV : loss 0.04896869510412216 - f1-score (micro avg)  0.8203





2022-10-03 18:51:33,290 BAD EPOCHS (no improvement): 0
2022-10-03 18:51:33,294 saving best model
2022-10-03 18:51:36,140 ----------------------------------------------------------------------------------------------------
2022-10-03 18:51:38,718 epoch 19 - iter 20/209 - loss 0.03978212 - samples/sec: 248.88 - lr: 0.100000
2022-10-03 18:51:40,791 epoch 19 - iter 40/209 - loss 0.04558074 - samples/sec: 309.31 - lr: 0.100000
2022-10-03 18:51:43,538 epoch 19 - iter 60/209 - loss 0.05048568 - samples/sec: 233.36 - lr: 0.100000
2022-10-03 18:51:46,128 epoch 19 - iter 80/209 - loss 0.05190054 - samples/sec: 247.55 - lr: 0.100000
2022-10-03 18:51:49,133 epoch 19 - iter 100/209 - loss 0.05070626 - samples/sec: 213.28 - lr: 0.100000
2022-10-03 18:51:51,851 epoch 19 - iter 120/209 - loss 0.04981074 - samples/sec: 235.90 - lr: 0.100000
2022-10-03 18:51:54,399 epoch 19 - iter 140/209 - loss 0.04984403 - samples/sec: 251.63 - lr: 0.100000
2022-10-03 18:51:56,836 epoch 19 - iter 160/209 - loss 0.0503

100%|██████████| 45/45 [00:06<00:00,  6.46it/s]

2022-10-03 18:52:09,246 Evaluating as a multi-label problem: False
2022-10-03 18:52:09,261 DEV : loss 0.046392813324928284 - f1-score (micro avg)  0.814





2022-10-03 18:52:09,320 BAD EPOCHS (no improvement): 1
2022-10-03 18:52:09,324 ----------------------------------------------------------------------------------------------------
2022-10-03 18:52:11,635 epoch 20 - iter 20/209 - loss 0.04410483 - samples/sec: 277.63 - lr: 0.100000
2022-10-03 18:52:14,206 epoch 20 - iter 40/209 - loss 0.04222022 - samples/sec: 249.35 - lr: 0.100000
2022-10-03 18:52:16,431 epoch 20 - iter 60/209 - loss 0.04208644 - samples/sec: 288.20 - lr: 0.100000
2022-10-03 18:52:18,566 epoch 20 - iter 80/209 - loss 0.04350335 - samples/sec: 300.26 - lr: 0.100000
2022-10-03 18:52:21,437 epoch 20 - iter 100/209 - loss 0.04412048 - samples/sec: 223.28 - lr: 0.100000
2022-10-03 18:52:23,731 epoch 20 - iter 120/209 - loss 0.04564095 - samples/sec: 279.41 - lr: 0.100000
2022-10-03 18:52:26,117 epoch 20 - iter 140/209 - loss 0.04779393 - samples/sec: 268.75 - lr: 0.100000
2022-10-03 18:52:28,677 epoch 20 - iter 160/209 - loss 0.04623421 - samples/sec: 250.53 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.87it/s]

2022-10-03 18:52:40,403 Evaluating as a multi-label problem: False
2022-10-03 18:52:40,416 DEV : loss 0.05078371241688728 - f1-score (micro avg)  0.8134





2022-10-03 18:52:40,482 BAD EPOCHS (no improvement): 2
2022-10-03 18:52:40,486 ----------------------------------------------------------------------------------------------------
2022-10-03 18:52:42,835 epoch 21 - iter 20/209 - loss 0.03856598 - samples/sec: 273.04 - lr: 0.100000
2022-10-03 18:52:45,169 epoch 21 - iter 40/209 - loss 0.04267619 - samples/sec: 274.62 - lr: 0.100000
2022-10-03 18:52:47,526 epoch 21 - iter 60/209 - loss 0.04270275 - samples/sec: 271.94 - lr: 0.100000
2022-10-03 18:52:50,276 epoch 21 - iter 80/209 - loss 0.04224218 - samples/sec: 233.13 - lr: 0.100000
2022-10-03 18:52:52,573 epoch 21 - iter 100/209 - loss 0.04222561 - samples/sec: 279.04 - lr: 0.100000
2022-10-03 18:52:54,581 epoch 21 - iter 120/209 - loss 0.04324073 - samples/sec: 319.45 - lr: 0.100000
2022-10-03 18:52:56,456 epoch 21 - iter 140/209 - loss 0.04446012 - samples/sec: 342.16 - lr: 0.100000
2022-10-03 18:52:59,262 epoch 21 - iter 160/209 - loss 0.04520011 - samples/sec: 228.46 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.79it/s]

2022-10-03 18:53:11,519 Evaluating as a multi-label problem: False
2022-10-03 18:53:11,533 DEV : loss 0.04473907873034477 - f1-score (micro avg)  0.8177





2022-10-03 18:53:11,593 BAD EPOCHS (no improvement): 3
2022-10-03 18:53:11,598 ----------------------------------------------------------------------------------------------------
2022-10-03 18:53:14,353 epoch 22 - iter 20/209 - loss 0.05449680 - samples/sec: 232.75 - lr: 0.100000
2022-10-03 18:53:16,424 epoch 22 - iter 40/209 - loss 0.04822891 - samples/sec: 309.70 - lr: 0.100000
2022-10-03 18:53:18,803 epoch 22 - iter 60/209 - loss 0.04362862 - samples/sec: 269.52 - lr: 0.100000
2022-10-03 18:53:21,473 epoch 22 - iter 80/209 - loss 0.04361282 - samples/sec: 240.01 - lr: 0.100000
2022-10-03 18:53:23,803 epoch 22 - iter 100/209 - loss 0.04374349 - samples/sec: 275.22 - lr: 0.100000
2022-10-03 18:53:26,462 epoch 22 - iter 120/209 - loss 0.04402134 - samples/sec: 241.06 - lr: 0.100000
2022-10-03 18:53:28,921 epoch 22 - iter 140/209 - loss 0.04372993 - samples/sec: 260.60 - lr: 0.100000
2022-10-03 18:53:31,015 epoch 22 - iter 160/209 - loss 0.04465147 - samples/sec: 306.26 - lr: 0.100000


100%|██████████| 45/45 [00:06<00:00,  6.76it/s]

2022-10-03 18:53:43,510 Evaluating as a multi-label problem: False
2022-10-03 18:53:43,523 DEV : loss 0.04734628647565842 - f1-score (micro avg)  0.8129





2022-10-03 18:53:43,583 Epoch    22: reducing learning rate of group 0 to 5.0000e-02.
2022-10-03 18:53:43,584 BAD EPOCHS (no improvement): 4
2022-10-03 18:53:43,589 ----------------------------------------------------------------------------------------------------
2022-10-03 18:53:46,788 epoch 23 - iter 20/209 - loss 0.04490919 - samples/sec: 200.38 - lr: 0.050000
2022-10-03 18:53:49,053 epoch 23 - iter 40/209 - loss 0.04070683 - samples/sec: 283.12 - lr: 0.050000
2022-10-03 18:53:51,127 epoch 23 - iter 60/209 - loss 0.04136243 - samples/sec: 309.27 - lr: 0.050000
2022-10-03 18:53:53,505 epoch 23 - iter 80/209 - loss 0.03796725 - samples/sec: 269.52 - lr: 0.050000
2022-10-03 18:53:55,786 epoch 23 - iter 100/209 - loss 0.03771703 - samples/sec: 281.17 - lr: 0.050000
2022-10-03 18:53:57,744 epoch 23 - iter 120/209 - loss 0.03845175 - samples/sec: 327.51 - lr: 0.050000
2022-10-03 18:54:00,251 epoch 23 - iter 140/209 - loss 0.03796796 - samples/sec: 255.61 - lr: 0.050000
2022-10-03 18:54:

100%|██████████| 45/45 [00:06<00:00,  6.89it/s]

2022-10-03 18:54:14,849 Evaluating as a multi-label problem: False
2022-10-03 18:54:14,864 DEV : loss 0.038541991263628006 - f1-score (micro avg)  0.8447





2022-10-03 18:54:14,922 BAD EPOCHS (no improvement): 0
2022-10-03 18:54:14,928 saving best model
2022-10-03 18:54:17,719 ----------------------------------------------------------------------------------------------------
2022-10-03 18:54:20,002 epoch 24 - iter 20/209 - loss 0.03566420 - samples/sec: 281.09 - lr: 0.050000
2022-10-03 18:54:22,826 epoch 24 - iter 40/209 - loss 0.03802126 - samples/sec: 226.99 - lr: 0.050000
2022-10-03 18:54:25,600 epoch 24 - iter 60/209 - loss 0.03830815 - samples/sec: 231.09 - lr: 0.050000
2022-10-03 18:54:28,097 epoch 24 - iter 80/209 - loss 0.03712500 - samples/sec: 256.77 - lr: 0.050000
2022-10-03 18:54:30,394 epoch 24 - iter 100/209 - loss 0.03688358 - samples/sec: 279.20 - lr: 0.050000
2022-10-03 18:54:33,114 epoch 24 - iter 120/209 - loss 0.03593417 - samples/sec: 235.62 - lr: 0.050000
2022-10-03 18:54:35,451 epoch 24 - iter 140/209 - loss 0.03668622 - samples/sec: 274.55 - lr: 0.050000
2022-10-03 18:54:37,896 epoch 24 - iter 160/209 - loss 0.0360

100%|██████████| 45/45 [00:06<00:00,  6.79it/s]

2022-10-03 18:54:50,678 Evaluating as a multi-label problem: False
2022-10-03 18:54:50,691 DEV : loss 0.03912742808461189 - f1-score (micro avg)  0.8359





2022-10-03 18:54:50,751 BAD EPOCHS (no improvement): 1
2022-10-03 18:54:50,757 ----------------------------------------------------------------------------------------------------
2022-10-03 18:54:52,890 epoch 25 - iter 20/209 - loss 0.03604788 - samples/sec: 300.82 - lr: 0.050000
2022-10-03 18:54:55,195 epoch 25 - iter 40/209 - loss 0.03723479 - samples/sec: 278.12 - lr: 0.050000
2022-10-03 18:54:57,304 epoch 25 - iter 60/209 - loss 0.03694517 - samples/sec: 304.11 - lr: 0.050000
2022-10-03 18:55:00,247 epoch 25 - iter 80/209 - loss 0.03474352 - samples/sec: 217.73 - lr: 0.050000
2022-10-03 18:55:02,679 epoch 25 - iter 100/209 - loss 0.03339952 - samples/sec: 263.57 - lr: 0.050000
2022-10-03 18:55:05,339 epoch 25 - iter 120/209 - loss 0.03451474 - samples/sec: 241.00 - lr: 0.050000
2022-10-03 18:55:07,993 epoch 25 - iter 140/209 - loss 0.03607590 - samples/sec: 241.63 - lr: 0.050000
2022-10-03 18:55:10,306 epoch 25 - iter 160/209 - loss 0.03695701 - samples/sec: 277.23 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.79it/s]

2022-10-03 18:55:22,032 Evaluating as a multi-label problem: False
2022-10-03 18:55:22,046 DEV : loss 0.03847711160778999 - f1-score (micro avg)  0.8443





2022-10-03 18:55:22,106 BAD EPOCHS (no improvement): 2
2022-10-03 18:55:22,111 ----------------------------------------------------------------------------------------------------
2022-10-03 18:55:24,048 epoch 26 - iter 20/209 - loss 0.03142486 - samples/sec: 331.33 - lr: 0.050000
2022-10-03 18:55:26,178 epoch 26 - iter 40/209 - loss 0.03569151 - samples/sec: 301.04 - lr: 0.050000
2022-10-03 18:55:29,198 epoch 26 - iter 60/209 - loss 0.03878296 - samples/sec: 212.13 - lr: 0.050000
2022-10-03 18:55:31,438 epoch 26 - iter 80/209 - loss 0.03929280 - samples/sec: 286.30 - lr: 0.050000
2022-10-03 18:55:33,650 epoch 26 - iter 100/209 - loss 0.03806674 - samples/sec: 289.91 - lr: 0.050000
2022-10-03 18:55:35,809 epoch 26 - iter 120/209 - loss 0.03672993 - samples/sec: 296.93 - lr: 0.050000
2022-10-03 18:55:37,934 epoch 26 - iter 140/209 - loss 0.03619345 - samples/sec: 301.77 - lr: 0.050000
2022-10-03 18:55:40,277 epoch 26 - iter 160/209 - loss 0.03568538 - samples/sec: 273.57 - lr: 0.050000


100%|██████████| 45/45 [00:07<00:00,  6.40it/s]

2022-10-03 18:55:53,437 Evaluating as a multi-label problem: False
2022-10-03 18:55:53,451 DEV : loss 0.03878262639045715 - f1-score (micro avg)  0.8464





2022-10-03 18:55:53,515 BAD EPOCHS (no improvement): 0
2022-10-03 18:55:53,520 saving best model
2022-10-03 18:55:56,326 ----------------------------------------------------------------------------------------------------
2022-10-03 18:55:58,480 epoch 27 - iter 20/209 - loss 0.04124344 - samples/sec: 297.94 - lr: 0.050000
2022-10-03 18:56:00,995 epoch 27 - iter 40/209 - loss 0.03519492 - samples/sec: 254.86 - lr: 0.050000
2022-10-03 18:56:03,294 epoch 27 - iter 60/209 - loss 0.03254257 - samples/sec: 278.94 - lr: 0.050000
2022-10-03 18:56:05,719 epoch 27 - iter 80/209 - loss 0.03391869 - samples/sec: 264.39 - lr: 0.050000
2022-10-03 18:56:09,097 epoch 27 - iter 100/209 - loss 0.03531802 - samples/sec: 189.71 - lr: 0.050000
2022-10-03 18:56:11,949 epoch 27 - iter 120/209 - loss 0.03442961 - samples/sec: 224.72 - lr: 0.050000
2022-10-03 18:56:14,821 epoch 27 - iter 140/209 - loss 0.03337365 - samples/sec: 223.19 - lr: 0.050000
2022-10-03 18:56:17,244 epoch 27 - iter 160/209 - loss 0.0340

100%|██████████| 45/45 [00:06<00:00,  6.79it/s]

2022-10-03 18:56:29,135 Evaluating as a multi-label problem: False
2022-10-03 18:56:29,150 DEV : loss 0.03771375119686127 - f1-score (micro avg)  0.8546





2022-10-03 18:56:29,211 BAD EPOCHS (no improvement): 0
2022-10-03 18:56:29,216 saving best model
2022-10-03 18:56:32,041 ----------------------------------------------------------------------------------------------------
2022-10-03 18:56:34,596 epoch 28 - iter 20/209 - loss 0.03651516 - samples/sec: 251.00 - lr: 0.050000
2022-10-03 18:56:36,612 epoch 28 - iter 40/209 - loss 0.03021368 - samples/sec: 318.01 - lr: 0.050000
2022-10-03 18:56:38,962 epoch 28 - iter 60/209 - loss 0.03062097 - samples/sec: 272.81 - lr: 0.050000
2022-10-03 18:56:41,301 epoch 28 - iter 80/209 - loss 0.03287940 - samples/sec: 274.15 - lr: 0.050000
2022-10-03 18:56:44,116 epoch 28 - iter 100/209 - loss 0.03227649 - samples/sec: 227.75 - lr: 0.050000
2022-10-03 18:56:46,512 epoch 28 - iter 120/209 - loss 0.03255969 - samples/sec: 267.80 - lr: 0.050000
2022-10-03 18:56:48,877 epoch 28 - iter 140/209 - loss 0.03340108 - samples/sec: 271.09 - lr: 0.050000
2022-10-03 18:56:51,919 epoch 28 - iter 160/209 - loss 0.0331

100%|██████████| 45/45 [00:07<00:00,  6.37it/s]

2022-10-03 18:57:05,208 Evaluating as a multi-label problem: False
2022-10-03 18:57:05,223 DEV : loss 0.03745303675532341 - f1-score (micro avg)  0.8522





2022-10-03 18:57:05,285 BAD EPOCHS (no improvement): 1
2022-10-03 18:57:05,289 ----------------------------------------------------------------------------------------------------
2022-10-03 18:57:08,284 epoch 29 - iter 20/209 - loss 0.03503674 - samples/sec: 214.06 - lr: 0.050000
2022-10-03 18:57:10,623 epoch 29 - iter 40/209 - loss 0.03784537 - samples/sec: 274.15 - lr: 0.050000
2022-10-03 18:57:13,379 epoch 29 - iter 60/209 - loss 0.03309820 - samples/sec: 232.64 - lr: 0.050000
2022-10-03 18:57:15,238 epoch 29 - iter 80/209 - loss 0.03424924 - samples/sec: 344.87 - lr: 0.050000
2022-10-03 18:57:17,673 epoch 29 - iter 100/209 - loss 0.03278384 - samples/sec: 263.23 - lr: 0.050000
2022-10-03 18:57:20,253 epoch 29 - iter 120/209 - loss 0.03303552 - samples/sec: 248.42 - lr: 0.050000
2022-10-03 18:57:22,476 epoch 29 - iter 140/209 - loss 0.03319139 - samples/sec: 288.42 - lr: 0.050000
2022-10-03 18:57:24,905 epoch 29 - iter 160/209 - loss 0.03383625 - samples/sec: 263.77 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.79it/s]

2022-10-03 18:57:36,800 Evaluating as a multi-label problem: False
2022-10-03 18:57:36,815 DEV : loss 0.03784233704209328 - f1-score (micro avg)  0.8434





2022-10-03 18:57:36,875 BAD EPOCHS (no improvement): 2
2022-10-03 18:57:36,879 ----------------------------------------------------------------------------------------------------
2022-10-03 18:57:39,190 epoch 30 - iter 20/209 - loss 0.03099233 - samples/sec: 277.69 - lr: 0.050000
2022-10-03 18:57:42,306 epoch 30 - iter 40/209 - loss 0.03235507 - samples/sec: 205.57 - lr: 0.050000
2022-10-03 18:57:44,282 epoch 30 - iter 60/209 - loss 0.03269074 - samples/sec: 324.68 - lr: 0.050000
2022-10-03 18:57:46,802 epoch 30 - iter 80/209 - loss 0.03263476 - samples/sec: 254.32 - lr: 0.050000
2022-10-03 18:57:48,874 epoch 30 - iter 100/209 - loss 0.03300115 - samples/sec: 309.64 - lr: 0.050000
2022-10-03 18:57:51,254 epoch 30 - iter 120/209 - loss 0.03307002 - samples/sec: 269.37 - lr: 0.050000
2022-10-03 18:57:53,486 epoch 30 - iter 140/209 - loss 0.03300142 - samples/sec: 287.25 - lr: 0.050000
2022-10-03 18:57:55,816 epoch 30 - iter 160/209 - loss 0.03331140 - samples/sec: 275.15 - lr: 0.050000


100%|██████████| 45/45 [00:07<00:00,  6.39it/s]

2022-10-03 18:58:08,370 Evaluating as a multi-label problem: False
2022-10-03 18:58:08,384 DEV : loss 0.035691067576408386 - f1-score (micro avg)  0.8594





2022-10-03 18:58:08,445 BAD EPOCHS (no improvement): 0
2022-10-03 18:58:08,460 saving best model
2022-10-03 18:58:11,237 ----------------------------------------------------------------------------------------------------
2022-10-03 18:58:13,027 epoch 31 - iter 20/209 - loss 0.03092145 - samples/sec: 358.70 - lr: 0.050000
2022-10-03 18:58:15,687 epoch 31 - iter 40/209 - loss 0.03636770 - samples/sec: 241.11 - lr: 0.050000
2022-10-03 18:58:18,281 epoch 31 - iter 60/209 - loss 0.03664451 - samples/sec: 247.11 - lr: 0.050000
2022-10-03 18:58:21,368 epoch 31 - iter 80/209 - loss 0.03561756 - samples/sec: 207.62 - lr: 0.050000
2022-10-03 18:58:24,074 epoch 31 - iter 100/209 - loss 0.03319419 - samples/sec: 236.86 - lr: 0.050000
2022-10-03 18:58:26,786 epoch 31 - iter 120/209 - loss 0.03343126 - samples/sec: 236.35 - lr: 0.050000
2022-10-03 18:58:29,313 epoch 31 - iter 140/209 - loss 0.03330487 - samples/sec: 253.69 - lr: 0.050000
2022-10-03 18:58:31,667 epoch 31 - iter 160/209 - loss 0.0332

100%|██████████| 45/45 [00:06<00:00,  6.84it/s]

2022-10-03 18:58:43,704 Evaluating as a multi-label problem: False
2022-10-03 18:58:43,720 DEV : loss 0.0346212238073349 - f1-score (micro avg)  0.8561





2022-10-03 18:58:43,783 BAD EPOCHS (no improvement): 1
2022-10-03 18:58:43,787 ----------------------------------------------------------------------------------------------------
2022-10-03 18:58:46,446 epoch 32 - iter 20/209 - loss 0.04293188 - samples/sec: 241.22 - lr: 0.050000
2022-10-03 18:58:48,550 epoch 32 - iter 40/209 - loss 0.04038174 - samples/sec: 304.72 - lr: 0.050000
2022-10-03 18:58:50,872 epoch 32 - iter 60/209 - loss 0.03612782 - samples/sec: 276.16 - lr: 0.050000
2022-10-03 18:58:53,029 epoch 32 - iter 80/209 - loss 0.03405726 - samples/sec: 297.28 - lr: 0.050000
2022-10-03 18:58:55,904 epoch 32 - iter 100/209 - loss 0.03282160 - samples/sec: 222.97 - lr: 0.050000
2022-10-03 18:58:58,277 epoch 32 - iter 120/209 - loss 0.03297007 - samples/sec: 270.07 - lr: 0.050000
2022-10-03 18:59:00,929 epoch 32 - iter 140/209 - loss 0.03271107 - samples/sec: 242.29 - lr: 0.050000
2022-10-03 18:59:03,032 epoch 32 - iter 160/209 - loss 0.03199546 - samples/sec: 304.78 - lr: 0.050000


100%|██████████| 45/45 [00:07<00:00,  6.35it/s]

2022-10-03 18:59:16,029 Evaluating as a multi-label problem: False
2022-10-03 18:59:16,045 DEV : loss 0.03576216101646423 - f1-score (micro avg)  0.8592





2022-10-03 18:59:16,105 BAD EPOCHS (no improvement): 2
2022-10-03 18:59:16,109 ----------------------------------------------------------------------------------------------------
2022-10-03 18:59:18,744 epoch 33 - iter 20/209 - loss 0.02933981 - samples/sec: 243.42 - lr: 0.050000
2022-10-03 18:59:21,395 epoch 33 - iter 40/209 - loss 0.02843495 - samples/sec: 241.83 - lr: 0.050000
2022-10-03 18:59:23,946 epoch 33 - iter 60/209 - loss 0.02835724 - samples/sec: 251.33 - lr: 0.050000
2022-10-03 18:59:26,072 epoch 33 - iter 80/209 - loss 0.02894572 - samples/sec: 301.53 - lr: 0.050000
2022-10-03 18:59:28,844 epoch 33 - iter 100/209 - loss 0.03014244 - samples/sec: 231.22 - lr: 0.050000
2022-10-03 18:59:31,078 epoch 33 - iter 120/209 - loss 0.03044812 - samples/sec: 287.01 - lr: 0.050000
2022-10-03 18:59:33,281 epoch 33 - iter 140/209 - loss 0.03243420 - samples/sec: 291.04 - lr: 0.050000
2022-10-03 18:59:35,450 epoch 33 - iter 160/209 - loss 0.03204004 - samples/sec: 295.67 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.78it/s]

2022-10-03 18:59:47,824 Evaluating as a multi-label problem: False
2022-10-03 18:59:47,839 DEV : loss 0.035961270332336426 - f1-score (micro avg)  0.8577





2022-10-03 18:59:47,901 BAD EPOCHS (no improvement): 3
2022-10-03 18:59:47,906 ----------------------------------------------------------------------------------------------------
2022-10-03 18:59:50,009 epoch 34 - iter 20/209 - loss 0.03336373 - samples/sec: 305.08 - lr: 0.050000
2022-10-03 18:59:52,195 epoch 34 - iter 40/209 - loss 0.03099473 - samples/sec: 293.43 - lr: 0.050000
2022-10-03 18:59:54,644 epoch 34 - iter 60/209 - loss 0.03257470 - samples/sec: 261.75 - lr: 0.050000
2022-10-03 18:59:57,228 epoch 34 - iter 80/209 - loss 0.03216700 - samples/sec: 248.15 - lr: 0.050000
2022-10-03 18:59:59,240 epoch 34 - iter 100/209 - loss 0.03195582 - samples/sec: 318.70 - lr: 0.050000
2022-10-03 19:00:01,550 epoch 34 - iter 120/209 - loss 0.03222553 - samples/sec: 277.62 - lr: 0.050000
2022-10-03 19:00:03,684 epoch 34 - iter 140/209 - loss 0.03343709 - samples/sec: 300.50 - lr: 0.050000
2022-10-03 19:00:05,812 epoch 34 - iter 160/209 - loss 0.03318508 - samples/sec: 301.21 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.83it/s]

2022-10-03 19:00:19,037 Evaluating as a multi-label problem: False
2022-10-03 19:00:19,051 DEV : loss 0.03565596416592598 - f1-score (micro avg)  0.8669





2022-10-03 19:00:19,110 BAD EPOCHS (no improvement): 0
2022-10-03 19:00:19,115 saving best model
2022-10-03 19:00:21,891 ----------------------------------------------------------------------------------------------------
2022-10-03 19:00:24,336 epoch 35 - iter 20/209 - loss 0.02474612 - samples/sec: 262.31 - lr: 0.050000
2022-10-03 19:00:26,943 epoch 35 - iter 40/209 - loss 0.02533335 - samples/sec: 245.92 - lr: 0.050000
2022-10-03 19:00:29,217 epoch 35 - iter 60/209 - loss 0.02782840 - samples/sec: 281.92 - lr: 0.050000
2022-10-03 19:00:31,844 epoch 35 - iter 80/209 - loss 0.02830117 - samples/sec: 244.04 - lr: 0.050000
2022-10-03 19:00:34,242 epoch 35 - iter 100/209 - loss 0.02829926 - samples/sec: 267.40 - lr: 0.050000
2022-10-03 19:00:37,315 epoch 35 - iter 120/209 - loss 0.02939050 - samples/sec: 208.57 - lr: 0.050000
2022-10-03 19:00:39,347 epoch 35 - iter 140/209 - loss 0.03062654 - samples/sec: 315.75 - lr: 0.050000
2022-10-03 19:00:42,089 epoch 35 - iter 160/209 - loss 0.0304

100%|██████████| 45/45 [00:06<00:00,  6.81it/s]

2022-10-03 19:00:54,096 Evaluating as a multi-label problem: False
2022-10-03 19:00:54,110 DEV : loss 0.03467694669961929 - f1-score (micro avg)  0.8579





2022-10-03 19:00:54,173 BAD EPOCHS (no improvement): 1
2022-10-03 19:00:54,178 ----------------------------------------------------------------------------------------------------
2022-10-03 19:00:56,374 epoch 36 - iter 20/209 - loss 0.02611639 - samples/sec: 292.20 - lr: 0.050000
2022-10-03 19:00:58,472 epoch 36 - iter 40/209 - loss 0.02599810 - samples/sec: 305.74 - lr: 0.050000
2022-10-03 19:01:01,227 epoch 36 - iter 60/209 - loss 0.02458949 - samples/sec: 232.65 - lr: 0.050000
2022-10-03 19:01:03,628 epoch 36 - iter 80/209 - loss 0.02705213 - samples/sec: 267.13 - lr: 0.050000
2022-10-03 19:01:06,670 epoch 36 - iter 100/209 - loss 0.02858279 - samples/sec: 210.61 - lr: 0.050000
2022-10-03 19:01:08,615 epoch 36 - iter 120/209 - loss 0.02944707 - samples/sec: 329.81 - lr: 0.050000
2022-10-03 19:01:11,000 epoch 36 - iter 140/209 - loss 0.02934770 - samples/sec: 268.83 - lr: 0.050000
2022-10-03 19:01:13,184 epoch 36 - iter 160/209 - loss 0.02865872 - samples/sec: 293.57 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.84it/s]

2022-10-03 19:01:25,552 Evaluating as a multi-label problem: False
2022-10-03 19:01:25,568 DEV : loss 0.03771058842539787 - f1-score (micro avg)  0.8508





2022-10-03 19:01:25,629 BAD EPOCHS (no improvement): 2
2022-10-03 19:01:25,633 ----------------------------------------------------------------------------------------------------
2022-10-03 19:01:27,808 epoch 37 - iter 20/209 - loss 0.02875593 - samples/sec: 295.03 - lr: 0.050000
2022-10-03 19:01:29,857 epoch 37 - iter 40/209 - loss 0.02971880 - samples/sec: 313.14 - lr: 0.050000
2022-10-03 19:01:32,550 epoch 37 - iter 60/209 - loss 0.02873274 - samples/sec: 237.93 - lr: 0.050000
2022-10-03 19:01:34,330 epoch 37 - iter 80/209 - loss 0.02911321 - samples/sec: 360.47 - lr: 0.050000
2022-10-03 19:01:36,346 epoch 37 - iter 100/209 - loss 0.02795122 - samples/sec: 318.18 - lr: 0.050000
2022-10-03 19:01:38,738 epoch 37 - iter 120/209 - loss 0.02740457 - samples/sec: 268.05 - lr: 0.050000
2022-10-03 19:01:41,163 epoch 37 - iter 140/209 - loss 0.02917642 - samples/sec: 264.30 - lr: 0.050000
2022-10-03 19:01:44,265 epoch 37 - iter 160/209 - loss 0.02948369 - samples/sec: 206.65 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.63it/s]

2022-10-03 19:01:56,684 Evaluating as a multi-label problem: False
2022-10-03 19:01:56,702 DEV : loss 0.035229187458753586 - f1-score (micro avg)  0.8652





2022-10-03 19:01:56,762 BAD EPOCHS (no improvement): 3
2022-10-03 19:01:56,766 ----------------------------------------------------------------------------------------------------
2022-10-03 19:01:59,030 epoch 38 - iter 20/209 - loss 0.02248741 - samples/sec: 283.35 - lr: 0.050000
2022-10-03 19:02:01,403 epoch 38 - iter 40/209 - loss 0.02712813 - samples/sec: 270.11 - lr: 0.050000
2022-10-03 19:02:04,070 epoch 38 - iter 60/209 - loss 0.02696268 - samples/sec: 240.36 - lr: 0.050000
2022-10-03 19:02:05,859 epoch 38 - iter 80/209 - loss 0.02747135 - samples/sec: 358.51 - lr: 0.050000
2022-10-03 19:02:08,089 epoch 38 - iter 100/209 - loss 0.02793577 - samples/sec: 287.54 - lr: 0.050000
2022-10-03 19:02:10,160 epoch 38 - iter 120/209 - loss 0.02764095 - samples/sec: 310.27 - lr: 0.050000
2022-10-03 19:02:12,677 epoch 38 - iter 140/209 - loss 0.02870037 - samples/sec: 254.63 - lr: 0.050000
2022-10-03 19:02:15,054 epoch 38 - iter 160/209 - loss 0.02846173 - samples/sec: 269.66 - lr: 0.050000


100%|██████████| 45/45 [00:06<00:00,  6.83it/s]

2022-10-03 19:02:27,518 Evaluating as a multi-label problem: False
2022-10-03 19:02:27,532 DEV : loss 0.036679625511169434 - f1-score (micro avg)  0.8606





2022-10-03 19:02:27,592 Epoch    38: reducing learning rate of group 0 to 2.5000e-02.
2022-10-03 19:02:27,594 BAD EPOCHS (no improvement): 4
2022-10-03 19:02:27,600 ----------------------------------------------------------------------------------------------------
2022-10-03 19:02:29,559 epoch 39 - iter 20/209 - loss 0.03323789 - samples/sec: 327.49 - lr: 0.025000
2022-10-03 19:02:32,147 epoch 39 - iter 40/209 - loss 0.02909738 - samples/sec: 247.72 - lr: 0.025000
2022-10-03 19:02:35,052 epoch 39 - iter 60/209 - loss 0.02853403 - samples/sec: 220.59 - lr: 0.025000
2022-10-03 19:02:37,286 epoch 39 - iter 80/209 - loss 0.02781790 - samples/sec: 286.98 - lr: 0.025000
2022-10-03 19:02:39,256 epoch 39 - iter 100/209 - loss 0.02665063 - samples/sec: 325.47 - lr: 0.025000
2022-10-03 19:02:41,332 epoch 39 - iter 120/209 - loss 0.02809682 - samples/sec: 309.04 - lr: 0.025000
2022-10-03 19:02:43,969 epoch 39 - iter 140/209 - loss 0.02734183 - samples/sec: 243.03 - lr: 0.025000
2022-10-03 19:02:

100%|██████████| 45/45 [00:06<00:00,  6.43it/s]

2022-10-03 19:02:59,077 Evaluating as a multi-label problem: False
2022-10-03 19:02:59,090 DEV : loss 0.03443046286702156 - f1-score (micro avg)  0.8646





2022-10-03 19:02:59,153 BAD EPOCHS (no improvement): 1
2022-10-03 19:02:59,157 ----------------------------------------------------------------------------------------------------
2022-10-03 19:03:01,315 epoch 40 - iter 20/209 - loss 0.02333335 - samples/sec: 297.29 - lr: 0.025000
2022-10-03 19:03:03,550 epoch 40 - iter 40/209 - loss 0.02482048 - samples/sec: 286.90 - lr: 0.025000
2022-10-03 19:03:05,769 epoch 40 - iter 60/209 - loss 0.02556107 - samples/sec: 289.01 - lr: 0.025000
2022-10-03 19:03:08,215 epoch 40 - iter 80/209 - loss 0.02564052 - samples/sec: 262.05 - lr: 0.025000
2022-10-03 19:03:10,557 epoch 40 - iter 100/209 - loss 0.02677976 - samples/sec: 273.81 - lr: 0.025000
2022-10-03 19:03:13,455 epoch 40 - iter 120/209 - loss 0.02664807 - samples/sec: 221.19 - lr: 0.025000
2022-10-03 19:03:16,237 epoch 40 - iter 140/209 - loss 0.02614328 - samples/sec: 230.39 - lr: 0.025000
2022-10-03 19:03:17,958 epoch 40 - iter 160/209 - loss 0.02552506 - samples/sec: 372.82 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.50it/s]

2022-10-03 19:03:30,523 Evaluating as a multi-label problem: False
2022-10-03 19:03:30,539 DEV : loss 0.03451847657561302 - f1-score (micro avg)  0.8627





2022-10-03 19:03:30,603 BAD EPOCHS (no improvement): 2
2022-10-03 19:03:30,608 ----------------------------------------------------------------------------------------------------
2022-10-03 19:03:32,921 epoch 41 - iter 20/209 - loss 0.02145619 - samples/sec: 277.60 - lr: 0.025000
2022-10-03 19:03:35,941 epoch 41 - iter 40/209 - loss 0.02499946 - samples/sec: 212.18 - lr: 0.025000
2022-10-03 19:03:38,389 epoch 41 - iter 60/209 - loss 0.02378618 - samples/sec: 261.91 - lr: 0.025000
2022-10-03 19:03:40,559 epoch 41 - iter 80/209 - loss 0.02492705 - samples/sec: 295.45 - lr: 0.025000
2022-10-03 19:03:43,052 epoch 41 - iter 100/209 - loss 0.02410770 - samples/sec: 257.18 - lr: 0.025000
2022-10-03 19:03:45,609 epoch 41 - iter 120/209 - loss 0.02370296 - samples/sec: 250.76 - lr: 0.025000
2022-10-03 19:03:47,874 epoch 41 - iter 140/209 - loss 0.02348416 - samples/sec: 283.02 - lr: 0.025000
2022-10-03 19:03:50,312 epoch 41 - iter 160/209 - loss 0.02338055 - samples/sec: 262.99 - lr: 0.025000


100%|██████████| 45/45 [00:07<00:00,  6.37it/s]

2022-10-03 19:04:02,970 Evaluating as a multi-label problem: False
2022-10-03 19:04:02,985 DEV : loss 0.035778582096099854 - f1-score (micro avg)  0.8665





2022-10-03 19:04:03,046 BAD EPOCHS (no improvement): 3
2022-10-03 19:04:03,050 ----------------------------------------------------------------------------------------------------
2022-10-03 19:04:05,204 epoch 42 - iter 20/209 - loss 0.02210304 - samples/sec: 297.84 - lr: 0.025000
2022-10-03 19:04:07,508 epoch 42 - iter 40/209 - loss 0.01883916 - samples/sec: 278.25 - lr: 0.025000
2022-10-03 19:04:09,556 epoch 42 - iter 60/209 - loss 0.02167392 - samples/sec: 313.16 - lr: 0.025000
2022-10-03 19:04:12,724 epoch 42 - iter 80/209 - loss 0.02205374 - samples/sec: 202.25 - lr: 0.025000
2022-10-03 19:04:15,008 epoch 42 - iter 100/209 - loss 0.02330793 - samples/sec: 280.80 - lr: 0.025000
2022-10-03 19:04:17,355 epoch 42 - iter 120/209 - loss 0.02227605 - samples/sec: 273.16 - lr: 0.025000
2022-10-03 19:04:19,747 epoch 42 - iter 140/209 - loss 0.02291935 - samples/sec: 267.92 - lr: 0.025000
2022-10-03 19:04:21,988 epoch 42 - iter 160/209 - loss 0.02300133 - samples/sec: 286.11 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.77it/s]

2022-10-03 19:04:34,240 Evaluating as a multi-label problem: False
2022-10-03 19:04:34,253 DEV : loss 0.034299444407224655 - f1-score (micro avg)  0.873





2022-10-03 19:04:34,315 BAD EPOCHS (no improvement): 0
2022-10-03 19:04:34,320 saving best model
2022-10-03 19:04:37,174 ----------------------------------------------------------------------------------------------------
2022-10-03 19:04:39,823 epoch 43 - iter 20/209 - loss 0.02188493 - samples/sec: 242.09 - lr: 0.025000
2022-10-03 19:04:42,305 epoch 43 - iter 40/209 - loss 0.02090843 - samples/sec: 258.23 - lr: 0.025000
2022-10-03 19:04:44,401 epoch 43 - iter 60/209 - loss 0.02145781 - samples/sec: 305.88 - lr: 0.025000
2022-10-03 19:04:46,842 epoch 43 - iter 80/209 - loss 0.02260492 - samples/sec: 262.79 - lr: 0.025000
2022-10-03 19:04:50,187 epoch 43 - iter 100/209 - loss 0.02342191 - samples/sec: 191.56 - lr: 0.025000
2022-10-03 19:04:52,752 epoch 43 - iter 120/209 - loss 0.02299544 - samples/sec: 250.17 - lr: 0.025000
2022-10-03 19:04:55,118 epoch 43 - iter 140/209 - loss 0.02302930 - samples/sec: 270.95 - lr: 0.025000
2022-10-03 19:04:57,297 epoch 43 - iter 160/209 - loss 0.0233

100%|██████████| 45/45 [00:06<00:00,  6.45it/s]

2022-10-03 19:05:10,114 Evaluating as a multi-label problem: False
2022-10-03 19:05:10,129 DEV : loss 0.034895509481430054 - f1-score (micro avg)  0.8718





2022-10-03 19:05:10,191 BAD EPOCHS (no improvement): 1
2022-10-03 19:05:10,195 ----------------------------------------------------------------------------------------------------
2022-10-03 19:05:12,625 epoch 44 - iter 20/209 - loss 0.02495643 - samples/sec: 263.99 - lr: 0.025000
2022-10-03 19:05:14,666 epoch 44 - iter 40/209 - loss 0.02376734 - samples/sec: 314.28 - lr: 0.025000
2022-10-03 19:05:17,053 epoch 44 - iter 60/209 - loss 0.02244456 - samples/sec: 268.58 - lr: 0.025000
2022-10-03 19:05:19,426 epoch 44 - iter 80/209 - loss 0.02170674 - samples/sec: 270.24 - lr: 0.025000
2022-10-03 19:05:21,858 epoch 44 - iter 100/209 - loss 0.02234838 - samples/sec: 263.51 - lr: 0.025000
2022-10-03 19:05:24,235 epoch 44 - iter 120/209 - loss 0.02189085 - samples/sec: 269.71 - lr: 0.025000
2022-10-03 19:05:26,819 epoch 44 - iter 140/209 - loss 0.02157902 - samples/sec: 248.01 - lr: 0.025000
2022-10-03 19:05:28,850 epoch 44 - iter 160/209 - loss 0.02396619 - samples/sec: 315.84 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.80it/s]

2022-10-03 19:05:41,573 Evaluating as a multi-label problem: False
2022-10-03 19:05:41,591 DEV : loss 0.035161375999450684 - f1-score (micro avg)  0.8664





2022-10-03 19:05:41,652 BAD EPOCHS (no improvement): 2
2022-10-03 19:05:41,656 ----------------------------------------------------------------------------------------------------
2022-10-03 19:05:44,329 epoch 45 - iter 20/209 - loss 0.02446418 - samples/sec: 239.85 - lr: 0.025000
2022-10-03 19:05:46,761 epoch 45 - iter 40/209 - loss 0.02323851 - samples/sec: 263.62 - lr: 0.025000
2022-10-03 19:05:48,547 epoch 45 - iter 60/209 - loss 0.02438842 - samples/sec: 359.20 - lr: 0.025000
2022-10-03 19:05:50,638 epoch 45 - iter 80/209 - loss 0.02397033 - samples/sec: 306.71 - lr: 0.025000
2022-10-03 19:05:52,728 epoch 45 - iter 100/209 - loss 0.02333019 - samples/sec: 306.71 - lr: 0.025000
2022-10-03 19:05:55,401 epoch 45 - iter 120/209 - loss 0.02303955 - samples/sec: 239.80 - lr: 0.025000
2022-10-03 19:05:57,414 epoch 45 - iter 140/209 - loss 0.02262398 - samples/sec: 318.51 - lr: 0.025000
2022-10-03 19:06:00,340 epoch 45 - iter 160/209 - loss 0.02377841 - samples/sec: 219.01 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.45it/s]

2022-10-03 19:06:13,042 Evaluating as a multi-label problem: False
2022-10-03 19:06:13,056 DEV : loss 0.033696189522743225 - f1-score (micro avg)  0.8731





2022-10-03 19:06:13,117 BAD EPOCHS (no improvement): 0
2022-10-03 19:06:13,122 saving best model
2022-10-03 19:06:16,017 ----------------------------------------------------------------------------------------------------
2022-10-03 19:06:18,535 epoch 46 - iter 20/209 - loss 0.02272263 - samples/sec: 254.77 - lr: 0.025000
2022-10-03 19:06:20,706 epoch 46 - iter 40/209 - loss 0.02244563 - samples/sec: 295.42 - lr: 0.025000
2022-10-03 19:06:22,920 epoch 46 - iter 60/209 - loss 0.02208276 - samples/sec: 289.53 - lr: 0.025000
2022-10-03 19:06:25,255 epoch 46 - iter 80/209 - loss 0.02376052 - samples/sec: 274.59 - lr: 0.025000
2022-10-03 19:06:28,101 epoch 46 - iter 100/209 - loss 0.02330807 - samples/sec: 225.26 - lr: 0.025000
2022-10-03 19:06:30,729 epoch 46 - iter 120/209 - loss 0.02361636 - samples/sec: 243.88 - lr: 0.025000
2022-10-03 19:06:33,152 epoch 46 - iter 140/209 - loss 0.02440889 - samples/sec: 264.93 - lr: 0.025000
2022-10-03 19:06:35,379 epoch 46 - iter 160/209 - loss 0.0244

100%|██████████| 45/45 [00:06<00:00,  6.77it/s]

2022-10-03 19:06:47,617 Evaluating as a multi-label problem: False
2022-10-03 19:06:47,631 DEV : loss 0.03433816879987717 - f1-score (micro avg)  0.8642





2022-10-03 19:06:47,693 BAD EPOCHS (no improvement): 1
2022-10-03 19:06:47,697 ----------------------------------------------------------------------------------------------------
2022-10-03 19:06:50,593 epoch 47 - iter 20/209 - loss 0.03144695 - samples/sec: 221.45 - lr: 0.025000
2022-10-03 19:06:53,354 epoch 47 - iter 40/209 - loss 0.02593703 - samples/sec: 232.12 - lr: 0.025000
2022-10-03 19:06:55,886 epoch 47 - iter 60/209 - loss 0.02503758 - samples/sec: 253.23 - lr: 0.025000
2022-10-03 19:06:57,935 epoch 47 - iter 80/209 - loss 0.02411605 - samples/sec: 313.04 - lr: 0.025000
2022-10-03 19:07:00,063 epoch 47 - iter 100/209 - loss 0.02345332 - samples/sec: 301.34 - lr: 0.025000
2022-10-03 19:07:02,523 epoch 47 - iter 120/209 - loss 0.02336255 - samples/sec: 260.55 - lr: 0.025000
2022-10-03 19:07:04,509 epoch 47 - iter 140/209 - loss 0.02361644 - samples/sec: 323.08 - lr: 0.025000
2022-10-03 19:07:06,662 epoch 47 - iter 160/209 - loss 0.02367686 - samples/sec: 297.77 - lr: 0.025000


100%|██████████| 45/45 [00:07<00:00,  6.40it/s]

2022-10-03 19:07:18,896 Evaluating as a multi-label problem: False
2022-10-03 19:07:18,911 DEV : loss 0.03581185266375542 - f1-score (micro avg)  0.8684





2022-10-03 19:07:18,972 BAD EPOCHS (no improvement): 2
2022-10-03 19:07:18,977 ----------------------------------------------------------------------------------------------------
2022-10-03 19:07:20,845 epoch 48 - iter 20/209 - loss 0.02008651 - samples/sec: 343.68 - lr: 0.025000
2022-10-03 19:07:23,385 epoch 48 - iter 40/209 - loss 0.02344140 - samples/sec: 252.33 - lr: 0.025000
2022-10-03 19:07:25,698 epoch 48 - iter 60/209 - loss 0.02315436 - samples/sec: 277.15 - lr: 0.025000
2022-10-03 19:07:27,881 epoch 48 - iter 80/209 - loss 0.02390065 - samples/sec: 293.72 - lr: 0.025000
2022-10-03 19:07:30,065 epoch 48 - iter 100/209 - loss 0.02355359 - samples/sec: 293.64 - lr: 0.025000
2022-10-03 19:07:32,315 epoch 48 - iter 120/209 - loss 0.02334787 - samples/sec: 285.05 - lr: 0.025000
2022-10-03 19:07:34,978 epoch 48 - iter 140/209 - loss 0.02361882 - samples/sec: 240.66 - lr: 0.025000
2022-10-03 19:07:37,328 epoch 48 - iter 160/209 - loss 0.02322118 - samples/sec: 272.82 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.57it/s]

2022-10-03 19:07:50,510 Evaluating as a multi-label problem: False
2022-10-03 19:07:50,524 DEV : loss 0.03494102135300636 - f1-score (micro avg)  0.8774





2022-10-03 19:07:50,585 BAD EPOCHS (no improvement): 0
2022-10-03 19:07:50,589 saving best model
2022-10-03 19:07:53,373 ----------------------------------------------------------------------------------------------------
2022-10-03 19:07:56,031 epoch 49 - iter 20/209 - loss 0.02246136 - samples/sec: 241.25 - lr: 0.025000
2022-10-03 19:07:58,059 epoch 49 - iter 40/209 - loss 0.02835828 - samples/sec: 316.27 - lr: 0.025000
2022-10-03 19:08:00,822 epoch 49 - iter 60/209 - loss 0.02661774 - samples/sec: 232.01 - lr: 0.025000
2022-10-03 19:08:03,338 epoch 49 - iter 80/209 - loss 0.02440317 - samples/sec: 254.81 - lr: 0.025000
2022-10-03 19:08:06,415 epoch 49 - iter 100/209 - loss 0.02353666 - samples/sec: 208.48 - lr: 0.025000
2022-10-03 19:08:08,837 epoch 49 - iter 120/209 - loss 0.02309499 - samples/sec: 264.74 - lr: 0.025000
2022-10-03 19:08:11,197 epoch 49 - iter 140/209 - loss 0.02296278 - samples/sec: 271.71 - lr: 0.025000
2022-10-03 19:08:13,291 epoch 49 - iter 160/209 - loss 0.0228

100%|██████████| 45/45 [00:06<00:00,  6.83it/s]

2022-10-03 19:08:26,275 Evaluating as a multi-label problem: False
2022-10-03 19:08:26,289 DEV : loss 0.035118408501148224 - f1-score (micro avg)  0.8643





2022-10-03 19:08:26,350 BAD EPOCHS (no improvement): 1
2022-10-03 19:08:26,357 ----------------------------------------------------------------------------------------------------
2022-10-03 19:08:28,849 epoch 50 - iter 20/209 - loss 0.01987740 - samples/sec: 257.28 - lr: 0.025000
2022-10-03 19:08:31,090 epoch 50 - iter 40/209 - loss 0.01972687 - samples/sec: 286.14 - lr: 0.025000
2022-10-03 19:08:33,703 epoch 50 - iter 60/209 - loss 0.01974508 - samples/sec: 245.41 - lr: 0.025000
2022-10-03 19:08:36,085 epoch 50 - iter 80/209 - loss 0.02078722 - samples/sec: 269.09 - lr: 0.025000
2022-10-03 19:08:38,525 epoch 50 - iter 100/209 - loss 0.02091209 - samples/sec: 262.72 - lr: 0.025000
2022-10-03 19:08:40,719 epoch 50 - iter 120/209 - loss 0.02140482 - samples/sec: 292.32 - lr: 0.025000
2022-10-03 19:08:43,369 epoch 50 - iter 140/209 - loss 0.02160555 - samples/sec: 241.93 - lr: 0.025000
2022-10-03 19:08:45,364 epoch 50 - iter 160/209 - loss 0.02137551 - samples/sec: 321.43 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.61it/s]

2022-10-03 19:08:58,312 Evaluating as a multi-label problem: False
2022-10-03 19:08:58,327 DEV : loss 0.035399846732616425 - f1-score (micro avg)  0.8716





2022-10-03 19:08:58,392 BAD EPOCHS (no improvement): 2
2022-10-03 19:08:58,398 ----------------------------------------------------------------------------------------------------
2022-10-03 19:09:00,569 epoch 51 - iter 20/209 - loss 0.02495955 - samples/sec: 295.67 - lr: 0.025000
2022-10-03 19:09:02,996 epoch 51 - iter 40/209 - loss 0.02152705 - samples/sec: 264.19 - lr: 0.025000
2022-10-03 19:09:05,367 epoch 51 - iter 60/209 - loss 0.02577160 - samples/sec: 270.39 - lr: 0.025000
2022-10-03 19:09:07,621 epoch 51 - iter 80/209 - loss 0.02370978 - samples/sec: 284.41 - lr: 0.025000
2022-10-03 19:09:10,065 epoch 51 - iter 100/209 - loss 0.02404241 - samples/sec: 262.34 - lr: 0.025000
2022-10-03 19:09:12,613 epoch 51 - iter 120/209 - loss 0.02337208 - samples/sec: 251.53 - lr: 0.025000
2022-10-03 19:09:14,722 epoch 51 - iter 140/209 - loss 0.02257906 - samples/sec: 304.03 - lr: 0.025000
2022-10-03 19:09:17,627 epoch 51 - iter 160/209 - loss 0.02300082 - samples/sec: 220.55 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.88it/s]

2022-10-03 19:09:29,918 Evaluating as a multi-label problem: False
2022-10-03 19:09:29,932 DEV : loss 0.035587798804044724 - f1-score (micro avg)  0.8635





2022-10-03 19:09:29,994 BAD EPOCHS (no improvement): 3
2022-10-03 19:09:29,998 ----------------------------------------------------------------------------------------------------
2022-10-03 19:09:31,961 epoch 52 - iter 20/209 - loss 0.02417430 - samples/sec: 326.84 - lr: 0.025000
2022-10-03 19:09:34,691 epoch 52 - iter 40/209 - loss 0.02565059 - samples/sec: 234.78 - lr: 0.025000
2022-10-03 19:09:37,103 epoch 52 - iter 60/209 - loss 0.02331609 - samples/sec: 265.82 - lr: 0.025000
2022-10-03 19:09:39,697 epoch 52 - iter 80/209 - loss 0.02381603 - samples/sec: 247.11 - lr: 0.025000
2022-10-03 19:09:42,219 epoch 52 - iter 100/209 - loss 0.02268180 - samples/sec: 254.14 - lr: 0.025000
2022-10-03 19:09:44,640 epoch 52 - iter 120/209 - loss 0.02148553 - samples/sec: 264.78 - lr: 0.025000
2022-10-03 19:09:47,042 epoch 52 - iter 140/209 - loss 0.02089275 - samples/sec: 267.01 - lr: 0.025000
2022-10-03 19:09:49,412 epoch 52 - iter 160/209 - loss 0.02111722 - samples/sec: 270.51 - lr: 0.025000


100%|██████████| 45/45 [00:06<00:00,  6.77it/s]

2022-10-03 19:10:01,708 Evaluating as a multi-label problem: False
2022-10-03 19:10:01,726 DEV : loss 0.036509983241558075 - f1-score (micro avg)  0.8729





2022-10-03 19:10:01,788 Epoch    52: reducing learning rate of group 0 to 1.2500e-02.
2022-10-03 19:10:01,790 BAD EPOCHS (no improvement): 4
2022-10-03 19:10:01,796 ----------------------------------------------------------------------------------------------------
2022-10-03 19:10:04,378 epoch 53 - iter 20/209 - loss 0.02399334 - samples/sec: 248.37 - lr: 0.012500
2022-10-03 19:10:06,814 epoch 53 - iter 40/209 - loss 0.02144920 - samples/sec: 263.14 - lr: 0.012500
2022-10-03 19:10:09,345 epoch 53 - iter 60/209 - loss 0.01876152 - samples/sec: 253.32 - lr: 0.012500
2022-10-03 19:10:11,594 epoch 53 - iter 80/209 - loss 0.01835339 - samples/sec: 285.07 - lr: 0.012500
2022-10-03 19:10:13,833 epoch 53 - iter 100/209 - loss 0.01876080 - samples/sec: 286.40 - lr: 0.012500
2022-10-03 19:10:16,273 epoch 53 - iter 120/209 - loss 0.01943931 - samples/sec: 262.77 - lr: 0.012500
2022-10-03 19:10:18,693 epoch 53 - iter 140/209 - loss 0.01932565 - samples/sec: 264.84 - lr: 0.012500
2022-10-03 19:10:

100%|██████████| 45/45 [00:06<00:00,  6.72it/s]

2022-10-03 19:10:33,241 Evaluating as a multi-label problem: False
2022-10-03 19:10:33,255 DEV : loss 0.034630678594112396 - f1-score (micro avg)  0.8679





2022-10-03 19:10:33,317 BAD EPOCHS (no improvement): 1
2022-10-03 19:10:33,321 ----------------------------------------------------------------------------------------------------
2022-10-03 19:10:35,671 epoch 54 - iter 20/209 - loss 0.01341939 - samples/sec: 272.92 - lr: 0.012500
2022-10-03 19:10:38,662 epoch 54 - iter 40/209 - loss 0.01637549 - samples/sec: 214.34 - lr: 0.012500
2022-10-03 19:10:41,116 epoch 54 - iter 60/209 - loss 0.01717534 - samples/sec: 261.22 - lr: 0.012500
2022-10-03 19:10:43,568 epoch 54 - iter 80/209 - loss 0.01802383 - samples/sec: 261.51 - lr: 0.012500
2022-10-03 19:10:45,534 epoch 54 - iter 100/209 - loss 0.01855165 - samples/sec: 326.19 - lr: 0.012500
2022-10-03 19:10:48,344 epoch 54 - iter 120/209 - loss 0.02037558 - samples/sec: 228.10 - lr: 0.012500
2022-10-03 19:10:50,700 epoch 54 - iter 140/209 - loss 0.02030548 - samples/sec: 272.18 - lr: 0.012500
2022-10-03 19:10:52,886 epoch 54 - iter 160/209 - loss 0.02062239 - samples/sec: 293.31 - lr: 0.012500


100%|██████████| 45/45 [00:07<00:00,  6.43it/s]

2022-10-03 19:11:05,357 Evaluating as a multi-label problem: False
2022-10-03 19:11:05,371 DEV : loss 0.03396720439195633 - f1-score (micro avg)  0.8711





2022-10-03 19:11:05,431 BAD EPOCHS (no improvement): 2
2022-10-03 19:11:05,442 ----------------------------------------------------------------------------------------------------
2022-10-03 19:11:08,161 epoch 55 - iter 20/209 - loss 0.01578634 - samples/sec: 235.83 - lr: 0.012500
2022-10-03 19:11:10,985 epoch 55 - iter 40/209 - loss 0.01966757 - samples/sec: 226.93 - lr: 0.012500
2022-10-03 19:11:13,273 epoch 55 - iter 60/209 - loss 0.01868557 - samples/sec: 280.17 - lr: 0.012500
2022-10-03 19:11:15,623 epoch 55 - iter 80/209 - loss 0.01781036 - samples/sec: 272.84 - lr: 0.012500
2022-10-03 19:11:17,593 epoch 55 - iter 100/209 - loss 0.01921116 - samples/sec: 325.53 - lr: 0.012500
2022-10-03 19:11:19,901 epoch 55 - iter 120/209 - loss 0.01898954 - samples/sec: 277.90 - lr: 0.012500
2022-10-03 19:11:22,213 epoch 55 - iter 140/209 - loss 0.02014739 - samples/sec: 277.34 - lr: 0.012500
2022-10-03 19:11:24,302 epoch 55 - iter 160/209 - loss 0.01961584 - samples/sec: 306.88 - lr: 0.012500


100%|██████████| 45/45 [00:06<00:00,  6.78it/s]

2022-10-03 19:11:36,346 Evaluating as a multi-label problem: False
2022-10-03 19:11:36,362 DEV : loss 0.034209948033094406 - f1-score (micro avg)  0.8749





2022-10-03 19:11:36,426 BAD EPOCHS (no improvement): 3
2022-10-03 19:11:36,430 ----------------------------------------------------------------------------------------------------
2022-10-03 19:11:39,046 epoch 56 - iter 20/209 - loss 0.01643525 - samples/sec: 245.20 - lr: 0.012500
2022-10-03 19:11:41,552 epoch 56 - iter 40/209 - loss 0.01650752 - samples/sec: 255.81 - lr: 0.012500
2022-10-03 19:11:43,440 epoch 56 - iter 60/209 - loss 0.01781854 - samples/sec: 339.74 - lr: 0.012500
2022-10-03 19:11:46,195 epoch 56 - iter 80/209 - loss 0.01813378 - samples/sec: 232.68 - lr: 0.012500
2022-10-03 19:11:48,281 epoch 56 - iter 100/209 - loss 0.01822422 - samples/sec: 307.42 - lr: 0.012500
2022-10-03 19:11:51,016 epoch 56 - iter 120/209 - loss 0.01825890 - samples/sec: 234.35 - lr: 0.012500
2022-10-03 19:11:53,096 epoch 56 - iter 140/209 - loss 0.01777448 - samples/sec: 308.38 - lr: 0.012500
2022-10-03 19:11:55,968 epoch 56 - iter 160/209 - loss 0.01860373 - samples/sec: 223.06 - lr: 0.012500


100%|██████████| 45/45 [00:07<00:00,  6.28it/s]

2022-10-03 19:12:08,503 Evaluating as a multi-label problem: False
2022-10-03 19:12:08,518 DEV : loss 0.03512508049607277 - f1-score (micro avg)  0.8697





2022-10-03 19:12:08,581 Epoch    56: reducing learning rate of group 0 to 6.2500e-03.
2022-10-03 19:12:08,583 BAD EPOCHS (no improvement): 4
2022-10-03 19:12:08,590 ----------------------------------------------------------------------------------------------------
2022-10-03 19:12:11,049 epoch 57 - iter 20/209 - loss 0.01980101 - samples/sec: 260.80 - lr: 0.006250
2022-10-03 19:12:13,364 epoch 57 - iter 40/209 - loss 0.02146163 - samples/sec: 276.95 - lr: 0.006250
2022-10-03 19:12:15,347 epoch 57 - iter 60/209 - loss 0.02256366 - samples/sec: 323.40 - lr: 0.006250
2022-10-03 19:12:18,309 epoch 57 - iter 80/209 - loss 0.02139072 - samples/sec: 216.42 - lr: 0.006250
2022-10-03 19:12:20,660 epoch 57 - iter 100/209 - loss 0.02158705 - samples/sec: 272.64 - lr: 0.006250
2022-10-03 19:12:23,019 epoch 57 - iter 120/209 - loss 0.02139961 - samples/sec: 271.77 - lr: 0.006250
2022-10-03 19:12:25,466 epoch 57 - iter 140/209 - loss 0.02058319 - samples/sec: 262.07 - lr: 0.006250
2022-10-03 19:12:

100%|██████████| 45/45 [00:06<00:00,  6.80it/s]

2022-10-03 19:12:39,744 Evaluating as a multi-label problem: False
2022-10-03 19:12:39,759 DEV : loss 0.035065241158008575 - f1-score (micro avg)  0.8704





2022-10-03 19:12:39,819 BAD EPOCHS (no improvement): 1
2022-10-03 19:12:39,824 ----------------------------------------------------------------------------------------------------
2022-10-03 19:12:42,197 epoch 58 - iter 20/209 - loss 0.02111709 - samples/sec: 270.37 - lr: 0.006250
2022-10-03 19:12:45,193 epoch 58 - iter 40/209 - loss 0.02369031 - samples/sec: 213.94 - lr: 0.006250
2022-10-03 19:12:46,955 epoch 58 - iter 60/209 - loss 0.02213122 - samples/sec: 364.10 - lr: 0.006250
2022-10-03 19:12:49,482 epoch 58 - iter 80/209 - loss 0.02033665 - samples/sec: 253.61 - lr: 0.006250
2022-10-03 19:12:52,144 epoch 58 - iter 100/209 - loss 0.02028090 - samples/sec: 240.83 - lr: 0.006250
2022-10-03 19:12:54,365 epoch 58 - iter 120/209 - loss 0.02112618 - samples/sec: 288.71 - lr: 0.006250
2022-10-03 19:12:56,739 epoch 58 - iter 140/209 - loss 0.02171642 - samples/sec: 270.00 - lr: 0.006250
2022-10-03 19:12:59,316 epoch 58 - iter 160/209 - loss 0.02095216 - samples/sec: 248.72 - lr: 0.006250


100%|██████████| 45/45 [00:06<00:00,  6.82it/s]

2022-10-03 19:13:10,925 Evaluating as a multi-label problem: False
2022-10-03 19:13:10,939 DEV : loss 0.03536901995539665 - f1-score (micro avg)  0.8754





2022-10-03 19:13:11,000 BAD EPOCHS (no improvement): 2
2022-10-03 19:13:11,004 ----------------------------------------------------------------------------------------------------
2022-10-03 19:13:13,703 epoch 59 - iter 20/209 - loss 0.02152230 - samples/sec: 237.64 - lr: 0.006250
2022-10-03 19:13:16,058 epoch 59 - iter 40/209 - loss 0.01760568 - samples/sec: 272.25 - lr: 0.006250
2022-10-03 19:13:18,563 epoch 59 - iter 60/209 - loss 0.01822673 - samples/sec: 255.91 - lr: 0.006250
2022-10-03 19:13:20,670 epoch 59 - iter 80/209 - loss 0.01938733 - samples/sec: 304.34 - lr: 0.006250
2022-10-03 19:13:23,007 epoch 59 - iter 100/209 - loss 0.01913116 - samples/sec: 274.35 - lr: 0.006250
2022-10-03 19:13:25,944 epoch 59 - iter 120/209 - loss 0.01930495 - samples/sec: 218.21 - lr: 0.006250
2022-10-03 19:13:28,267 epoch 59 - iter 140/209 - loss 0.01980231 - samples/sec: 276.07 - lr: 0.006250
2022-10-03 19:13:30,547 epoch 59 - iter 160/209 - loss 0.01962202 - samples/sec: 281.26 - lr: 0.006250


100%|██████████| 45/45 [00:06<00:00,  6.85it/s]

2022-10-03 19:13:42,983 Evaluating as a multi-label problem: False
2022-10-03 19:13:42,998 DEV : loss 0.03492897003889084 - f1-score (micro avg)  0.875





2022-10-03 19:13:43,059 BAD EPOCHS (no improvement): 3
2022-10-03 19:13:43,064 ----------------------------------------------------------------------------------------------------
2022-10-03 19:13:45,392 epoch 60 - iter 20/209 - loss 0.02293606 - samples/sec: 275.53 - lr: 0.006250
2022-10-03 19:13:47,709 epoch 60 - iter 40/209 - loss 0.02144036 - samples/sec: 276.75 - lr: 0.006250
2022-10-03 19:13:49,835 epoch 60 - iter 60/209 - loss 0.02089450 - samples/sec: 301.64 - lr: 0.006250
2022-10-03 19:13:52,647 epoch 60 - iter 80/209 - loss 0.02164286 - samples/sec: 227.91 - lr: 0.006250
2022-10-03 19:13:54,923 epoch 60 - iter 100/209 - loss 0.02062991 - samples/sec: 281.67 - lr: 0.006250
2022-10-03 19:13:57,095 epoch 60 - iter 120/209 - loss 0.01957764 - samples/sec: 295.18 - lr: 0.006250
2022-10-03 19:13:59,496 epoch 60 - iter 140/209 - loss 0.01890808 - samples/sec: 267.00 - lr: 0.006250
2022-10-03 19:14:01,749 epoch 60 - iter 160/209 - loss 0.01930005 - samples/sec: 284.52 - lr: 0.006250


100%|██████████| 45/45 [00:06<00:00,  6.92it/s]

2022-10-03 19:14:13,834 Evaluating as a multi-label problem: False
2022-10-03 19:14:13,847 DEV : loss 0.0348680317401886 - f1-score (micro avg)  0.874





2022-10-03 19:14:13,907 Epoch    60: reducing learning rate of group 0 to 3.1250e-03.
2022-10-03 19:14:13,909 BAD EPOCHS (no improvement): 4
2022-10-03 19:14:13,914 ----------------------------------------------------------------------------------------------------
2022-10-03 19:14:15,973 epoch 61 - iter 20/209 - loss 0.02414530 - samples/sec: 311.61 - lr: 0.003125
2022-10-03 19:14:17,981 epoch 61 - iter 40/209 - loss 0.01924958 - samples/sec: 319.48 - lr: 0.003125
2022-10-03 19:14:20,711 epoch 61 - iter 60/209 - loss 0.01916045 - samples/sec: 234.76 - lr: 0.003125
2022-10-03 19:14:23,246 epoch 61 - iter 80/209 - loss 0.01844401 - samples/sec: 252.88 - lr: 0.003125
2022-10-03 19:14:25,427 epoch 61 - iter 100/209 - loss 0.01858065 - samples/sec: 293.96 - lr: 0.003125
2022-10-03 19:14:27,737 epoch 61 - iter 120/209 - loss 0.01837066 - samples/sec: 277.63 - lr: 0.003125
2022-10-03 19:14:30,600 epoch 61 - iter 140/209 - loss 0.01929522 - samples/sec: 223.88 - lr: 0.003125
2022-10-03 19:14:

100%|██████████| 45/45 [00:06<00:00,  6.80it/s]

2022-10-03 19:14:44,941 Evaluating as a multi-label problem: False
2022-10-03 19:14:44,957 DEV : loss 0.03488953039050102 - f1-score (micro avg)  0.8742





2022-10-03 19:14:45,015 BAD EPOCHS (no improvement): 1
2022-10-03 19:14:45,021 ----------------------------------------------------------------------------------------------------
2022-10-03 19:14:47,415 epoch 62 - iter 20/209 - loss 0.01181098 - samples/sec: 268.11 - lr: 0.003125
2022-10-03 19:14:49,787 epoch 62 - iter 40/209 - loss 0.01293157 - samples/sec: 270.34 - lr: 0.003125
2022-10-03 19:14:52,062 epoch 62 - iter 60/209 - loss 0.01394274 - samples/sec: 281.79 - lr: 0.003125
2022-10-03 19:14:54,249 epoch 62 - iter 80/209 - loss 0.01385399 - samples/sec: 293.17 - lr: 0.003125
2022-10-03 19:14:56,250 epoch 62 - iter 100/209 - loss 0.01568637 - samples/sec: 320.49 - lr: 0.003125
2022-10-03 19:14:58,237 epoch 62 - iter 120/209 - loss 0.01746889 - samples/sec: 322.95 - lr: 0.003125
2022-10-03 19:15:00,900 epoch 62 - iter 140/209 - loss 0.01841341 - samples/sec: 240.72 - lr: 0.003125
2022-10-03 19:15:03,099 epoch 62 - iter 160/209 - loss 0.01827288 - samples/sec: 291.61 - lr: 0.003125


100%|██████████| 45/45 [00:06<00:00,  6.89it/s]

2022-10-03 19:15:15,945 Evaluating as a multi-label problem: False
2022-10-03 19:15:15,959 DEV : loss 0.034764859825372696 - f1-score (micro avg)  0.8717





2022-10-03 19:15:16,021 BAD EPOCHS (no improvement): 2
2022-10-03 19:15:16,025 ----------------------------------------------------------------------------------------------------
2022-10-03 19:15:18,188 epoch 63 - iter 20/209 - loss 0.01996152 - samples/sec: 296.63 - lr: 0.003125
2022-10-03 19:15:20,645 epoch 63 - iter 40/209 - loss 0.01863588 - samples/sec: 260.96 - lr: 0.003125
2022-10-03 19:15:23,793 epoch 63 - iter 60/209 - loss 0.01856509 - samples/sec: 203.57 - lr: 0.003125
2022-10-03 19:15:25,943 epoch 63 - iter 80/209 - loss 0.01795568 - samples/sec: 298.12 - lr: 0.003125
2022-10-03 19:15:28,079 epoch 63 - iter 100/209 - loss 0.01724804 - samples/sec: 300.21 - lr: 0.003125
2022-10-03 19:15:29,886 epoch 63 - iter 120/209 - loss 0.01743796 - samples/sec: 354.84 - lr: 0.003125
2022-10-03 19:15:32,655 epoch 63 - iter 140/209 - loss 0.01787550 - samples/sec: 231.53 - lr: 0.003125
2022-10-03 19:15:34,635 epoch 63 - iter 160/209 - loss 0.01801099 - samples/sec: 323.87 - lr: 0.003125


100%|██████████| 45/45 [00:06<00:00,  6.48it/s]

2022-10-03 19:15:47,048 Evaluating as a multi-label problem: False
2022-10-03 19:15:47,062 DEV : loss 0.03489621728658676 - f1-score (micro avg)  0.8798





2022-10-03 19:15:47,122 BAD EPOCHS (no improvement): 0
2022-10-03 19:15:47,130 saving best model
2022-10-03 19:15:49,853 ----------------------------------------------------------------------------------------------------
2022-10-03 19:15:52,204 epoch 64 - iter 20/209 - loss 0.01578349 - samples/sec: 272.90 - lr: 0.003125
2022-10-03 19:15:54,328 epoch 64 - iter 40/209 - loss 0.01531008 - samples/sec: 301.89 - lr: 0.003125
2022-10-03 19:15:56,914 epoch 64 - iter 60/209 - loss 0.01537041 - samples/sec: 247.93 - lr: 0.003125
2022-10-03 19:15:59,774 epoch 64 - iter 80/209 - loss 0.01761964 - samples/sec: 224.05 - lr: 0.003125
2022-10-03 19:16:02,187 epoch 64 - iter 100/209 - loss 0.01901937 - samples/sec: 265.73 - lr: 0.003125
2022-10-03 19:16:05,375 epoch 64 - iter 120/209 - loss 0.01905087 - samples/sec: 201.00 - lr: 0.003125
2022-10-03 19:16:07,668 epoch 64 - iter 140/209 - loss 0.01832486 - samples/sec: 279.57 - lr: 0.003125
2022-10-03 19:16:09,719 epoch 64 - iter 160/209 - loss 0.0185

100%|██████████| 45/45 [00:06<00:00,  6.87it/s]

2022-10-03 19:16:22,020 Evaluating as a multi-label problem: False
2022-10-03 19:16:22,037 DEV : loss 0.034771960228681564 - f1-score (micro avg)  0.8778





2022-10-03 19:16:22,097 BAD EPOCHS (no improvement): 1
2022-10-03 19:16:22,102 ----------------------------------------------------------------------------------------------------
2022-10-03 19:16:24,501 epoch 65 - iter 20/209 - loss 0.01821296 - samples/sec: 267.35 - lr: 0.003125
2022-10-03 19:16:26,919 epoch 65 - iter 40/209 - loss 0.01808091 - samples/sec: 265.17 - lr: 0.003125
2022-10-03 19:16:29,023 epoch 65 - iter 60/209 - loss 0.01987660 - samples/sec: 304.75 - lr: 0.003125
2022-10-03 19:16:31,425 epoch 65 - iter 80/209 - loss 0.02063004 - samples/sec: 266.84 - lr: 0.003125
2022-10-03 19:16:33,821 epoch 65 - iter 100/209 - loss 0.02091067 - samples/sec: 267.55 - lr: 0.003125
2022-10-03 19:16:36,045 epoch 65 - iter 120/209 - loss 0.02004083 - samples/sec: 288.40 - lr: 0.003125
2022-10-03 19:16:38,379 epoch 65 - iter 140/209 - loss 0.01931386 - samples/sec: 274.62 - lr: 0.003125
2022-10-03 19:16:40,381 epoch 65 - iter 160/209 - loss 0.01928993 - samples/sec: 320.29 - lr: 0.003125


100%|██████████| 45/45 [00:06<00:00,  6.48it/s]

2022-10-03 19:16:53,553 Evaluating as a multi-label problem: False
2022-10-03 19:16:53,568 DEV : loss 0.034869417548179626 - f1-score (micro avg)  0.8752





2022-10-03 19:16:53,628 BAD EPOCHS (no improvement): 2
2022-10-03 19:16:53,634 ----------------------------------------------------------------------------------------------------
2022-10-03 19:16:55,907 epoch 66 - iter 20/209 - loss 0.01491423 - samples/sec: 282.30 - lr: 0.003125
2022-10-03 19:16:58,260 epoch 66 - iter 40/209 - loss 0.01778867 - samples/sec: 272.48 - lr: 0.003125
2022-10-03 19:17:00,450 epoch 66 - iter 60/209 - loss 0.01876415 - samples/sec: 292.83 - lr: 0.003125
2022-10-03 19:17:02,870 epoch 66 - iter 80/209 - loss 0.01845549 - samples/sec: 264.89 - lr: 0.003125
2022-10-03 19:17:05,136 epoch 66 - iter 100/209 - loss 0.01966041 - samples/sec: 282.86 - lr: 0.003125
2022-10-03 19:17:07,282 epoch 66 - iter 120/209 - loss 0.01956874 - samples/sec: 298.86 - lr: 0.003125
2022-10-03 19:17:09,608 epoch 66 - iter 140/209 - loss 0.01902105 - samples/sec: 275.54 - lr: 0.003125
2022-10-03 19:17:11,902 epoch 66 - iter 160/209 - loss 0.01868922 - samples/sec: 279.53 - lr: 0.003125


100%|██████████| 45/45 [00:06<00:00,  6.83it/s]

2022-10-03 19:17:24,876 Evaluating as a multi-label problem: False
2022-10-03 19:17:24,890 DEV : loss 0.03478711098432541 - f1-score (micro avg)  0.8762





2022-10-03 19:17:24,959 BAD EPOCHS (no improvement): 3
2022-10-03 19:17:24,963 ----------------------------------------------------------------------------------------------------
2022-10-03 19:17:27,481 epoch 67 - iter 20/209 - loss 0.01396055 - samples/sec: 254.74 - lr: 0.003125
2022-10-03 19:17:29,768 epoch 67 - iter 40/209 - loss 0.02044204 - samples/sec: 280.36 - lr: 0.003125
2022-10-03 19:17:32,538 epoch 67 - iter 60/209 - loss 0.01863545 - samples/sec: 231.40 - lr: 0.003125
2022-10-03 19:17:34,507 epoch 67 - iter 80/209 - loss 0.01711628 - samples/sec: 325.58 - lr: 0.003125
2022-10-03 19:17:36,401 epoch 67 - iter 100/209 - loss 0.01870816 - samples/sec: 338.62 - lr: 0.003125
2022-10-03 19:17:38,755 epoch 67 - iter 120/209 - loss 0.01784488 - samples/sec: 272.30 - lr: 0.003125
2022-10-03 19:17:41,339 epoch 67 - iter 140/209 - loss 0.01762153 - samples/sec: 248.05 - lr: 0.003125
2022-10-03 19:17:43,483 epoch 67 - iter 160/209 - loss 0.01878138 - samples/sec: 298.97 - lr: 0.003125


100%|██████████| 45/45 [00:06<00:00,  6.62it/s]

2022-10-03 19:17:55,978 Evaluating as a multi-label problem: False
2022-10-03 19:17:55,991 DEV : loss 0.035082440823316574 - f1-score (micro avg)  0.8762





2022-10-03 19:17:56,052 Epoch    67: reducing learning rate of group 0 to 1.5625e-03.
2022-10-03 19:17:56,053 BAD EPOCHS (no improvement): 4
2022-10-03 19:17:56,061 ----------------------------------------------------------------------------------------------------
2022-10-03 19:17:58,786 epoch 68 - iter 20/209 - loss 0.01963565 - samples/sec: 235.47 - lr: 0.001563
2022-10-03 19:18:00,899 epoch 68 - iter 40/209 - loss 0.01923029 - samples/sec: 303.38 - lr: 0.001563
2022-10-03 19:18:02,951 epoch 68 - iter 60/209 - loss 0.01937616 - samples/sec: 312.40 - lr: 0.001563
2022-10-03 19:18:05,141 epoch 68 - iter 80/209 - loss 0.01897040 - samples/sec: 292.69 - lr: 0.001563
2022-10-03 19:18:07,378 epoch 68 - iter 100/209 - loss 0.01842393 - samples/sec: 286.60 - lr: 0.001563
2022-10-03 19:18:09,745 epoch 68 - iter 120/209 - loss 0.01807717 - samples/sec: 270.67 - lr: 0.001563
2022-10-03 19:18:11,845 epoch 68 - iter 140/209 - loss 0.01798271 - samples/sec: 305.26 - lr: 0.001563
2022-10-03 19:18:

100%|██████████| 45/45 [00:06<00:00,  7.00it/s]

2022-10-03 19:18:26,214 Evaluating as a multi-label problem: False
2022-10-03 19:18:26,228 DEV : loss 0.03492031991481781 - f1-score (micro avg)  0.8762





2022-10-03 19:18:26,288 BAD EPOCHS (no improvement): 1
2022-10-03 19:18:26,292 ----------------------------------------------------------------------------------------------------
2022-10-03 19:18:28,589 epoch 69 - iter 20/209 - loss 0.01519404 - samples/sec: 279.37 - lr: 0.001563
2022-10-03 19:18:30,567 epoch 69 - iter 40/209 - loss 0.01819276 - samples/sec: 324.08 - lr: 0.001563
2022-10-03 19:18:32,976 epoch 69 - iter 60/209 - loss 0.01884177 - samples/sec: 266.12 - lr: 0.001563
2022-10-03 19:18:35,591 epoch 69 - iter 80/209 - loss 0.01888207 - samples/sec: 245.01 - lr: 0.001563
2022-10-03 19:18:38,326 epoch 69 - iter 100/209 - loss 0.02009867 - samples/sec: 234.34 - lr: 0.001563
2022-10-03 19:18:40,521 epoch 69 - iter 120/209 - loss 0.02020916 - samples/sec: 292.01 - lr: 0.001563
2022-10-03 19:18:42,624 epoch 69 - iter 140/209 - loss 0.01910118 - samples/sec: 304.89 - lr: 0.001563
2022-10-03 19:18:44,702 epoch 69 - iter 160/209 - loss 0.01879652 - samples/sec: 308.62 - lr: 0.001563


100%|██████████| 45/45 [00:06<00:00,  7.10it/s]

2022-10-03 19:18:56,572 Evaluating as a multi-label problem: False
2022-10-03 19:18:56,586 DEV : loss 0.03477175906300545 - f1-score (micro avg)  0.8806





2022-10-03 19:18:56,645 BAD EPOCHS (no improvement): 0
2022-10-03 19:18:56,649 saving best model
2022-10-03 19:18:59,465 ----------------------------------------------------------------------------------------------------
2022-10-03 19:19:01,969 epoch 70 - iter 20/209 - loss 0.01711614 - samples/sec: 256.15 - lr: 0.001563
2022-10-03 19:19:04,036 epoch 70 - iter 40/209 - loss 0.01608790 - samples/sec: 310.19 - lr: 0.001563
2022-10-03 19:19:06,042 epoch 70 - iter 60/209 - loss 0.01877522 - samples/sec: 319.60 - lr: 0.001563
2022-10-03 19:19:09,063 epoch 70 - iter 80/209 - loss 0.01951794 - samples/sec: 212.05 - lr: 0.001563
2022-10-03 19:19:11,505 epoch 70 - iter 100/209 - loss 0.01906754 - samples/sec: 262.48 - lr: 0.001563
2022-10-03 19:19:14,274 epoch 70 - iter 120/209 - loss 0.01811945 - samples/sec: 231.44 - lr: 0.001563
2022-10-03 19:19:16,622 epoch 70 - iter 140/209 - loss 0.01745468 - samples/sec: 273.06 - lr: 0.001563
2022-10-03 19:19:19,199 epoch 70 - iter 160/209 - loss 0.0167

100%|██████████| 45/45 [00:06<00:00,  7.01it/s]

2022-10-03 19:19:30,671 Evaluating as a multi-label problem: False
2022-10-03 19:19:30,686 DEV : loss 0.0350797101855278 - f1-score (micro avg)  0.8742





2022-10-03 19:19:30,747 BAD EPOCHS (no improvement): 1
2022-10-03 19:19:30,751 ----------------------------------------------------------------------------------------------------
2022-10-03 19:19:32,929 epoch 71 - iter 20/209 - loss 0.02151159 - samples/sec: 294.40 - lr: 0.001563
2022-10-03 19:19:35,430 epoch 71 - iter 40/209 - loss 0.02354236 - samples/sec: 256.37 - lr: 0.001563
2022-10-03 19:19:37,437 epoch 71 - iter 60/209 - loss 0.02103064 - samples/sec: 319.45 - lr: 0.001563
2022-10-03 19:19:39,769 epoch 71 - iter 80/209 - loss 0.01849395 - samples/sec: 274.87 - lr: 0.001563
2022-10-03 19:19:41,740 epoch 71 - iter 100/209 - loss 0.01906296 - samples/sec: 325.24 - lr: 0.001563
2022-10-03 19:19:43,719 epoch 71 - iter 120/209 - loss 0.02003465 - samples/sec: 323.96 - lr: 0.001563
2022-10-03 19:19:45,917 epoch 71 - iter 140/209 - loss 0.01985499 - samples/sec: 291.56 - lr: 0.001563
2022-10-03 19:19:48,290 epoch 71 - iter 160/209 - loss 0.01919267 - samples/sec: 270.30 - lr: 0.001563


100%|██████████| 45/45 [00:06<00:00,  7.00it/s]

2022-10-03 19:20:00,635 Evaluating as a multi-label problem: False
2022-10-03 19:20:00,649 DEV : loss 0.034917108714580536 - f1-score (micro avg)  0.875





2022-10-03 19:20:00,712 BAD EPOCHS (no improvement): 2
2022-10-03 19:20:00,723 ----------------------------------------------------------------------------------------------------
2022-10-03 19:20:03,041 epoch 72 - iter 20/209 - loss 0.01946228 - samples/sec: 276.79 - lr: 0.001563
2022-10-03 19:20:04,925 epoch 72 - iter 40/209 - loss 0.02167901 - samples/sec: 340.38 - lr: 0.001563
2022-10-03 19:20:07,172 epoch 72 - iter 60/209 - loss 0.01904927 - samples/sec: 285.31 - lr: 0.001563
2022-10-03 19:20:09,920 epoch 72 - iter 80/209 - loss 0.01950831 - samples/sec: 233.21 - lr: 0.001563
2022-10-03 19:20:12,057 epoch 72 - iter 100/209 - loss 0.01887816 - samples/sec: 299.86 - lr: 0.001563
2022-10-03 19:20:13,954 epoch 72 - iter 120/209 - loss 0.01973675 - samples/sec: 338.09 - lr: 0.001563
2022-10-03 19:20:15,986 epoch 72 - iter 140/209 - loss 0.02005957 - samples/sec: 315.49 - lr: 0.001563
2022-10-03 19:20:18,487 epoch 72 - iter 160/209 - loss 0.02007449 - samples/sec: 256.24 - lr: 0.001563


100%|██████████| 45/45 [00:06<00:00,  7.01it/s]

2022-10-03 19:20:30,457 Evaluating as a multi-label problem: False
2022-10-03 19:20:30,470 DEV : loss 0.03500644117593765 - f1-score (micro avg)  0.875





2022-10-03 19:20:30,531 BAD EPOCHS (no improvement): 3
2022-10-03 19:20:30,538 ----------------------------------------------------------------------------------------------------
2022-10-03 19:20:32,420 epoch 73 - iter 20/209 - loss 0.01748658 - samples/sec: 340.88 - lr: 0.001563
2022-10-03 19:20:34,913 epoch 73 - iter 40/209 - loss 0.01785763 - samples/sec: 257.06 - lr: 0.001563
2022-10-03 19:20:37,414 epoch 73 - iter 60/209 - loss 0.01636475 - samples/sec: 256.19 - lr: 0.001563
2022-10-03 19:20:39,328 epoch 73 - iter 80/209 - loss 0.01625790 - samples/sec: 335.13 - lr: 0.001563
2022-10-03 19:20:41,779 epoch 73 - iter 100/209 - loss 0.01624215 - samples/sec: 261.55 - lr: 0.001563
2022-10-03 19:20:43,811 epoch 73 - iter 120/209 - loss 0.01735613 - samples/sec: 315.53 - lr: 0.001563
2022-10-03 19:20:45,821 epoch 73 - iter 140/209 - loss 0.01772073 - samples/sec: 318.90 - lr: 0.001563
2022-10-03 19:20:47,944 epoch 73 - iter 160/209 - loss 0.01784101 - samples/sec: 301.85 - lr: 0.001563


100%|██████████| 45/45 [00:06<00:00,  6.98it/s]

2022-10-03 19:21:00,519 Evaluating as a multi-label problem: False
2022-10-03 19:21:00,534 DEV : loss 0.035036664456129074 - f1-score (micro avg)  0.876





2022-10-03 19:21:00,596 Epoch    73: reducing learning rate of group 0 to 7.8125e-04.
2022-10-03 19:21:00,597 BAD EPOCHS (no improvement): 4
2022-10-03 19:21:00,603 ----------------------------------------------------------------------------------------------------
2022-10-03 19:21:03,105 epoch 74 - iter 20/209 - loss 0.02677825 - samples/sec: 256.37 - lr: 0.000781
2022-10-03 19:21:05,130 epoch 74 - iter 40/209 - loss 0.02413203 - samples/sec: 316.49 - lr: 0.000781
2022-10-03 19:21:07,182 epoch 74 - iter 60/209 - loss 0.02215506 - samples/sec: 312.47 - lr: 0.000781
2022-10-03 19:21:09,531 epoch 74 - iter 80/209 - loss 0.02063176 - samples/sec: 272.93 - lr: 0.000781
2022-10-03 19:21:12,348 epoch 74 - iter 100/209 - loss 0.01961199 - samples/sec: 227.49 - lr: 0.000781
2022-10-03 19:21:14,424 epoch 74 - iter 120/209 - loss 0.01876651 - samples/sec: 308.78 - lr: 0.000781
2022-10-03 19:21:16,683 epoch 74 - iter 140/209 - loss 0.01855344 - samples/sec: 283.82 - lr: 0.000781
2022-10-03 19:21:

100%|██████████| 45/45 [00:06<00:00,  6.54it/s]

2022-10-03 19:21:30,946 Evaluating as a multi-label problem: False
2022-10-03 19:21:30,960 DEV : loss 0.03510430455207825 - f1-score (micro avg)  0.875





2022-10-03 19:21:31,021 BAD EPOCHS (no improvement): 1
2022-10-03 19:21:31,024 ----------------------------------------------------------------------------------------------------
2022-10-03 19:21:33,165 epoch 75 - iter 20/209 - loss 0.01911324 - samples/sec: 299.84 - lr: 0.000781
2022-10-03 19:21:35,875 epoch 75 - iter 40/209 - loss 0.01820434 - samples/sec: 236.46 - lr: 0.000781
2022-10-03 19:21:37,846 epoch 75 - iter 60/209 - loss 0.01806888 - samples/sec: 325.36 - lr: 0.000781
2022-10-03 19:21:39,765 epoch 75 - iter 80/209 - loss 0.01774210 - samples/sec: 334.03 - lr: 0.000781
2022-10-03 19:21:42,404 epoch 75 - iter 100/209 - loss 0.01738705 - samples/sec: 242.81 - lr: 0.000781
2022-10-03 19:21:44,592 epoch 75 - iter 120/209 - loss 0.01768008 - samples/sec: 293.07 - lr: 0.000781
2022-10-03 19:21:46,938 epoch 75 - iter 140/209 - loss 0.01741865 - samples/sec: 273.23 - lr: 0.000781
2022-10-03 19:21:49,432 epoch 75 - iter 160/209 - loss 0.01759304 - samples/sec: 257.00 - lr: 0.000781


100%|██████████| 45/45 [00:06<00:00,  7.03it/s]

2022-10-03 19:22:00,914 Evaluating as a multi-label problem: False
2022-10-03 19:22:00,927 DEV : loss 0.035137295722961426 - f1-score (micro avg)  0.875





2022-10-03 19:22:00,989 BAD EPOCHS (no improvement): 2
2022-10-03 19:22:00,993 ----------------------------------------------------------------------------------------------------
2022-10-03 19:22:03,445 epoch 76 - iter 20/209 - loss 0.02154003 - samples/sec: 261.49 - lr: 0.000781
2022-10-03 19:22:05,374 epoch 76 - iter 40/209 - loss 0.01879825 - samples/sec: 332.48 - lr: 0.000781
2022-10-03 19:22:08,431 epoch 76 - iter 60/209 - loss 0.01864241 - samples/sec: 209.61 - lr: 0.000781
2022-10-03 19:22:10,605 epoch 76 - iter 80/209 - loss 0.01749681 - samples/sec: 294.89 - lr: 0.000781
2022-10-03 19:22:12,944 epoch 76 - iter 100/209 - loss 0.01804213 - samples/sec: 274.11 - lr: 0.000781
2022-10-03 19:22:15,016 epoch 76 - iter 120/209 - loss 0.01735181 - samples/sec: 309.47 - lr: 0.000781
2022-10-03 19:22:16,974 epoch 76 - iter 140/209 - loss 0.01834934 - samples/sec: 327.50 - lr: 0.000781
2022-10-03 19:22:19,169 epoch 76 - iter 160/209 - loss 0.01756134 - samples/sec: 292.19 - lr: 0.000781


100%|██████████| 45/45 [00:06<00:00,  6.57it/s]

2022-10-03 19:22:31,135 Evaluating as a multi-label problem: False
2022-10-03 19:22:31,149 DEV : loss 0.03514198213815689 - f1-score (micro avg)  0.875





2022-10-03 19:22:31,209 BAD EPOCHS (no improvement): 3
2022-10-03 19:22:31,213 ----------------------------------------------------------------------------------------------------
2022-10-03 19:22:33,616 epoch 77 - iter 20/209 - loss 0.02006788 - samples/sec: 266.90 - lr: 0.000781
2022-10-03 19:22:35,743 epoch 77 - iter 40/209 - loss 0.01637534 - samples/sec: 301.45 - lr: 0.000781
2022-10-03 19:22:38,187 epoch 77 - iter 60/209 - loss 0.01715055 - samples/sec: 262.15 - lr: 0.000781
2022-10-03 19:22:40,384 epoch 77 - iter 80/209 - loss 0.01673623 - samples/sec: 291.92 - lr: 0.000781
2022-10-03 19:22:42,835 epoch 77 - iter 100/209 - loss 0.01704461 - samples/sec: 261.49 - lr: 0.000781
2022-10-03 19:22:44,779 epoch 77 - iter 120/209 - loss 0.01775368 - samples/sec: 329.83 - lr: 0.000781
2022-10-03 19:22:46,889 epoch 77 - iter 140/209 - loss 0.01742609 - samples/sec: 303.99 - lr: 0.000781
2022-10-03 19:22:49,270 epoch 77 - iter 160/209 - loss 0.01717592 - samples/sec: 269.15 - lr: 0.000781


100%|██████████| 45/45 [00:06<00:00,  7.12it/s]

2022-10-03 19:23:01,100 Evaluating as a multi-label problem: False
2022-10-03 19:23:01,115 DEV : loss 0.035096194595098495 - f1-score (micro avg)  0.8742





2022-10-03 19:23:01,175 Epoch    77: reducing learning rate of group 0 to 3.9063e-04.
2022-10-03 19:23:01,176 BAD EPOCHS (no improvement): 4
2022-10-03 19:23:01,181 ----------------------------------------------------------------------------------------------------
2022-10-03 19:23:03,408 epoch 78 - iter 20/209 - loss 0.02037479 - samples/sec: 288.08 - lr: 0.000391
2022-10-03 19:23:05,927 epoch 78 - iter 40/209 - loss 0.02077358 - samples/sec: 254.43 - lr: 0.000391
2022-10-03 19:23:08,283 epoch 78 - iter 60/209 - loss 0.01807861 - samples/sec: 271.99 - lr: 0.000391
2022-10-03 19:23:10,241 epoch 78 - iter 80/209 - loss 0.01734763 - samples/sec: 327.40 - lr: 0.000391
2022-10-03 19:23:12,482 epoch 78 - iter 100/209 - loss 0.01788362 - samples/sec: 285.96 - lr: 0.000391
2022-10-03 19:23:14,945 epoch 78 - iter 120/209 - loss 0.01872418 - samples/sec: 260.13 - lr: 0.000391
2022-10-03 19:23:16,963 epoch 78 - iter 140/209 - loss 0.01849453 - samples/sec: 317.66 - lr: 0.000391
2022-10-03 19:23:

100%|██████████| 45/45 [00:06<00:00,  7.11it/s]

2022-10-03 19:23:30,918 Evaluating as a multi-label problem: False
2022-10-03 19:23:30,931 DEV : loss 0.035006631165742874 - f1-score (micro avg)  0.8752





2022-10-03 19:23:30,992 BAD EPOCHS (no improvement): 1
2022-10-03 19:23:30,996 ----------------------------------------------------------------------------------------------------
2022-10-03 19:23:33,532 epoch 79 - iter 20/209 - loss 0.02130564 - samples/sec: 252.79 - lr: 0.000391
2022-10-03 19:23:35,778 epoch 79 - iter 40/209 - loss 0.02098178 - samples/sec: 285.41 - lr: 0.000391
2022-10-03 19:23:38,050 epoch 79 - iter 60/209 - loss 0.01951401 - samples/sec: 282.07 - lr: 0.000391
2022-10-03 19:23:40,363 epoch 79 - iter 80/209 - loss 0.01854836 - samples/sec: 277.16 - lr: 0.000391
2022-10-03 19:23:42,706 epoch 79 - iter 100/209 - loss 0.01982808 - samples/sec: 273.62 - lr: 0.000391
2022-10-03 19:23:45,030 epoch 79 - iter 120/209 - loss 0.01904833 - samples/sec: 275.91 - lr: 0.000391
2022-10-03 19:23:47,478 epoch 79 - iter 140/209 - loss 0.01941893 - samples/sec: 261.75 - lr: 0.000391
2022-10-03 19:23:49,622 epoch 79 - iter 160/209 - loss 0.01960195 - samples/sec: 299.07 - lr: 0.000391


100%|██████████| 45/45 [00:06<00:00,  7.06it/s]

2022-10-03 19:24:01,229 Evaluating as a multi-label problem: False
2022-10-03 19:24:01,243 DEV : loss 0.035026099532842636 - f1-score (micro avg)  0.8752





2022-10-03 19:24:01,303 BAD EPOCHS (no improvement): 2
2022-10-03 19:24:01,307 ----------------------------------------------------------------------------------------------------
2022-10-03 19:24:03,242 epoch 80 - iter 20/209 - loss 0.01574453 - samples/sec: 331.54 - lr: 0.000391
2022-10-03 19:24:06,168 epoch 80 - iter 40/209 - loss 0.01918007 - samples/sec: 219.01 - lr: 0.000391
2022-10-03 19:24:08,126 epoch 80 - iter 60/209 - loss 0.01884088 - samples/sec: 327.51 - lr: 0.000391
2022-10-03 19:24:09,951 epoch 80 - iter 80/209 - loss 0.01980117 - samples/sec: 351.50 - lr: 0.000391
2022-10-03 19:24:12,776 epoch 80 - iter 100/209 - loss 0.02004965 - samples/sec: 226.80 - lr: 0.000391
2022-10-03 19:24:15,540 epoch 80 - iter 120/209 - loss 0.01968290 - samples/sec: 231.84 - lr: 0.000391
2022-10-03 19:24:17,783 epoch 80 - iter 140/209 - loss 0.01923350 - samples/sec: 285.70 - lr: 0.000391
2022-10-03 19:24:19,819 epoch 80 - iter 160/209 - loss 0.01960577 - samples/sec: 314.80 - lr: 0.000391


100%|██████████| 45/45 [00:06<00:00,  7.10it/s]

2022-10-03 19:24:30,871 Evaluating as a multi-label problem: False
2022-10-03 19:24:30,883 DEV : loss 0.03505483642220497 - f1-score (micro avg)  0.8752





2022-10-03 19:24:30,943 BAD EPOCHS (no improvement): 3
2022-10-03 19:24:30,946 ----------------------------------------------------------------------------------------------------
2022-10-03 19:24:33,109 epoch 81 - iter 20/209 - loss 0.01944239 - samples/sec: 296.47 - lr: 0.000391
2022-10-03 19:24:35,249 epoch 81 - iter 40/209 - loss 0.01654143 - samples/sec: 299.61 - lr: 0.000391
2022-10-03 19:24:37,962 epoch 81 - iter 60/209 - loss 0.01729232 - samples/sec: 236.24 - lr: 0.000391
2022-10-03 19:24:40,443 epoch 81 - iter 80/209 - loss 0.01821247 - samples/sec: 258.37 - lr: 0.000391
2022-10-03 19:24:42,222 epoch 81 - iter 100/209 - loss 0.01836415 - samples/sec: 360.26 - lr: 0.000391
2022-10-03 19:24:44,779 epoch 81 - iter 120/209 - loss 0.01860634 - samples/sec: 250.65 - lr: 0.000391
2022-10-03 19:24:47,016 epoch 81 - iter 140/209 - loss 0.01881178 - samples/sec: 286.65 - lr: 0.000391
2022-10-03 19:24:48,907 epoch 81 - iter 160/209 - loss 0.01869566 - samples/sec: 338.94 - lr: 0.000391


100%|██████████| 45/45 [00:06<00:00,  7.03it/s]

2022-10-03 19:25:00,820 Evaluating as a multi-label problem: False
2022-10-03 19:25:00,838 DEV : loss 0.03510748967528343 - f1-score (micro avg)  0.8742





2022-10-03 19:25:00,901 Epoch    81: reducing learning rate of group 0 to 1.9531e-04.
2022-10-03 19:25:00,902 BAD EPOCHS (no improvement): 4
2022-10-03 19:25:00,907 ----------------------------------------------------------------------------------------------------
2022-10-03 19:25:03,640 epoch 82 - iter 20/209 - loss 0.01787978 - samples/sec: 234.63 - lr: 0.000195
2022-10-03 19:25:06,157 epoch 82 - iter 40/209 - loss 0.01620613 - samples/sec: 254.67 - lr: 0.000195
2022-10-03 19:25:08,395 epoch 82 - iter 60/209 - loss 0.01614408 - samples/sec: 286.42 - lr: 0.000195
2022-10-03 19:25:10,629 epoch 82 - iter 80/209 - loss 0.01603924 - samples/sec: 287.01 - lr: 0.000195
2022-10-03 19:25:12,606 epoch 82 - iter 100/209 - loss 0.01755991 - samples/sec: 324.30 - lr: 0.000195
2022-10-03 19:25:14,736 epoch 82 - iter 120/209 - loss 0.01752979 - samples/sec: 300.89 - lr: 0.000195
2022-10-03 19:25:17,137 epoch 82 - iter 140/209 - loss 0.01780406 - samples/sec: 266.91 - lr: 0.000195
2022-10-03 19:25:

100%|██████████| 45/45 [00:06<00:00,  7.00it/s]

2022-10-03 19:25:31,153 Evaluating as a multi-label problem: False
2022-10-03 19:25:31,166 DEV : loss 0.03514496609568596 - f1-score (micro avg)  0.8742





2022-10-03 19:25:31,228 BAD EPOCHS (no improvement): 1
2022-10-03 19:25:31,234 ----------------------------------------------------------------------------------------------------
2022-10-03 19:25:33,837 epoch 83 - iter 20/209 - loss 0.01350827 - samples/sec: 246.38 - lr: 0.000195
2022-10-03 19:25:36,413 epoch 83 - iter 40/209 - loss 0.01723518 - samples/sec: 248.79 - lr: 0.000195
2022-10-03 19:25:38,676 epoch 83 - iter 60/209 - loss 0.01800993 - samples/sec: 283.25 - lr: 0.000195
2022-10-03 19:25:40,834 epoch 83 - iter 80/209 - loss 0.01850703 - samples/sec: 297.04 - lr: 0.000195
2022-10-03 19:25:43,231 epoch 83 - iter 100/209 - loss 0.01798957 - samples/sec: 267.44 - lr: 0.000195
2022-10-03 19:25:45,174 epoch 83 - iter 120/209 - loss 0.01819250 - samples/sec: 330.08 - lr: 0.000195
2022-10-03 19:25:47,269 epoch 83 - iter 140/209 - loss 0.01836390 - samples/sec: 306.07 - lr: 0.000195
2022-10-03 19:25:49,307 epoch 83 - iter 160/209 - loss 0.01782714 - samples/sec: 314.49 - lr: 0.000195


100%|██████████| 45/45 [00:06<00:00,  6.67it/s]

2022-10-03 19:26:01,659 Evaluating as a multi-label problem: False
2022-10-03 19:26:01,672 DEV : loss 0.03508172556757927 - f1-score (micro avg)  0.8752





2022-10-03 19:26:01,734 BAD EPOCHS (no improvement): 2
2022-10-03 19:26:01,738 ----------------------------------------------------------------------------------------------------
2022-10-03 19:26:03,922 epoch 84 - iter 20/209 - loss 0.01684332 - samples/sec: 293.81 - lr: 0.000195
2022-10-03 19:26:05,731 epoch 84 - iter 40/209 - loss 0.01674248 - samples/sec: 354.41 - lr: 0.000195
2022-10-03 19:26:08,919 epoch 84 - iter 60/209 - loss 0.01726975 - samples/sec: 200.98 - lr: 0.000195
2022-10-03 19:26:11,119 epoch 84 - iter 80/209 - loss 0.01963823 - samples/sec: 291.26 - lr: 0.000195
2022-10-03 19:26:13,631 epoch 84 - iter 100/209 - loss 0.01849264 - samples/sec: 255.16 - lr: 0.000195
2022-10-03 19:26:15,754 epoch 84 - iter 120/209 - loss 0.01839022 - samples/sec: 301.99 - lr: 0.000195
2022-10-03 19:26:18,091 epoch 84 - iter 140/209 - loss 0.01859170 - samples/sec: 274.28 - lr: 0.000195
2022-10-03 19:26:20,302 epoch 84 - iter 160/209 - loss 0.01856816 - samples/sec: 290.01 - lr: 0.000195


100%|██████████| 45/45 [00:06<00:00,  7.11it/s]

2022-10-03 19:26:31,796 Evaluating as a multi-label problem: False
2022-10-03 19:26:31,810 DEV : loss 0.0350802019238472 - f1-score (micro avg)  0.8752





2022-10-03 19:26:31,870 BAD EPOCHS (no improvement): 3
2022-10-03 19:26:31,874 ----------------------------------------------------------------------------------------------------
2022-10-03 19:26:34,082 epoch 85 - iter 20/209 - loss 0.02079547 - samples/sec: 290.54 - lr: 0.000195
2022-10-03 19:26:36,603 epoch 85 - iter 40/209 - loss 0.02109484 - samples/sec: 254.18 - lr: 0.000195
2022-10-03 19:26:38,713 epoch 85 - iter 60/209 - loss 0.02101884 - samples/sec: 304.06 - lr: 0.000195
2022-10-03 19:26:40,533 epoch 85 - iter 80/209 - loss 0.02056094 - samples/sec: 352.09 - lr: 0.000195
2022-10-03 19:26:42,821 epoch 85 - iter 100/209 - loss 0.01907542 - samples/sec: 280.13 - lr: 0.000195
2022-10-03 19:26:45,036 epoch 85 - iter 120/209 - loss 0.01851257 - samples/sec: 289.42 - lr: 0.000195
2022-10-03 19:26:46,928 epoch 85 - iter 140/209 - loss 0.01863915 - samples/sec: 338.82 - lr: 0.000195
2022-10-03 19:26:49,198 epoch 85 - iter 160/209 - loss 0.01888991 - samples/sec: 282.44 - lr: 0.000195


100%|██████████| 45/45 [00:06<00:00,  6.69it/s]

2022-10-03 19:27:01,990 Evaluating as a multi-label problem: False
2022-10-03 19:27:02,002 DEV : loss 0.035058941692113876 - f1-score (micro avg)  0.8752





2022-10-03 19:27:02,067 Epoch    85: reducing learning rate of group 0 to 9.7656e-05.
2022-10-03 19:27:02,069 BAD EPOCHS (no improvement): 4
2022-10-03 19:27:02,073 ----------------------------------------------------------------------------------------------------
2022-10-03 19:27:02,075 ----------------------------------------------------------------------------------------------------
2022-10-03 19:27:02,077 learning rate too small - quitting training!
2022-10-03 19:27:02,078 ----------------------------------------------------------------------------------------------------
2022-10-03 19:27:04,785 ----------------------------------------------------------------------------------------------------
2022-10-03 19:27:04,788 loading file /content/drive/MyDrive/Flair_NLP/sota-ner-flair/best-model.pt
2022-10-03 19:27:06,385 SequenceTagger predicts: Dictionary with 31 tags: O, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-FUNDAMENTO, B-FUNDAMENTO, E-FUNDAMENTO, I-FUNDAMENTO, S-ORGANIZACAO, B-O

100%|██████████| 45/45 [00:07<00:00,  6.03it/s]

2022-10-03 19:27:14,029 Evaluating as a multi-label problem: False
2022-10-03 19:27:14,043 0.8694	0.8447	0.8569	0.7541
2022-10-03 19:27:14,045 
Results:
- F-score (micro) 0.8569
- F-score (macro) 0.8171
- Accuracy 0.7541

By class:
              precision    recall  f1-score   support

  FUNDAMENTO     0.9174    0.8952    0.9061       124
      PESSOA     0.8926    0.9076    0.9000       119
       LOCAL     0.8081    0.7921    0.8000       101
        DATA     0.9216    0.9592    0.9400        98
 ORGANIZACAO     0.8391    0.7766    0.8066        94
PRODUTODELEI     0.7609    0.6481    0.7000        54
      EVENTO     0.8333    0.5556    0.6667         9

   micro avg     0.8694    0.8447    0.8569       599
   macro avg     0.8533    0.7906    0.8171       599
weighted avg     0.8670    0.8447    0.8548       599

2022-10-03 19:27:14,048 ----------------------------------------------------------------------------------------------------





{'test_score': 0.8569009314140559,
 'dev_score_history': [0.18742857142857144,
  0.3552311435523115,
  0.4476190476190477,
  0.5943097997892519,
  0.6818181818181818,
  0.666,
  0.7220630372492837,
  0.7100478468899523,
  0.7585551330798479,
  0.7529411764705883,
  0.7755491881566381,
  0.7592592592592592,
  0.8048780487804877,
  0.7892204042348413,
  0.8052434456928839,
  0.8129032258064516,
  0.7939793038570083,
  0.8203198494825964,
  0.8139754485363551,
  0.8133704735376045,
  0.8176991150442477,
  0.8129032258064516,
  0.8446866485013623,
  0.8359020852221215,
  0.8442844284428443,
  0.8464285714285714,
  0.8545618789521228,
  0.8522212148685404,
  0.8434389140271493,
  0.8594306049822064,
  0.8561085972850679,
  0.8592057761732852,
  0.8576642335766423,
  0.8669064748201438,
  0.8579234972677596,
  0.8507596067917784,
  0.8651583710407241,
  0.8606485539000875,
  0.8645739910313901,
  0.8627450980392157,
  0.8664898320070734,
  0.8729874776386404,
  0.871841155234657,
  0.8663677

## Vetor de Contexto Flair Embeddings


### Imports

In [None]:
## Imports
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import FlairEmbeddings, StackedEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mounting Google Drive
# Attach Google Drive at /content/drive; the corpus folder and the model
# output path used later in this notebook are both located under that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Loading the corpus and describing its column layout
# Column index -> annotation name: column 0 is read as 'text', column 1 as 'ner'
columns = {0: 'text', 1: 'ner'}

# Folder (on the mounted Drive) that holds the train, dev and test files
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria'

# Build the corpus from the column-formatted files found in data_folder
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='train.txt',
    dev_file='valid.txt',
    test_file='test.txt',
)

## Task
# Name of the label type the tagger will be trained to predict
label_type = 'ner'

2022-10-04 05:29:47,877 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria
2022-10-04 05:29:47,880 Train: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/train.txt
2022-10-04 05:29:47,882 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/valid.txt
2022-10-04 05:29:47,884 Test: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/test.txt


In [None]:
## Label dictionary
# Build the dictionary of labels of type `label_type` found in the corpus,
# then print it so the label set can be inspected.
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

2022-10-04 05:29:52,038 Computing label dictionary. Progress:


6667it [00:00, 44590.30it/s]

2022-10-04 05:29:52,237 Dictionary created for label 'ner' with 8 values: PESSOA (seen 628 times), FUNDAMENTO (seen 490 times), ORGANIZACAO (seen 435 times), DATA (seen 433 times), LOCAL (seen 369 times), PRODUTODELEI (seen 230 times), EVENTO (seen 9 times)
Dictionary with 8 tags: <unk>, PESSOA, FUNDAMENTO, ORGANIZACAO, DATA, LOCAL, PRODUTODELEI, EVENTO





### Embeddings

In [None]:
## Stacked Embeddings
# Load the Portuguese Flair embeddings in both directions
# (forward first, then backward) ...
embedding_types = [
    FlairEmbeddings(model_name)
    for model_name in ('pt-forward', 'pt-backward')
]

# ... and combine them into a single stacked embedding
embeddings = StackedEmbeddings(embeddings=embedding_types)

2022-10-04 05:29:52,839 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-forward.pt not found in cache, downloading to /tmp/tmpgvcqwym7


100%|██████████| 72819080/72819080 [00:03<00:00, 20669355.30B/s]

2022-10-04 05:29:56,714 copying /tmp/tmpgvcqwym7 to cache at /root/.flair/embeddings/lm-pt-forward.pt
2022-10-04 05:29:56,796 removing temp file /tmp/tmpgvcqwym7





2022-10-04 05:30:08,058 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-backward.pt not found in cache, downloading to /tmp/tmp1wbxvwvz


100%|██████████| 72819080/72819080 [00:03<00:00, 20063950.90B/s]

2022-10-04 05:30:12,039 copying /tmp/tmp1wbxvwvz to cache at /root/.flair/embeddings/lm-pt-backward.pt





2022-10-04 05:30:12,120 removing temp file /tmp/tmp1wbxvwvz


### Treino

In [None]:
## Initializing the model
# Sequence tagger over the stacked embeddings, with a hidden size of 256
# and a CRF layer enabled; it predicts the labels held in label_dict.
tagger = SequenceTagger(
    embeddings=embeddings,
    hidden_size=256,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=True,
)

2022-10-04 05:30:12,353 SequenceTagger predicts: Dictionary with 29 tags: O, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-FUNDAMENTO, B-FUNDAMENTO, E-FUNDAMENTO, I-FUNDAMENTO, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-DATA, B-DATA, E-DATA, I-DATA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, S-PRODUTODELEI, B-PRODUTODELEI, E-PRODUTODELEI, I-PRODUTODELEI, S-EVENTO, B-EVENTO, E-EVENTO, I-EVENTO


In [None]:
## Training the model
# Output location on the mounted Drive, passed to the trainer as its base path
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

# Set up the trainer for this tagger and corpus
trainer = ModelTrainer(tagger, corpus)

# Run training; kept as the cell's last expression so its result is displayed
trainer.train(
    path,
    max_epochs=100,
    mini_batch_size=32,
    learning_rate=0.1,
)

2022-10-04 05:30:12,598 ----------------------------------------------------------------------------------------------------
2022-10-04 05:30:12,602 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4096, out_features=4096, bias=True)
  (rnn): LSTM(4096, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=

100%|██████████| 45/45 [00:34<00:00,  1.32it/s]

2022-10-04 05:33:22,677 Evaluating as a multi-label problem: False
2022-10-04 05:33:22,698 DEV : loss 0.16018033027648926 - f1-score (micro avg)  0.5041
2022-10-04 05:33:22,793 BAD EPOCHS (no improvement): 0
2022-10-04 05:33:22,799 saving best model





2022-10-04 05:33:23,548 ----------------------------------------------------------------------------------------------------
2022-10-04 05:33:26,676 epoch 2 - iter 20/209 - loss 0.17530746 - samples/sec: 205.22 - lr: 0.100000
2022-10-04 05:33:30,028 epoch 2 - iter 40/209 - loss 0.17048181 - samples/sec: 191.15 - lr: 0.100000
2022-10-04 05:33:33,517 epoch 2 - iter 60/209 - loss 0.15783351 - samples/sec: 183.71 - lr: 0.100000
2022-10-04 05:33:37,382 epoch 2 - iter 80/209 - loss 0.14285898 - samples/sec: 165.81 - lr: 0.100000
2022-10-04 05:33:41,266 epoch 2 - iter 100/209 - loss 0.14534819 - samples/sec: 164.99 - lr: 0.100000
2022-10-04 05:33:44,786 epoch 2 - iter 120/209 - loss 0.14638194 - samples/sec: 182.09 - lr: 0.100000
2022-10-04 05:33:47,687 epoch 2 - iter 140/209 - loss 0.14734390 - samples/sec: 220.96 - lr: 0.100000
2022-10-04 05:33:52,696 epoch 2 - iter 160/209 - loss 0.14241373 - samples/sec: 127.89 - lr: 0.100000
2022-10-04 05:33:56,484 epoch 2 - iter 180/209 - loss 0.1428196

100%|██████████| 45/45 [00:09<00:00,  4.89it/s]


2022-10-04 05:34:11,278 Evaluating as a multi-label problem: False
2022-10-04 05:34:11,294 DEV : loss 0.08979383111000061 - f1-score (micro avg)  0.6182
2022-10-04 05:34:11,385 BAD EPOCHS (no improvement): 0
2022-10-04 05:34:11,391 saving best model
2022-10-04 05:34:12,107 ----------------------------------------------------------------------------------------------------
2022-10-04 05:34:15,834 epoch 3 - iter 20/209 - loss 0.12845237 - samples/sec: 171.95 - lr: 0.100000
2022-10-04 05:34:20,051 epoch 3 - iter 40/209 - loss 0.11445123 - samples/sec: 151.98 - lr: 0.100000
2022-10-04 05:34:23,217 epoch 3 - iter 60/209 - loss 0.11535705 - samples/sec: 202.45 - lr: 0.100000
2022-10-04 05:34:27,096 epoch 3 - iter 80/209 - loss 0.11145062 - samples/sec: 165.17 - lr: 0.100000
2022-10-04 05:34:31,164 epoch 3 - iter 100/209 - loss 0.10570771 - samples/sec: 157.52 - lr: 0.100000
2022-10-04 05:34:35,136 epoch 3 - iter 120/209 - loss 0.10768676 - samples/sec: 161.34 - lr: 0.100000
2022-10-04 05:34:

100%|██████████| 45/45 [00:09<00:00,  4.66it/s]


2022-10-04 05:35:01,077 Evaluating as a multi-label problem: False
2022-10-04 05:35:01,093 DEV : loss 0.06800028681755066 - f1-score (micro avg)  0.7527
2022-10-04 05:35:01,185 BAD EPOCHS (no improvement): 0
2022-10-04 05:35:01,190 saving best model
2022-10-04 05:35:01,902 ----------------------------------------------------------------------------------------------------
2022-10-04 05:35:05,736 epoch 4 - iter 20/209 - loss 0.07889683 - samples/sec: 167.38 - lr: 0.100000
2022-10-04 05:35:09,501 epoch 4 - iter 40/209 - loss 0.07485090 - samples/sec: 170.19 - lr: 0.100000
2022-10-04 05:35:13,938 epoch 4 - iter 60/209 - loss 0.07727403 - samples/sec: 144.44 - lr: 0.100000
2022-10-04 05:35:17,873 epoch 4 - iter 80/209 - loss 0.07242931 - samples/sec: 162.84 - lr: 0.100000
2022-10-04 05:35:21,242 epoch 4 - iter 100/209 - loss 0.07723278 - samples/sec: 190.26 - lr: 0.100000
2022-10-04 05:35:24,777 epoch 4 - iter 120/209 - loss 0.08153699 - samples/sec: 181.29 - lr: 0.100000
2022-10-04 05:35:

100%|██████████| 45/45 [00:09<00:00,  4.90it/s]


2022-10-04 05:35:50,591 Evaluating as a multi-label problem: False
2022-10-04 05:35:50,609 DEV : loss 0.07237409800291061 - f1-score (micro avg)  0.742
2022-10-04 05:35:50,704 BAD EPOCHS (no improvement): 1
2022-10-04 05:35:50,709 ----------------------------------------------------------------------------------------------------
2022-10-04 05:35:55,159 epoch 5 - iter 20/209 - loss 0.07737473 - samples/sec: 144.10 - lr: 0.100000
2022-10-04 05:35:58,869 epoch 5 - iter 40/209 - loss 0.07132954 - samples/sec: 172.74 - lr: 0.100000
2022-10-04 05:36:02,608 epoch 5 - iter 60/209 - loss 0.07167298 - samples/sec: 171.36 - lr: 0.100000
2022-10-04 05:36:06,742 epoch 5 - iter 80/209 - loss 0.07410241 - samples/sec: 155.00 - lr: 0.100000
2022-10-04 05:36:10,402 epoch 5 - iter 100/209 - loss 0.07098395 - samples/sec: 175.11 - lr: 0.100000
2022-10-04 05:36:13,839 epoch 5 - iter 120/209 - loss 0.07312924 - samples/sec: 186.44 - lr: 0.100000
2022-10-04 05:36:18,400 epoch 5 - iter 140/209 - loss 0.0731

100%|██████████| 45/45 [00:09<00:00,  4.67it/s]


2022-10-04 05:36:39,139 Evaluating as a multi-label problem: False
2022-10-04 05:36:39,154 DEV : loss 0.06755087524652481 - f1-score (micro avg)  0.7792
2022-10-04 05:36:39,244 BAD EPOCHS (no improvement): 0
2022-10-04 05:36:39,250 saving best model
2022-10-04 05:36:39,980 ----------------------------------------------------------------------------------------------------
2022-10-04 05:36:43,621 epoch 6 - iter 20/209 - loss 0.08050086 - samples/sec: 176.18 - lr: 0.100000
2022-10-04 05:36:46,745 epoch 6 - iter 40/209 - loss 0.06803591 - samples/sec: 205.22 - lr: 0.100000
2022-10-04 05:36:51,294 epoch 6 - iter 60/209 - loss 0.06693076 - samples/sec: 140.83 - lr: 0.100000
2022-10-04 05:36:54,861 epoch 6 - iter 80/209 - loss 0.06388079 - samples/sec: 179.65 - lr: 0.100000
2022-10-04 05:36:59,538 epoch 6 - iter 100/209 - loss 0.06616947 - samples/sec: 137.01 - lr: 0.100000
2022-10-04 05:37:03,520 epoch 6 - iter 120/209 - loss 0.06668201 - samples/sec: 161.00 - lr: 0.100000
2022-10-04 05:37:

100%|██████████| 45/45 [00:09<00:00,  4.81it/s]


2022-10-04 05:37:28,788 Evaluating as a multi-label problem: False
2022-10-04 05:37:28,803 DEV : loss 0.05483702942728996 - f1-score (micro avg)  0.7723
2022-10-04 05:37:28,897 BAD EPOCHS (no improvement): 1
2022-10-04 05:37:28,902 ----------------------------------------------------------------------------------------------------
2022-10-04 05:37:32,912 epoch 7 - iter 20/209 - loss 0.05176174 - samples/sec: 159.87 - lr: 0.100000
2022-10-04 05:37:36,517 epoch 7 - iter 40/209 - loss 0.05748387 - samples/sec: 177.76 - lr: 0.100000
2022-10-04 05:37:40,222 epoch 7 - iter 60/209 - loss 0.05191996 - samples/sec: 172.94 - lr: 0.100000
2022-10-04 05:37:44,149 epoch 7 - iter 80/209 - loss 0.05435278 - samples/sec: 163.18 - lr: 0.100000
2022-10-04 05:37:47,465 epoch 7 - iter 100/209 - loss 0.05488853 - samples/sec: 193.25 - lr: 0.100000
2022-10-04 05:37:51,411 epoch 7 - iter 120/209 - loss 0.05544098 - samples/sec: 162.37 - lr: 0.100000
2022-10-04 05:37:55,800 epoch 7 - iter 140/209 - loss 0.057

100%|██████████| 45/45 [00:09<00:00,  4.60it/s]


2022-10-04 05:38:17,839 Evaluating as a multi-label problem: False
2022-10-04 05:38:17,854 DEV : loss 0.05141553282737732 - f1-score (micro avg)  0.8043
2022-10-04 05:38:17,948 BAD EPOCHS (no improvement): 0
2022-10-04 05:38:17,953 saving best model
2022-10-04 05:38:18,690 ----------------------------------------------------------------------------------------------------
2022-10-04 05:38:22,586 epoch 8 - iter 20/209 - loss 0.05908982 - samples/sec: 164.56 - lr: 0.100000
2022-10-04 05:38:26,121 epoch 8 - iter 40/209 - loss 0.05477516 - samples/sec: 181.32 - lr: 0.100000
2022-10-04 05:38:29,793 epoch 8 - iter 60/209 - loss 0.05725141 - samples/sec: 174.59 - lr: 0.100000
2022-10-04 05:38:33,106 epoch 8 - iter 80/209 - loss 0.05718671 - samples/sec: 193.44 - lr: 0.100000
2022-10-04 05:38:36,775 epoch 8 - iter 100/209 - loss 0.05187066 - samples/sec: 174.73 - lr: 0.100000
2022-10-04 05:38:41,263 epoch 8 - iter 120/209 - loss 0.05067641 - samples/sec: 142.73 - lr: 0.100000
2022-10-04 05:38:

100%|██████████| 45/45 [00:09<00:00,  4.76it/s]

2022-10-04 05:39:07,689 Evaluating as a multi-label problem: False
2022-10-04 05:39:07,704 DEV : loss 0.05836400017142296 - f1-score (micro avg)  0.8198
2022-10-04 05:39:07,797 BAD EPOCHS (no improvement): 0
2022-10-04 05:39:07,816 saving best model





2022-10-04 05:39:08,528 ----------------------------------------------------------------------------------------------------
2022-10-04 05:39:12,255 epoch 9 - iter 20/209 - loss 0.05137884 - samples/sec: 172.13 - lr: 0.100000
2022-10-04 05:39:16,000 epoch 9 - iter 40/209 - loss 0.05079334 - samples/sec: 171.07 - lr: 0.100000
2022-10-04 05:39:19,720 epoch 9 - iter 60/209 - loss 0.05468352 - samples/sec: 172.30 - lr: 0.100000
2022-10-04 05:39:22,863 epoch 9 - iter 80/209 - loss 0.05142707 - samples/sec: 203.90 - lr: 0.100000
2022-10-04 05:39:27,191 epoch 9 - iter 100/209 - loss 0.05268009 - samples/sec: 148.04 - lr: 0.100000
2022-10-04 05:39:30,722 epoch 9 - iter 120/209 - loss 0.05137849 - samples/sec: 181.48 - lr: 0.100000
2022-10-04 05:39:34,130 epoch 9 - iter 140/209 - loss 0.05259436 - samples/sec: 188.11 - lr: 0.100000
2022-10-04 05:39:37,881 epoch 9 - iter 160/209 - loss 0.05178342 - samples/sec: 170.85 - lr: 0.100000
2022-10-04 05:39:41,865 epoch 9 - iter 180/209 - loss 0.0497144

100%|██████████| 45/45 [00:09<00:00,  4.66it/s]

2022-10-04 05:39:56,945 Evaluating as a multi-label problem: False
2022-10-04 05:39:56,962 DEV : loss 0.04702908545732498 - f1-score (micro avg)  0.8284
2022-10-04 05:39:57,054 BAD EPOCHS (no improvement): 0
2022-10-04 05:39:57,059 saving best model





2022-10-04 05:39:57,771 ----------------------------------------------------------------------------------------------------
2022-10-04 05:40:02,101 epoch 10 - iter 20/209 - loss 0.04537652 - samples/sec: 148.01 - lr: 0.100000
2022-10-04 05:40:06,304 epoch 10 - iter 40/209 - loss 0.04432074 - samples/sec: 152.45 - lr: 0.100000
2022-10-04 05:40:10,056 epoch 10 - iter 60/209 - loss 0.04668812 - samples/sec: 170.77 - lr: 0.100000
2022-10-04 05:40:13,794 epoch 10 - iter 80/209 - loss 0.04152727 - samples/sec: 171.44 - lr: 0.100000
2022-10-04 05:40:17,347 epoch 10 - iter 100/209 - loss 0.04189730 - samples/sec: 180.37 - lr: 0.100000
2022-10-04 05:40:20,988 epoch 10 - iter 120/209 - loss 0.04122967 - samples/sec: 176.00 - lr: 0.100000
2022-10-04 05:40:24,621 epoch 10 - iter 140/209 - loss 0.04105762 - samples/sec: 176.43 - lr: 0.100000
2022-10-04 05:40:28,249 epoch 10 - iter 160/209 - loss 0.04178446 - samples/sec: 176.61 - lr: 0.100000
2022-10-04 05:40:32,256 epoch 10 - iter 180/209 - loss 

100%|██████████| 45/45 [00:09<00:00,  4.94it/s]


2022-10-04 05:40:46,142 Evaluating as a multi-label problem: False
2022-10-04 05:40:46,157 DEV : loss 0.04637054726481438 - f1-score (micro avg)  0.845
2022-10-04 05:40:46,250 BAD EPOCHS (no improvement): 0
2022-10-04 05:40:46,255 saving best model
2022-10-04 05:40:46,989 ----------------------------------------------------------------------------------------------------
2022-10-04 05:40:50,784 epoch 11 - iter 20/209 - loss 0.04803611 - samples/sec: 168.97 - lr: 0.100000
2022-10-04 05:40:54,753 epoch 11 - iter 40/209 - loss 0.04129475 - samples/sec: 161.46 - lr: 0.100000
2022-10-04 05:40:58,904 epoch 11 - iter 60/209 - loss 0.04004095 - samples/sec: 154.38 - lr: 0.100000
2022-10-04 05:41:02,657 epoch 11 - iter 80/209 - loss 0.03931082 - samples/sec: 170.79 - lr: 0.100000
2022-10-04 05:41:06,239 epoch 11 - iter 100/209 - loss 0.04074366 - samples/sec: 178.88 - lr: 0.100000
2022-10-04 05:41:09,368 epoch 11 - iter 120/209 - loss 0.04208986 - samples/sec: 204.80 - lr: 0.100000
2022-10-04 0

100%|██████████| 45/45 [00:09<00:00,  4.64it/s]


2022-10-04 05:41:35,883 Evaluating as a multi-label problem: False
2022-10-04 05:41:35,898 DEV : loss 0.04224751144647598 - f1-score (micro avg)  0.8389
2022-10-04 05:41:35,990 BAD EPOCHS (no improvement): 1
2022-10-04 05:41:35,996 ----------------------------------------------------------------------------------------------------
2022-10-04 05:41:39,667 epoch 12 - iter 20/209 - loss 0.02952437 - samples/sec: 174.65 - lr: 0.100000
2022-10-04 05:41:43,577 epoch 12 - iter 40/209 - loss 0.03868607 - samples/sec: 163.87 - lr: 0.100000
2022-10-04 05:41:47,099 epoch 12 - iter 60/209 - loss 0.04154795 - samples/sec: 181.93 - lr: 0.100000
2022-10-04 05:41:51,116 epoch 12 - iter 80/209 - loss 0.04442386 - samples/sec: 159.52 - lr: 0.100000
2022-10-04 05:41:55,025 epoch 12 - iter 100/209 - loss 0.04329236 - samples/sec: 163.92 - lr: 0.100000
2022-10-04 05:41:58,869 epoch 12 - iter 120/209 - loss 0.04199310 - samples/sec: 166.68 - lr: 0.100000
2022-10-04 05:42:02,462 epoch 12 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.84it/s]


2022-10-04 05:42:23,388 Evaluating as a multi-label problem: False
2022-10-04 05:42:23,403 DEV : loss 0.038745809346437454 - f1-score (micro avg)  0.8444
2022-10-04 05:42:23,493 BAD EPOCHS (no improvement): 2
2022-10-04 05:42:23,500 ----------------------------------------------------------------------------------------------------
2022-10-04 05:42:26,830 epoch 13 - iter 20/209 - loss 0.03649700 - samples/sec: 192.63 - lr: 0.100000
2022-10-04 05:42:31,094 epoch 13 - iter 40/209 - loss 0.03491071 - samples/sec: 150.26 - lr: 0.100000
2022-10-04 05:42:34,876 epoch 13 - iter 60/209 - loss 0.03224873 - samples/sec: 169.44 - lr: 0.100000
2022-10-04 05:42:38,921 epoch 13 - iter 80/209 - loss 0.03186616 - samples/sec: 158.45 - lr: 0.100000
2022-10-04 05:42:42,760 epoch 13 - iter 100/209 - loss 0.03034522 - samples/sec: 166.98 - lr: 0.100000
2022-10-04 05:42:46,261 epoch 13 - iter 120/209 - loss 0.03315228 - samples/sec: 183.01 - lr: 0.100000
2022-10-04 05:42:50,365 epoch 13 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.77it/s]

2022-10-04 05:43:12,834 Evaluating as a multi-label problem: False
2022-10-04 05:43:12,849 DEV : loss 0.04167303070425987 - f1-score (micro avg)  0.8363
2022-10-04 05:43:12,942 BAD EPOCHS (no improvement): 3
2022-10-04 05:43:12,949 ----------------------------------------------------------------------------------------------------





2022-10-04 05:43:17,315 epoch 14 - iter 20/209 - loss 0.02980401 - samples/sec: 146.82 - lr: 0.100000
2022-10-04 05:43:21,131 epoch 14 - iter 40/209 - loss 0.03006117 - samples/sec: 167.93 - lr: 0.100000
2022-10-04 05:43:24,700 epoch 14 - iter 60/209 - loss 0.02943008 - samples/sec: 179.55 - lr: 0.100000
2022-10-04 05:43:28,553 epoch 14 - iter 80/209 - loss 0.02679424 - samples/sec: 166.32 - lr: 0.100000
2022-10-04 05:43:32,978 epoch 14 - iter 100/209 - loss 0.02877195 - samples/sec: 144.79 - lr: 0.100000
2022-10-04 05:43:36,575 epoch 14 - iter 120/209 - loss 0.02874556 - samples/sec: 178.15 - lr: 0.100000
2022-10-04 05:43:40,024 epoch 14 - iter 140/209 - loss 0.03066637 - samples/sec: 185.87 - lr: 0.100000
2022-10-04 05:43:43,236 epoch 14 - iter 160/209 - loss 0.03423040 - samples/sec: 199.53 - lr: 0.100000
2022-10-04 05:43:47,486 epoch 14 - iter 180/209 - loss 0.03420059 - samples/sec: 150.81 - lr: 0.100000
2022-10-04 05:43:50,810 epoch 14 - iter 200/209 - loss 0.03334038 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.75it/s]


2022-10-04 05:44:01,572 Evaluating as a multi-label problem: False
2022-10-04 05:44:01,591 DEV : loss 0.041958753019571304 - f1-score (micro avg)  0.8506
2022-10-04 05:44:01,683 BAD EPOCHS (no improvement): 0
2022-10-04 05:44:01,689 saving best model
2022-10-04 05:44:02,416 ----------------------------------------------------------------------------------------------------
2022-10-04 05:44:07,097 epoch 15 - iter 20/209 - loss 0.03153648 - samples/sec: 136.92 - lr: 0.100000
2022-10-04 05:44:11,126 epoch 15 - iter 40/209 - loss 0.03033912 - samples/sec: 159.08 - lr: 0.100000
2022-10-04 05:44:15,248 epoch 15 - iter 60/209 - loss 0.02844562 - samples/sec: 155.42 - lr: 0.100000
2022-10-04 05:44:18,658 epoch 15 - iter 80/209 - loss 0.02861322 - samples/sec: 187.97 - lr: 0.100000
2022-10-04 05:44:22,858 epoch 15 - iter 100/209 - loss 0.02819313 - samples/sec: 152.54 - lr: 0.100000
2022-10-04 05:44:26,402 epoch 15 - iter 120/209 - loss 0.02720257 - samples/sec: 180.84 - lr: 0.100000
2022-10-04

100%|██████████| 45/45 [00:09<00:00,  4.93it/s]


2022-10-04 05:44:51,192 Evaluating as a multi-label problem: False
2022-10-04 05:44:51,207 DEV : loss 0.03923148289322853 - f1-score (micro avg)  0.8684
2022-10-04 05:44:51,300 BAD EPOCHS (no improvement): 0
2022-10-04 05:44:51,307 saving best model
2022-10-04 05:44:52,025 ----------------------------------------------------------------------------------------------------
2022-10-04 05:44:56,392 epoch 16 - iter 20/209 - loss 0.02720362 - samples/sec: 146.76 - lr: 0.100000
2022-10-04 05:45:00,269 epoch 16 - iter 40/209 - loss 0.02586910 - samples/sec: 165.29 - lr: 0.100000
2022-10-04 05:45:05,358 epoch 16 - iter 60/209 - loss 0.02616053 - samples/sec: 125.89 - lr: 0.100000
2022-10-04 05:45:08,757 epoch 16 - iter 80/209 - loss 0.02676294 - samples/sec: 188.55 - lr: 0.100000
2022-10-04 05:45:12,199 epoch 16 - iter 100/209 - loss 0.02575664 - samples/sec: 186.21 - lr: 0.100000
2022-10-04 05:45:15,858 epoch 16 - iter 120/209 - loss 0.02702852 - samples/sec: 175.14 - lr: 0.100000
2022-10-04 

100%|██████████| 45/45 [00:09<00:00,  4.93it/s]

2022-10-04 05:45:41,411 Evaluating as a multi-label problem: False
2022-10-04 05:45:41,426 DEV : loss 0.04206893965601921 - f1-score (micro avg)  0.8566
2022-10-04 05:45:41,521 BAD EPOCHS (no improvement): 1
2022-10-04 05:45:41,525 ----------------------------------------------------------------------------------------------------





2022-10-04 05:45:45,159 epoch 17 - iter 20/209 - loss 0.02986693 - samples/sec: 176.46 - lr: 0.100000
2022-10-04 05:45:48,857 epoch 17 - iter 40/209 - loss 0.02741694 - samples/sec: 173.27 - lr: 0.100000
2022-10-04 05:45:52,555 epoch 17 - iter 60/209 - loss 0.03001159 - samples/sec: 173.30 - lr: 0.100000
2022-10-04 05:45:56,091 epoch 17 - iter 80/209 - loss 0.02903714 - samples/sec: 181.26 - lr: 0.100000
2022-10-04 05:45:59,639 epoch 17 - iter 100/209 - loss 0.02638125 - samples/sec: 180.62 - lr: 0.100000
2022-10-04 05:46:02,875 epoch 17 - iter 120/209 - loss 0.02739707 - samples/sec: 198.07 - lr: 0.100000
2022-10-04 05:46:06,468 epoch 17 - iter 140/209 - loss 0.02828226 - samples/sec: 178.36 - lr: 0.100000
2022-10-04 05:46:10,686 epoch 17 - iter 160/209 - loss 0.02710349 - samples/sec: 151.88 - lr: 0.100000
2022-10-04 05:46:14,145 epoch 17 - iter 180/209 - loss 0.02596510 - samples/sec: 185.29 - lr: 0.100000
2022-10-04 05:46:18,390 epoch 17 - iter 200/209 - loss 0.02678328 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.88it/s]


2022-10-04 05:46:29,970 Evaluating as a multi-label problem: False
2022-10-04 05:46:29,985 DEV : loss 0.03701391816139221 - f1-score (micro avg)  0.8631
2022-10-04 05:46:30,076 BAD EPOCHS (no improvement): 2
2022-10-04 05:46:30,081 ----------------------------------------------------------------------------------------------------
2022-10-04 05:46:33,410 epoch 18 - iter 20/209 - loss 0.02169776 - samples/sec: 192.60 - lr: 0.100000
2022-10-04 05:46:37,212 epoch 18 - iter 40/209 - loss 0.01960079 - samples/sec: 168.56 - lr: 0.100000
2022-10-04 05:46:41,077 epoch 18 - iter 60/209 - loss 0.01910406 - samples/sec: 165.79 - lr: 0.100000
2022-10-04 05:46:45,173 epoch 18 - iter 80/209 - loss 0.02373642 - samples/sec: 156.42 - lr: 0.100000
2022-10-04 05:46:48,448 epoch 18 - iter 100/209 - loss 0.02729723 - samples/sec: 195.67 - lr: 0.100000
2022-10-04 05:46:52,246 epoch 18 - iter 120/209 - loss 0.02869175 - samples/sec: 168.72 - lr: 0.100000
2022-10-04 05:46:55,701 epoch 18 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.95it/s]


2022-10-04 05:47:18,189 Evaluating as a multi-label problem: False
2022-10-04 05:47:18,204 DEV : loss 0.04064689949154854 - f1-score (micro avg)  0.8612
2022-10-04 05:47:18,296 BAD EPOCHS (no improvement): 3
2022-10-04 05:47:18,301 ----------------------------------------------------------------------------------------------------
2022-10-04 05:47:21,671 epoch 19 - iter 20/209 - loss 0.02606302 - samples/sec: 190.28 - lr: 0.100000
2022-10-04 05:47:25,972 epoch 19 - iter 40/209 - loss 0.02666914 - samples/sec: 148.94 - lr: 0.100000
2022-10-04 05:47:30,321 epoch 19 - iter 60/209 - loss 0.03160707 - samples/sec: 147.32 - lr: 0.100000
2022-10-04 05:47:34,427 epoch 19 - iter 80/209 - loss 0.02834404 - samples/sec: 156.08 - lr: 0.100000
2022-10-04 05:47:38,245 epoch 19 - iter 100/209 - loss 0.02676546 - samples/sec: 167.83 - lr: 0.100000
2022-10-04 05:47:41,361 epoch 19 - iter 120/209 - loss 0.02596884 - samples/sec: 205.66 - lr: 0.100000
2022-10-04 05:47:45,491 epoch 19 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.85it/s]

2022-10-04 05:48:06,388 Evaluating as a multi-label problem: False
2022-10-04 05:48:06,403 DEV : loss 0.03446009382605553 - f1-score (micro avg)  0.8603
2022-10-04 05:48:06,496 Epoch    19: reducing learning rate of group 0 to 5.0000e-02.
2022-10-04 05:48:06,498 BAD EPOCHS (no improvement): 4
2022-10-04 05:48:06,505 ----------------------------------------------------------------------------------------------------





2022-10-04 05:48:10,061 epoch 20 - iter 20/209 - loss 0.02072532 - samples/sec: 180.25 - lr: 0.050000
2022-10-04 05:48:14,061 epoch 20 - iter 40/209 - loss 0.01947552 - samples/sec: 160.19 - lr: 0.050000
2022-10-04 05:48:17,498 epoch 20 - iter 60/209 - loss 0.02158679 - samples/sec: 186.43 - lr: 0.050000
2022-10-04 05:48:20,973 epoch 20 - iter 80/209 - loss 0.02216467 - samples/sec: 184.43 - lr: 0.050000
2022-10-04 05:48:24,194 epoch 20 - iter 100/209 - loss 0.02130723 - samples/sec: 198.94 - lr: 0.050000
2022-10-04 05:48:27,854 epoch 20 - iter 120/209 - loss 0.02234719 - samples/sec: 175.14 - lr: 0.050000
2022-10-04 05:48:31,701 epoch 20 - iter 140/209 - loss 0.02191465 - samples/sec: 166.56 - lr: 0.050000
2022-10-04 05:48:35,858 epoch 20 - iter 160/209 - loss 0.02197061 - samples/sec: 154.12 - lr: 0.050000
2022-10-04 05:48:39,446 epoch 20 - iter 180/209 - loss 0.02129801 - samples/sec: 178.60 - lr: 0.050000
2022-10-04 05:48:44,414 epoch 20 - iter 200/209 - loss 0.02088105 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.68it/s]


2022-10-04 05:48:55,279 Evaluating as a multi-label problem: False
2022-10-04 05:48:55,294 DEV : loss 0.03424276411533356 - f1-score (micro avg)  0.8741
2022-10-04 05:48:55,384 BAD EPOCHS (no improvement): 0
2022-10-04 05:48:55,389 saving best model
2022-10-04 05:48:56,100 ----------------------------------------------------------------------------------------------------
2022-10-04 05:49:00,410 epoch 21 - iter 20/209 - loss 0.02000345 - samples/sec: 148.72 - lr: 0.050000
2022-10-04 05:49:03,353 epoch 21 - iter 40/209 - loss 0.01988644 - samples/sec: 217.81 - lr: 0.050000
2022-10-04 05:49:07,322 epoch 21 - iter 60/209 - loss 0.01749872 - samples/sec: 161.43 - lr: 0.050000
2022-10-04 05:49:11,287 epoch 21 - iter 80/209 - loss 0.01635425 - samples/sec: 161.61 - lr: 0.050000
2022-10-04 05:49:14,779 epoch 21 - iter 100/209 - loss 0.01716428 - samples/sec: 183.57 - lr: 0.050000
2022-10-04 05:49:19,372 epoch 21 - iter 120/209 - loss 0.01668688 - samples/sec: 139.49 - lr: 0.050000
2022-10-04 

100%|██████████| 45/45 [00:09<00:00,  4.76it/s]


2022-10-04 05:49:44,666 Evaluating as a multi-label problem: False
2022-10-04 05:49:44,681 DEV : loss 0.03426947444677353 - f1-score (micro avg)  0.8881
2022-10-04 05:49:44,774 BAD EPOCHS (no improvement): 0
2022-10-04 05:49:44,779 saving best model
2022-10-04 05:49:45,487 ----------------------------------------------------------------------------------------------------
2022-10-04 05:49:50,354 epoch 22 - iter 20/209 - loss 0.02030395 - samples/sec: 131.71 - lr: 0.050000
2022-10-04 05:49:54,108 epoch 22 - iter 40/209 - loss 0.01579204 - samples/sec: 170.75 - lr: 0.050000
2022-10-04 05:49:57,850 epoch 22 - iter 60/209 - loss 0.01481956 - samples/sec: 171.33 - lr: 0.050000
2022-10-04 05:50:01,161 epoch 22 - iter 80/209 - loss 0.01532455 - samples/sec: 193.60 - lr: 0.050000
2022-10-04 05:50:05,046 epoch 22 - iter 100/209 - loss 0.01842723 - samples/sec: 164.93 - lr: 0.050000
2022-10-04 05:50:09,146 epoch 22 - iter 120/209 - loss 0.01686300 - samples/sec: 156.29 - lr: 0.050000
2022-10-04 

100%|██████████| 45/45 [00:09<00:00,  4.55it/s]


2022-10-04 05:50:34,468 Evaluating as a multi-label problem: False
2022-10-04 05:50:34,483 DEV : loss 0.03882388770580292 - f1-score (micro avg)  0.8726
2022-10-04 05:50:34,576 BAD EPOCHS (no improvement): 1
2022-10-04 05:50:34,585 ----------------------------------------------------------------------------------------------------
2022-10-04 05:50:37,749 epoch 23 - iter 20/209 - loss 0.01449184 - samples/sec: 202.61 - lr: 0.050000
2022-10-04 05:50:42,093 epoch 23 - iter 40/209 - loss 0.01550535 - samples/sec: 147.51 - lr: 0.050000
2022-10-04 05:50:46,674 epoch 23 - iter 60/209 - loss 0.01751384 - samples/sec: 139.84 - lr: 0.050000
2022-10-04 05:50:50,366 epoch 23 - iter 80/209 - loss 0.01744938 - samples/sec: 173.57 - lr: 0.050000
2022-10-04 05:50:53,712 epoch 23 - iter 100/209 - loss 0.01735984 - samples/sec: 191.59 - lr: 0.050000
2022-10-04 05:50:57,119 epoch 23 - iter 120/209 - loss 0.01697531 - samples/sec: 188.11 - lr: 0.050000
2022-10-04 05:51:01,039 epoch 23 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.96it/s]


2022-10-04 05:51:22,945 Evaluating as a multi-label problem: False
2022-10-04 05:51:22,960 DEV : loss 0.041657477617263794 - f1-score (micro avg)  0.8528
2022-10-04 05:51:23,052 BAD EPOCHS (no improvement): 2
2022-10-04 05:51:23,057 ----------------------------------------------------------------------------------------------------
2022-10-04 05:51:26,779 epoch 24 - iter 20/209 - loss 0.01655219 - samples/sec: 172.19 - lr: 0.050000
2022-10-04 05:51:30,340 epoch 24 - iter 40/209 - loss 0.02004088 - samples/sec: 179.98 - lr: 0.050000
2022-10-04 05:51:33,889 epoch 24 - iter 60/209 - loss 0.01968991 - samples/sec: 180.58 - lr: 0.050000
2022-10-04 05:51:38,097 epoch 24 - iter 80/209 - loss 0.01905586 - samples/sec: 152.24 - lr: 0.050000
2022-10-04 05:51:41,078 epoch 24 - iter 100/209 - loss 0.01956661 - samples/sec: 215.07 - lr: 0.050000
2022-10-04 05:51:44,968 epoch 24 - iter 120/209 - loss 0.01836147 - samples/sec: 164.70 - lr: 0.050000
2022-10-04 05:51:48,370 epoch 24 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.52it/s]


2022-10-04 05:52:11,713 Evaluating as a multi-label problem: False
2022-10-04 05:52:11,728 DEV : loss 0.03571942076086998 - f1-score (micro avg)  0.8854
2022-10-04 05:52:11,818 BAD EPOCHS (no improvement): 3
2022-10-04 05:52:11,824 ----------------------------------------------------------------------------------------------------
2022-10-04 05:52:15,465 epoch 25 - iter 20/209 - loss 0.01993831 - samples/sec: 176.08 - lr: 0.050000
2022-10-04 05:52:18,967 epoch 25 - iter 40/209 - loss 0.01650090 - samples/sec: 182.98 - lr: 0.050000
2022-10-04 05:52:22,666 epoch 25 - iter 60/209 - loss 0.01470098 - samples/sec: 173.32 - lr: 0.050000
2022-10-04 05:52:26,845 epoch 25 - iter 80/209 - loss 0.01561499 - samples/sec: 153.31 - lr: 0.050000
2022-10-04 05:52:30,292 epoch 25 - iter 100/209 - loss 0.01538331 - samples/sec: 185.96 - lr: 0.050000
2022-10-04 05:52:34,244 epoch 25 - iter 120/209 - loss 0.01586565 - samples/sec: 162.14 - lr: 0.050000
2022-10-04 05:52:37,802 epoch 25 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.89it/s]

2022-10-04 05:52:59,780 Evaluating as a multi-label problem: False
2022-10-04 05:52:59,795 DEV : loss 0.03778481110930443 - f1-score (micro avg)  0.8821
2022-10-04 05:52:59,886 Epoch    25: reducing learning rate of group 0 to 2.5000e-02.
2022-10-04 05:52:59,889 BAD EPOCHS (no improvement): 4
2022-10-04 05:52:59,894 ----------------------------------------------------------------------------------------------------





2022-10-04 05:53:03,399 epoch 26 - iter 20/209 - loss 0.01967255 - samples/sec: 182.95 - lr: 0.025000
2022-10-04 05:53:06,934 epoch 26 - iter 40/209 - loss 0.01463242 - samples/sec: 181.26 - lr: 0.025000
2022-10-04 05:53:10,288 epoch 26 - iter 60/209 - loss 0.01412666 - samples/sec: 191.11 - lr: 0.025000
2022-10-04 05:53:13,930 epoch 26 - iter 80/209 - loss 0.01378679 - samples/sec: 175.94 - lr: 0.025000
2022-10-04 05:53:18,273 epoch 26 - iter 100/209 - loss 0.01434530 - samples/sec: 147.53 - lr: 0.025000
2022-10-04 05:53:22,319 epoch 26 - iter 120/209 - loss 0.01344290 - samples/sec: 158.36 - lr: 0.025000
2022-10-04 05:53:25,992 epoch 26 - iter 140/209 - loss 0.01340357 - samples/sec: 174.52 - lr: 0.025000
2022-10-04 05:53:30,217 epoch 26 - iter 160/209 - loss 0.01362597 - samples/sec: 151.63 - lr: 0.025000
2022-10-04 05:53:33,512 epoch 26 - iter 180/209 - loss 0.01330793 - samples/sec: 194.47 - lr: 0.025000
2022-10-04 05:53:37,548 epoch 26 - iter 200/209 - loss 0.01357656 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.67it/s]

2022-10-04 05:53:48,435 Evaluating as a multi-label problem: False
2022-10-04 05:53:48,451 DEV : loss 0.03459864482283592 - f1-score (micro avg)  0.8825
2022-10-04 05:53:48,541 BAD EPOCHS (no improvement): 1
2022-10-04 05:53:48,546 ----------------------------------------------------------------------------------------------------





2022-10-04 05:53:52,578 epoch 27 - iter 20/209 - loss 0.01175891 - samples/sec: 158.99 - lr: 0.025000
2022-10-04 05:53:56,133 epoch 27 - iter 40/209 - loss 0.01106545 - samples/sec: 180.22 - lr: 0.025000
2022-10-04 05:53:59,489 epoch 27 - iter 60/209 - loss 0.01069050 - samples/sec: 190.98 - lr: 0.025000
2022-10-04 05:54:02,958 epoch 27 - iter 80/209 - loss 0.01132348 - samples/sec: 184.69 - lr: 0.025000
2022-10-04 05:54:07,106 epoch 27 - iter 100/209 - loss 0.01228738 - samples/sec: 154.49 - lr: 0.025000
2022-10-04 05:54:11,024 epoch 27 - iter 120/209 - loss 0.01309411 - samples/sec: 163.58 - lr: 0.025000
2022-10-04 05:54:14,470 epoch 27 - iter 140/209 - loss 0.01345793 - samples/sec: 185.95 - lr: 0.025000
2022-10-04 05:54:18,162 epoch 27 - iter 160/209 - loss 0.01393774 - samples/sec: 173.55 - lr: 0.025000
2022-10-04 05:54:21,934 epoch 27 - iter 180/209 - loss 0.01335527 - samples/sec: 169.92 - lr: 0.025000
2022-10-04 05:54:25,135 epoch 27 - iter 200/209 - loss 0.01298843 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.89it/s]

2022-10-04 05:54:36,554 Evaluating as a multi-label problem: False
2022-10-04 05:54:36,571 DEV : loss 0.034660086035728455 - f1-score (micro avg)  0.8906
2022-10-04 05:54:36,661 BAD EPOCHS (no improvement): 0
2022-10-04 05:54:36,668 saving best model





2022-10-04 05:54:37,393 ----------------------------------------------------------------------------------------------------
2022-10-04 05:54:41,040 epoch 28 - iter 20/209 - loss 0.01054552 - samples/sec: 176.07 - lr: 0.025000
2022-10-04 05:54:44,946 epoch 28 - iter 40/209 - loss 0.01374067 - samples/sec: 164.04 - lr: 0.025000
2022-10-04 05:54:48,599 epoch 28 - iter 60/209 - loss 0.01201335 - samples/sec: 175.56 - lr: 0.025000
2022-10-04 05:54:52,494 epoch 28 - iter 80/209 - loss 0.01254415 - samples/sec: 164.49 - lr: 0.025000
2022-10-04 05:54:56,144 epoch 28 - iter 100/209 - loss 0.01342201 - samples/sec: 175.59 - lr: 0.025000
2022-10-04 05:55:00,062 epoch 28 - iter 120/209 - loss 0.01298945 - samples/sec: 163.55 - lr: 0.025000
2022-10-04 05:55:03,336 epoch 28 - iter 140/209 - loss 0.01266880 - samples/sec: 195.80 - lr: 0.025000
2022-10-04 05:55:07,579 epoch 28 - iter 160/209 - loss 0.01261674 - samples/sec: 151.04 - lr: 0.025000
2022-10-04 05:55:11,633 epoch 28 - iter 180/209 - loss 

100%|██████████| 45/45 [00:09<00:00,  4.58it/s]


2022-10-04 05:55:26,249 Evaluating as a multi-label problem: False
2022-10-04 05:55:26,265 DEV : loss 0.03398394212126732 - f1-score (micro avg)  0.8971
2022-10-04 05:55:26,355 BAD EPOCHS (no improvement): 0
2022-10-04 05:55:26,361 saving best model
2022-10-04 05:55:27,089 ----------------------------------------------------------------------------------------------------
2022-10-04 05:55:30,894 epoch 29 - iter 20/209 - loss 0.00919966 - samples/sec: 168.46 - lr: 0.025000
2022-10-04 05:55:34,135 epoch 29 - iter 40/209 - loss 0.01038876 - samples/sec: 197.78 - lr: 0.025000
2022-10-04 05:55:37,525 epoch 29 - iter 60/209 - loss 0.01092163 - samples/sec: 189.20 - lr: 0.025000
2022-10-04 05:55:41,740 epoch 29 - iter 80/209 - loss 0.01215935 - samples/sec: 151.98 - lr: 0.025000
2022-10-04 05:55:44,830 epoch 29 - iter 100/209 - loss 0.01162038 - samples/sec: 207.46 - lr: 0.025000
2022-10-04 05:55:49,004 epoch 29 - iter 120/209 - loss 0.01227539 - samples/sec: 153.48 - lr: 0.025000
2022-10-04 

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]

2022-10-04 05:56:15,160 Evaluating as a multi-label problem: False
2022-10-04 05:56:15,176 DEV : loss 0.03598839417099953 - f1-score (micro avg)  0.8789
2022-10-04 05:56:15,271 BAD EPOCHS (no improvement): 1
2022-10-04 05:56:15,276 ----------------------------------------------------------------------------------------------------





2022-10-04 05:56:18,776 epoch 30 - iter 20/209 - loss 0.00953641 - samples/sec: 183.19 - lr: 0.025000
2022-10-04 05:56:22,014 epoch 30 - iter 40/209 - loss 0.01216349 - samples/sec: 197.94 - lr: 0.025000
2022-10-04 05:56:26,038 epoch 30 - iter 60/209 - loss 0.01165593 - samples/sec: 159.23 - lr: 0.025000
2022-10-04 05:56:30,644 epoch 30 - iter 80/209 - loss 0.01036310 - samples/sec: 139.08 - lr: 0.025000
2022-10-04 05:56:33,663 epoch 30 - iter 100/209 - loss 0.01120701 - samples/sec: 212.30 - lr: 0.025000
2022-10-04 05:56:37,577 epoch 30 - iter 120/209 - loss 0.01146768 - samples/sec: 163.71 - lr: 0.025000
2022-10-04 05:56:41,816 epoch 30 - iter 140/209 - loss 0.01178776 - samples/sec: 151.14 - lr: 0.025000
2022-10-04 05:56:45,427 epoch 30 - iter 160/209 - loss 0.01273598 - samples/sec: 177.49 - lr: 0.025000
2022-10-04 05:56:48,825 epoch 30 - iter 180/209 - loss 0.01269582 - samples/sec: 188.58 - lr: 0.025000
2022-10-04 05:56:52,339 epoch 30 - iter 200/209 - loss 0.01244290 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.58it/s]


2022-10-04 05:57:03,513 Evaluating as a multi-label problem: False
2022-10-04 05:57:03,528 DEV : loss 0.03772483393549919 - f1-score (micro avg)  0.8811
2022-10-04 05:57:03,620 BAD EPOCHS (no improvement): 2
2022-10-04 05:57:03,627 ----------------------------------------------------------------------------------------------------
2022-10-04 05:57:07,449 epoch 31 - iter 20/209 - loss 0.01236270 - samples/sec: 167.76 - lr: 0.025000
2022-10-04 05:57:11,367 epoch 31 - iter 40/209 - loss 0.01118533 - samples/sec: 163.53 - lr: 0.025000
2022-10-04 05:57:14,790 epoch 31 - iter 60/209 - loss 0.01042194 - samples/sec: 187.17 - lr: 0.025000
2022-10-04 05:57:19,446 epoch 31 - iter 80/209 - loss 0.01320989 - samples/sec: 137.62 - lr: 0.025000
2022-10-04 05:57:22,882 epoch 31 - iter 100/209 - loss 0.01219673 - samples/sec: 186.52 - lr: 0.025000
2022-10-04 05:57:26,509 epoch 31 - iter 120/209 - loss 0.01278864 - samples/sec: 176.67 - lr: 0.025000
2022-10-04 05:57:30,388 epoch 31 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.79it/s]


2022-10-04 05:57:51,807 Evaluating as a multi-label problem: False
2022-10-04 05:57:51,821 DEV : loss 0.03528624773025513 - f1-score (micro avg)  0.8964
2022-10-04 05:57:51,912 BAD EPOCHS (no improvement): 3
2022-10-04 05:57:51,918 ----------------------------------------------------------------------------------------------------
2022-10-04 05:57:55,838 epoch 32 - iter 20/209 - loss 0.01122096 - samples/sec: 163.58 - lr: 0.025000
2022-10-04 05:57:59,540 epoch 32 - iter 40/209 - loss 0.01012146 - samples/sec: 173.08 - lr: 0.025000
2022-10-04 05:58:03,139 epoch 32 - iter 60/209 - loss 0.01366236 - samples/sec: 178.08 - lr: 0.025000
2022-10-04 05:58:06,884 epoch 32 - iter 80/209 - loss 0.01366934 - samples/sec: 171.14 - lr: 0.025000
2022-10-04 05:58:10,587 epoch 32 - iter 100/209 - loss 0.01296765 - samples/sec: 173.01 - lr: 0.025000
2022-10-04 05:58:14,579 epoch 32 - iter 120/209 - loss 0.01263353 - samples/sec: 160.51 - lr: 0.025000
2022-10-04 05:58:18,061 epoch 32 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.66it/s]


2022-10-04 05:58:40,419 Evaluating as a multi-label problem: False
2022-10-04 05:58:40,433 DEV : loss 0.03627915680408478 - f1-score (micro avg)  0.8889
2022-10-04 05:58:40,524 Epoch    32: reducing learning rate of group 0 to 1.2500e-02.
2022-10-04 05:58:40,526 BAD EPOCHS (no improvement): 4
2022-10-04 05:58:40,533 ----------------------------------------------------------------------------------------------------
2022-10-04 05:58:43,778 epoch 33 - iter 20/209 - loss 0.01074466 - samples/sec: 197.52 - lr: 0.012500
2022-10-04 05:58:47,008 epoch 33 - iter 40/209 - loss 0.01381590 - samples/sec: 198.46 - lr: 0.012500
2022-10-04 05:58:51,226 epoch 33 - iter 60/209 - loss 0.01455667 - samples/sec: 151.91 - lr: 0.012500
2022-10-04 05:58:54,488 epoch 33 - iter 80/209 - loss 0.01295653 - samples/sec: 196.46 - lr: 0.012500
2022-10-04 05:58:57,940 epoch 33 - iter 100/209 - loss 0.01344330 - samples/sec: 185.65 - lr: 0.012500
2022-10-04 05:59:01,149 epoch 33 - iter 120/209 - loss 0.01279400 - sa

100%|██████████| 45/45 [00:09<00:00,  4.88it/s]


2022-10-04 05:59:28,561 Evaluating as a multi-label problem: False
2022-10-04 05:59:28,576 DEV : loss 0.03610891103744507 - f1-score (micro avg)  0.8934
2022-10-04 05:59:28,669 BAD EPOCHS (no improvement): 1
2022-10-04 05:59:28,675 ----------------------------------------------------------------------------------------------------
2022-10-04 05:59:32,917 epoch 34 - iter 20/209 - loss 0.01028631 - samples/sec: 151.08 - lr: 0.012500
2022-10-04 05:59:36,770 epoch 34 - iter 40/209 - loss 0.00992648 - samples/sec: 166.32 - lr: 0.012500
2022-10-04 05:59:40,047 epoch 34 - iter 60/209 - loss 0.01073276 - samples/sec: 195.63 - lr: 0.012500
2022-10-04 05:59:43,797 epoch 34 - iter 80/209 - loss 0.01046705 - samples/sec: 170.85 - lr: 0.012500
2022-10-04 05:59:47,162 epoch 34 - iter 100/209 - loss 0.01014213 - samples/sec: 190.47 - lr: 0.012500
2022-10-04 05:59:50,771 epoch 34 - iter 120/209 - loss 0.01068458 - samples/sec: 177.58 - lr: 0.012500
2022-10-04 05:59:55,250 epoch 34 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.81it/s]


2022-10-04 06:00:16,840 Evaluating as a multi-label problem: False
2022-10-04 06:00:16,856 DEV : loss 0.036554399877786636 - f1-score (micro avg)  0.8891
2022-10-04 06:00:16,948 BAD EPOCHS (no improvement): 2
2022-10-04 06:00:16,955 ----------------------------------------------------------------------------------------------------
2022-10-04 06:00:21,844 epoch 35 - iter 20/209 - loss 0.00938407 - samples/sec: 131.06 - lr: 0.012500
2022-10-04 06:00:25,356 epoch 35 - iter 40/209 - loss 0.00914864 - samples/sec: 182.48 - lr: 0.012500
2022-10-04 06:00:28,610 epoch 35 - iter 60/209 - loss 0.01067397 - samples/sec: 196.93 - lr: 0.012500
2022-10-04 06:00:32,449 epoch 35 - iter 80/209 - loss 0.01079024 - samples/sec: 166.92 - lr: 0.012500
2022-10-04 06:00:35,817 epoch 35 - iter 100/209 - loss 0.01088875 - samples/sec: 190.28 - lr: 0.012500
2022-10-04 06:00:40,277 epoch 35 - iter 120/209 - loss 0.01040033 - samples/sec: 143.62 - lr: 0.012500
2022-10-04 06:00:43,450 epoch 35 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.81it/s]

2022-10-04 06:01:05,238 Evaluating as a multi-label problem: False
2022-10-04 06:01:05,253 DEV : loss 0.03681835159659386 - f1-score (micro avg)  0.8914
2022-10-04 06:01:05,345 BAD EPOCHS (no improvement): 3
2022-10-04 06:01:05,350 ----------------------------------------------------------------------------------------------------





2022-10-04 06:01:09,109 epoch 36 - iter 20/209 - loss 0.00585385 - samples/sec: 170.53 - lr: 0.012500
2022-10-04 06:01:13,124 epoch 36 - iter 40/209 - loss 0.00785727 - samples/sec: 159.60 - lr: 0.012500
2022-10-04 06:01:16,944 epoch 36 - iter 60/209 - loss 0.00979658 - samples/sec: 167.77 - lr: 0.012500
2022-10-04 06:01:21,225 epoch 36 - iter 80/209 - loss 0.00966733 - samples/sec: 149.63 - lr: 0.012500
2022-10-04 06:01:24,573 epoch 36 - iter 100/209 - loss 0.01037219 - samples/sec: 191.44 - lr: 0.012500
2022-10-04 06:01:27,993 epoch 36 - iter 120/209 - loss 0.01038273 - samples/sec: 187.36 - lr: 0.012500
2022-10-04 06:01:31,770 epoch 36 - iter 140/209 - loss 0.00956070 - samples/sec: 169.68 - lr: 0.012500
2022-10-04 06:01:34,894 epoch 36 - iter 160/209 - loss 0.00951557 - samples/sec: 205.13 - lr: 0.012500
2022-10-04 06:01:38,132 epoch 36 - iter 180/209 - loss 0.00983683 - samples/sec: 197.96 - lr: 0.012500
2022-10-04 06:01:42,393 epoch 36 - iter 200/209 - loss 0.01005551 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]

2022-10-04 06:01:53,132 Evaluating as a multi-label problem: False
2022-10-04 06:01:53,148 DEV : loss 0.0368008092045784 - f1-score (micro avg)  0.8935
2022-10-04 06:01:53,240 Epoch    36: reducing learning rate of group 0 to 6.2500e-03.
2022-10-04 06:01:53,242 BAD EPOCHS (no improvement): 4
2022-10-04 06:01:53,247 ----------------------------------------------------------------------------------------------------





2022-10-04 06:01:56,892 epoch 37 - iter 20/209 - loss 0.00911506 - samples/sec: 175.83 - lr: 0.006250
2022-10-04 06:02:00,950 epoch 37 - iter 40/209 - loss 0.00799043 - samples/sec: 157.91 - lr: 0.006250
2022-10-04 06:02:05,123 epoch 37 - iter 60/209 - loss 0.01042876 - samples/sec: 153.53 - lr: 0.006250
2022-10-04 06:02:08,688 epoch 37 - iter 80/209 - loss 0.00950002 - samples/sec: 179.78 - lr: 0.006250
2022-10-04 06:02:11,782 epoch 37 - iter 100/209 - loss 0.00904592 - samples/sec: 207.13 - lr: 0.006250
2022-10-04 06:02:15,689 epoch 37 - iter 120/209 - loss 0.00981121 - samples/sec: 164.02 - lr: 0.006250
2022-10-04 06:02:19,668 epoch 37 - iter 140/209 - loss 0.00974833 - samples/sec: 160.98 - lr: 0.006250
2022-10-04 06:02:23,602 epoch 37 - iter 160/209 - loss 0.01028125 - samples/sec: 162.91 - lr: 0.006250
2022-10-04 06:02:27,003 epoch 37 - iter 180/209 - loss 0.01055163 - samples/sec: 188.45 - lr: 0.006250
2022-10-04 06:02:30,692 epoch 37 - iter 200/209 - loss 0.01023426 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]


2022-10-04 06:02:41,323 Evaluating as a multi-label problem: False
2022-10-04 06:02:41,338 DEV : loss 0.037444423884153366 - f1-score (micro avg)  0.8879
2022-10-04 06:02:41,431 BAD EPOCHS (no improvement): 1
2022-10-04 06:02:41,436 ----------------------------------------------------------------------------------------------------
2022-10-04 06:02:44,998 epoch 38 - iter 20/209 - loss 0.01032092 - samples/sec: 179.99 - lr: 0.006250
2022-10-04 06:02:49,023 epoch 38 - iter 40/209 - loss 0.00817445 - samples/sec: 159.19 - lr: 0.006250
2022-10-04 06:02:52,260 epoch 38 - iter 60/209 - loss 0.00819486 - samples/sec: 198.01 - lr: 0.006250
2022-10-04 06:02:56,357 epoch 38 - iter 80/209 - loss 0.00841288 - samples/sec: 156.39 - lr: 0.006250
2022-10-04 06:03:00,254 epoch 38 - iter 100/209 - loss 0.00869600 - samples/sec: 164.44 - lr: 0.006250
2022-10-04 06:03:03,040 epoch 38 - iter 120/209 - loss 0.00875049 - samples/sec: 230.10 - lr: 0.006250
2022-10-04 06:03:06,134 epoch 38 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.84it/s]

2022-10-04 06:03:29,557 Evaluating as a multi-label problem: False
2022-10-04 06:03:29,572 DEV : loss 0.036882925778627396 - f1-score (micro avg)  0.8897
2022-10-04 06:03:29,663 BAD EPOCHS (no improvement): 2
2022-10-04 06:03:29,668 ----------------------------------------------------------------------------------------------------





2022-10-04 06:03:33,651 epoch 39 - iter 20/209 - loss 0.00720484 - samples/sec: 160.95 - lr: 0.006250
2022-10-04 06:03:37,117 epoch 39 - iter 40/209 - loss 0.00866009 - samples/sec: 184.90 - lr: 0.006250
2022-10-04 06:03:41,509 epoch 39 - iter 60/209 - loss 0.00984890 - samples/sec: 145.85 - lr: 0.006250
2022-10-04 06:03:44,962 epoch 39 - iter 80/209 - loss 0.01017707 - samples/sec: 185.60 - lr: 0.006250
2022-10-04 06:03:48,922 epoch 39 - iter 100/209 - loss 0.00927738 - samples/sec: 161.78 - lr: 0.006250
2022-10-04 06:03:52,515 epoch 39 - iter 120/209 - loss 0.00969488 - samples/sec: 178.45 - lr: 0.006250
2022-10-04 06:03:56,930 epoch 39 - iter 140/209 - loss 0.00930524 - samples/sec: 145.12 - lr: 0.006250
2022-10-04 06:04:00,466 epoch 39 - iter 160/209 - loss 0.00895152 - samples/sec: 181.24 - lr: 0.006250
2022-10-04 06:04:03,663 epoch 39 - iter 180/209 - loss 0.00871912 - samples/sec: 200.47 - lr: 0.006250
2022-10-04 06:04:07,065 epoch 39 - iter 200/209 - loss 0.00849781 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.77it/s]

2022-10-04 06:04:18,295 Evaluating as a multi-label problem: False
2022-10-04 06:04:18,309 DEV : loss 0.03780832514166832 - f1-score (micro avg)  0.8858
2022-10-04 06:04:18,399 BAD EPOCHS (no improvement): 3
2022-10-04 06:04:18,404 ----------------------------------------------------------------------------------------------------





2022-10-04 06:04:23,159 epoch 40 - iter 20/209 - loss 0.01454504 - samples/sec: 134.77 - lr: 0.006250
2022-10-04 06:04:27,018 epoch 40 - iter 40/209 - loss 0.01110806 - samples/sec: 166.03 - lr: 0.006250
2022-10-04 06:04:30,576 epoch 40 - iter 60/209 - loss 0.01105876 - samples/sec: 180.16 - lr: 0.006250
2022-10-04 06:04:34,139 epoch 40 - iter 80/209 - loss 0.01046653 - samples/sec: 179.85 - lr: 0.006250
2022-10-04 06:04:37,332 epoch 40 - iter 100/209 - loss 0.00982467 - samples/sec: 200.72 - lr: 0.006250
2022-10-04 06:04:40,735 epoch 40 - iter 120/209 - loss 0.01021223 - samples/sec: 188.35 - lr: 0.006250
2022-10-04 06:04:44,260 epoch 40 - iter 140/209 - loss 0.01030779 - samples/sec: 181.77 - lr: 0.006250
2022-10-04 06:04:47,824 epoch 40 - iter 160/209 - loss 0.00970592 - samples/sec: 179.85 - lr: 0.006250
2022-10-04 06:04:51,542 epoch 40 - iter 180/209 - loss 0.00961284 - samples/sec: 172.36 - lr: 0.006250
2022-10-04 06:04:55,273 epoch 40 - iter 200/209 - loss 0.00954269 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.82it/s]

2022-10-04 06:05:06,126 Evaluating as a multi-label problem: False
2022-10-04 06:05:06,141 DEV : loss 0.0374482125043869 - f1-score (micro avg)  0.8922
2022-10-04 06:05:06,232 Epoch    40: reducing learning rate of group 0 to 3.1250e-03.
2022-10-04 06:05:06,234 BAD EPOCHS (no improvement): 4
2022-10-04 06:05:06,240 ----------------------------------------------------------------------------------------------------





2022-10-04 06:05:09,864 epoch 41 - iter 20/209 - loss 0.00966414 - samples/sec: 176.90 - lr: 0.003125
2022-10-04 06:05:13,416 epoch 41 - iter 40/209 - loss 0.00893676 - samples/sec: 180.39 - lr: 0.003125
2022-10-04 06:05:16,711 epoch 41 - iter 60/209 - loss 0.00844606 - samples/sec: 194.55 - lr: 0.003125
2022-10-04 06:05:20,687 epoch 41 - iter 80/209 - loss 0.00852052 - samples/sec: 161.13 - lr: 0.003125
2022-10-04 06:05:24,453 epoch 41 - iter 100/209 - loss 0.00821224 - samples/sec: 170.18 - lr: 0.003125
2022-10-04 06:05:28,956 epoch 41 - iter 120/209 - loss 0.00852039 - samples/sec: 142.27 - lr: 0.003125
2022-10-04 06:05:32,623 epoch 41 - iter 140/209 - loss 0.00851900 - samples/sec: 174.72 - lr: 0.003125
2022-10-04 06:05:36,536 epoch 41 - iter 160/209 - loss 0.00807858 - samples/sec: 163.72 - lr: 0.003125
2022-10-04 06:05:40,029 epoch 41 - iter 180/209 - loss 0.00778218 - samples/sec: 183.53 - lr: 0.003125
2022-10-04 06:05:43,475 epoch 41 - iter 200/209 - loss 0.00782003 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.91it/s]


2022-10-04 06:05:54,820 Evaluating as a multi-label problem: False
2022-10-04 06:05:54,835 DEV : loss 0.037678979337215424 - f1-score (micro avg)  0.893
2022-10-04 06:05:54,930 BAD EPOCHS (no improvement): 1
2022-10-04 06:05:54,936 ----------------------------------------------------------------------------------------------------
2022-10-04 06:05:58,071 epoch 42 - iter 20/209 - loss 0.00721139 - samples/sec: 204.51 - lr: 0.003125
2022-10-04 06:06:01,349 epoch 42 - iter 40/209 - loss 0.00857131 - samples/sec: 195.56 - lr: 0.003125
2022-10-04 06:06:04,532 epoch 42 - iter 60/209 - loss 0.00867251 - samples/sec: 201.36 - lr: 0.003125
2022-10-04 06:06:08,553 epoch 42 - iter 80/209 - loss 0.00922860 - samples/sec: 159.33 - lr: 0.003125
2022-10-04 06:06:13,092 epoch 42 - iter 100/209 - loss 0.00961675 - samples/sec: 141.17 - lr: 0.003125
2022-10-04 06:06:17,012 epoch 42 - iter 120/209 - loss 0.00909729 - samples/sec: 163.41 - lr: 0.003125
2022-10-04 06:06:21,018 epoch 42 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.88it/s]

2022-10-04 06:06:42,725 Evaluating as a multi-label problem: False
2022-10-04 06:06:42,740 DEV : loss 0.03834991157054901 - f1-score (micro avg)  0.8899
2022-10-04 06:06:42,832 BAD EPOCHS (no improvement): 2
2022-10-04 06:06:42,839 ----------------------------------------------------------------------------------------------------





2022-10-04 06:06:48,127 epoch 43 - iter 20/209 - loss 0.00778794 - samples/sec: 121.17 - lr: 0.003125
2022-10-04 06:06:51,489 epoch 43 - iter 40/209 - loss 0.00902505 - samples/sec: 190.63 - lr: 0.003125
2022-10-04 06:06:55,140 epoch 43 - iter 60/209 - loss 0.00810162 - samples/sec: 175.54 - lr: 0.003125
2022-10-04 06:06:58,744 epoch 43 - iter 80/209 - loss 0.00844783 - samples/sec: 177.84 - lr: 0.003125
2022-10-04 06:07:02,708 epoch 43 - iter 100/209 - loss 0.00798762 - samples/sec: 161.63 - lr: 0.003125
2022-10-04 06:07:06,436 epoch 43 - iter 120/209 - loss 0.00881416 - samples/sec: 171.92 - lr: 0.003125
2022-10-04 06:07:09,996 epoch 43 - iter 140/209 - loss 0.00932053 - samples/sec: 179.99 - lr: 0.003125
2022-10-04 06:07:13,376 epoch 43 - iter 160/209 - loss 0.00904417 - samples/sec: 189.61 - lr: 0.003125
2022-10-04 06:07:17,180 epoch 43 - iter 180/209 - loss 0.00885551 - samples/sec: 168.47 - lr: 0.003125
2022-10-04 06:07:20,012 epoch 43 - iter 200/209 - loss 0.00874426 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.52it/s]


2022-10-04 06:07:31,658 Evaluating as a multi-label problem: False
2022-10-04 06:07:31,673 DEV : loss 0.038088683038949966 - f1-score (micro avg)  0.8879
2022-10-04 06:07:31,767 BAD EPOCHS (no improvement): 3
2022-10-04 06:07:31,772 ----------------------------------------------------------------------------------------------------
2022-10-04 06:07:35,183 epoch 44 - iter 20/209 - loss 0.00868002 - samples/sec: 188.05 - lr: 0.003125
2022-10-04 06:07:38,643 epoch 44 - iter 40/209 - loss 0.01136547 - samples/sec: 185.21 - lr: 0.003125
2022-10-04 06:07:42,288 epoch 44 - iter 60/209 - loss 0.00900600 - samples/sec: 175.80 - lr: 0.003125
2022-10-04 06:07:46,399 epoch 44 - iter 80/209 - loss 0.00955548 - samples/sec: 155.84 - lr: 0.003125
2022-10-04 06:07:49,535 epoch 44 - iter 100/209 - loss 0.00944047 - samples/sec: 204.44 - lr: 0.003125
2022-10-04 06:07:53,019 epoch 44 - iter 120/209 - loss 0.00907744 - samples/sec: 183.92 - lr: 0.003125
2022-10-04 06:07:57,480 epoch 44 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]


2022-10-04 06:08:19,957 Evaluating as a multi-label problem: False
2022-10-04 06:08:19,971 DEV : loss 0.037843670696020126 - f1-score (micro avg)  0.8914
2022-10-04 06:08:20,062 Epoch    44: reducing learning rate of group 0 to 1.5625e-03.
2022-10-04 06:08:20,065 BAD EPOCHS (no improvement): 4
2022-10-04 06:08:20,070 ----------------------------------------------------------------------------------------------------
2022-10-04 06:08:23,467 epoch 45 - iter 20/209 - loss 0.00915057 - samples/sec: 188.75 - lr: 0.001563
2022-10-04 06:08:27,390 epoch 45 - iter 40/209 - loss 0.00806987 - samples/sec: 163.33 - lr: 0.001563
2022-10-04 06:08:30,808 epoch 45 - iter 60/209 - loss 0.00765503 - samples/sec: 187.53 - lr: 0.001563
2022-10-04 06:08:35,751 epoch 45 - iter 80/209 - loss 0.00841716 - samples/sec: 129.60 - lr: 0.001563
2022-10-04 06:08:39,710 epoch 45 - iter 100/209 - loss 0.00798017 - samples/sec: 161.81 - lr: 0.001563
2022-10-04 06:08:43,587 epoch 45 - iter 120/209 - loss 0.00809475 - s

100%|██████████| 45/45 [00:09<00:00,  4.60it/s]


2022-10-04 06:09:09,026 Evaluating as a multi-label problem: False
2022-10-04 06:09:09,041 DEV : loss 0.03789843991398811 - f1-score (micro avg)  0.8899
2022-10-04 06:09:09,132 BAD EPOCHS (no improvement): 1
2022-10-04 06:09:09,138 ----------------------------------------------------------------------------------------------------
2022-10-04 06:09:12,415 epoch 46 - iter 20/209 - loss 0.00825654 - samples/sec: 195.68 - lr: 0.001563
2022-10-04 06:09:15,533 epoch 46 - iter 40/209 - loss 0.00960911 - samples/sec: 205.61 - lr: 0.001563
2022-10-04 06:09:20,083 epoch 46 - iter 60/209 - loss 0.01020414 - samples/sec: 140.78 - lr: 0.001563
2022-10-04 06:09:23,546 epoch 46 - iter 80/209 - loss 0.00987878 - samples/sec: 185.09 - lr: 0.001563
2022-10-04 06:09:27,190 epoch 46 - iter 100/209 - loss 0.00951879 - samples/sec: 175.84 - lr: 0.001563
2022-10-04 06:09:30,430 epoch 46 - iter 120/209 - loss 0.00904749 - samples/sec: 197.78 - lr: 0.001563
2022-10-04 06:09:34,636 epoch 46 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.84it/s]


2022-10-04 06:09:56,792 Evaluating as a multi-label problem: False
2022-10-04 06:09:56,807 DEV : loss 0.03774372115731239 - f1-score (micro avg)  0.8906
2022-10-04 06:09:56,899 BAD EPOCHS (no improvement): 2
2022-10-04 06:09:56,908 ----------------------------------------------------------------------------------------------------
2022-10-04 06:10:00,630 epoch 47 - iter 20/209 - loss 0.00921869 - samples/sec: 172.17 - lr: 0.001563
2022-10-04 06:10:04,153 epoch 47 - iter 40/209 - loss 0.00960107 - samples/sec: 181.89 - lr: 0.001563
2022-10-04 06:10:07,512 epoch 47 - iter 60/209 - loss 0.00868945 - samples/sec: 190.76 - lr: 0.001563
2022-10-04 06:10:11,139 epoch 47 - iter 80/209 - loss 0.00895019 - samples/sec: 176.67 - lr: 0.001563
2022-10-04 06:10:15,956 epoch 47 - iter 100/209 - loss 0.00940435 - samples/sec: 132.98 - lr: 0.001563
2022-10-04 06:10:19,644 epoch 47 - iter 120/209 - loss 0.00895410 - samples/sec: 173.78 - lr: 0.001563
2022-10-04 06:10:23,462 epoch 47 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.56it/s]

2022-10-04 06:10:45,072 Evaluating as a multi-label problem: False
2022-10-04 06:10:45,087 DEV : loss 0.037846531718969345 - f1-score (micro avg)  0.8906
2022-10-04 06:10:45,181 BAD EPOCHS (no improvement): 3
2022-10-04 06:10:45,187 ----------------------------------------------------------------------------------------------------





2022-10-04 06:10:49,212 epoch 48 - iter 20/209 - loss 0.00885887 - samples/sec: 159.30 - lr: 0.001563
2022-10-04 06:10:53,223 epoch 48 - iter 40/209 - loss 0.00982232 - samples/sec: 159.73 - lr: 0.001563
2022-10-04 06:10:56,692 epoch 48 - iter 60/209 - loss 0.00891986 - samples/sec: 184.72 - lr: 0.001563
2022-10-04 06:11:00,584 epoch 48 - iter 80/209 - loss 0.00822634 - samples/sec: 164.73 - lr: 0.001563
2022-10-04 06:11:03,921 epoch 48 - iter 100/209 - loss 0.00815458 - samples/sec: 192.05 - lr: 0.001563
2022-10-04 06:11:06,774 epoch 48 - iter 120/209 - loss 0.00823428 - samples/sec: 224.74 - lr: 0.001563
2022-10-04 06:11:10,599 epoch 48 - iter 140/209 - loss 0.00813609 - samples/sec: 167.55 - lr: 0.001563
2022-10-04 06:11:14,957 epoch 48 - iter 160/209 - loss 0.00819137 - samples/sec: 147.01 - lr: 0.001563
2022-10-04 06:11:18,395 epoch 48 - iter 180/209 - loss 0.00806218 - samples/sec: 186.43 - lr: 0.001563
2022-10-04 06:11:23,248 epoch 48 - iter 200/209 - loss 0.00788857 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.91it/s]


2022-10-04 06:11:34,120 Evaluating as a multi-label problem: False
2022-10-04 06:11:34,136 DEV : loss 0.03800590708851814 - f1-score (micro avg)  0.8906
2022-10-04 06:11:34,229 Epoch    48: reducing learning rate of group 0 to 7.8125e-04.
2022-10-04 06:11:34,232 BAD EPOCHS (no improvement): 4
2022-10-04 06:11:34,239 ----------------------------------------------------------------------------------------------------
2022-10-04 06:11:38,276 epoch 49 - iter 20/209 - loss 0.00723680 - samples/sec: 158.75 - lr: 0.000781
2022-10-04 06:11:41,882 epoch 49 - iter 40/209 - loss 0.00742433 - samples/sec: 177.72 - lr: 0.000781
2022-10-04 06:11:45,918 epoch 49 - iter 60/209 - loss 0.00735570 - samples/sec: 158.78 - lr: 0.000781
2022-10-04 06:11:49,769 epoch 49 - iter 80/209 - loss 0.00758143 - samples/sec: 166.43 - lr: 0.000781
2022-10-04 06:11:53,118 epoch 49 - iter 100/209 - loss 0.00723109 - samples/sec: 191.39 - lr: 0.000781
2022-10-04 06:11:56,679 epoch 49 - iter 120/209 - loss 0.00795437 - sa

100%|██████████| 45/45 [00:09<00:00,  4.59it/s]


2022-10-04 06:12:22,660 Evaluating as a multi-label problem: False
2022-10-04 06:12:22,675 DEV : loss 0.038007382303476334 - f1-score (micro avg)  0.8906
2022-10-04 06:12:22,769 BAD EPOCHS (no improvement): 1
2022-10-04 06:12:22,773 ----------------------------------------------------------------------------------------------------
2022-10-04 06:12:27,153 epoch 50 - iter 20/209 - loss 0.00575047 - samples/sec: 146.31 - lr: 0.000781
2022-10-04 06:12:30,634 epoch 50 - iter 40/209 - loss 0.00608610 - samples/sec: 184.13 - lr: 0.000781
2022-10-04 06:12:34,190 epoch 50 - iter 60/209 - loss 0.00799629 - samples/sec: 180.22 - lr: 0.000781
2022-10-04 06:12:37,501 epoch 50 - iter 80/209 - loss 0.00825067 - samples/sec: 193.60 - lr: 0.000781
2022-10-04 06:12:41,826 epoch 50 - iter 100/209 - loss 0.00924510 - samples/sec: 148.13 - lr: 0.000781
2022-10-04 06:12:45,715 epoch 50 - iter 120/209 - loss 0.00867024 - samples/sec: 164.76 - lr: 0.000781
2022-10-04 06:12:49,134 epoch 50 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]

2022-10-04 06:13:11,116 Evaluating as a multi-label problem: False
2022-10-04 06:13:11,133 DEV : loss 0.038025226444005966 - f1-score (micro avg)  0.8906
2022-10-04 06:13:11,225 BAD EPOCHS (no improvement): 2
2022-10-04 06:13:11,231 ----------------------------------------------------------------------------------------------------





2022-10-04 06:13:14,963 epoch 51 - iter 20/209 - loss 0.00907028 - samples/sec: 171.74 - lr: 0.000781
2022-10-04 06:13:18,617 epoch 51 - iter 40/209 - loss 0.00816937 - samples/sec: 175.42 - lr: 0.000781
2022-10-04 06:13:21,897 epoch 51 - iter 60/209 - loss 0.00762472 - samples/sec: 195.42 - lr: 0.000781
2022-10-04 06:13:26,133 epoch 51 - iter 80/209 - loss 0.00841463 - samples/sec: 151.23 - lr: 0.000781
2022-10-04 06:13:29,964 epoch 51 - iter 100/209 - loss 0.00775230 - samples/sec: 167.28 - lr: 0.000781
2022-10-04 06:13:33,045 epoch 51 - iter 120/209 - loss 0.00766789 - samples/sec: 207.99 - lr: 0.000781
2022-10-04 06:13:36,902 epoch 51 - iter 140/209 - loss 0.00748351 - samples/sec: 166.15 - lr: 0.000781
2022-10-04 06:13:40,916 epoch 51 - iter 160/209 - loss 0.00780402 - samples/sec: 159.64 - lr: 0.000781
2022-10-04 06:13:44,277 epoch 51 - iter 180/209 - loss 0.00758937 - samples/sec: 190.65 - lr: 0.000781
2022-10-04 06:13:48,452 epoch 51 - iter 200/209 - loss 0.00747814 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.56it/s]

2022-10-04 06:14:00,254 Evaluating as a multi-label problem: False
2022-10-04 06:14:00,270 DEV : loss 0.038149621337652206 - f1-score (micro avg)  0.8914
2022-10-04 06:14:00,369 BAD EPOCHS (no improvement): 3
2022-10-04 06:14:00,374 ----------------------------------------------------------------------------------------------------





2022-10-04 06:14:03,993 epoch 52 - iter 20/209 - loss 0.00807312 - samples/sec: 177.12 - lr: 0.000781
2022-10-04 06:14:07,964 epoch 52 - iter 40/209 - loss 0.00746013 - samples/sec: 161.39 - lr: 0.000781
2022-10-04 06:14:11,841 epoch 52 - iter 60/209 - loss 0.00917152 - samples/sec: 165.28 - lr: 0.000781
2022-10-04 06:14:15,376 epoch 52 - iter 80/209 - loss 0.00875250 - samples/sec: 181.22 - lr: 0.000781
2022-10-04 06:14:18,841 epoch 52 - iter 100/209 - loss 0.00891767 - samples/sec: 184.99 - lr: 0.000781
2022-10-04 06:14:21,969 epoch 52 - iter 120/209 - loss 0.00851070 - samples/sec: 204.84 - lr: 0.000781
2022-10-04 06:14:25,567 epoch 52 - iter 140/209 - loss 0.00824121 - samples/sec: 178.13 - lr: 0.000781
2022-10-04 06:14:29,382 epoch 52 - iter 160/209 - loss 0.00843917 - samples/sec: 167.93 - lr: 0.000781
2022-10-04 06:14:32,880 epoch 52 - iter 180/209 - loss 0.00830659 - samples/sec: 183.19 - lr: 0.000781
2022-10-04 06:14:37,207 epoch 52 - iter 200/209 - loss 0.00837678 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.84it/s]

2022-10-04 06:14:48,136 Evaluating as a multi-label problem: False
2022-10-04 06:14:48,151 DEV : loss 0.038144659250974655 - f1-score (micro avg)  0.8922
2022-10-04 06:14:48,248 Epoch    52: reducing learning rate of group 0 to 3.9063e-04.
2022-10-04 06:14:48,250 BAD EPOCHS (no improvement): 4
2022-10-04 06:14:48,256 ----------------------------------------------------------------------------------------------------





2022-10-04 06:14:52,196 epoch 53 - iter 20/209 - loss 0.01103940 - samples/sec: 162.68 - lr: 0.000391
2022-10-04 06:14:57,029 epoch 53 - iter 40/209 - loss 0.01270246 - samples/sec: 132.56 - lr: 0.000391
2022-10-04 06:15:00,835 epoch 53 - iter 60/209 - loss 0.01043549 - samples/sec: 168.39 - lr: 0.000391
2022-10-04 06:15:04,543 epoch 53 - iter 80/209 - loss 0.01143983 - samples/sec: 172.84 - lr: 0.000391
2022-10-04 06:15:07,817 epoch 53 - iter 100/209 - loss 0.01024308 - samples/sec: 195.77 - lr: 0.000391
2022-10-04 06:15:11,062 epoch 53 - iter 120/209 - loss 0.00957503 - samples/sec: 197.54 - lr: 0.000391
2022-10-04 06:15:14,567 epoch 53 - iter 140/209 - loss 0.00952799 - samples/sec: 182.81 - lr: 0.000391
2022-10-04 06:15:18,493 epoch 53 - iter 160/209 - loss 0.00972760 - samples/sec: 163.20 - lr: 0.000391
2022-10-04 06:15:21,689 epoch 53 - iter 180/209 - loss 0.00973548 - samples/sec: 200.62 - lr: 0.000391
2022-10-04 06:15:25,295 epoch 53 - iter 200/209 - loss 0.00951458 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.65it/s]


2022-10-04 06:15:36,365 Evaluating as a multi-label problem: False
2022-10-04 06:15:36,381 DEV : loss 0.03815596550703049 - f1-score (micro avg)  0.8906
2022-10-04 06:15:36,471 BAD EPOCHS (no improvement): 1
2022-10-04 06:15:36,476 ----------------------------------------------------------------------------------------------------
2022-10-04 06:15:40,090 epoch 54 - iter 20/209 - loss 0.00581222 - samples/sec: 177.39 - lr: 0.000391
2022-10-04 06:15:43,332 epoch 54 - iter 40/209 - loss 0.00700413 - samples/sec: 197.73 - lr: 0.000391
2022-10-04 06:15:46,986 epoch 54 - iter 60/209 - loss 0.00758364 - samples/sec: 175.35 - lr: 0.000391
2022-10-04 06:15:51,398 epoch 54 - iter 80/209 - loss 0.00845202 - samples/sec: 145.18 - lr: 0.000391
2022-10-04 06:15:55,427 epoch 54 - iter 100/209 - loss 0.00857451 - samples/sec: 159.04 - lr: 0.000391
2022-10-04 06:15:59,161 epoch 54 - iter 120/209 - loss 0.00889105 - samples/sec: 171.61 - lr: 0.000391
2022-10-04 06:16:02,473 epoch 54 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.76it/s]


2022-10-04 06:16:24,477 Evaluating as a multi-label problem: False
2022-10-04 06:16:24,492 DEV : loss 0.03805556148290634 - f1-score (micro avg)  0.8906
2022-10-04 06:16:24,585 BAD EPOCHS (no improvement): 2
2022-10-04 06:16:24,592 ----------------------------------------------------------------------------------------------------
2022-10-04 06:16:28,601 epoch 55 - iter 20/209 - loss 0.01075096 - samples/sec: 159.90 - lr: 0.000391
2022-10-04 06:16:32,071 epoch 55 - iter 40/209 - loss 0.01310031 - samples/sec: 184.66 - lr: 0.000391
2022-10-04 06:16:36,509 epoch 55 - iter 60/209 - loss 0.01258739 - samples/sec: 144.32 - lr: 0.000391
2022-10-04 06:16:40,007 epoch 55 - iter 80/209 - loss 0.01135015 - samples/sec: 183.19 - lr: 0.000391
2022-10-04 06:16:43,881 epoch 55 - iter 100/209 - loss 0.01041704 - samples/sec: 165.43 - lr: 0.000391
2022-10-04 06:16:47,367 epoch 55 - iter 120/209 - loss 0.00984000 - samples/sec: 183.86 - lr: 0.000391
2022-10-04 06:16:50,812 epoch 55 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.59it/s]


2022-10-04 06:17:13,091 Evaluating as a multi-label problem: False
2022-10-04 06:17:13,106 DEV : loss 0.038078758865594864 - f1-score (micro avg)  0.8914
2022-10-04 06:17:13,196 BAD EPOCHS (no improvement): 3
2022-10-04 06:17:13,201 ----------------------------------------------------------------------------------------------------
2022-10-04 06:17:17,914 epoch 56 - iter 20/209 - loss 0.00893760 - samples/sec: 136.01 - lr: 0.000391
2022-10-04 06:17:21,453 epoch 56 - iter 40/209 - loss 0.00915405 - samples/sec: 181.09 - lr: 0.000391
2022-10-04 06:17:25,476 epoch 56 - iter 60/209 - loss 0.00785992 - samples/sec: 159.26 - lr: 0.000391
2022-10-04 06:17:28,931 epoch 56 - iter 80/209 - loss 0.00830929 - samples/sec: 185.51 - lr: 0.000391
2022-10-04 06:17:32,134 epoch 56 - iter 100/209 - loss 0.00872877 - samples/sec: 200.10 - lr: 0.000391
2022-10-04 06:17:35,722 epoch 56 - iter 120/209 - loss 0.00869752 - samples/sec: 178.64 - lr: 0.000391
2022-10-04 06:17:39,032 epoch 56 - iter 140/209 - lo

100%|██████████| 45/45 [00:09<00:00,  4.91it/s]


2022-10-04 06:18:00,830 Evaluating as a multi-label problem: False
2022-10-04 06:18:00,844 DEV : loss 0.038082562386989594 - f1-score (micro avg)  0.8899
2022-10-04 06:18:00,937 Epoch    56: reducing learning rate of group 0 to 1.9531e-04.
2022-10-04 06:18:00,939 BAD EPOCHS (no improvement): 4
2022-10-04 06:18:00,945 ----------------------------------------------------------------------------------------------------
2022-10-04 06:18:04,368 epoch 57 - iter 20/209 - loss 0.00799015 - samples/sec: 187.34 - lr: 0.000195
2022-10-04 06:18:08,384 epoch 57 - iter 40/209 - loss 0.00766520 - samples/sec: 159.55 - lr: 0.000195
2022-10-04 06:18:11,744 epoch 57 - iter 60/209 - loss 0.00751153 - samples/sec: 190.73 - lr: 0.000195
2022-10-04 06:18:15,132 epoch 57 - iter 80/209 - loss 0.00926397 - samples/sec: 189.16 - lr: 0.000195
2022-10-04 06:18:18,822 epoch 57 - iter 100/209 - loss 0.00874149 - samples/sec: 173.63 - lr: 0.000195
2022-10-04 06:18:22,704 epoch 57 - iter 120/209 - loss 0.00914877 - s

100%|██████████| 45/45 [00:09<00:00,  4.96it/s]


2022-10-04 06:18:48,752 Evaluating as a multi-label problem: False
2022-10-04 06:18:48,768 DEV : loss 0.03809021785855293 - f1-score (micro avg)  0.8908
2022-10-04 06:18:48,860 BAD EPOCHS (no improvement): 1
2022-10-04 06:18:48,865 ----------------------------------------------------------------------------------------------------
2022-10-04 06:18:53,029 epoch 58 - iter 20/209 - loss 0.01286258 - samples/sec: 153.90 - lr: 0.000195
2022-10-04 06:18:56,466 epoch 58 - iter 40/209 - loss 0.00981505 - samples/sec: 186.45 - lr: 0.000195
2022-10-04 06:18:59,814 epoch 58 - iter 60/209 - loss 0.00900506 - samples/sec: 191.46 - lr: 0.000195
2022-10-04 06:19:03,675 epoch 58 - iter 80/209 - loss 0.00766555 - samples/sec: 165.92 - lr: 0.000195
2022-10-04 06:19:07,896 epoch 58 - iter 100/209 - loss 0.00685628 - samples/sec: 151.80 - lr: 0.000195
2022-10-04 06:19:12,001 epoch 58 - iter 120/209 - loss 0.00711517 - samples/sec: 156.09 - lr: 0.000195
2022-10-04 06:19:15,103 epoch 58 - iter 140/209 - los

100%|██████████| 45/45 [00:09<00:00,  4.83it/s]

2022-10-04 06:19:36,496 Evaluating as a multi-label problem: False
2022-10-04 06:19:36,517 DEV : loss 0.03806339576840401 - f1-score (micro avg)  0.8899
2022-10-04 06:19:36,631 BAD EPOCHS (no improvement): 2
2022-10-04 06:19:36,638 ----------------------------------------------------------------------------------------------------





2022-10-04 06:19:39,793 epoch 59 - iter 20/209 - loss 0.01022128 - samples/sec: 203.28 - lr: 0.000195
2022-10-04 06:19:43,329 epoch 59 - iter 40/209 - loss 0.00782568 - samples/sec: 181.25 - lr: 0.000195
2022-10-04 06:19:46,719 epoch 59 - iter 60/209 - loss 0.00760278 - samples/sec: 189.01 - lr: 0.000195
2022-10-04 06:19:50,312 epoch 59 - iter 80/209 - loss 0.00807451 - samples/sec: 178.35 - lr: 0.000195
2022-10-04 06:19:53,955 epoch 59 - iter 100/209 - loss 0.00851865 - samples/sec: 175.92 - lr: 0.000195
2022-10-04 06:19:57,877 epoch 59 - iter 120/209 - loss 0.00909560 - samples/sec: 163.38 - lr: 0.000195
2022-10-04 06:20:01,264 epoch 59 - iter 140/209 - loss 0.00911432 - samples/sec: 189.28 - lr: 0.000195
2022-10-04 06:20:06,748 epoch 59 - iter 160/209 - loss 0.00927495 - samples/sec: 116.81 - lr: 0.000195
2022-10-04 06:20:11,298 epoch 59 - iter 180/209 - loss 0.00907668 - samples/sec: 140.79 - lr: 0.000195
2022-10-04 06:20:15,017 epoch 59 - iter 200/209 - loss 0.00907362 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.76it/s]

2022-10-04 06:20:25,874 Evaluating as a multi-label problem: False
2022-10-04 06:20:25,889 DEV : loss 0.03805448114871979 - f1-score (micro avg)  0.8899
2022-10-04 06:20:25,983 BAD EPOCHS (no improvement): 3
2022-10-04 06:20:25,988 ----------------------------------------------------------------------------------------------------





2022-10-04 06:20:29,327 epoch 60 - iter 20/209 - loss 0.00549876 - samples/sec: 192.05 - lr: 0.000195
2022-10-04 06:20:33,106 epoch 60 - iter 40/209 - loss 0.00827131 - samples/sec: 169.58 - lr: 0.000195
2022-10-04 06:20:36,786 epoch 60 - iter 60/209 - loss 0.00767564 - samples/sec: 174.14 - lr: 0.000195
2022-10-04 06:20:40,275 epoch 60 - iter 80/209 - loss 0.00754839 - samples/sec: 183.63 - lr: 0.000195
2022-10-04 06:20:44,259 epoch 60 - iter 100/209 - loss 0.00754319 - samples/sec: 160.85 - lr: 0.000195
2022-10-04 06:20:47,773 epoch 60 - iter 120/209 - loss 0.00785504 - samples/sec: 182.36 - lr: 0.000195
2022-10-04 06:20:51,116 epoch 60 - iter 140/209 - loss 0.00783770 - samples/sec: 191.74 - lr: 0.000195
2022-10-04 06:20:55,087 epoch 60 - iter 160/209 - loss 0.00792624 - samples/sec: 161.33 - lr: 0.000195
2022-10-04 06:20:58,478 epoch 60 - iter 180/209 - loss 0.00799376 - samples/sec: 188.98 - lr: 0.000195
2022-10-04 06:21:02,632 epoch 60 - iter 200/209 - loss 0.00845501 - samples/s

100%|██████████| 45/45 [00:09<00:00,  4.87it/s]


2022-10-04 06:21:13,523 Evaluating as a multi-label problem: False
2022-10-04 06:21:13,539 DEV : loss 0.038046371191740036 - f1-score (micro avg)  0.8906
2022-10-04 06:21:13,634 Epoch    60: reducing learning rate of group 0 to 9.7656e-05.
2022-10-04 06:21:13,637 BAD EPOCHS (no improvement): 4
2022-10-04 06:21:13,643 ----------------------------------------------------------------------------------------------------
2022-10-04 06:21:13,645 ----------------------------------------------------------------------------------------------------
2022-10-04 06:21:13,647 learning rate too small - quitting training!
2022-10-04 06:21:13,648 ----------------------------------------------------------------------------------------------------
2022-10-04 06:21:14,329 ----------------------------------------------------------------------------------------------------
2022-10-04 06:21:14,334 loading file /content/drive/MyDrive/Flair_NLP/sota-ner-flair/best-model.pt
2022-10-04 06:21:15,156 SequenceTagge

100%|██████████| 45/45 [00:38<00:00,  1.17it/s]

2022-10-04 06:21:53,861 Evaluating as a multi-label problem: False
2022-10-04 06:21:53,877 0.8961	0.8932	0.8946	0.8193
2022-10-04 06:21:53,878 
Results:
- F-score (micro) 0.8946
- F-score (macro) 0.856
- Accuracy 0.8193

By class:
              precision    recall  f1-score   support

  FUNDAMENTO     0.9431    0.9355    0.9393       124
      PESSOA     0.9167    0.9244    0.9205       119
       LOCAL     0.8173    0.8416    0.8293       101
        DATA     0.9600    0.9796    0.9697        98
 ORGANIZACAO     0.8710    0.8617    0.8663        94
PRODUTODELEI     0.8235    0.7778    0.8000        54
      EVENTO     0.8333    0.5556    0.6667         9

   micro avg     0.8961    0.8932    0.8946       599
   macro avg     0.8807    0.8394    0.8560       599
weighted avg     0.8957    0.8932    0.8939       599

2022-10-04 06:21:53,880 ----------------------------------------------------------------------------------------------------





{'test_score': 0.8946488294314382,
 'dev_score_history': [0.5040916530278232,
  0.618213660245184,
  0.7527472527472527,
  0.7420000000000001,
  0.7791519434628975,
  0.7723292469352013,
  0.8042895442359249,
  0.8197945845004668,
  0.82842287694974,
  0.8449682683590208,
  0.8388746803069055,
  0.8444055944055944,
  0.8363309352517986,
  0.8506151142355008,
  0.8683522231909329,
  0.8566463944396178,
  0.8631211857018308,
  0.8612354521038496,
  0.8603256212510711,
  0.8741258741258741,
  0.8881057268722466,
  0.8726003490401397,
  0.8528174936921783,
  0.8853893263342082,
  0.8821490467937607,
  0.8825065274151437,
  0.8906386701662291,
  0.8970976253298153,
  0.8788927335640138,
  0.881118881118881,
  0.896431679721497,
  0.8888888888888888,
  0.8933566433566434,
  0.8890845070422535,
  0.8913987836663771,
  0.8935427574171029,
  0.8879159369527145,
  0.8896672504378285,
  0.8857644991212653,
  0.8921739130434783,
  0.8929824561403509,
  0.8898601398601399,
  0.8879159369527145,
  0

## Vetores Estático e de Contexto concatenados (Pt-Wiki-Fasttext e Flair Embeddings)

### Imports

In [None]:
## Importes
## datasets
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Embeddings
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings

## Modelo/Treino
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

### Corpus

In [None]:
## Mount Google Drive
# Exposes the Drive contents under /content/drive. The corpus folder and the
# training output directory used by the following cells both live under
# /content/drive/MyDrive, so this cell must run before them.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Load the corpus and define its column layout
# Map each column index of the corpus files to its role:
# column 0 is the token text, column 1 is the NER label.
columns = {0: 'text', 1: 'ner'}

# Folder in which the train, test and dev files reside (requires Drive to be mounted).
data_folder = '/content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria'

# Build the corpus from the column-formatted files in data_folder;
# note that the dev split is stored in 'valid.txt'.
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.txt',
                              test_file='test.txt',
                              dev_file='valid.txt')

## Task
# Label type to predict; reused when building the label dictionary and the tagger.
label_type = 'ner'

2022-10-05 21:18:46,495 Reading data from /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria
2022-10-05 21:18:46,497 Train: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/train.txt
2022-10-05 21:18:46,498 Dev: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/valid.txt
2022-10-05 21:18:46,499 Test: /content/drive/MyDrive/Flair_NLP/Corpus/pl_corpus_categoria/test.txt


In [None]:
## Label dictionary
# Build the dictionary of 'ner' labels found in the corpus; it is later passed
# to the tagger as its tag_dictionary.
label_dict = corpus.make_label_dictionary(label_type=label_type)
# Print the dictionary so the collected entity classes can be inspected.
print(label_dict)

2022-10-05 21:18:54,409 Computing label dictionary. Progress:


6667it [00:00, 53636.67it/s]

2022-10-05 21:18:54,582 Dictionary created for label 'ner' with 8 values: PESSOA (seen 628 times), FUNDAMENTO (seen 490 times), ORGANIZACAO (seen 435 times), DATA (seen 433 times), LOCAL (seen 369 times), PRODUTODELEI (seen 230 times), EVENTO (seen 9 times)
Dictionary with 8 tags: <unk>, PESSOA, FUNDAMENTO, ORGANIZACAO, DATA, LOCAL, PRODUTODELEI, EVENTO





### Embeddings

In [None]:
## Stacked Embeddings
# Initialize the embedding stack with one static and two contextual embeddings:
#   - WordEmbeddings('pt'): static Portuguese word vectors
#     (resolved to pt-wiki-fasttext-300d-1M on download)
#   - FlairEmbeddings('pt-forward') / ('pt-backward'): Portuguese Flair
#     language-model embeddings, one per reading direction
embedding_types = [
    WordEmbeddings('pt'),
    FlairEmbeddings('pt-forward'),
    FlairEmbeddings('pt-backward')
]

# Combine the three embeddings into the single embedding object given to the tagger.
embeddings = StackedEmbeddings(embeddings=embedding_types)

2022-10-05 21:18:55,258 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M.vectors.npy not found in cache, downloading to /tmp/tmp4kfa13pe


100%|██████████| 710528528/710528528 [00:53<00:00, 13262686.27B/s]

2022-10-05 21:19:49,803 copying /tmp/tmp4kfa13pe to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M.vectors.npy





2022-10-05 21:19:51,766 removing temp file /tmp/tmp4kfa13pe
2022-10-05 21:19:52,816 https://flair.informatik.hu-berlin.de/resources/embeddings/token/pt-wiki-fasttext-300d-1M not found in cache, downloading to /tmp/tmpdxhnahfl


100%|██████████| 23541010/23541010 [00:03<00:00, 6577452.98B/s] 

2022-10-05 21:19:57,086 copying /tmp/tmpdxhnahfl to cache at /root/.flair/embeddings/pt-wiki-fasttext-300d-1M
2022-10-05 21:19:57,112 removing temp file /tmp/tmpdxhnahfl





2022-10-05 21:20:00,427 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-forward.pt not found in cache, downloading to /tmp/tmpa5j19a4h


100%|██████████| 72819080/72819080 [00:06<00:00, 10789816.41B/s]

2022-10-05 21:20:07,819 copying /tmp/tmpa5j19a4h to cache at /root/.flair/embeddings/lm-pt-forward.pt





2022-10-05 21:20:07,911 removing temp file /tmp/tmpa5j19a4h
2022-10-05 21:20:18,702 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/lm-pt-backward.pt not found in cache, downloading to /tmp/tmpcritzwjg


100%|██████████| 72819080/72819080 [00:41<00:00, 1754470.60B/s]

2022-10-05 21:21:00,910 copying /tmp/tmpcritzwjg to cache at /root/.flair/embeddings/lm-pt-backward.pt
2022-10-05 21:21:00,979 removing temp file /tmp/tmpcritzwjg





### Treino

In [None]:
## Initialize the model
# Sequence tagger over the stacked embeddings, predicting the 'ner' labels in
# label_dict. use_crf=True adds the CRF decoding layer; hidden_size=256 is the
# size of the recurrent hidden state.
tagger = SequenceTagger(hidden_size=256,
                        embeddings=embeddings,
                        tag_dictionary=label_dict,
                        tag_type=label_type,
                        use_crf=True)

2022-10-05 21:21:01,190 SequenceTagger predicts: Dictionary with 29 tags: O, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-FUNDAMENTO, B-FUNDAMENTO, E-FUNDAMENTO, I-FUNDAMENTO, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-DATA, B-DATA, E-DATA, I-DATA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, S-PRODUTODELEI, B-PRODUTODELEI, E-PRODUTODELEI, I-PRODUTODELEI, S-EVENTO, B-EVENTO, E-EVENTO, I-EVENTO


In [None]:
## Train the model
# Initialize the trainer with the tagger and the corpus
trainer = ModelTrainer(tagger, corpus)
# Output directory on Drive for the training artifacts (e.g. best-model.pt).
# NOTE(review): the preceding experiment's log loads best-model.pt from this
# same directory, so this run presumably overwrites that experiment's saved
# model and logs -- confirm this is intended, or use a per-experiment directory.
path = '/content/drive/MyDrive/Flair_NLP/sota-ner-flair'

# Start training.
# max_epochs is an upper bound: the preceding experiment's log shows training
# quitting earlier once the annealed learning rate became too small.
# As the last expression of the cell, the value returned by train() is shown
# as the cell output.
trainer.train(path,
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=40)

  "There should be no best model saved at epoch 1 except there "


2022-10-05 21:21:02,612 ----------------------------------------------------------------------------------------------------
2022-10-05 21:21:02,614 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'pt'
      (embedding): Embedding(592108, 300)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.5, inplace=False)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=275, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=4396, out_features=4396, bias=T

100%|██████████| 45/45 [00:33<00:00,  1.36it/s]


2022-10-05 21:24:10,466 Evaluating as a multi-label problem: False
2022-10-05 21:24:10,483 DEV : loss 0.1848800778388977 - f1-score (micro avg)  0.3554
2022-10-05 21:24:10,587 BAD EPOCHS (no improvement): 0
2022-10-05 21:24:10,592 saving best model
2022-10-05 21:24:14,967 ----------------------------------------------------------------------------------------------------
2022-10-05 21:24:18,778 epoch 2 - iter 20/209 - loss 0.20290499 - samples/sec: 168.59 - lr: 0.100000
2022-10-05 21:24:23,038 epoch 2 - iter 40/209 - loss 0.17775158 - samples/sec: 150.35 - lr: 0.100000
2022-10-05 21:24:26,554 epoch 2 - iter 60/209 - loss 0.17685361 - samples/sec: 182.28 - lr: 0.100000
2022-10-05 21:24:29,969 epoch 2 - iter 80/209 - loss 0.16575493 - samples/sec: 187.67 - lr: 0.100000
2022-10-05 21:24:34,706 epoch 2 - iter 100/209 - loss 0.16704786 - samples/sec: 135.23 - lr: 0.100000
2022-10-05 21:24:39,112 epoch 2 - iter 120/209 - loss 0.15990150 - samples/sec: 145.40 - lr: 0.100000
2022-10-05 21:24:4

100%|██████████| 45/45 [00:08<00:00,  5.28it/s]

2022-10-05 21:25:03,591 Evaluating as a multi-label problem: False





2022-10-05 21:25:03,606 DEV : loss 0.10556729137897491 - f1-score (micro avg)  0.6008
2022-10-05 21:25:03,705 BAD EPOCHS (no improvement): 0
2022-10-05 21:25:03,709 saving best model
2022-10-05 21:25:07,946 ----------------------------------------------------------------------------------------------------
2022-10-05 21:25:11,409 epoch 3 - iter 20/209 - loss 0.11541722 - samples/sec: 185.13 - lr: 0.100000
2022-10-05 21:25:16,403 epoch 3 - iter 40/209 - loss 0.11182255 - samples/sec: 128.26 - lr: 0.100000
2022-10-05 21:25:20,328 epoch 3 - iter 60/209 - loss 0.12935928 - samples/sec: 163.21 - lr: 0.100000
2022-10-05 21:25:24,428 epoch 3 - iter 80/209 - loss 0.12186519 - samples/sec: 156.29 - lr: 0.100000
2022-10-05 21:25:28,325 epoch 3 - iter 100/209 - loss 0.11785908 - samples/sec: 164.40 - lr: 0.100000
2022-10-05 21:25:32,210 epoch 3 - iter 120/209 - loss 0.11635322 - samples/sec: 164.92 - lr: 0.100000
2022-10-05 21:25:36,208 epoch 3 - iter 140/209 - loss 0.11446433 - samples/sec: 160.

100%|██████████| 45/45 [00:08<00:00,  5.20it/s]


2022-10-05 21:25:57,727 Evaluating as a multi-label problem: False
2022-10-05 21:25:57,741 DEV : loss 0.10561355203390121 - f1-score (micro avg)  0.6265
2022-10-05 21:25:57,844 BAD EPOCHS (no improvement): 0
2022-10-05 21:25:57,849 saving best model
2022-10-05 21:26:02,054 ----------------------------------------------------------------------------------------------------
2022-10-05 21:26:07,352 epoch 4 - iter 20/209 - loss 0.08648542 - samples/sec: 120.93 - lr: 0.100000
2022-10-05 21:26:11,116 epoch 4 - iter 40/209 - loss 0.09000425 - samples/sec: 170.22 - lr: 0.100000
2022-10-05 21:26:15,110 epoch 4 - iter 60/209 - loss 0.08596960 - samples/sec: 160.38 - lr: 0.100000
2022-10-05 21:26:19,234 epoch 4 - iter 80/209 - loss 0.08908865 - samples/sec: 155.38 - lr: 0.100000
2022-10-05 21:26:22,871 epoch 4 - iter 100/209 - loss 0.09743297 - samples/sec: 176.18 - lr: 0.100000
2022-10-05 21:26:26,414 epoch 4 - iter 120/209 - loss 0.09350501 - samples/sec: 180.82 - lr: 0.100000
2022-10-05 21:26:

100%|██████████| 45/45 [00:08<00:00,  5.30it/s]

2022-10-05 21:26:51,054 Evaluating as a multi-label problem: False





2022-10-05 21:26:51,070 DEV : loss 0.06625504791736603 - f1-score (micro avg)  0.7642
2022-10-05 21:26:51,184 BAD EPOCHS (no improvement): 0
2022-10-05 21:26:51,189 saving best model
2022-10-05 21:26:55,281 ----------------------------------------------------------------------------------------------------
2022-10-05 21:26:58,715 epoch 5 - iter 20/209 - loss 0.07382398 - samples/sec: 186.66 - lr: 0.100000
2022-10-05 21:27:02,542 epoch 5 - iter 40/209 - loss 0.07442300 - samples/sec: 167.39 - lr: 0.100000
2022-10-05 21:27:06,147 epoch 5 - iter 60/209 - loss 0.07241555 - samples/sec: 177.78 - lr: 0.100000
2022-10-05 21:27:10,762 epoch 5 - iter 80/209 - loss 0.07524735 - samples/sec: 138.83 - lr: 0.100000
2022-10-05 21:27:15,334 epoch 5 - iter 100/209 - loss 0.07657167 - samples/sec: 140.13 - lr: 0.100000
2022-10-05 21:27:18,434 epoch 5 - iter 120/209 - loss 0.07292674 - samples/sec: 206.70 - lr: 0.100000
2022-10-05 21:27:22,923 epoch 5 - iter 140/209 - loss 0.07319745 - samples/sec: 142.

100%|██████████| 45/45 [00:08<00:00,  5.18it/s]

2022-10-05 21:27:43,763 Evaluating as a multi-label problem: False





2022-10-05 21:27:43,778 DEV : loss 0.0641130656003952 - f1-score (micro avg)  0.7273
2022-10-05 21:27:43,881 BAD EPOCHS (no improvement): 1
2022-10-05 21:27:43,886 ----------------------------------------------------------------------------------------------------
2022-10-05 21:27:48,346 epoch 6 - iter 20/209 - loss 0.07178041 - samples/sec: 143.65 - lr: 0.100000
2022-10-05 21:27:51,428 epoch 6 - iter 40/209 - loss 0.07102524 - samples/sec: 207.89 - lr: 0.100000
2022-10-05 21:27:55,330 epoch 6 - iter 60/209 - loss 0.06735485 - samples/sec: 164.15 - lr: 0.100000
2022-10-05 21:27:58,659 epoch 6 - iter 80/209 - loss 0.06497303 - samples/sec: 192.48 - lr: 0.100000
2022-10-05 21:28:01,894 epoch 6 - iter 100/209 - loss 0.06898219 - samples/sec: 198.10 - lr: 0.100000
2022-10-05 21:28:06,344 epoch 6 - iter 120/209 - loss 0.06644753 - samples/sec: 143.96 - lr: 0.100000
2022-10-05 21:28:10,132 epoch 6 - iter 140/209 - loss 0.06407983 - samples/sec: 169.15 - lr: 0.100000
2022-10-05 21:28:15,237 e

100%|██████████| 45/45 [00:08<00:00,  5.14it/s]

2022-10-05 21:28:32,724 Evaluating as a multi-label problem: False





2022-10-05 21:28:32,739 DEV : loss 0.053595658391714096 - f1-score (micro avg)  0.8236
2022-10-05 21:28:32,843 BAD EPOCHS (no improvement): 0
2022-10-05 21:28:32,847 saving best model
2022-10-05 21:28:37,029 ----------------------------------------------------------------------------------------------------
2022-10-05 21:28:40,755 epoch 7 - iter 20/209 - loss 0.04414143 - samples/sec: 172.11 - lr: 0.100000
2022-10-05 21:28:44,641 epoch 7 - iter 40/209 - loss 0.05017447 - samples/sec: 164.89 - lr: 0.100000
2022-10-05 21:28:48,272 epoch 7 - iter 60/209 - loss 0.05605570 - samples/sec: 176.44 - lr: 0.100000
2022-10-05 21:28:52,497 epoch 7 - iter 80/209 - loss 0.05949845 - samples/sec: 151.61 - lr: 0.100000
2022-10-05 21:28:56,123 epoch 7 - iter 100/209 - loss 0.05599440 - samples/sec: 176.71 - lr: 0.100000
2022-10-05 21:29:00,661 epoch 7 - iter 120/209 - loss 0.05359963 - samples/sec: 141.13 - lr: 0.100000
2022-10-05 21:29:05,266 epoch 7 - iter 140/209 - loss 0.05388439 - samples/sec: 139

100%|██████████| 45/45 [00:08<00:00,  5.17it/s]

2022-10-05 21:29:26,500 Evaluating as a multi-label problem: False





2022-10-05 21:29:26,514 DEV : loss 0.04733815789222717 - f1-score (micro avg)  0.8326
2022-10-05 21:29:26,613 BAD EPOCHS (no improvement): 0
2022-10-05 21:29:26,617 saving best model
2022-10-05 21:29:30,762 ----------------------------------------------------------------------------------------------------
2022-10-05 21:29:36,049 epoch 8 - iter 20/209 - loss 0.04569936 - samples/sec: 121.22 - lr: 0.100000
2022-10-05 21:29:39,284 epoch 8 - iter 40/209 - loss 0.05077859 - samples/sec: 198.04 - lr: 0.100000
2022-10-05 21:29:43,534 epoch 8 - iter 60/209 - loss 0.04905524 - samples/sec: 150.73 - lr: 0.100000
2022-10-05 21:29:47,673 epoch 8 - iter 80/209 - loss 0.04949787 - samples/sec: 154.81 - lr: 0.100000
2022-10-05 21:29:51,841 epoch 8 - iter 100/209 - loss 0.04512084 - samples/sec: 153.69 - lr: 0.100000
2022-10-05 21:29:55,378 epoch 8 - iter 120/209 - loss 0.04542671 - samples/sec: 181.17 - lr: 0.100000
2022-10-05 21:30:00,029 epoch 8 - iter 140/209 - loss 0.04681635 - samples/sec: 137.

100%|██████████| 45/45 [00:08<00:00,  5.23it/s]

2022-10-05 21:30:21,083 Evaluating as a multi-label problem: False





2022-10-05 21:30:21,100 DEV : loss 0.04674920812249184 - f1-score (micro avg)  0.8346
2022-10-05 21:30:21,210 BAD EPOCHS (no improvement): 0
2022-10-05 21:30:21,215 saving best model
2022-10-05 21:30:25,358 ----------------------------------------------------------------------------------------------------
2022-10-05 21:30:29,194 epoch 9 - iter 20/209 - loss 0.03085014 - samples/sec: 167.11 - lr: 0.100000
2022-10-05 21:30:33,631 epoch 9 - iter 40/209 - loss 0.03677123 - samples/sec: 144.34 - lr: 0.100000
2022-10-05 21:30:37,387 epoch 9 - iter 60/209 - loss 0.03806133 - samples/sec: 170.60 - lr: 0.100000
2022-10-05 21:30:41,768 epoch 9 - iter 80/209 - loss 0.03881291 - samples/sec: 146.21 - lr: 0.100000
2022-10-05 21:30:44,895 epoch 9 - iter 100/209 - loss 0.03928512 - samples/sec: 204.98 - lr: 0.100000
2022-10-05 21:30:48,238 epoch 9 - iter 120/209 - loss 0.03969840 - samples/sec: 191.70 - lr: 0.100000
2022-10-05 21:30:52,730 epoch 9 - iter 140/209 - loss 0.03996324 - samples/sec: 142.

100%|██████████| 45/45 [00:08<00:00,  5.12it/s]

2022-10-05 21:31:14,594 Evaluating as a multi-label problem: False





2022-10-05 21:31:14,608 DEV : loss 0.04545168578624725 - f1-score (micro avg)  0.8434
2022-10-05 21:31:14,708 BAD EPOCHS (no improvement): 0
2022-10-05 21:31:14,713 saving best model
2022-10-05 21:31:18,878 ----------------------------------------------------------------------------------------------------
2022-10-05 21:31:23,917 epoch 10 - iter 20/209 - loss 0.03383951 - samples/sec: 127.13 - lr: 0.100000
2022-10-05 21:31:28,117 epoch 10 - iter 40/209 - loss 0.03514448 - samples/sec: 152.57 - lr: 0.100000
2022-10-05 21:31:31,723 epoch 10 - iter 60/209 - loss 0.03327105 - samples/sec: 177.70 - lr: 0.100000
2022-10-05 21:31:36,083 epoch 10 - iter 80/209 - loss 0.03435673 - samples/sec: 146.94 - lr: 0.100000
2022-10-05 21:31:39,629 epoch 10 - iter 100/209 - loss 0.03522639 - samples/sec: 180.65 - lr: 0.100000
2022-10-05 21:31:43,819 epoch 10 - iter 120/209 - loss 0.03621806 - samples/sec: 152.90 - lr: 0.100000
2022-10-05 21:31:47,296 epoch 10 - iter 140/209 - loss 0.03742392 - samples/se

100%|██████████| 45/45 [00:08<00:00,  5.10it/s]


2022-10-05 21:32:08,965 Evaluating as a multi-label problem: False
2022-10-05 21:32:08,986 DEV : loss 0.04424387961626053 - f1-score (micro avg)  0.8622
2022-10-05 21:32:09,090 BAD EPOCHS (no improvement): 0
2022-10-05 21:32:09,094 saving best model
2022-10-05 21:32:13,127 ----------------------------------------------------------------------------------------------------
2022-10-05 21:32:17,122 epoch 11 - iter 20/209 - loss 0.03257700 - samples/sec: 160.40 - lr: 0.100000
2022-10-05 21:32:21,086 epoch 11 - iter 40/209 - loss 0.03352178 - samples/sec: 161.64 - lr: 0.100000
2022-10-05 21:32:24,932 epoch 11 - iter 60/209 - loss 0.03510874 - samples/sec: 166.59 - lr: 0.100000
2022-10-05 21:32:28,739 epoch 11 - iter 80/209 - loss 0.03390923 - samples/sec: 168.31 - lr: 0.100000
2022-10-05 21:32:32,470 epoch 11 - iter 100/209 - loss 0.03517851 - samples/sec: 171.77 - lr: 0.100000
2022-10-05 21:32:36,650 epoch 11 - iter 120/209 - loss 0.03514172 - samples/sec: 153.29 - lr: 0.100000
2022-10-05 

100%|██████████| 45/45 [00:08<00:00,  5.25it/s]


2022-10-05 21:33:02,525 Evaluating as a multi-label problem: False
2022-10-05 21:33:02,539 DEV : loss 0.04265153035521507 - f1-score (micro avg)  0.8579
2022-10-05 21:33:02,639 BAD EPOCHS (no improvement): 1
2022-10-05 21:33:02,643 ----------------------------------------------------------------------------------------------------
2022-10-05 21:33:06,662 epoch 12 - iter 20/209 - loss 0.03155935 - samples/sec: 159.44 - lr: 0.100000
2022-10-05 21:33:10,075 epoch 12 - iter 40/209 - loss 0.03468160 - samples/sec: 187.73 - lr: 0.100000
2022-10-05 21:33:14,369 epoch 12 - iter 60/209 - loss 0.03069837 - samples/sec: 149.19 - lr: 0.100000
2022-10-05 21:33:18,599 epoch 12 - iter 80/209 - loss 0.03005796 - samples/sec: 151.44 - lr: 0.100000
2022-10-05 21:33:22,344 epoch 12 - iter 100/209 - loss 0.03097189 - samples/sec: 171.06 - lr: 0.100000
2022-10-05 21:33:25,843 epoch 12 - iter 120/209 - loss 0.03324901 - samples/sec: 183.16 - lr: 0.100000
2022-10-05 21:33:29,472 epoch 12 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.10it/s]


2022-10-05 21:33:51,380 Evaluating as a multi-label problem: False
2022-10-05 21:33:51,398 DEV : loss 0.04187864810228348 - f1-score (micro avg)  0.8607
2022-10-05 21:33:51,498 BAD EPOCHS (no improvement): 2
2022-10-05 21:33:51,502 ----------------------------------------------------------------------------------------------------
2022-10-05 21:33:54,614 epoch 13 - iter 20/209 - loss 0.03408621 - samples/sec: 206.06 - lr: 0.100000
2022-10-05 21:33:58,329 epoch 13 - iter 40/209 - loss 0.03399409 - samples/sec: 172.44 - lr: 0.100000
2022-10-05 21:34:02,751 epoch 13 - iter 60/209 - loss 0.03419849 - samples/sec: 144.89 - lr: 0.100000
2022-10-05 21:34:06,369 epoch 13 - iter 80/209 - loss 0.03294558 - samples/sec: 177.04 - lr: 0.100000
2022-10-05 21:34:10,190 epoch 13 - iter 100/209 - loss 0.03109294 - samples/sec: 167.71 - lr: 0.100000
2022-10-05 21:34:13,707 epoch 13 - iter 120/209 - loss 0.03091515 - samples/sec: 182.20 - lr: 0.100000
2022-10-05 21:34:17,424 epoch 13 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.26it/s]


2022-10-05 21:34:39,346 Evaluating as a multi-label problem: False
2022-10-05 21:34:39,360 DEV : loss 0.03841810300946236 - f1-score (micro avg)  0.8818
2022-10-05 21:34:39,460 BAD EPOCHS (no improvement): 0
2022-10-05 21:34:39,463 saving best model
2022-10-05 21:34:43,571 ----------------------------------------------------------------------------------------------------
2022-10-05 21:34:47,883 epoch 14 - iter 20/209 - loss 0.02195948 - samples/sec: 148.62 - lr: 0.100000
2022-10-05 21:34:51,565 epoch 14 - iter 40/209 - loss 0.02317068 - samples/sec: 174.01 - lr: 0.100000
2022-10-05 21:34:55,464 epoch 14 - iter 60/209 - loss 0.02174408 - samples/sec: 164.35 - lr: 0.100000
2022-10-05 21:34:59,431 epoch 14 - iter 80/209 - loss 0.02539978 - samples/sec: 161.51 - lr: 0.100000
2022-10-05 21:35:04,574 epoch 14 - iter 100/209 - loss 0.02694561 - samples/sec: 124.57 - lr: 0.100000
2022-10-05 21:35:08,458 epoch 14 - iter 120/209 - loss 0.02681864 - samples/sec: 164.95 - lr: 0.100000
2022-10-05 

100%|██████████| 45/45 [00:08<00:00,  5.13it/s]


2022-10-05 21:35:33,919 Evaluating as a multi-label problem: False
2022-10-05 21:35:33,932 DEV : loss 0.04374944046139717 - f1-score (micro avg)  0.8445
2022-10-05 21:35:34,033 BAD EPOCHS (no improvement): 1
2022-10-05 21:35:34,038 ----------------------------------------------------------------------------------------------------
2022-10-05 21:35:37,743 epoch 15 - iter 20/209 - loss 0.02351027 - samples/sec: 172.97 - lr: 0.100000
2022-10-05 21:35:42,481 epoch 15 - iter 40/209 - loss 0.02376470 - samples/sec: 135.21 - lr: 0.100000
2022-10-05 21:35:46,564 epoch 15 - iter 60/209 - loss 0.02417537 - samples/sec: 156.88 - lr: 0.100000
2022-10-05 21:35:50,730 epoch 15 - iter 80/209 - loss 0.02424160 - samples/sec: 153.79 - lr: 0.100000
2022-10-05 21:35:54,673 epoch 15 - iter 100/209 - loss 0.02404251 - samples/sec: 162.48 - lr: 0.100000
2022-10-05 21:35:58,269 epoch 15 - iter 120/209 - loss 0.02405063 - samples/sec: 178.22 - lr: 0.100000
2022-10-05 21:36:01,505 epoch 15 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.14it/s]

2022-10-05 21:36:22,224 Evaluating as a multi-label problem: False





2022-10-05 21:36:22,239 DEV : loss 0.04772869125008583 - f1-score (micro avg)  0.8637
2022-10-05 21:36:22,341 BAD EPOCHS (no improvement): 2
2022-10-05 21:36:22,345 ----------------------------------------------------------------------------------------------------
2022-10-05 21:36:25,883 epoch 16 - iter 20/209 - loss 0.01926824 - samples/sec: 181.16 - lr: 0.100000
2022-10-05 21:36:29,906 epoch 16 - iter 40/209 - loss 0.02653799 - samples/sec: 159.27 - lr: 0.100000
2022-10-05 21:36:33,716 epoch 16 - iter 60/209 - loss 0.02486661 - samples/sec: 168.14 - lr: 0.100000
2022-10-05 21:36:36,950 epoch 16 - iter 80/209 - loss 0.02407938 - samples/sec: 198.14 - lr: 0.100000
2022-10-05 21:36:40,413 epoch 16 - iter 100/209 - loss 0.02526230 - samples/sec: 185.08 - lr: 0.100000
2022-10-05 21:36:44,403 epoch 16 - iter 120/209 - loss 0.02407721 - samples/sec: 160.55 - lr: 0.100000
2022-10-05 21:36:48,319 epoch 16 - iter 140/209 - loss 0.02429720 - samples/sec: 163.62 - lr: 0.100000
2022-10-05 21:36:

100%|██████████| 45/45 [00:08<00:00,  5.17it/s]


2022-10-05 21:37:10,691 Evaluating as a multi-label problem: False
2022-10-05 21:37:10,705 DEV : loss 0.04398127645254135 - f1-score (micro avg)  0.8621
2022-10-05 21:37:10,808 BAD EPOCHS (no improvement): 3
2022-10-05 21:37:10,812 ----------------------------------------------------------------------------------------------------
2022-10-05 21:37:14,339 epoch 17 - iter 20/209 - loss 0.02031844 - samples/sec: 181.71 - lr: 0.100000
2022-10-05 21:37:17,409 epoch 17 - iter 40/209 - loss 0.02325208 - samples/sec: 208.75 - lr: 0.100000
2022-10-05 21:37:22,055 epoch 17 - iter 60/209 - loss 0.02726224 - samples/sec: 137.91 - lr: 0.100000
2022-10-05 21:37:25,625 epoch 17 - iter 80/209 - loss 0.02616710 - samples/sec: 179.48 - lr: 0.100000
2022-10-05 21:37:29,351 epoch 17 - iter 100/209 - loss 0.02663420 - samples/sec: 171.91 - lr: 0.100000
2022-10-05 21:37:33,062 epoch 17 - iter 120/209 - loss 0.02624300 - samples/sec: 172.63 - lr: 0.100000
2022-10-05 21:37:37,218 epoch 17 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.11it/s]

2022-10-05 21:37:59,071 Evaluating as a multi-label problem: False





2022-10-05 21:37:59,085 DEV : loss 0.0367460660636425 - f1-score (micro avg)  0.8754
2022-10-05 21:37:59,187 Epoch    17: reducing learning rate of group 0 to 5.0000e-02.
2022-10-05 21:37:59,189 BAD EPOCHS (no improvement): 4
2022-10-05 21:37:59,193 ----------------------------------------------------------------------------------------------------
2022-10-05 21:38:03,376 epoch 18 - iter 20/209 - loss 0.01589900 - samples/sec: 153.19 - lr: 0.050000
2022-10-05 21:38:07,760 epoch 18 - iter 40/209 - loss 0.01636240 - samples/sec: 146.12 - lr: 0.050000
2022-10-05 21:38:10,961 epoch 18 - iter 60/209 - loss 0.01623631 - samples/sec: 200.20 - lr: 0.050000
2022-10-05 21:38:15,519 epoch 18 - iter 80/209 - loss 0.01857460 - samples/sec: 140.52 - lr: 0.050000
2022-10-05 21:38:19,262 epoch 18 - iter 100/209 - loss 0.01845181 - samples/sec: 171.20 - lr: 0.050000
2022-10-05 21:38:23,174 epoch 18 - iter 120/209 - loss 0.01829115 - samples/sec: 163.74 - lr: 0.050000
2022-10-05 21:38:27,054 epoch 18 - 

100%|██████████| 45/45 [00:08<00:00,  5.18it/s]

2022-10-05 21:38:47,782 Evaluating as a multi-label problem: False





2022-10-05 21:38:47,797 DEV : loss 0.041797056794166565 - f1-score (micro avg)  0.8797
2022-10-05 21:38:47,901 BAD EPOCHS (no improvement): 1
2022-10-05 21:38:47,906 ----------------------------------------------------------------------------------------------------
2022-10-05 21:38:52,111 epoch 19 - iter 20/209 - loss 0.01687931 - samples/sec: 152.37 - lr: 0.050000
2022-10-05 21:38:55,825 epoch 19 - iter 40/209 - loss 0.01821464 - samples/sec: 172.52 - lr: 0.050000
2022-10-05 21:38:59,055 epoch 19 - iter 60/209 - loss 0.01724672 - samples/sec: 198.46 - lr: 0.050000
2022-10-05 21:39:02,551 epoch 19 - iter 80/209 - loss 0.01668321 - samples/sec: 183.23 - lr: 0.050000
2022-10-05 21:39:06,801 epoch 19 - iter 100/209 - loss 0.01839335 - samples/sec: 150.75 - lr: 0.050000
2022-10-05 21:39:10,485 epoch 19 - iter 120/209 - loss 0.01732240 - samples/sec: 173.87 - lr: 0.050000
2022-10-05 21:39:15,106 epoch 19 - iter 140/209 - loss 0.01673944 - samples/sec: 138.63 - lr: 0.050000
2022-10-05 21:39

100%|██████████| 45/45 [00:08<00:00,  5.26it/s]

2022-10-05 21:39:35,257 Evaluating as a multi-label problem: False





2022-10-05 21:39:35,272 DEV : loss 0.0396113283932209 - f1-score (micro avg)  0.8806
2022-10-05 21:39:35,372 BAD EPOCHS (no improvement): 2
2022-10-05 21:39:35,376 ----------------------------------------------------------------------------------------------------
2022-10-05 21:39:39,487 epoch 20 - iter 20/209 - loss 0.01300777 - samples/sec: 155.87 - lr: 0.050000
2022-10-05 21:39:43,108 epoch 20 - iter 40/209 - loss 0.01113714 - samples/sec: 176.99 - lr: 0.050000
2022-10-05 21:39:46,728 epoch 20 - iter 60/209 - loss 0.01352605 - samples/sec: 177.00 - lr: 0.050000
2022-10-05 21:39:50,748 epoch 20 - iter 80/209 - loss 0.01432690 - samples/sec: 159.37 - lr: 0.050000
2022-10-05 21:39:54,557 epoch 20 - iter 100/209 - loss 0.01359308 - samples/sec: 168.25 - lr: 0.050000
2022-10-05 21:39:58,185 epoch 20 - iter 120/209 - loss 0.01347934 - samples/sec: 176.60 - lr: 0.050000
2022-10-05 21:40:01,908 epoch 20 - iter 140/209 - loss 0.01329447 - samples/sec: 172.09 - lr: 0.050000
2022-10-05 21:40:0

100%|██████████| 45/45 [00:08<00:00,  5.11it/s]

2022-10-05 21:40:23,549 Evaluating as a multi-label problem: False





2022-10-05 21:40:23,565 DEV : loss 0.04213029518723488 - f1-score (micro avg)  0.8846
2022-10-05 21:40:23,666 BAD EPOCHS (no improvement): 0
2022-10-05 21:40:23,671 saving best model
2022-10-05 21:40:27,755 ----------------------------------------------------------------------------------------------------
2022-10-05 21:40:31,723 epoch 21 - iter 20/209 - loss 0.00371124 - samples/sec: 161.53 - lr: 0.050000
2022-10-05 21:40:35,305 epoch 21 - iter 40/209 - loss 0.00980371 - samples/sec: 178.86 - lr: 0.050000
2022-10-05 21:40:39,545 epoch 21 - iter 60/209 - loss 0.01403910 - samples/sec: 151.18 - lr: 0.050000
2022-10-05 21:40:43,052 epoch 21 - iter 80/209 - loss 0.01371703 - samples/sec: 182.72 - lr: 0.050000
2022-10-05 21:40:47,045 epoch 21 - iter 100/209 - loss 0.01472907 - samples/sec: 160.43 - lr: 0.050000
2022-10-05 21:40:51,240 epoch 21 - iter 120/209 - loss 0.01510171 - samples/sec: 152.70 - lr: 0.050000
2022-10-05 21:40:55,579 epoch 21 - iter 140/209 - loss 0.01595048 - samples/se

100%|██████████| 45/45 [00:08<00:00,  5.17it/s]

2022-10-05 21:41:17,095 Evaluating as a multi-label problem: False





2022-10-05 21:41:17,111 DEV : loss 0.04122639447450638 - f1-score (micro avg)  0.8935
2022-10-05 21:41:17,215 BAD EPOCHS (no improvement): 0
2022-10-05 21:41:17,219 saving best model
2022-10-05 21:41:21,313 ----------------------------------------------------------------------------------------------------
2022-10-05 21:41:24,749 epoch 22 - iter 20/209 - loss 0.02159565 - samples/sec: 186.60 - lr: 0.050000
2022-10-05 21:41:28,929 epoch 22 - iter 40/209 - loss 0.01633133 - samples/sec: 153.26 - lr: 0.050000
2022-10-05 21:41:32,274 epoch 22 - iter 60/209 - loss 0.01381400 - samples/sec: 191.55 - lr: 0.050000
2022-10-05 21:41:35,733 epoch 22 - iter 80/209 - loss 0.01360445 - samples/sec: 185.26 - lr: 0.050000
2022-10-05 21:41:39,476 epoch 22 - iter 100/209 - loss 0.01258573 - samples/sec: 171.15 - lr: 0.050000
2022-10-05 21:41:43,201 epoch 22 - iter 120/209 - loss 0.01258263 - samples/sec: 172.04 - lr: 0.050000
2022-10-05 21:41:47,787 epoch 22 - iter 140/209 - loss 0.01326233 - samples/se

100%|██████████| 45/45 [00:08<00:00,  5.23it/s]

2022-10-05 21:42:10,582 Evaluating as a multi-label problem: False





2022-10-05 21:42:10,596 DEV : loss 0.0426638089120388 - f1-score (micro avg)  0.8797
2022-10-05 21:42:10,698 BAD EPOCHS (no improvement): 1
2022-10-05 21:42:10,704 ----------------------------------------------------------------------------------------------------
2022-10-05 21:42:15,188 epoch 23 - iter 20/209 - loss 0.01521899 - samples/sec: 142.89 - lr: 0.050000
2022-10-05 21:42:18,341 epoch 23 - iter 40/209 - loss 0.01296490 - samples/sec: 203.21 - lr: 0.050000
2022-10-05 21:42:22,000 epoch 23 - iter 60/209 - loss 0.01339334 - samples/sec: 175.11 - lr: 0.050000
2022-10-05 21:42:26,856 epoch 23 - iter 80/209 - loss 0.01191753 - samples/sec: 131.90 - lr: 0.050000
2022-10-05 21:42:30,215 epoch 23 - iter 100/209 - loss 0.01157915 - samples/sec: 190.75 - lr: 0.050000
2022-10-05 21:42:33,829 epoch 23 - iter 120/209 - loss 0.01163532 - samples/sec: 177.29 - lr: 0.050000
2022-10-05 21:42:37,496 epoch 23 - iter 140/209 - loss 0.01139401 - samples/sec: 174.69 - lr: 0.050000
2022-10-05 21:42:4

100%|██████████| 45/45 [00:08<00:00,  5.09it/s]


2022-10-05 21:42:58,846 Evaluating as a multi-label problem: False
2022-10-05 21:42:58,860 DEV : loss 0.041186459362506866 - f1-score (micro avg)  0.886
2022-10-05 21:42:58,961 BAD EPOCHS (no improvement): 2
2022-10-05 21:42:58,965 ----------------------------------------------------------------------------------------------------
2022-10-05 21:43:02,872 epoch 24 - iter 20/209 - loss 0.01441993 - samples/sec: 164.04 - lr: 0.050000
2022-10-05 21:43:06,447 epoch 24 - iter 40/209 - loss 0.01325895 - samples/sec: 179.19 - lr: 0.050000
2022-10-05 21:43:11,706 epoch 24 - iter 60/209 - loss 0.01324376 - samples/sec: 121.81 - lr: 0.050000
2022-10-05 21:43:15,686 epoch 24 - iter 80/209 - loss 0.01326353 - samples/sec: 160.97 - lr: 0.050000
2022-10-05 21:43:19,263 epoch 24 - iter 100/209 - loss 0.01236676 - samples/sec: 179.15 - lr: 0.050000
2022-10-05 21:43:22,426 epoch 24 - iter 120/209 - loss 0.01247524 - samples/sec: 202.64 - lr: 0.050000
2022-10-05 21:43:25,758 epoch 24 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.27it/s]

2022-10-05 21:43:47,072 Evaluating as a multi-label problem: False





2022-10-05 21:43:47,087 DEV : loss 0.039838846772909164 - f1-score (micro avg)  0.8885
2022-10-05 21:43:47,189 BAD EPOCHS (no improvement): 3
2022-10-05 21:43:47,193 ----------------------------------------------------------------------------------------------------
2022-10-05 21:43:50,905 epoch 25 - iter 20/209 - loss 0.01756077 - samples/sec: 172.65 - lr: 0.050000
2022-10-05 21:43:54,767 epoch 25 - iter 40/209 - loss 0.01404397 - samples/sec: 165.90 - lr: 0.050000
2022-10-05 21:43:57,935 epoch 25 - iter 60/209 - loss 0.01368055 - samples/sec: 202.26 - lr: 0.050000
2022-10-05 21:44:01,282 epoch 25 - iter 80/209 - loss 0.01328832 - samples/sec: 191.48 - lr: 0.050000
2022-10-05 21:44:05,046 epoch 25 - iter 100/209 - loss 0.01241484 - samples/sec: 170.21 - lr: 0.050000
2022-10-05 21:44:09,397 epoch 25 - iter 120/209 - loss 0.01314229 - samples/sec: 147.25 - lr: 0.050000
2022-10-05 21:44:13,203 epoch 25 - iter 140/209 - loss 0.01241984 - samples/sec: 168.31 - lr: 0.050000
2022-10-05 21:44

100%|██████████| 45/45 [00:08<00:00,  5.21it/s]

2022-10-05 21:44:35,015 Evaluating as a multi-label problem: False





2022-10-05 21:44:35,029 DEV : loss 0.043526165187358856 - f1-score (micro avg)  0.8897
2022-10-05 21:44:35,131 Epoch    25: reducing learning rate of group 0 to 2.5000e-02.
2022-10-05 21:44:35,133 BAD EPOCHS (no improvement): 4
2022-10-05 21:44:35,138 ----------------------------------------------------------------------------------------------------
2022-10-05 21:44:39,131 epoch 26 - iter 20/209 - loss 0.00972412 - samples/sec: 160.43 - lr: 0.025000
2022-10-05 21:44:43,122 epoch 26 - iter 40/209 - loss 0.01112099 - samples/sec: 160.51 - lr: 0.025000
2022-10-05 21:44:46,916 epoch 26 - iter 60/209 - loss 0.01039840 - samples/sec: 168.87 - lr: 0.025000
2022-10-05 21:44:50,956 epoch 26 - iter 80/209 - loss 0.01176128 - samples/sec: 158.58 - lr: 0.025000
2022-10-05 21:44:54,668 epoch 26 - iter 100/209 - loss 0.01129144 - samples/sec: 172.62 - lr: 0.025000
2022-10-05 21:44:58,508 epoch 26 - iter 120/209 - loss 0.01102369 - samples/sec: 166.83 - lr: 0.025000
2022-10-05 21:45:01,982 epoch 26 

100%|██████████| 45/45 [00:08<00:00,  5.09it/s]


2022-10-05 21:45:23,754 Evaluating as a multi-label problem: False
2022-10-05 21:45:23,769 DEV : loss 0.04127642512321472 - f1-score (micro avg)  0.8914
2022-10-05 21:45:23,885 BAD EPOCHS (no improvement): 1
2022-10-05 21:45:23,890 ----------------------------------------------------------------------------------------------------
2022-10-05 21:45:27,378 epoch 27 - iter 20/209 - loss 0.01392249 - samples/sec: 183.80 - lr: 0.025000
2022-10-05 21:45:30,931 epoch 27 - iter 40/209 - loss 0.01211990 - samples/sec: 180.32 - lr: 0.025000
2022-10-05 21:45:34,673 epoch 27 - iter 60/209 - loss 0.01059058 - samples/sec: 171.20 - lr: 0.025000
2022-10-05 21:45:38,181 epoch 27 - iter 80/209 - loss 0.00973524 - samples/sec: 182.66 - lr: 0.025000
2022-10-05 21:45:41,878 epoch 27 - iter 100/209 - loss 0.00975362 - samples/sec: 173.31 - lr: 0.025000
2022-10-05 21:45:45,633 epoch 27 - iter 120/209 - loss 0.01002076 - samples/sec: 170.64 - lr: 0.025000
2022-10-05 21:45:49,587 epoch 27 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.19it/s]


2022-10-05 21:46:11,898 Evaluating as a multi-label problem: False
2022-10-05 21:46:11,911 DEV : loss 0.04112984240055084 - f1-score (micro avg)  0.8856
2022-10-05 21:46:12,010 BAD EPOCHS (no improvement): 2
2022-10-05 21:46:12,014 ----------------------------------------------------------------------------------------------------
2022-10-05 21:46:15,677 epoch 28 - iter 20/209 - loss 0.00988743 - samples/sec: 175.01 - lr: 0.025000
2022-10-05 21:46:20,417 epoch 28 - iter 40/209 - loss 0.00989922 - samples/sec: 135.15 - lr: 0.025000
2022-10-05 21:46:24,500 epoch 28 - iter 60/209 - loss 0.00933759 - samples/sec: 156.90 - lr: 0.025000
2022-10-05 21:46:28,285 epoch 28 - iter 80/209 - loss 0.00892438 - samples/sec: 169.27 - lr: 0.025000
2022-10-05 21:46:31,363 epoch 28 - iter 100/209 - loss 0.00961709 - samples/sec: 208.22 - lr: 0.025000
2022-10-05 21:46:35,196 epoch 28 - iter 120/209 - loss 0.00924728 - samples/sec: 167.12 - lr: 0.025000
2022-10-05 21:46:38,907 epoch 28 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.15it/s]

2022-10-05 21:47:00,729 Evaluating as a multi-label problem: False
2022-10-05 21:47:00,743 DEV : loss 0.04193611070513725 - f1-score (micro avg)  0.892
2022-10-05 21:47:00,852 BAD EPOCHS (no improvement): 3
2022-10-05 21:47:00,857 ----------------------------------------------------------------------------------------------------





2022-10-05 21:47:04,946 epoch 29 - iter 20/209 - loss 0.01232944 - samples/sec: 156.72 - lr: 0.025000
2022-10-05 21:47:07,896 epoch 29 - iter 40/209 - loss 0.01164638 - samples/sec: 217.21 - lr: 0.025000
2022-10-05 21:47:11,585 epoch 29 - iter 60/209 - loss 0.00926472 - samples/sec: 173.67 - lr: 0.025000
2022-10-05 21:47:15,230 epoch 29 - iter 80/209 - loss 0.01005147 - samples/sec: 175.79 - lr: 0.025000
2022-10-05 21:47:18,848 epoch 29 - iter 100/209 - loss 0.00973328 - samples/sec: 177.09 - lr: 0.025000
2022-10-05 21:47:22,173 epoch 29 - iter 120/209 - loss 0.00959216 - samples/sec: 192.73 - lr: 0.025000
2022-10-05 21:47:26,028 epoch 29 - iter 140/209 - loss 0.00972614 - samples/sec: 166.18 - lr: 0.025000
2022-10-05 21:47:30,402 epoch 29 - iter 160/209 - loss 0.01041004 - samples/sec: 146.50 - lr: 0.025000
2022-10-05 21:47:33,642 epoch 29 - iter 180/209 - loss 0.01085476 - samples/sec: 197.75 - lr: 0.025000
2022-10-05 21:47:38,627 epoch 29 - iter 200/209 - loss 0.01115439 - samples/s

100%|██████████| 45/45 [00:08<00:00,  5.22it/s]


2022-10-05 21:47:48,719 Evaluating as a multi-label problem: False
2022-10-05 21:47:48,733 DEV : loss 0.04304569214582443 - f1-score (micro avg)  0.8929
2022-10-05 21:47:48,833 Epoch    29: reducing learning rate of group 0 to 1.2500e-02.
2022-10-05 21:47:48,834 BAD EPOCHS (no improvement): 4
2022-10-05 21:47:48,841 ----------------------------------------------------------------------------------------------------
2022-10-05 21:47:51,979 epoch 30 - iter 20/209 - loss 0.00900415 - samples/sec: 204.26 - lr: 0.012500
2022-10-05 21:47:56,100 epoch 30 - iter 40/209 - loss 0.00930958 - samples/sec: 155.48 - lr: 0.012500
2022-10-05 21:48:00,595 epoch 30 - iter 60/209 - loss 0.00918096 - samples/sec: 142.48 - lr: 0.012500
2022-10-05 21:48:04,216 epoch 30 - iter 80/209 - loss 0.00951263 - samples/sec: 176.94 - lr: 0.012500
2022-10-05 21:48:07,869 epoch 30 - iter 100/209 - loss 0.00919009 - samples/sec: 175.40 - lr: 0.012500
2022-10-05 21:48:10,912 epoch 30 - iter 120/209 - loss 0.00882923 - sa

100%|██████████| 45/45 [00:08<00:00,  5.12it/s]


2022-10-05 21:48:36,782 Evaluating as a multi-label problem: False
2022-10-05 21:48:36,797 DEV : loss 0.043189745396375656 - f1-score (micro avg)  0.8987
2022-10-05 21:48:36,901 BAD EPOCHS (no improvement): 0
2022-10-05 21:48:36,905 saving best model
2022-10-05 21:48:41,005 ----------------------------------------------------------------------------------------------------
2022-10-05 21:48:44,992 epoch 31 - iter 20/209 - loss 0.01111741 - samples/sec: 160.73 - lr: 0.012500
2022-10-05 21:48:49,145 epoch 31 - iter 40/209 - loss 0.01065424 - samples/sec: 154.26 - lr: 0.012500
2022-10-05 21:48:53,016 epoch 31 - iter 60/209 - loss 0.00924915 - samples/sec: 165.50 - lr: 0.012500
2022-10-05 21:48:56,662 epoch 31 - iter 80/209 - loss 0.00945991 - samples/sec: 175.73 - lr: 0.012500
2022-10-05 21:49:00,834 epoch 31 - iter 100/209 - loss 0.00867199 - samples/sec: 153.62 - lr: 0.012500
2022-10-05 21:49:04,744 epoch 31 - iter 120/209 - loss 0.00863448 - samples/sec: 163.81 - lr: 0.012500
2022-10-05

100%|██████████| 45/45 [00:08<00:00,  5.27it/s]

2022-10-05 21:49:29,894 Evaluating as a multi-label problem: False





2022-10-05 21:49:29,908 DEV : loss 0.04211299121379852 - f1-score (micro avg)  0.8972
2022-10-05 21:49:30,009 BAD EPOCHS (no improvement): 1
2022-10-05 21:49:30,013 ----------------------------------------------------------------------------------------------------
2022-10-05 21:49:33,682 epoch 32 - iter 20/209 - loss 0.00678333 - samples/sec: 174.68 - lr: 0.012500
2022-10-05 21:49:38,029 epoch 32 - iter 40/209 - loss 0.00725025 - samples/sec: 147.39 - lr: 0.012500
2022-10-05 21:49:41,469 epoch 32 - iter 60/209 - loss 0.00685618 - samples/sec: 186.24 - lr: 0.012500
2022-10-05 21:49:45,511 epoch 32 - iter 80/209 - loss 0.00881562 - samples/sec: 158.47 - lr: 0.012500
2022-10-05 21:49:48,498 epoch 32 - iter 100/209 - loss 0.00954683 - samples/sec: 214.51 - lr: 0.012500
2022-10-05 21:49:51,768 epoch 32 - iter 120/209 - loss 0.00926375 - samples/sec: 195.93 - lr: 0.012500
2022-10-05 21:49:55,634 epoch 32 - iter 140/209 - loss 0.00898170 - samples/sec: 165.76 - lr: 0.012500
2022-10-05 21:49:

100%|██████████| 45/45 [00:08<00:00,  5.07it/s]


2022-10-05 21:50:17,423 Evaluating as a multi-label problem: False
2022-10-05 21:50:17,436 DEV : loss 0.042406294494867325 - f1-score (micro avg)  0.8945
2022-10-05 21:50:17,537 BAD EPOCHS (no improvement): 2
2022-10-05 21:50:17,543 ----------------------------------------------------------------------------------------------------
2022-10-05 21:50:21,044 epoch 33 - iter 20/209 - loss 0.01397413 - samples/sec: 182.99 - lr: 0.012500
2022-10-05 21:50:24,261 epoch 33 - iter 40/209 - loss 0.00987107 - samples/sec: 199.21 - lr: 0.012500
2022-10-05 21:50:27,939 epoch 33 - iter 60/209 - loss 0.01083281 - samples/sec: 174.19 - lr: 0.012500
2022-10-05 21:50:32,133 epoch 33 - iter 80/209 - loss 0.00992414 - samples/sec: 152.75 - lr: 0.012500
2022-10-05 21:50:37,101 epoch 33 - iter 100/209 - loss 0.01058709 - samples/sec: 128.92 - lr: 0.012500
2022-10-05 21:50:40,321 epoch 33 - iter 120/209 - loss 0.00999387 - samples/sec: 199.00 - lr: 0.012500
2022-10-05 21:50:43,540 epoch 33 - iter 140/209 - lo

100%|██████████| 45/45 [00:08<00:00,  5.17it/s]

2022-10-05 21:51:05,092 Evaluating as a multi-label problem: False





2022-10-05 21:51:05,108 DEV : loss 0.042864881455898285 - f1-score (micro avg)  0.8968
2022-10-05 21:51:05,210 BAD EPOCHS (no improvement): 3
2022-10-05 21:51:05,215 ----------------------------------------------------------------------------------------------------
2022-10-05 21:51:08,666 epoch 34 - iter 20/209 - loss 0.00942216 - samples/sec: 185.79 - lr: 0.012500
2022-10-05 21:51:12,734 epoch 34 - iter 40/209 - loss 0.00929236 - samples/sec: 157.44 - lr: 0.012500
2022-10-05 21:51:17,409 epoch 34 - iter 60/209 - loss 0.00933935 - samples/sec: 137.03 - lr: 0.012500
2022-10-05 21:51:21,556 epoch 34 - iter 80/209 - loss 0.00886663 - samples/sec: 154.47 - lr: 0.012500
2022-10-05 21:51:24,563 epoch 34 - iter 100/209 - loss 0.00867597 - samples/sec: 213.14 - lr: 0.012500
2022-10-05 21:51:28,275 epoch 34 - iter 120/209 - loss 0.00867409 - samples/sec: 172.58 - lr: 0.012500
2022-10-05 21:51:32,453 epoch 34 - iter 140/209 - loss 0.00887532 - samples/sec: 153.32 - lr: 0.012500
2022-10-05 21:51

100%|██████████| 45/45 [00:08<00:00,  5.30it/s]

2022-10-05 21:51:52,664 Evaluating as a multi-label problem: False





2022-10-05 21:51:52,678 DEV : loss 0.04302244633436203 - f1-score (micro avg)  0.8985
2022-10-05 21:51:52,779 Epoch    34: reducing learning rate of group 0 to 6.2500e-03.
2022-10-05 21:51:52,780 BAD EPOCHS (no improvement): 4
2022-10-05 21:51:52,784 ----------------------------------------------------------------------------------------------------
2022-10-05 21:51:56,714 epoch 35 - iter 20/209 - loss 0.01065642 - samples/sec: 163.10 - lr: 0.006250
2022-10-05 21:52:00,834 epoch 35 - iter 40/209 - loss 0.00976164 - samples/sec: 155.51 - lr: 0.006250
2022-10-05 21:52:04,506 epoch 35 - iter 60/209 - loss 0.00823052 - samples/sec: 174.48 - lr: 0.006250
2022-10-05 21:52:08,601 epoch 35 - iter 80/209 - loss 0.00869985 - samples/sec: 156.43 - lr: 0.006250
2022-10-05 21:52:12,091 epoch 35 - iter 100/209 - loss 0.00803104 - samples/sec: 183.57 - lr: 0.006250
2022-10-05 21:52:15,602 epoch 35 - iter 120/209 - loss 0.00810181 - samples/sec: 182.45 - lr: 0.006250
2022-10-05 21:52:19,064 epoch 35 -

100%|██████████| 45/45 [00:08<00:00,  5.14it/s]


2022-10-05 21:52:40,786 Evaluating as a multi-label problem: False
2022-10-05 21:52:40,801 DEV : loss 0.0440131239593029 - f1-score (micro avg)  0.8977
2022-10-05 21:52:40,918 BAD EPOCHS (no improvement): 1
2022-10-05 21:52:40,922 ----------------------------------------------------------------------------------------------------
2022-10-05 21:52:44,901 epoch 36 - iter 20/209 - loss 0.01148051 - samples/sec: 161.06 - lr: 0.006250
2022-10-05 21:52:48,902 epoch 36 - iter 40/209 - loss 0.00975667 - samples/sec: 160.11 - lr: 0.006250
2022-10-05 21:52:53,074 epoch 36 - iter 60/209 - loss 0.00880615 - samples/sec: 153.57 - lr: 0.006250
2022-10-05 21:52:56,618 epoch 36 - iter 80/209 - loss 0.00809481 - samples/sec: 180.77 - lr: 0.006250
2022-10-05 21:53:00,044 epoch 36 - iter 100/209 - loss 0.00777736 - samples/sec: 186.99 - lr: 0.006250
2022-10-05 21:53:04,398 epoch 36 - iter 120/209 - loss 0.00788669 - samples/sec: 147.12 - lr: 0.006250
2022-10-05 21:53:08,369 epoch 36 - iter 140/209 - loss

100%|██████████| 45/45 [00:08<00:00,  5.29it/s]

2022-10-05 21:53:29,474 Evaluating as a multi-label problem: False





2022-10-05 21:53:29,489 DEV : loss 0.0434238463640213 - f1-score (micro avg)  0.8989
2022-10-05 21:53:29,589 BAD EPOCHS (no improvement): 0
2022-10-05 21:53:29,594 saving best model
2022-10-05 21:53:34,335 ----------------------------------------------------------------------------------------------------
2022-10-05 21:53:38,732 epoch 37 - iter 20/209 - loss 0.00775726 - samples/sec: 145.73 - lr: 0.006250
2022-10-05 21:53:42,823 epoch 37 - iter 40/209 - loss 0.00624281 - samples/sec: 156.59 - lr: 0.006250
2022-10-05 21:53:46,547 epoch 37 - iter 60/209 - loss 0.00650477 - samples/sec: 172.05 - lr: 0.006250
2022-10-05 21:53:50,000 epoch 37 - iter 80/209 - loss 0.00736840 - samples/sec: 185.55 - lr: 0.006250
2022-10-05 21:53:54,004 epoch 37 - iter 100/209 - loss 0.00780496 - samples/sec: 159.97 - lr: 0.006250
2022-10-05 21:53:57,170 epoch 37 - iter 120/209 - loss 0.00727453 - samples/sec: 202.39 - lr: 0.006250
2022-10-05 21:54:01,689 epoch 37 - iter 140/209 - loss 0.00776301 - samples/sec

100%|██████████| 45/45 [00:08<00:00,  5.31it/s]

2022-10-05 21:54:22,653 Evaluating as a multi-label problem: False





2022-10-05 21:54:22,668 DEV : loss 0.043052833527326584 - f1-score (micro avg)  0.8954
2022-10-05 21:54:22,769 BAD EPOCHS (no improvement): 1
2022-10-05 21:54:22,773 ----------------------------------------------------------------------------------------------------
2022-10-05 21:54:26,454 epoch 38 - iter 20/209 - loss 0.00253162 - samples/sec: 174.10 - lr: 0.006250
2022-10-05 21:54:30,802 epoch 38 - iter 40/209 - loss 0.00448836 - samples/sec: 147.37 - lr: 0.006250
2022-10-05 21:54:34,672 epoch 38 - iter 60/209 - loss 0.00542029 - samples/sec: 165.52 - lr: 0.006250
2022-10-05 21:54:39,000 epoch 38 - iter 80/209 - loss 0.00624532 - samples/sec: 148.02 - lr: 0.006250
2022-10-05 21:54:42,693 epoch 38 - iter 100/209 - loss 0.00665653 - samples/sec: 173.49 - lr: 0.006250
2022-10-05 21:54:46,071 epoch 38 - iter 120/209 - loss 0.00628900 - samples/sec: 189.64 - lr: 0.006250
2022-10-05 21:54:49,305 epoch 38 - iter 140/209 - loss 0.00668611 - samples/sec: 198.14 - lr: 0.006250
2022-10-05 21:54

100%|██████████| 45/45 [00:08<00:00,  5.22it/s]

2022-10-05 21:55:11,833 Evaluating as a multi-label problem: False





2022-10-05 21:55:11,850 DEV : loss 0.044001515954732895 - f1-score (micro avg)  0.8931
2022-10-05 21:55:11,953 BAD EPOCHS (no improvement): 2
2022-10-05 21:55:11,957 ----------------------------------------------------------------------------------------------------
2022-10-05 21:55:16,430 epoch 39 - iter 20/209 - loss 0.00961082 - samples/sec: 143.24 - lr: 0.006250
2022-10-05 21:55:20,353 epoch 39 - iter 40/209 - loss 0.00901881 - samples/sec: 163.33 - lr: 0.006250
2022-10-05 21:55:24,674 epoch 39 - iter 60/209 - loss 0.00789886 - samples/sec: 148.26 - lr: 0.006250
2022-10-05 21:55:28,578 epoch 39 - iter 80/209 - loss 0.00775143 - samples/sec: 164.09 - lr: 0.006250
2022-10-05 21:55:32,360 epoch 39 - iter 100/209 - loss 0.00720805 - samples/sec: 169.40 - lr: 0.006250
2022-10-05 21:55:35,384 epoch 39 - iter 120/209 - loss 0.00781003 - samples/sec: 211.90 - lr: 0.006250
2022-10-05 21:55:39,154 epoch 39 - iter 140/209 - loss 0.00766624 - samples/sec: 169.92 - lr: 0.006250
2022-10-05 21:55

100%|██████████| 45/45 [00:08<00:00,  5.14it/s]


2022-10-05 21:55:59,523 Evaluating as a multi-label problem: False
2022-10-05 21:55:59,536 DEV : loss 0.04349417984485626 - f1-score (micro avg)  0.8943
2022-10-05 21:55:59,637 BAD EPOCHS (no improvement): 3
2022-10-05 21:55:59,641 ----------------------------------------------------------------------------------------------------
2022-10-05 21:56:04,154 epoch 40 - iter 20/209 - loss 0.00492850 - samples/sec: 141.98 - lr: 0.006250
2022-10-05 21:56:07,492 epoch 40 - iter 40/209 - loss 0.00820768 - samples/sec: 191.95 - lr: 0.006250
2022-10-05 21:56:11,218 epoch 40 - iter 60/209 - loss 0.00792448 - samples/sec: 171.96 - lr: 0.006250
2022-10-05 21:56:15,399 epoch 40 - iter 80/209 - loss 0.00716753 - samples/sec: 153.22 - lr: 0.006250
2022-10-05 21:56:19,689 epoch 40 - iter 100/209 - loss 0.00836589 - samples/sec: 149.32 - lr: 0.006250
2022-10-05 21:56:23,723 epoch 40 - iter 120/209 - loss 0.00816008 - samples/sec: 158.81 - lr: 0.006250
2022-10-05 21:56:28,041 epoch 40 - iter 140/209 - los

100%|██████████| 45/45 [00:08<00:00,  5.25it/s]

2022-10-05 21:56:48,041 Evaluating as a multi-label problem: False





2022-10-05 21:56:48,056 DEV : loss 0.04413469880819321 - f1-score (micro avg)  0.8954
2022-10-05 21:56:48,155 Epoch    40: reducing learning rate of group 0 to 3.1250e-03.
2022-10-05 21:56:48,158 BAD EPOCHS (no improvement): 4
2022-10-05 21:56:57,847 ----------------------------------------------------------------------------------------------------
2022-10-05 21:56:57,850 loading file /content/drive/MyDrive/Flair_NLP/sota-ner-flair/best-model.pt
2022-10-05 21:57:00,436 SequenceTagger predicts: Dictionary with 31 tags: O, S-PESSOA, B-PESSOA, E-PESSOA, I-PESSOA, S-FUNDAMENTO, B-FUNDAMENTO, E-FUNDAMENTO, I-FUNDAMENTO, S-ORGANIZACAO, B-ORGANIZACAO, E-ORGANIZACAO, I-ORGANIZACAO, S-DATA, B-DATA, E-DATA, I-DATA, S-LOCAL, B-LOCAL, E-LOCAL, I-LOCAL, S-PRODUTODELEI, B-PRODUTODELEI, E-PRODUTODELEI, I-PRODUTODELEI, S-EVENTO, B-EVENTO, E-EVENTO, I-EVENTO, <START>, <STOP>


100%|██████████| 45/45 [00:38<00:00,  1.18it/s]


2022-10-05 21:57:39,697 Evaluating as a multi-label problem: False
2022-10-05 21:57:39,710 0.8842	0.9048	0.8944	0.8138
2022-10-05 21:57:39,712 
Results:
- F-score (micro) 0.8944
- F-score (macro) 0.8532
- Accuracy 0.8138

By class:
              precision    recall  f1-score   support

  FUNDAMENTO     0.9200    0.9274    0.9237       124
      PESSOA     0.9256    0.9412    0.9333       119
       LOCAL     0.8037    0.8515    0.8269       101
        DATA     0.9697    0.9796    0.9746        98
 ORGANIZACAO     0.8300    0.8830    0.8557        94
PRODUTODELEI     0.8333    0.8333    0.8333        54
      EVENTO     0.7143    0.5556    0.6250         9

   micro avg     0.8842    0.9048    0.8944       599
   macro avg     0.8567    0.8531    0.8532       599
weighted avg     0.8846    0.9048    0.8943       599

2022-10-05 21:57:39,714 ----------------------------------------------------------------------------------------------------


{'test_score': 0.8943894389438943,
 'dev_score_history': [0.35537742150968604,
  0.6008163265306123,
  0.6264840182648402,
  0.7641996557659209,
  0.7272727272727273,
  0.8236331569664904,
  0.8326105810928013,
  0.8346186803770351,
  0.8434163701067615,
  0.8622222222222221,
  0.8579040852575489,
  0.8606921029281278,
  0.8818342151675485,
  0.8445199660152931,
  0.8637137989778535,
  0.8620988725065047,
  0.8754325259515572,
  0.8796536796536796,
  0.8806228373702423,
  0.8846487424111016,
  0.8934707903780069,
  0.8796536796536796,
  0.8859878154917319,
  0.8885017421602787,
  0.889661164205039,
  0.8913793103448276,
  0.885640584694755,
  0.89198606271777,
  0.8929188255613126,
  0.8987012987012988,
  0.897212543554007,
  0.8944636678200691,
  0.8967909800520383,
  0.8985255854293148,
  0.8977469670710572,
  0.898876404494382,
  0.8954191875540191,
  0.8931034482758621,
  0.8942807625649914,
  0.8954191875540191],
 'train_loss_history': [0.37498423234156175,
  0.1498252156393744,
 