# Imports

In [None]:
!pip install flair
!pip install sentence-transformers

In [2]:
import torch

import random
import os
import json
import re

from google.colab import drive, files

import pandas as pd

from flair.data import Sentence, Token, Label, Corpus
from flair.datasets import ColumnCorpus, ClassificationCorpus
from flair.embeddings import TransformerEmbeddings, TransformerDocumentEmbeddings, TransformerWordEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer

# Create the dataset samples

## Files setup

In [None]:
# Note: This may not be available forever
!gdown 10V0fqO5aOk7yFMypH3884UVw0sQ7gday
!unzip -o -q rumoureval2019.zip
%cd rumoureval2019
!unzip -o -q rumoureval-2019-test-data.zip
!unzip -o -q rumoureval-2019-training-data.zip

The files are structured so that each folder inside reddit/twitter train/dev/test parent folder corresponds to a single conversation.

Each conversation contains a `replies` folder containing the raw tweet/reddit comments and a `structure.json` file indicating the nesting of the replies (a reply can either refer to the main comment or to another reply)

The functions below navigate the structure of the reddit and twitter folders and create a list of samples, which are `dict`s containing the data for a single conversation:

`id`: ID of the conversation

`title`: Main tweet / reddit post title

`structure`:`dict` containing the reply structure, based on their IDs

`replies`: `dict` containing the reply text contents, based on their IDs

We additionally perform a "cleaning" step that replaces URL and direct mentions (e.g. @username) by generic values, namely \$URL\$ and \$MENTION\$, on both titles and comments. This slightly improves the classification as it prevents the model from learning from the contents of usernames, and reduces noise caused by them and the URL tokens.

In [None]:
def get_dirs(root):
  return [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]


def get_json_files(root):
  return [os.path.join(root, file) for file in os.listdir(root) if file.endswith('.json')]


def read_json_file(path):
  return json.load(open(path, 'r'))


def get_file_id(path):
  filename_without_extension = os.path.splitext(os.path.basename(path))[0]
  basename = filename_without_extension.split('/')[-1]
  return basename


# Clean twitter and reddit comments by replacing URLs with $URL$ and mentions
# with $MENTION$
# Adapted from https://github.com/Anushka-Prakash/RumourEval-2019-Stance-Detection
def clean_comment(comment):
  comment = re.sub(r"(http|ftp|https)\S+", "$URL$", comment.strip())
  comment = re.sub(r"(@[A-Za-z0-9_\\-]+)", "$MENTION$", comment.strip()) # Included '_' and '-' characters

  return comment


def create_sample_for_conversation(data_dir, conversation_dir, clean_comments):
  sample = dict()

  conversation_dir = os.path.join(data_dir, conversation_dir)
  source_json_files = get_json_files(conversation_dir+"/source-tweet/")
  reply_json_files = get_json_files(conversation_dir+"/replies/")

  source = read_json_file(source_json_files[0])

  post_title = None

  # The reddit and twitter reply files contain small differences in their structure,
  # so we have to distinguish them
  if "reddit" in data_dir: # It's a reddit post
    for child in source["data"]["children"]:
      if "title" in child["data"]:
        post_title = clean_comment(child["data"]["title"])
        break

  else: # It's a twitter thread
    post_title = clean_comment(source["text"])

  sample["id"] = get_file_id(source_json_files[0])
  sample["title"] = post_title
  sample["structure"] = read_json_file(conversation_dir+"/structure.json")
  sample["replies"] = dict()

  if "reddit" in data_dir: # It's a reddit post
    for reply in reply_json_files:
      reply_dict = read_json_file(reply)

      comment = None
      if "body" in reply_dict["data"]:
        comment = reply_dict["data"]["body"]
      # It's a wrongly parsed comment. Since there are not many, instead of
      # discarding the whole sample we simply put a "spurious" value
      if comment is None:
        comment = "invalid"

      if clean_comments:
        comment = clean_comment(comment)

      sample["replies"][get_file_id(reply)] = comment

  else: # It's a twitter thread
    for reply in reply_json_files:
      reply_dict = read_json_file(reply)

      # Get the file's path without the file extension
      sample["replies"][get_file_id(reply)] = clean_comment(reply_dict["text"])

  return sample


def create_samples(root, clean_comments=True):
  samples = []

  for data_dir in get_dirs(root):
    print(f"Reading conversations in {data_dir}...")
    data_dir = os.path.join(root, data_dir)
    conversation_dirs = get_dirs(data_dir)

    if "reddit" in data_dir: # It's a reddit conversations dir
      for i, conversation_dir in enumerate(conversation_dirs):
        samples.append(create_sample_for_conversation(data_dir, conversation_dir, clean_comments))

    else: # It's a twitter conversations dir
      for conversations_dir in get_dirs(data_dir): # We have another layer of subfolders
        conversations_dir = os.path.join(data_dir, conversations_dir)
        conversation_subdirs = get_dirs(conversations_dir)

        for i, conversation_dir in enumerate(conversation_subdirs):
          samples.append(create_sample_for_conversation(conversations_dir, conversation_dir, clean_comments))


  return samples

In [None]:
samples_train = create_samples("rumoureval-2019-training-data")
samples_test = create_samples("rumoureval-2019-test-data")

# The ground truth files don't require any special processing, since we work with file IDs too
ground_truths_train = read_json_file("rumoureval-2019-training-data/train-key.json")["subtaskaenglish"]
ground_truths_dev = read_json_file("rumoureval-2019-training-data/dev-key.json")["subtaskaenglish"]
ground_truths_test = read_json_file("final-eval-key.json")["subtaskaenglish"]

# The twitter samples are not separated into train and development, so we will do
# it manually when training the BERT model
ground_truths = ground_truths_train | ground_truths_dev | ground_truths_test

Reading conversations in twitter-english...
Reading conversations in reddit-training-data...
Reading conversations in reddit-dev-data...
Reading conversations in reddit-test-data...
Reading conversations in twitter-en-test-data...


# Train flair's model (BERT models)

Now that we have parsed all samples, we can create the actual training/dev/test samples for the models following different strategies. A basic sample consists of a reply's text, to which we can add:


*   The main twitter thread's comment/reddit post's title
*   The comment's text the current sample is actually replying to (if it is the main one, we don't add it in order to avoid duplicates)
*   The depth of the reply with regard to the whole comment chain (1 if answering to the main comment, 2 if answering to a reply of the main comment, etc.)


These are all concatenated to the reply's text via '|' characters, which the BERT model should associate (by fine-tuning it) to a custom sentence separator. We don't use [SEP] since the model has been trained to only expect one [SEP] token, and we can have up to three separators.

A final sample would look like this, where the first three fields are optional:

`reply_depth | post_title | previous_comment | reply`



We additionally downsample the training dataset, since we have noticed that the `comment` class is extremely oversampled and can easily lead the model to only learn how to classify them


We have used the flair library for creating a training/dev/test dataset and training a BERT model for text classification (https://flairnlp.github.io/). This type of classification consists in using a [CLS] token, internally appended to the leftmost part of every sample, as sentence embeddings.

By doing this, every sample will have a single sentence embedding regardless of its contents or length, which will be then fed into a linear layer and classified via Softmax into one of the 4 classes (comment, support, deny or query)



In [None]:
# Recursively navigates replies_structure and adds samples to bert_samples,
# considering the provided configuration (add the title or not, etc.)
def add_to_sample(sample, replies_structure, post_title, previous_comment, bert_samples, id, ground_truths, add_previous_comment, add_post_title, add_depth, depth):
  if replies_structure != []: # Stop condition
    for id, children in replies_structure.items():
      if id in sample["replies"]:
        sentence_text = sample["replies"][id]

        if add_previous_comment and previous_comment is not None:
          sentence_text = previous_comment + " | " + sentence_text

        # Do not add duplicate post_title | previous_comment | reply text when
        # post_title == previous_comment (it's a reply to the main post)
        if add_post_title and post_title is not None and previous_comment != post_title:
          sentence_text = post_title + " | " + sentence_text

        if add_depth:
          sentence_text = str(depth) + " | " + sentence_text # Changed from [SEP]

        sentence = Sentence(sentence_text)

        # Add its class as a label
        sentence.add_label("class", ground_truths[id])
        bert_samples.append(sentence)

        add_to_sample(sample, children, post_title, sample["replies"][id], bert_samples, id, ground_truths, add_previous_comment, add_post_title, add_depth, depth+1)


def create_bert_samples(samples, ground_truths, add_previous_comment, add_post_title, add_depth):
  bert_samples = []
  for sample in samples:
    title = sample["title"]
    structure = sample["structure"]

    for title_id, replies_structure in structure.items(): # We will only iterate once, it's the root (title)
      # We signal the id as None as we won't find it in the ground truth (we classify replies,
      # not the "root" post/comment)
      add_to_sample(sample, replies_structure, title, title, bert_samples, None, ground_truths, add_previous_comment, add_post_title, add_depth, 1)

  return bert_samples

In [None]:
# Simple downsampling strategy implementation that limits classes to only
# oversample the least represented one by a factor of 1.25
def downsample(sentences):
  class_counts = dict()
  for sentence in sentences:
      class_val = sentence.get_label("class").value
      if class_val not in class_counts:
        class_counts[class_val] = 0
      class_counts[class_val] += 1

  min_samples = int(min(class_counts.values()) * 1.25) # Allow classes to be oversampled by a factor of 1.25

  balanced_sentences = []
  for class_label, count in class_counts.items():
      class_sentences = [sentence for sentence in sentences if sentence.get_label("class").value == class_label][:min_samples]
      balanced_sentences.extend(class_sentences)

  random.shuffle(balanced_sentences)

  return balanced_sentences

In [None]:
# Converts the BERT samples to a fasttext format file (note: this is a flair requirement,
# it simply uses this format for handling datasets)
def to_fasttext_dataset(sentences, dest_name):
  with open(dest_name, "w", encoding="utf-8") as file:
    for sentence in sentences:
      line = f"__label__{sentence.get_label('class').value} {sentence.text.replace(',', ' ')}\n"
      file.write(line)

In [None]:
# Creates train/dev/test datasets readable by flair based on the given configuration and
# fine-tunes a BERT model (indicated by model_name) for text classification
def test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, model_name):
  # Create the BERT samples
  bert_samples_train = create_bert_samples(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)
  bert_samples_test = create_bert_samples(samples_test, ground_truths, add_previous_comment, add_post_title, add_depth)

  print("Sample example:", bert_samples_train[42])

  # Further split train into dev+train
  # The dev dataset will be used to
  random.shuffle(bert_samples_train)
  split_ratio = 0.8

  split_index = int(len(bert_samples_train) * split_ratio)

  bert_samples_dev = bert_samples_train[split_index:]
  bert_samples_train = bert_samples_train[:split_index]

  # Downsample the train dataset (the comment class is oversampled)
  bert_samples_train_downsampled = downsample(bert_samples_train)

  # Write the datasets in fasttext sample format
  to_fasttext_dataset(bert_samples_train_downsampled, "train_fasttext_format.txt")
  to_fasttext_dataset(bert_samples_dev, "dev_fasttext_format.txt")
  to_fasttext_dataset(bert_samples_test, "test_fasttext_format.txt")

  # Create flair's corpus
  data_folder = '.'
  corpus: Corpus = ClassificationCorpus(data_folder,
                                        test_file='test_fasttext_format.txt',
                                        dev_file='dev_fasttext_format.txt',
                                        train_file='train_fasttext_format.txt',
                                        label_type='class')

  # We can easily inspect the corpus structure (class to samples ratios...)
  print("Corpus statistics:\n", corpus.obtain_statistics())

  # Declare the embeddings we want to use
  document_embeddings = TransformerDocumentEmbeddings(model_name,
                                                      # Extract embeddings directly from the last layer
                                                      layers="-1",
                                                      # Indicate flair that we want to do [CLS] pooling, which is the strategy we want to do
                                                      cls_pooling="cls",
                                                      # We also want to finetune the model's embeddings, so that, at the very least, it learns
                                                      # the meaning of the '|' symbol we use as a comment separator
                                                      fine_tune=True,
                                                      # Hint that we only want to use the [CLS] token, so that we obtain a single sentence
                                                      # embedding per sample
                                                      is_token_embedding=False)

  example_sentence = bert_samples_train_downsampled[0]
  document_embeddings.embed(example_sentence)
  print(f"Example of an embedding:\n\t{example_sentence}\n\t{example_sentence.embedding}")

  # Instantiate flair's classifier for text classification (internally, it uses torch classifier layers)
  classifier = TextClassifier(document_embeddings,
                              label_dictionary=corpus.make_label_dictionary(label_type='class'),
                              label_type='class')

  # And train it
  trainer = ModelTrainer(classifier, corpus)
  trainer.fine_tune('resources/taggers/rumoureval_transformers',
                    learning_rate=5.0e-5,
                    mini_batch_size=4,
                    # We only need to train for a limited number of epochs
                    max_epochs=10)

  with torch.no_grad():
    torch.cuda.empty_cache()

Below are the results for every meaningful parameter configuration. We have used deberta-v3-base as our BERT model, which is one of the most recent/state of the art BERT models, from 2021.

Since there is a class imbalance (comment has, by far, the most samples), we need to choose the best model based on the F-macro metric, which calculates the F1-score on a per-class basis and then averages them (the F1-micro score, instead, simply calculates it for all samples).

The best performing configuration is the one that adds the title's text and the comment's depth indicator.

In [None]:
add_previous_comment = False
add_post_title = False
add_depth = False

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[27]: "“$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 10:52:14,196 Reading data from .
2023-12-21 10:52:14,200 Train: train_fasttext_format.txt
2023-12-21 10:52:14,202 Dev: dev_fasttext_format.txt
2023-12-21 10:52:14,205 Test: test_fasttext_format.txt
2023-12-21 10:52:14,343 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1643,
        "number_of_documents_per_class": {
            "query": 394,
            "support": 446,
            "comment": 446,
            "deny": 357
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 36765,
            "min": 1,
            "max": 749,
            "avg": 22.376749847839317
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_documents_



Example of an embedding:
	Sentence[6]: "$MENTION$ which version?" → query (1.0)
	tensor([ 1.5943e-01,  2.5077e-01,  5.7776e-02, -9.0526e-02,  3.7470e-02,
        -1.3958e-01,  5.3581e-03,  1.5695e-01, -2.5912e-01,  6.8150e-02,
         5.1543e-02, -2.9987e-01, -7.9545e-02,  2.5842e-01,  1.4114e-01,
         7.3076e-03, -1.3795e-01, -4.8675e-02, -1.6805e-01,  1.3327e-02,
        -2.2311e-01, -1.9917e-01, -5.7200e-02, -2.4599e-03,  8.1132e-03,
        -4.5756e-02, -1.1947e-01, -4.5453e-02,  8.2243e-02,  1.0843e-01,
        -1.8072e-01, -8.1738e-03, -9.5604e-02, -7.4721e-02,  2.2969e-01,
         1.7892e-01, -8.2302e-02,  4.8289e-02, -1.3907e-01,  2.5242e-02,
        -2.0426e-01, -5.1149e-02,  1.0878e-02, -5.4616e-02,  7.5109e-02,
         1.2400e-02,  6.8857e-02, -5.9202e-02, -1.1723e-01,  6.0641e-02,
        -1.3637e-01, -1.4378e-01,  1.3940e-01, -1.1313e-01, -1.3165e+00,
         7.0044e-02, -2.8430e-01,  1.3835e-01,  1.3929e-01, -2.3284e-01,
         1.1076e-01, -1.8614e-01,  4.6364e-

0it [00:00, ?it/s]
1643it [00:00, 2766.51it/s]

2023-12-21 10:52:25,045 Dictionary created for label 'class' with 4 values: support (seen 446 times), comment (seen 446 times), query (seen 394 times), deny (seen 357 times)





2023-12-21 10:52:25,057 ----------------------------------------------------------------------------------------------------
2023-12-21 10:52:25,061 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
                (value_proj): Linear(in_features=768, out_features=768, bias=True)
                (pos_dropout): StableDropout()
                (dropout): StableDropout()
        

100%|██████████| 80/80 [00:15<00:00,  5.32it/s]

2023-12-21 10:53:43,846 DEV : loss 1.4389363527297974 - f1-score (micro avg)  0.1648





2023-12-21 10:53:44,513 ----------------------------------------------------------------------------------------------------
2023-12-21 10:53:50,339 epoch 2 - iter 41/411 - loss 1.19293944 - time (sec): 5.82 - samples/sec: 28.17 - lr: 0.000049 - momentum: 0.000000
2023-12-21 10:53:57,500 epoch 2 - iter 82/411 - loss 1.13559918 - time (sec): 12.98 - samples/sec: 25.26 - lr: 0.000049 - momentum: 0.000000
2023-12-21 10:54:03,718 epoch 2 - iter 123/411 - loss 1.15381613 - time (sec): 19.20 - samples/sec: 25.62 - lr: 0.000048 - momentum: 0.000000
2023-12-21 10:54:10,325 epoch 2 - iter 164/411 - loss 1.17700733 - time (sec): 25.81 - samples/sec: 25.42 - lr: 0.000048 - momentum: 0.000000
2023-12-21 10:54:16,599 epoch 2 - iter 205/411 - loss 1.21170911 - time (sec): 32.08 - samples/sec: 25.56 - lr: 0.000047 - momentum: 0.000000
2023-12-21 10:54:22,401 epoch 2 - iter 246/411 - loss 1.20472082 - time (sec): 37.88 - samples/sec: 25.97 - lr: 0.000047 - momentum: 0.000000
2023-12-21 10:54:28,967 ep

100%|██████████| 80/80 [00:14<00:00,  5.48it/s]

2023-12-21 10:55:02,359 DEV : loss 1.1546884775161743 - f1-score (micro avg)  0.6853





2023-12-21 10:55:02,868 ----------------------------------------------------------------------------------------------------
2023-12-21 10:55:09,365 epoch 3 - iter 41/411 - loss 1.13521779 - time (sec): 6.49 - samples/sec: 25.25 - lr: 0.000044 - momentum: 0.000000
2023-12-21 10:55:16,968 epoch 3 - iter 82/411 - loss 1.03415363 - time (sec): 14.10 - samples/sec: 23.27 - lr: 0.000043 - momentum: 0.000000
2023-12-21 10:55:22,881 epoch 3 - iter 123/411 - loss 0.99204650 - time (sec): 20.01 - samples/sec: 24.59 - lr: 0.000043 - momentum: 0.000000
2023-12-21 10:55:29,255 epoch 3 - iter 164/411 - loss 1.02258440 - time (sec): 26.38 - samples/sec: 24.86 - lr: 0.000042 - momentum: 0.000000
2023-12-21 10:55:35,170 epoch 3 - iter 205/411 - loss 1.03197720 - time (sec): 32.30 - samples/sec: 25.39 - lr: 0.000042 - momentum: 0.000000
2023-12-21 10:55:41,289 epoch 3 - iter 246/411 - loss 1.03218829 - time (sec): 38.42 - samples/sec: 25.61 - lr: 0.000041 - momentum: 0.000000
2023-12-21 10:55:47,613 ep

100%|██████████| 80/80 [00:14<00:00,  5.43it/s]

2023-12-21 10:56:21,145 DEV : loss 1.4655673503875732 - f1-score (micro avg)  0.3983





2023-12-21 10:56:21,671 ----------------------------------------------------------------------------------------------------
2023-12-21 10:56:28,128 epoch 4 - iter 41/411 - loss 0.62339086 - time (sec): 6.45 - samples/sec: 25.41 - lr: 0.000038 - momentum: 0.000000
2023-12-21 10:56:33,959 epoch 4 - iter 82/411 - loss 0.74903125 - time (sec): 12.28 - samples/sec: 26.70 - lr: 0.000038 - momentum: 0.000000
2023-12-21 10:56:41,225 epoch 4 - iter 123/411 - loss 0.78666551 - time (sec): 19.55 - samples/sec: 25.17 - lr: 0.000037 - momentum: 0.000000
2023-12-21 10:56:47,609 epoch 4 - iter 164/411 - loss 0.76486062 - time (sec): 25.93 - samples/sec: 25.29 - lr: 0.000037 - momentum: 0.000000
2023-12-21 10:56:53,677 epoch 4 - iter 205/411 - loss 0.77740798 - time (sec): 32.00 - samples/sec: 25.62 - lr: 0.000036 - momentum: 0.000000
2023-12-21 10:57:01,327 epoch 4 - iter 246/411 - loss 0.78811183 - time (sec): 39.65 - samples/sec: 24.82 - lr: 0.000036 - momentum: 0.000000
2023-12-21 10:57:07,195 ep

100%|██████████| 80/80 [00:14<00:00,  5.42it/s]

2023-12-21 10:57:41,181 DEV : loss 2.8848485946655273 - f1-score (micro avg)  0.3415





2023-12-21 10:57:41,932 ----------------------------------------------------------------------------------------------------
2023-12-21 10:57:47,766 epoch 5 - iter 41/411 - loss 0.57918877 - time (sec): 5.83 - samples/sec: 28.13 - lr: 0.000033 - momentum: 0.000000
2023-12-21 10:57:53,841 epoch 5 - iter 82/411 - loss 0.55890871 - time (sec): 11.91 - samples/sec: 27.55 - lr: 0.000032 - momentum: 0.000000
2023-12-21 10:58:00,192 epoch 5 - iter 123/411 - loss 0.52292049 - time (sec): 18.26 - samples/sec: 26.95 - lr: 0.000032 - momentum: 0.000000
2023-12-21 10:58:06,095 epoch 5 - iter 164/411 - loss 0.56264481 - time (sec): 24.16 - samples/sec: 27.15 - lr: 0.000031 - momentum: 0.000000
2023-12-21 10:58:12,706 epoch 5 - iter 205/411 - loss 0.52979012 - time (sec): 30.77 - samples/sec: 26.65 - lr: 0.000031 - momentum: 0.000000
2023-12-21 10:58:19,088 epoch 5 - iter 246/411 - loss 0.53588090 - time (sec): 37.15 - samples/sec: 26.49 - lr: 0.000030 - momentum: 0.000000
2023-12-21 10:58:25,638 ep

100%|██████████| 80/80 [00:14<00:00,  5.46it/s]

2023-12-21 10:58:59,836 DEV : loss 3.1242644786834717 - f1-score (micro avg)  0.3588





2023-12-21 10:59:00,345 ----------------------------------------------------------------------------------------------------
2023-12-21 10:59:07,237 epoch 6 - iter 41/411 - loss 0.24787742 - time (sec): 6.89 - samples/sec: 23.81 - lr: 0.000027 - momentum: 0.000000
2023-12-21 10:59:13,414 epoch 6 - iter 82/411 - loss 0.30545232 - time (sec): 13.07 - samples/sec: 25.10 - lr: 0.000027 - momentum: 0.000000
2023-12-21 10:59:19,255 epoch 6 - iter 123/411 - loss 0.30220429 - time (sec): 18.91 - samples/sec: 26.02 - lr: 0.000026 - momentum: 0.000000
2023-12-21 10:59:25,642 epoch 6 - iter 164/411 - loss 0.34108027 - time (sec): 25.29 - samples/sec: 25.93 - lr: 0.000026 - momentum: 0.000000
2023-12-21 10:59:31,428 epoch 6 - iter 205/411 - loss 0.36999837 - time (sec): 31.08 - samples/sec: 26.38 - lr: 0.000025 - momentum: 0.000000
2023-12-21 10:59:37,704 epoch 6 - iter 246/411 - loss 0.37844627 - time (sec): 37.36 - samples/sec: 26.34 - lr: 0.000025 - momentum: 0.000000
2023-12-21 10:59:43,886 ep

100%|██████████| 80/80 [00:14<00:00,  5.41it/s]

2023-12-21 11:00:18,014 DEV : loss 3.955028533935547 - f1-score (micro avg)  0.5047





2023-12-21 11:00:18,510 ----------------------------------------------------------------------------------------------------
2023-12-21 11:00:24,884 epoch 7 - iter 41/411 - loss 0.07122861 - time (sec): 6.37 - samples/sec: 25.74 - lr: 0.000022 - momentum: 0.000000
2023-12-21 11:00:30,888 epoch 7 - iter 82/411 - loss 0.12932137 - time (sec): 12.37 - samples/sec: 26.51 - lr: 0.000021 - momentum: 0.000000
2023-12-21 11:00:37,463 epoch 7 - iter 123/411 - loss 0.18320800 - time (sec): 18.95 - samples/sec: 25.96 - lr: 0.000021 - momentum: 0.000000
2023-12-21 11:00:43,470 epoch 7 - iter 164/411 - loss 0.17576128 - time (sec): 24.96 - samples/sec: 26.29 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:00:49,532 epoch 7 - iter 205/411 - loss 0.18679221 - time (sec): 31.02 - samples/sec: 26.44 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:00:56,556 epoch 7 - iter 246/411 - loss 0.20685852 - time (sec): 38.04 - samples/sec: 25.87 - lr: 0.000019 - momentum: 0.000000
2023-12-21 11:01:03,316 ep

100%|██████████| 80/80 [00:16<00:00,  4.89it/s]

2023-12-21 11:01:38,704 DEV : loss 5.407830238342285 - f1-score (micro avg)  0.4535





2023-12-21 11:01:39,211 ----------------------------------------------------------------------------------------------------
2023-12-21 11:01:45,202 epoch 8 - iter 41/411 - loss 0.15190855 - time (sec): 5.99 - samples/sec: 27.39 - lr: 0.000016 - momentum: 0.000000
2023-12-21 11:01:52,677 epoch 8 - iter 82/411 - loss 0.12755570 - time (sec): 13.46 - samples/sec: 24.36 - lr: 0.000016 - momentum: 0.000000
2023-12-21 11:01:58,455 epoch 8 - iter 123/411 - loss 0.13292236 - time (sec): 19.24 - samples/sec: 25.57 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:02:04,894 epoch 8 - iter 164/411 - loss 0.14880589 - time (sec): 25.68 - samples/sec: 25.55 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:02:11,023 epoch 8 - iter 205/411 - loss 0.14208148 - time (sec): 31.81 - samples/sec: 25.78 - lr: 0.000014 - momentum: 0.000000
2023-12-21 11:02:16,969 epoch 8 - iter 246/411 - loss 0.13424491 - time (sec): 37.75 - samples/sec: 26.06 - lr: 0.000013 - momentum: 0.000000
2023-12-21 11:02:23,973 ep

100%|██████████| 80/80 [00:14<00:00,  5.40it/s]

2023-12-21 11:02:57,733 DEV : loss 4.914754390716553 - f1-score (micro avg)  0.5032





2023-12-21 11:02:58,222 ----------------------------------------------------------------------------------------------------
2023-12-21 11:03:05,080 epoch 9 - iter 41/411 - loss 0.07136892 - time (sec): 6.86 - samples/sec: 23.92 - lr: 0.000011 - momentum: 0.000000
2023-12-21 11:03:11,887 epoch 9 - iter 82/411 - loss 0.08252132 - time (sec): 13.66 - samples/sec: 24.01 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:03:17,962 epoch 9 - iter 123/411 - loss 0.10804690 - time (sec): 19.74 - samples/sec: 24.93 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:03:24,975 epoch 9 - iter 164/411 - loss 0.10649062 - time (sec): 26.75 - samples/sec: 24.52 - lr: 0.000009 - momentum: 0.000000
2023-12-21 11:03:30,846 epoch 9 - iter 205/411 - loss 0.11009125 - time (sec): 32.62 - samples/sec: 25.14 - lr: 0.000008 - momentum: 0.000000
2023-12-21 11:03:37,640 epoch 9 - iter 246/411 - loss 0.09807928 - time (sec): 39.42 - samples/sec: 24.96 - lr: 0.000008 - momentum: 0.000000
2023-12-21 11:03:44,933 ep

100%|██████████| 80/80 [00:14<00:00,  5.37it/s]

2023-12-21 11:04:19,809 DEV : loss 5.298559188842773 - f1-score (micro avg)  0.4637





2023-12-21 11:04:20,524 ----------------------------------------------------------------------------------------------------
2023-12-21 11:04:27,752 epoch 10 - iter 41/411 - loss 0.06648660 - time (sec): 7.22 - samples/sec: 22.71 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:04:36,022 epoch 10 - iter 82/411 - loss 0.07796075 - time (sec): 15.49 - samples/sec: 21.17 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:04:42,250 epoch 10 - iter 123/411 - loss 0.07202370 - time (sec): 21.72 - samples/sec: 22.65 - lr: 0.000004 - momentum: 0.000000
2023-12-21 11:04:49,196 epoch 10 - iter 164/411 - loss 0.08147683 - time (sec): 28.67 - samples/sec: 22.88 - lr: 0.000003 - momentum: 0.000000
2023-12-21 11:04:55,268 epoch 10 - iter 205/411 - loss 0.07729005 - time (sec): 34.74 - samples/sec: 23.60 - lr: 0.000003 - momentum: 0.000000
2023-12-21 11:05:02,107 epoch 10 - iter 246/411 - loss 0.07390485 - time (sec): 41.58 - samples/sec: 23.67 - lr: 0.000002 - momentum: 0.000000
2023-12-21 11:05:08,

100%|██████████| 80/80 [00:14<00:00,  5.39it/s]

2023-12-21 11:05:43,280 DEV : loss 5.071529865264893 - f1-score (micro avg)  0.4905





2023-12-21 11:05:49,129 ----------------------------------------------------------------------------------------------------
2023-12-21 11:05:49,132 Testing using last state of model ...


100%|██████████| 110/110 [00:17<00:00,  6.44it/s]

2023-12-21 11:06:06,254 
Results:
- F-score (micro) 0.579
- F-score (macro) 0.4189
- Accuracy 0.579

By class:
              precision    recall  f1-score   support

     comment     0.9248    0.5752    0.7093      1476
     support     0.1340    0.4808    0.2096       104
        deny     0.2089    0.6100    0.3112       100
       query     0.3129    0.7727    0.4454        66

    accuracy                         0.5790      1746
   macro avg     0.3952    0.6097    0.4189      1746
weighted avg     0.8136    0.5790    0.6467      1746

2023-12-21 11:06:06,257 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = True
add_post_title = False
add_depth = False

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[51]: "The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 11:39:18,401 Reading data from .
2023-12-21 11:39:18,402 Train: train_fasttext_format.txt
2023-12-21 11:39:18,407 Dev: dev_fasttext_format.txt
2023-12-21 11:39:18,408 Test: test_fasttext_format.txt
2023-12-21 11:39:18,499 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1586,
        "number_of_documents_per_class": {
            "support": 427,
            "query": 390,
            "deny": 342,
            "comment": 427
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 75371,
            "min": 11,
            "max": 573,
            "avg": 47.52269861286255
        }
    },
    "TEST": {
        "dataset": "



Example of an embedding:
	Sentence[37]: "$MENTION$ $MENTION$ Added to what happened in Quebec on Monday, the people who protect our nations need our full support. | $MENTION$ $MENTION$ Yes they do." → support (1.0)
	tensor([ 1.4228e-01,  2.2221e-01,  6.3073e-02, -5.9620e-02,  8.1782e-02,
        -1.2725e-01, -1.6231e-03,  9.6102e-02, -1.7054e-01,  1.0007e-01,
         5.2429e-02, -2.7547e-01,  4.2257e-02,  2.3015e-01,  7.9419e-02,
        -4.4294e-02, -1.2677e-01, -9.0077e-03, -1.3385e-01, -4.0198e-02,
        -2.2864e-01, -1.8995e-01, -2.7270e-02,  5.1124e-02, -2.6129e-03,
        -2.8032e-02, -4.0835e-02, -5.9706e-02,  2.9471e-02,  1.0979e-01,
        -8.9512e-02, -2.0280e-02, -6.7833e-02, -4.4873e-02,  1.2313e-01,
         1.2662e-01, -7.3993e-02,  4.2909e-02, -1.1559e-01,  6.7592e-02,
        -1.6930e-01, -1.9147e-02,  1.7154e-02, -4.2533e-02,  7.8238e-02,
         5.9729e-03,  9.0034e-02, -1.2926e-01, -3.4793e-02,  1.7157e-02,
        -5.1485e-02, -1.2702e-01,  9.5652e-02, -9.5657

0it [00:00, ?it/s]
1586it [00:01, 802.34it/s]

2023-12-21 11:39:37,293 Dictionary created for label 'class' with 4 values: support (seen 427 times), comment (seen 427 times), query (seen 390 times), deny (seen 342 times)
2023-12-21 11:39:37,305 ----------------------------------------------------------------------------------------------------
2023-12-21 11:39:37,311 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 11:39:43,662 epoch 1 - iter 39/397 - loss 2.51093915 - time (sec): 6.30 - samples/sec: 24.78 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:39:50,386 epoch 1 - iter 78/397 - loss 2.00331366 - time (sec): 13.02 - samples/sec: 23.96 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:39:57,056 epoch 1 - iter 117/397 - loss 1.80016863 - time (sec): 19.69 - samples/sec: 23.77 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:40:04,472 epoch 1 - iter 156/397 - loss 1.71023376 - time (sec): 27.11 - samples/sec: 23.02 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:40:10,954 epoch 1 - iter 195/397 - loss 1.64309295 - time (sec): 33.59 - samples/sec: 23.22 - lr: 0.000024 - momentum: 0.000000
2023-12-21 11:40:17,338 epoch 1 - iter 234/397 - loss 1.59084342 - time (sec): 39.97 - samples/sec: 23.42 - lr: 0.000029 - momentum: 0.000000
2023-12-21 11:40:24,424 epoch 1 - iter 273/397 - loss 1.55712618 - time (sec): 47.06 - samples/sec: 23.21 - lr: 0.000034 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:22<00:00,  3.53it/s]

2023-12-21 11:41:08,588 DEV : loss 1.2761369943618774 - f1-score (micro avg)  0.5055





2023-12-21 11:41:09,847 ----------------------------------------------------------------------------------------------------
2023-12-21 11:41:16,010 epoch 2 - iter 39/397 - loss 1.11351638 - time (sec): 6.16 - samples/sec: 25.33 - lr: 0.000049 - momentum: 0.000000
2023-12-21 11:41:23,175 epoch 2 - iter 78/397 - loss 1.18528549 - time (sec): 13.32 - samples/sec: 23.41 - lr: 0.000049 - momentum: 0.000000
2023-12-21 11:41:29,933 epoch 2 - iter 117/397 - loss 1.21522505 - time (sec): 20.08 - samples/sec: 23.30 - lr: 0.000048 - momentum: 0.000000
2023-12-21 11:41:36,712 epoch 2 - iter 156/397 - loss 1.22679396 - time (sec): 26.86 - samples/sec: 23.23 - lr: 0.000048 - momentum: 0.000000
2023-12-21 11:41:43,339 epoch 2 - iter 195/397 - loss 1.22353686 - time (sec): 33.49 - samples/sec: 23.29 - lr: 0.000047 - momentum: 0.000000
2023-12-21 11:41:50,055 epoch 2 - iter 234/397 - loss 1.22587783 - time (sec): 40.20 - samples/sec: 23.28 - lr: 0.000047 - momentum: 0.000000
2023-12-21 11:41:56,474 ep

100%|██████████| 80/80 [00:24<00:00,  3.27it/s]

2023-12-21 11:42:41,893 DEV : loss 1.120391607284546 - f1-score (micro avg)  0.623





2023-12-21 11:42:42,871 ----------------------------------------------------------------------------------------------------
2023-12-21 11:42:49,372 epoch 3 - iter 39/397 - loss 1.07602664 - time (sec): 6.50 - samples/sec: 24.01 - lr: 0.000044 - momentum: 0.000000
2023-12-21 11:42:56,219 epoch 3 - iter 78/397 - loss 1.08172267 - time (sec): 13.34 - samples/sec: 23.38 - lr: 0.000043 - momentum: 0.000000
2023-12-21 11:43:02,579 epoch 3 - iter 117/397 - loss 1.08415733 - time (sec): 19.70 - samples/sec: 23.75 - lr: 0.000043 - momentum: 0.000000
2023-12-21 11:43:09,049 epoch 3 - iter 156/397 - loss 1.08752954 - time (sec): 26.17 - samples/sec: 23.84 - lr: 0.000042 - momentum: 0.000000
2023-12-21 11:43:15,222 epoch 3 - iter 195/397 - loss 1.09692566 - time (sec): 32.35 - samples/sec: 24.11 - lr: 0.000042 - momentum: 0.000000
2023-12-21 11:43:23,165 epoch 3 - iter 234/397 - loss 1.07547215 - time (sec): 40.29 - samples/sec: 23.23 - lr: 0.000041 - momentum: 0.000000
2023-12-21 11:43:29,075 ep

100%|██████████| 80/80 [00:22<00:00,  3.51it/s]

2023-12-21 11:44:12,858 DEV : loss 1.5388213396072388 - f1-score (micro avg)  0.362





2023-12-21 11:44:13,816 ----------------------------------------------------------------------------------------------------
2023-12-21 11:44:21,007 epoch 4 - iter 39/397 - loss 0.82153253 - time (sec): 7.19 - samples/sec: 21.70 - lr: 0.000038 - momentum: 0.000000
2023-12-21 11:44:27,397 epoch 4 - iter 78/397 - loss 0.80273498 - time (sec): 13.58 - samples/sec: 22.98 - lr: 0.000038 - momentum: 0.000000
2023-12-21 11:44:34,298 epoch 4 - iter 117/397 - loss 0.85804808 - time (sec): 20.48 - samples/sec: 22.85 - lr: 0.000037 - momentum: 0.000000
2023-12-21 11:44:40,899 epoch 4 - iter 156/397 - loss 0.85190524 - time (sec): 27.08 - samples/sec: 23.04 - lr: 0.000037 - momentum: 0.000000
2023-12-21 11:44:47,353 epoch 4 - iter 195/397 - loss 0.87912907 - time (sec): 33.53 - samples/sec: 23.26 - lr: 0.000036 - momentum: 0.000000
2023-12-21 11:44:53,858 epoch 4 - iter 234/397 - loss 0.87550837 - time (sec): 40.04 - samples/sec: 23.38 - lr: 0.000036 - momentum: 0.000000
2023-12-21 11:45:00,658 ep

100%|██████████| 80/80 [00:23<00:00,  3.46it/s]

2023-12-21 11:45:46,098 DEV : loss 1.302445650100708 - f1-score (micro avg)  0.5662





2023-12-21 11:45:47,750 ----------------------------------------------------------------------------------------------------
2023-12-21 11:45:54,613 epoch 5 - iter 39/397 - loss 0.62354151 - time (sec): 6.86 - samples/sec: 22.74 - lr: 0.000033 - momentum: 0.000000
2023-12-21 11:46:01,086 epoch 5 - iter 78/397 - loss 0.57764096 - time (sec): 13.33 - samples/sec: 23.40 - lr: 0.000032 - momentum: 0.000000
2023-12-21 11:46:07,329 epoch 5 - iter 117/397 - loss 0.63325361 - time (sec): 19.58 - samples/sec: 23.91 - lr: 0.000032 - momentum: 0.000000
2023-12-21 11:46:13,731 epoch 5 - iter 156/397 - loss 0.61385055 - time (sec): 25.98 - samples/sec: 24.02 - lr: 0.000031 - momentum: 0.000000
2023-12-21 11:46:20,135 epoch 5 - iter 195/397 - loss 0.63131293 - time (sec): 32.38 - samples/sec: 24.09 - lr: 0.000031 - momentum: 0.000000
2023-12-21 11:46:27,028 epoch 5 - iter 234/397 - loss 0.61963514 - time (sec): 39.28 - samples/sec: 23.83 - lr: 0.000030 - momentum: 0.000000
2023-12-21 11:46:34,428 ep

100%|██████████| 80/80 [00:23<00:00,  3.47it/s]

2023-12-21 11:47:18,415 DEV : loss 1.930671215057373 - f1-score (micro avg)  0.5757





2023-12-21 11:47:20,818 ----------------------------------------------------------------------------------------------------
2023-12-21 11:47:27,067 epoch 6 - iter 39/397 - loss 0.24564883 - time (sec): 6.25 - samples/sec: 24.98 - lr: 0.000027 - momentum: 0.000000
2023-12-21 11:47:33,918 epoch 6 - iter 78/397 - loss 0.34972047 - time (sec): 13.10 - samples/sec: 23.82 - lr: 0.000027 - momentum: 0.000000
2023-12-21 11:47:40,333 epoch 6 - iter 117/397 - loss 0.32678968 - time (sec): 19.51 - samples/sec: 23.99 - lr: 0.000026 - momentum: 0.000000
2023-12-21 11:47:46,812 epoch 6 - iter 156/397 - loss 0.34701636 - time (sec): 25.99 - samples/sec: 24.01 - lr: 0.000026 - momentum: 0.000000
2023-12-21 11:47:53,449 epoch 6 - iter 195/397 - loss 0.34120852 - time (sec): 32.63 - samples/sec: 23.91 - lr: 0.000025 - momentum: 0.000000
2023-12-21 11:48:00,844 epoch 6 - iter 234/397 - loss 0.32308652 - time (sec): 40.02 - samples/sec: 23.39 - lr: 0.000025 - momentum: 0.000000
2023-12-21 11:48:07,070 ep

100%|██████████| 80/80 [00:22<00:00,  3.50it/s]

2023-12-21 11:48:51,219 DEV : loss 2.507018566131592 - f1-score (micro avg)  0.5599





2023-12-21 11:48:52,193 ----------------------------------------------------------------------------------------------------
2023-12-21 11:48:58,864 epoch 7 - iter 39/397 - loss 0.17567202 - time (sec): 6.67 - samples/sec: 23.40 - lr: 0.000022 - momentum: 0.000000
2023-12-21 11:49:05,136 epoch 7 - iter 78/397 - loss 0.14890906 - time (sec): 12.94 - samples/sec: 24.11 - lr: 0.000021 - momentum: 0.000000
2023-12-21 11:49:11,654 epoch 7 - iter 117/397 - loss 0.17507454 - time (sec): 19.46 - samples/sec: 24.05 - lr: 0.000021 - momentum: 0.000000
2023-12-21 11:49:18,316 epoch 7 - iter 156/397 - loss 0.18626968 - time (sec): 26.12 - samples/sec: 23.89 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:49:24,873 epoch 7 - iter 195/397 - loss 0.18562914 - time (sec): 32.68 - samples/sec: 23.87 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:49:31,997 epoch 7 - iter 234/397 - loss 0.17083203 - time (sec): 39.80 - samples/sec: 23.52 - lr: 0.000019 - momentum: 0.000000
2023-12-21 11:49:39,138 ep

100%|██████████| 80/80 [00:22<00:00,  3.50it/s]

2023-12-21 11:50:23,364 DEV : loss 5.079106330871582 - f1-score (micro avg)  0.3856





2023-12-21 11:50:26,852 ----------------------------------------------------------------------------------------------------
2023-12-21 11:50:33,341 epoch 8 - iter 39/397 - loss 0.08248258 - time (sec): 6.49 - samples/sec: 24.05 - lr: 0.000016 - momentum: 0.000000
2023-12-21 11:50:40,217 epoch 8 - iter 78/397 - loss 0.08496630 - time (sec): 13.36 - samples/sec: 23.35 - lr: 0.000016 - momentum: 0.000000
2023-12-21 11:50:46,808 epoch 8 - iter 117/397 - loss 0.08490756 - time (sec): 19.95 - samples/sec: 23.46 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:50:53,964 epoch 8 - iter 156/397 - loss 0.08443921 - time (sec): 27.11 - samples/sec: 23.02 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:51:00,042 epoch 8 - iter 195/397 - loss 0.06893569 - time (sec): 33.19 - samples/sec: 23.50 - lr: 0.000014 - momentum: 0.000000
2023-12-21 11:51:06,752 epoch 8 - iter 234/397 - loss 0.08703686 - time (sec): 39.90 - samples/sec: 23.46 - lr: 0.000013 - momentum: 0.000000
2023-12-21 11:51:13,593 ep

100%|██████████| 80/80 [00:23<00:00,  3.47it/s]

2023-12-21 11:51:57,256 DEV : loss 4.249913215637207 - f1-score (micro avg)  0.5331





2023-12-21 11:51:58,263 ----------------------------------------------------------------------------------------------------
2023-12-21 11:52:04,413 epoch 9 - iter 39/397 - loss 0.06172268 - time (sec): 6.14 - samples/sec: 25.39 - lr: 0.000011 - momentum: 0.000000
2023-12-21 11:52:11,452 epoch 9 - iter 78/397 - loss 0.03980046 - time (sec): 13.18 - samples/sec: 23.67 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:52:17,742 epoch 9 - iter 117/397 - loss 0.03723061 - time (sec): 19.47 - samples/sec: 24.03 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:52:24,382 epoch 9 - iter 156/397 - loss 0.04572830 - time (sec): 26.11 - samples/sec: 23.90 - lr: 0.000009 - momentum: 0.000000
2023-12-21 11:52:31,254 epoch 9 - iter 195/397 - loss 0.05882337 - time (sec): 32.99 - samples/sec: 23.65 - lr: 0.000008 - momentum: 0.000000
2023-12-21 11:52:37,727 epoch 9 - iter 234/397 - loss 0.06960923 - time (sec): 39.46 - samples/sec: 23.72 - lr: 0.000008 - momentum: 0.000000
2023-12-21 11:52:44,423 ep

100%|██████████| 80/80 [00:24<00:00,  3.27it/s]

2023-12-21 11:53:30,277 DEV : loss 4.7403483390808105 - f1-score (micro avg)  0.5063





2023-12-21 11:53:31,234 ----------------------------------------------------------------------------------------------------
2023-12-21 11:53:37,772 epoch 10 - iter 39/397 - loss 0.00012135 - time (sec): 6.53 - samples/sec: 23.87 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:53:44,460 epoch 10 - iter 78/397 - loss 0.01051857 - time (sec): 13.22 - samples/sec: 23.60 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:53:50,872 epoch 10 - iter 117/397 - loss 0.01296980 - time (sec): 19.63 - samples/sec: 23.84 - lr: 0.000004 - momentum: 0.000000
2023-12-21 11:53:57,068 epoch 10 - iter 156/397 - loss 0.02515321 - time (sec): 25.83 - samples/sec: 24.16 - lr: 0.000003 - momentum: 0.000000
2023-12-21 11:54:03,432 epoch 10 - iter 195/397 - loss 0.03081948 - time (sec): 32.19 - samples/sec: 24.23 - lr: 0.000003 - momentum: 0.000000
2023-12-21 11:54:10,469 epoch 10 - iter 234/397 - loss 0.02826934 - time (sec): 39.23 - samples/sec: 23.86 - lr: 0.000002 - momentum: 0.000000
2023-12-21 11:54:17,

100%|██████████| 80/80 [00:22<00:00,  3.51it/s]

2023-12-21 11:55:01,184 DEV : loss 4.791125297546387 - f1-score (micro avg)  0.5079





2023-12-21 11:55:08,997 ----------------------------------------------------------------------------------------------------
2023-12-21 11:55:09,002 Testing using last state of model ...


100%|██████████| 110/110 [00:26<00:00,  4.13it/s]

2023-12-21 11:55:35,702 
Results:
- F-score (micro) 0.5624
- F-score (macro) 0.406
- Accuracy 0.5624

By class:
              precision    recall  f1-score   support

     comment     0.9231    0.5610    0.6979      1476
     support     0.1117    0.4038    0.1750       104
        deny     0.1963    0.6400    0.3005       100
       query     0.3265    0.7273    0.4507        66

    accuracy                         0.5624      1746
   macro avg     0.3894    0.5830    0.4060      1746
weighted avg     0.8106    0.5624    0.6346      1746

2023-12-21 11:55:35,703 ----------------------------------------------------------------------------------------------------





In [None]:
# microsoft/deberta-v3-base
add_previous_comment = False
add_post_title = True
add_depth = False

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[27]: "“$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 11:56:37,613 Reading data from .
2023-12-21 11:56:37,618 Train: train_fasttext_format.txt
2023-12-21 11:56:37,621 Dev: dev_fasttext_format.txt
2023-12-21 11:56:37,623 Test: test_fasttext_format.txt
2023-12-21 11:56:37,787 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1577,
        "number_of_documents_per_class": {
            "support": 428,
            "comment": 428,
            "deny": 343,
            "query": 378
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 47357,
            "min": 1,
            "max": 414,
            "avg": 30.029803424223207
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_documents_



Example of an embedding:
	Sentence[24]: "#Australia #ISIS #Terrorism $MENTION$: #BREAKING #Hostages held inside #Sydney cafe, Islamic flag held up" → support (1.0)
	tensor([ 1.5658e-01,  2.2798e-01,  2.9953e-02, -6.3864e-02,  7.7165e-02,
        -1.2456e-01,  5.5820e-04,  1.2335e-01, -2.1314e-01,  7.1097e-02,
         7.5221e-02, -2.9232e-01,  4.9414e-04,  2.8242e-01,  8.7127e-02,
        -1.9393e-02, -1.1177e-01, -3.4503e-02, -1.2788e-01, -1.1266e-03,
        -2.5108e-01, -1.8039e-01, -7.6450e-02,  1.5031e-02,  6.5897e-02,
        -4.2171e-02, -4.3525e-02, -6.9999e-02,  5.2235e-02,  1.0993e-01,
        -1.5200e-01,  7.1379e-03, -1.1858e-01, -4.7109e-02,  1.8609e-01,
         1.6253e-01, -1.1561e-01,  9.1119e-02, -1.2881e-01,  5.2922e-02,
        -1.9519e-01,  6.6728e-03,  1.8223e-02, -7.5289e-02,  7.1455e-02,
         8.1270e-03,  6.4313e-02, -1.0381e-01, -3.4297e-02,  1.5270e-02,
        -6.5520e-02, -1.4845e-01,  1.0706e-01, -8.6872e-02, -1.1260e+00,
         7.8040e-02, -2.6892e-01

0it [00:00, ?it/s]
1577it [00:00, 2178.40it/s]

2023-12-21 11:56:50,292 Dictionary created for label 'class' with 4 values: support (seen 428 times), comment (seen 428 times), query (seen 378 times), deny (seen 343 times)
2023-12-21 11:56:50,302 ----------------------------------------------------------------------------------------------------
2023-12-21 11:56:50,306 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 11:56:56,659 epoch 1 - iter 39/395 - loss 2.08958704 - time (sec): 6.32 - samples/sec: 24.70 - lr: 0.000005 - momentum: 0.000000
2023-12-21 11:57:03,217 epoch 1 - iter 78/395 - loss 1.84237485 - time (sec): 12.87 - samples/sec: 24.23 - lr: 0.000010 - momentum: 0.000000
2023-12-21 11:57:08,988 epoch 1 - iter 117/395 - loss 1.71237250 - time (sec): 18.65 - samples/sec: 25.10 - lr: 0.000015 - momentum: 0.000000
2023-12-21 11:57:17,015 epoch 1 - iter 156/395 - loss 1.64248368 - time (sec): 26.67 - samples/sec: 23.39 - lr: 0.000020 - momentum: 0.000000
2023-12-21 11:57:23,550 epoch 1 - iter 195/395 - loss 1.59789881 - time (sec): 33.21 - samples/sec: 23.49 - lr: 0.000025 - momentum: 0.000000
2023-12-21 11:57:30,552 epoch 1 - iter 234/395 - loss 1.58127649 - time (sec): 40.21 - samples/sec: 23.28 - lr: 0.000029 - momentum: 0.000000
2023-12-21 11:57:36,376 epoch 1 - iter 273/395 - loss 1.54697644 - time (sec): 46.03 - samples/sec: 23.72 - lr: 0.000034 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:12<00:00,  6.19it/s]

2023-12-21 11:58:08,915 DEV : loss 1.3297399282455444 - f1-score (micro avg)  0.2145





2023-12-21 11:58:09,592 ----------------------------------------------------------------------------------------------------
2023-12-21 11:58:16,062 epoch 2 - iter 39/395 - loss 1.22401811 - time (sec): 6.47 - samples/sec: 24.12 - lr: 0.000049 - momentum: 0.000000
2023-12-21 11:58:21,934 epoch 2 - iter 78/395 - loss 1.27000218 - time (sec): 12.34 - samples/sec: 25.29 - lr: 0.000049 - momentum: 0.000000
2023-12-21 11:58:28,631 epoch 2 - iter 117/395 - loss 1.27512756 - time (sec): 19.04 - samples/sec: 24.59 - lr: 0.000048 - momentum: 0.000000
2023-12-21 11:58:34,769 epoch 2 - iter 156/395 - loss 1.26744199 - time (sec): 25.17 - samples/sec: 24.79 - lr: 0.000048 - momentum: 0.000000
2023-12-21 11:58:40,576 epoch 2 - iter 195/395 - loss 1.26429862 - time (sec): 30.98 - samples/sec: 25.18 - lr: 0.000047 - momentum: 0.000000
2023-12-21 11:58:47,651 epoch 2 - iter 234/395 - loss 1.26086840 - time (sec): 38.06 - samples/sec: 24.60 - lr: 0.000047 - momentum: 0.000000
2023-12-21 11:58:53,651 ep

100%|██████████| 80/80 [00:14<00:00,  5.62it/s]

2023-12-21 11:59:27,046 DEV : loss 0.9471229910850525 - f1-score (micro avg)  0.683





2023-12-21 11:59:28,179 ----------------------------------------------------------------------------------------------------
2023-12-21 11:59:34,499 epoch 3 - iter 39/395 - loss 1.00410671 - time (sec): 6.32 - samples/sec: 24.70 - lr: 0.000044 - momentum: 0.000000
2023-12-21 11:59:40,321 epoch 3 - iter 78/395 - loss 1.06401361 - time (sec): 12.14 - samples/sec: 25.70 - lr: 0.000043 - momentum: 0.000000
2023-12-21 11:59:46,596 epoch 3 - iter 117/395 - loss 1.02220848 - time (sec): 18.41 - samples/sec: 25.42 - lr: 0.000043 - momentum: 0.000000
2023-12-21 11:59:52,238 epoch 3 - iter 156/395 - loss 1.02197266 - time (sec): 24.06 - samples/sec: 25.94 - lr: 0.000042 - momentum: 0.000000
2023-12-21 11:59:58,251 epoch 3 - iter 195/395 - loss 1.03301018 - time (sec): 30.07 - samples/sec: 25.94 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:00:04,384 epoch 3 - iter 234/395 - loss 1.03729497 - time (sec): 36.20 - samples/sec: 25.86 - lr: 0.000041 - momentum: 0.000000
2023-12-21 12:00:11,156 ep

100%|██████████| 80/80 [00:12<00:00,  6.20it/s]

2023-12-21 12:00:43,533 DEV : loss 0.9955548644065857 - f1-score (micro avg)  0.6215





2023-12-21 12:00:44,733 ----------------------------------------------------------------------------------------------------
2023-12-21 12:00:50,602 epoch 4 - iter 39/395 - loss 0.61942332 - time (sec): 5.86 - samples/sec: 26.60 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:00:56,972 epoch 4 - iter 78/395 - loss 0.80661972 - time (sec): 12.23 - samples/sec: 25.50 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:01:03,064 epoch 4 - iter 117/395 - loss 0.84734588 - time (sec): 18.33 - samples/sec: 25.54 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:01:08,892 epoch 4 - iter 156/395 - loss 0.88069202 - time (sec): 24.15 - samples/sec: 25.83 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:01:15,122 epoch 4 - iter 195/395 - loss 0.83099171 - time (sec): 30.38 - samples/sec: 25.67 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:01:20,913 epoch 4 - iter 234/395 - loss 0.82421227 - time (sec): 36.17 - samples/sec: 25.87 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:01:27,809 ep

100%|██████████| 80/80 [00:12<00:00,  6.18it/s]

2023-12-21 12:01:59,783 DEV : loss 1.4708088636398315 - f1-score (micro avg)  0.4961





2023-12-21 12:02:00,412 ----------------------------------------------------------------------------------------------------
2023-12-21 12:02:06,850 epoch 5 - iter 39/395 - loss 0.55249023 - time (sec): 6.43 - samples/sec: 24.24 - lr: 0.000033 - momentum: 0.000000
2023-12-21 12:02:13,618 epoch 5 - iter 78/395 - loss 0.47396629 - time (sec): 13.20 - samples/sec: 23.63 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:02:19,742 epoch 5 - iter 117/395 - loss 0.47368620 - time (sec): 19.33 - samples/sec: 24.22 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:02:25,681 epoch 5 - iter 156/395 - loss 0.51240934 - time (sec): 25.27 - samples/sec: 24.70 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:02:31,676 epoch 5 - iter 195/395 - loss 0.53506487 - time (sec): 31.26 - samples/sec: 24.95 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:02:37,511 epoch 5 - iter 234/395 - loss 0.54407825 - time (sec): 37.09 - samples/sec: 25.23 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:02:43,658 ep

100%|██████████| 80/80 [00:14<00:00,  5.53it/s]

2023-12-21 12:03:16,923 DEV : loss 2.322049140930176 - f1-score (micro avg)  0.5213





2023-12-21 12:03:17,589 ----------------------------------------------------------------------------------------------------
2023-12-21 12:03:23,758 epoch 6 - iter 39/395 - loss 0.33179173 - time (sec): 6.17 - samples/sec: 25.30 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:03:29,911 epoch 6 - iter 78/395 - loss 0.27138272 - time (sec): 12.32 - samples/sec: 25.33 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:03:35,768 epoch 6 - iter 117/395 - loss 0.33455898 - time (sec): 18.18 - samples/sec: 25.75 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:03:42,028 epoch 6 - iter 156/395 - loss 0.31623688 - time (sec): 24.44 - samples/sec: 25.54 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:03:48,469 epoch 6 - iter 195/395 - loss 0.31272650 - time (sec): 30.88 - samples/sec: 25.26 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:03:55,041 epoch 6 - iter 234/395 - loss 0.35155771 - time (sec): 37.45 - samples/sec: 24.99 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:04:00,965 ep

100%|██████████| 80/80 [00:13<00:00,  6.14it/s]

2023-12-21 12:04:32,900 DEV : loss 2.53371262550354 - f1-score (micro avg)  0.5386





2023-12-21 12:04:33,595 ----------------------------------------------------------------------------------------------------
2023-12-21 12:04:39,584 epoch 7 - iter 39/395 - loss 0.07691224 - time (sec): 5.98 - samples/sec: 26.07 - lr: 0.000022 - momentum: 0.000000
2023-12-21 12:04:45,866 epoch 7 - iter 78/395 - loss 0.13371266 - time (sec): 12.27 - samples/sec: 25.43 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:04:51,675 epoch 7 - iter 117/395 - loss 0.11169969 - time (sec): 18.08 - samples/sec: 25.89 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:04:57,943 epoch 7 - iter 156/395 - loss 0.13225394 - time (sec): 24.34 - samples/sec: 25.63 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:05:03,981 epoch 7 - iter 195/395 - loss 0.13127132 - time (sec): 30.38 - samples/sec: 25.67 - lr: 0.000019 - momentum: 0.000000
2023-12-21 12:05:10,301 epoch 7 - iter 234/395 - loss 0.16695569 - time (sec): 36.70 - samples/sec: 25.50 - lr: 0.000019 - momentum: 0.000000
2023-12-21 12:05:16,251 ep

100%|██████████| 80/80 [00:13<00:00,  6.15it/s]

2023-12-21 12:05:48,837 DEV : loss 2.9352893829345703 - f1-score (micro avg)  0.6033





2023-12-21 12:05:51,281 ----------------------------------------------------------------------------------------------------
2023-12-21 12:05:57,515 epoch 8 - iter 39/395 - loss 0.11531138 - time (sec): 6.23 - samples/sec: 25.04 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:06:03,875 epoch 8 - iter 78/395 - loss 0.16557266 - time (sec): 12.59 - samples/sec: 24.78 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:06:10,476 epoch 8 - iter 117/395 - loss 0.15752940 - time (sec): 19.19 - samples/sec: 24.39 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:06:16,191 epoch 8 - iter 156/395 - loss 0.15021398 - time (sec): 24.91 - samples/sec: 25.05 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:06:22,139 epoch 8 - iter 195/395 - loss 0.15261338 - time (sec): 30.85 - samples/sec: 25.28 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:06:28,175 epoch 8 - iter 234/395 - loss 0.17599401 - time (sec): 36.89 - samples/sec: 25.37 - lr: 0.000013 - momentum: 0.000000
2023-12-21 12:06:34,315 ep

100%|██████████| 80/80 [00:12<00:00,  6.20it/s]

2023-12-21 12:07:06,534 DEV : loss 3.4906165599823 - f1-score (micro avg)  0.5434





2023-12-21 12:07:07,688 ----------------------------------------------------------------------------------------------------
2023-12-21 12:07:13,978 epoch 9 - iter 39/395 - loss 0.07918209 - time (sec): 6.29 - samples/sec: 24.82 - lr: 0.000011 - momentum: 0.000000
2023-12-21 12:07:20,718 epoch 9 - iter 78/395 - loss 0.05740744 - time (sec): 13.03 - samples/sec: 23.95 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:07:26,753 epoch 9 - iter 117/395 - loss 0.07909145 - time (sec): 19.06 - samples/sec: 24.55 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:07:32,546 epoch 9 - iter 156/395 - loss 0.08068207 - time (sec): 24.85 - samples/sec: 25.11 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:07:39,142 epoch 9 - iter 195/395 - loss 0.09283474 - time (sec): 31.45 - samples/sec: 24.80 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:07:45,000 epoch 9 - iter 234/395 - loss 0.11149778 - time (sec): 37.31 - samples/sec: 25.09 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:07:51,268 ep

100%|██████████| 80/80 [00:12<00:00,  6.19it/s]

2023-12-21 12:08:23,949 DEV : loss 4.237194061279297 - f1-score (micro avg)  0.4929





2023-12-21 12:08:24,593 ----------------------------------------------------------------------------------------------------
2023-12-21 12:08:30,549 epoch 10 - iter 39/395 - loss 0.09534309 - time (sec): 5.95 - samples/sec: 26.21 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:08:37,040 epoch 10 - iter 78/395 - loss 0.07337435 - time (sec): 12.44 - samples/sec: 25.07 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:08:43,175 epoch 10 - iter 117/395 - loss 0.09845242 - time (sec): 18.58 - samples/sec: 25.19 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:08:49,440 epoch 10 - iter 156/395 - loss 0.08885077 - time (sec): 24.84 - samples/sec: 25.12 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:08:55,632 epoch 10 - iter 195/395 - loss 0.09625780 - time (sec): 31.04 - samples/sec: 25.13 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:09:01,901 epoch 10 - iter 234/395 - loss 0.08333971 - time (sec): 37.30 - samples/sec: 25.09 - lr: 0.000002 - momentum: 0.000000
2023-12-21 12:09:08,

100%|██████████| 80/80 [00:14<00:00,  5.55it/s]

2023-12-21 12:09:41,778 DEV : loss 3.924910545349121 - f1-score (micro avg)  0.5331





2023-12-21 12:09:47,167 ----------------------------------------------------------------------------------------------------
2023-12-21 12:09:47,175 Testing using last state of model ...


100%|██████████| 110/110 [00:18<00:00,  5.90it/s]


2023-12-21 12:10:05,872 
Results:
- F-score (micro) 0.5584
- F-score (macro) 0.4002
- Accuracy 0.5584

By class:
              precision    recall  f1-score   support

     comment     0.9169    0.5535    0.6903      1476
     support     0.1025    0.3558    0.1591       104
        deny     0.2250    0.7200    0.3429       100
       query     0.2816    0.7424    0.4083        66

    accuracy                         0.5584      1746
   macro avg     0.3815    0.5929    0.4002      1746
weighted avg     0.8048    0.5584    0.6281      1746

2023-12-21 12:10:05,874 ----------------------------------------------------------------------------------------------------


In [None]:
# microsoft/deberta-v3-base
add_previous_comment = True
add_post_title = True
add_depth = False

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[51]: "The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 12:10:16,481 Reading data from .
2023-12-21 12:10:16,482 Train: train_fasttext_format.txt
2023-12-21 12:10:16,486 Dev: dev_fasttext_format.txt
2023-12-21 12:10:16,488 Test: test_fasttext_format.txt
2023-12-21 12:10:16,578 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1647,
        "number_of_documents_per_class": {
            "deny": 360,
            "query": 387,
            "comment": 450,
            "support": 450
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 92447,
            "min": 13,
            "max": 664,
            "avg": 56.13054037644201
        }
    },
    "TEST": {
        "dataset": "



Example of an embedding:
	Sentence[53]: "AFP reports there are 2 dead, and 5 hostages being held in the Kosher store in Eastern Paris; separate incident to charlie hebdo shooters. | $MENTION$ $MENTION$ Officials have apparently confirmed that the deli is linked to #CharlieHebdo &amp; killing of officer." → deny (1.0)
	tensor([ 1.6262e-01,  2.2467e-01,  3.0557e-02, -8.8129e-02,  5.6375e-02,
        -1.2209e-01,  1.1113e-02,  1.1126e-01, -2.0146e-01,  8.9319e-02,
         9.0549e-02, -3.0501e-01,  1.3721e-02,  2.5434e-01,  8.2459e-02,
        -3.8001e-02, -1.3914e-01, -6.6079e-03, -1.2918e-01, -9.8285e-03,
        -2.0756e-01, -2.2062e-01, -3.1388e-02,  7.2702e-02, -1.5297e-02,
        -6.0766e-02, -3.9302e-02, -5.4088e-02,  2.4068e-02,  9.7093e-02,
        -9.1520e-02, -7.6734e-03, -6.7796e-02, -7.4935e-02,  1.4654e-01,
         1.6277e-01, -8.0084e-02,  5.2077e-02, -1.3061e-01,  6.9444e-02,
        -1.8608e-01, -1.0225e-02, -1.7062e-02, -7.3506e-02,  1.1273e-01,
         1.6067e-02,  5

0it [00:00, ?it/s]
1647it [00:02, 696.11it/s]

2023-12-21 12:10:36,865 Dictionary created for label 'class' with 4 values: comment (seen 450 times), support (seen 450 times), query (seen 387 times), deny (seen 360 times)
2023-12-21 12:10:36,874 ----------------------------------------------------------------------------------------------------
2023-12-21 12:10:36,878 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 12:10:45,396 epoch 1 - iter 41/412 - loss 1.93933976 - time (sec): 8.49 - samples/sec: 19.33 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:10:52,890 epoch 1 - iter 82/412 - loss 1.71226209 - time (sec): 15.98 - samples/sec: 20.53 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:11:00,200 epoch 1 - iter 123/412 - loss 1.60944956 - time (sec): 23.29 - samples/sec: 21.13 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:11:07,402 epoch 1 - iter 164/412 - loss 1.56011538 - time (sec): 30.49 - samples/sec: 21.51 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:11:15,945 epoch 1 - iter 205/412 - loss 1.53093772 - time (sec): 39.03 - samples/sec: 21.01 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:11:23,050 epoch 1 - iter 246/412 - loss 1.49148375 - time (sec): 46.14 - samples/sec: 21.33 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:11:30,215 epoch 1 - iter 287/412 - loss 1.46261405 - time (sec): 53.30 - samples/sec: 21.54 - lr: 0.000035 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:26<00:00,  2.99it/s]

2023-12-21 12:12:21,399 DEV : loss 1.18055260181427 - f1-score (micro avg)  0.3935





2023-12-21 12:12:22,862 ----------------------------------------------------------------------------------------------------
2023-12-21 12:12:29,665 epoch 2 - iter 41/412 - loss 1.29070579 - time (sec): 6.80 - samples/sec: 24.12 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:12:37,190 epoch 2 - iter 82/412 - loss 1.26132902 - time (sec): 14.32 - samples/sec: 22.90 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:12:43,982 epoch 2 - iter 123/412 - loss 1.25247072 - time (sec): 21.12 - samples/sec: 23.30 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:12:51,895 epoch 2 - iter 164/412 - loss 1.22514015 - time (sec): 29.03 - samples/sec: 22.60 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:12:59,786 epoch 2 - iter 205/412 - loss 1.21511181 - time (sec): 36.92 - samples/sec: 22.21 - lr: 0.000047 - momentum: 0.000000
2023-12-21 12:13:08,973 epoch 2 - iter 246/412 - loss 1.20929530 - time (sec): 46.11 - samples/sec: 21.34 - lr: 0.000047 - momentum: 0.000000
2023-12-21 12:13:16,517 ep

100%|██████████| 80/80 [00:25<00:00,  3.14it/s]

2023-12-21 12:14:05,401 DEV : loss 1.3054436445236206 - f1-score (micro avg)  0.4826





2023-12-21 12:14:08,709 ----------------------------------------------------------------------------------------------------
2023-12-21 12:14:16,070 epoch 3 - iter 41/412 - loss 0.96934815 - time (sec): 7.35 - samples/sec: 22.30 - lr: 0.000044 - momentum: 0.000000
2023-12-21 12:14:25,107 epoch 3 - iter 82/412 - loss 1.04361461 - time (sec): 16.39 - samples/sec: 20.01 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:14:32,964 epoch 3 - iter 123/412 - loss 1.06033989 - time (sec): 24.25 - samples/sec: 20.29 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:14:41,737 epoch 3 - iter 164/412 - loss 1.02384608 - time (sec): 33.02 - samples/sec: 19.87 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:14:48,915 epoch 3 - iter 205/412 - loss 1.03395982 - time (sec): 40.20 - samples/sec: 20.40 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:14:56,160 epoch 3 - iter 246/412 - loss 1.03927905 - time (sec): 47.44 - samples/sec: 20.74 - lr: 0.000041 - momentum: 0.000000
2023-12-21 12:15:03,732 ep

100%|██████████| 80/80 [00:25<00:00,  3.15it/s]

2023-12-21 12:15:51,586 DEV : loss 1.3814054727554321 - f1-score (micro avg)  0.3785





2023-12-21 12:15:53,897 ----------------------------------------------------------------------------------------------------
2023-12-21 12:16:01,717 epoch 4 - iter 41/412 - loss 0.72042607 - time (sec): 7.81 - samples/sec: 20.99 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:16:09,245 epoch 4 - iter 82/412 - loss 0.78864361 - time (sec): 15.34 - samples/sec: 21.38 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:16:16,518 epoch 4 - iter 123/412 - loss 0.74636661 - time (sec): 22.61 - samples/sec: 21.76 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:16:24,258 epoch 4 - iter 164/412 - loss 0.83959966 - time (sec): 30.35 - samples/sec: 21.61 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:16:32,852 epoch 4 - iter 205/412 - loss 0.79267351 - time (sec): 38.95 - samples/sec: 21.05 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:16:40,012 epoch 4 - iter 246/412 - loss 0.78434379 - time (sec): 46.11 - samples/sec: 21.34 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:16:48,286 ep

100%|██████████| 80/80 [00:26<00:00,  2.98it/s]

2023-12-21 12:17:38,134 DEV : loss 2.4884722232818604 - f1-score (micro avg)  0.317





2023-12-21 12:17:39,434 ----------------------------------------------------------------------------------------------------
2023-12-21 12:17:47,327 epoch 5 - iter 41/412 - loss 0.43735224 - time (sec): 7.89 - samples/sec: 20.79 - lr: 0.000033 - momentum: 0.000000
2023-12-21 12:17:54,956 epoch 5 - iter 82/412 - loss 0.48046431 - time (sec): 15.52 - samples/sec: 21.14 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:18:02,900 epoch 5 - iter 123/412 - loss 0.51532144 - time (sec): 23.46 - samples/sec: 20.97 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:18:10,240 epoch 5 - iter 164/412 - loss 0.49111177 - time (sec): 30.80 - samples/sec: 21.30 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:18:18,534 epoch 5 - iter 205/412 - loss 0.49302924 - time (sec): 39.10 - samples/sec: 20.97 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:18:25,509 epoch 5 - iter 246/412 - loss 0.51922285 - time (sec): 46.07 - samples/sec: 21.36 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:18:32,908 ep

100%|██████████| 80/80 [00:25<00:00,  3.16it/s]

2023-12-21 12:19:21,252 DEV : loss 2.8006699085235596 - f1-score (micro avg)  0.3888





2023-12-21 12:19:22,599 ----------------------------------------------------------------------------------------------------
2023-12-21 12:19:30,076 epoch 6 - iter 41/412 - loss 0.30830922 - time (sec): 7.47 - samples/sec: 21.95 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:19:37,822 epoch 6 - iter 82/412 - loss 0.36230393 - time (sec): 15.22 - samples/sec: 21.55 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:19:45,095 epoch 6 - iter 123/412 - loss 0.34314080 - time (sec): 22.49 - samples/sec: 21.87 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:19:53,501 epoch 6 - iter 164/412 - loss 0.33838823 - time (sec): 30.90 - samples/sec: 21.23 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:20:00,241 epoch 6 - iter 205/412 - loss 0.29467722 - time (sec): 37.64 - samples/sec: 21.79 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:20:08,004 epoch 6 - iter 246/412 - loss 0.30824981 - time (sec): 45.40 - samples/sec: 21.67 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:20:15,909 ep

100%|██████████| 80/80 [00:25<00:00,  3.15it/s]


2023-12-21 12:21:06,347 DEV : loss 5.426933288574219 - f1-score (micro avg)  0.3281
2023-12-21 12:21:07,481 ----------------------------------------------------------------------------------------------------
2023-12-21 12:21:14,465 epoch 7 - iter 41/412 - loss 0.14912524 - time (sec): 6.98 - samples/sec: 23.49 - lr: 0.000022 - momentum: 0.000000
2023-12-21 12:21:22,062 epoch 7 - iter 82/412 - loss 0.14724392 - time (sec): 14.58 - samples/sec: 22.50 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:21:29,406 epoch 7 - iter 123/412 - loss 0.15776884 - time (sec): 21.92 - samples/sec: 22.44 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:21:37,819 epoch 7 - iter 164/412 - loss 0.13957673 - time (sec): 30.33 - samples/sec: 21.63 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:21:44,731 epoch 7 - iter 205/412 - loss 0.14053685 - time (sec): 37.25 - samples/sec: 22.02 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:21:52,472 epoch 7 - iter 246/412 - loss 0.13589929 - time (sec): 44.99

100%|██████████| 80/80 [00:25<00:00,  3.18it/s]


2023-12-21 12:22:48,803 DEV : loss 5.367938041687012 - f1-score (micro avg)  0.4109
2023-12-21 12:22:51,524 ----------------------------------------------------------------------------------------------------
2023-12-21 12:22:58,811 epoch 8 - iter 41/412 - loss 0.07416724 - time (sec): 7.28 - samples/sec: 22.51 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:23:06,310 epoch 8 - iter 82/412 - loss 0.06084457 - time (sec): 14.78 - samples/sec: 22.19 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:23:13,378 epoch 8 - iter 123/412 - loss 0.05667513 - time (sec): 21.85 - samples/sec: 22.52 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:23:21,495 epoch 8 - iter 164/412 - loss 0.09376452 - time (sec): 29.97 - samples/sec: 21.89 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:23:28,672 epoch 8 - iter 205/412 - loss 0.09377528 - time (sec): 37.15 - samples/sec: 22.08 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:23:37,088 epoch 8 - iter 246/412 - loss 0.09751925 - time (sec): 45.56

100%|██████████| 80/80 [00:25<00:00,  3.15it/s]

2023-12-21 12:24:33,659 DEV : loss 4.741517066955566 - f1-score (micro avg)  0.5205





2023-12-21 12:24:35,664 ----------------------------------------------------------------------------------------------------
2023-12-21 12:24:43,750 epoch 9 - iter 41/412 - loss 0.00175277 - time (sec): 8.08 - samples/sec: 20.30 - lr: 0.000011 - momentum: 0.000000
2023-12-21 12:24:51,464 epoch 9 - iter 82/412 - loss 0.00228322 - time (sec): 15.79 - samples/sec: 20.77 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:24:59,511 epoch 9 - iter 123/412 - loss 0.02178006 - time (sec): 23.84 - samples/sec: 20.64 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:25:06,424 epoch 9 - iter 164/412 - loss 0.02193626 - time (sec): 30.75 - samples/sec: 21.33 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:25:14,368 epoch 9 - iter 205/412 - loss 0.01762455 - time (sec): 38.70 - samples/sec: 21.19 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:25:21,746 epoch 9 - iter 246/412 - loss 0.03284200 - time (sec): 46.08 - samples/sec: 21.36 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:25:30,840 ep

100%|██████████| 80/80 [00:27<00:00,  2.93it/s]


2023-12-21 12:26:19,949 DEV : loss 5.21014404296875 - f1-score (micro avg)  0.4929
2023-12-21 12:26:21,078 ----------------------------------------------------------------------------------------------------
2023-12-21 12:26:27,949 epoch 10 - iter 41/412 - loss 0.01761200 - time (sec): 6.87 - samples/sec: 23.88 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:26:35,422 epoch 10 - iter 82/412 - loss 0.02906772 - time (sec): 14.34 - samples/sec: 22.87 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:26:42,397 epoch 10 - iter 123/412 - loss 0.02795712 - time (sec): 21.31 - samples/sec: 23.08 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:26:50,211 epoch 10 - iter 164/412 - loss 0.04748201 - time (sec): 29.13 - samples/sec: 22.52 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:26:58,719 epoch 10 - iter 205/412 - loss 0.04407823 - time (sec): 37.64 - samples/sec: 21.79 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:27:05,877 epoch 10 - iter 246/412 - loss 0.03741078 - time (sec): 

100%|██████████| 80/80 [00:25<00:00,  3.16it/s]


2023-12-21 12:28:03,195 DEV : loss 5.538442611694336 - f1-score (micro avg)  0.474
2023-12-21 12:28:08,904 ----------------------------------------------------------------------------------------------------
2023-12-21 12:28:08,908 Testing using last state of model ...


100%|██████████| 110/110 [00:27<00:00,  3.98it/s]

2023-12-21 12:28:36,578 
Results:
- F-score (micro) 0.516
- F-score (macro) 0.3804
- Accuracy 0.516

By class:
              precision    recall  f1-score   support

     comment     0.9072    0.5102    0.6531      1476
     support     0.0747    0.2788    0.1179       104
        deny     0.1867    0.7300    0.2974       100
       query     0.3358    0.6970    0.4532        66

    accuracy                         0.5160      1746
   macro avg     0.3761    0.5540    0.3804      1746
weighted avg     0.7948    0.5160    0.5933      1746

2023-12-21 12:28:36,579 ----------------------------------------------------------------------------------------------------





In [None]:
# microsoft/deberta-v3-base
add_previous_comment = False
add_post_title = False
add_depth = True

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[29]: "1 | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 12:29:52,579 Reading data from .
2023-12-21 12:29:52,582 Train: train_fasttext_format.txt
2023-12-21 12:29:52,583 Dev: dev_fasttext_format.txt
2023-12-21 12:29:52,586 Test: test_fasttext_format.txt
2023-12-21 12:29:52,676 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1619,
        "number_of_documents_per_class": {
            "support": 437,
            "comment": 437,
            "query": 395,
            "deny": 350
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 40815,
            "min": 3,
            "max": 751,
            "avg": 25.210006176652254
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_docume



Example of an embedding:
	Sentence[17]: "2 | $MENTION$ $MENTION$ Yes, but think it was too dangerous." → support (1.0)
	tensor([ 1.4270e-01,  2.5445e-01,  4.7761e-02, -8.6881e-02,  8.7037e-02,
        -1.2384e-01,  4.5833e-02,  1.4208e-01, -2.1360e-01,  9.1224e-02,
         4.8549e-02, -3.1168e-01, -1.2635e-02,  2.6830e-01,  9.6695e-02,
        -4.4871e-02, -1.0445e-01, -6.6423e-02, -1.7607e-01, -4.2733e-02,
        -2.0852e-01, -1.7605e-01, -4.3894e-02,  3.5927e-02,  3.7008e-02,
        -5.5675e-02, -4.6586e-02, -8.6087e-02,  3.9670e-02,  1.2012e-01,
        -1.5467e-01, -1.7173e-02, -1.2741e-01, -1.0395e-01,  1.4453e-01,
         1.7798e-01, -8.5988e-02,  4.4911e-02, -1.0264e-01,  3.7803e-02,
        -1.9328e-01, -4.2592e-02,  1.6599e-03, -4.7770e-02,  5.3300e-02,
         4.8131e-02,  5.8819e-02, -1.2751e-01, -2.2832e-02, -1.8337e-03,
        -5.9992e-02, -1.5252e-01,  1.5046e-01, -1.1368e-01, -9.1492e-01,
         8.1495e-02, -3.0327e-01,  1.6789e-01,  9.1057e-02, -2.0041e-01,
    

0it [00:00, ?it/s]
1619it [00:00, 2105.68it/s]

2023-12-21 12:30:03,085 Dictionary created for label 'class' with 4 values: support (seen 437 times), comment (seen 437 times), query (seen 395 times), deny (seen 350 times)
2023-12-21 12:30:03,095 ----------------------------------------------------------------------------------------------------
2023-12-21 12:30:03,098 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 12:30:08,939 epoch 1 - iter 40/405 - loss 1.62618510 - time (sec): 5.80 - samples/sec: 27.57 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:30:14,951 epoch 1 - iter 80/405 - loss 1.56961123 - time (sec): 11.82 - samples/sec: 27.08 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:30:21,233 epoch 1 - iter 120/405 - loss 1.52448821 - time (sec): 18.10 - samples/sec: 26.52 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:30:27,100 epoch 1 - iter 160/405 - loss 1.50097753 - time (sec): 23.96 - samples/sec: 26.71 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:30:34,057 epoch 1 - iter 200/405 - loss 1.47211652 - time (sec): 30.92 - samples/sec: 25.87 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:30:41,095 epoch 1 - iter 240/405 - loss 1.44835305 - time (sec): 37.96 - samples/sec: 25.29 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:30:47,991 epoch 1 - iter 280/405 - loss 1.45794543 - time (sec): 44.85 - samples/sec: 24.97 - lr: 0.000034 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:16<00:00,  4.97it/s]

2023-12-21 12:31:22,961 DEV : loss 1.0464226007461548 - f1-score (micro avg)  0.6333





2023-12-21 12:31:23,495 ----------------------------------------------------------------------------------------------------
2023-12-21 12:31:30,126 epoch 2 - iter 40/405 - loss 1.18073328 - time (sec): 6.63 - samples/sec: 24.14 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:31:36,223 epoch 2 - iter 80/405 - loss 1.21901857 - time (sec): 12.73 - samples/sec: 25.15 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:31:42,228 epoch 2 - iter 120/405 - loss 1.18325350 - time (sec): 18.73 - samples/sec: 25.63 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:31:48,757 epoch 2 - iter 160/405 - loss 1.17524868 - time (sec): 25.26 - samples/sec: 25.34 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:31:54,450 epoch 2 - iter 200/405 - loss 1.19947671 - time (sec): 30.95 - samples/sec: 25.85 - lr: 0.000047 - momentum: 0.000000
2023-12-21 12:32:01,768 epoch 2 - iter 240/405 - loss 1.20862678 - time (sec): 38.27 - samples/sec: 25.08 - lr: 0.000047 - momentum: 0.000000
2023-12-21 12:32:07,639 ep

100%|██████████| 80/80 [00:16<00:00,  4.98it/s]

2023-12-21 12:32:43,096 DEV : loss 1.1575891971588135 - f1-score (micro avg)  0.5079





2023-12-21 12:32:45,389 ----------------------------------------------------------------------------------------------------
2023-12-21 12:32:51,391 epoch 3 - iter 40/405 - loss 0.98170653 - time (sec): 6.00 - samples/sec: 26.68 - lr: 0.000044 - momentum: 0.000000
2023-12-21 12:32:57,601 epoch 3 - iter 80/405 - loss 1.01513398 - time (sec): 12.21 - samples/sec: 26.22 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:33:03,743 epoch 3 - iter 120/405 - loss 1.03728639 - time (sec): 18.35 - samples/sec: 26.16 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:33:09,598 epoch 3 - iter 160/405 - loss 1.03942651 - time (sec): 24.20 - samples/sec: 26.44 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:33:16,677 epoch 3 - iter 200/405 - loss 1.06196010 - time (sec): 31.28 - samples/sec: 25.57 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:33:23,162 epoch 3 - iter 240/405 - loss 1.05395290 - time (sec): 37.77 - samples/sec: 25.42 - lr: 0.000041 - momentum: 0.000000
2023-12-21 12:33:29,567 ep

100%|██████████| 80/80 [00:15<00:00,  5.00it/s]

2023-12-21 12:34:05,901 DEV : loss 0.9105156660079956 - f1-score (micro avg)  0.664





2023-12-21 12:34:06,452 ----------------------------------------------------------------------------------------------------
2023-12-21 12:34:12,060 epoch 4 - iter 40/405 - loss 0.81975073 - time (sec): 5.60 - samples/sec: 28.55 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:34:18,534 epoch 4 - iter 80/405 - loss 0.84111448 - time (sec): 12.08 - samples/sec: 26.49 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:34:24,148 epoch 4 - iter 120/405 - loss 0.87364847 - time (sec): 17.69 - samples/sec: 27.13 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:34:30,602 epoch 4 - iter 160/405 - loss 0.81396598 - time (sec): 24.15 - samples/sec: 26.50 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:34:37,621 epoch 4 - iter 200/405 - loss 0.82327964 - time (sec): 31.17 - samples/sec: 25.67 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:34:43,814 epoch 4 - iter 240/405 - loss 0.83004833 - time (sec): 37.36 - samples/sec: 25.70 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:34:50,582 ep

100%|██████████| 80/80 [00:16<00:00,  4.99it/s]

2023-12-21 12:35:25,810 DEV : loss 2.9140923023223877 - f1-score (micro avg)  0.4164





2023-12-21 12:35:26,668 ----------------------------------------------------------------------------------------------------
2023-12-21 12:35:33,839 epoch 5 - iter 40/405 - loss 0.64690250 - time (sec): 7.17 - samples/sec: 22.33 - lr: 0.000033 - momentum: 0.000000
2023-12-21 12:35:39,687 epoch 5 - iter 80/405 - loss 0.70231259 - time (sec): 13.01 - samples/sec: 24.59 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:35:46,743 epoch 5 - iter 120/405 - loss 0.61508401 - time (sec): 20.07 - samples/sec: 23.92 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:35:53,064 epoch 5 - iter 160/405 - loss 0.63351852 - time (sec): 26.39 - samples/sec: 24.25 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:35:59,267 epoch 5 - iter 200/405 - loss 0.62769233 - time (sec): 32.59 - samples/sec: 24.54 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:36:05,238 epoch 5 - iter 240/405 - loss 0.65281051 - time (sec): 38.57 - samples/sec: 24.89 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:36:11,079 ep

100%|██████████| 80/80 [00:16<00:00,  5.00it/s]

2023-12-21 12:36:46,192 DEV : loss 3.515554666519165 - f1-score (micro avg)  0.4259





2023-12-21 12:36:46,764 ----------------------------------------------------------------------------------------------------
2023-12-21 12:36:52,482 epoch 6 - iter 40/405 - loss 0.52167000 - time (sec): 5.71 - samples/sec: 28.00 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:36:58,778 epoch 6 - iter 80/405 - loss 0.50198954 - time (sec): 12.01 - samples/sec: 26.64 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:37:05,045 epoch 6 - iter 120/405 - loss 0.44110068 - time (sec): 18.28 - samples/sec: 26.26 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:37:11,073 epoch 6 - iter 160/405 - loss 0.45365216 - time (sec): 24.31 - samples/sec: 26.33 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:37:17,207 epoch 6 - iter 200/405 - loss 0.42161496 - time (sec): 30.44 - samples/sec: 26.28 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:37:22,912 epoch 6 - iter 240/405 - loss 0.41991671 - time (sec): 36.14 - samples/sec: 26.56 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:37:29,721 ep

100%|██████████| 80/80 [00:16<00:00,  4.96it/s]

2023-12-21 12:38:07,699 DEV : loss 3.567004680633545 - f1-score (micro avg)  0.5276





2023-12-21 12:38:08,547 ----------------------------------------------------------------------------------------------------
2023-12-21 12:38:14,683 epoch 7 - iter 40/405 - loss 0.42002550 - time (sec): 6.13 - samples/sec: 26.09 - lr: 0.000022 - momentum: 0.000000
2023-12-21 12:38:21,526 epoch 7 - iter 80/405 - loss 0.33727093 - time (sec): 12.97 - samples/sec: 24.66 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:38:28,268 epoch 7 - iter 120/405 - loss 0.35614937 - time (sec): 19.72 - samples/sec: 24.35 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:38:34,321 epoch 7 - iter 160/405 - loss 0.30578193 - time (sec): 25.77 - samples/sec: 24.83 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:38:40,474 epoch 7 - iter 200/405 - loss 0.28599596 - time (sec): 31.92 - samples/sec: 25.06 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:38:46,500 epoch 7 - iter 240/405 - loss 0.28228303 - time (sec): 37.95 - samples/sec: 25.30 - lr: 0.000019 - momentum: 0.000000
2023-12-21 12:38:52,684 ep

100%|██████████| 80/80 [00:15<00:00,  5.00it/s]

2023-12-21 12:39:28,554 DEV : loss 5.727193355560303 - f1-score (micro avg)  0.3927





2023-12-21 12:39:29,101 ----------------------------------------------------------------------------------------------------
2023-12-21 12:39:34,811 epoch 8 - iter 40/405 - loss 0.16218713 - time (sec): 5.71 - samples/sec: 28.03 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:39:41,379 epoch 8 - iter 80/405 - loss 0.16620309 - time (sec): 12.27 - samples/sec: 26.07 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:39:47,072 epoch 8 - iter 120/405 - loss 0.16741679 - time (sec): 17.97 - samples/sec: 26.71 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:39:54,134 epoch 8 - iter 160/405 - loss 0.17067320 - time (sec): 25.03 - samples/sec: 25.57 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:40:00,274 epoch 8 - iter 200/405 - loss 0.15914132 - time (sec): 31.17 - samples/sec: 25.67 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:40:06,712 epoch 8 - iter 240/405 - loss 0.15392502 - time (sec): 37.61 - samples/sec: 25.53 - lr: 0.000013 - momentum: 0.000000
2023-12-21 12:40:13,235 ep

100%|██████████| 80/80 [00:16<00:00,  4.98it/s]

2023-12-21 12:40:48,492 DEV : loss 4.1046977043151855 - f1-score (micro avg)  0.5197





2023-12-21 12:40:49,036 ----------------------------------------------------------------------------------------------------
2023-12-21 12:40:56,464 epoch 9 - iter 40/405 - loss 0.11900860 - time (sec): 7.42 - samples/sec: 21.55 - lr: 0.000011 - momentum: 0.000000
2023-12-21 12:41:02,360 epoch 9 - iter 80/405 - loss 0.10274720 - time (sec): 13.32 - samples/sec: 24.02 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:41:08,516 epoch 9 - iter 120/405 - loss 0.08237205 - time (sec): 19.48 - samples/sec: 24.65 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:41:14,843 epoch 9 - iter 160/405 - loss 0.12547783 - time (sec): 25.80 - samples/sec: 24.80 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:41:20,717 epoch 9 - iter 200/405 - loss 0.12452749 - time (sec): 31.68 - samples/sec: 25.25 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:41:27,449 epoch 9 - iter 240/405 - loss 0.12535766 - time (sec): 38.41 - samples/sec: 24.99 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:41:33,547 ep

100%|██████████| 80/80 [00:16<00:00,  4.90it/s]

2023-12-21 12:42:11,388 DEV : loss 5.2262139320373535 - f1-score (micro avg)  0.4629





2023-12-21 12:42:12,376 ----------------------------------------------------------------------------------------------------
2023-12-21 12:42:18,809 epoch 10 - iter 40/405 - loss 0.08202100 - time (sec): 6.43 - samples/sec: 24.89 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:42:26,144 epoch 10 - iter 80/405 - loss 0.10943967 - time (sec): 13.76 - samples/sec: 23.25 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:42:32,610 epoch 10 - iter 120/405 - loss 0.09555158 - time (sec): 20.23 - samples/sec: 23.73 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:42:38,644 epoch 10 - iter 160/405 - loss 0.09348484 - time (sec): 26.26 - samples/sec: 24.37 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:42:45,014 epoch 10 - iter 200/405 - loss 0.09741289 - time (sec): 32.63 - samples/sec: 24.51 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:42:51,223 epoch 10 - iter 240/405 - loss 0.10712862 - time (sec): 38.84 - samples/sec: 24.72 - lr: 0.000002 - momentum: 0.000000
2023-12-21 12:42:57,

100%|██████████| 80/80 [00:16<00:00,  4.97it/s]

2023-12-21 12:43:32,541 DEV : loss 5.205692291259766 - f1-score (micro avg)  0.4874





2023-12-21 12:43:39,134 ----------------------------------------------------------------------------------------------------
2023-12-21 12:43:39,139 Testing using last state of model ...


100%|██████████| 110/110 [00:17<00:00,  6.37it/s]

2023-12-21 12:43:56,468 
Results:
- F-score (micro) 0.6128
- F-score (macro) 0.4127
- Accuracy 0.6128

By class:
              precision    recall  f1-score   support

     comment     0.9269    0.6267    0.7478      1476
     support     0.1010    0.2788    0.1483       104
        deny     0.2334    0.6700    0.3463       100
       query     0.2816    0.7424    0.4083        66

    accuracy                         0.6128      1746
   macro avg     0.3857    0.5795    0.4127      1746
weighted avg     0.8136    0.6128    0.6762      1746

2023-12-21 12:43:56,474 ----------------------------------------------------------------------------------------------------





In [None]:
# microsoft/deberta-v3-base
add_previous_comment = True
add_post_title = True
add_depth = True

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[53]: "1 | The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 12:44:06,702 Reading data from .
2023-12-21 12:44:06,704 Train: train_fasttext_format.txt
2023-12-21 12:44:06,706 Dev: dev_fasttext_format.txt
2023-12-21 12:44:06,708 Test: test_fasttext_format.txt
2023-12-21 12:44:06,801 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1606,
        "number_of_documents_per_class": {
            "support": 433,
            "deny": 347,
            "query": 393,
            "comment": 433
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 90466,
            "min": 16,
            "max": 601,
            "avg": 56.33001245330013
        }
    },
    "TEST": {
        "dataset



Example of an embedding:
	Sentence[42]: "1 | FIRST PHOTOS: Hostage pressed against a window. #SydneySiege $URL$ | $MENTION$ CAUTION Be aware that this could be a #FalseFlag Event unfolding right across the street from channel 7 coincidence? Perhaps" → support (1.0)
	tensor([ 1.5862e-01,  2.0805e-01,  8.2993e-03, -8.3260e-02,  5.8025e-02,
        -1.2342e-01,  6.4949e-03,  1.2236e-01, -2.2791e-01,  9.8080e-02,
         7.2621e-02, -3.2411e-01,  1.2610e-02,  2.9934e-01,  8.7222e-02,
        -3.5200e-02, -1.3333e-01,  1.2582e-02, -1.5071e-01, -7.6307e-03,
        -2.3050e-01, -2.0185e-01, -5.6920e-02,  4.3676e-02,  1.1515e-02,
        -6.5593e-02, -2.6738e-02, -5.8312e-02,  5.3346e-02,  1.0934e-01,
        -1.0719e-01,  1.1189e-02, -1.0134e-01, -6.4594e-02,  1.5853e-01,
         1.6814e-01, -6.9279e-02,  5.9100e-02, -1.0106e-01,  7.1812e-02,
        -1.7794e-01, -1.4851e-02, -1.5957e-02, -7.0774e-02,  8.7063e-02,
         2.3831e-02,  6.6053e-02, -1.2024e-01, -2.6739e-02,  2.7363e-02,
   

0it [00:00, ?it/s]
1606it [00:02, 685.17it/s]

2023-12-21 12:44:27,901 Dictionary created for label 'class' with 4 values: support (seen 433 times), comment (seen 433 times), query (seen 393 times), deny (seen 347 times)
2023-12-21 12:44:27,914 ----------------------------------------------------------------------------------------------------
2023-12-21 12:44:27,919 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 12:44:34,956 epoch 1 - iter 40/402 - loss 1.50682333 - time (sec): 7.00 - samples/sec: 22.87 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:44:42,634 epoch 1 - iter 80/402 - loss 1.48483536 - time (sec): 14.67 - samples/sec: 21.81 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:44:49,609 epoch 1 - iter 120/402 - loss 1.45628777 - time (sec): 21.65 - samples/sec: 22.17 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:44:56,552 epoch 1 - iter 160/402 - loss 1.44759624 - time (sec): 28.59 - samples/sec: 22.38 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:45:04,655 epoch 1 - iter 200/402 - loss 1.42345399 - time (sec): 36.70 - samples/sec: 21.80 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:45:11,861 epoch 1 - iter 240/402 - loss 1.41025421 - time (sec): 43.90 - samples/sec: 21.87 - lr: 0.000030 - momentum: 0.000000
2023-12-21 12:45:18,523 epoch 1 - iter 280/402 - loss 1.38162565 - time (sec): 50.56 - samples/sec: 22.15 - lr: 0.000035 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:24<00:00,  3.30it/s]


2023-12-21 12:46:05,309 DEV : loss 1.2171944379806519 - f1-score (micro avg)  0.7295
2023-12-21 12:46:06,389 ----------------------------------------------------------------------------------------------------
2023-12-21 12:46:13,944 epoch 2 - iter 40/402 - loss 1.45482230 - time (sec): 7.55 - samples/sec: 21.19 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:46:21,614 epoch 2 - iter 80/402 - loss 1.39901064 - time (sec): 15.22 - samples/sec: 21.02 - lr: 0.000049 - momentum: 0.000000
2023-12-21 12:46:31,309 epoch 2 - iter 120/402 - loss 1.36264504 - time (sec): 24.92 - samples/sec: 19.26 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:46:38,480 epoch 2 - iter 160/402 - loss 1.39161612 - time (sec): 32.09 - samples/sec: 19.95 - lr: 0.000048 - momentum: 0.000000
2023-12-21 12:46:45,276 epoch 2 - iter 200/402 - loss 1.39829957 - time (sec): 38.88 - samples/sec: 20.57 - lr: 0.000047 - momentum: 0.000000
2023-12-21 12:46:52,209 epoch 2 - iter 240/402 - loss 1.40087312 - time (sec): 45.8

100%|██████████| 80/80 [00:24<00:00,  3.31it/s]


2023-12-21 12:47:45,054 DEV : loss 1.1444802284240723 - f1-score (micro avg)  0.6759
2023-12-21 12:47:46,103 ----------------------------------------------------------------------------------------------------
2023-12-21 12:47:54,336 epoch 3 - iter 40/402 - loss 1.28865003 - time (sec): 8.23 - samples/sec: 19.44 - lr: 0.000044 - momentum: 0.000000
2023-12-21 12:48:01,156 epoch 3 - iter 80/402 - loss 1.30246119 - time (sec): 15.05 - samples/sec: 21.26 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:48:08,540 epoch 3 - iter 120/402 - loss 1.30195829 - time (sec): 22.43 - samples/sec: 21.40 - lr: 0.000043 - momentum: 0.000000
2023-12-21 12:48:15,475 epoch 3 - iter 160/402 - loss 1.30055621 - time (sec): 29.37 - samples/sec: 21.79 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:48:23,376 epoch 3 - iter 200/402 - loss 1.27820597 - time (sec): 37.27 - samples/sec: 21.47 - lr: 0.000042 - momentum: 0.000000
2023-12-21 12:48:30,078 epoch 3 - iter 240/402 - loss 1.27084065 - time (sec): 43.9

100%|██████████| 80/80 [00:24<00:00,  3.29it/s]

2023-12-21 12:49:23,043 DEV : loss 1.08995521068573 - f1-score (micro avg)  0.5647





2023-12-21 12:49:25,956 ----------------------------------------------------------------------------------------------------
2023-12-21 12:49:32,621 epoch 4 - iter 40/402 - loss 1.04026534 - time (sec): 6.66 - samples/sec: 24.02 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:49:39,793 epoch 4 - iter 80/402 - loss 1.26115330 - time (sec): 13.83 - samples/sec: 23.13 - lr: 0.000038 - momentum: 0.000000
2023-12-21 12:49:46,993 epoch 4 - iter 120/402 - loss 1.27630345 - time (sec): 21.03 - samples/sec: 22.82 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:49:55,604 epoch 4 - iter 160/402 - loss 1.26868323 - time (sec): 29.64 - samples/sec: 21.59 - lr: 0.000037 - momentum: 0.000000
2023-12-21 12:50:02,296 epoch 4 - iter 200/402 - loss 1.24460263 - time (sec): 36.33 - samples/sec: 22.02 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:50:09,628 epoch 4 - iter 240/402 - loss 1.22298057 - time (sec): 43.67 - samples/sec: 21.98 - lr: 0.000036 - momentum: 0.000000
2023-12-21 12:50:16,541 ep

100%|██████████| 80/80 [00:23<00:00,  3.35it/s]


2023-12-21 12:51:02,898 DEV : loss 1.8426169157028198 - f1-score (micro avg)  0.1972
2023-12-21 12:51:04,758 ----------------------------------------------------------------------------------------------------
2023-12-21 12:51:12,462 epoch 5 - iter 40/402 - loss 1.05022631 - time (sec): 7.70 - samples/sec: 20.78 - lr: 0.000033 - momentum: 0.000000
2023-12-21 12:51:19,916 epoch 5 - iter 80/402 - loss 1.04631523 - time (sec): 15.15 - samples/sec: 21.12 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:51:27,002 epoch 5 - iter 120/402 - loss 1.03410025 - time (sec): 22.24 - samples/sec: 21.58 - lr: 0.000032 - momentum: 0.000000
2023-12-21 12:51:33,802 epoch 5 - iter 160/402 - loss 1.03493993 - time (sec): 29.04 - samples/sec: 22.04 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:51:41,676 epoch 5 - iter 200/402 - loss 1.05447413 - time (sec): 36.91 - samples/sec: 21.67 - lr: 0.000031 - momentum: 0.000000
2023-12-21 12:51:48,476 epoch 5 - iter 240/402 - loss 1.03349530 - time (sec): 43.7

100%|██████████| 80/80 [00:26<00:00,  3.07it/s]


2023-12-21 12:52:43,546 DEV : loss 1.3166472911834717 - f1-score (micro avg)  0.5213
2023-12-21 12:52:44,634 ----------------------------------------------------------------------------------------------------
2023-12-21 12:52:51,536 epoch 6 - iter 40/402 - loss 0.84259849 - time (sec): 6.90 - samples/sec: 23.19 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:52:59,035 epoch 6 - iter 80/402 - loss 0.94034076 - time (sec): 14.40 - samples/sec: 22.22 - lr: 0.000027 - momentum: 0.000000
2023-12-21 12:53:05,968 epoch 6 - iter 120/402 - loss 0.96291692 - time (sec): 21.33 - samples/sec: 22.50 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:53:12,995 epoch 6 - iter 160/402 - loss 0.94839207 - time (sec): 28.36 - samples/sec: 22.57 - lr: 0.000026 - momentum: 0.000000
2023-12-21 12:53:20,541 epoch 6 - iter 200/402 - loss 0.93001643 - time (sec): 35.90 - samples/sec: 22.28 - lr: 0.000025 - momentum: 0.000000
2023-12-21 12:53:27,459 epoch 6 - iter 240/402 - loss 0.94058110 - time (sec): 42.8

100%|██████████| 80/80 [00:24<00:00,  3.28it/s]


2023-12-21 12:54:21,819 DEV : loss 2.2592055797576904 - f1-score (micro avg)  0.4125
2023-12-21 12:54:22,888 ----------------------------------------------------------------------------------------------------
2023-12-21 12:54:30,823 epoch 7 - iter 40/402 - loss 0.70146767 - time (sec): 7.93 - samples/sec: 20.17 - lr: 0.000022 - momentum: 0.000000
2023-12-21 12:54:38,111 epoch 7 - iter 80/402 - loss 0.75139565 - time (sec): 15.22 - samples/sec: 21.03 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:54:45,384 epoch 7 - iter 120/402 - loss 0.74016126 - time (sec): 22.49 - samples/sec: 21.34 - lr: 0.000021 - momentum: 0.000000
2023-12-21 12:54:54,283 epoch 7 - iter 160/402 - loss 0.71150735 - time (sec): 31.39 - samples/sec: 20.39 - lr: 0.000020 - momentum: 0.000000
2023-12-21 12:55:01,387 epoch 7 - iter 200/402 - loss 0.72536881 - time (sec): 38.50 - samples/sec: 20.78 - lr: 0.000019 - momentum: 0.000000
2023-12-21 12:55:08,654 epoch 7 - iter 240/402 - loss 0.73900398 - time (sec): 45.7

100%|██████████| 80/80 [00:24<00:00,  3.33it/s]

2023-12-21 12:56:01,329 DEV : loss 1.881283164024353 - f1-score (micro avg)  0.5252





2023-12-21 12:56:03,267 ----------------------------------------------------------------------------------------------------
2023-12-21 12:56:10,487 epoch 8 - iter 40/402 - loss 0.48134586 - time (sec): 7.22 - samples/sec: 22.17 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:56:17,865 epoch 8 - iter 80/402 - loss 0.47242893 - time (sec): 14.59 - samples/sec: 21.93 - lr: 0.000016 - momentum: 0.000000
2023-12-21 12:56:24,732 epoch 8 - iter 120/402 - loss 0.51387005 - time (sec): 21.46 - samples/sec: 22.37 - lr: 0.000015 - momentum: 0.000000
2023-12-21 12:56:31,475 epoch 8 - iter 160/402 - loss 0.53719282 - time (sec): 28.20 - samples/sec: 22.69 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:56:38,710 epoch 8 - iter 200/402 - loss 0.53726967 - time (sec): 35.44 - samples/sec: 22.57 - lr: 0.000014 - momentum: 0.000000
2023-12-21 12:56:46,131 epoch 8 - iter 240/402 - loss 0.55459930 - time (sec): 42.86 - samples/sec: 22.40 - lr: 0.000013 - momentum: 0.000000
2023-12-21 12:56:52,876 ep

100%|██████████| 80/80 [00:24<00:00,  3.29it/s]


2023-12-21 12:57:39,789 DEV : loss 2.725135564804077 - f1-score (micro avg)  0.4874
2023-12-21 12:57:42,536 ----------------------------------------------------------------------------------------------------
2023-12-21 12:57:49,429 epoch 9 - iter 40/402 - loss 0.48051082 - time (sec): 6.89 - samples/sec: 23.23 - lr: 0.000011 - momentum: 0.000000
2023-12-21 12:57:56,164 epoch 9 - iter 80/402 - loss 0.38547292 - time (sec): 13.62 - samples/sec: 23.49 - lr: 0.000010 - momentum: 0.000000
2023-12-21 12:58:03,131 epoch 9 - iter 120/402 - loss 0.43066630 - time (sec): 20.59 - samples/sec: 23.31 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:58:10,536 epoch 9 - iter 160/402 - loss 0.39831439 - time (sec): 28.00 - samples/sec: 22.86 - lr: 0.000009 - momentum: 0.000000
2023-12-21 12:58:17,959 epoch 9 - iter 200/402 - loss 0.46588648 - time (sec): 35.42 - samples/sec: 22.59 - lr: 0.000008 - momentum: 0.000000
2023-12-21 12:58:25,325 epoch 9 - iter 240/402 - loss 0.45325786 - time (sec): 42.79

100%|██████████| 80/80 [00:24<00:00,  3.25it/s]


2023-12-21 12:59:19,924 DEV : loss 3.0031678676605225 - f1-score (micro avg)  0.455
2023-12-21 12:59:20,979 ----------------------------------------------------------------------------------------------------
2023-12-21 12:59:27,673 epoch 10 - iter 40/402 - loss 0.26588507 - time (sec): 6.69 - samples/sec: 23.92 - lr: 0.000005 - momentum: 0.000000
2023-12-21 12:59:35,179 epoch 10 - iter 80/402 - loss 0.29011932 - time (sec): 14.20 - samples/sec: 22.54 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:59:42,261 epoch 10 - iter 120/402 - loss 0.29137150 - time (sec): 21.28 - samples/sec: 22.56 - lr: 0.000004 - momentum: 0.000000
2023-12-21 12:59:49,257 epoch 10 - iter 160/402 - loss 0.29437995 - time (sec): 28.27 - samples/sec: 22.64 - lr: 0.000003 - momentum: 0.000000
2023-12-21 12:59:56,148 epoch 10 - iter 200/402 - loss 0.29188624 - time (sec): 35.16 - samples/sec: 22.75 - lr: 0.000003 - momentum: 0.000000
2023-12-21 13:00:03,863 epoch 10 - iter 240/402 - loss 0.32722054 - time (sec):

100%|██████████| 80/80 [00:25<00:00,  3.10it/s]

2023-12-21 13:00:59,030 DEV : loss 3.290358781814575 - f1-score (micro avg)  0.4574





2023-12-21 13:01:04,621 ----------------------------------------------------------------------------------------------------
2023-12-21 13:01:04,625 Testing using last state of model ...


100%|██████████| 110/110 [00:28<00:00,  3.90it/s]

2023-12-21 13:01:32,859 
Results:
- F-score (micro) 0.4387
- F-score (macro) 0.3732
- Accuracy 0.4387

By class:
              precision    recall  f1-score   support

     comment     0.9376    0.4072    0.5678      1476
     support     0.0930    0.6346    0.1622       104
        deny     0.1970    0.5200    0.2857       100
       query     0.3588    0.7121    0.4772        66

    accuracy                         0.4387      1746
   macro avg     0.3966    0.5685    0.3732      1746
weighted avg     0.8230    0.4387    0.5240      1746

2023-12-21 13:01:32,861 ----------------------------------------------------------------------------------------------------





In [None]:
# microsoft/deberta-v3-base
add_previous_comment = True
add_post_title = False
add_depth = True

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[53]: "1 | The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 13:13:58,795 Reading data from .
2023-12-21 13:13:58,798 Train: train_fasttext_format.txt
2023-12-21 13:13:58,799 Dev: dev_fasttext_format.txt
2023-12-21 13:13:58,803 Test: test_fasttext_format.txt
2023-12-21 13:13:58,953 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1643,
        "number_of_documents_per_class": {
            "comment": 450,
            "query": 383,
            "deny": 360,
            "support": 450
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 81420,
            "min": 13,
            "max": 722,
            "avg": 49.55569080949483
        }
    },
    "TEST": {
        "dataset



Example of an embedding:
	Sentence[62]: "3 | $MENTION$ $MENTION$ $MENTION$ Why is he a hero? His life is worth nothing more than the life of a civilian would be... | $MENTION$ $MENTION$ $MENTION$ Because soldiers and police are always running towards what we are running away from! That's a hero!" → comment (1.0)
	tensor([ 1.3220e-01,  1.9850e-01,  7.3510e-02, -7.5041e-02,  8.0121e-02,
        -1.1836e-01, -1.2611e-02,  9.2433e-02, -1.5213e-01,  1.1440e-01,
         5.4930e-02, -2.6069e-01,  2.9575e-02,  2.2835e-01,  9.5882e-02,
        -4.8221e-02, -1.1460e-01,  1.8375e-02, -1.0880e-01, -1.4825e-02,
        -2.1163e-01, -2.0967e-01, -1.8485e-02,  4.5558e-02, -4.5602e-03,
        -1.4737e-02, -5.6363e-02, -4.6349e-02,  3.1586e-02,  1.2927e-01,
        -6.5931e-02,  9.6189e-03, -5.3213e-02, -3.0546e-02,  1.0771e-01,
         1.1329e-01, -5.7490e-02,  4.1111e-02, -1.1803e-01,  7.0540e-02,
        -1.9045e-01, -6.1328e-03,  1.8892e-02, -8.9352e-02,  1.0730e-01,
        -1.3585e-03,  8.6849

0it [00:00, ?it/s]
1643it [00:01, 1387.58it/s]

2023-12-21 13:14:25,123 Dictionary created for label 'class' with 4 values: comment (seen 450 times), support (seen 450 times), query (seen 383 times), deny (seen 360 times)
2023-12-21 13:14:25,133 ----------------------------------------------------------------------------------------------------
2023-12-21 13:14:25,135 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-21 13:14:32,129 epoch 1 - iter 41/411 - loss 1.58334182 - time (sec): 6.96 - samples/sec: 23.55 - lr: 0.000005 - momentum: 0.000000
2023-12-21 13:14:38,559 epoch 1 - iter 82/411 - loss 1.53275704 - time (sec): 13.39 - samples/sec: 24.49 - lr: 0.000010 - momentum: 0.000000
2023-12-21 13:14:45,128 epoch 1 - iter 123/411 - loss 1.50290068 - time (sec): 19.96 - samples/sec: 24.65 - lr: 0.000015 - momentum: 0.000000
2023-12-21 13:14:52,469 epoch 1 - iter 164/411 - loss 1.48371644 - time (sec): 27.30 - samples/sec: 24.03 - lr: 0.000020 - momentum: 0.000000
2023-12-21 13:14:59,891 epoch 1 - iter 205/411 - loss 1.44775126 - time (sec): 34.72 - samples/sec: 23.61 - lr: 0.000025 - momentum: 0.000000
2023-12-21 13:15:06,942 epoch 1 - iter 246/411 - loss 1.42975637 - time (sec): 41.78 - samples/sec: 23.55 - lr: 0.000030 - momentum: 0.000000
2023-12-21 13:15:14,392 epoch 1 - iter 287/411 - loss 1.40288081 - time (sec): 49.23 - samples/sec: 23.32 - lr: 0.000035 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:22<00:00,  3.63it/s]

2023-12-21 13:15:59,106 DEV : loss 1.2874109745025635 - f1-score (micro avg)  0.1932





2023-12-21 13:16:01,882 ----------------------------------------------------------------------------------------------------
2023-12-21 13:16:08,791 epoch 2 - iter 41/411 - loss 1.11182257 - time (sec): 6.90 - samples/sec: 23.76 - lr: 0.000049 - momentum: 0.000000
2023-12-21 13:16:15,577 epoch 2 - iter 82/411 - loss 1.19775765 - time (sec): 13.69 - samples/sec: 23.96 - lr: 0.000049 - momentum: 0.000000
2023-12-21 13:16:22,288 epoch 2 - iter 123/411 - loss 1.18881389 - time (sec): 20.40 - samples/sec: 24.12 - lr: 0.000048 - momentum: 0.000000
2023-12-21 13:16:30,602 epoch 2 - iter 164/411 - loss 1.21042157 - time (sec): 28.71 - samples/sec: 22.85 - lr: 0.000048 - momentum: 0.000000
2023-12-21 13:16:38,161 epoch 2 - iter 205/411 - loss 1.19055711 - time (sec): 36.27 - samples/sec: 22.61 - lr: 0.000047 - momentum: 0.000000
2023-12-21 13:16:46,304 epoch 2 - iter 246/411 - loss 1.18425140 - time (sec): 44.42 - samples/sec: 22.15 - lr: 0.000047 - momentum: 0.000000
2023-12-21 13:16:53,346 ep

100%|██████████| 80/80 [00:21<00:00,  3.66it/s]

2023-12-21 13:17:36,423 DEV : loss 1.299777626991272 - f1-score (micro avg)  0.2216





2023-12-21 13:17:37,429 ----------------------------------------------------------------------------------------------------
2023-12-21 13:17:44,787 epoch 3 - iter 41/411 - loss 1.07896200 - time (sec): 7.35 - samples/sec: 22.30 - lr: 0.000044 - momentum: 0.000000
2023-12-21 13:17:53,211 epoch 3 - iter 82/411 - loss 1.12001014 - time (sec): 15.78 - samples/sec: 20.79 - lr: 0.000043 - momentum: 0.000000
2023-12-21 13:18:00,241 epoch 3 - iter 123/411 - loss 1.11088966 - time (sec): 22.81 - samples/sec: 21.57 - lr: 0.000043 - momentum: 0.000000
2023-12-21 13:18:06,945 epoch 3 - iter 164/411 - loss 1.12048892 - time (sec): 29.51 - samples/sec: 22.23 - lr: 0.000042 - momentum: 0.000000
2023-12-21 13:18:14,698 epoch 3 - iter 205/411 - loss 1.11439606 - time (sec): 37.27 - samples/sec: 22.00 - lr: 0.000042 - momentum: 0.000000
2023-12-21 13:18:21,872 epoch 3 - iter 246/411 - loss 1.10975145 - time (sec): 44.44 - samples/sec: 22.14 - lr: 0.000041 - momentum: 0.000000
2023-12-21 13:18:29,782 ep

100%|██████████| 80/80 [00:22<00:00,  3.62it/s]

2023-12-21 13:19:12,884 DEV : loss 1.0499732494354248 - f1-score (micro avg)  0.5741





2023-12-21 13:19:14,689 ----------------------------------------------------------------------------------------------------
2023-12-21 13:19:21,273 epoch 4 - iter 41/411 - loss 0.90621572 - time (sec): 6.58 - samples/sec: 24.94 - lr: 0.000038 - momentum: 0.000000
2023-12-21 13:19:27,907 epoch 4 - iter 82/411 - loss 0.95783004 - time (sec): 13.21 - samples/sec: 24.83 - lr: 0.000038 - momentum: 0.000000
2023-12-21 13:19:35,216 epoch 4 - iter 123/411 - loss 0.96232611 - time (sec): 20.52 - samples/sec: 23.98 - lr: 0.000037 - momentum: 0.000000
2023-12-21 13:19:42,375 epoch 4 - iter 164/411 - loss 0.92222069 - time (sec): 27.68 - samples/sec: 23.70 - lr: 0.000037 - momentum: 0.000000
2023-12-21 13:19:49,436 epoch 4 - iter 205/411 - loss 0.91181789 - time (sec): 34.74 - samples/sec: 23.60 - lr: 0.000036 - momentum: 0.000000
2023-12-21 13:19:56,044 epoch 4 - iter 246/411 - loss 0.91994450 - time (sec): 41.35 - samples/sec: 23.80 - lr: 0.000036 - momentum: 0.000000
2023-12-21 13:20:05,547 ep

100%|██████████| 80/80 [00:21<00:00,  3.66it/s]

2023-12-21 13:20:49,694 DEV : loss 1.752276062965393 - f1-score (micro avg)  0.4274





2023-12-21 13:20:50,697 ----------------------------------------------------------------------------------------------------
2023-12-21 13:20:59,126 epoch 5 - iter 41/411 - loss 0.48059221 - time (sec): 8.42 - samples/sec: 19.47 - lr: 0.000033 - momentum: 0.000000
2023-12-21 13:21:05,684 epoch 5 - iter 82/411 - loss 0.67441173 - time (sec): 14.98 - samples/sec: 21.89 - lr: 0.000032 - momentum: 0.000000
2023-12-21 13:21:12,808 epoch 5 - iter 123/411 - loss 0.66771534 - time (sec): 22.11 - samples/sec: 22.26 - lr: 0.000032 - momentum: 0.000000
2023-12-21 13:21:20,194 epoch 5 - iter 164/411 - loss 0.66282520 - time (sec): 29.49 - samples/sec: 22.24 - lr: 0.000031 - momentum: 0.000000
2023-12-21 13:21:27,109 epoch 5 - iter 205/411 - loss 0.63944366 - time (sec): 36.41 - samples/sec: 22.52 - lr: 0.000031 - momentum: 0.000000
2023-12-21 13:21:33,882 epoch 5 - iter 246/411 - loss 0.65473499 - time (sec): 43.18 - samples/sec: 22.79 - lr: 0.000030 - momentum: 0.000000
2023-12-21 13:21:40,681 ep

100%|██████████| 80/80 [00:23<00:00,  3.47it/s]

2023-12-21 13:22:26,350 DEV : loss 2.415405511856079 - f1-score (micro avg)  0.3935





2023-12-21 13:22:27,842 ----------------------------------------------------------------------------------------------------
2023-12-21 13:22:35,076 epoch 6 - iter 41/411 - loss 0.27728527 - time (sec): 7.23 - samples/sec: 22.68 - lr: 0.000027 - momentum: 0.000000
2023-12-21 13:22:42,190 epoch 6 - iter 82/411 - loss 0.37626213 - time (sec): 14.35 - samples/sec: 22.86 - lr: 0.000027 - momentum: 0.000000
2023-12-21 13:22:48,706 epoch 6 - iter 123/411 - loss 0.43754624 - time (sec): 20.86 - samples/sec: 23.58 - lr: 0.000026 - momentum: 0.000000
2023-12-21 13:22:55,379 epoch 6 - iter 164/411 - loss 0.44661229 - time (sec): 27.53 - samples/sec: 23.82 - lr: 0.000026 - momentum: 0.000000
2023-12-21 13:23:03,772 epoch 6 - iter 205/411 - loss 0.44019257 - time (sec): 35.93 - samples/sec: 22.82 - lr: 0.000025 - momentum: 0.000000
2023-12-21 13:23:11,555 epoch 6 - iter 246/411 - loss 0.42905856 - time (sec): 43.71 - samples/sec: 22.51 - lr: 0.000025 - momentum: 0.000000
2023-12-21 13:23:18,075 ep

100%|██████████| 80/80 [00:22<00:00,  3.50it/s]

2023-12-21 13:24:02,690 DEV : loss 3.121324300765991 - f1-score (micro avg)  0.5126





2023-12-21 13:24:03,662 ----------------------------------------------------------------------------------------------------
2023-12-21 13:24:10,291 epoch 7 - iter 41/411 - loss 0.37640460 - time (sec): 6.63 - samples/sec: 24.75 - lr: 0.000022 - momentum: 0.000000
2023-12-21 13:24:16,732 epoch 7 - iter 82/411 - loss 0.38350933 - time (sec): 13.07 - samples/sec: 25.10 - lr: 0.000021 - momentum: 0.000000
2023-12-21 13:24:25,344 epoch 7 - iter 123/411 - loss 0.36068970 - time (sec): 21.68 - samples/sec: 22.70 - lr: 0.000021 - momentum: 0.000000
2023-12-21 13:24:32,613 epoch 7 - iter 164/411 - loss 0.35166284 - time (sec): 28.95 - samples/sec: 22.66 - lr: 0.000020 - momentum: 0.000000
2023-12-21 13:24:40,812 epoch 7 - iter 205/411 - loss 0.34823217 - time (sec): 37.15 - samples/sec: 22.07 - lr: 0.000020 - momentum: 0.000000
2023-12-21 13:24:47,511 epoch 7 - iter 246/411 - loss 0.32173609 - time (sec): 43.85 - samples/sec: 22.44 - lr: 0.000019 - momentum: 0.000000
2023-12-21 13:24:54,206 ep

100%|██████████| 80/80 [00:23<00:00,  3.46it/s]

2023-12-21 13:25:38,946 DEV : loss 3.3038277626037598 - f1-score (micro avg)  0.5118





2023-12-21 13:25:40,696 ----------------------------------------------------------------------------------------------------
2023-12-21 13:25:47,823 epoch 8 - iter 41/411 - loss 0.12098194 - time (sec): 7.12 - samples/sec: 23.03 - lr: 0.000016 - momentum: 0.000000
2023-12-21 13:25:55,052 epoch 8 - iter 82/411 - loss 0.10346738 - time (sec): 14.35 - samples/sec: 22.86 - lr: 0.000016 - momentum: 0.000000
2023-12-21 13:26:02,633 epoch 8 - iter 123/411 - loss 0.16052690 - time (sec): 21.93 - samples/sec: 22.43 - lr: 0.000015 - momentum: 0.000000
2023-12-21 13:26:09,402 epoch 8 - iter 164/411 - loss 0.15458651 - time (sec): 28.70 - samples/sec: 22.86 - lr: 0.000015 - momentum: 0.000000
2023-12-21 13:26:16,585 epoch 8 - iter 205/411 - loss 0.16319892 - time (sec): 35.88 - samples/sec: 22.85 - lr: 0.000014 - momentum: 0.000000
2023-12-21 13:26:24,219 epoch 8 - iter 246/411 - loss 0.16283209 - time (sec): 43.52 - samples/sec: 22.61 - lr: 0.000013 - momentum: 0.000000
2023-12-21 13:26:30,994 ep

100%|██████████| 80/80 [00:22<00:00,  3.50it/s]

2023-12-21 13:27:15,568 DEV : loss 3.7176594734191895 - f1-score (micro avg)  0.5584





2023-12-21 13:27:16,552 ----------------------------------------------------------------------------------------------------
2023-12-21 13:27:23,062 epoch 9 - iter 41/411 - loss 0.14006832 - time (sec): 6.51 - samples/sec: 25.20 - lr: 0.000011 - momentum: 0.000000
2023-12-21 13:27:30,191 epoch 9 - iter 82/411 - loss 0.10733034 - time (sec): 13.64 - samples/sec: 24.06 - lr: 0.000010 - momentum: 0.000000
2023-12-21 13:27:37,014 epoch 9 - iter 123/411 - loss 0.10261129 - time (sec): 20.46 - samples/sec: 24.05 - lr: 0.000010 - momentum: 0.000000
2023-12-21 13:27:43,670 epoch 9 - iter 164/411 - loss 0.10919438 - time (sec): 27.11 - samples/sec: 24.19 - lr: 0.000009 - momentum: 0.000000
2023-12-21 13:27:50,578 epoch 9 - iter 205/411 - loss 0.10481816 - time (sec): 34.02 - samples/sec: 24.10 - lr: 0.000008 - momentum: 0.000000
2023-12-21 13:27:57,536 epoch 9 - iter 246/411 - loss 0.10172924 - time (sec): 40.98 - samples/sec: 24.01 - lr: 0.000008 - momentum: 0.000000
2023-12-21 13:28:04,797 ep

100%|██████████| 80/80 [00:21<00:00,  3.65it/s]

2023-12-21 13:28:50,611 DEV : loss 3.87241530418396 - f1-score (micro avg)  0.5465





2023-12-21 13:28:53,269 ----------------------------------------------------------------------------------------------------
2023-12-21 13:29:00,447 epoch 10 - iter 41/411 - loss 0.04148016 - time (sec): 7.17 - samples/sec: 22.86 - lr: 0.000005 - momentum: 0.000000
2023-12-21 13:29:08,634 epoch 10 - iter 82/411 - loss 0.06924412 - time (sec): 15.36 - samples/sec: 21.35 - lr: 0.000005 - momentum: 0.000000
2023-12-21 13:29:15,167 epoch 10 - iter 123/411 - loss 0.05764865 - time (sec): 21.89 - samples/sec: 22.47 - lr: 0.000004 - momentum: 0.000000
2023-12-21 13:29:22,197 epoch 10 - iter 164/411 - loss 0.06796849 - time (sec): 28.92 - samples/sec: 22.68 - lr: 0.000003 - momentum: 0.000000
2023-12-21 13:29:29,134 epoch 10 - iter 205/411 - loss 0.05968885 - time (sec): 35.86 - samples/sec: 22.87 - lr: 0.000003 - momentum: 0.000000
2023-12-21 13:29:36,715 epoch 10 - iter 246/411 - loss 0.05591459 - time (sec): 43.44 - samples/sec: 22.65 - lr: 0.000002 - momentum: 0.000000
2023-12-21 13:29:43,

100%|██████████| 80/80 [00:21<00:00,  3.65it/s]

2023-12-21 13:30:27,511 DEV : loss 4.063159465789795 - f1-score (micro avg)  0.5513





2023-12-21 13:30:32,361 ----------------------------------------------------------------------------------------------------
2023-12-21 13:30:32,367 Testing using last state of model ...


100%|██████████| 110/110 [00:25<00:00,  4.28it/s]

2023-12-21 13:30:58,126 
Results:
- F-score (micro) 0.6025
- F-score (macro) 0.3983
- Accuracy 0.6025

By class:
              precision    recall  f1-score   support

     comment     0.9284    0.6145    0.7395      1476
        deny     0.1972    0.5700    0.2931       100
     support     0.1362    0.3365    0.1939       104
       query     0.2377    0.8030    0.3668        66

    accuracy                         0.6025      1746
   macro avg     0.3749    0.5810    0.3983      1746
weighted avg     0.8132    0.6025    0.6673      1746

2023-12-21 13:30:58,127 ----------------------------------------------------------------------------------------------------





In [None]:
# microsoft/deberta-v3-base
add_previous_comment = False
add_post_title = True
add_depth = True

test_model_configuration(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth, 'microsoft/deberta-v3-base')

Sample example: Sentence[29]: "1 | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 09:07:02,390 Reading data from .
2023-12-22 09:07:02,391 Train: train_fasttext_format.txt
2023-12-22 09:07:02,395 Dev: dev_fasttext_format.txt
2023-12-22 09:07:02,397 Test: test_fasttext_format.txt
2023-12-22 09:07:02,478 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1594,
        "number_of_documents_per_class": {
            "support": 428,
            "query": 395,
            "deny": 343,
            "comment": 428
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 53777,
            "min": 5,
            "max": 764,
            "avg": 33.737139272271015
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_docume

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Example of an embedding:
	Sentence[44]: "5 | At least 12 killed in today's attack on satirical magazine #CharlieHebdo in Paris, official says. $URL$ | $MENTION$ $MENTION$ $MENTION$ $MENTION$ yeah i feel really sorry for them" → support (1.0)
	tensor([ 1.2900e-01,  2.3680e-01,  4.4531e-02, -7.4048e-02,  6.7678e-02,
        -1.2625e-01,  1.2346e-02,  1.0599e-01, -1.8670e-01,  8.5667e-02,
         6.0628e-02, -2.8525e-01, -7.1049e-03,  2.6453e-01,  7.6964e-02,
        -5.8990e-02, -1.3040e-01, -1.4125e-03, -1.3804e-01, -2.5579e-02,
        -2.3247e-01, -1.9994e-01, -3.3998e-02,  6.8175e-02,  4.8704e-03,
        -3.4545e-02, -4.0781e-02, -6.9924e-02,  5.6807e-02,  1.0957e-01,
        -1.2153e-01, -1.8095e-02, -7.9180e-02, -4.9439e-02,  1.1234e-01,
         1.6172e-01, -7.2757e-02,  5.5988e-02, -1.3247e-01,  7.3195e-02,
        -1.9392e-01, -9.7494e-03,  9.8137e-03, -8.8093e-02,  8.2716e-02,
         2.0058e-02,  7.2808e-02, -1.2176e-01,  1.8081e-03,  1.9911e-02,
        -5.8353e-02, -1.338

0it [00:00, ?it/s]
1594it [00:00, 1933.05it/s]

2023-12-22 09:07:19,964 Dictionary created for label 'class' with 4 values: support (seen 428 times), comment (seen 428 times), query (seen 395 times), deny (seen 343 times)
2023-12-22 09:07:19,975 ----------------------------------------------------------------------------------------------------
2023-12-22 09:07:19,980 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128101, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): StableDropout()
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): Linear(in_features=768, out_features=768, bias=True)
                (key_proj): Linear(in_features=768, out_features=768, bias=True)
       




2023-12-22 09:07:27,235 epoch 1 - iter 39/399 - loss 1.64911855 - time (sec): 7.21 - samples/sec: 21.63 - lr: 0.000005 - momentum: 0.000000
2023-12-22 09:07:33,532 epoch 1 - iter 78/399 - loss 1.57551142 - time (sec): 13.51 - samples/sec: 23.10 - lr: 0.000010 - momentum: 0.000000
2023-12-22 09:07:39,902 epoch 1 - iter 117/399 - loss 1.54414102 - time (sec): 19.88 - samples/sec: 23.54 - lr: 0.000015 - momentum: 0.000000
2023-12-22 09:07:48,220 epoch 1 - iter 156/399 - loss 1.50606073 - time (sec): 28.20 - samples/sec: 22.13 - lr: 0.000019 - momentum: 0.000000
2023-12-22 09:07:54,057 epoch 1 - iter 195/399 - loss 1.47050007 - time (sec): 34.03 - samples/sec: 22.92 - lr: 0.000024 - momentum: 0.000000
2023-12-22 09:08:00,162 epoch 1 - iter 234/399 - loss 1.43831906 - time (sec): 40.14 - samples/sec: 23.32 - lr: 0.000029 - momentum: 0.000000
2023-12-22 09:08:06,258 epoch 1 - iter 273/399 - loss 1.42652115 - time (sec): 46.24 - samples/sec: 23.62 - lr: 0.000034 - momentum: 0.000000
2023-12-2

100%|██████████| 80/80 [00:12<00:00,  6.56it/s]

2023-12-22 09:08:37,740 DEV : loss 1.2748682498931885 - f1-score (micro avg)  0.28





2023-12-22 09:08:38,412 ----------------------------------------------------------------------------------------------------
2023-12-22 09:08:44,743 epoch 2 - iter 39/399 - loss 1.20442242 - time (sec): 6.33 - samples/sec: 24.66 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:08:50,835 epoch 2 - iter 78/399 - loss 1.19521526 - time (sec): 12.42 - samples/sec: 25.12 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:08:57,555 epoch 2 - iter 117/399 - loss 1.17475589 - time (sec): 19.14 - samples/sec: 24.45 - lr: 0.000048 - momentum: 0.000000
2023-12-22 09:09:03,288 epoch 2 - iter 156/399 - loss 1.18108703 - time (sec): 24.87 - samples/sec: 25.09 - lr: 0.000048 - momentum: 0.000000
2023-12-22 09:09:09,023 epoch 2 - iter 195/399 - loss 1.18795048 - time (sec): 30.61 - samples/sec: 25.48 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:09:15,022 epoch 2 - iter 234/399 - loss 1.18275285 - time (sec): 36.61 - samples/sec: 25.57 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:09:21,316 ep

100%|██████████| 80/80 [00:13<00:00,  5.98it/s]

2023-12-22 09:09:55,259 DEV : loss 1.4810066223144531 - f1-score (micro avg)  0.2161





2023-12-22 09:09:56,408 ----------------------------------------------------------------------------------------------------
2023-12-22 09:10:03,375 epoch 3 - iter 39/399 - loss 1.09760818 - time (sec): 6.96 - samples/sec: 22.41 - lr: 0.000044 - momentum: 0.000000
2023-12-22 09:10:09,316 epoch 3 - iter 78/399 - loss 1.02726977 - time (sec): 12.90 - samples/sec: 24.18 - lr: 0.000043 - momentum: 0.000000
2023-12-22 09:10:15,261 epoch 3 - iter 117/399 - loss 1.04755446 - time (sec): 18.85 - samples/sec: 24.83 - lr: 0.000043 - momentum: 0.000000
2023-12-22 09:10:21,925 epoch 3 - iter 156/399 - loss 1.08474246 - time (sec): 25.51 - samples/sec: 24.46 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:10:28,406 epoch 3 - iter 195/399 - loss 1.06789149 - time (sec): 31.99 - samples/sec: 24.38 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:10:34,586 epoch 3 - iter 234/399 - loss 1.06739431 - time (sec): 38.17 - samples/sec: 24.52 - lr: 0.000041 - momentum: 0.000000
2023-12-22 09:10:40,606 ep

100%|██████████| 80/80 [00:12<00:00,  6.32it/s]

2023-12-22 09:11:12,393 DEV : loss 1.4939020872116089 - f1-score (micro avg)  0.358





2023-12-22 09:11:13,744 ----------------------------------------------------------------------------------------------------
2023-12-22 09:11:19,508 epoch 4 - iter 39/399 - loss 0.85842906 - time (sec): 5.76 - samples/sec: 27.08 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:11:26,196 epoch 4 - iter 78/399 - loss 0.80528952 - time (sec): 12.45 - samples/sec: 25.06 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:11:33,095 epoch 4 - iter 117/399 - loss 0.86117534 - time (sec): 19.35 - samples/sec: 24.19 - lr: 0.000037 - momentum: 0.000000
2023-12-22 09:11:39,433 epoch 4 - iter 156/399 - loss 0.88535895 - time (sec): 25.69 - samples/sec: 24.29 - lr: 0.000037 - momentum: 0.000000
2023-12-22 09:11:45,128 epoch 4 - iter 195/399 - loss 0.84893061 - time (sec): 31.38 - samples/sec: 24.86 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:11:51,319 epoch 4 - iter 234/399 - loss 0.82230651 - time (sec): 37.57 - samples/sec: 24.91 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:11:57,233 ep

100%|██████████| 80/80 [00:12<00:00,  6.27it/s]

2023-12-22 09:12:29,742 DEV : loss 1.4056299924850464 - f1-score (micro avg)  0.5686





2023-12-22 09:12:30,393 ----------------------------------------------------------------------------------------------------
2023-12-22 09:12:36,942 epoch 5 - iter 39/399 - loss 0.61352336 - time (sec): 6.55 - samples/sec: 23.83 - lr: 0.000033 - momentum: 0.000000
2023-12-22 09:12:43,145 epoch 5 - iter 78/399 - loss 0.67026319 - time (sec): 12.75 - samples/sec: 24.47 - lr: 0.000032 - momentum: 0.000000
2023-12-22 09:12:49,409 epoch 5 - iter 117/399 - loss 0.63204472 - time (sec): 19.01 - samples/sec: 24.61 - lr: 0.000032 - momentum: 0.000000
2023-12-22 09:12:55,211 epoch 5 - iter 156/399 - loss 0.62676770 - time (sec): 24.82 - samples/sec: 25.15 - lr: 0.000031 - momentum: 0.000000
2023-12-22 09:13:03,104 epoch 5 - iter 195/399 - loss 0.62973594 - time (sec): 32.71 - samples/sec: 23.85 - lr: 0.000031 - momentum: 0.000000
2023-12-22 09:13:09,043 epoch 5 - iter 234/399 - loss 0.61126730 - time (sec): 38.65 - samples/sec: 24.22 - lr: 0.000030 - momentum: 0.000000
2023-12-22 09:13:14,902 ep

100%|██████████| 80/80 [00:12<00:00,  6.31it/s]

2023-12-22 09:13:47,519 DEV : loss 2.0267257690429688 - f1-score (micro avg)  0.6175





2023-12-22 09:13:48,717 ----------------------------------------------------------------------------------------------------
2023-12-22 09:13:54,708 epoch 6 - iter 39/399 - loss 0.49532503 - time (sec): 5.99 - samples/sec: 26.06 - lr: 0.000027 - momentum: 0.000000
2023-12-22 09:14:00,773 epoch 6 - iter 78/399 - loss 0.41731632 - time (sec): 12.05 - samples/sec: 25.89 - lr: 0.000027 - momentum: 0.000000
2023-12-22 09:14:07,587 epoch 6 - iter 117/399 - loss 0.47592611 - time (sec): 18.87 - samples/sec: 24.81 - lr: 0.000026 - momentum: 0.000000
2023-12-22 09:14:13,329 epoch 6 - iter 156/399 - loss 0.51867333 - time (sec): 24.61 - samples/sec: 25.36 - lr: 0.000026 - momentum: 0.000000
2023-12-22 09:14:19,336 epoch 6 - iter 195/399 - loss 0.50993547 - time (sec): 30.61 - samples/sec: 25.48 - lr: 0.000025 - momentum: 0.000000
2023-12-22 09:14:25,152 epoch 6 - iter 234/399 - loss 0.49926329 - time (sec): 36.43 - samples/sec: 25.69 - lr: 0.000025 - momentum: 0.000000
2023-12-22 09:14:31,016 ep

100%|██████████| 80/80 [00:13<00:00,  5.95it/s]

2023-12-22 09:15:05,569 DEV : loss 3.6893177032470703 - f1-score (micro avg)  0.5032





2023-12-22 09:15:06,240 ----------------------------------------------------------------------------------------------------
2023-12-22 09:15:11,824 epoch 7 - iter 39/399 - loss 0.25320461 - time (sec): 5.58 - samples/sec: 27.96 - lr: 0.000022 - momentum: 0.000000
2023-12-22 09:15:18,080 epoch 7 - iter 78/399 - loss 0.27720237 - time (sec): 11.84 - samples/sec: 26.36 - lr: 0.000021 - momentum: 0.000000
2023-12-22 09:15:24,085 epoch 7 - iter 117/399 - loss 0.23815027 - time (sec): 17.84 - samples/sec: 26.23 - lr: 0.000021 - momentum: 0.000000
2023-12-22 09:15:30,960 epoch 7 - iter 156/399 - loss 0.28362379 - time (sec): 24.72 - samples/sec: 25.25 - lr: 0.000020 - momentum: 0.000000
2023-12-22 09:15:37,028 epoch 7 - iter 195/399 - loss 0.26576100 - time (sec): 30.78 - samples/sec: 25.34 - lr: 0.000020 - momentum: 0.000000
2023-12-22 09:15:43,395 epoch 7 - iter 234/399 - loss 0.31299022 - time (sec): 37.15 - samples/sec: 25.19 - lr: 0.000019 - momentum: 0.000000
2023-12-22 09:15:49,503 ep

100%|██████████| 80/80 [00:12<00:00,  6.27it/s]

2023-12-22 09:16:22,835 DEV : loss 3.952692985534668 - f1-score (micro avg)  0.5371





2023-12-22 09:16:24,164 ----------------------------------------------------------------------------------------------------
2023-12-22 09:16:31,423 epoch 8 - iter 39/399 - loss 0.06182325 - time (sec): 7.25 - samples/sec: 21.50 - lr: 0.000016 - momentum: 0.000000
2023-12-22 09:16:37,198 epoch 8 - iter 78/399 - loss 0.11681891 - time (sec): 13.03 - samples/sec: 23.94 - lr: 0.000016 - momentum: 0.000000
2023-12-22 09:16:43,461 epoch 8 - iter 117/399 - loss 0.15737066 - time (sec): 19.29 - samples/sec: 24.26 - lr: 0.000015 - momentum: 0.000000
2023-12-22 09:16:49,655 epoch 8 - iter 156/399 - loss 0.17558014 - time (sec): 25.49 - samples/sec: 24.48 - lr: 0.000015 - momentum: 0.000000
2023-12-22 09:16:55,746 epoch 8 - iter 195/399 - loss 0.17157866 - time (sec): 31.58 - samples/sec: 24.70 - lr: 0.000014 - momentum: 0.000000
2023-12-22 09:17:02,932 epoch 8 - iter 234/399 - loss 0.16169614 - time (sec): 38.76 - samples/sec: 24.15 - lr: 0.000013 - momentum: 0.000000
2023-12-22 09:17:08,637 ep

100%|██████████| 80/80 [00:12<00:00,  6.33it/s]

2023-12-22 09:17:40,384 DEV : loss 4.717190742492676 - f1-score (micro avg)  0.5552





2023-12-22 09:17:41,568 ----------------------------------------------------------------------------------------------------
2023-12-22 09:17:47,801 epoch 9 - iter 39/399 - loss 0.07689806 - time (sec): 6.23 - samples/sec: 25.04 - lr: 0.000011 - momentum: 0.000000
2023-12-22 09:17:53,757 epoch 9 - iter 78/399 - loss 0.07821683 - time (sec): 12.19 - samples/sec: 25.61 - lr: 0.000010 - momentum: 0.000000
2023-12-22 09:17:59,740 epoch 9 - iter 117/399 - loss 0.10634312 - time (sec): 18.17 - samples/sec: 25.76 - lr: 0.000010 - momentum: 0.000000
2023-12-22 09:18:06,195 epoch 9 - iter 156/399 - loss 0.08365950 - time (sec): 24.62 - samples/sec: 25.34 - lr: 0.000009 - momentum: 0.000000
2023-12-22 09:18:13,813 epoch 9 - iter 195/399 - loss 0.07006212 - time (sec): 32.24 - samples/sec: 24.19 - lr: 0.000008 - momentum: 0.000000
2023-12-22 09:18:19,711 epoch 9 - iter 234/399 - loss 0.06996943 - time (sec): 38.14 - samples/sec: 24.54 - lr: 0.000008 - momentum: 0.000000
2023-12-22 09:18:25,752 ep

100%|██████████| 80/80 [00:13<00:00,  5.87it/s]

2023-12-22 09:18:58,567 DEV : loss 5.566875457763672 - f1-score (micro avg)  0.5032





2023-12-22 09:18:59,250 ----------------------------------------------------------------------------------------------------
2023-12-22 09:19:05,039 epoch 10 - iter 39/399 - loss 0.10298385 - time (sec): 5.78 - samples/sec: 26.97 - lr: 0.000005 - momentum: 0.000000
2023-12-22 09:19:11,121 epoch 10 - iter 78/399 - loss 0.13280353 - time (sec): 11.87 - samples/sec: 26.29 - lr: 0.000005 - momentum: 0.000000
2023-12-22 09:19:16,988 epoch 10 - iter 117/399 - loss 0.13266105 - time (sec): 17.73 - samples/sec: 26.39 - lr: 0.000004 - momentum: 0.000000
2023-12-22 09:19:22,856 epoch 10 - iter 156/399 - loss 0.10203886 - time (sec): 23.60 - samples/sec: 26.44 - lr: 0.000003 - momentum: 0.000000
2023-12-22 09:19:28,559 epoch 10 - iter 195/399 - loss 0.09486216 - time (sec): 29.30 - samples/sec: 26.62 - lr: 0.000003 - momentum: 0.000000
2023-12-22 09:19:34,838 epoch 10 - iter 234/399 - loss 0.08952227 - time (sec): 35.58 - samples/sec: 26.30 - lr: 0.000002 - momentum: 0.000000
2023-12-22 09:19:41,

100%|██████████| 80/80 [00:12<00:00,  6.24it/s]

2023-12-22 09:20:15,474 DEV : loss 5.076405048370361 - f1-score (micro avg)  0.5473





2023-12-22 09:20:19,533 ----------------------------------------------------------------------------------------------------
2023-12-22 09:20:19,542 Testing using last state of model ...


100%|██████████| 110/110 [00:18<00:00,  5.90it/s]

2023-12-22 09:20:38,237 
Results:
- F-score (micro) 0.6191
- F-score (macro) 0.4453
- Accuracy 0.6191

By class:
              precision    recall  f1-score   support

     comment     0.9451    0.6186    0.7477      1476
     support     0.1313    0.4519    0.2035       104
        deny     0.2785    0.6600    0.3917       100
       query     0.2973    0.8333    0.4382        66

    accuracy                         0.6191      1746
   macro avg     0.4130    0.6410    0.4453      1746
weighted avg     0.8340    0.6191    0.6832      1746

2023-12-22 09:20:38,239 ----------------------------------------------------------------------------------------------------





# Train flair's model (static and character-level embeddings via GRUs)

Since the flair library allows us to easily change the underlying classifier architecture, we have also tested using a more "traditional" setup consisting of a GRU classifier (a simple Recunnet Neural Network cell architecture similar to LSTMs), where the input embedding are a concatenation of:


*   Static (GLoVe) embeddings
*   Contextual, character-level embeddings (starting from the left side)
*   Contextual, character-level embeddings (starting from the right side)

The character-level contextual embeddings are an implementation of https://aclanthology.org/C18-1139/ and predate BERT by only two months. They are implemented via a LSTM network, and can help handling out of vocabulary (OOV) words from GLoVe while also encoding a minimal amount of contextual information.

Since we have execute two RNNs for the input and another one for the output, this architecture is actually slower than a BERT model, and its performance is also slightly worse.


In [None]:
from flair.embeddings import FlairEmbeddings, WordEmbeddings, DocumentRNNEmbeddings

In [None]:
def test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth):
  # Create the BERT samples
  bert_samples_train = create_bert_samples(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)
  bert_samples_test = create_bert_samples(samples_test, ground_truths, add_previous_comment, add_post_title, add_depth)

  print("Sample example:", bert_samples_train[42])

  # Further split train into dev+train
  random.shuffle(bert_samples_train)
  split_ratio = 0.8

  split_index = int(len(bert_samples_train) * split_ratio)

  bert_samples_dev = bert_samples_train[split_index:]
  bert_samples_train = bert_samples_train[:split_index]

  # Downsample the train dataset (the comment class is oversampled)
  bert_samples_train_downsampled = downsample(bert_samples_train)

  # Write the datasets in fasttext sample format
  to_fasttext_dataset(bert_samples_train_downsampled, "train_fasttext_format.txt")
  to_fasttext_dataset(bert_samples_dev, "dev_fasttext_format.txt")
  to_fasttext_dataset(bert_samples_test, "test_fasttext_format.txt")

  # Create flair's corpus
  data_folder = '.'
  corpus: Corpus = ClassificationCorpus(data_folder,
                                        test_file='test_fasttext_format.txt',
                                        dev_file='dev_fasttext_format.txt',
                                        train_file='train_fasttext_format.txt',
                                        label_type='class')

  print("Corpus statistics:\n", corpus.obtain_statistics())

  # Declare the embeddings we want to use
  word_embeddings = [
      # Static embeddings
      WordEmbeddings('glove'),

      # Character-level contextual embeddings, which help with adding context
      # to the static embeddings above
      FlairEmbeddings('news-forward'),
      FlairEmbeddings('news-backward'),]
  document_embeddings = DocumentRNNEmbeddings(word_embeddings)
                                              #hidden_size=512,
                                              #reproject_words=True,
                                              #reproject_words_dimension=256)

  example_sentence = bert_samples_train_downsampled[0]
  document_embeddings.embed(example_sentence)
  print(f"Example of an embedding:\n\t{example_sentence}\n\t{example_sentence.embedding}")

  # Instantiate flair's classifier (internally, it uses torch classifier layers)
  classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(label_type='class'), label_type='class')

  # And train it
  trainer = ModelTrainer(classifier, corpus)
  trainer.fine_tune('resources/taggers/rumoureval_character_level_embeddings',
                    learning_rate=5.0e-5,
                    mini_batch_size=64,
                    max_epochs=25)

  with torch.no_grad():
    torch.cuda.empty_cache()

In this case, the best performing configuration is the one that adds the previous comment's text and the comment's depth indicator.

Overall, the scores are lower than the BERT model. Since the training and inference cost is actually higher in this case, the BERT model is preferable.

Although we would need a bigger dataset to be able to establish a fair comparison between both models and come into deeper conclusions about the best configuration (we are training a whole RNN here and fine-tuning a BERT model in the previous case with very limited data), there seems to be an indication that, at the very least, adding the depth indicator helps the classifier.

In [None]:
add_previous_comment = False
add_post_title = False
add_depth = False

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[27]: "“$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 14:04:38,831 Reading data from .
2023-12-21 14:04:38,833 Train: train_fasttext_format.txt
2023-12-21 14:04:38,834 Dev: dev_fasttext_format.txt
2023-12-21 14:04:38,836 Test: test_fasttext_format.txt
2023-12-21 14:04:38,918 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1633,
        "number_of_documents_per_class": {
            "comment": 442,
            "deny": 354,
            "support": 442,
            "query": 395
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 37367,
            "min": 1,
            "max": 492,
            "avg": 22.882424984690754
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_documents_

100%|██████████| 153M/153M [00:09<00:00, 16.1MB/s]

2023-12-21 14:04:55,655 copying /tmp/tmpnmno8y20 to cache at /root/.flair/embeddings/glove.gensim.vectors.npy





2023-12-21 14:04:55,958 removing temp file /tmp/tmpnmno8y20
2023-12-21 14:04:56,629 https://flair.informatik.hu-berlin.de/resources/embeddings/token/glove.gensim not found in cache, downloading to /tmp/tmpp2cf6skv


100%|██████████| 20.5M/20.5M [00:02<00:00, 9.16MB/s]

2023-12-21 14:04:59,618 copying /tmp/tmpp2cf6skv to cache at /root/.flair/embeddings/glove.gensim





2023-12-21 14:04:59,639 removing temp file /tmp/tmpp2cf6skv
2023-12-21 14:05:04,586 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/news-forward-0.4.1.pt not found in cache, downloading to /tmp/tmp8114e6f7


100%|██████████| 69.7M/69.7M [00:05<00:00, 14.3MB/s]

2023-12-21 14:05:10,266 copying /tmp/tmp8114e6f7 to cache at /root/.flair/embeddings/news-forward-0.4.1.pt





2023-12-21 14:05:10,455 removing temp file /tmp/tmp8114e6f7
2023-12-21 14:05:11,401 https://flair.informatik.hu-berlin.de/resources/embeddings/flair/news-backward-0.4.1.pt not found in cache, downloading to /tmp/tmpdjeompiv


100%|██████████| 69.7M/69.7M [00:06<00:00, 11.1MB/s]

2023-12-21 14:05:18,596 copying /tmp/tmpdjeompiv to cache at /root/.flair/embeddings/news-backward-0.4.1.pt





2023-12-21 14:05:18,689 removing temp file /tmp/tmpdjeompiv
Example of an embedding:
	Sentence[16]: "$MENTION$ Send in the #British #SAS it will be over within 30mins" → comment (1.0)
	tensor([-0.0768,  0.3096, -0.1161,  0.1341,  0.0359, -0.2793, -0.4565, -0.0826,
         0.1717, -0.0164,  0.0643, -0.1748, -0.0086,  0.1738,  0.3162, -0.3030,
         0.1208,  0.1860, -0.1912,  0.4587, -0.2820, -0.3292, -0.2199,  0.0149,
        -0.0250,  0.2812,  0.2949, -0.1572, -0.2154, -0.2458,  0.1530,  0.0757,
        -0.0711, -0.0855, -0.1255,  0.0962, -0.0740,  0.0676,  0.0147, -0.1123,
         0.1867, -0.2811,  0.0116, -0.0470, -0.2275,  0.1308,  0.0940,  0.1680,
         0.0532,  0.3504, -0.0588, -0.2395, -0.1127, -0.3606, -0.1649, -0.5288,
         0.1222, -0.2326,  0.3267, -0.0918,  0.0653, -0.1509,  0.1054, -0.2195,
        -0.1341, -0.0383, -0.1086,  0.1652, -0.1119,  0.2923,  0.1276,  0.2251,
         0.5162,  0.0456,  0.0064,  0.3389, -0.1043, -0.0308, -0.1398, -0.2584,
        -0.2837

0it [00:00, ?it/s]
1633it [00:00, 2654.69it/s]

2023-12-21 14:05:19,790 Dictionary created for label 'class' with 4 values: comment (seen 442 times), support (seen 442 times), query (seen 395 times), deny (seen 354 times)
2023-12-21 14:05:19,798 ----------------------------------------------------------------------------------------------------
2023-12-21 14:05:19,800 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-21 14:05:19,805 Corpus: 1633 train + 1268 dev + 1746 test sentences
2023-12-21 14:05:19,807 ----------------------------------------------------------------------------------------------------
2023-12-21 14:05:19,808 Train:  1633 sentences
2023-12-21 14:05:19,809         (train_with_dev=False, train_with_test=False)
2023-12-21 14:05:19,811 ----------------------------------------------------------------------------------------------------
2023-12-21 14:05:19,812 Training Params:
2023-12-21 14:05:19,813  - learning_rate: "5e-05" 
2023-12-21 14:05:19,814  - mini_batch_size: "64"
2023-12-21 14:05:19,815  - max_epochs: "25"
2023-12-21 14:05:19,816  - shuffle: "True"
2023-12-21 14:05:19,817 ----------------------------------------------------------------------------------------------------
2023-12-21 14:05:19,818 Plugins:
2023-12-21 14:05:19,819  - LinearScheduler | warmup_fraction: '0.1'
2023-12-21 14:05:19,820 -----------------------------------------------------------------------

100%|██████████| 80/80 [00:46<00:00,  1.72it/s]

2023-12-21 14:06:46,423 DEV : loss 1.2804577350616455 - f1-score (micro avg)  0.5079





2023-12-21 14:06:46,909 ----------------------------------------------------------------------------------------------------
2023-12-21 14:06:48,604 epoch 2 - iter 2/26 - loss 1.26091617 - time (sec): 1.69 - samples/sec: 75.59 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:06:51,131 epoch 2 - iter 4/26 - loss 1.28055647 - time (sec): 4.22 - samples/sec: 60.66 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:06:55,624 epoch 2 - iter 6/26 - loss 1.28847355 - time (sec): 8.71 - samples/sec: 44.07 - lr: 0.000023 - momentum: 0.000000
2023-12-21 14:07:00,314 epoch 2 - iter 8/26 - loss 1.28780442 - time (sec): 13.40 - samples/sec: 38.20 - lr: 0.000025 - momentum: 0.000000
2023-12-21 14:07:02,585 epoch 2 - iter 10/26 - loss 1.30485873 - time (sec): 15.67 - samples/sec: 40.83 - lr: 0.000027 - momentum: 0.000000
2023-12-21 14:07:06,698 epoch 2 - iter 12/26 - loss 1.30087702 - time (sec): 19.79 - samples/sec: 38.81 - lr: 0.000028 - momentum: 0.000000
2023-12-21 14:07:08,955 epoch 2 - iter 14/

100%|██████████| 80/80 [00:48<00:00,  1.65it/s]

2023-12-21 14:08:16,320 DEV : loss 1.206844687461853 - f1-score (micro avg)  0.4874





2023-12-21 14:08:16,822 ----------------------------------------------------------------------------------------------------
2023-12-21 14:08:18,630 epoch 3 - iter 2/26 - loss 1.30414271 - time (sec): 1.81 - samples/sec: 70.91 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:08:23,967 epoch 3 - iter 4/26 - loss 1.24868593 - time (sec): 7.14 - samples/sec: 35.84 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:08:25,933 epoch 3 - iter 6/26 - loss 1.23046676 - time (sec): 9.11 - samples/sec: 42.16 - lr: 0.000043 - momentum: 0.000000
2023-12-21 14:08:27,492 epoch 3 - iter 8/26 - loss 1.21107398 - time (sec): 10.67 - samples/sec: 47.99 - lr: 0.000045 - momentum: 0.000000
2023-12-21 14:08:28,709 epoch 3 - iter 10/26 - loss 1.23562975 - time (sec): 11.88 - samples/sec: 53.85 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:08:31,029 epoch 3 - iter 12/26 - loss 1.23813482 - time (sec): 14.20 - samples/sec: 54.07 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:08:33,543 epoch 3 - iter 14/

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:09:43,640 DEV : loss 1.1521825790405273 - f1-score (micro avg)  0.5118





2023-12-21 14:09:44,135 ----------------------------------------------------------------------------------------------------
2023-12-21 14:09:48,643 epoch 4 - iter 2/26 - loss 1.20915246 - time (sec): 4.51 - samples/sec: 28.40 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:09:50,822 epoch 4 - iter 4/26 - loss 1.17890492 - time (sec): 6.69 - samples/sec: 38.29 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:09:52,371 epoch 4 - iter 6/26 - loss 1.25530485 - time (sec): 8.23 - samples/sec: 46.64 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:09:55,115 epoch 4 - iter 8/26 - loss 1.23211116 - time (sec): 10.98 - samples/sec: 46.64 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:09:58,715 epoch 4 - iter 10/26 - loss 1.22645744 - time (sec): 14.58 - samples/sec: 43.90 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:10:04,812 epoch 4 - iter 12/26 - loss 1.22087872 - time (sec): 20.68 - samples/sec: 37.15 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:10:07,968 epoch 4 - iter 14/

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-21 14:11:17,997 DEV : loss 1.1632128953933716 - f1-score (micro avg)  0.4692





2023-12-21 14:11:18,518 ----------------------------------------------------------------------------------------------------
2023-12-21 14:11:20,826 epoch 5 - iter 2/26 - loss 1.10492742 - time (sec): 2.31 - samples/sec: 55.49 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:11:28,549 epoch 5 - iter 4/26 - loss 1.11329925 - time (sec): 10.03 - samples/sec: 25.53 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:11:30,299 epoch 5 - iter 6/26 - loss 1.16216058 - time (sec): 11.78 - samples/sec: 32.60 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:11:33,929 epoch 5 - iter 8/26 - loss 1.16756512 - time (sec): 15.41 - samples/sec: 33.23 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:11:36,686 epoch 5 - iter 10/26 - loss 1.13930280 - time (sec): 18.17 - samples/sec: 35.23 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:11:38,710 epoch 5 - iter 12/26 - loss 1.13044322 - time (sec): 20.19 - samples/sec: 38.04 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:11:39,625 epoch 5 - iter 1

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:12:44,414 DEV : loss 1.165937900543213 - f1-score (micro avg)  0.444





2023-12-21 14:12:44,905 ----------------------------------------------------------------------------------------------------
2023-12-21 14:12:48,952 epoch 6 - iter 2/26 - loss 1.10829830 - time (sec): 4.04 - samples/sec: 31.65 - lr: 0.000045 - momentum: 0.000000
2023-12-21 14:12:50,829 epoch 6 - iter 4/26 - loss 1.11998990 - time (sec): 5.92 - samples/sec: 43.23 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:12:53,527 epoch 6 - iter 6/26 - loss 1.11427099 - time (sec): 8.62 - samples/sec: 44.55 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:12:56,051 epoch 6 - iter 8/26 - loss 1.11467882 - time (sec): 11.14 - samples/sec: 45.95 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:12:58,682 epoch 6 - iter 10/26 - loss 1.11854780 - time (sec): 13.78 - samples/sec: 46.46 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:13:02,565 epoch 6 - iter 12/26 - loss 1.11488533 - time (sec): 17.66 - samples/sec: 43.49 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:13:03,757 epoch 6 - iter 14/

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-21 14:14:11,805 DEV : loss 1.1337403059005737 - f1-score (micro avg)  0.5229





2023-12-21 14:14:12,338 ----------------------------------------------------------------------------------------------------
2023-12-21 14:14:19,644 epoch 7 - iter 2/26 - loss 1.18366015 - time (sec): 7.30 - samples/sec: 17.52 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:22,804 epoch 7 - iter 4/26 - loss 1.11704557 - time (sec): 10.46 - samples/sec: 24.46 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:29,072 epoch 7 - iter 6/26 - loss 1.09387435 - time (sec): 16.73 - samples/sec: 22.95 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:30,491 epoch 7 - iter 8/26 - loss 1.07401221 - time (sec): 18.15 - samples/sec: 28.21 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:31,500 epoch 7 - iter 10/26 - loss 1.08911182 - time (sec): 19.16 - samples/sec: 33.40 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:32,722 epoch 7 - iter 12/26 - loss 1.07878064 - time (sec): 20.38 - samples/sec: 37.68 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:14:34,303 epoch 7 - iter 1

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:15:39,109 DEV : loss 1.1103652715682983 - f1-score (micro avg)  0.5355





2023-12-21 14:15:39,614 ----------------------------------------------------------------------------------------------------
2023-12-21 14:15:42,134 epoch 8 - iter 2/26 - loss 0.99032760 - time (sec): 2.52 - samples/sec: 50.82 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:15:44,628 epoch 8 - iter 4/26 - loss 1.02484134 - time (sec): 5.01 - samples/sec: 51.08 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:15:46,746 epoch 8 - iter 6/26 - loss 1.03873485 - time (sec): 7.13 - samples/sec: 53.85 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:15:47,977 epoch 8 - iter 8/26 - loss 1.05436015 - time (sec): 8.36 - samples/sec: 61.23 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:15:53,197 epoch 8 - iter 10/26 - loss 1.06120757 - time (sec): 13.58 - samples/sec: 47.12 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:15:58,322 epoch 8 - iter 12/26 - loss 1.05382211 - time (sec): 18.71 - samples/sec: 41.06 - lr: 0.000039 - momentum: 0.000000
2023-12-21 14:16:00,393 epoch 8 - iter 14/2

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:17:09,706 DEV : loss 1.207031011581421 - f1-score (micro avg)  0.4487





2023-12-21 14:17:10,211 ----------------------------------------------------------------------------------------------------
2023-12-21 14:17:14,377 epoch 9 - iter 2/26 - loss 1.07181108 - time (sec): 4.16 - samples/sec: 30.74 - lr: 0.000038 - momentum: 0.000000
2023-12-21 14:17:17,108 epoch 9 - iter 4/26 - loss 1.09876868 - time (sec): 6.90 - samples/sec: 37.13 - lr: 0.000038 - momentum: 0.000000
2023-12-21 14:17:18,280 epoch 9 - iter 6/26 - loss 1.07794056 - time (sec): 8.07 - samples/sec: 47.60 - lr: 0.000038 - momentum: 0.000000
2023-12-21 14:17:20,791 epoch 9 - iter 8/26 - loss 1.04326752 - time (sec): 10.58 - samples/sec: 48.40 - lr: 0.000038 - momentum: 0.000000
2023-12-21 14:17:27,032 epoch 9 - iter 10/26 - loss 1.04217390 - time (sec): 16.82 - samples/sec: 38.05 - lr: 0.000037 - momentum: 0.000000
2023-12-21 14:17:27,682 epoch 9 - iter 12/26 - loss 1.03952157 - time (sec): 17.47 - samples/sec: 43.96 - lr: 0.000037 - momentum: 0.000000
2023-12-21 14:17:30,026 epoch 9 - iter 14/

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:18:38,561 DEV : loss 1.0759525299072266 - f1-score (micro avg)  0.5749





2023-12-21 14:18:40,359 ----------------------------------------------------------------------------------------------------
2023-12-21 14:18:41,913 epoch 10 - iter 2/26 - loss 1.04092842 - time (sec): 1.55 - samples/sec: 82.49 - lr: 0.000036 - momentum: 0.000000
2023-12-21 14:18:46,771 epoch 10 - iter 4/26 - loss 1.02674058 - time (sec): 6.41 - samples/sec: 39.94 - lr: 0.000036 - momentum: 0.000000
2023-12-21 14:18:49,888 epoch 10 - iter 6/26 - loss 1.01518998 - time (sec): 9.53 - samples/sec: 40.31 - lr: 0.000035 - momentum: 0.000000
2023-12-21 14:18:55,645 epoch 10 - iter 8/26 - loss 1.02595923 - time (sec): 15.28 - samples/sec: 33.50 - lr: 0.000035 - momentum: 0.000000
2023-12-21 14:18:58,467 epoch 10 - iter 10/26 - loss 1.03955827 - time (sec): 18.10 - samples/sec: 35.35 - lr: 0.000035 - momentum: 0.000000
2023-12-21 14:18:59,474 epoch 10 - iter 12/26 - loss 1.05184304 - time (sec): 19.11 - samples/sec: 40.18 - lr: 0.000035 - momentum: 0.000000
2023-12-21 14:19:01,883 epoch 10 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:20:06,586 DEV : loss 1.1269484758377075 - f1-score (micro avg)  0.5615





2023-12-21 14:20:07,125 ----------------------------------------------------------------------------------------------------
2023-12-21 14:20:09,737 epoch 11 - iter 2/26 - loss 0.99051186 - time (sec): 2.61 - samples/sec: 49.03 - lr: 0.000034 - momentum: 0.000000
2023-12-21 14:20:11,103 epoch 11 - iter 4/26 - loss 0.98084758 - time (sec): 3.98 - samples/sec: 64.39 - lr: 0.000033 - momentum: 0.000000
2023-12-21 14:20:14,268 epoch 11 - iter 6/26 - loss 0.97860961 - time (sec): 7.14 - samples/sec: 53.77 - lr: 0.000033 - momentum: 0.000000
2023-12-21 14:20:15,241 epoch 11 - iter 8/26 - loss 0.98855650 - time (sec): 8.11 - samples/sec: 63.10 - lr: 0.000033 - momentum: 0.000000
2023-12-21 14:20:21,625 epoch 11 - iter 10/26 - loss 1.00139135 - time (sec): 14.50 - samples/sec: 44.14 - lr: 0.000033 - momentum: 0.000000
2023-12-21 14:20:26,680 epoch 11 - iter 12/26 - loss 1.00487808 - time (sec): 19.55 - samples/sec: 39.28 - lr: 0.000033 - momentum: 0.000000
2023-12-21 14:20:30,256 epoch 11 - it

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:21:32,492 DEV : loss 1.167791724205017 - f1-score (micro avg)  0.4929





2023-12-21 14:21:34,372 ----------------------------------------------------------------------------------------------------
2023-12-21 14:21:36,197 epoch 12 - iter 2/26 - loss 0.97672901 - time (sec): 1.82 - samples/sec: 70.21 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:37,033 epoch 12 - iter 4/26 - loss 0.99132326 - time (sec): 2.66 - samples/sec: 96.28 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:40,206 epoch 12 - iter 6/26 - loss 1.00636172 - time (sec): 5.83 - samples/sec: 65.83 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:42,650 epoch 12 - iter 8/26 - loss 1.02453722 - time (sec): 8.28 - samples/sec: 61.86 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:47,806 epoch 12 - iter 10/26 - loss 1.01788287 - time (sec): 13.43 - samples/sec: 47.65 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:50,027 epoch 12 - iter 12/26 - loss 1.00119760 - time (sec): 15.65 - samples/sec: 49.06 - lr: 0.000031 - momentum: 0.000000
2023-12-21 14:21:55,213 epoch 12 - it

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:23:01,121 DEV : loss 1.1518765687942505 - f1-score (micro avg)  0.4976





2023-12-21 14:23:01,648 ----------------------------------------------------------------------------------------------------
2023-12-21 14:23:06,462 epoch 13 - iter 2/26 - loss 0.88842073 - time (sec): 4.81 - samples/sec: 26.60 - lr: 0.000029 - momentum: 0.000000
2023-12-21 14:23:08,304 epoch 13 - iter 4/26 - loss 0.94829114 - time (sec): 6.65 - samples/sec: 38.48 - lr: 0.000029 - momentum: 0.000000
2023-12-21 14:23:12,967 epoch 13 - iter 6/26 - loss 0.96386559 - time (sec): 11.32 - samples/sec: 33.93 - lr: 0.000029 - momentum: 0.000000
2023-12-21 14:23:15,303 epoch 13 - iter 8/26 - loss 0.95060541 - time (sec): 13.65 - samples/sec: 37.50 - lr: 0.000029 - momentum: 0.000000
2023-12-21 14:23:17,961 epoch 13 - iter 10/26 - loss 0.95689950 - time (sec): 16.31 - samples/sec: 39.24 - lr: 0.000029 - momentum: 0.000000
2023-12-21 14:23:20,034 epoch 13 - iter 12/26 - loss 0.95464015 - time (sec): 18.38 - samples/sec: 41.78 - lr: 0.000028 - momentum: 0.000000
2023-12-21 14:23:21,687 epoch 13 - 

100%|██████████| 80/80 [00:48<00:00,  1.64it/s]

2023-12-21 14:24:29,833 DEV : loss 1.140834927558899 - f1-score (micro avg)  0.4811





2023-12-21 14:24:30,350 ----------------------------------------------------------------------------------------------------
2023-12-21 14:24:31,693 epoch 14 - iter 2/26 - loss 0.95782462 - time (sec): 1.34 - samples/sec: 95.44 - lr: 0.000027 - momentum: 0.000000
2023-12-21 14:24:35,833 epoch 14 - iter 4/26 - loss 0.99261408 - time (sec): 5.48 - samples/sec: 46.71 - lr: 0.000027 - momentum: 0.000000
2023-12-21 14:24:38,173 epoch 14 - iter 6/26 - loss 0.95887128 - time (sec): 7.82 - samples/sec: 49.10 - lr: 0.000027 - momentum: 0.000000
2023-12-21 14:24:44,186 epoch 14 - iter 8/26 - loss 0.94919513 - time (sec): 13.83 - samples/sec: 37.01 - lr: 0.000027 - momentum: 0.000000
2023-12-21 14:24:45,330 epoch 14 - iter 10/26 - loss 0.93492457 - time (sec): 14.98 - samples/sec: 42.73 - lr: 0.000026 - momentum: 0.000000
2023-12-21 14:24:47,440 epoch 14 - iter 12/26 - loss 0.92537526 - time (sec): 17.09 - samples/sec: 44.94 - lr: 0.000026 - momentum: 0.000000
2023-12-21 14:24:51,799 epoch 14 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:25:58,280 DEV : loss 1.1353288888931274 - f1-score (micro avg)  0.4976





2023-12-21 14:25:58,788 ----------------------------------------------------------------------------------------------------
2023-12-21 14:26:02,268 epoch 15 - iter 2/26 - loss 0.88157976 - time (sec): 3.48 - samples/sec: 36.81 - lr: 0.000025 - momentum: 0.000000
2023-12-21 14:26:09,909 epoch 15 - iter 4/26 - loss 0.91329256 - time (sec): 11.12 - samples/sec: 23.03 - lr: 0.000025 - momentum: 0.000000
2023-12-21 14:26:15,513 epoch 15 - iter 6/26 - loss 0.94356920 - time (sec): 16.72 - samples/sec: 22.96 - lr: 0.000025 - momentum: 0.000000
2023-12-21 14:26:17,237 epoch 15 - iter 8/26 - loss 0.91100359 - time (sec): 18.45 - samples/sec: 27.76 - lr: 0.000024 - momentum: 0.000000
2023-12-21 14:26:19,458 epoch 15 - iter 10/26 - loss 0.91124522 - time (sec): 20.67 - samples/sec: 30.97 - lr: 0.000024 - momentum: 0.000000
2023-12-21 14:26:20,576 epoch 15 - iter 12/26 - loss 0.92034570 - time (sec): 21.79 - samples/sec: 35.25 - lr: 0.000024 - momentum: 0.000000
2023-12-21 14:26:23,719 epoch 15 -

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:27:29,122 DEV : loss 1.2481611967086792 - f1-score (micro avg)  0.3998





2023-12-21 14:27:29,653 ----------------------------------------------------------------------------------------------------
2023-12-21 14:27:32,162 epoch 16 - iter 2/26 - loss 0.98139352 - time (sec): 2.51 - samples/sec: 51.06 - lr: 0.000023 - momentum: 0.000000
2023-12-21 14:27:34,090 epoch 16 - iter 4/26 - loss 0.96041334 - time (sec): 4.43 - samples/sec: 57.73 - lr: 0.000023 - momentum: 0.000000
2023-12-21 14:27:37,679 epoch 16 - iter 6/26 - loss 0.95228692 - time (sec): 8.02 - samples/sec: 47.86 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:27:39,218 epoch 16 - iter 8/26 - loss 0.93553083 - time (sec): 9.56 - samples/sec: 53.54 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:27:42,920 epoch 16 - iter 10/26 - loss 0.92165139 - time (sec): 13.26 - samples/sec: 48.25 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:27:45,928 epoch 16 - iter 12/26 - loss 0.93086300 - time (sec): 16.27 - samples/sec: 47.19 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:27:46,646 epoch 16 - it

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:28:57,002 DEV : loss 1.3528763055801392 - f1-score (micro avg)  0.3636





2023-12-21 14:28:57,880 ----------------------------------------------------------------------------------------------------
2023-12-21 14:29:04,667 epoch 17 - iter 2/26 - loss 0.93752280 - time (sec): 6.78 - samples/sec: 18.87 - lr: 0.000021 - momentum: 0.000000
2023-12-21 14:29:09,024 epoch 17 - iter 4/26 - loss 0.95495643 - time (sec): 11.14 - samples/sec: 22.98 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:29:12,061 epoch 17 - iter 6/26 - loss 0.93603265 - time (sec): 14.18 - samples/sec: 27.08 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:29:13,947 epoch 17 - iter 8/26 - loss 0.93546331 - time (sec): 16.06 - samples/sec: 31.87 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:29:15,462 epoch 17 - iter 10/26 - loss 0.91669483 - time (sec): 17.58 - samples/sec: 36.41 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:29:18,975 epoch 17 - iter 12/26 - loss 0.91435302 - time (sec): 21.09 - samples/sec: 36.41 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:29:21,832 epoch 17 -

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-21 14:30:24,933 DEV : loss 1.1495939493179321 - f1-score (micro avg)  0.4929





2023-12-21 14:30:25,807 ----------------------------------------------------------------------------------------------------
2023-12-21 14:30:27,264 epoch 18 - iter 2/26 - loss 0.94794098 - time (sec): 1.45 - samples/sec: 88.17 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:29,033 epoch 18 - iter 4/26 - loss 0.89374353 - time (sec): 3.22 - samples/sec: 79.49 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:34,903 epoch 18 - iter 6/26 - loss 0.88840153 - time (sec): 9.09 - samples/sec: 42.24 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:36,502 epoch 18 - iter 8/26 - loss 0.88051008 - time (sec): 10.69 - samples/sec: 47.90 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:38,521 epoch 18 - iter 10/26 - loss 0.87559919 - time (sec): 12.71 - samples/sec: 50.36 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:39,545 epoch 18 - iter 12/26 - loss 0.88780586 - time (sec): 13.73 - samples/sec: 55.92 - lr: 0.000018 - momentum: 0.000000
2023-12-21 14:30:41,977 epoch 18 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:31:52,997 DEV : loss 1.1941193342208862 - f1-score (micro avg)  0.4637





2023-12-21 14:31:53,847 ----------------------------------------------------------------------------------------------------
2023-12-21 14:31:57,929 epoch 19 - iter 2/26 - loss 0.98316270 - time (sec): 4.08 - samples/sec: 31.38 - lr: 0.000016 - momentum: 0.000000
2023-12-21 14:32:00,525 epoch 19 - iter 4/26 - loss 0.91673094 - time (sec): 6.67 - samples/sec: 38.35 - lr: 0.000016 - momentum: 0.000000
2023-12-21 14:32:02,754 epoch 19 - iter 6/26 - loss 0.90253365 - time (sec): 8.90 - samples/sec: 43.13 - lr: 0.000016 - momentum: 0.000000
2023-12-21 14:32:04,574 epoch 19 - iter 8/26 - loss 0.88093609 - time (sec): 10.72 - samples/sec: 47.74 - lr: 0.000016 - momentum: 0.000000
2023-12-21 14:32:08,876 epoch 19 - iter 10/26 - loss 0.88032961 - time (sec): 15.03 - samples/sec: 42.59 - lr: 0.000016 - momentum: 0.000000
2023-12-21 14:32:13,387 epoch 19 - iter 12/26 - loss 0.88705983 - time (sec): 19.54 - samples/sec: 39.31 - lr: 0.000015 - momentum: 0.000000
2023-12-21 14:32:14,575 epoch 19 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:33:22,390 DEV : loss 1.187447428703308 - f1-score (micro avg)  0.4614





2023-12-21 14:33:23,278 ----------------------------------------------------------------------------------------------------
2023-12-21 14:33:24,392 epoch 20 - iter 2/26 - loss 0.83523086 - time (sec): 1.11 - samples/sec: 115.24 - lr: 0.000014 - momentum: 0.000000
2023-12-21 14:33:29,708 epoch 20 - iter 4/26 - loss 0.83845094 - time (sec): 6.43 - samples/sec: 39.83 - lr: 0.000014 - momentum: 0.000000
2023-12-21 14:33:32,692 epoch 20 - iter 6/26 - loss 0.81525547 - time (sec): 9.41 - samples/sec: 40.81 - lr: 0.000014 - momentum: 0.000000
2023-12-21 14:33:34,180 epoch 20 - iter 8/26 - loss 0.81512859 - time (sec): 10.90 - samples/sec: 46.98 - lr: 0.000014 - momentum: 0.000000
2023-12-21 14:33:37,127 epoch 20 - iter 10/26 - loss 0.84071037 - time (sec): 13.85 - samples/sec: 46.22 - lr: 0.000013 - momentum: 0.000000
2023-12-21 14:33:40,426 epoch 20 - iter 12/26 - loss 0.84466453 - time (sec): 17.15 - samples/sec: 44.79 - lr: 0.000013 - momentum: 0.000000
2023-12-21 14:33:42,462 epoch 20 - 

100%|██████████| 80/80 [00:47<00:00,  1.70it/s]

2023-12-21 14:34:51,550 DEV : loss 1.2082842588424683 - f1-score (micro avg)  0.4472





2023-12-21 14:34:53,704 ----------------------------------------------------------------------------------------------------
2023-12-21 14:34:56,382 epoch 21 - iter 2/26 - loss 0.89276695 - time (sec): 2.68 - samples/sec: 47.83 - lr: 0.000012 - momentum: 0.000000
2023-12-21 14:34:57,756 epoch 21 - iter 4/26 - loss 0.87758107 - time (sec): 4.05 - samples/sec: 63.20 - lr: 0.000012 - momentum: 0.000000
2023-12-21 14:35:02,434 epoch 21 - iter 6/26 - loss 0.85995143 - time (sec): 8.73 - samples/sec: 43.99 - lr: 0.000011 - momentum: 0.000000
2023-12-21 14:35:05,099 epoch 21 - iter 8/26 - loss 0.88865737 - time (sec): 11.39 - samples/sec: 44.94 - lr: 0.000011 - momentum: 0.000000
2023-12-21 14:35:10,023 epoch 21 - iter 10/26 - loss 0.86544261 - time (sec): 16.32 - samples/sec: 39.22 - lr: 0.000011 - momentum: 0.000000
2023-12-21 14:35:12,163 epoch 21 - iter 12/26 - loss 0.86416543 - time (sec): 18.46 - samples/sec: 41.61 - lr: 0.000011 - momentum: 0.000000
2023-12-21 14:35:13,488 epoch 21 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:36:18,741 DEV : loss 1.1197868585586548 - f1-score (micro avg)  0.5039





2023-12-21 14:36:19,596 ----------------------------------------------------------------------------------------------------
2023-12-21 14:36:22,532 epoch 22 - iter 2/26 - loss 0.90078270 - time (sec): 2.93 - samples/sec: 43.64 - lr: 0.000010 - momentum: 0.000000
2023-12-21 14:36:24,541 epoch 22 - iter 4/26 - loss 0.90706104 - time (sec): 4.94 - samples/sec: 51.80 - lr: 0.000009 - momentum: 0.000000
2023-12-21 14:36:26,798 epoch 22 - iter 6/26 - loss 0.88766742 - time (sec): 7.20 - samples/sec: 53.34 - lr: 0.000009 - momentum: 0.000000
2023-12-21 14:36:29,481 epoch 22 - iter 8/26 - loss 0.88044843 - time (sec): 9.88 - samples/sec: 51.81 - lr: 0.000009 - momentum: 0.000000
2023-12-21 14:36:34,082 epoch 22 - iter 10/26 - loss 0.87833127 - time (sec): 14.48 - samples/sec: 44.19 - lr: 0.000009 - momentum: 0.000000
2023-12-21 14:36:35,586 epoch 22 - iter 12/26 - loss 0.87563023 - time (sec): 15.99 - samples/sec: 48.04 - lr: 0.000009 - momentum: 0.000000
2023-12-21 14:36:41,979 epoch 22 - it

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:37:45,129 DEV : loss 1.218178629875183 - f1-score (micro avg)  0.4495





2023-12-21 14:37:47,587 ----------------------------------------------------------------------------------------------------
2023-12-21 14:37:49,997 epoch 23 - iter 2/26 - loss 0.89144361 - time (sec): 2.41 - samples/sec: 53.18 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:37:51,377 epoch 23 - iter 4/26 - loss 0.88433219 - time (sec): 3.79 - samples/sec: 67.59 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:37:53,192 epoch 23 - iter 6/26 - loss 0.88920371 - time (sec): 5.60 - samples/sec: 68.54 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:37:56,515 epoch 23 - iter 8/26 - loss 0.86596669 - time (sec): 8.93 - samples/sec: 57.37 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:38:05,900 epoch 23 - iter 10/26 - loss 0.86124030 - time (sec): 18.31 - samples/sec: 34.95 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:38:10,565 epoch 23 - iter 12/26 - loss 0.84539769 - time (sec): 22.98 - samples/sec: 33.43 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:38:12,340 epoch 23 - it

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-21 14:39:17,924 DEV : loss 1.2293740510940552 - f1-score (micro avg)  0.4424





2023-12-21 14:39:18,799 ----------------------------------------------------------------------------------------------------
2023-12-21 14:39:20,631 epoch 24 - iter 2/26 - loss 0.83562872 - time (sec): 1.83 - samples/sec: 69.96 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:39:24,955 epoch 24 - iter 4/26 - loss 0.89032701 - time (sec): 6.15 - samples/sec: 41.60 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:39:27,417 epoch 24 - iter 6/26 - loss 0.88167943 - time (sec): 8.62 - samples/sec: 44.57 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:39:31,560 epoch 24 - iter 8/26 - loss 0.85893252 - time (sec): 12.76 - samples/sec: 40.13 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:39:35,995 epoch 24 - iter 10/26 - loss 0.83505823 - time (sec): 17.19 - samples/sec: 37.22 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:39:40,040 epoch 24 - iter 12/26 - loss 0.83476381 - time (sec): 21.24 - samples/sec: 36.16 - lr: 0.000004 - momentum: 0.000000
2023-12-21 14:39:41,013 epoch 24 - i

100%|██████████| 80/80 [00:48<00:00,  1.64it/s]

2023-12-21 14:40:47,314 DEV : loss 1.2015999555587769 - f1-score (micro avg)  0.4582





2023-12-21 14:40:48,182 ----------------------------------------------------------------------------------------------------
2023-12-21 14:40:54,421 epoch 25 - iter 2/26 - loss 0.94004342 - time (sec): 6.24 - samples/sec: 20.53 - lr: 0.000003 - momentum: 0.000000
2023-12-21 14:40:55,949 epoch 25 - iter 4/26 - loss 0.85629429 - time (sec): 7.76 - samples/sec: 32.97 - lr: 0.000003 - momentum: 0.000000
2023-12-21 14:40:57,290 epoch 25 - iter 6/26 - loss 0.86629656 - time (sec): 9.11 - samples/sec: 42.17 - lr: 0.000003 - momentum: 0.000000
2023-12-21 14:40:58,916 epoch 25 - iter 8/26 - loss 0.85525732 - time (sec): 10.73 - samples/sec: 47.71 - lr: 0.000003 - momentum: 0.000000
2023-12-21 14:41:08,289 epoch 25 - iter 10/26 - loss 0.85075036 - time (sec): 20.10 - samples/sec: 31.83 - lr: 0.000002 - momentum: 0.000000
2023-12-21 14:41:10,707 epoch 25 - iter 12/26 - loss 0.84931550 - time (sec): 22.52 - samples/sec: 34.10 - lr: 0.000002 - momentum: 0.000000
2023-12-21 14:41:12,308 epoch 25 - i

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-21 14:42:11,037 DEV : loss 1.2047524452209473 - f1-score (micro avg)  0.455





2023-12-21 14:42:13,168 ----------------------------------------------------------------------------------------------------
2023-12-21 14:42:13,170 Testing using last state of model ...


100%|██████████| 110/110 [00:54<00:00,  2.02it/s]

2023-12-21 14:43:07,666 
Results:
- F-score (micro) 0.4983
- F-score (macro) 0.3406
- Accuracy 0.4983

By class:
              precision    recall  f1-score   support

     comment     0.9123    0.5075    0.6522      1476
     support     0.1211    0.4519    0.1911       104
        deny     0.0685    0.2500    0.1075       100
       query     0.2849    0.7424    0.4118        66

    accuracy                         0.4983      1746
   macro avg     0.3467    0.4879    0.3406      1746
weighted avg     0.7931    0.4983    0.5844      1746

2023-12-21 14:43:07,669 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = True
add_post_title = False
add_depth = False

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[51]: "The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-21 14:43:16,349 Reading data from .
2023-12-21 14:43:16,351 Train: train_fasttext_format.txt
2023-12-21 14:43:16,354 Dev: dev_fasttext_format.txt
2023-12-21 14:43:16,357 Test: test_fasttext_format.txt
2023-12-21 14:43:16,448 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1663,
        "number_of_documents_per_class": {
            "comment": 453,
            "query": 394,
            "deny": 363,
            "support": 453
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 78064,
            "min": 11,
            "max": 591,
            "avg": 46.94167167769092
        }
    },
    "TEST": {
        "dataset": "

0it [00:00, ?it/s]
1663it [00:02, 823.87it/s]

2023-12-21 14:43:38,401 Dictionary created for label 'class' with 4 values: comment (seen 453 times), support (seen 453 times), query (seen 394 times), deny (seen 363 times)
2023-12-21 14:43:38,410 ----------------------------------------------------------------------------------------------------
2023-12-21 14:43:38,414 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-21 14:43:44,944 epoch 1 - iter 2/26 - loss 1.44883072 - time (sec): 6.49 - samples/sec: 19.73 - lr: 0.000001 - momentum: 0.000000
2023-12-21 14:43:48,013 epoch 1 - iter 4/26 - loss 1.44344002 - time (sec): 9.56 - samples/sec: 26.78 - lr: 0.000002 - momentum: 0.000000
2023-12-21 14:43:52,627 epoch 1 - iter 6/26 - loss 1.43699368 - time (sec): 14.17 - samples/sec: 27.10 - lr: 0.000004 - momentum: 0.000000
2023-12-21 14:44:00,179 epoch 1 - iter 8/26 - loss 1.43938459 - time (sec): 21.72 - samples/sec: 23.57 - lr: 0.000005 - momentum: 0.000000
2023-12-21 14:44:04,167 epoch 1 - iter 10/26 - loss 1.43879520 - time (sec): 25.71 - samples/sec: 24.89 - lr: 0.000007 - momentum: 0.000000
2023-12-21 14:44:07,511 epoch 1 - iter 12/26 - loss 1.45634094 - time (sec): 29.06 - samples/sec: 26.43 - lr: 0.000008 - momentum: 0.000000
2023-12-21 14:44:13,903 epoch 1 - iter 14/26 - loss 1.45590735 - time (sec): 35.45 - samples/sec: 25.28 - lr: 0.000010 - momentum: 0.000000
2023-12-21 14:44:17,225 ep

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:45:48,802 DEV : loss 1.2364163398742676 - f1-score (micro avg)  0.5016





2023-12-21 14:45:51,150 ----------------------------------------------------------------------------------------------------
2023-12-21 14:45:54,034 epoch 2 - iter 2/26 - loss 1.35227370 - time (sec): 2.88 - samples/sec: 44.40 - lr: 0.000020 - momentum: 0.000000
2023-12-21 14:46:05,360 epoch 2 - iter 4/26 - loss 1.33027735 - time (sec): 14.21 - samples/sec: 18.02 - lr: 0.000022 - momentum: 0.000000
2023-12-21 14:46:07,349 epoch 2 - iter 6/26 - loss 1.34101506 - time (sec): 16.20 - samples/sec: 23.71 - lr: 0.000023 - momentum: 0.000000
2023-12-21 14:46:13,135 epoch 2 - iter 8/26 - loss 1.34705187 - time (sec): 21.98 - samples/sec: 23.29 - lr: 0.000025 - momentum: 0.000000
2023-12-21 14:46:15,848 epoch 2 - iter 10/26 - loss 1.32836744 - time (sec): 24.70 - samples/sec: 25.91 - lr: 0.000026 - momentum: 0.000000
2023-12-21 14:46:17,917 epoch 2 - iter 12/26 - loss 1.32289488 - time (sec): 26.77 - samples/sec: 28.69 - lr: 0.000028 - momentum: 0.000000
2023-12-21 14:46:20,336 epoch 2 - iter 1

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:47:58,043 DEV : loss 1.2168010473251343 - f1-score (micro avg)  0.4669





2023-12-21 14:48:00,538 ----------------------------------------------------------------------------------------------------
2023-12-21 14:48:01,876 epoch 3 - iter 2/26 - loss 1.34876847 - time (sec): 1.34 - samples/sec: 95.77 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:48:04,523 epoch 3 - iter 4/26 - loss 1.35428688 - time (sec): 3.98 - samples/sec: 64.26 - lr: 0.000041 - momentum: 0.000000
2023-12-21 14:48:09,522 epoch 3 - iter 6/26 - loss 1.27668420 - time (sec): 8.98 - samples/sec: 42.75 - lr: 0.000043 - momentum: 0.000000
2023-12-21 14:48:14,354 epoch 3 - iter 8/26 - loss 1.26608579 - time (sec): 13.81 - samples/sec: 37.06 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:48:17,825 epoch 3 - iter 10/26 - loss 1.24591225 - time (sec): 17.28 - samples/sec: 37.03 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:48:24,068 epoch 3 - iter 12/26 - loss 1.23869190 - time (sec): 23.53 - samples/sec: 32.64 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:48:28,409 epoch 3 - iter 14/

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:50:04,763 DEV : loss 1.2612724304199219 - f1-score (micro avg)  0.3651





2023-12-21 14:50:08,625 ----------------------------------------------------------------------------------------------------
2023-12-21 14:50:11,852 epoch 4 - iter 2/26 - loss 1.11464256 - time (sec): 3.22 - samples/sec: 39.76 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:50:17,866 epoch 4 - iter 4/26 - loss 1.14911857 - time (sec): 9.23 - samples/sec: 27.72 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:50:24,383 epoch 4 - iter 6/26 - loss 1.16492428 - time (sec): 15.75 - samples/sec: 24.38 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:50:30,868 epoch 4 - iter 8/26 - loss 1.16994400 - time (sec): 22.24 - samples/sec: 23.03 - lr: 0.000049 - momentum: 0.000000
2023-12-21 14:50:33,799 epoch 4 - iter 10/26 - loss 1.17960495 - time (sec): 25.17 - samples/sec: 25.43 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:50:38,227 epoch 4 - iter 12/26 - loss 1.19074189 - time (sec): 29.60 - samples/sec: 25.95 - lr: 0.000048 - momentum: 0.000000
2023-12-21 14:50:44,223 epoch 4 - iter 14

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:52:17,002 DEV : loss 1.1081806421279907 - f1-score (micro avg)  0.5568





2023-12-21 14:52:19,468 ----------------------------------------------------------------------------------------------------
2023-12-21 14:52:23,384 epoch 5 - iter 2/26 - loss 1.12647963 - time (sec): 3.91 - samples/sec: 32.70 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:52:31,342 epoch 5 - iter 4/26 - loss 1.13466370 - time (sec): 11.87 - samples/sec: 21.56 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:52:34,464 epoch 5 - iter 6/26 - loss 1.16085770 - time (sec): 14.99 - samples/sec: 25.61 - lr: 0.000047 - momentum: 0.000000
2023-12-21 14:52:37,557 epoch 5 - iter 8/26 - loss 1.14084946 - time (sec): 18.09 - samples/sec: 28.31 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:52:43,129 epoch 5 - iter 10/26 - loss 1.12760638 - time (sec): 23.66 - samples/sec: 27.05 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:52:46,797 epoch 5 - iter 12/26 - loss 1.13217944 - time (sec): 27.33 - samples/sec: 28.10 - lr: 0.000046 - momentum: 0.000000
2023-12-21 14:52:49,778 epoch 5 - iter 1

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:54:28,478 DEV : loss 1.3153517246246338 - f1-score (micro avg)  0.3462





2023-12-21 14:54:31,042 ----------------------------------------------------------------------------------------------------
2023-12-21 14:54:33,776 epoch 6 - iter 2/26 - loss 1.18394119 - time (sec): 2.73 - samples/sec: 46.84 - lr: 0.000045 - momentum: 0.000000
2023-12-21 14:54:37,967 epoch 6 - iter 4/26 - loss 1.12276480 - time (sec): 6.92 - samples/sec: 36.97 - lr: 0.000045 - momentum: 0.000000
2023-12-21 14:54:44,602 epoch 6 - iter 6/26 - loss 1.11393599 - time (sec): 13.56 - samples/sec: 28.32 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:54:51,247 epoch 6 - iter 8/26 - loss 1.11276244 - time (sec): 20.20 - samples/sec: 25.34 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:54:54,299 epoch 6 - iter 10/26 - loss 1.12915508 - time (sec): 23.26 - samples/sec: 27.52 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:55:02,599 epoch 6 - iter 12/26 - loss 1.13378518 - time (sec): 31.56 - samples/sec: 24.34 - lr: 0.000044 - momentum: 0.000000
2023-12-21 14:55:07,978 epoch 6 - iter 14

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:56:40,851 DEV : loss 1.2967371940612793 - f1-score (micro avg)  0.3525





2023-12-21 14:56:43,391 ----------------------------------------------------------------------------------------------------
2023-12-21 14:56:45,180 epoch 7 - iter 2/26 - loss 1.11059111 - time (sec): 1.79 - samples/sec: 71.63 - lr: 0.000043 - momentum: 0.000000
2023-12-21 14:56:47,647 epoch 7 - iter 4/26 - loss 1.08502564 - time (sec): 4.25 - samples/sec: 60.19 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:56:53,916 epoch 7 - iter 6/26 - loss 1.10176349 - time (sec): 10.52 - samples/sec: 36.49 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:56:57,836 epoch 7 - iter 8/26 - loss 1.07629238 - time (sec): 14.44 - samples/sec: 35.45 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:57:00,447 epoch 7 - iter 10/26 - loss 1.08241411 - time (sec): 17.05 - samples/sec: 37.53 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:57:11,932 epoch 7 - iter 12/26 - loss 1.08247586 - time (sec): 28.54 - samples/sec: 26.91 - lr: 0.000042 - momentum: 0.000000
2023-12-21 14:57:14,376 epoch 7 - iter 14

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 14:58:48,704 DEV : loss 1.2203562259674072 - f1-score (micro avg)  0.418





2023-12-21 14:58:51,725 ----------------------------------------------------------------------------------------------------
2023-12-21 14:58:53,949 epoch 8 - iter 2/26 - loss 1.05042619 - time (sec): 2.22 - samples/sec: 57.60 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:58:59,172 epoch 8 - iter 4/26 - loss 1.07922748 - time (sec): 7.45 - samples/sec: 34.39 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:59:00,927 epoch 8 - iter 6/26 - loss 1.06104860 - time (sec): 9.20 - samples/sec: 41.74 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:59:08,418 epoch 8 - iter 8/26 - loss 1.08714876 - time (sec): 16.69 - samples/sec: 30.67 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:59:11,782 epoch 8 - iter 10/26 - loss 1.06613571 - time (sec): 20.06 - samples/sec: 31.91 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:59:19,763 epoch 8 - iter 12/26 - loss 1.06782828 - time (sec): 28.04 - samples/sec: 27.39 - lr: 0.000040 - momentum: 0.000000
2023-12-21 14:59:23,249 epoch 8 - iter 14/

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 15:00:58,353 DEV : loss 1.2458139657974243 - f1-score (micro avg)  0.429





2023-12-21 15:01:01,447 ----------------------------------------------------------------------------------------------------
2023-12-21 15:01:08,194 epoch 9 - iter 2/26 - loss 1.11734647 - time (sec): 6.74 - samples/sec: 18.98 - lr: 0.000038 - momentum: 0.000000
2023-12-21 15:01:14,666 epoch 9 - iter 4/26 - loss 1.12480125 - time (sec): 13.21 - samples/sec: 19.37 - lr: 0.000038 - momentum: 0.000000
2023-12-21 15:01:18,202 epoch 9 - iter 6/26 - loss 1.07479225 - time (sec): 16.75 - samples/sec: 22.92 - lr: 0.000038 - momentum: 0.000000
2023-12-21 15:01:24,221 epoch 9 - iter 8/26 - loss 1.05308674 - time (sec): 22.77 - samples/sec: 22.49 - lr: 0.000038 - momentum: 0.000000
2023-12-21 15:01:30,846 epoch 9 - iter 10/26 - loss 1.07061401 - time (sec): 29.40 - samples/sec: 21.77 - lr: 0.000038 - momentum: 0.000000
2023-12-21 15:01:34,453 epoch 9 - iter 12/26 - loss 1.05688710 - time (sec): 33.00 - samples/sec: 23.27 - lr: 0.000037 - momentum: 0.000000
2023-12-21 15:01:36,351 epoch 9 - iter 1

100%|██████████| 80/80 [01:09<00:00,  1.15it/s]

2023-12-21 15:03:09,965 DEV : loss 1.2443482875823975 - f1-score (micro avg)  0.4069





2023-12-21 15:03:12,480 ----------------------------------------------------------------------------------------------------
2023-12-21 15:03:19,795 epoch 10 - iter 2/26 - loss 0.98271099 - time (sec): 7.31 - samples/sec: 17.50 - lr: 0.000036 - momentum: 0.000000
2023-12-21 15:03:22,090 epoch 10 - iter 4/26 - loss 0.97330913 - time (sec): 9.61 - samples/sec: 26.64 - lr: 0.000036 - momentum: 0.000000
2023-12-21 15:03:28,170 epoch 10 - iter 6/26 - loss 1.00256160 - time (sec): 15.69 - samples/sec: 24.48 - lr: 0.000036 - momentum: 0.000000
2023-12-21 15:03:30,945 epoch 10 - iter 8/26 - loss 1.01003572 - time (sec): 18.46 - samples/sec: 27.73 - lr: 0.000036 - momentum: 0.000000
2023-12-21 15:03:37,188 epoch 10 - iter 10/26 - loss 1.02003491 - time (sec): 24.71 - samples/sec: 25.90 - lr: 0.000036 - momentum: 0.000000
2023-12-21 15:03:43,713 epoch 10 - iter 12/26 - loss 1.01643921 - time (sec): 31.23 - samples/sec: 24.59 - lr: 0.000035 - momentum: 0.000000
2023-12-21 15:03:46,130 epoch 10 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:05:19,057 DEV : loss 1.275051236152649 - f1-score (micro avg)  0.3644





2023-12-21 15:05:19,990 ----------------------------------------------------------------------------------------------------
2023-12-21 15:05:26,060 epoch 11 - iter 2/26 - loss 1.03577524 - time (sec): 6.07 - samples/sec: 21.10 - lr: 0.000034 - momentum: 0.000000
2023-12-21 15:05:28,319 epoch 11 - iter 4/26 - loss 0.98612934 - time (sec): 8.33 - samples/sec: 30.74 - lr: 0.000034 - momentum: 0.000000
2023-12-21 15:05:35,342 epoch 11 - iter 6/26 - loss 0.99485113 - time (sec): 15.35 - samples/sec: 25.02 - lr: 0.000034 - momentum: 0.000000
2023-12-21 15:05:42,107 epoch 11 - iter 8/26 - loss 1.00140400 - time (sec): 22.11 - samples/sec: 23.15 - lr: 0.000034 - momentum: 0.000000
2023-12-21 15:05:45,290 epoch 11 - iter 10/26 - loss 1.00144297 - time (sec): 25.30 - samples/sec: 25.30 - lr: 0.000033 - momentum: 0.000000
2023-12-21 15:05:48,440 epoch 11 - iter 12/26 - loss 0.99519597 - time (sec): 28.45 - samples/sec: 27.00 - lr: 0.000033 - momentum: 0.000000
2023-12-21 15:05:55,621 epoch 11 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:07:29,583 DEV : loss 1.105717658996582 - f1-score (micro avg)  0.5355





2023-12-21 15:07:30,541 ----------------------------------------------------------------------------------------------------
2023-12-21 15:07:33,524 epoch 12 - iter 2/26 - loss 0.91897416 - time (sec): 2.98 - samples/sec: 42.93 - lr: 0.000032 - momentum: 0.000000
2023-12-21 15:07:39,417 epoch 12 - iter 4/26 - loss 0.93351848 - time (sec): 8.87 - samples/sec: 28.85 - lr: 0.000032 - momentum: 0.000000
2023-12-21 15:07:42,314 epoch 12 - iter 6/26 - loss 0.96921141 - time (sec): 11.77 - samples/sec: 32.62 - lr: 0.000032 - momentum: 0.000000
2023-12-21 15:07:44,324 epoch 12 - iter 8/26 - loss 0.96399983 - time (sec): 13.78 - samples/sec: 37.15 - lr: 0.000031 - momentum: 0.000000
2023-12-21 15:07:55,461 epoch 12 - iter 10/26 - loss 0.97520868 - time (sec): 24.92 - samples/sec: 25.68 - lr: 0.000031 - momentum: 0.000000
2023-12-21 15:07:58,197 epoch 12 - iter 12/26 - loss 0.97967331 - time (sec): 27.65 - samples/sec: 27.77 - lr: 0.000031 - momentum: 0.000000
2023-12-21 15:08:00,933 epoch 12 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:09:37,939 DEV : loss 1.1865882873535156 - f1-score (micro avg)  0.47





2023-12-21 15:09:39,597 ----------------------------------------------------------------------------------------------------
2023-12-21 15:09:41,963 epoch 13 - iter 2/26 - loss 1.09572738 - time (sec): 2.36 - samples/sec: 54.25 - lr: 0.000030 - momentum: 0.000000
2023-12-21 15:09:43,706 epoch 13 - iter 4/26 - loss 0.99222267 - time (sec): 4.10 - samples/sec: 62.39 - lr: 0.000030 - momentum: 0.000000
2023-12-21 15:09:46,257 epoch 13 - iter 6/26 - loss 0.96490161 - time (sec): 6.65 - samples/sec: 57.71 - lr: 0.000029 - momentum: 0.000000
2023-12-21 15:09:48,521 epoch 13 - iter 8/26 - loss 0.98431251 - time (sec): 8.92 - samples/sec: 57.41 - lr: 0.000029 - momentum: 0.000000
2023-12-21 15:09:53,425 epoch 13 - iter 10/26 - loss 0.97929918 - time (sec): 13.82 - samples/sec: 46.30 - lr: 0.000029 - momentum: 0.000000
2023-12-21 15:09:56,653 epoch 13 - iter 12/26 - loss 0.99414700 - time (sec): 17.05 - samples/sec: 45.04 - lr: 0.000029 - momentum: 0.000000
2023-12-21 15:10:02,249 epoch 13 - it

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:11:48,299 DEV : loss 1.2428112030029297 - f1-score (micro avg)  0.4227





2023-12-21 15:11:49,276 ----------------------------------------------------------------------------------------------------
2023-12-21 15:11:52,330 epoch 14 - iter 2/26 - loss 0.89143655 - time (sec): 3.05 - samples/sec: 41.95 - lr: 0.000028 - momentum: 0.000000
2023-12-21 15:11:55,200 epoch 14 - iter 4/26 - loss 0.94725636 - time (sec): 5.92 - samples/sec: 43.23 - lr: 0.000027 - momentum: 0.000000
2023-12-21 15:11:59,140 epoch 14 - iter 6/26 - loss 0.93828724 - time (sec): 9.86 - samples/sec: 38.94 - lr: 0.000027 - momentum: 0.000000
2023-12-21 15:12:04,399 epoch 14 - iter 8/26 - loss 0.95996751 - time (sec): 15.12 - samples/sec: 33.86 - lr: 0.000027 - momentum: 0.000000
2023-12-21 15:12:07,558 epoch 14 - iter 10/26 - loss 0.94756411 - time (sec): 18.28 - samples/sec: 35.01 - lr: 0.000027 - momentum: 0.000000
2023-12-21 15:12:10,892 epoch 14 - iter 12/26 - loss 0.93803275 - time (sec): 21.61 - samples/sec: 35.53 - lr: 0.000027 - momentum: 0.000000
2023-12-21 15:12:14,327 epoch 14 - i

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:13:59,078 DEV : loss 1.1788424253463745 - f1-score (micro avg)  0.4677





2023-12-21 15:14:00,042 ----------------------------------------------------------------------------------------------------
2023-12-21 15:14:03,629 epoch 15 - iter 2/26 - loss 1.03678882 - time (sec): 3.59 - samples/sec: 35.70 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:07,272 epoch 15 - iter 4/26 - loss 0.97608279 - time (sec): 7.23 - samples/sec: 35.42 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:11,315 epoch 15 - iter 6/26 - loss 0.97693889 - time (sec): 11.27 - samples/sec: 34.07 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:13,866 epoch 15 - iter 8/26 - loss 0.98889884 - time (sec): 13.82 - samples/sec: 37.04 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:15,920 epoch 15 - iter 10/26 - loss 0.97550021 - time (sec): 15.88 - samples/sec: 40.31 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:19,157 epoch 15 - iter 12/26 - loss 0.96028810 - time (sec): 19.11 - samples/sec: 40.18 - lr: 0.000025 - momentum: 0.000000
2023-12-21 15:14:24,745 epoch 15 - 

100%|██████████| 80/80 [01:09<00:00,  1.14it/s]

2023-12-21 15:16:09,816 DEV : loss 1.1048948764801025 - f1-score (micro avg)  0.5426





2023-12-21 15:16:10,826 ----------------------------------------------------------------------------------------------------
2023-12-21 15:16:13,999 epoch 16 - iter 2/26 - loss 0.95624724 - time (sec): 3.17 - samples/sec: 40.36 - lr: 0.000023 - momentum: 0.000000
2023-12-21 15:16:22,066 epoch 16 - iter 4/26 - loss 0.96884005 - time (sec): 11.24 - samples/sec: 22.78 - lr: 0.000023 - momentum: 0.000000
2023-12-21 15:16:28,885 epoch 16 - iter 6/26 - loss 0.91207106 - time (sec): 18.06 - samples/sec: 21.27 - lr: 0.000023 - momentum: 0.000000
2023-12-21 15:16:31,248 epoch 16 - iter 8/26 - loss 0.90784056 - time (sec): 20.42 - samples/sec: 25.07 - lr: 0.000023 - momentum: 0.000000
2023-12-21 15:16:34,930 epoch 16 - iter 10/26 - loss 0.91908705 - time (sec): 24.10 - samples/sec: 26.55 - lr: 0.000023 - momentum: 0.000000
2023-12-21 15:16:37,182 epoch 16 - iter 12/26 - loss 0.94234359 - time (sec): 26.35 - samples/sec: 29.14 - lr: 0.000022 - momentum: 0.000000
2023-12-21 15:16:39,496 epoch 16 -

100%|██████████| 80/80 [01:10<00:00,  1.14it/s]

2023-12-21 15:18:14,115 DEV : loss 1.124669075012207 - f1-score (micro avg)  0.5252





2023-12-21 15:18:15,427 ----------------------------------------------------------------------------------------------------
2023-12-21 15:18:21,540 epoch 17 - iter 2/26 - loss 0.88886556 - time (sec): 6.11 - samples/sec: 20.95 - lr: 0.000021 - momentum: 0.000000
2023-12-21 15:18:28,674 epoch 17 - iter 4/26 - loss 0.91550629 - time (sec): 13.24 - samples/sec: 19.33 - lr: 0.000021 - momentum: 0.000000
2023-12-21 15:18:31,947 epoch 17 - iter 6/26 - loss 0.94245860 - time (sec): 16.52 - samples/sec: 23.25 - lr: 0.000021 - momentum: 0.000000
2023-12-21 15:18:34,838 epoch 17 - iter 8/26 - loss 0.95018222 - time (sec): 19.41 - samples/sec: 26.38 - lr: 0.000021 - momentum: 0.000000
2023-12-21 15:18:40,648 epoch 17 - iter 10/26 - loss 0.97430499 - time (sec): 25.22 - samples/sec: 25.38 - lr: 0.000021 - momentum: 0.000000
2023-12-21 15:18:44,605 epoch 17 - iter 12/26 - loss 0.95997570 - time (sec): 29.18 - samples/sec: 26.32 - lr: 0.000020 - momentum: 0.000000
2023-12-21 15:18:49,091 epoch 17 -

100%|██████████| 80/80 [01:10<00:00,  1.13it/s]

2023-12-21 15:20:28,787 DEV : loss 1.2342743873596191 - f1-score (micro avg)  0.4156





2023-12-21 15:20:30,203 ----------------------------------------------------------------------------------------------------
2023-12-21 15:20:37,546 epoch 18 - iter 2/26 - loss 0.98885590 - time (sec): 7.34 - samples/sec: 17.44 - lr: 0.000019 - momentum: 0.000000
2023-12-21 15:20:41,276 epoch 18 - iter 4/26 - loss 0.97431445 - time (sec): 11.07 - samples/sec: 23.12 - lr: 0.000019 - momentum: 0.000000
2023-12-21 15:20:46,859 epoch 18 - iter 6/26 - loss 1.00574120 - time (sec): 16.65 - samples/sec: 23.06 - lr: 0.000019 - momentum: 0.000000
2023-12-21 15:20:49,729 epoch 18 - iter 8/26 - loss 0.98240938 - time (sec): 19.52 - samples/sec: 26.23 - lr: 0.000019 - momentum: 0.000000
2023-12-21 15:20:57,093 epoch 18 - iter 10/26 - loss 0.96613075 - time (sec): 26.89 - samples/sec: 23.80 - lr: 0.000018 - momentum: 0.000000
2023-12-21 15:20:59,925 epoch 18 - iter 12/26 - loss 0.95905959 - time (sec): 29.72 - samples/sec: 25.84 - lr: 0.000018 - momentum: 0.000000
2023-12-21 15:21:02,824 epoch 18 -

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:22:46,018 DEV : loss 1.2477701902389526 - f1-score (micro avg)  0.4172





2023-12-21 15:22:48,698 ----------------------------------------------------------------------------------------------------
2023-12-21 15:22:52,122 epoch 19 - iter 2/26 - loss 0.88709474 - time (sec): 3.42 - samples/sec: 37.47 - lr: 0.000017 - momentum: 0.000000
2023-12-21 15:22:54,632 epoch 19 - iter 4/26 - loss 0.93332702 - time (sec): 5.93 - samples/sec: 43.20 - lr: 0.000017 - momentum: 0.000000
2023-12-21 15:22:58,438 epoch 19 - iter 6/26 - loss 0.92386389 - time (sec): 9.73 - samples/sec: 39.46 - lr: 0.000017 - momentum: 0.000000
2023-12-21 15:23:03,741 epoch 19 - iter 8/26 - loss 0.93153504 - time (sec): 15.03 - samples/sec: 34.06 - lr: 0.000016 - momentum: 0.000000
2023-12-21 15:23:13,808 epoch 19 - iter 10/26 - loss 0.93123229 - time (sec): 25.10 - samples/sec: 25.50 - lr: 0.000016 - momentum: 0.000000
2023-12-21 15:23:17,065 epoch 19 - iter 12/26 - loss 0.91750996 - time (sec): 28.36 - samples/sec: 27.08 - lr: 0.000016 - momentum: 0.000000
2023-12-21 15:23:19,457 epoch 19 - i

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:24:58,955 DEV : loss 1.1997096538543701 - f1-score (micro avg)  0.4535





2023-12-21 15:25:01,780 ----------------------------------------------------------------------------------------------------
2023-12-21 15:25:04,425 epoch 20 - iter 2/26 - loss 0.94246477 - time (sec): 2.64 - samples/sec: 48.45 - lr: 0.000015 - momentum: 0.000000
2023-12-21 15:25:07,805 epoch 20 - iter 4/26 - loss 0.95646513 - time (sec): 6.02 - samples/sec: 42.52 - lr: 0.000015 - momentum: 0.000000
2023-12-21 15:25:09,842 epoch 20 - iter 6/26 - loss 0.94318099 - time (sec): 8.06 - samples/sec: 47.65 - lr: 0.000014 - momentum: 0.000000
2023-12-21 15:25:12,468 epoch 20 - iter 8/26 - loss 0.94830283 - time (sec): 10.68 - samples/sec: 47.92 - lr: 0.000014 - momentum: 0.000000
2023-12-21 15:25:17,698 epoch 20 - iter 10/26 - loss 0.95240967 - time (sec): 15.91 - samples/sec: 40.21 - lr: 0.000014 - momentum: 0.000000
2023-12-21 15:25:21,857 epoch 20 - iter 12/26 - loss 0.93519154 - time (sec): 20.07 - samples/sec: 38.26 - lr: 0.000014 - momentum: 0.000000
2023-12-21 15:25:28,604 epoch 20 - i

100%|██████████| 80/80 [01:11<00:00,  1.11it/s]

2023-12-21 15:27:12,385 DEV : loss 1.191832423210144 - f1-score (micro avg)  0.4685





2023-12-21 15:27:13,334 ----------------------------------------------------------------------------------------------------
2023-12-21 15:27:18,556 epoch 21 - iter 2/26 - loss 0.87473109 - time (sec): 5.22 - samples/sec: 24.52 - lr: 0.000013 - momentum: 0.000000
2023-12-21 15:27:24,864 epoch 21 - iter 4/26 - loss 0.86458732 - time (sec): 11.53 - samples/sec: 22.21 - lr: 0.000012 - momentum: 0.000000
2023-12-21 15:27:29,215 epoch 21 - iter 6/26 - loss 0.87405646 - time (sec): 15.88 - samples/sec: 24.18 - lr: 0.000012 - momentum: 0.000000
2023-12-21 15:27:31,566 epoch 21 - iter 8/26 - loss 0.88616338 - time (sec): 18.23 - samples/sec: 28.09 - lr: 0.000012 - momentum: 0.000000
2023-12-21 15:27:38,391 epoch 21 - iter 10/26 - loss 0.87527837 - time (sec): 25.05 - samples/sec: 25.54 - lr: 0.000012 - momentum: 0.000000
2023-12-21 15:27:42,254 epoch 21 - iter 12/26 - loss 0.87098127 - time (sec): 28.92 - samples/sec: 26.56 - lr: 0.000012 - momentum: 0.000000
2023-12-21 15:27:47,872 epoch 21 -

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:29:29,179 DEV : loss 1.1531614065170288 - f1-score (micro avg)  0.4921





2023-12-21 15:29:30,834 ----------------------------------------------------------------------------------------------------
2023-12-21 15:29:33,174 epoch 22 - iter 2/26 - loss 0.81316233 - time (sec): 2.34 - samples/sec: 54.77 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:29:36,446 epoch 22 - iter 4/26 - loss 0.88968432 - time (sec): 5.61 - samples/sec: 45.64 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:29:46,407 epoch 22 - iter 6/26 - loss 0.88956895 - time (sec): 15.57 - samples/sec: 24.66 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:29:50,573 epoch 22 - iter 8/26 - loss 0.89770174 - time (sec): 19.74 - samples/sec: 25.94 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:29:53,691 epoch 22 - iter 10/26 - loss 0.90822511 - time (sec): 22.85 - samples/sec: 28.01 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:30:04,080 epoch 22 - iter 12/26 - loss 0.90907793 - time (sec): 33.24 - samples/sec: 23.10 - lr: 0.000010 - momentum: 0.000000
2023-12-21 15:30:07,429 epoch 22 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:31:44,221 DEV : loss 1.1954134702682495 - f1-score (micro avg)  0.4614





2023-12-21 15:31:45,166 ----------------------------------------------------------------------------------------------------
2023-12-21 15:31:51,219 epoch 23 - iter 2/26 - loss 0.96568245 - time (sec): 6.05 - samples/sec: 21.15 - lr: 0.000008 - momentum: 0.000000
2023-12-21 15:31:54,906 epoch 23 - iter 4/26 - loss 0.94121563 - time (sec): 9.74 - samples/sec: 26.29 - lr: 0.000008 - momentum: 0.000000
2023-12-21 15:32:01,903 epoch 23 - iter 6/26 - loss 0.92606051 - time (sec): 16.74 - samples/sec: 22.95 - lr: 0.000008 - momentum: 0.000000
2023-12-21 15:32:07,298 epoch 23 - iter 8/26 - loss 0.92710247 - time (sec): 22.13 - samples/sec: 23.14 - lr: 0.000008 - momentum: 0.000000
2023-12-21 15:32:09,387 epoch 23 - iter 10/26 - loss 0.90496336 - time (sec): 24.22 - samples/sec: 26.43 - lr: 0.000008 - momentum: 0.000000
2023-12-21 15:32:15,927 epoch 23 - iter 12/26 - loss 0.89833248 - time (sec): 30.76 - samples/sec: 24.97 - lr: 0.000007 - momentum: 0.000000
2023-12-21 15:32:21,651 epoch 23 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:33:55,556 DEV : loss 1.1606930494308472 - f1-score (micro avg)  0.4842





2023-12-21 15:33:57,368 ----------------------------------------------------------------------------------------------------
2023-12-21 15:34:00,775 epoch 24 - iter 2/26 - loss 0.92299375 - time (sec): 3.40 - samples/sec: 37.61 - lr: 0.000006 - momentum: 0.000000
2023-12-21 15:34:05,883 epoch 24 - iter 4/26 - loss 0.91082476 - time (sec): 8.51 - samples/sec: 30.08 - lr: 0.000006 - momentum: 0.000000
2023-12-21 15:34:08,520 epoch 24 - iter 6/26 - loss 0.91086961 - time (sec): 11.15 - samples/sec: 34.44 - lr: 0.000006 - momentum: 0.000000
2023-12-21 15:34:12,348 epoch 24 - iter 8/26 - loss 0.90083352 - time (sec): 14.98 - samples/sec: 34.19 - lr: 0.000006 - momentum: 0.000000
2023-12-21 15:34:19,234 epoch 24 - iter 10/26 - loss 0.90833572 - time (sec): 21.86 - samples/sec: 29.27 - lr: 0.000006 - momentum: 0.000000
2023-12-21 15:34:22,757 epoch 24 - iter 12/26 - loss 0.90049851 - time (sec): 25.39 - samples/sec: 30.25 - lr: 0.000005 - momentum: 0.000000
2023-12-21 15:34:29,588 epoch 24 - 

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:36:14,264 DEV : loss 1.1873923540115356 - f1-score (micro avg)  0.4716





2023-12-21 15:36:18,137 ----------------------------------------------------------------------------------------------------
2023-12-21 15:36:20,691 epoch 25 - iter 2/26 - loss 0.88275844 - time (sec): 2.55 - samples/sec: 50.17 - lr: 0.000004 - momentum: 0.000000
2023-12-21 15:36:22,938 epoch 25 - iter 4/26 - loss 0.86062193 - time (sec): 4.80 - samples/sec: 53.36 - lr: 0.000004 - momentum: 0.000000
2023-12-21 15:36:26,085 epoch 25 - iter 6/26 - loss 0.84734092 - time (sec): 7.95 - samples/sec: 48.33 - lr: 0.000004 - momentum: 0.000000
2023-12-21 15:36:29,431 epoch 25 - iter 8/26 - loss 0.86495824 - time (sec): 11.29 - samples/sec: 45.35 - lr: 0.000004 - momentum: 0.000000
2023-12-21 15:36:36,117 epoch 25 - iter 10/26 - loss 0.87508838 - time (sec): 17.98 - samples/sec: 35.60 - lr: 0.000003 - momentum: 0.000000
2023-12-21 15:36:43,334 epoch 25 - iter 12/26 - loss 0.88040943 - time (sec): 25.19 - samples/sec: 30.48 - lr: 0.000003 - momentum: 0.000000
2023-12-21 15:36:48,204 epoch 25 - i

100%|██████████| 80/80 [01:11<00:00,  1.12it/s]

2023-12-21 15:38:32,308 DEV : loss 1.1975997686386108 - f1-score (micro avg)  0.4661





2023-12-21 15:38:38,246 ----------------------------------------------------------------------------------------------------
2023-12-21 15:38:38,248 Testing using last state of model ...


100%|██████████| 110/110 [01:21<00:00,  1.34it/s]


2023-12-21 15:40:00,230 
Results:
- F-score (micro) 0.5286
- F-score (macro) 0.3357
- Accuracy 0.5286

By class:
              precision    recall  f1-score   support

     comment     0.8927    0.5522    0.6823      1476
     support     0.0975    0.3750    0.1548       104
        deny     0.0873    0.2200    0.1250       100
       query     0.2597    0.7121    0.3806        66

    accuracy                         0.5286      1746
   macro avg     0.3343    0.4648    0.3357      1746
weighted avg     0.7752    0.5286    0.6075      1746

2023-12-21 15:40:00,232 ----------------------------------------------------------------------------------------------------


In [None]:
add_previous_comment = False
add_post_title = True
add_depth = False

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[27]: "“$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 09:43:20,981 Reading data from .
2023-12-22 09:43:20,984 Train: train_fasttext_format.txt
2023-12-22 09:43:20,988 Dev: dev_fasttext_format.txt
2023-12-22 09:43:20,989 Test: test_fasttext_format.txt
2023-12-22 09:43:21,122 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1617,
        "number_of_documents_per_class": {
            "support": 437,
            "deny": 350,
            "comment": 437,
            "query": 393
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 50163,
            "min": 3,
            "max": 719,
            "avg": 31.02226345083488
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_documents_p

0it [00:00, ?it/s]
1617it [00:01, 1148.53it/s]

2023-12-22 09:43:37,723 Dictionary created for label 'class' with 4 values: support (seen 437 times), comment (seen 437 times), query (seen 393 times), deny (seen 350 times)
2023-12-22 09:43:37,732 ----------------------------------------------------------------------------------------------------
2023-12-22 09:43:37,737 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-22 09:43:37,764 Computation:
2023-12-22 09:43:37,765  - compute on device: cuda:0
2023-12-22 09:43:37,767  - embedding storage: none
2023-12-22 09:43:37,768 ----------------------------------------------------------------------------------------------------
2023-12-22 09:43:37,771 Model training base path: "resources/taggers/rumoureval_character_level_embeddings"
2023-12-22 09:43:37,772 ----------------------------------------------------------------------------------------------------
2023-12-22 09:43:37,773 ----------------------------------------------------------------------------------------------------
2023-12-22 09:43:41,375 epoch 1 - iter 1/13 - loss 1.51625109 - time (sec): 3.60 - samples/sec: 35.55 - lr: 0.000000 - momentum: 0.000000
2023-12-22 09:43:45,367 epoch 1 - iter 2/13 - loss 1.50705540 - time (sec): 7.59 - samples/sec: 33.72 - lr: 0.000001 - momentum: 0.000000
2023-12-22 09:43:47,815 epoch 1 - iter 3/13 - loss 1.48917218 - time (sec): 10.04 - samples/sec: 38.

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]


2023-12-22 09:45:22,287 DEV : loss 1.2735364437103271 - f1-score (micro avg)  0.4117
2023-12-22 09:45:23,397 ----------------------------------------------------------------------------------------------------
2023-12-22 09:45:34,704 epoch 2 - iter 1/13 - loss 1.32841575 - time (sec): 11.30 - samples/sec: 11.32 - lr: 0.000019 - momentum: 0.000000
2023-12-22 09:45:43,247 epoch 2 - iter 2/13 - loss 1.35141826 - time (sec): 19.85 - samples/sec: 12.90 - lr: 0.000021 - momentum: 0.000000
2023-12-22 09:45:47,001 epoch 2 - iter 3/13 - loss 1.36820094 - time (sec): 23.60 - samples/sec: 16.27 - lr: 0.000022 - momentum: 0.000000
2023-12-22 09:45:51,514 epoch 2 - iter 4/13 - loss 1.37144354 - time (sec): 28.11 - samples/sec: 18.21 - lr: 0.000024 - momentum: 0.000000
2023-12-22 09:45:54,170 epoch 2 - iter 5/13 - loss 1.37503130 - time (sec): 30.77 - samples/sec: 20.80 - lr: 0.000025 - momentum: 0.000000
2023-12-22 09:45:57,477 epoch 2 - iter 6/13 - loss 1.37286745 - time (sec): 34.08 - samples/sec

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 09:47:12,647 DEV : loss 1.2675293684005737 - f1-score (micro avg)  0.3541





2023-12-22 09:47:13,362 ----------------------------------------------------------------------------------------------------
2023-12-22 09:47:18,103 epoch 3 - iter 1/13 - loss 1.31029487 - time (sec): 4.74 - samples/sec: 27.01 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:47:28,123 epoch 3 - iter 2/13 - loss 1.29911530 - time (sec): 14.76 - samples/sec: 17.34 - lr: 0.000040 - momentum: 0.000000
2023-12-22 09:47:31,521 epoch 3 - iter 3/13 - loss 1.29580454 - time (sec): 18.16 - samples/sec: 21.15 - lr: 0.000041 - momentum: 0.000000
2023-12-22 09:47:33,625 epoch 3 - iter 4/13 - loss 1.28455937 - time (sec): 20.26 - samples/sec: 25.27 - lr: 0.000043 - momentum: 0.000000
2023-12-22 09:47:42,417 epoch 3 - iter 5/13 - loss 1.27931917 - time (sec): 29.05 - samples/sec: 22.03 - lr: 0.000044 - momentum: 0.000000
2023-12-22 09:47:47,061 epoch 3 - iter 6/13 - loss 1.28427029 - time (sec): 33.70 - samples/sec: 22.79 - lr: 0.000046 - momentum: 0.000000
2023-12-22 09:47:58,564 epoch 3 - iter 7/1

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 09:48:58,456 DEV : loss 1.2295989990234375 - f1-score (micro avg)  0.4306
2023-12-22 09:48:59,783 ----------------------------------------------------------------------------------------------------
2023-12-22 09:49:01,789 epoch 4 - iter 1/13 - loss 1.15527248 - time (sec): 2.00 - samples/sec: 63.89 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:49:03,942 epoch 4 - iter 2/13 - loss 1.15620410 - time (sec): 4.16 - samples/sec: 61.58 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:49:07,450 epoch 4 - iter 3/13 - loss 1.21403007 - time (sec): 7.66 - samples/sec: 50.10 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:49:09,272 epoch 4 - iter 4/13 - loss 1.22066969 - time (sec): 9.49 - samples/sec: 53.97 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:49:10,802 epoch 4 - iter 5/13 - loss 1.24343894 - time (sec): 11.02 - samples/sec: 58.09 - lr: 0.000049 - momentum: 0.000000
2023-12-22 09:49:14,950 epoch 4 - iter 6/13 - loss 1.23165961 - time (sec): 15.16 - samples/sec: 50

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-22 09:50:37,932 DEV : loss 1.1635900735855103 - f1-score (micro avg)  0.515





2023-12-22 09:50:38,611 ----------------------------------------------------------------------------------------------------
2023-12-22 09:50:39,878 epoch 5 - iter 1/13 - loss 1.30079746 - time (sec): 1.26 - samples/sec: 101.20 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:50:41,068 epoch 5 - iter 2/13 - loss 1.26428813 - time (sec): 2.45 - samples/sec: 104.29 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:50:45,059 epoch 5 - iter 3/13 - loss 1.24051913 - time (sec): 6.45 - samples/sec: 59.57 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:50:54,966 epoch 5 - iter 4/13 - loss 1.22211802 - time (sec): 16.35 - samples/sec: 31.31 - lr: 0.000047 - momentum: 0.000000
2023-12-22 09:51:00,156 epoch 5 - iter 5/13 - loss 1.20255957 - time (sec): 21.54 - samples/sec: 29.71 - lr: 0.000046 - momentum: 0.000000
2023-12-22 09:51:04,744 epoch 5 - iter 6/13 - loss 1.19640785 - time (sec): 26.13 - samples/sec: 29.39 - lr: 0.000046 - momentum: 0.000000
2023-12-22 09:51:06,094 epoch 5 - iter 7/1

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 09:52:23,403 DEV : loss 1.307855486869812 - f1-score (micro avg)  0.3241





2023-12-22 09:52:24,072 ----------------------------------------------------------------------------------------------------
2023-12-22 09:52:29,526 epoch 6 - iter 1/13 - loss 1.17046070 - time (sec): 5.45 - samples/sec: 23.49 - lr: 0.000045 - momentum: 0.000000
2023-12-22 09:52:33,876 epoch 6 - iter 2/13 - loss 1.17229700 - time (sec): 9.80 - samples/sec: 26.12 - lr: 0.000045 - momentum: 0.000000
2023-12-22 09:52:37,317 epoch 6 - iter 3/13 - loss 1.18894168 - time (sec): 13.24 - samples/sec: 29.00 - lr: 0.000045 - momentum: 0.000000
2023-12-22 09:52:39,477 epoch 6 - iter 4/13 - loss 1.16822657 - time (sec): 15.40 - samples/sec: 33.24 - lr: 0.000044 - momentum: 0.000000
2023-12-22 09:52:41,863 epoch 6 - iter 5/13 - loss 1.16382582 - time (sec): 17.79 - samples/sec: 35.98 - lr: 0.000044 - momentum: 0.000000
2023-12-22 09:52:43,432 epoch 6 - iter 6/13 - loss 1.16246653 - time (sec): 19.36 - samples/sec: 39.68 - lr: 0.000044 - momentum: 0.000000
2023-12-22 09:52:45,430 epoch 6 - iter 7/13

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]

2023-12-22 09:54:06,888 DEV : loss 1.1272238492965698 - f1-score (micro avg)  0.5363





2023-12-22 09:54:07,555 ----------------------------------------------------------------------------------------------------
2023-12-22 09:54:11,514 epoch 7 - iter 1/13 - loss 1.15040958 - time (sec): 3.96 - samples/sec: 32.36 - lr: 0.000043 - momentum: 0.000000
2023-12-22 09:54:14,091 epoch 7 - iter 2/13 - loss 1.11360413 - time (sec): 6.53 - samples/sec: 39.19 - lr: 0.000043 - momentum: 0.000000
2023-12-22 09:54:18,268 epoch 7 - iter 3/13 - loss 1.10781976 - time (sec): 10.71 - samples/sec: 35.86 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:54:21,730 epoch 7 - iter 4/13 - loss 1.12420192 - time (sec): 14.17 - samples/sec: 36.13 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:54:23,629 epoch 7 - iter 5/13 - loss 1.12016644 - time (sec): 16.07 - samples/sec: 39.82 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:54:33,403 epoch 7 - iter 6/13 - loss 1.14366577 - time (sec): 25.84 - samples/sec: 29.72 - lr: 0.000042 - momentum: 0.000000
2023-12-22 09:54:45,205 epoch 7 - iter 7/13

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 09:55:49,133 DEV : loss 1.1711452007293701 - f1-score (micro avg)  0.5308





2023-12-22 09:55:49,818 ----------------------------------------------------------------------------------------------------
2023-12-22 09:55:54,105 epoch 8 - iter 1/13 - loss 0.97995389 - time (sec): 4.29 - samples/sec: 29.87 - lr: 0.000041 - momentum: 0.000000
2023-12-22 09:55:57,706 epoch 8 - iter 2/13 - loss 1.07536626 - time (sec): 7.89 - samples/sec: 32.46 - lr: 0.000041 - momentum: 0.000000
2023-12-22 09:56:00,276 epoch 8 - iter 3/13 - loss 1.08252724 - time (sec): 10.46 - samples/sec: 36.73 - lr: 0.000040 - momentum: 0.000000
2023-12-22 09:56:04,772 epoch 8 - iter 4/13 - loss 1.10992333 - time (sec): 14.95 - samples/sec: 34.24 - lr: 0.000040 - momentum: 0.000000
2023-12-22 09:56:06,158 epoch 8 - iter 5/13 - loss 1.11370978 - time (sec): 16.34 - samples/sec: 39.17 - lr: 0.000040 - momentum: 0.000000
2023-12-22 09:56:07,663 epoch 8 - iter 6/13 - loss 1.12096069 - time (sec): 17.84 - samples/sec: 43.04 - lr: 0.000040 - momentum: 0.000000
2023-12-22 09:56:19,107 epoch 8 - iter 7/13

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 09:57:27,229 DEV : loss 1.2529795169830322 - f1-score (micro avg)  0.4132
2023-12-22 09:57:28,893 ----------------------------------------------------------------------------------------------------
2023-12-22 09:57:33,758 epoch 9 - iter 1/13 - loss 1.12556458 - time (sec): 4.86 - samples/sec: 26.34 - lr: 0.000039 - momentum: 0.000000
2023-12-22 09:57:38,367 epoch 9 - iter 2/13 - loss 1.13015795 - time (sec): 9.47 - samples/sec: 27.03 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:57:50,097 epoch 9 - iter 3/13 - loss 1.13164262 - time (sec): 21.20 - samples/sec: 18.11 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:57:53,548 epoch 9 - iter 4/13 - loss 1.12255013 - time (sec): 24.65 - samples/sec: 20.77 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:57:57,814 epoch 9 - iter 5/13 - loss 1.12467763 - time (sec): 28.92 - samples/sec: 22.13 - lr: 0.000038 - momentum: 0.000000
2023-12-22 09:57:59,678 epoch 9 - iter 6/13 - loss 1.11249202 - time (sec): 30.78 - samples/sec: 

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]


2023-12-22 09:59:14,019 DEV : loss 1.1747034788131714 - f1-score (micro avg)  0.4976
2023-12-22 09:59:15,168 ----------------------------------------------------------------------------------------------------
2023-12-22 09:59:19,998 epoch 10 - iter 1/13 - loss 1.07348371 - time (sec): 4.83 - samples/sec: 26.52 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:59:21,366 epoch 10 - iter 2/13 - loss 1.09310943 - time (sec): 6.20 - samples/sec: 41.32 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:59:23,478 epoch 10 - iter 3/13 - loss 1.10788846 - time (sec): 8.31 - samples/sec: 46.23 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:59:26,995 epoch 10 - iter 4/13 - loss 1.10043293 - time (sec): 11.82 - samples/sec: 43.30 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:59:31,275 epoch 10 - iter 5/13 - loss 1.07837589 - time (sec): 16.10 - samples/sec: 39.74 - lr: 0.000036 - momentum: 0.000000
2023-12-22 09:59:34,290 epoch 10 - iter 6/13 - loss 1.07230705 - time (sec): 19.12 - samples/

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 10:01:00,772 DEV : loss 1.2140792608261108 - f1-score (micro avg)  0.4732
2023-12-22 10:01:02,432 ----------------------------------------------------------------------------------------------------
2023-12-22 10:01:04,867 epoch 11 - iter 1/13 - loss 1.13462722 - time (sec): 2.43 - samples/sec: 52.62 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:01:08,304 epoch 11 - iter 2/13 - loss 1.06303132 - time (sec): 5.87 - samples/sec: 43.62 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:01:19,877 epoch 11 - iter 3/13 - loss 1.04686979 - time (sec): 17.44 - samples/sec: 22.02 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:01:30,209 epoch 11 - iter 4/13 - loss 1.05167478 - time (sec): 27.77 - samples/sec: 18.43 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:01:32,337 epoch 11 - iter 5/13 - loss 1.05598800 - time (sec): 29.90 - samples/sec: 21.40 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:01:37,103 epoch 11 - iter 6/13 - loss 1.04224530 - time (sec): 34.67 - samples

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]

2023-12-22 10:02:46,526 DEV : loss 1.2297309637069702 - f1-score (micro avg)  0.4172





2023-12-22 10:02:47,208 ----------------------------------------------------------------------------------------------------
2023-12-22 10:02:48,882 epoch 12 - iter 1/13 - loss 1.10560155 - time (sec): 1.67 - samples/sec: 76.56 - lr: 0.000032 - momentum: 0.000000
2023-12-22 10:02:56,824 epoch 12 - iter 2/13 - loss 1.04996291 - time (sec): 9.61 - samples/sec: 26.63 - lr: 0.000032 - momentum: 0.000000
2023-12-22 10:02:58,976 epoch 12 - iter 3/13 - loss 1.08604354 - time (sec): 11.77 - samples/sec: 32.64 - lr: 0.000032 - momentum: 0.000000
2023-12-22 10:03:10,483 epoch 12 - iter 4/13 - loss 1.05612612 - time (sec): 23.27 - samples/sec: 22.00 - lr: 0.000032 - momentum: 0.000000
2023-12-22 10:03:13,920 epoch 12 - iter 5/13 - loss 1.04072140 - time (sec): 26.71 - samples/sec: 23.96 - lr: 0.000032 - momentum: 0.000000
2023-12-22 10:03:16,437 epoch 12 - iter 6/13 - loss 1.03007017 - time (sec): 29.23 - samples/sec: 26.28 - lr: 0.000031 - momentum: 0.000000
2023-12-22 10:03:19,444 epoch 12 - it

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 10:04:31,725 DEV : loss 1.2561899423599243 - f1-score (micro avg)  0.4156
2023-12-22 10:04:32,379 ----------------------------------------------------------------------------------------------------
2023-12-22 10:04:35,341 epoch 13 - iter 1/13 - loss 0.95331895 - time (sec): 2.96 - samples/sec: 43.24 - lr: 0.000030 - momentum: 0.000000
2023-12-22 10:04:39,491 epoch 13 - iter 2/13 - loss 0.98342055 - time (sec): 7.11 - samples/sec: 36.00 - lr: 0.000030 - momentum: 0.000000
2023-12-22 10:04:43,848 epoch 13 - iter 3/13 - loss 1.01740805 - time (sec): 11.47 - samples/sec: 33.49 - lr: 0.000030 - momentum: 0.000000
2023-12-22 10:04:55,463 epoch 13 - iter 4/13 - loss 1.03560781 - time (sec): 23.08 - samples/sec: 22.18 - lr: 0.000030 - momentum: 0.000000
2023-12-22 10:04:59,805 epoch 13 - iter 5/13 - loss 1.03905315 - time (sec): 27.42 - samples/sec: 23.34 - lr: 0.000029 - momentum: 0.000000
2023-12-22 10:05:02,053 epoch 13 - iter 6/13 - loss 1.03542632 - time (sec): 29.67 - samples

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 10:06:19,303 DEV : loss 1.1629917621612549 - f1-score (micro avg)  0.5047





2023-12-22 10:06:20,657 ----------------------------------------------------------------------------------------------------
2023-12-22 10:06:25,409 epoch 14 - iter 1/13 - loss 1.01708376 - time (sec): 4.75 - samples/sec: 26.95 - lr: 0.000028 - momentum: 0.000000
2023-12-22 10:06:28,889 epoch 14 - iter 2/13 - loss 1.02925205 - time (sec): 8.23 - samples/sec: 31.11 - lr: 0.000028 - momentum: 0.000000
2023-12-22 10:06:31,805 epoch 14 - iter 3/13 - loss 1.01803643 - time (sec): 11.14 - samples/sec: 34.46 - lr: 0.000028 - momentum: 0.000000
2023-12-22 10:06:33,495 epoch 14 - iter 4/13 - loss 1.03198279 - time (sec): 12.84 - samples/sec: 39.89 - lr: 0.000027 - momentum: 0.000000
2023-12-22 10:06:35,236 epoch 14 - iter 5/13 - loss 1.04225835 - time (sec): 14.58 - samples/sec: 43.91 - lr: 0.000027 - momentum: 0.000000
2023-12-22 10:06:37,629 epoch 14 - iter 6/13 - loss 1.04833040 - time (sec): 16.97 - samples/sec: 45.26 - lr: 0.000027 - momentum: 0.000000
2023-12-22 10:06:41,151 epoch 14 - it

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]


2023-12-22 10:08:08,857 DEV : loss 1.1746532917022705 - f1-score (micro avg)  0.4779
2023-12-22 10:08:09,997 ----------------------------------------------------------------------------------------------------
2023-12-22 10:08:14,186 epoch 15 - iter 1/13 - loss 1.02492571 - time (sec): 4.19 - samples/sec: 30.58 - lr: 0.000026 - momentum: 0.000000
2023-12-22 10:08:25,765 epoch 15 - iter 2/13 - loss 1.06221032 - time (sec): 15.76 - samples/sec: 16.24 - lr: 0.000026 - momentum: 0.000000
2023-12-22 10:08:28,077 epoch 15 - iter 3/13 - loss 1.03906870 - time (sec): 18.08 - samples/sec: 21.24 - lr: 0.000025 - momentum: 0.000000
2023-12-22 10:08:32,218 epoch 15 - iter 4/13 - loss 1.04756877 - time (sec): 22.22 - samples/sec: 23.04 - lr: 0.000025 - momentum: 0.000000
2023-12-22 10:08:35,814 epoch 15 - iter 5/13 - loss 1.04998925 - time (sec): 25.81 - samples/sec: 24.79 - lr: 0.000025 - momentum: 0.000000
2023-12-22 10:08:39,951 epoch 15 - iter 6/13 - loss 1.04056274 - time (sec): 29.95 - sample

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 10:09:57,806 DEV : loss 1.2600890398025513 - f1-score (micro avg)  0.4093





2023-12-22 10:09:59,140 ----------------------------------------------------------------------------------------------------
2023-12-22 10:10:03,255 epoch 16 - iter 1/13 - loss 1.11481607 - time (sec): 4.11 - samples/sec: 31.12 - lr: 0.000024 - momentum: 0.000000
2023-12-22 10:10:14,830 epoch 16 - iter 2/13 - loss 1.01990888 - time (sec): 15.69 - samples/sec: 16.32 - lr: 0.000024 - momentum: 0.000000
2023-12-22 10:10:16,798 epoch 16 - iter 3/13 - loss 1.00974411 - time (sec): 17.66 - samples/sec: 21.75 - lr: 0.000023 - momentum: 0.000000
2023-12-22 10:10:20,169 epoch 16 - iter 4/13 - loss 0.99093449 - time (sec): 21.03 - samples/sec: 24.35 - lr: 0.000023 - momentum: 0.000000
2023-12-22 10:10:21,773 epoch 16 - iter 5/13 - loss 0.98252662 - time (sec): 22.63 - samples/sec: 28.28 - lr: 0.000023 - momentum: 0.000000
2023-12-22 10:10:23,566 epoch 16 - iter 6/13 - loss 0.98913587 - time (sec): 24.42 - samples/sec: 31.45 - lr: 0.000023 - momentum: 0.000000
2023-12-22 10:10:27,676 epoch 16 - i

100%|██████████| 80/80 [00:47<00:00,  1.69it/s]


2023-12-22 10:11:37,626 DEV : loss 1.1634066104888916 - f1-score (micro avg)  0.4937
2023-12-22 10:11:38,743 ----------------------------------------------------------------------------------------------------
2023-12-22 10:11:41,170 epoch 17 - iter 1/13 - loss 0.99349552 - time (sec): 2.42 - samples/sec: 52.85 - lr: 0.000022 - momentum: 0.000000
2023-12-22 10:11:45,342 epoch 17 - iter 2/13 - loss 1.00230744 - time (sec): 6.59 - samples/sec: 38.83 - lr: 0.000021 - momentum: 0.000000
2023-12-22 10:11:49,005 epoch 17 - iter 3/13 - loss 0.97568585 - time (sec): 10.26 - samples/sec: 37.44 - lr: 0.000021 - momentum: 0.000000
2023-12-22 10:11:51,152 epoch 17 - iter 4/13 - loss 0.96294342 - time (sec): 12.40 - samples/sec: 41.28 - lr: 0.000021 - momentum: 0.000000
2023-12-22 10:12:02,640 epoch 17 - iter 5/13 - loss 0.97799457 - time (sec): 23.89 - samples/sec: 26.79 - lr: 0.000021 - momentum: 0.000000
2023-12-22 10:12:11,141 epoch 17 - iter 6/13 - loss 0.98773624 - time (sec): 32.39 - samples

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 10:13:28,976 DEV : loss 1.2482259273529053 - f1-score (micro avg)  0.4196





2023-12-22 10:13:29,645 ----------------------------------------------------------------------------------------------------
2023-12-22 10:13:33,612 epoch 18 - iter 1/13 - loss 1.00211406 - time (sec): 3.96 - samples/sec: 32.29 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:13:43,519 epoch 18 - iter 2/13 - loss 0.99599233 - time (sec): 13.87 - samples/sec: 18.46 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:13:48,111 epoch 18 - iter 3/13 - loss 0.99189339 - time (sec): 18.46 - samples/sec: 20.80 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:13:56,636 epoch 18 - iter 4/13 - loss 0.99223496 - time (sec): 26.99 - samples/sec: 18.97 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:14:01,722 epoch 18 - iter 5/13 - loss 0.99835421 - time (sec): 32.07 - samples/sec: 19.95 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:14:13,247 epoch 18 - iter 6/13 - loss 0.97815244 - time (sec): 43.60 - samples/sec: 17.61 - lr: 0.000019 - momentum: 0.000000
2023-12-22 10:14:14,968 epoch 18 - i

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]

2023-12-22 10:15:16,162 DEV : loss 1.156317949295044 - f1-score (micro avg)  0.5055





2023-12-22 10:15:17,750 ----------------------------------------------------------------------------------------------------
2023-12-22 10:15:20,127 epoch 19 - iter 1/13 - loss 1.00287724 - time (sec): 2.37 - samples/sec: 53.90 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:31,580 epoch 19 - iter 2/13 - loss 1.03718895 - time (sec): 13.83 - samples/sec: 18.51 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:41,924 epoch 19 - iter 3/13 - loss 1.00763392 - time (sec): 24.17 - samples/sec: 15.89 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:45,120 epoch 19 - iter 4/13 - loss 0.99569055 - time (sec): 27.37 - samples/sec: 18.71 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:46,303 epoch 19 - iter 5/13 - loss 0.99754870 - time (sec): 28.55 - samples/sec: 22.42 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:49,254 epoch 19 - iter 6/13 - loss 0.98166631 - time (sec): 31.50 - samples/sec: 24.38 - lr: 0.000017 - momentum: 0.000000
2023-12-22 10:15:57,205 epoch 19 - i

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]


2023-12-22 10:17:05,188 DEV : loss 1.2112685441970825 - f1-score (micro avg)  0.455
2023-12-22 10:17:06,318 ----------------------------------------------------------------------------------------------------
2023-12-22 10:17:08,457 epoch 20 - iter 1/13 - loss 0.94891059 - time (sec): 2.14 - samples/sec: 59.88 - lr: 0.000015 - momentum: 0.000000
2023-12-22 10:17:10,917 epoch 20 - iter 2/13 - loss 0.94154003 - time (sec): 4.60 - samples/sec: 55.68 - lr: 0.000015 - momentum: 0.000000
2023-12-22 10:17:13,951 epoch 20 - iter 3/13 - loss 0.97185038 - time (sec): 7.63 - samples/sec: 50.32 - lr: 0.000015 - momentum: 0.000000
2023-12-22 10:17:16,429 epoch 20 - iter 4/13 - loss 0.99150403 - time (sec): 10.11 - samples/sec: 50.65 - lr: 0.000015 - momentum: 0.000000
2023-12-22 10:17:20,877 epoch 20 - iter 5/13 - loss 1.00052029 - time (sec): 14.56 - samples/sec: 43.96 - lr: 0.000015 - momentum: 0.000000
2023-12-22 10:17:25,092 epoch 20 - iter 6/13 - loss 0.99392449 - time (sec): 18.77 - samples/s

100%|██████████| 80/80 [00:46<00:00,  1.70it/s]

2023-12-22 10:18:50,469 DEV : loss 1.2170379161834717 - f1-score (micro avg)  0.4448





2023-12-22 10:18:51,140 ----------------------------------------------------------------------------------------------------
2023-12-22 10:18:59,040 epoch 21 - iter 1/13 - loss 0.92796510 - time (sec): 7.90 - samples/sec: 16.21 - lr: 0.000013 - momentum: 0.000000
2023-12-22 10:19:01,337 epoch 21 - iter 2/13 - loss 0.92854717 - time (sec): 10.19 - samples/sec: 25.11 - lr: 0.000013 - momentum: 0.000000
2023-12-22 10:19:07,047 epoch 21 - iter 3/13 - loss 0.92328566 - time (sec): 15.90 - samples/sec: 24.15 - lr: 0.000013 - momentum: 0.000000
2023-12-22 10:19:09,649 epoch 21 - iter 4/13 - loss 0.93776590 - time (sec): 18.51 - samples/sec: 27.67 - lr: 0.000013 - momentum: 0.000000
2023-12-22 10:19:19,429 epoch 21 - iter 5/13 - loss 0.94770087 - time (sec): 28.29 - samples/sec: 22.63 - lr: 0.000012 - momentum: 0.000000
2023-12-22 10:19:22,787 epoch 21 - iter 6/13 - loss 0.94229856 - time (sec): 31.64 - samples/sec: 24.27 - lr: 0.000012 - momentum: 0.000000
2023-12-22 10:19:24,938 epoch 21 - i

100%|██████████| 80/80 [00:47<00:00,  1.67it/s]

2023-12-22 10:20:37,156 DEV : loss 1.186739444732666 - f1-score (micro avg)  0.47





2023-12-22 10:20:37,873 ----------------------------------------------------------------------------------------------------
2023-12-22 10:20:42,001 epoch 22 - iter 1/13 - loss 1.01636219 - time (sec): 4.13 - samples/sec: 31.02 - lr: 0.000011 - momentum: 0.000000
2023-12-22 10:20:44,524 epoch 22 - iter 2/13 - loss 1.00106409 - time (sec): 6.65 - samples/sec: 38.50 - lr: 0.000011 - momentum: 0.000000
2023-12-22 10:20:48,960 epoch 22 - iter 3/13 - loss 0.94421516 - time (sec): 11.09 - samples/sec: 34.64 - lr: 0.000011 - momentum: 0.000000
2023-12-22 10:20:52,605 epoch 22 - iter 4/13 - loss 0.95158191 - time (sec): 14.73 - samples/sec: 34.76 - lr: 0.000010 - momentum: 0.000000
2023-12-22 10:20:54,969 epoch 22 - iter 5/13 - loss 0.94930533 - time (sec): 17.09 - samples/sec: 37.44 - lr: 0.000010 - momentum: 0.000000
2023-12-22 10:20:57,821 epoch 22 - iter 6/13 - loss 0.95883510 - time (sec): 19.95 - samples/sec: 38.50 - lr: 0.000010 - momentum: 0.000000
2023-12-22 10:21:01,922 epoch 22 - it

100%|██████████| 80/80 [00:47<00:00,  1.70it/s]

2023-12-22 10:22:16,825 DEV : loss 1.2252963781356812 - f1-score (micro avg)  0.4416





2023-12-22 10:22:17,542 ----------------------------------------------------------------------------------------------------
2023-12-22 10:22:21,678 epoch 23 - iter 1/13 - loss 0.90722644 - time (sec): 4.13 - samples/sec: 30.96 - lr: 0.000009 - momentum: 0.000000
2023-12-22 10:22:24,671 epoch 23 - iter 2/13 - loss 0.93827131 - time (sec): 7.13 - samples/sec: 35.92 - lr: 0.000009 - momentum: 0.000000
2023-12-22 10:22:34,503 epoch 23 - iter 3/13 - loss 0.94306169 - time (sec): 16.96 - samples/sec: 22.64 - lr: 0.000008 - momentum: 0.000000
2023-12-22 10:22:42,863 epoch 23 - iter 4/13 - loss 0.95108008 - time (sec): 25.32 - samples/sec: 20.22 - lr: 0.000008 - momentum: 0.000000
2023-12-22 10:22:45,858 epoch 23 - iter 5/13 - loss 0.94879391 - time (sec): 28.31 - samples/sec: 22.60 - lr: 0.000008 - momentum: 0.000000
2023-12-22 10:22:57,265 epoch 23 - iter 6/13 - loss 0.94403189 - time (sec): 39.72 - samples/sec: 19.34 - lr: 0.000008 - momentum: 0.000000
2023-12-22 10:22:59,785 epoch 23 - it

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 10:24:04,382 DEV : loss 1.195311427116394 - f1-score (micro avg)  0.4677
2023-12-22 10:24:06,360 ----------------------------------------------------------------------------------------------------
2023-12-22 10:24:08,027 epoch 24 - iter 1/13 - loss 0.96011227 - time (sec): 1.66 - samples/sec: 76.93 - lr: 0.000007 - momentum: 0.000000
2023-12-22 10:24:19,549 epoch 24 - iter 2/13 - loss 0.98029217 - time (sec): 13.19 - samples/sec: 19.41 - lr: 0.000007 - momentum: 0.000000
2023-12-22 10:24:23,651 epoch 24 - iter 3/13 - loss 0.94482026 - time (sec): 17.29 - samples/sec: 22.21 - lr: 0.000006 - momentum: 0.000000
2023-12-22 10:24:28,151 epoch 24 - iter 4/13 - loss 0.96808060 - time (sec): 21.79 - samples/sec: 23.50 - lr: 0.000006 - momentum: 0.000000
2023-12-22 10:24:30,355 epoch 24 - iter 5/13 - loss 0.96405770 - time (sec): 23.99 - samples/sec: 26.68 - lr: 0.000006 - momentum: 0.000000
2023-12-22 10:24:34,433 epoch 24 - iter 6/13 - loss 0.94385545 - time (sec): 28.07 - samples

100%|██████████| 80/80 [00:47<00:00,  1.68it/s]


2023-12-22 10:25:49,430 DEV : loss 1.2004897594451904 - f1-score (micro avg)  0.459
2023-12-22 10:25:50,570 ----------------------------------------------------------------------------------------------------
2023-12-22 10:25:54,775 epoch 25 - iter 1/13 - loss 1.02305889 - time (sec): 4.20 - samples/sec: 30.50 - lr: 0.000005 - momentum: 0.000000
2023-12-22 10:25:58,382 epoch 25 - iter 2/13 - loss 0.97893184 - time (sec): 7.80 - samples/sec: 32.81 - lr: 0.000004 - momentum: 0.000000
2023-12-22 10:26:00,955 epoch 25 - iter 3/13 - loss 0.93751264 - time (sec): 10.38 - samples/sec: 37.01 - lr: 0.000004 - momentum: 0.000000
2023-12-22 10:26:05,157 epoch 25 - iter 4/13 - loss 0.92567602 - time (sec): 14.58 - samples/sec: 35.12 - lr: 0.000004 - momentum: 0.000000
2023-12-22 10:26:13,238 epoch 25 - iter 5/13 - loss 0.92953892 - time (sec): 22.66 - samples/sec: 28.24 - lr: 0.000004 - momentum: 0.000000
2023-12-22 10:26:15,984 epoch 25 - iter 6/13 - loss 0.95873837 - time (sec): 25.41 - samples/

100%|██████████| 80/80 [00:46<00:00,  1.71it/s]


2023-12-22 10:27:33,276 DEV : loss 1.2031127214431763 - f1-score (micro avg)  0.4566
2023-12-22 10:27:39,097 ----------------------------------------------------------------------------------------------------
2023-12-22 10:27:39,099 Testing using last state of model ...


100%|██████████| 110/110 [00:56<00:00,  1.93it/s]

2023-12-22 10:28:36,038 
Results:
- F-score (micro) 0.5063
- F-score (macro) 0.3234
- Accuracy 0.5063

By class:
              precision    recall  f1-score   support

     comment     0.9022    0.5251    0.6638      1476
     support     0.0956    0.4135    0.1552       104
        deny     0.0810    0.2000    0.1153       100
       query     0.2421    0.6970    0.3594        66

    accuracy                         0.5063      1746
   macro avg     0.3302    0.4589    0.3234      1746
weighted avg     0.7822    0.5063    0.5906      1746

2023-12-22 10:28:36,039 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = True
add_post_title = True
add_depth = False

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[51]: "The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 10:31:07,006 Reading data from .
2023-12-22 10:31:07,008 Train: train_fasttext_format.txt
2023-12-22 10:31:07,010 Dev: dev_fasttext_format.txt
2023-12-22 10:31:07,012 Test: test_fasttext_format.txt
2023-12-22 10:31:07,113 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1591,
        "number_of_documents_per_class": {
            "support": 427,
            "comment": 427,
            "query": 395,
            "deny": 342
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 88334,
            "min": 15,
            "max": 599,
            "avg": 55.52105593966059
        }
    },
    "TEST": {
        "dataset": "

0it [00:00, ?it/s]
1591it [00:01, 1188.45it/s]

2023-12-22 10:31:30,879 Dictionary created for label 'class' with 4 values: support (seen 427 times), comment (seen 427 times), query (seen 395 times), deny (seen 342 times)
2023-12-22 10:31:30,887 ----------------------------------------------------------------------------------------------------
2023-12-22 10:31:30,889 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-22 10:31:30,902  - shuffle: "True"
2023-12-22 10:31:30,903 ----------------------------------------------------------------------------------------------------
2023-12-22 10:31:30,904 Plugins:
2023-12-22 10:31:30,904  - LinearScheduler | warmup_fraction: '0.1'
2023-12-22 10:31:30,905 ----------------------------------------------------------------------------------------------------
2023-12-22 10:31:30,906 Final evaluation on model after last epoch (final-model.pt)
2023-12-22 10:31:30,907  - metric: "('micro avg', 'f1-score')"
2023-12-22 10:31:30,908 ----------------------------------------------------------------------------------------------------
2023-12-22 10:31:30,909 Computation:
2023-12-22 10:31:30,910  - compute on device: cuda:0
2023-12-22 10:31:30,911  - embedding storage: none
2023-12-22 10:31:30,912 ----------------------------------------------------------------------------------------------------
2023-12-22 10:31:30,913 Model training base path: "resources/taggers

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 10:34:09,656 DEV : loss 1.3399156332015991 - f1-score (micro avg)  0.3904





2023-12-22 10:34:12,425 ----------------------------------------------------------------------------------------------------
2023-12-22 10:34:16,999 epoch 2 - iter 2/25 - loss 1.38154340 - time (sec): 4.57 - samples/sec: 28.00 - lr: 0.000020 - momentum: 0.000000
2023-12-22 10:34:20,118 epoch 2 - iter 4/25 - loss 1.37602276 - time (sec): 7.69 - samples/sec: 33.28 - lr: 0.000022 - momentum: 0.000000
2023-12-22 10:34:24,442 epoch 2 - iter 6/25 - loss 1.33476826 - time (sec): 12.02 - samples/sec: 31.96 - lr: 0.000023 - momentum: 0.000000
2023-12-22 10:34:31,191 epoch 2 - iter 8/25 - loss 1.32231146 - time (sec): 18.76 - samples/sec: 27.29 - lr: 0.000025 - momentum: 0.000000
2023-12-22 10:34:37,244 epoch 2 - iter 10/25 - loss 1.32954354 - time (sec): 24.82 - samples/sec: 25.79 - lr: 0.000027 - momentum: 0.000000
2023-12-22 10:34:41,226 epoch 2 - iter 12/25 - loss 1.32614805 - time (sec): 28.80 - samples/sec: 26.67 - lr: 0.000028 - momentum: 0.000000
2023-12-22 10:34:44,051 epoch 2 - iter 14

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 10:36:54,666 DEV : loss 1.128174901008606 - f1-score (micro avg)  0.5812





2023-12-22 10:36:58,282 ----------------------------------------------------------------------------------------------------
2023-12-22 10:37:05,258 epoch 3 - iter 2/25 - loss 1.42202449 - time (sec): 6.97 - samples/sec: 18.36 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:37:09,205 epoch 3 - iter 4/25 - loss 1.39449322 - time (sec): 10.92 - samples/sec: 23.44 - lr: 0.000041 - momentum: 0.000000
2023-12-22 10:37:11,730 epoch 3 - iter 6/25 - loss 1.35710466 - time (sec): 13.45 - samples/sec: 28.56 - lr: 0.000043 - momentum: 0.000000
2023-12-22 10:37:14,789 epoch 3 - iter 8/25 - loss 1.32434942 - time (sec): 16.51 - samples/sec: 31.02 - lr: 0.000045 - momentum: 0.000000
2023-12-22 10:37:20,113 epoch 3 - iter 10/25 - loss 1.30836257 - time (sec): 21.83 - samples/sec: 29.32 - lr: 0.000046 - momentum: 0.000000
2023-12-22 10:37:27,324 epoch 3 - iter 12/25 - loss 1.29330923 - time (sec): 29.04 - samples/sec: 26.45 - lr: 0.000048 - momentum: 0.000000
2023-12-22 10:37:29,837 epoch 3 - iter 1

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 10:39:35,474 DEV : loss 1.2151614427566528 - f1-score (micro avg)  0.541





2023-12-22 10:39:37,586 ----------------------------------------------------------------------------------------------------
2023-12-22 10:39:39,538 epoch 4 - iter 2/25 - loss 1.20605302 - time (sec): 1.95 - samples/sec: 65.74 - lr: 0.000049 - momentum: 0.000000
2023-12-22 10:39:44,569 epoch 4 - iter 4/25 - loss 1.14813596 - time (sec): 6.98 - samples/sec: 36.68 - lr: 0.000049 - momentum: 0.000000
2023-12-22 10:39:51,500 epoch 4 - iter 6/25 - loss 1.16179975 - time (sec): 13.91 - samples/sec: 27.61 - lr: 0.000049 - momentum: 0.000000
2023-12-22 10:39:55,721 epoch 4 - iter 8/25 - loss 1.16835065 - time (sec): 18.13 - samples/sec: 28.24 - lr: 0.000048 - momentum: 0.000000
2023-12-22 10:40:00,145 epoch 4 - iter 10/25 - loss 1.16289936 - time (sec): 22.55 - samples/sec: 28.38 - lr: 0.000048 - momentum: 0.000000
2023-12-22 10:40:06,768 epoch 4 - iter 12/25 - loss 1.18369591 - time (sec): 29.18 - samples/sec: 26.32 - lr: 0.000048 - momentum: 0.000000
2023-12-22 10:40:12,593 epoch 4 - iter 14

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 10:42:18,845 DEV : loss 1.3339712619781494 - f1-score (micro avg)  0.3549





2023-12-22 10:42:21,216 ----------------------------------------------------------------------------------------------------
2023-12-22 10:42:28,466 epoch 5 - iter 2/25 - loss 1.17780888 - time (sec): 7.25 - samples/sec: 17.66 - lr: 0.000047 - momentum: 0.000000
2023-12-22 10:42:35,265 epoch 5 - iter 4/25 - loss 1.14678892 - time (sec): 14.05 - samples/sec: 18.22 - lr: 0.000047 - momentum: 0.000000
2023-12-22 10:42:41,668 epoch 5 - iter 6/25 - loss 1.17264756 - time (sec): 20.45 - samples/sec: 18.78 - lr: 0.000046 - momentum: 0.000000
2023-12-22 10:42:46,536 epoch 5 - iter 8/25 - loss 1.16166624 - time (sec): 25.32 - samples/sec: 20.22 - lr: 0.000046 - momentum: 0.000000
2023-12-22 10:42:49,500 epoch 5 - iter 10/25 - loss 1.15600489 - time (sec): 28.28 - samples/sec: 22.63 - lr: 0.000046 - momentum: 0.000000
2023-12-22 10:42:54,104 epoch 5 - iter 12/25 - loss 1.13457781 - time (sec): 32.89 - samples/sec: 23.35 - lr: 0.000046 - momentum: 0.000000
2023-12-22 10:42:57,021 epoch 5 - iter 1

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 10:44:56,910 DEV : loss 1.250508189201355 - f1-score (micro avg)  0.4614





2023-12-22 10:44:59,344 ----------------------------------------------------------------------------------------------------
2023-12-22 10:45:05,261 epoch 6 - iter 2/25 - loss 1.08372331 - time (sec): 5.91 - samples/sec: 21.65 - lr: 0.000045 - momentum: 0.000000
2023-12-22 10:45:15,708 epoch 6 - iter 4/25 - loss 1.10402420 - time (sec): 16.36 - samples/sec: 15.65 - lr: 0.000045 - momentum: 0.000000
2023-12-22 10:45:18,455 epoch 6 - iter 6/25 - loss 1.13178865 - time (sec): 19.11 - samples/sec: 20.10 - lr: 0.000044 - momentum: 0.000000
2023-12-22 10:45:23,158 epoch 6 - iter 8/25 - loss 1.15520473 - time (sec): 23.81 - samples/sec: 21.50 - lr: 0.000044 - momentum: 0.000000
2023-12-22 10:45:28,579 epoch 6 - iter 10/25 - loss 1.14383934 - time (sec): 29.23 - samples/sec: 21.89 - lr: 0.000044 - momentum: 0.000000
2023-12-22 10:45:34,426 epoch 6 - iter 12/25 - loss 1.13049608 - time (sec): 35.08 - samples/sec: 21.89 - lr: 0.000044 - momentum: 0.000000
2023-12-22 10:45:37,047 epoch 6 - iter 1

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 10:47:37,739 DEV : loss 1.1119494438171387 - f1-score (micro avg)  0.5749





2023-12-22 10:47:38,963 ----------------------------------------------------------------------------------------------------
2023-12-22 10:47:48,069 epoch 7 - iter 2/25 - loss 1.13045770 - time (sec): 9.10 - samples/sec: 14.06 - lr: 0.000043 - momentum: 0.000000
2023-12-22 10:47:54,211 epoch 7 - iter 4/25 - loss 1.13677257 - time (sec): 15.25 - samples/sec: 16.79 - lr: 0.000042 - momentum: 0.000000
2023-12-22 10:47:58,918 epoch 7 - iter 6/25 - loss 1.09679496 - time (sec): 19.95 - samples/sec: 19.25 - lr: 0.000042 - momentum: 0.000000
2023-12-22 10:48:05,127 epoch 7 - iter 8/25 - loss 1.08397582 - time (sec): 26.16 - samples/sec: 19.57 - lr: 0.000042 - momentum: 0.000000
2023-12-22 10:48:12,646 epoch 7 - iter 10/25 - loss 1.07040812 - time (sec): 33.68 - samples/sec: 19.00 - lr: 0.000042 - momentum: 0.000000
2023-12-22 10:48:15,615 epoch 7 - iter 12/25 - loss 1.08052961 - time (sec): 36.65 - samples/sec: 20.96 - lr: 0.000042 - momentum: 0.000000
2023-12-22 10:48:19,077 epoch 7 - iter 1

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 10:50:24,860 DEV : loss 1.1634960174560547 - f1-score (micro avg)  0.541





2023-12-22 10:50:26,069 ----------------------------------------------------------------------------------------------------
2023-12-22 10:50:28,430 epoch 8 - iter 2/25 - loss 1.11166024 - time (sec): 2.36 - samples/sec: 54.26 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:50:35,156 epoch 8 - iter 4/25 - loss 1.16875330 - time (sec): 9.09 - samples/sec: 28.18 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:50:42,078 epoch 8 - iter 6/25 - loss 1.12557352 - time (sec): 16.01 - samples/sec: 23.99 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:50:47,636 epoch 8 - iter 8/25 - loss 1.11636018 - time (sec): 21.56 - samples/sec: 23.74 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:50:53,574 epoch 8 - iter 10/25 - loss 1.09795863 - time (sec): 27.50 - samples/sec: 23.27 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:50:56,128 epoch 8 - iter 12/25 - loss 1.08179525 - time (sec): 30.06 - samples/sec: 25.55 - lr: 0.000040 - momentum: 0.000000
2023-12-22 10:51:02,355 epoch 8 - iter 14

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 10:53:06,274 DEV : loss 1.1100457906723022 - f1-score (micro avg)  0.5591





2023-12-22 10:53:07,478 ----------------------------------------------------------------------------------------------------
2023-12-22 10:53:12,613 epoch 9 - iter 2/25 - loss 1.05109918 - time (sec): 5.13 - samples/sec: 24.94 - lr: 0.000038 - momentum: 0.000000
2023-12-22 10:53:15,832 epoch 9 - iter 4/25 - loss 1.02115479 - time (sec): 8.35 - samples/sec: 30.65 - lr: 0.000038 - momentum: 0.000000
2023-12-22 10:53:18,399 epoch 9 - iter 6/25 - loss 1.03444951 - time (sec): 10.92 - samples/sec: 35.17 - lr: 0.000038 - momentum: 0.000000
2023-12-22 10:53:24,688 epoch 9 - iter 8/25 - loss 1.03717581 - time (sec): 17.21 - samples/sec: 29.76 - lr: 0.000038 - momentum: 0.000000
2023-12-22 10:53:30,090 epoch 9 - iter 10/25 - loss 1.06029648 - time (sec): 22.61 - samples/sec: 28.31 - lr: 0.000038 - momentum: 0.000000
2023-12-22 10:53:35,998 epoch 9 - iter 12/25 - loss 1.05867640 - time (sec): 28.52 - samples/sec: 26.93 - lr: 0.000037 - momentum: 0.000000
2023-12-22 10:53:40,912 epoch 9 - iter 14

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 10:55:48,973 DEV : loss 1.236085057258606 - f1-score (micro avg)  0.4448





2023-12-22 10:55:50,204 ----------------------------------------------------------------------------------------------------
2023-12-22 10:55:53,072 epoch 10 - iter 2/25 - loss 1.03062943 - time (sec): 2.87 - samples/sec: 44.65 - lr: 0.000036 - momentum: 0.000000
2023-12-22 10:55:58,750 epoch 10 - iter 4/25 - loss 1.02695788 - time (sec): 8.54 - samples/sec: 29.96 - lr: 0.000036 - momentum: 0.000000
2023-12-22 10:56:00,785 epoch 10 - iter 6/25 - loss 0.99636220 - time (sec): 10.58 - samples/sec: 36.30 - lr: 0.000036 - momentum: 0.000000
2023-12-22 10:56:07,482 epoch 10 - iter 8/25 - loss 0.99483295 - time (sec): 17.28 - samples/sec: 29.64 - lr: 0.000036 - momentum: 0.000000
2023-12-22 10:56:13,542 epoch 10 - iter 10/25 - loss 1.00654557 - time (sec): 23.34 - samples/sec: 27.43 - lr: 0.000035 - momentum: 0.000000
2023-12-22 10:56:18,698 epoch 10 - iter 12/25 - loss 1.00173408 - time (sec): 28.49 - samples/sec: 26.95 - lr: 0.000035 - momentum: 0.000000
2023-12-22 10:56:22,157 epoch 10 - 

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 10:58:31,186 DEV : loss 1.2955684661865234 - f1-score (micro avg)  0.4282





2023-12-22 10:58:32,452 ----------------------------------------------------------------------------------------------------
2023-12-22 10:58:40,740 epoch 11 - iter 2/25 - loss 1.05371097 - time (sec): 8.29 - samples/sec: 15.45 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:58:47,289 epoch 11 - iter 4/25 - loss 1.05044724 - time (sec): 14.83 - samples/sec: 17.26 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:58:54,175 epoch 11 - iter 6/25 - loss 1.00706340 - time (sec): 21.72 - samples/sec: 17.68 - lr: 0.000034 - momentum: 0.000000
2023-12-22 10:58:57,656 epoch 11 - iter 8/25 - loss 1.00469454 - time (sec): 25.20 - samples/sec: 20.32 - lr: 0.000033 - momentum: 0.000000
2023-12-22 10:59:02,402 epoch 11 - iter 10/25 - loss 0.99484109 - time (sec): 29.95 - samples/sec: 21.37 - lr: 0.000033 - momentum: 0.000000
2023-12-22 10:59:05,371 epoch 11 - iter 12/25 - loss 0.99942245 - time (sec): 32.92 - samples/sec: 23.33 - lr: 0.000033 - momentum: 0.000000
2023-12-22 10:59:13,960 epoch 11 -

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:01:13,728 DEV : loss 1.2029240131378174 - f1-score (micro avg)  0.4614





2023-12-22 11:01:16,835 ----------------------------------------------------------------------------------------------------
2023-12-22 11:01:19,234 epoch 12 - iter 2/25 - loss 0.92297861 - time (sec): 2.39 - samples/sec: 53.48 - lr: 0.000032 - momentum: 0.000000
2023-12-22 11:01:24,610 epoch 12 - iter 4/25 - loss 0.95174971 - time (sec): 7.77 - samples/sec: 32.95 - lr: 0.000032 - momentum: 0.000000
2023-12-22 11:01:27,834 epoch 12 - iter 6/25 - loss 0.93288153 - time (sec): 10.99 - samples/sec: 34.93 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:01:31,720 epoch 12 - iter 8/25 - loss 0.92933384 - time (sec): 14.88 - samples/sec: 34.41 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:01:36,227 epoch 12 - iter 10/25 - loss 0.94545249 - time (sec): 19.39 - samples/sec: 33.01 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:01:43,804 epoch 12 - iter 12/25 - loss 0.94094366 - time (sec): 26.96 - samples/sec: 28.48 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:01:50,351 epoch 12 - 

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:03:59,390 DEV : loss 1.2030142545700073 - f1-score (micro avg)  0.4637





2023-12-22 11:04:02,234 ----------------------------------------------------------------------------------------------------
2023-12-22 11:04:10,950 epoch 13 - iter 2/25 - loss 0.98723122 - time (sec): 8.71 - samples/sec: 14.69 - lr: 0.000030 - momentum: 0.000000
2023-12-22 11:04:14,996 epoch 13 - iter 4/25 - loss 0.96888323 - time (sec): 12.76 - samples/sec: 20.06 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:04:21,764 epoch 13 - iter 6/25 - loss 0.99183727 - time (sec): 19.53 - samples/sec: 19.66 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:04:24,190 epoch 13 - iter 8/25 - loss 0.98062145 - time (sec): 21.95 - samples/sec: 23.32 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:04:27,744 epoch 13 - iter 10/25 - loss 0.98171347 - time (sec): 25.51 - samples/sec: 25.09 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:04:34,307 epoch 13 - iter 12/25 - loss 0.97577399 - time (sec): 32.07 - samples/sec: 23.95 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:04:38,571 epoch 13 -

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 11:06:42,101 DEV : loss 1.2244524955749512 - f1-score (micro avg)  0.4621





2023-12-22 11:06:45,271 ----------------------------------------------------------------------------------------------------
2023-12-22 11:06:55,234 epoch 14 - iter 2/25 - loss 0.92510638 - time (sec): 9.96 - samples/sec: 12.85 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:01,144 epoch 14 - iter 4/25 - loss 0.94056369 - time (sec): 15.87 - samples/sec: 16.13 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:03,324 epoch 14 - iter 6/25 - loss 0.92931374 - time (sec): 18.05 - samples/sec: 21.28 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:07,819 epoch 14 - iter 8/25 - loss 0.92796765 - time (sec): 22.54 - samples/sec: 22.71 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:13,552 epoch 14 - iter 10/25 - loss 0.92219507 - time (sec): 28.28 - samples/sec: 22.63 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:15,481 epoch 14 - iter 12/25 - loss 0.91127213 - time (sec): 30.21 - samples/sec: 25.43 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:07:21,883 epoch 14 -

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 11:09:25,280 DEV : loss 1.1803232431411743 - f1-score (micro avg)  0.4961





2023-12-22 11:09:28,463 ----------------------------------------------------------------------------------------------------
2023-12-22 11:09:34,302 epoch 15 - iter 2/25 - loss 0.94134340 - time (sec): 5.84 - samples/sec: 21.93 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:09:41,574 epoch 15 - iter 4/25 - loss 0.92799479 - time (sec): 13.11 - samples/sec: 19.53 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:09:44,219 epoch 15 - iter 6/25 - loss 0.92303127 - time (sec): 15.75 - samples/sec: 24.38 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:09:49,822 epoch 15 - iter 8/25 - loss 0.90794612 - time (sec): 21.36 - samples/sec: 23.97 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:09:56,304 epoch 15 - iter 10/25 - loss 0.92399319 - time (sec): 27.84 - samples/sec: 22.99 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:09:59,626 epoch 15 - iter 12/25 - loss 0.93584728 - time (sec): 31.16 - samples/sec: 24.65 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:10:05,916 epoch 15 -

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 11:12:12,434 DEV : loss 1.2230744361877441 - f1-score (micro avg)  0.4409





2023-12-22 11:12:13,790 ----------------------------------------------------------------------------------------------------
2023-12-22 11:12:23,245 epoch 16 - iter 2/25 - loss 0.86531904 - time (sec): 9.45 - samples/sec: 13.54 - lr: 0.000023 - momentum: 0.000000
2023-12-22 11:12:30,292 epoch 16 - iter 4/25 - loss 0.90516356 - time (sec): 16.50 - samples/sec: 15.52 - lr: 0.000023 - momentum: 0.000000
2023-12-22 11:12:37,251 epoch 16 - iter 6/25 - loss 0.89426561 - time (sec): 23.46 - samples/sec: 16.37 - lr: 0.000023 - momentum: 0.000000
2023-12-22 11:12:39,331 epoch 16 - iter 8/25 - loss 0.89814753 - time (sec): 25.54 - samples/sec: 20.05 - lr: 0.000023 - momentum: 0.000000
2023-12-22 11:12:41,182 epoch 16 - iter 10/25 - loss 0.88176488 - time (sec): 27.39 - samples/sec: 23.37 - lr: 0.000023 - momentum: 0.000000
2023-12-22 11:12:46,648 epoch 16 - iter 12/25 - loss 0.89083011 - time (sec): 32.86 - samples/sec: 23.37 - lr: 0.000022 - momentum: 0.000000
2023-12-22 11:12:51,281 epoch 16 -

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:14:55,693 DEV : loss 1.1959507465362549 - f1-score (micro avg)  0.4811





2023-12-22 11:14:56,901 ----------------------------------------------------------------------------------------------------
2023-12-22 11:15:02,824 epoch 17 - iter 2/25 - loss 0.78576991 - time (sec): 5.92 - samples/sec: 21.62 - lr: 0.000021 - momentum: 0.000000
2023-12-22 11:15:05,211 epoch 17 - iter 4/25 - loss 0.86066794 - time (sec): 8.31 - samples/sec: 30.81 - lr: 0.000021 - momentum: 0.000000
2023-12-22 11:15:08,103 epoch 17 - iter 6/25 - loss 0.88034180 - time (sec): 11.20 - samples/sec: 34.29 - lr: 0.000021 - momentum: 0.000000
2023-12-22 11:15:11,830 epoch 17 - iter 8/25 - loss 0.89807165 - time (sec): 14.93 - samples/sec: 34.30 - lr: 0.000021 - momentum: 0.000000
2023-12-22 11:15:17,905 epoch 17 - iter 10/25 - loss 0.89698843 - time (sec): 21.00 - samples/sec: 30.47 - lr: 0.000020 - momentum: 0.000000
2023-12-22 11:15:26,707 epoch 17 - iter 12/25 - loss 0.91117038 - time (sec): 29.80 - samples/sec: 25.77 - lr: 0.000020 - momentum: 0.000000
2023-12-22 11:15:33,152 epoch 17 - 

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 11:17:34,647 DEV : loss 1.2171670198440552 - f1-score (micro avg)  0.4771





2023-12-22 11:17:35,888 ----------------------------------------------------------------------------------------------------
2023-12-22 11:17:41,674 epoch 18 - iter 2/25 - loss 0.90681595 - time (sec): 5.78 - samples/sec: 22.13 - lr: 0.000019 - momentum: 0.000000
2023-12-22 11:17:48,451 epoch 18 - iter 4/25 - loss 0.90826537 - time (sec): 12.56 - samples/sec: 20.38 - lr: 0.000019 - momentum: 0.000000
2023-12-22 11:17:52,895 epoch 18 - iter 6/25 - loss 0.90627274 - time (sec): 17.01 - samples/sec: 22.58 - lr: 0.000019 - momentum: 0.000000
2023-12-22 11:18:02,803 epoch 18 - iter 8/25 - loss 0.92119107 - time (sec): 26.91 - samples/sec: 19.02 - lr: 0.000018 - momentum: 0.000000
2023-12-22 11:18:05,700 epoch 18 - iter 10/25 - loss 0.91911865 - time (sec): 29.81 - samples/sec: 21.47 - lr: 0.000018 - momentum: 0.000000
2023-12-22 11:18:08,666 epoch 18 - iter 12/25 - loss 0.91906692 - time (sec): 32.78 - samples/sec: 23.43 - lr: 0.000018 - momentum: 0.000000
2023-12-22 11:18:13,840 epoch 18 -

100%|██████████| 80/80 [01:38<00:00,  1.23s/it]

2023-12-22 11:20:21,401 DEV : loss 1.292548656463623 - f1-score (micro avg)  0.4062





2023-12-22 11:20:23,524 ----------------------------------------------------------------------------------------------------
2023-12-22 11:20:31,667 epoch 19 - iter 2/25 - loss 0.76675680 - time (sec): 8.14 - samples/sec: 15.73 - lr: 0.000017 - momentum: 0.000000
2023-12-22 11:20:34,270 epoch 19 - iter 4/25 - loss 0.83038992 - time (sec): 10.74 - samples/sec: 23.84 - lr: 0.000017 - momentum: 0.000000
2023-12-22 11:20:36,406 epoch 19 - iter 6/25 - loss 0.85037021 - time (sec): 12.87 - samples/sec: 29.83 - lr: 0.000016 - momentum: 0.000000
2023-12-22 11:20:41,586 epoch 19 - iter 8/25 - loss 0.87245805 - time (sec): 18.06 - samples/sec: 28.36 - lr: 0.000016 - momentum: 0.000000
2023-12-22 11:20:48,432 epoch 19 - iter 10/25 - loss 0.86259795 - time (sec): 24.90 - samples/sec: 25.70 - lr: 0.000016 - momentum: 0.000000
2023-12-22 11:20:50,789 epoch 19 - iter 12/25 - loss 0.87748759 - time (sec): 27.26 - samples/sec: 28.18 - lr: 0.000016 - momentum: 0.000000
2023-12-22 11:20:55,037 epoch 19 -

100%|██████████| 80/80 [01:37<00:00,  1.21s/it]

2023-12-22 11:23:04,612 DEV : loss 1.2208327054977417 - f1-score (micro avg)  0.4653





2023-12-22 11:23:07,811 ----------------------------------------------------------------------------------------------------
2023-12-22 11:23:13,916 epoch 20 - iter 2/25 - loss 0.80236065 - time (sec): 6.10 - samples/sec: 20.97 - lr: 0.000015 - momentum: 0.000000
2023-12-22 11:23:19,439 epoch 20 - iter 4/25 - loss 0.84477115 - time (sec): 11.63 - samples/sec: 22.02 - lr: 0.000014 - momentum: 0.000000
2023-12-22 11:23:25,871 epoch 20 - iter 6/25 - loss 0.86693441 - time (sec): 18.06 - samples/sec: 21.26 - lr: 0.000014 - momentum: 0.000000
2023-12-22 11:23:29,017 epoch 20 - iter 8/25 - loss 0.85368490 - time (sec): 21.20 - samples/sec: 24.15 - lr: 0.000014 - momentum: 0.000000
2023-12-22 11:23:36,390 epoch 20 - iter 10/25 - loss 0.86265506 - time (sec): 28.58 - samples/sec: 22.40 - lr: 0.000014 - momentum: 0.000000
2023-12-22 11:23:39,820 epoch 20 - iter 12/25 - loss 0.86065998 - time (sec): 32.01 - samples/sec: 24.00 - lr: 0.000014 - momentum: 0.000000
2023-12-22 11:23:45,558 epoch 20 -

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:25:52,269 DEV : loss 1.1900873184204102 - f1-score (micro avg)  0.4763





2023-12-22 11:25:54,620 ----------------------------------------------------------------------------------------------------
2023-12-22 11:25:57,066 epoch 21 - iter 2/25 - loss 0.89698717 - time (sec): 2.44 - samples/sec: 52.38 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:03,692 epoch 21 - iter 4/25 - loss 0.91738684 - time (sec): 9.07 - samples/sec: 28.22 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:06,872 epoch 21 - iter 6/25 - loss 0.88900704 - time (sec): 12.25 - samples/sec: 31.35 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:13,779 epoch 21 - iter 8/25 - loss 0.89479648 - time (sec): 19.16 - samples/sec: 26.73 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:22,484 epoch 21 - iter 10/25 - loss 0.88090148 - time (sec): 27.86 - samples/sec: 22.97 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:24,672 epoch 21 - iter 12/25 - loss 0.86835903 - time (sec): 30.05 - samples/sec: 25.56 - lr: 0.000012 - momentum: 0.000000
2023-12-22 11:26:29,238 epoch 21 - 

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:28:37,175 DEV : loss 1.257420539855957 - f1-score (micro avg)  0.4322





2023-12-22 11:28:39,271 ----------------------------------------------------------------------------------------------------
2023-12-22 11:28:46,155 epoch 22 - iter 2/25 - loss 0.78678539 - time (sec): 6.88 - samples/sec: 18.60 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:28:50,458 epoch 22 - iter 4/25 - loss 0.78557265 - time (sec): 11.19 - samples/sec: 22.89 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:28:53,763 epoch 22 - iter 6/25 - loss 0.82653567 - time (sec): 14.49 - samples/sec: 26.50 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:28:59,191 epoch 22 - iter 8/25 - loss 0.81884322 - time (sec): 19.92 - samples/sec: 25.71 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:29:03,976 epoch 22 - iter 10/25 - loss 0.84853913 - time (sec): 24.70 - samples/sec: 25.91 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:29:07,804 epoch 22 - iter 12/25 - loss 0.86746997 - time (sec): 28.53 - samples/sec: 26.92 - lr: 0.000009 - momentum: 0.000000
2023-12-22 11:29:10,120 epoch 22 -

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:31:22,547 DEV : loss 1.202034592628479 - f1-score (micro avg)  0.4708





2023-12-22 11:31:24,652 ----------------------------------------------------------------------------------------------------
2023-12-22 11:31:27,458 epoch 23 - iter 2/25 - loss 0.84560227 - time (sec): 2.80 - samples/sec: 45.66 - lr: 0.000008 - momentum: 0.000000
2023-12-22 11:31:29,911 epoch 23 - iter 4/25 - loss 0.88968182 - time (sec): 5.26 - samples/sec: 48.70 - lr: 0.000008 - momentum: 0.000000
2023-12-22 11:31:34,926 epoch 23 - iter 6/25 - loss 0.87363341 - time (sec): 10.27 - samples/sec: 37.38 - lr: 0.000008 - momentum: 0.000000
2023-12-22 11:31:40,657 epoch 23 - iter 8/25 - loss 0.88733968 - time (sec): 16.00 - samples/sec: 31.99 - lr: 0.000008 - momentum: 0.000000
2023-12-22 11:31:43,552 epoch 23 - iter 10/25 - loss 0.88203527 - time (sec): 18.90 - samples/sec: 33.87 - lr: 0.000007 - momentum: 0.000000
2023-12-22 11:31:48,366 epoch 23 - iter 12/25 - loss 0.87557735 - time (sec): 23.71 - samples/sec: 32.39 - lr: 0.000007 - momentum: 0.000000
2023-12-22 11:31:51,770 epoch 23 - 

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:34:01,454 DEV : loss 1.2392441034317017 - f1-score (micro avg)  0.4448





2023-12-22 11:34:03,614 ----------------------------------------------------------------------------------------------------
2023-12-22 11:34:09,261 epoch 24 - iter 2/25 - loss 0.82986975 - time (sec): 5.64 - samples/sec: 22.68 - lr: 0.000006 - momentum: 0.000000
2023-12-22 11:34:12,082 epoch 24 - iter 4/25 - loss 0.81127179 - time (sec): 8.46 - samples/sec: 30.24 - lr: 0.000006 - momentum: 0.000000
2023-12-22 11:34:14,857 epoch 24 - iter 6/25 - loss 0.81789842 - time (sec): 11.24 - samples/sec: 34.17 - lr: 0.000006 - momentum: 0.000000
2023-12-22 11:34:20,894 epoch 24 - iter 8/25 - loss 0.80675802 - time (sec): 17.28 - samples/sec: 29.64 - lr: 0.000005 - momentum: 0.000000
2023-12-22 11:34:29,527 epoch 24 - iter 10/25 - loss 0.81760132 - time (sec): 25.91 - samples/sec: 24.70 - lr: 0.000005 - momentum: 0.000000
2023-12-22 11:34:32,380 epoch 24 - iter 12/25 - loss 0.83726286 - time (sec): 28.76 - samples/sec: 26.70 - lr: 0.000005 - momentum: 0.000000
2023-12-22 11:34:36,470 epoch 24 - 

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:36:37,909 DEV : loss 1.2348711490631104 - f1-score (micro avg)  0.4472





2023-12-22 11:36:39,153 ----------------------------------------------------------------------------------------------------
2023-12-22 11:36:45,595 epoch 25 - iter 2/25 - loss 0.92567411 - time (sec): 6.44 - samples/sec: 19.88 - lr: 0.000004 - momentum: 0.000000
2023-12-22 11:36:51,459 epoch 25 - iter 4/25 - loss 0.90233177 - time (sec): 12.30 - samples/sec: 20.81 - lr: 0.000004 - momentum: 0.000000
2023-12-22 11:36:57,892 epoch 25 - iter 6/25 - loss 0.87061273 - time (sec): 18.74 - samples/sec: 20.50 - lr: 0.000004 - momentum: 0.000000
2023-12-22 11:37:03,873 epoch 25 - iter 8/25 - loss 0.86323537 - time (sec): 24.72 - samples/sec: 20.71 - lr: 0.000003 - momentum: 0.000000
2023-12-22 11:37:06,535 epoch 25 - iter 10/25 - loss 0.86330205 - time (sec): 27.38 - samples/sec: 23.38 - lr: 0.000003 - momentum: 0.000000
2023-12-22 11:37:12,262 epoch 25 - iter 12/25 - loss 0.87285958 - time (sec): 33.11 - samples/sec: 23.20 - lr: 0.000003 - momentum: 0.000000
2023-12-22 11:37:18,748 epoch 25 -

100%|██████████| 80/80 [01:37<00:00,  1.22s/it]

2023-12-22 11:39:25,024 DEV : loss 1.2239733934402466 - f1-score (micro avg)  0.4519





2023-12-22 11:39:28,157 ----------------------------------------------------------------------------------------------------
2023-12-22 11:39:28,163 Testing using last state of model ...


100%|██████████| 110/110 [01:21<00:00,  1.34it/s]

2023-12-22 11:40:50,160 
Results:
- F-score (micro) 0.5034
- F-score (macro) 0.3337
- Accuracy 0.5034

By class:
              precision    recall  f1-score   support

     comment     0.9219    0.5115    0.6580      1476
     support     0.1072    0.5288    0.1783       104
        deny     0.1041    0.2300    0.1433       100
       query     0.2383    0.6970    0.3552        66

    accuracy                         0.5034      1746
   macro avg     0.3429    0.4918    0.3337      1746
weighted avg     0.8007    0.5034    0.5885      1746

2023-12-22 11:40:50,166 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = False
add_post_title = False
add_depth = True

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[29]: "1 | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 11:40:54,913 Reading data from .
2023-12-22 11:40:54,914 Train: train_fasttext_format.txt
2023-12-22 11:40:54,918 Dev: dev_fasttext_format.txt
2023-12-22 11:40:54,920 Test: test_fasttext_format.txt
2023-12-22 11:40:55,008 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1619,
        "number_of_documents_per_class": {
            "query": 392,
            "support": 438,
            "deny": 351,
            "comment": 438
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 39467,
            "min": 4,
            "max": 494,
            "avg": 24.37739345274861
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_documen

0it [00:00, ?it/s]
1619it [00:00, 2546.84it/s]

2023-12-22 11:41:07,151 Dictionary created for label 'class' with 4 values: support (seen 438 times), comment (seen 438 times), query (seen 392 times), deny (seen 351 times)
2023-12-22 11:41:07,158 ----------------------------------------------------------------------------------------------------
2023-12-22 11:41:07,160 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-22 11:41:10,634 epoch 1 - iter 2/26 - loss 1.53695297 - time (sec): 3.44 - samples/sec: 37.19 - lr: 0.000001 - momentum: 0.000000
2023-12-22 11:41:11,484 epoch 1 - iter 4/26 - loss 1.55951932 - time (sec): 4.29 - samples/sec: 59.66 - lr: 0.000002 - momentum: 0.000000
2023-12-22 11:41:13,009 epoch 1 - iter 6/26 - loss 1.56170690 - time (sec): 5.82 - samples/sec: 66.02 - lr: 0.000004 - momentum: 0.000000
2023-12-22 11:41:14,384 epoch 1 - iter 8/26 - loss 1.54013699 - time (sec): 7.19 - samples/sec: 71.20 - lr: 0.000005 - momentum: 0.000000
2023-12-22 11:41:15,148 epoch 1 - iter 10/26 - loss 1.52234561 - time (sec): 7.95 - samples/sec: 80.46 - lr: 0.000007 - momentum: 0.000000
2023-12-22 11:41:18,083 epoch 1 - iter 12/26 - loss 1.51405248 - time (sec): 10.89 - samples/sec: 70.52 - lr: 0.000008 - momentum: 0.000000
2023-12-22 11:41:20,965 epoch 1 - iter 14/26 - loss 1.50721834 - time (sec): 13.77 - samples/sec: 65.06 - lr: 0.000010 - momentum: 0.000000
2023-12-22 11:41:24,753 epoch

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:42:24,629 DEV : loss 1.1799792051315308 - f1-score (micro avg)  0.5568





2023-12-22 11:42:25,346 ----------------------------------------------------------------------------------------------------
2023-12-22 11:42:27,887 epoch 2 - iter 2/26 - loss 1.40610832 - time (sec): 2.54 - samples/sec: 50.42 - lr: 0.000021 - momentum: 0.000000
2023-12-22 11:42:28,646 epoch 2 - iter 4/26 - loss 1.40410569 - time (sec): 3.30 - samples/sec: 77.64 - lr: 0.000022 - momentum: 0.000000
2023-12-22 11:42:31,689 epoch 2 - iter 6/26 - loss 1.39409844 - time (sec): 6.34 - samples/sec: 60.56 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:42:34,375 epoch 2 - iter 8/26 - loss 1.38546188 - time (sec): 9.03 - samples/sec: 56.72 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:42:35,936 epoch 2 - iter 10/26 - loss 1.36239115 - time (sec): 10.59 - samples/sec: 60.45 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:42:38,037 epoch 2 - iter 12/26 - loss 1.35169302 - time (sec): 12.69 - samples/sec: 60.53 - lr: 0.000028 - momentum: 0.000000
2023-12-22 11:42:39,441 epoch 2 - iter 14/2

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:43:42,896 DEV : loss 1.377198576927185 - f1-score (micro avg)  0.3186





2023-12-22 11:43:43,815 ----------------------------------------------------------------------------------------------------
2023-12-22 11:43:45,343 epoch 3 - iter 2/26 - loss 1.20223618 - time (sec): 1.52 - samples/sec: 84.16 - lr: 0.000041 - momentum: 0.000000
2023-12-22 11:43:47,247 epoch 3 - iter 4/26 - loss 1.27203315 - time (sec): 3.42 - samples/sec: 74.75 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:43:48,129 epoch 3 - iter 6/26 - loss 1.30502715 - time (sec): 4.31 - samples/sec: 89.16 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:43:51,670 epoch 3 - iter 8/26 - loss 1.29244933 - time (sec): 7.85 - samples/sec: 65.24 - lr: 0.000045 - momentum: 0.000000
2023-12-22 11:43:53,473 epoch 3 - iter 10/26 - loss 1.26748443 - time (sec): 9.65 - samples/sec: 66.31 - lr: 0.000047 - momentum: 0.000000
2023-12-22 11:43:59,303 epoch 3 - iter 12/26 - loss 1.26712580 - time (sec): 15.48 - samples/sec: 49.61 - lr: 0.000048 - momentum: 0.000000
2023-12-22 11:44:01,672 epoch 3 - iter 14/26

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:45:01,544 DEV : loss 1.4556993246078491 - f1-score (micro avg)  0.2697





2023-12-22 11:45:02,051 ----------------------------------------------------------------------------------------------------
2023-12-22 11:45:03,880 epoch 4 - iter 2/26 - loss 1.22252375 - time (sec): 1.83 - samples/sec: 70.07 - lr: 0.000049 - momentum: 0.000000
2023-12-22 11:45:08,118 epoch 4 - iter 4/26 - loss 1.17724654 - time (sec): 6.06 - samples/sec: 42.21 - lr: 0.000049 - momentum: 0.000000
2023-12-22 11:45:09,164 epoch 4 - iter 6/26 - loss 1.20667607 - time (sec): 7.11 - samples/sec: 54.00 - lr: 0.000048 - momentum: 0.000000
2023-12-22 11:45:12,702 epoch 4 - iter 8/26 - loss 1.18787265 - time (sec): 10.65 - samples/sec: 48.08 - lr: 0.000048 - momentum: 0.000000
2023-12-22 11:45:18,232 epoch 4 - iter 10/26 - loss 1.19735019 - time (sec): 16.18 - samples/sec: 39.56 - lr: 0.000048 - momentum: 0.000000
2023-12-22 11:45:20,976 epoch 4 - iter 12/26 - loss 1.20339202 - time (sec): 18.92 - samples/sec: 40.59 - lr: 0.000048 - momentum: 0.000000
2023-12-22 11:45:21,648 epoch 4 - iter 14/

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:46:19,771 DEV : loss 1.2175966501235962 - f1-score (micro avg)  0.4866





2023-12-22 11:46:20,303 ----------------------------------------------------------------------------------------------------
2023-12-22 11:46:22,575 epoch 5 - iter 2/26 - loss 1.24642456 - time (sec): 2.27 - samples/sec: 56.39 - lr: 0.000047 - momentum: 0.000000
2023-12-22 11:46:28,641 epoch 5 - iter 4/26 - loss 1.17324620 - time (sec): 8.34 - samples/sec: 30.71 - lr: 0.000046 - momentum: 0.000000
2023-12-22 11:46:31,315 epoch 5 - iter 6/26 - loss 1.18605381 - time (sec): 11.01 - samples/sec: 34.88 - lr: 0.000046 - momentum: 0.000000
2023-12-22 11:46:32,615 epoch 5 - iter 8/26 - loss 1.15998991 - time (sec): 12.31 - samples/sec: 41.59 - lr: 0.000046 - momentum: 0.000000
2023-12-22 11:46:34,657 epoch 5 - iter 10/26 - loss 1.15792056 - time (sec): 14.35 - samples/sec: 44.59 - lr: 0.000046 - momentum: 0.000000
2023-12-22 11:46:36,690 epoch 5 - iter 12/26 - loss 1.15998149 - time (sec): 16.38 - samples/sec: 46.87 - lr: 0.000046 - momentum: 0.000000
2023-12-22 11:46:39,048 epoch 5 - iter 14

100%|██████████| 80/80 [00:44<00:00,  1.80it/s]

2023-12-22 11:47:39,515 DEV : loss 1.2546203136444092 - f1-score (micro avg)  0.3265





2023-12-22 11:47:40,376 ----------------------------------------------------------------------------------------------------
2023-12-22 11:47:42,821 epoch 6 - iter 2/26 - loss 1.20073354 - time (sec): 2.44 - samples/sec: 52.41 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:46,464 epoch 6 - iter 4/26 - loss 1.16687027 - time (sec): 6.09 - samples/sec: 42.07 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:48,100 epoch 6 - iter 6/26 - loss 1.14844062 - time (sec): 7.72 - samples/sec: 49.73 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:51,727 epoch 6 - iter 8/26 - loss 1.13553542 - time (sec): 11.35 - samples/sec: 45.11 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:53,038 epoch 6 - iter 10/26 - loss 1.14712431 - time (sec): 12.66 - samples/sec: 50.56 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:54,563 epoch 6 - iter 12/26 - loss 1.13484957 - time (sec): 14.18 - samples/sec: 54.14 - lr: 0.000044 - momentum: 0.000000
2023-12-22 11:47:55,849 epoch 6 - iter 14/

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:48:57,751 DEV : loss 1.0776307582855225 - f1-score (micro avg)  0.5741





2023-12-22 11:48:58,620 ----------------------------------------------------------------------------------------------------
2023-12-22 11:49:00,837 epoch 7 - iter 2/26 - loss 1.13267267 - time (sec): 2.22 - samples/sec: 57.77 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:49:01,750 epoch 7 - iter 4/26 - loss 1.10259828 - time (sec): 3.13 - samples/sec: 81.83 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:49:02,853 epoch 7 - iter 6/26 - loss 1.10962482 - time (sec): 4.23 - samples/sec: 90.76 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:49:06,149 epoch 7 - iter 8/26 - loss 1.09808156 - time (sec): 7.53 - samples/sec: 68.02 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:49:09,621 epoch 7 - iter 10/26 - loss 1.09512497 - time (sec): 11.00 - samples/sec: 58.19 - lr: 0.000042 - momentum: 0.000000
2023-12-22 11:49:11,653 epoch 7 - iter 12/26 - loss 1.08950140 - time (sec): 13.03 - samples/sec: 58.94 - lr: 0.000041 - momentum: 0.000000
2023-12-22 11:49:16,756 epoch 7 - iter 14/2

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:50:16,697 DEV : loss 1.0192303657531738 - f1-score (micro avg)  0.6388





2023-12-22 11:50:17,205 ----------------------------------------------------------------------------------------------------
2023-12-22 11:50:18,840 epoch 8 - iter 2/26 - loss 1.05324709 - time (sec): 1.63 - samples/sec: 78.42 - lr: 0.000040 - momentum: 0.000000
2023-12-22 11:50:22,815 epoch 8 - iter 4/26 - loss 1.04026771 - time (sec): 5.61 - samples/sec: 45.66 - lr: 0.000040 - momentum: 0.000000
2023-12-22 11:50:24,597 epoch 8 - iter 6/26 - loss 1.05009498 - time (sec): 7.39 - samples/sec: 51.97 - lr: 0.000040 - momentum: 0.000000
2023-12-22 11:50:27,060 epoch 8 - iter 8/26 - loss 1.06381184 - time (sec): 9.85 - samples/sec: 51.97 - lr: 0.000040 - momentum: 0.000000
2023-12-22 11:50:28,356 epoch 8 - iter 10/26 - loss 1.06154104 - time (sec): 11.15 - samples/sec: 57.41 - lr: 0.000039 - momentum: 0.000000
2023-12-22 11:50:29,634 epoch 8 - iter 12/26 - loss 1.05198809 - time (sec): 12.43 - samples/sec: 61.81 - lr: 0.000039 - momentum: 0.000000
2023-12-22 11:50:33,289 epoch 8 - iter 14/2

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:51:35,583 DEV : loss 1.2410601377487183 - f1-score (micro avg)  0.4377





2023-12-22 11:51:36,133 ----------------------------------------------------------------------------------------------------
2023-12-22 11:51:38,543 epoch 9 - iter 2/26 - loss 1.11183858 - time (sec): 2.41 - samples/sec: 53.15 - lr: 0.000038 - momentum: 0.000000
2023-12-22 11:51:39,390 epoch 9 - iter 4/26 - loss 1.06442276 - time (sec): 3.25 - samples/sec: 78.65 - lr: 0.000038 - momentum: 0.000000
2023-12-22 11:51:40,826 epoch 9 - iter 6/26 - loss 1.07180591 - time (sec): 4.69 - samples/sec: 81.86 - lr: 0.000038 - momentum: 0.000000
2023-12-22 11:51:46,471 epoch 9 - iter 8/26 - loss 1.03803696 - time (sec): 10.34 - samples/sec: 49.53 - lr: 0.000037 - momentum: 0.000000
2023-12-22 11:51:48,419 epoch 9 - iter 10/26 - loss 1.05337650 - time (sec): 12.28 - samples/sec: 52.10 - lr: 0.000037 - momentum: 0.000000
2023-12-22 11:51:53,506 epoch 9 - iter 12/26 - loss 1.05459842 - time (sec): 17.37 - samples/sec: 44.21 - lr: 0.000037 - momentum: 0.000000
2023-12-22 11:51:55,851 epoch 9 - iter 14/

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:52:55,704 DEV : loss 1.2207993268966675 - f1-score (micro avg)  0.4401





2023-12-22 11:52:56,234 ----------------------------------------------------------------------------------------------------
2023-12-22 11:52:57,629 epoch 10 - iter 2/26 - loss 1.09087723 - time (sec): 1.39 - samples/sec: 91.87 - lr: 0.000036 - momentum: 0.000000
2023-12-22 11:53:03,135 epoch 10 - iter 4/26 - loss 1.05275056 - time (sec): 6.90 - samples/sec: 37.11 - lr: 0.000035 - momentum: 0.000000
2023-12-22 11:53:04,403 epoch 10 - iter 6/26 - loss 1.03684102 - time (sec): 8.17 - samples/sec: 47.02 - lr: 0.000035 - momentum: 0.000000
2023-12-22 11:53:06,439 epoch 10 - iter 8/26 - loss 1.00199849 - time (sec): 10.20 - samples/sec: 50.18 - lr: 0.000035 - momentum: 0.000000
2023-12-22 11:53:07,749 epoch 10 - iter 10/26 - loss 0.99166657 - time (sec): 11.51 - samples/sec: 55.59 - lr: 0.000035 - momentum: 0.000000
2023-12-22 11:53:09,277 epoch 10 - iter 12/26 - loss 1.01073534 - time (sec): 13.04 - samples/sec: 58.89 - lr: 0.000035 - momentum: 0.000000
2023-12-22 11:53:11,516 epoch 10 - i

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:54:14,235 DEV : loss 1.2163344621658325 - f1-score (micro avg)  0.4621





2023-12-22 11:54:14,780 ----------------------------------------------------------------------------------------------------
2023-12-22 11:54:16,980 epoch 11 - iter 2/26 - loss 1.10427517 - time (sec): 2.20 - samples/sec: 58.23 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:18,381 epoch 11 - iter 4/26 - loss 1.03541739 - time (sec): 3.60 - samples/sec: 71.12 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:22,246 epoch 11 - iter 6/26 - loss 1.00940671 - time (sec): 7.46 - samples/sec: 51.45 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:24,576 epoch 11 - iter 8/26 - loss 1.00854415 - time (sec): 9.79 - samples/sec: 52.28 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:28,492 epoch 11 - iter 10/26 - loss 1.00395256 - time (sec): 13.71 - samples/sec: 46.68 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:33,889 epoch 11 - iter 12/26 - loss 0.99566940 - time (sec): 19.11 - samples/sec: 40.19 - lr: 0.000033 - momentum: 0.000000
2023-12-22 11:54:36,248 epoch 11 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:55:34,470 DEV : loss 1.1919692754745483 - f1-score (micro avg)  0.4756





2023-12-22 11:55:35,350 ----------------------------------------------------------------------------------------------------
2023-12-22 11:55:36,166 epoch 12 - iter 2/26 - loss 0.94423145 - time (sec): 0.81 - samples/sec: 157.29 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:55:39,335 epoch 12 - iter 4/26 - loss 0.91804801 - time (sec): 3.98 - samples/sec: 64.28 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:55:42,464 epoch 12 - iter 6/26 - loss 0.94936498 - time (sec): 7.11 - samples/sec: 53.99 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:55:47,122 epoch 12 - iter 8/26 - loss 0.96932687 - time (sec): 11.77 - samples/sec: 43.50 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:55:48,565 epoch 12 - iter 10/26 - loss 0.98520730 - time (sec): 13.21 - samples/sec: 48.44 - lr: 0.000031 - momentum: 0.000000
2023-12-22 11:55:52,613 epoch 12 - iter 12/26 - loss 0.98486475 - time (sec): 17.26 - samples/sec: 44.49 - lr: 0.000030 - momentum: 0.000000
2023-12-22 11:55:58,631 epoch 12 - 

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:56:53,226 DEV : loss 1.4146589040756226 - f1-score (micro avg)  0.3068





2023-12-22 11:56:53,768 ----------------------------------------------------------------------------------------------------
2023-12-22 11:56:57,227 epoch 13 - iter 2/26 - loss 0.97635317 - time (sec): 3.46 - samples/sec: 37.03 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:56:58,024 epoch 13 - iter 4/26 - loss 1.01432830 - time (sec): 4.25 - samples/sec: 60.17 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:57:01,102 epoch 13 - iter 6/26 - loss 1.04690897 - time (sec): 7.33 - samples/sec: 52.37 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:57:02,295 epoch 13 - iter 8/26 - loss 1.02797540 - time (sec): 8.53 - samples/sec: 60.05 - lr: 0.000029 - momentum: 0.000000
2023-12-22 11:57:06,606 epoch 13 - iter 10/26 - loss 1.03701021 - time (sec): 12.84 - samples/sec: 49.86 - lr: 0.000028 - momentum: 0.000000
2023-12-22 11:57:09,627 epoch 13 - iter 12/26 - loss 1.03159483 - time (sec): 15.86 - samples/sec: 48.43 - lr: 0.000028 - momentum: 0.000000
2023-12-22 11:57:10,397 epoch 13 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:58:13,152 DEV : loss 1.2494794130325317 - f1-score (micro avg)  0.4148





2023-12-22 11:58:13,676 ----------------------------------------------------------------------------------------------------
2023-12-22 11:58:14,976 epoch 14 - iter 2/26 - loss 0.98333240 - time (sec): 1.30 - samples/sec: 98.64 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:58:16,709 epoch 14 - iter 4/26 - loss 0.96142769 - time (sec): 3.03 - samples/sec: 84.48 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:58:20,280 epoch 14 - iter 6/26 - loss 0.94791085 - time (sec): 6.60 - samples/sec: 58.17 - lr: 0.000027 - momentum: 0.000000
2023-12-22 11:58:22,575 epoch 14 - iter 8/26 - loss 0.95148974 - time (sec): 8.90 - samples/sec: 57.55 - lr: 0.000026 - momentum: 0.000000
2023-12-22 11:58:25,745 epoch 14 - iter 10/26 - loss 0.93347360 - time (sec): 12.07 - samples/sec: 53.04 - lr: 0.000026 - momentum: 0.000000
2023-12-22 11:58:27,231 epoch 14 - iter 12/26 - loss 0.94196126 - time (sec): 13.55 - samples/sec: 56.67 - lr: 0.000026 - momentum: 0.000000
2023-12-22 11:58:29,492 epoch 14 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 11:59:31,159 DEV : loss 1.224995493888855 - f1-score (micro avg)  0.4535





2023-12-22 11:59:32,081 ----------------------------------------------------------------------------------------------------
2023-12-22 11:59:34,290 epoch 15 - iter 2/26 - loss 1.01460922 - time (sec): 2.20 - samples/sec: 58.08 - lr: 0.000025 - momentum: 0.000000
2023-12-22 11:59:37,313 epoch 15 - iter 4/26 - loss 0.97855489 - time (sec): 5.23 - samples/sec: 48.98 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:59:38,638 epoch 15 - iter 6/26 - loss 0.92755146 - time (sec): 6.55 - samples/sec: 58.61 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:59:40,082 epoch 15 - iter 8/26 - loss 0.93181337 - time (sec): 8.00 - samples/sec: 64.03 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:59:42,969 epoch 15 - iter 10/26 - loss 0.93565122 - time (sec): 10.88 - samples/sec: 58.81 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:59:44,386 epoch 15 - iter 12/26 - loss 0.94067580 - time (sec): 12.30 - samples/sec: 62.44 - lr: 0.000024 - momentum: 0.000000
2023-12-22 11:59:50,274 epoch 15 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:00:49,937 DEV : loss 1.2201615571975708 - f1-score (micro avg)  0.4282





2023-12-22 12:00:50,620 ----------------------------------------------------------------------------------------------------
2023-12-22 12:00:56,875 epoch 16 - iter 2/26 - loss 0.91682559 - time (sec): 6.25 - samples/sec: 20.47 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:00:59,149 epoch 16 - iter 4/26 - loss 0.89993663 - time (sec): 8.53 - samples/sec: 30.02 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:01:00,436 epoch 16 - iter 6/26 - loss 0.89704849 - time (sec): 9.81 - samples/sec: 39.12 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:01:01,978 epoch 16 - iter 8/26 - loss 0.92037676 - time (sec): 11.36 - samples/sec: 45.09 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:01:03,807 epoch 16 - iter 10/26 - loss 0.92479108 - time (sec): 13.19 - samples/sec: 48.54 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:01:05,024 epoch 16 - iter 12/26 - loss 0.90601265 - time (sec): 14.40 - samples/sec: 53.32 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:01:10,425 epoch 16 - i

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:02:08,554 DEV : loss 1.2131987810134888 - f1-score (micro avg)  0.4629





2023-12-22 12:02:09,080 ----------------------------------------------------------------------------------------------------
2023-12-22 12:02:11,094 epoch 17 - iter 2/26 - loss 0.93837908 - time (sec): 2.01 - samples/sec: 63.65 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:02:12,742 epoch 17 - iter 4/26 - loss 0.91582280 - time (sec): 3.66 - samples/sec: 69.95 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:02:14,042 epoch 17 - iter 6/26 - loss 0.92527940 - time (sec): 4.96 - samples/sec: 77.43 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:02:17,676 epoch 17 - iter 8/26 - loss 0.90810137 - time (sec): 8.59 - samples/sec: 59.58 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:02:18,420 epoch 17 - iter 10/26 - loss 0.91976752 - time (sec): 9.34 - samples/sec: 68.54 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:02:19,909 epoch 17 - iter 12/26 - loss 0.90222280 - time (sec): 10.83 - samples/sec: 70.94 - lr: 0.000019 - momentum: 0.000000
2023-12-22 12:02:22,905 epoch 17 - ite

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:03:25,209 DEV : loss 1.1885254383087158 - f1-score (micro avg)  0.4826





2023-12-22 12:03:26,417 ----------------------------------------------------------------------------------------------------
2023-12-22 12:03:27,760 epoch 18 - iter 2/26 - loss 0.89684653 - time (sec): 1.34 - samples/sec: 95.41 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:03:30,017 epoch 18 - iter 4/26 - loss 0.91583008 - time (sec): 3.60 - samples/sec: 71.15 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:03:31,660 epoch 18 - iter 6/26 - loss 0.95049504 - time (sec): 5.24 - samples/sec: 73.26 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:03:32,598 epoch 18 - iter 8/26 - loss 0.95761086 - time (sec): 6.18 - samples/sec: 82.85 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:03:36,686 epoch 18 - iter 10/26 - loss 0.93243923 - time (sec): 10.27 - samples/sec: 62.33 - lr: 0.000017 - momentum: 0.000000
2023-12-22 12:03:39,351 epoch 18 - iter 12/26 - loss 0.94137161 - time (sec): 12.93 - samples/sec: 59.39 - lr: 0.000017 - momentum: 0.000000
2023-12-22 12:03:40,152 epoch 18 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:04:43,574 DEV : loss 1.3384569883346558 - f1-score (micro avg)  0.3683





2023-12-22 12:04:45,353 ----------------------------------------------------------------------------------------------------
2023-12-22 12:04:47,817 epoch 19 - iter 2/26 - loss 0.90415058 - time (sec): 2.46 - samples/sec: 52.04 - lr: 0.000016 - momentum: 0.000000
2023-12-22 12:04:52,145 epoch 19 - iter 4/26 - loss 0.88622695 - time (sec): 6.79 - samples/sec: 37.72 - lr: 0.000016 - momentum: 0.000000
2023-12-22 12:04:53,798 epoch 19 - iter 6/26 - loss 0.91754082 - time (sec): 8.44 - samples/sec: 45.50 - lr: 0.000016 - momentum: 0.000000
2023-12-22 12:04:56,847 epoch 19 - iter 8/26 - loss 0.92356225 - time (sec): 11.49 - samples/sec: 44.56 - lr: 0.000015 - momentum: 0.000000
2023-12-22 12:04:59,325 epoch 19 - iter 10/26 - loss 0.93197197 - time (sec): 13.97 - samples/sec: 45.82 - lr: 0.000015 - momentum: 0.000000
2023-12-22 12:05:03,467 epoch 19 - iter 12/26 - loss 0.91861912 - time (sec): 18.11 - samples/sec: 42.41 - lr: 0.000015 - momentum: 0.000000
2023-12-22 12:05:08,918 epoch 19 - i

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:06:03,445 DEV : loss 1.2225747108459473 - f1-score (micro avg)  0.444





2023-12-22 12:06:04,719 ----------------------------------------------------------------------------------------------------
2023-12-22 12:06:07,474 epoch 20 - iter 2/26 - loss 0.92401296 - time (sec): 2.75 - samples/sec: 46.51 - lr: 0.000014 - momentum: 0.000000
2023-12-22 12:06:09,933 epoch 20 - iter 4/26 - loss 0.93044421 - time (sec): 5.21 - samples/sec: 49.11 - lr: 0.000014 - momentum: 0.000000
2023-12-22 12:06:11,568 epoch 20 - iter 6/26 - loss 0.94148337 - time (sec): 6.85 - samples/sec: 56.08 - lr: 0.000013 - momentum: 0.000000
2023-12-22 12:06:15,101 epoch 20 - iter 8/26 - loss 0.90420067 - time (sec): 10.38 - samples/sec: 49.33 - lr: 0.000013 - momentum: 0.000000
2023-12-22 12:06:16,931 epoch 20 - iter 10/26 - loss 0.89274457 - time (sec): 12.21 - samples/sec: 52.42 - lr: 0.000013 - momentum: 0.000000
2023-12-22 12:06:18,651 epoch 20 - iter 12/26 - loss 0.88557555 - time (sec): 13.93 - samples/sec: 55.13 - lr: 0.000013 - momentum: 0.000000
2023-12-22 12:06:21,502 epoch 20 - i

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:07:24,297 DEV : loss 1.2035211324691772 - f1-score (micro avg)  0.4614





2023-12-22 12:07:25,575 ----------------------------------------------------------------------------------------------------
2023-12-22 12:07:29,390 epoch 21 - iter 2/26 - loss 0.87792733 - time (sec): 3.81 - samples/sec: 33.57 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:31,355 epoch 21 - iter 4/26 - loss 0.89166825 - time (sec): 5.78 - samples/sec: 44.30 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:33,937 epoch 21 - iter 6/26 - loss 0.89716168 - time (sec): 8.36 - samples/sec: 45.93 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:35,281 epoch 21 - iter 8/26 - loss 0.89304239 - time (sec): 9.70 - samples/sec: 52.76 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:37,269 epoch 21 - iter 10/26 - loss 0.88361748 - time (sec): 11.69 - samples/sec: 54.74 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:40,661 epoch 21 - iter 12/26 - loss 0.88051993 - time (sec): 15.08 - samples/sec: 50.91 - lr: 0.000011 - momentum: 0.000000
2023-12-22 12:07:44,438 epoch 21 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:08:45,130 DEV : loss 1.2420960664749146 - f1-score (micro avg)  0.4369





2023-12-22 12:08:45,773 ----------------------------------------------------------------------------------------------------
2023-12-22 12:08:48,891 epoch 22 - iter 2/26 - loss 0.80625516 - time (sec): 3.12 - samples/sec: 41.09 - lr: 0.000009 - momentum: 0.000000
2023-12-22 12:08:52,426 epoch 22 - iter 4/26 - loss 0.85681318 - time (sec): 6.65 - samples/sec: 38.50 - lr: 0.000009 - momentum: 0.000000
2023-12-22 12:08:53,430 epoch 22 - iter 6/26 - loss 0.91369489 - time (sec): 7.65 - samples/sec: 50.17 - lr: 0.000009 - momentum: 0.000000
2023-12-22 12:08:54,309 epoch 22 - iter 8/26 - loss 0.91789503 - time (sec): 8.53 - samples/sec: 60.00 - lr: 0.000009 - momentum: 0.000000
2023-12-22 12:08:56,544 epoch 22 - iter 10/26 - loss 0.91051065 - time (sec): 10.77 - samples/sec: 59.44 - lr: 0.000009 - momentum: 0.000000
2023-12-22 12:08:58,984 epoch 22 - iter 12/26 - loss 0.92534348 - time (sec): 13.21 - samples/sec: 58.15 - lr: 0.000008 - momentum: 0.000000
2023-12-22 12:09:01,561 epoch 22 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:10:03,260 DEV : loss 1.2255445718765259 - f1-score (micro avg)  0.4432





2023-12-22 12:10:04,172 ----------------------------------------------------------------------------------------------------
2023-12-22 12:10:05,937 epoch 23 - iter 2/26 - loss 0.77352196 - time (sec): 1.76 - samples/sec: 72.62 - lr: 0.000007 - momentum: 0.000000
2023-12-22 12:10:08,606 epoch 23 - iter 4/26 - loss 0.79750079 - time (sec): 4.43 - samples/sec: 57.77 - lr: 0.000007 - momentum: 0.000000
2023-12-22 12:10:10,800 epoch 23 - iter 6/26 - loss 0.79437221 - time (sec): 6.63 - samples/sec: 57.96 - lr: 0.000007 - momentum: 0.000000
2023-12-22 12:10:12,811 epoch 23 - iter 8/26 - loss 0.84200996 - time (sec): 8.64 - samples/sec: 59.29 - lr: 0.000007 - momentum: 0.000000
2023-12-22 12:10:18,754 epoch 23 - iter 10/26 - loss 0.85040545 - time (sec): 14.58 - samples/sec: 43.90 - lr: 0.000006 - momentum: 0.000000
2023-12-22 12:10:19,957 epoch 23 - iter 12/26 - loss 0.87260033 - time (sec): 15.78 - samples/sec: 48.66 - lr: 0.000006 - momentum: 0.000000
2023-12-22 12:10:25,323 epoch 23 - it

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:11:23,768 DEV : loss 1.244507908821106 - f1-score (micro avg)  0.4306





2023-12-22 12:11:24,582 ----------------------------------------------------------------------------------------------------
2023-12-22 12:11:27,670 epoch 24 - iter 2/26 - loss 0.79353625 - time (sec): 3.08 - samples/sec: 41.50 - lr: 0.000005 - momentum: 0.000000
2023-12-22 12:11:30,063 epoch 24 - iter 4/26 - loss 0.84652968 - time (sec): 5.48 - samples/sec: 46.73 - lr: 0.000005 - momentum: 0.000000
2023-12-22 12:11:31,492 epoch 24 - iter 6/26 - loss 0.85450992 - time (sec): 6.91 - samples/sec: 55.60 - lr: 0.000005 - momentum: 0.000000
2023-12-22 12:11:32,887 epoch 24 - iter 8/26 - loss 0.88326555 - time (sec): 8.30 - samples/sec: 61.67 - lr: 0.000004 - momentum: 0.000000
2023-12-22 12:11:34,557 epoch 24 - iter 10/26 - loss 0.89343160 - time (sec): 9.97 - samples/sec: 64.18 - lr: 0.000004 - momentum: 0.000000
2023-12-22 12:11:39,958 epoch 24 - iter 12/26 - loss 0.87906678 - time (sec): 15.37 - samples/sec: 49.96 - lr: 0.000004 - momentum: 0.000000
2023-12-22 12:11:41,913 epoch 24 - ite

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:12:42,754 DEV : loss 1.2375543117523193 - f1-score (micro avg)  0.4409





2023-12-22 12:12:43,270 ----------------------------------------------------------------------------------------------------
2023-12-22 12:12:44,379 epoch 25 - iter 2/26 - loss 0.75897121 - time (sec): 1.11 - samples/sec: 115.61 - lr: 0.000003 - momentum: 0.000000
2023-12-22 12:12:45,456 epoch 25 - iter 4/26 - loss 0.82089968 - time (sec): 2.18 - samples/sec: 117.23 - lr: 0.000003 - momentum: 0.000000
2023-12-22 12:12:47,889 epoch 25 - iter 6/26 - loss 0.85340822 - time (sec): 4.62 - samples/sec: 83.17 - lr: 0.000002 - momentum: 0.000000
2023-12-22 12:12:51,640 epoch 25 - iter 8/26 - loss 0.84948771 - time (sec): 8.37 - samples/sec: 61.19 - lr: 0.000002 - momentum: 0.000000
2023-12-22 12:12:52,992 epoch 25 - iter 10/26 - loss 0.84830686 - time (sec): 9.72 - samples/sec: 65.84 - lr: 0.000002 - momentum: 0.000000
2023-12-22 12:12:56,621 epoch 25 - iter 12/26 - loss 0.83587611 - time (sec): 13.35 - samples/sec: 57.53 - lr: 0.000002 - momentum: 0.000000
2023-12-22 12:12:59,186 epoch 25 - i

100%|██████████| 80/80 [00:44<00:00,  1.79it/s]

2023-12-22 12:13:58,506 DEV : loss 1.2196269035339355 - f1-score (micro avg)  0.4543





2023-12-22 12:14:00,805 ----------------------------------------------------------------------------------------------------
2023-12-22 12:14:00,813 Testing using last state of model ...


100%|██████████| 110/110 [00:51<00:00,  2.14it/s]

2023-12-22 12:14:52,218 
Results:
- F-score (micro) 0.48
- F-score (macro) 0.3224
- Accuracy 0.48

By class:
              precision    recall  f1-score   support

     comment     0.8964    0.4925    0.6358      1476
     support     0.0886    0.4327    0.1471       104
        deny     0.0598    0.1500    0.0855       100
       query     0.2898    0.7727    0.4215        66

    accuracy                         0.4800      1746
   macro avg     0.3336    0.4620    0.3224      1746
weighted avg     0.7775    0.4800    0.5670      1746

2023-12-22 12:14:52,221 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = True
add_post_title = True
add_depth = True

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[53]: "1 | The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 12:15:03,115 Reading data from .
2023-12-22 12:15:03,116 Train: train_fasttext_format.txt
2023-12-22 12:15:03,118 Dev: dev_fasttext_format.txt
2023-12-22 12:15:03,120 Test: test_fasttext_format.txt
2023-12-22 12:15:03,208 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1651,
        "number_of_documents_per_class": {
            "query": 374,
            "deny": 365,
            "support": 456,
            "comment": 456
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 95749,
            "min": 17,
            "max": 1139,
            "avg": 57.994548758328285
        }
    },
    "TEST": {
        "datas

0it [00:00, ?it/s]
1651it [00:02, 658.86it/s]

2023-12-22 12:15:24,868 Dictionary created for label 'class' with 4 values: support (seen 456 times), comment (seen 456 times), query (seen 374 times), deny (seen 365 times)





2023-12-22 12:15:24,876 ----------------------------------------------------------------------------------------------------
2023-12-22 12:15:24,882 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=4196, out_features=4196, bias=True)
    (rnn): GRU(4196, 128, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:17:52,388 DEV : loss 1.4203896522521973 - f1-score (micro avg)  0.2232





2023-12-22 12:17:54,378 ----------------------------------------------------------------------------------------------------
2023-12-22 12:17:58,700 epoch 2 - iter 2/26 - loss 1.34570795 - time (sec): 4.32 - samples/sec: 29.65 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:18:10,514 epoch 2 - iter 4/26 - loss 1.34303591 - time (sec): 16.13 - samples/sec: 15.87 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:18:13,667 epoch 2 - iter 6/26 - loss 1.36797033 - time (sec): 19.28 - samples/sec: 19.91 - lr: 0.000023 - momentum: 0.000000
2023-12-22 12:18:17,734 epoch 2 - iter 8/26 - loss 1.35089146 - time (sec): 23.35 - samples/sec: 21.93 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:18:20,934 epoch 2 - iter 10/26 - loss 1.32919012 - time (sec): 26.55 - samples/sec: 24.11 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:18:24,348 epoch 2 - iter 12/26 - loss 1.31523749 - time (sec): 29.96 - samples/sec: 25.63 - lr: 0.000028 - momentum: 0.000000
2023-12-22 12:18:27,606 epoch 2 - iter 1

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 12:20:22,976 DEV : loss 1.2279136180877686 - f1-score (micro avg)  0.4598





2023-12-22 12:20:24,788 ----------------------------------------------------------------------------------------------------
2023-12-22 12:20:32,252 epoch 3 - iter 2/26 - loss 1.21650040 - time (sec): 7.46 - samples/sec: 17.16 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:20:37,045 epoch 3 - iter 4/26 - loss 1.21321321 - time (sec): 12.25 - samples/sec: 20.89 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:20:41,507 epoch 3 - iter 6/26 - loss 1.22499063 - time (sec): 16.72 - samples/sec: 22.97 - lr: 0.000043 - momentum: 0.000000
2023-12-22 12:20:47,367 epoch 3 - iter 8/26 - loss 1.23110692 - time (sec): 22.58 - samples/sec: 22.68 - lr: 0.000045 - momentum: 0.000000
2023-12-22 12:20:51,199 epoch 3 - iter 10/26 - loss 1.22121722 - time (sec): 26.41 - samples/sec: 24.24 - lr: 0.000046 - momentum: 0.000000
2023-12-22 12:20:56,414 epoch 3 - iter 12/26 - loss 1.23503529 - time (sec): 31.62 - samples/sec: 24.29 - lr: 0.000048 - momentum: 0.000000
2023-12-22 12:21:02,116 epoch 3 - iter 1

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:22:57,533 DEV : loss 1.218027949333191 - f1-score (micro avg)  0.5221





2023-12-22 12:22:58,711 ----------------------------------------------------------------------------------------------------
2023-12-22 12:23:01,299 epoch 4 - iter 2/26 - loss 1.30613077 - time (sec): 2.59 - samples/sec: 49.49 - lr: 0.000049 - momentum: 0.000000
2023-12-22 12:23:04,290 epoch 4 - iter 4/26 - loss 1.26746362 - time (sec): 5.58 - samples/sec: 45.90 - lr: 0.000049 - momentum: 0.000000
2023-12-22 12:23:09,662 epoch 4 - iter 6/26 - loss 1.23097316 - time (sec): 10.95 - samples/sec: 35.07 - lr: 0.000049 - momentum: 0.000000
2023-12-22 12:23:22,156 epoch 4 - iter 8/26 - loss 1.23735227 - time (sec): 23.44 - samples/sec: 21.84 - lr: 0.000048 - momentum: 0.000000
2023-12-22 12:23:25,622 epoch 4 - iter 10/26 - loss 1.23398621 - time (sec): 26.91 - samples/sec: 23.78 - lr: 0.000048 - momentum: 0.000000
2023-12-22 12:23:29,796 epoch 4 - iter 12/26 - loss 1.22249921 - time (sec): 31.08 - samples/sec: 24.71 - lr: 0.000048 - momentum: 0.000000
2023-12-22 12:23:32,842 epoch 4 - iter 14

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:25:28,945 DEV : loss 1.2180664539337158 - f1-score (micro avg)  0.4487





2023-12-22 12:25:30,091 ----------------------------------------------------------------------------------------------------
2023-12-22 12:25:32,720 epoch 5 - iter 2/26 - loss 1.21152616 - time (sec): 2.63 - samples/sec: 48.75 - lr: 0.000047 - momentum: 0.000000
2023-12-22 12:25:35,918 epoch 5 - iter 4/26 - loss 1.15895966 - time (sec): 5.82 - samples/sec: 43.96 - lr: 0.000047 - momentum: 0.000000
2023-12-22 12:25:49,924 epoch 5 - iter 6/26 - loss 1.11696839 - time (sec): 19.83 - samples/sec: 19.37 - lr: 0.000046 - momentum: 0.000000
2023-12-22 12:25:54,313 epoch 5 - iter 8/26 - loss 1.12045582 - time (sec): 24.22 - samples/sec: 21.14 - lr: 0.000046 - momentum: 0.000000
2023-12-22 12:26:08,061 epoch 5 - iter 10/26 - loss 1.11926587 - time (sec): 37.97 - samples/sec: 16.86 - lr: 0.000046 - momentum: 0.000000
2023-12-22 12:26:10,872 epoch 5 - iter 12/26 - loss 1.13587509 - time (sec): 40.78 - samples/sec: 18.83 - lr: 0.000046 - momentum: 0.000000
2023-12-22 12:26:17,519 epoch 5 - iter 14

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:27:59,284 DEV : loss 1.2039215564727783 - f1-score (micro avg)  0.4385





2023-12-22 12:28:00,446 ----------------------------------------------------------------------------------------------------
2023-12-22 12:28:10,024 epoch 6 - iter 2/26 - loss 1.17502177 - time (sec): 9.58 - samples/sec: 13.37 - lr: 0.000045 - momentum: 0.000000
2023-12-22 12:28:12,556 epoch 6 - iter 4/26 - loss 1.13623321 - time (sec): 12.11 - samples/sec: 21.14 - lr: 0.000044 - momentum: 0.000000
2023-12-22 12:28:21,945 epoch 6 - iter 6/26 - loss 1.11544540 - time (sec): 21.50 - samples/sec: 17.86 - lr: 0.000044 - momentum: 0.000000
2023-12-22 12:28:31,159 epoch 6 - iter 8/26 - loss 1.12632829 - time (sec): 30.71 - samples/sec: 16.67 - lr: 0.000044 - momentum: 0.000000
2023-12-22 12:28:34,767 epoch 6 - iter 10/26 - loss 1.12764417 - time (sec): 34.32 - samples/sec: 18.65 - lr: 0.000044 - momentum: 0.000000
2023-12-22 12:28:40,162 epoch 6 - iter 12/26 - loss 1.11890315 - time (sec): 39.71 - samples/sec: 19.34 - lr: 0.000044 - momentum: 0.000000
2023-12-22 12:28:43,840 epoch 6 - iter 1

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 12:30:29,634 DEV : loss 1.1175391674041748 - f1-score (micro avg)  0.5702





2023-12-22 12:30:30,820 ----------------------------------------------------------------------------------------------------
2023-12-22 12:30:33,248 epoch 7 - iter 2/26 - loss 1.06247014 - time (sec): 2.42 - samples/sec: 52.78 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:30:36,000 epoch 7 - iter 4/26 - loss 1.07662588 - time (sec): 5.18 - samples/sec: 49.45 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:30:44,422 epoch 7 - iter 6/26 - loss 1.06920044 - time (sec): 13.60 - samples/sec: 28.24 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:30:50,590 epoch 7 - iter 8/26 - loss 1.08132409 - time (sec): 19.77 - samples/sec: 25.90 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:30:52,551 epoch 7 - iter 10/26 - loss 1.08022454 - time (sec): 21.73 - samples/sec: 29.45 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:30:59,227 epoch 7 - iter 12/26 - loss 1.07595746 - time (sec): 28.40 - samples/sec: 27.04 - lr: 0.000042 - momentum: 0.000000
2023-12-22 12:31:05,851 epoch 7 - iter 14

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 12:33:00,132 DEV : loss 1.2853120565414429 - f1-score (micro avg)  0.3636





2023-12-22 12:33:02,597 ----------------------------------------------------------------------------------------------------
2023-12-22 12:33:05,386 epoch 8 - iter 2/26 - loss 1.08152038 - time (sec): 2.78 - samples/sec: 45.99 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:33:08,385 epoch 8 - iter 4/26 - loss 1.07083720 - time (sec): 5.78 - samples/sec: 44.27 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:33:10,936 epoch 8 - iter 6/26 - loss 1.07227683 - time (sec): 8.33 - samples/sec: 46.08 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:33:19,251 epoch 8 - iter 8/26 - loss 1.05901562 - time (sec): 16.65 - samples/sec: 30.75 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:33:23,165 epoch 8 - iter 10/26 - loss 1.06601948 - time (sec): 20.56 - samples/sec: 31.12 - lr: 0.000040 - momentum: 0.000000
2023-12-22 12:33:26,297 epoch 8 - iter 12/26 - loss 1.07969424 - time (sec): 23.69 - samples/sec: 32.41 - lr: 0.000039 - momentum: 0.000000
2023-12-22 12:33:33,104 epoch 8 - iter 14/

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 12:35:28,456 DEV : loss 1.2847763299942017 - f1-score (micro avg)  0.3454





2023-12-22 12:35:30,557 ----------------------------------------------------------------------------------------------------
2023-12-22 12:35:34,194 epoch 9 - iter 2/26 - loss 0.91558152 - time (sec): 3.63 - samples/sec: 35.22 - lr: 0.000038 - momentum: 0.000000
2023-12-22 12:35:39,818 epoch 9 - iter 4/26 - loss 0.97277686 - time (sec): 9.26 - samples/sec: 27.65 - lr: 0.000038 - momentum: 0.000000
2023-12-22 12:35:42,513 epoch 9 - iter 6/26 - loss 0.99036475 - time (sec): 11.95 - samples/sec: 32.12 - lr: 0.000038 - momentum: 0.000000
2023-12-22 12:35:48,147 epoch 9 - iter 8/26 - loss 1.01733954 - time (sec): 17.59 - samples/sec: 29.11 - lr: 0.000038 - momentum: 0.000000
2023-12-22 12:35:51,004 epoch 9 - iter 10/26 - loss 1.00705631 - time (sec): 20.44 - samples/sec: 31.30 - lr: 0.000037 - momentum: 0.000000
2023-12-22 12:35:57,825 epoch 9 - iter 12/26 - loss 1.00861055 - time (sec): 27.27 - samples/sec: 28.17 - lr: 0.000037 - momentum: 0.000000
2023-12-22 12:36:00,764 epoch 9 - iter 14

100%|██████████| 80/80 [01:18<00:00,  1.02it/s]

2023-12-22 12:38:02,393 DEV : loss 1.2837849855422974 - f1-score (micro avg)  0.3644





2023-12-22 12:38:05,194 ----------------------------------------------------------------------------------------------------
2023-12-22 12:38:09,788 epoch 10 - iter 2/26 - loss 1.02403626 - time (sec): 4.59 - samples/sec: 27.88 - lr: 0.000036 - momentum: 0.000000
2023-12-22 12:38:15,762 epoch 10 - iter 4/26 - loss 1.01073471 - time (sec): 10.57 - samples/sec: 24.23 - lr: 0.000036 - momentum: 0.000000
2023-12-22 12:38:21,657 epoch 10 - iter 6/26 - loss 0.99620231 - time (sec): 16.46 - samples/sec: 23.33 - lr: 0.000036 - momentum: 0.000000
2023-12-22 12:38:24,318 epoch 10 - iter 8/26 - loss 0.98364633 - time (sec): 19.12 - samples/sec: 26.78 - lr: 0.000035 - momentum: 0.000000
2023-12-22 12:38:41,846 epoch 10 - iter 10/26 - loss 0.98655451 - time (sec): 36.65 - samples/sec: 17.46 - lr: 0.000035 - momentum: 0.000000
2023-12-22 12:38:45,853 epoch 10 - iter 12/26 - loss 1.00134190 - time (sec): 40.66 - samples/sec: 18.89 - lr: 0.000035 - momentum: 0.000000
2023-12-22 12:38:52,816 epoch 10 -

100%|██████████| 80/80 [01:18<00:00,  1.01it/s]

2023-12-22 12:40:30,383 DEV : loss 1.184126377105713 - f1-score (micro avg)  0.4913





2023-12-22 12:40:32,176 ----------------------------------------------------------------------------------------------------
2023-12-22 12:40:43,609 epoch 11 - iter 2/26 - loss 0.97945431 - time (sec): 11.43 - samples/sec: 11.20 - lr: 0.000034 - momentum: 0.000000
2023-12-22 12:40:48,229 epoch 11 - iter 4/26 - loss 0.97889310 - time (sec): 16.05 - samples/sec: 15.95 - lr: 0.000034 - momentum: 0.000000
2023-12-22 12:40:52,638 epoch 11 - iter 6/26 - loss 0.97892250 - time (sec): 20.46 - samples/sec: 18.77 - lr: 0.000033 - momentum: 0.000000
2023-12-22 12:40:55,541 epoch 11 - iter 8/26 - loss 0.99318200 - time (sec): 23.36 - samples/sec: 21.92 - lr: 0.000033 - momentum: 0.000000
2023-12-22 12:40:58,787 epoch 11 - iter 10/26 - loss 0.98913265 - time (sec): 26.61 - samples/sec: 24.05 - lr: 0.000033 - momentum: 0.000000
2023-12-22 12:41:06,339 epoch 11 - iter 12/26 - loss 0.99510082 - time (sec): 34.16 - samples/sec: 22.48 - lr: 0.000033 - momentum: 0.000000
2023-12-22 12:41:15,694 epoch 11 

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:43:07,128 DEV : loss 1.2060348987579346 - f1-score (micro avg)  0.4637





2023-12-22 12:43:08,295 ----------------------------------------------------------------------------------------------------
2023-12-22 12:43:11,393 epoch 12 - iter 2/26 - loss 0.95027786 - time (sec): 3.10 - samples/sec: 41.34 - lr: 0.000032 - momentum: 0.000000
2023-12-22 12:43:13,960 epoch 12 - iter 4/26 - loss 0.98528925 - time (sec): 5.66 - samples/sec: 45.21 - lr: 0.000032 - momentum: 0.000000
2023-12-22 12:43:16,444 epoch 12 - iter 6/26 - loss 0.96933770 - time (sec): 8.15 - samples/sec: 47.13 - lr: 0.000031 - momentum: 0.000000
2023-12-22 12:43:24,975 epoch 12 - iter 8/26 - loss 0.96370418 - time (sec): 16.68 - samples/sec: 30.70 - lr: 0.000031 - momentum: 0.000000
2023-12-22 12:43:33,813 epoch 12 - iter 10/26 - loss 0.96317694 - time (sec): 25.52 - samples/sec: 25.08 - lr: 0.000031 - momentum: 0.000000
2023-12-22 12:43:36,839 epoch 12 - iter 12/26 - loss 0.95068681 - time (sec): 28.54 - samples/sec: 26.91 - lr: 0.000031 - momentum: 0.000000
2023-12-22 12:43:52,934 epoch 12 - i

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:45:39,360 DEV : loss 1.1791115999221802 - f1-score (micro avg)  0.4897





2023-12-22 12:45:40,503 ----------------------------------------------------------------------------------------------------
2023-12-22 12:45:52,845 epoch 13 - iter 2/26 - loss 0.97207332 - time (sec): 12.34 - samples/sec: 10.37 - lr: 0.000030 - momentum: 0.000000
2023-12-22 12:46:01,818 epoch 13 - iter 4/26 - loss 0.99661204 - time (sec): 21.31 - samples/sec: 12.01 - lr: 0.000029 - momentum: 0.000000
2023-12-22 12:46:05,091 epoch 13 - iter 6/26 - loss 0.99240831 - time (sec): 24.59 - samples/sec: 15.62 - lr: 0.000029 - momentum: 0.000000
2023-12-22 12:46:08,927 epoch 13 - iter 8/26 - loss 0.99117041 - time (sec): 28.42 - samples/sec: 18.01 - lr: 0.000029 - momentum: 0.000000
2023-12-22 12:46:11,332 epoch 13 - iter 10/26 - loss 1.00885235 - time (sec): 30.83 - samples/sec: 20.76 - lr: 0.000029 - momentum: 0.000000
2023-12-22 12:46:14,330 epoch 13 - iter 12/26 - loss 1.00724007 - time (sec): 33.83 - samples/sec: 22.70 - lr: 0.000029 - momentum: 0.000000
2023-12-22 12:46:21,124 epoch 13 

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:48:09,811 DEV : loss 1.2126154899597168 - f1-score (micro avg)  0.4479





2023-12-22 12:48:10,983 ----------------------------------------------------------------------------------------------------
2023-12-22 12:48:17,776 epoch 14 - iter 2/26 - loss 0.93236876 - time (sec): 6.79 - samples/sec: 18.85 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:23,622 epoch 14 - iter 4/26 - loss 0.94565070 - time (sec): 12.64 - samples/sec: 20.26 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:29,201 epoch 14 - iter 6/26 - loss 0.92371290 - time (sec): 18.21 - samples/sec: 21.08 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:32,004 epoch 14 - iter 8/26 - loss 0.93430141 - time (sec): 21.02 - samples/sec: 24.36 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:34,408 epoch 14 - iter 10/26 - loss 0.92274599 - time (sec): 23.42 - samples/sec: 27.33 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:38,897 epoch 14 - iter 12/26 - loss 0.91444534 - time (sec): 27.91 - samples/sec: 27.52 - lr: 0.000027 - momentum: 0.000000
2023-12-22 12:48:45,815 epoch 14 -

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:50:41,302 DEV : loss 1.1530295610427856 - f1-score (micro avg)  0.5039





2023-12-22 12:50:43,366 ----------------------------------------------------------------------------------------------------
2023-12-22 12:50:52,204 epoch 15 - iter 2/26 - loss 0.85028628 - time (sec): 8.83 - samples/sec: 14.49 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:51:03,519 epoch 15 - iter 4/26 - loss 0.89546753 - time (sec): 20.15 - samples/sec: 12.71 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:51:05,791 epoch 15 - iter 6/26 - loss 0.91370274 - time (sec): 22.42 - samples/sec: 17.13 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:51:09,550 epoch 15 - iter 8/26 - loss 0.91977592 - time (sec): 26.18 - samples/sec: 19.56 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:51:13,301 epoch 15 - iter 10/26 - loss 0.92426479 - time (sec): 29.93 - samples/sec: 21.38 - lr: 0.000025 - momentum: 0.000000
2023-12-22 12:51:22,294 epoch 15 - iter 12/26 - loss 0.90912231 - time (sec): 38.92 - samples/sec: 19.73 - lr: 0.000024 - momentum: 0.000000
2023-12-22 12:51:25,817 epoch 15 -

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:53:05,715 DEV : loss 1.285651445388794 - f1-score (micro avg)  0.3864





2023-12-22 12:53:06,907 ----------------------------------------------------------------------------------------------------
2023-12-22 12:53:09,588 epoch 16 - iter 2/26 - loss 0.97775984 - time (sec): 2.68 - samples/sec: 47.79 - lr: 0.000023 - momentum: 0.000000
2023-12-22 12:53:11,771 epoch 16 - iter 4/26 - loss 0.96026003 - time (sec): 4.86 - samples/sec: 52.66 - lr: 0.000023 - momentum: 0.000000
2023-12-22 12:53:16,300 epoch 16 - iter 6/26 - loss 0.95118976 - time (sec): 9.39 - samples/sec: 40.89 - lr: 0.000023 - momentum: 0.000000
2023-12-22 12:53:19,851 epoch 16 - iter 8/26 - loss 0.94160128 - time (sec): 12.94 - samples/sec: 39.56 - lr: 0.000023 - momentum: 0.000000
2023-12-22 12:53:29,360 epoch 16 - iter 10/26 - loss 0.93625658 - time (sec): 22.45 - samples/sec: 28.51 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:53:31,976 epoch 16 - iter 12/26 - loss 0.93504400 - time (sec): 25.07 - samples/sec: 30.64 - lr: 0.000022 - momentum: 0.000000
2023-12-22 12:53:39,796 epoch 16 - i

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:55:35,579 DEV : loss 1.2118014097213745 - f1-score (micro avg)  0.4716





2023-12-22 12:55:36,768 ----------------------------------------------------------------------------------------------------
2023-12-22 12:55:40,990 epoch 17 - iter 2/26 - loss 0.88450208 - time (sec): 4.22 - samples/sec: 30.34 - lr: 0.000021 - momentum: 0.000000
2023-12-22 12:55:46,040 epoch 17 - iter 4/26 - loss 0.88719022 - time (sec): 9.27 - samples/sec: 27.62 - lr: 0.000021 - momentum: 0.000000
2023-12-22 12:55:48,688 epoch 17 - iter 6/26 - loss 0.88532747 - time (sec): 11.92 - samples/sec: 32.22 - lr: 0.000021 - momentum: 0.000000
2023-12-22 12:56:02,686 epoch 17 - iter 8/26 - loss 0.88870604 - time (sec): 25.92 - samples/sec: 19.76 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:56:06,010 epoch 17 - iter 10/26 - loss 0.90500874 - time (sec): 29.24 - samples/sec: 21.89 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:56:13,202 epoch 17 - iter 12/26 - loss 0.92665723 - time (sec): 36.43 - samples/sec: 21.08 - lr: 0.000020 - momentum: 0.000000
2023-12-22 12:56:15,880 epoch 17 - 

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 12:58:02,404 DEV : loss 1.2294518947601318 - f1-score (micro avg)  0.4424





2023-12-22 12:58:03,581 ----------------------------------------------------------------------------------------------------
2023-12-22 12:58:05,481 epoch 18 - iter 2/26 - loss 0.84112552 - time (sec): 1.90 - samples/sec: 67.44 - lr: 0.000019 - momentum: 0.000000
2023-12-22 12:58:09,285 epoch 18 - iter 4/26 - loss 0.84560180 - time (sec): 5.70 - samples/sec: 44.89 - lr: 0.000019 - momentum: 0.000000
2023-12-22 12:58:12,485 epoch 18 - iter 6/26 - loss 0.88084545 - time (sec): 8.90 - samples/sec: 43.13 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:58:17,694 epoch 18 - iter 8/26 - loss 0.88063325 - time (sec): 14.11 - samples/sec: 36.28 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:58:24,490 epoch 18 - iter 10/26 - loss 0.89294413 - time (sec): 20.91 - samples/sec: 30.61 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:58:40,522 epoch 18 - iter 12/26 - loss 0.91249470 - time (sec): 36.94 - samples/sec: 20.79 - lr: 0.000018 - momentum: 0.000000
2023-12-22 12:58:45,094 epoch 18 - i

100%|██████████| 80/80 [01:18<00:00,  1.02it/s]

2023-12-22 13:00:32,668 DEV : loss 1.230904221534729 - f1-score (micro avg)  0.4464





2023-12-22 13:00:34,810 ----------------------------------------------------------------------------------------------------
2023-12-22 13:00:40,340 epoch 19 - iter 2/26 - loss 0.92570478 - time (sec): 5.53 - samples/sec: 23.15 - lr: 0.000017 - momentum: 0.000000
2023-12-22 13:00:43,516 epoch 19 - iter 4/26 - loss 0.92655393 - time (sec): 8.70 - samples/sec: 29.41 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:00:54,747 epoch 19 - iter 6/26 - loss 0.91285435 - time (sec): 19.94 - samples/sec: 19.26 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:00:58,920 epoch 19 - iter 8/26 - loss 0.91595168 - time (sec): 24.11 - samples/sec: 21.24 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:01:01,497 epoch 19 - iter 10/26 - loss 0.89707784 - time (sec): 26.69 - samples/sec: 23.98 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:01:13,560 epoch 19 - iter 12/26 - loss 0.89218637 - time (sec): 38.75 - samples/sec: 19.82 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:01:18,887 epoch 19 - 

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 13:03:03,030 DEV : loss 1.1750818490982056 - f1-score (micro avg)  0.5





2023-12-22 13:03:05,137 ----------------------------------------------------------------------------------------------------
2023-12-22 13:03:10,640 epoch 20 - iter 2/26 - loss 0.89453787 - time (sec): 5.50 - samples/sec: 23.27 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:19,707 epoch 20 - iter 4/26 - loss 0.93649419 - time (sec): 14.57 - samples/sec: 17.57 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:26,779 epoch 20 - iter 6/26 - loss 0.89584099 - time (sec): 21.64 - samples/sec: 17.75 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:38,758 epoch 20 - iter 8/26 - loss 0.88863051 - time (sec): 33.62 - samples/sec: 15.23 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:43,741 epoch 20 - iter 10/26 - loss 0.89004080 - time (sec): 38.60 - samples/sec: 16.58 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:51,305 epoch 20 - iter 12/26 - loss 0.88207986 - time (sec): 46.17 - samples/sec: 16.64 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:03:57,228 epoch 20 -

100%|██████████| 80/80 [01:18<00:00,  1.02it/s]

2023-12-22 13:05:34,333 DEV : loss 1.164432168006897 - f1-score (micro avg)  0.4976





2023-12-22 13:05:36,784 ----------------------------------------------------------------------------------------------------
2023-12-22 13:05:39,595 epoch 21 - iter 2/26 - loss 0.87277222 - time (sec): 2.81 - samples/sec: 45.63 - lr: 0.000012 - momentum: 0.000000
2023-12-22 13:05:53,572 epoch 21 - iter 4/26 - loss 0.91264896 - time (sec): 16.78 - samples/sec: 15.25 - lr: 0.000012 - momentum: 0.000000
2023-12-22 13:05:59,006 epoch 21 - iter 6/26 - loss 0.88156739 - time (sec): 22.22 - samples/sec: 17.28 - lr: 0.000012 - momentum: 0.000000
2023-12-22 13:06:01,909 epoch 21 - iter 8/26 - loss 0.86899566 - time (sec): 25.12 - samples/sec: 20.38 - lr: 0.000012 - momentum: 0.000000
2023-12-22 13:06:04,603 epoch 21 - iter 10/26 - loss 0.87389849 - time (sec): 27.81 - samples/sec: 23.01 - lr: 0.000012 - momentum: 0.000000
2023-12-22 13:06:08,611 epoch 21 - iter 12/26 - loss 0.88802102 - time (sec): 31.82 - samples/sec: 24.13 - lr: 0.000011 - momentum: 0.000000
2023-12-22 13:06:13,136 epoch 21 -

100%|██████████| 80/80 [01:18<00:00,  1.01it/s]

2023-12-22 13:08:09,049 DEV : loss 1.2726713418960571 - f1-score (micro avg)  0.4203





2023-12-22 13:08:10,265 ----------------------------------------------------------------------------------------------------
2023-12-22 13:08:15,246 epoch 22 - iter 2/26 - loss 0.88803563 - time (sec): 4.98 - samples/sec: 25.71 - lr: 0.000010 - momentum: 0.000000
2023-12-22 13:08:25,887 epoch 22 - iter 4/26 - loss 0.86526087 - time (sec): 15.62 - samples/sec: 16.39 - lr: 0.000010 - momentum: 0.000000
2023-12-22 13:08:29,115 epoch 22 - iter 6/26 - loss 0.87773343 - time (sec): 18.85 - samples/sec: 20.37 - lr: 0.000010 - momentum: 0.000000
2023-12-22 13:08:33,172 epoch 22 - iter 8/26 - loss 0.88505465 - time (sec): 22.90 - samples/sec: 22.35 - lr: 0.000010 - momentum: 0.000000
2023-12-22 13:08:42,469 epoch 22 - iter 10/26 - loss 0.87069426 - time (sec): 32.20 - samples/sec: 19.88 - lr: 0.000009 - momentum: 0.000000
2023-12-22 13:08:46,398 epoch 22 - iter 12/26 - loss 0.88137503 - time (sec): 36.13 - samples/sec: 21.26 - lr: 0.000009 - momentum: 0.000000
2023-12-22 13:08:50,618 epoch 22 -

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 13:10:43,320 DEV : loss 1.1953338384628296 - f1-score (micro avg)  0.4763





2023-12-22 13:10:44,471 ----------------------------------------------------------------------------------------------------
2023-12-22 13:10:47,220 epoch 23 - iter 2/26 - loss 0.81465110 - time (sec): 2.75 - samples/sec: 46.59 - lr: 0.000008 - momentum: 0.000000
2023-12-22 13:10:51,416 epoch 23 - iter 4/26 - loss 0.81312135 - time (sec): 6.94 - samples/sec: 36.87 - lr: 0.000008 - momentum: 0.000000
2023-12-22 13:11:02,772 epoch 23 - iter 6/26 - loss 0.84405167 - time (sec): 18.30 - samples/sec: 20.98 - lr: 0.000008 - momentum: 0.000000
2023-12-22 13:11:10,254 epoch 23 - iter 8/26 - loss 0.86910310 - time (sec): 25.78 - samples/sec: 19.86 - lr: 0.000007 - momentum: 0.000000
2023-12-22 13:11:13,834 epoch 23 - iter 10/26 - loss 0.85527936 - time (sec): 29.36 - samples/sec: 21.80 - lr: 0.000007 - momentum: 0.000000
2023-12-22 13:11:22,593 epoch 23 - iter 12/26 - loss 0.86465960 - time (sec): 38.12 - samples/sec: 20.15 - lr: 0.000007 - momentum: 0.000000
2023-12-22 13:11:28,332 epoch 23 - 

100%|██████████| 80/80 [01:19<00:00,  1.01it/s]

2023-12-22 13:13:14,007 DEV : loss 1.2241840362548828 - f1-score (micro avg)  0.4645





2023-12-22 13:13:15,168 ----------------------------------------------------------------------------------------------------
2023-12-22 13:13:20,323 epoch 24 - iter 2/26 - loss 0.79711616 - time (sec): 5.15 - samples/sec: 24.84 - lr: 0.000006 - momentum: 0.000000
2023-12-22 13:13:25,946 epoch 24 - iter 4/26 - loss 0.89651655 - time (sec): 10.78 - samples/sec: 23.76 - lr: 0.000006 - momentum: 0.000000
2023-12-22 13:13:41,319 epoch 24 - iter 6/26 - loss 0.89056412 - time (sec): 26.15 - samples/sec: 14.69 - lr: 0.000005 - momentum: 0.000000
2023-12-22 13:13:45,474 epoch 24 - iter 8/26 - loss 0.88578577 - time (sec): 30.30 - samples/sec: 16.90 - lr: 0.000005 - momentum: 0.000000
2023-12-22 13:13:48,354 epoch 24 - iter 10/26 - loss 0.89207748 - time (sec): 33.18 - samples/sec: 19.29 - lr: 0.000005 - momentum: 0.000000
2023-12-22 13:13:53,772 epoch 24 - iter 12/26 - loss 0.88006325 - time (sec): 38.60 - samples/sec: 19.90 - lr: 0.000005 - momentum: 0.000000
2023-12-22 13:13:57,610 epoch 24 -

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 13:15:47,515 DEV : loss 1.2222685813903809 - f1-score (micro avg)  0.4692





2023-12-22 13:15:48,677 ----------------------------------------------------------------------------------------------------
2023-12-22 13:16:04,662 epoch 25 - iter 2/26 - loss 0.78753948 - time (sec): 15.98 - samples/sec: 8.01 - lr: 0.000004 - momentum: 0.000000
2023-12-22 13:16:15,183 epoch 25 - iter 4/26 - loss 0.81353568 - time (sec): 26.50 - samples/sec: 9.66 - lr: 0.000003 - momentum: 0.000000
2023-12-22 13:16:21,882 epoch 25 - iter 6/26 - loss 0.80403164 - time (sec): 33.20 - samples/sec: 11.57 - lr: 0.000003 - momentum: 0.000000
2023-12-22 13:16:24,834 epoch 25 - iter 8/26 - loss 0.82936451 - time (sec): 36.16 - samples/sec: 14.16 - lr: 0.000003 - momentum: 0.000000
2023-12-22 13:16:29,573 epoch 25 - iter 10/26 - loss 0.82002468 - time (sec): 40.89 - samples/sec: 15.65 - lr: 0.000003 - momentum: 0.000000
2023-12-22 13:16:33,691 epoch 25 - iter 12/26 - loss 0.83177345 - time (sec): 45.01 - samples/sec: 17.06 - lr: 0.000003 - momentum: 0.000000
2023-12-22 13:16:36,662 epoch 25 - 

100%|██████████| 80/80 [01:19<00:00,  1.00it/s]

2023-12-22 13:18:12,782 DEV : loss 1.2317458391189575 - f1-score (micro avg)  0.4574





2023-12-22 13:18:15,289 ----------------------------------------------------------------------------------------------------
2023-12-22 13:18:15,292 Testing using last state of model ...


100%|██████████| 110/110 [01:22<00:00,  1.33it/s]

2023-12-22 13:19:37,863 
Results:
- F-score (micro) 0.4914
- F-score (macro) 0.3195
- Accuracy 0.4914

By class:
              precision    recall  f1-score   support

     comment     0.9011    0.5061    0.6482      1476
     support     0.1106    0.4904    0.1805       104
        deny     0.0569    0.1600    0.0840       100
       query     0.2514    0.6667    0.3651        66

    accuracy                         0.4914      1746
   macro avg     0.3300    0.4558    0.3195      1746
weighted avg     0.7811    0.4914    0.5773      1746

2023-12-22 13:19:37,865 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = True
add_post_title = False
add_depth = True

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[53]: "1 | The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$ | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 13:19:46,396 Reading data from .
2023-12-22 13:19:46,400 Train: train_fasttext_format.txt
2023-12-22 13:19:46,404 Dev: dev_fasttext_format.txt
2023-12-22 13:19:46,406 Test: test_fasttext_format.txt
2023-12-22 13:19:46,554 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1624,
        "number_of_documents_per_class": {
            "support": 442,
            "comment": 442,
            "deny": 354,
            "query": 386
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 78483,
            "min": 9,
            "max": 511,
            "avg": 48.32697044334975
        }
    },
    "TEST": {
        "dataset"

0it [00:00, ?it/s]
1624it [00:01, 1189.04it/s]

2023-12-22 13:20:06,987 Dictionary created for label 'class' with 4 values: support (seen 442 times), comment (seen 442 times), query (seen 386 times), deny (seen 354 times)
2023-12-22 13:20:06,996 ----------------------------------------------------------------------------------------------------
2023-12-22 13:20:06,997 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-22 13:20:10,840 epoch 1 - iter 2/26 - loss 1.48503661 - time (sec): 3.81 - samples/sec: 33.56 - lr: 0.000001 - momentum: 0.000000
2023-12-22 13:20:16,112 epoch 1 - iter 4/26 - loss 1.46680987 - time (sec): 9.09 - samples/sec: 28.17 - lr: 0.000002 - momentum: 0.000000
2023-12-22 13:20:18,750 epoch 1 - iter 6/26 - loss 1.46794643 - time (sec): 11.72 - samples/sec: 32.75 - lr: 0.000004 - momentum: 0.000000
2023-12-22 13:20:24,870 epoch 1 - iter 8/26 - loss 1.46313447 - time (sec): 17.84 - samples/sec: 28.69 - lr: 0.000005 - momentum: 0.000000
2023-12-22 13:20:29,215 epoch 1 - iter 10/26 - loss 1.46397672 - time (sec): 22.19 - samples/sec: 28.84 - lr: 0.000007 - momentum: 0.000000
2023-12-22 13:20:31,341 epoch 1 - iter 12/26 - loss 1.46504609 - time (sec): 24.32 - samples/sec: 31.59 - lr: 0.000008 - momentum: 0.000000
2023-12-22 13:20:36,953 epoch 1 - iter 14/26 - loss 1.45523047 - time (sec): 29.93 - samples/sec: 29.94 - lr: 0.000010 - momentum: 0.000000
2023-12-22 13:20:39,192 ep

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:22:08,683 DEV : loss 1.2777016162872314 - f1-score (micro avg)  0.4606





2023-12-22 13:22:09,675 ----------------------------------------------------------------------------------------------------
2023-12-22 13:22:12,350 epoch 2 - iter 2/26 - loss 1.34329206 - time (sec): 2.67 - samples/sec: 47.89 - lr: 0.000021 - momentum: 0.000000
2023-12-22 13:22:17,482 epoch 2 - iter 4/26 - loss 1.33411908 - time (sec): 7.80 - samples/sec: 32.80 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:22:25,062 epoch 2 - iter 6/26 - loss 1.35379477 - time (sec): 15.38 - samples/sec: 24.96 - lr: 0.000024 - momentum: 0.000000
2023-12-22 13:22:27,705 epoch 2 - iter 8/26 - loss 1.34895538 - time (sec): 18.03 - samples/sec: 28.40 - lr: 0.000025 - momentum: 0.000000
2023-12-22 13:22:29,919 epoch 2 - iter 10/26 - loss 1.35406386 - time (sec): 20.24 - samples/sec: 31.62 - lr: 0.000027 - momentum: 0.000000
2023-12-22 13:22:32,092 epoch 2 - iter 12/26 - loss 1.33432655 - time (sec): 22.41 - samples/sec: 34.26 - lr: 0.000028 - momentum: 0.000000
2023-12-22 13:22:35,739 epoch 2 - iter 14

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:24:08,966 DEV : loss 1.185672402381897 - f1-score (micro avg)  0.5371





2023-12-22 13:24:09,940 ----------------------------------------------------------------------------------------------------
2023-12-22 13:24:12,708 epoch 3 - iter 2/26 - loss 1.38536596 - time (sec): 2.77 - samples/sec: 46.28 - lr: 0.000041 - momentum: 0.000000
2023-12-22 13:24:17,193 epoch 3 - iter 4/26 - loss 1.34992033 - time (sec): 7.25 - samples/sec: 35.30 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:24:19,220 epoch 3 - iter 6/26 - loss 1.30460860 - time (sec): 9.28 - samples/sec: 41.39 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:24:26,295 epoch 3 - iter 8/26 - loss 1.26942053 - time (sec): 16.35 - samples/sec: 31.31 - lr: 0.000045 - momentum: 0.000000
2023-12-22 13:24:27,953 epoch 3 - iter 10/26 - loss 1.26257843 - time (sec): 18.01 - samples/sec: 35.53 - lr: 0.000047 - momentum: 0.000000
2023-12-22 13:24:29,974 epoch 3 - iter 12/26 - loss 1.26314296 - time (sec): 20.03 - samples/sec: 38.34 - lr: 0.000048 - momentum: 0.000000
2023-12-22 13:24:34,708 epoch 3 - iter 14/

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]

2023-12-22 13:26:08,542 DEV : loss 1.117826223373413 - f1-score (micro avg)  0.5844





2023-12-22 13:26:10,417 ----------------------------------------------------------------------------------------------------
2023-12-22 13:26:12,162 epoch 4 - iter 2/26 - loss 1.16188592 - time (sec): 1.74 - samples/sec: 73.42 - lr: 0.000049 - momentum: 0.000000
2023-12-22 13:26:16,238 epoch 4 - iter 4/26 - loss 1.20893836 - time (sec): 5.82 - samples/sec: 44.00 - lr: 0.000049 - momentum: 0.000000
2023-12-22 13:26:20,361 epoch 4 - iter 6/26 - loss 1.21899054 - time (sec): 9.94 - samples/sec: 38.63 - lr: 0.000048 - momentum: 0.000000
2023-12-22 13:26:23,496 epoch 4 - iter 8/26 - loss 1.20885934 - time (sec): 13.08 - samples/sec: 39.15 - lr: 0.000048 - momentum: 0.000000
2023-12-22 13:26:25,792 epoch 4 - iter 10/26 - loss 1.20766717 - time (sec): 15.37 - samples/sec: 41.63 - lr: 0.000048 - momentum: 0.000000
2023-12-22 13:26:28,262 epoch 4 - iter 12/26 - loss 1.20912423 - time (sec): 17.84 - samples/sec: 43.04 - lr: 0.000048 - momentum: 0.000000
2023-12-22 13:26:30,576 epoch 4 - iter 14/

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]


2023-12-22 13:28:07,486 DEV : loss 1.1762282848358154 - f1-score (micro avg)  0.5552
2023-12-22 13:28:08,461 ----------------------------------------------------------------------------------------------------
2023-12-22 13:28:11,094 epoch 5 - iter 2/26 - loss 1.20939296 - time (sec): 2.63 - samples/sec: 48.65 - lr: 0.000047 - momentum: 0.000000
2023-12-22 13:28:14,420 epoch 5 - iter 4/26 - loss 1.15996617 - time (sec): 5.96 - samples/sec: 42.97 - lr: 0.000046 - momentum: 0.000000
2023-12-22 13:28:19,001 epoch 5 - iter 6/26 - loss 1.13670025 - time (sec): 10.54 - samples/sec: 36.44 - lr: 0.000046 - momentum: 0.000000
2023-12-22 13:28:20,262 epoch 5 - iter 8/26 - loss 1.16889782 - time (sec): 11.80 - samples/sec: 43.39 - lr: 0.000046 - momentum: 0.000000
2023-12-22 13:28:27,915 epoch 5 - iter 10/26 - loss 1.17515765 - time (sec): 19.45 - samples/sec: 32.90 - lr: 0.000046 - momentum: 0.000000
2023-12-22 13:28:31,643 epoch 5 - iter 12/26 - loss 1.16754355 - time (sec): 23.18 - samples/sec

100%|██████████| 80/80 [01:13<00:00,  1.10it/s]

2023-12-22 13:30:09,766 DEV : loss 1.2090609073638916 - f1-score (micro avg)  0.5039





2023-12-22 13:30:11,470 ----------------------------------------------------------------------------------------------------
2023-12-22 13:30:14,639 epoch 6 - iter 2/26 - loss 1.12942320 - time (sec): 3.16 - samples/sec: 40.47 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:17,033 epoch 6 - iter 4/26 - loss 1.14805514 - time (sec): 5.56 - samples/sec: 46.07 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:21,823 epoch 6 - iter 6/26 - loss 1.13336299 - time (sec): 10.35 - samples/sec: 37.11 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:24,157 epoch 6 - iter 8/26 - loss 1.11978664 - time (sec): 12.68 - samples/sec: 40.38 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:29,407 epoch 6 - iter 10/26 - loss 1.12880213 - time (sec): 17.93 - samples/sec: 35.69 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:32,757 epoch 6 - iter 12/26 - loss 1.13270236 - time (sec): 21.28 - samples/sec: 36.09 - lr: 0.000044 - momentum: 0.000000
2023-12-22 13:30:34,491 epoch 6 - iter 14

100%|██████████| 80/80 [01:12<00:00,  1.11it/s]


2023-12-22 13:32:11,972 DEV : loss 1.1603182554244995 - f1-score (micro avg)  0.5
2023-12-22 13:32:14,383 ----------------------------------------------------------------------------------------------------
2023-12-22 13:32:15,881 epoch 7 - iter 2/26 - loss 1.09969658 - time (sec): 1.50 - samples/sec: 85.52 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:32:17,185 epoch 7 - iter 4/26 - loss 1.11437589 - time (sec): 2.80 - samples/sec: 91.41 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:32:21,575 epoch 7 - iter 6/26 - loss 1.10024605 - time (sec): 7.19 - samples/sec: 53.41 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:32:24,162 epoch 7 - iter 8/26 - loss 1.08764529 - time (sec): 9.78 - samples/sec: 52.37 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:32:27,456 epoch 7 - iter 10/26 - loss 1.09067740 - time (sec): 13.07 - samples/sec: 48.96 - lr: 0.000042 - momentum: 0.000000
2023-12-22 13:32:30,064 epoch 7 - iter 12/26 - loss 1.10391747 - time (sec): 15.68 - samples/sec: 48.

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]


2023-12-22 13:34:12,519 DEV : loss 1.1287509202957153 - f1-score (micro avg)  0.5244
2023-12-22 13:34:13,510 ----------------------------------------------------------------------------------------------------
2023-12-22 13:34:16,190 epoch 8 - iter 2/26 - loss 1.13340819 - time (sec): 2.68 - samples/sec: 47.79 - lr: 0.000040 - momentum: 0.000000
2023-12-22 13:34:18,227 epoch 8 - iter 4/26 - loss 1.09029031 - time (sec): 4.71 - samples/sec: 54.30 - lr: 0.000040 - momentum: 0.000000
2023-12-22 13:34:24,754 epoch 8 - iter 6/26 - loss 1.08950808 - time (sec): 11.24 - samples/sec: 34.16 - lr: 0.000040 - momentum: 0.000000
2023-12-22 13:34:27,823 epoch 8 - iter 8/26 - loss 1.10670188 - time (sec): 14.31 - samples/sec: 35.78 - lr: 0.000040 - momentum: 0.000000
2023-12-22 13:34:34,298 epoch 8 - iter 10/26 - loss 1.08509991 - time (sec): 20.79 - samples/sec: 30.79 - lr: 0.000039 - momentum: 0.000000
2023-12-22 13:34:39,747 epoch 8 - iter 12/26 - loss 1.08755763 - time (sec): 26.23 - samples/sec

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:36:14,250 DEV : loss 1.1815330982208252 - f1-score (micro avg)  0.4535





2023-12-22 13:36:15,260 ----------------------------------------------------------------------------------------------------
2023-12-22 13:36:18,533 epoch 9 - iter 2/26 - loss 1.06640023 - time (sec): 3.27 - samples/sec: 39.13 - lr: 0.000038 - momentum: 0.000000
2023-12-22 13:36:24,892 epoch 9 - iter 4/26 - loss 1.02312447 - time (sec): 9.63 - samples/sec: 26.58 - lr: 0.000038 - momentum: 0.000000
2023-12-22 13:36:28,507 epoch 9 - iter 6/26 - loss 1.02366042 - time (sec): 13.25 - samples/sec: 28.99 - lr: 0.000038 - momentum: 0.000000
2023-12-22 13:36:30,309 epoch 9 - iter 8/26 - loss 1.03982621 - time (sec): 15.05 - samples/sec: 34.03 - lr: 0.000037 - momentum: 0.000000
2023-12-22 13:36:33,238 epoch 9 - iter 10/26 - loss 1.03618370 - time (sec): 17.98 - samples/sec: 35.60 - lr: 0.000037 - momentum: 0.000000
2023-12-22 13:36:38,819 epoch 9 - iter 12/26 - loss 1.03740939 - time (sec): 23.56 - samples/sec: 32.60 - lr: 0.000037 - momentum: 0.000000
2023-12-22 13:36:47,805 epoch 9 - iter 14

100%|██████████| 80/80 [01:12<00:00,  1.11it/s]

2023-12-22 13:38:16,498 DEV : loss 1.1742247343063354 - f1-score (micro avg)  0.4779





2023-12-22 13:38:18,388 ----------------------------------------------------------------------------------------------------
2023-12-22 13:38:21,047 epoch 10 - iter 2/26 - loss 0.97121716 - time (sec): 2.66 - samples/sec: 48.19 - lr: 0.000036 - momentum: 0.000000
2023-12-22 13:38:24,419 epoch 10 - iter 4/26 - loss 0.99377546 - time (sec): 6.03 - samples/sec: 42.46 - lr: 0.000035 - momentum: 0.000000
2023-12-22 13:38:30,232 epoch 10 - iter 6/26 - loss 1.00390773 - time (sec): 11.84 - samples/sec: 32.43 - lr: 0.000035 - momentum: 0.000000
2023-12-22 13:38:35,594 epoch 10 - iter 8/26 - loss 1.02226327 - time (sec): 17.20 - samples/sec: 29.76 - lr: 0.000035 - momentum: 0.000000
2023-12-22 13:38:38,378 epoch 10 - iter 10/26 - loss 1.01936384 - time (sec): 19.99 - samples/sec: 32.02 - lr: 0.000035 - momentum: 0.000000
2023-12-22 13:38:43,710 epoch 10 - iter 12/26 - loss 1.01982219 - time (sec): 25.32 - samples/sec: 30.33 - lr: 0.000035 - momentum: 0.000000
2023-12-22 13:38:46,524 epoch 10 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:40:19,697 DEV : loss 1.0948690176010132 - f1-score (micro avg)  0.5513





2023-12-22 13:40:21,438 ----------------------------------------------------------------------------------------------------
2023-12-22 13:40:23,851 epoch 11 - iter 2/26 - loss 0.96130434 - time (sec): 2.41 - samples/sec: 53.10 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:26,550 epoch 11 - iter 4/26 - loss 0.99919547 - time (sec): 5.11 - samples/sec: 50.11 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:32,949 epoch 11 - iter 6/26 - loss 1.01413507 - time (sec): 11.51 - samples/sec: 33.37 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:36,659 epoch 11 - iter 8/26 - loss 1.00867289 - time (sec): 15.22 - samples/sec: 33.64 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:42,154 epoch 11 - iter 10/26 - loss 1.00465441 - time (sec): 20.71 - samples/sec: 30.90 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:45,568 epoch 11 - iter 12/26 - loss 1.01277239 - time (sec): 24.13 - samples/sec: 31.83 - lr: 0.000033 - momentum: 0.000000
2023-12-22 13:40:49,384 epoch 11 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:42:21,447 DEV : loss 1.1783099174499512 - f1-score (micro avg)  0.4866





2023-12-22 13:42:23,155 ----------------------------------------------------------------------------------------------------
2023-12-22 13:42:29,905 epoch 12 - iter 2/26 - loss 1.02516913 - time (sec): 6.75 - samples/sec: 18.97 - lr: 0.000031 - momentum: 0.000000
2023-12-22 13:42:32,705 epoch 12 - iter 4/26 - loss 1.01654905 - time (sec): 9.55 - samples/sec: 26.82 - lr: 0.000031 - momentum: 0.000000
2023-12-22 13:42:35,075 epoch 12 - iter 6/26 - loss 1.02127188 - time (sec): 11.92 - samples/sec: 32.23 - lr: 0.000031 - momentum: 0.000000
2023-12-22 13:42:40,199 epoch 12 - iter 8/26 - loss 1.03508037 - time (sec): 17.04 - samples/sec: 30.05 - lr: 0.000031 - momentum: 0.000000
2023-12-22 13:42:42,056 epoch 12 - iter 10/26 - loss 1.04894058 - time (sec): 18.90 - samples/sec: 33.87 - lr: 0.000031 - momentum: 0.000000
2023-12-22 13:42:44,092 epoch 12 - iter 12/26 - loss 1.02828564 - time (sec): 20.93 - samples/sec: 36.69 - lr: 0.000030 - momentum: 0.000000
2023-12-22 13:42:48,609 epoch 12 - 

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]

2023-12-22 13:44:22,296 DEV : loss 1.1791480779647827 - f1-score (micro avg)  0.485





2023-12-22 13:44:24,470 ----------------------------------------------------------------------------------------------------
2023-12-22 13:44:28,869 epoch 13 - iter 2/26 - loss 0.96909586 - time (sec): 4.40 - samples/sec: 29.11 - lr: 0.000029 - momentum: 0.000000
2023-12-22 13:44:33,995 epoch 13 - iter 4/26 - loss 0.98766388 - time (sec): 9.52 - samples/sec: 26.88 - lr: 0.000029 - momentum: 0.000000
2023-12-22 13:44:36,756 epoch 13 - iter 6/26 - loss 0.97486196 - time (sec): 12.28 - samples/sec: 31.26 - lr: 0.000029 - momentum: 0.000000
2023-12-22 13:44:38,999 epoch 13 - iter 8/26 - loss 0.96528459 - time (sec): 14.53 - samples/sec: 35.24 - lr: 0.000029 - momentum: 0.000000
2023-12-22 13:44:43,813 epoch 13 - iter 10/26 - loss 0.96536097 - time (sec): 19.34 - samples/sec: 33.09 - lr: 0.000028 - momentum: 0.000000
2023-12-22 13:44:46,304 epoch 13 - iter 12/26 - loss 0.96857681 - time (sec): 21.83 - samples/sec: 35.18 - lr: 0.000028 - momentum: 0.000000
2023-12-22 13:44:48,669 epoch 13 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:46:24,023 DEV : loss 1.145314335823059 - f1-score (micro avg)  0.5118





2023-12-22 13:46:25,028 ----------------------------------------------------------------------------------------------------
2023-12-22 13:46:27,226 epoch 14 - iter 2/26 - loss 0.95639479 - time (sec): 2.20 - samples/sec: 58.28 - lr: 0.000027 - momentum: 0.000000
2023-12-22 13:46:29,346 epoch 14 - iter 4/26 - loss 0.95245747 - time (sec): 4.32 - samples/sec: 59.31 - lr: 0.000027 - momentum: 0.000000
2023-12-22 13:46:35,562 epoch 14 - iter 6/26 - loss 0.96252857 - time (sec): 10.53 - samples/sec: 36.46 - lr: 0.000027 - momentum: 0.000000
2023-12-22 13:46:40,130 epoch 14 - iter 8/26 - loss 0.94535294 - time (sec): 15.10 - samples/sec: 33.91 - lr: 0.000026 - momentum: 0.000000
2023-12-22 13:46:45,583 epoch 14 - iter 10/26 - loss 0.93441424 - time (sec): 20.55 - samples/sec: 31.14 - lr: 0.000026 - momentum: 0.000000
2023-12-22 13:46:50,416 epoch 14 - iter 12/26 - loss 0.94220615 - time (sec): 25.39 - samples/sec: 30.25 - lr: 0.000026 - momentum: 0.000000
2023-12-22 13:46:54,657 epoch 14 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:48:27,146 DEV : loss 1.1227935552597046 - f1-score (micro avg)  0.511





2023-12-22 13:48:28,122 ----------------------------------------------------------------------------------------------------
2023-12-22 13:48:32,272 epoch 15 - iter 2/26 - loss 0.93339327 - time (sec): 4.15 - samples/sec: 30.86 - lr: 0.000025 - momentum: 0.000000
2023-12-22 13:48:34,512 epoch 15 - iter 4/26 - loss 0.94334796 - time (sec): 6.39 - samples/sec: 40.08 - lr: 0.000025 - momentum: 0.000000
2023-12-22 13:48:36,725 epoch 15 - iter 6/26 - loss 0.92951269 - time (sec): 8.60 - samples/sec: 44.65 - lr: 0.000024 - momentum: 0.000000
2023-12-22 13:48:41,774 epoch 15 - iter 8/26 - loss 0.94710287 - time (sec): 13.65 - samples/sec: 37.51 - lr: 0.000024 - momentum: 0.000000
2023-12-22 13:48:44,559 epoch 15 - iter 10/26 - loss 0.94155964 - time (sec): 16.43 - samples/sec: 38.94 - lr: 0.000024 - momentum: 0.000000
2023-12-22 13:48:49,542 epoch 15 - iter 12/26 - loss 0.95190725 - time (sec): 21.42 - samples/sec: 35.86 - lr: 0.000024 - momentum: 0.000000
2023-12-22 13:48:56,419 epoch 15 - i

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]

2023-12-22 13:50:28,462 DEV : loss 1.2221086025238037 - f1-score (micro avg)  0.4006





2023-12-22 13:50:29,432 ----------------------------------------------------------------------------------------------------
2023-12-22 13:50:34,057 epoch 16 - iter 2/26 - loss 0.90110424 - time (sec): 4.62 - samples/sec: 27.69 - lr: 0.000023 - momentum: 0.000000
2023-12-22 13:50:36,061 epoch 16 - iter 4/26 - loss 0.91683486 - time (sec): 6.63 - samples/sec: 38.63 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:50:42,583 epoch 16 - iter 6/26 - loss 0.93043050 - time (sec): 13.15 - samples/sec: 29.20 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:50:46,475 epoch 16 - iter 8/26 - loss 0.96397644 - time (sec): 17.04 - samples/sec: 30.05 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:50:51,302 epoch 16 - iter 10/26 - loss 0.96134629 - time (sec): 21.87 - samples/sec: 29.27 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:50:56,821 epoch 16 - iter 12/26 - loss 0.96007432 - time (sec): 27.39 - samples/sec: 28.04 - lr: 0.000022 - momentum: 0.000000
2023-12-22 13:50:59,171 epoch 16 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:52:29,706 DEV : loss 1.218664288520813 - f1-score (micro avg)  0.4393





2023-12-22 13:52:30,684 ----------------------------------------------------------------------------------------------------
2023-12-22 13:52:38,318 epoch 17 - iter 2/26 - loss 0.79609072 - time (sec): 7.63 - samples/sec: 16.77 - lr: 0.000020 - momentum: 0.000000
2023-12-22 13:52:41,521 epoch 17 - iter 4/26 - loss 0.91870864 - time (sec): 10.83 - samples/sec: 23.63 - lr: 0.000020 - momentum: 0.000000
2023-12-22 13:52:44,895 epoch 17 - iter 6/26 - loss 0.91624451 - time (sec): 14.21 - samples/sec: 27.03 - lr: 0.000020 - momentum: 0.000000
2023-12-22 13:52:47,751 epoch 17 - iter 8/26 - loss 0.92243309 - time (sec): 17.06 - samples/sec: 30.01 - lr: 0.000020 - momentum: 0.000000
2023-12-22 13:52:53,851 epoch 17 - iter 10/26 - loss 0.93489316 - time (sec): 23.16 - samples/sec: 27.63 - lr: 0.000020 - momentum: 0.000000
2023-12-22 13:52:59,518 epoch 17 - iter 12/26 - loss 0.94048589 - time (sec): 28.83 - samples/sec: 26.64 - lr: 0.000019 - momentum: 0.000000
2023-12-22 13:53:03,293 epoch 17 -

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 13:54:34,304 DEV : loss 1.2166417837142944 - f1-score (micro avg)  0.4385





2023-12-22 13:54:35,998 ----------------------------------------------------------------------------------------------------
2023-12-22 13:54:42,625 epoch 18 - iter 2/26 - loss 0.85671750 - time (sec): 6.62 - samples/sec: 19.32 - lr: 0.000018 - momentum: 0.000000
2023-12-22 13:54:45,186 epoch 18 - iter 4/26 - loss 0.85917027 - time (sec): 9.19 - samples/sec: 27.87 - lr: 0.000018 - momentum: 0.000000
2023-12-22 13:54:50,917 epoch 18 - iter 6/26 - loss 0.86715427 - time (sec): 14.92 - samples/sec: 25.74 - lr: 0.000018 - momentum: 0.000000
2023-12-22 13:54:55,988 epoch 18 - iter 8/26 - loss 0.89043479 - time (sec): 19.99 - samples/sec: 25.62 - lr: 0.000018 - momentum: 0.000000
2023-12-22 13:54:58,840 epoch 18 - iter 10/26 - loss 0.88858685 - time (sec): 22.84 - samples/sec: 28.02 - lr: 0.000017 - momentum: 0.000000
2023-12-22 13:55:01,828 epoch 18 - iter 12/26 - loss 0.90783341 - time (sec): 25.83 - samples/sec: 29.74 - lr: 0.000017 - momentum: 0.000000
2023-12-22 13:55:05,308 epoch 18 - 

100%|██████████| 80/80 [01:12<00:00,  1.11it/s]

2023-12-22 13:56:36,998 DEV : loss 1.1837676763534546 - f1-score (micro avg)  0.4606





2023-12-22 13:56:40,108 ----------------------------------------------------------------------------------------------------
2023-12-22 13:56:44,706 epoch 19 - iter 2/26 - loss 0.88789362 - time (sec): 4.60 - samples/sec: 27.86 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:56:47,682 epoch 19 - iter 4/26 - loss 0.92554425 - time (sec): 7.57 - samples/sec: 33.81 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:56:51,487 epoch 19 - iter 6/26 - loss 0.93220122 - time (sec): 11.38 - samples/sec: 33.75 - lr: 0.000016 - momentum: 0.000000
2023-12-22 13:56:53,918 epoch 19 - iter 8/26 - loss 0.90377949 - time (sec): 13.81 - samples/sec: 37.08 - lr: 0.000015 - momentum: 0.000000
2023-12-22 13:56:55,617 epoch 19 - iter 10/26 - loss 0.91130997 - time (sec): 15.51 - samples/sec: 41.27 - lr: 0.000015 - momentum: 0.000000
2023-12-22 13:57:02,235 epoch 19 - iter 12/26 - loss 0.91130135 - time (sec): 22.12 - samples/sec: 34.71 - lr: 0.000015 - momentum: 0.000000
2023-12-22 13:57:07,437 epoch 19 - 

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]


2023-12-22 13:58:39,361 DEV : loss 1.2525408267974854 - f1-score (micro avg)  0.4085
2023-12-22 13:58:40,340 ----------------------------------------------------------------------------------------------------
2023-12-22 13:58:42,768 epoch 20 - iter 2/26 - loss 0.88081545 - time (sec): 2.43 - samples/sec: 52.75 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:58:44,995 epoch 20 - iter 4/26 - loss 0.89799099 - time (sec): 4.65 - samples/sec: 55.01 - lr: 0.000014 - momentum: 0.000000
2023-12-22 13:58:48,396 epoch 20 - iter 6/26 - loss 0.89969396 - time (sec): 8.05 - samples/sec: 47.67 - lr: 0.000013 - momentum: 0.000000
2023-12-22 13:58:55,435 epoch 20 - iter 8/26 - loss 0.88384651 - time (sec): 15.09 - samples/sec: 33.92 - lr: 0.000013 - momentum: 0.000000
2023-12-22 13:59:02,856 epoch 20 - iter 10/26 - loss 0.88864815 - time (sec): 22.51 - samples/sec: 28.43 - lr: 0.000013 - momentum: 0.000000
2023-12-22 13:59:05,565 epoch 20 - iter 12/26 - loss 0.90037748 - time (sec): 25.22 - sample

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 14:00:43,190 DEV : loss 1.2314547300338745 - f1-score (micro avg)  0.4306





2023-12-22 14:00:44,285 ----------------------------------------------------------------------------------------------------
2023-12-22 14:00:45,961 epoch 21 - iter 2/26 - loss 0.94433314 - time (sec): 1.67 - samples/sec: 76.43 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:00:49,493 epoch 21 - iter 4/26 - loss 0.92741884 - time (sec): 5.21 - samples/sec: 49.17 - lr: 0.000011 - momentum: 0.000000
2023-12-22 14:00:52,683 epoch 21 - iter 6/26 - loss 0.91767010 - time (sec): 8.40 - samples/sec: 45.73 - lr: 0.000011 - momentum: 0.000000
2023-12-22 14:00:55,676 epoch 21 - iter 8/26 - loss 0.91435914 - time (sec): 11.39 - samples/sec: 44.95 - lr: 0.000011 - momentum: 0.000000
2023-12-22 14:01:00,325 epoch 21 - iter 10/26 - loss 0.90789132 - time (sec): 16.04 - samples/sec: 39.90 - lr: 0.000011 - momentum: 0.000000
2023-12-22 14:01:03,063 epoch 21 - iter 12/26 - loss 0.90429172 - time (sec): 18.78 - samples/sec: 40.90 - lr: 0.000011 - momentum: 0.000000
2023-12-22 14:01:05,923 epoch 21 - i

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]


2023-12-22 14:02:42,991 DEV : loss 1.2096383571624756 - f1-score (micro avg)  0.4472
2023-12-22 14:02:44,879 ----------------------------------------------------------------------------------------------------
2023-12-22 14:02:46,699 epoch 22 - iter 2/26 - loss 0.84931767 - time (sec): 1.82 - samples/sec: 70.42 - lr: 0.000009 - momentum: 0.000000
2023-12-22 14:02:53,261 epoch 22 - iter 4/26 - loss 0.83424658 - time (sec): 8.38 - samples/sec: 30.55 - lr: 0.000009 - momentum: 0.000000
2023-12-22 14:02:56,130 epoch 22 - iter 6/26 - loss 0.87294035 - time (sec): 11.25 - samples/sec: 34.14 - lr: 0.000009 - momentum: 0.000000
2023-12-22 14:03:01,524 epoch 22 - iter 8/26 - loss 0.87870334 - time (sec): 16.64 - samples/sec: 30.76 - lr: 0.000009 - momentum: 0.000000
2023-12-22 14:03:03,101 epoch 22 - iter 10/26 - loss 0.86635701 - time (sec): 18.22 - samples/sec: 35.13 - lr: 0.000009 - momentum: 0.000000
2023-12-22 14:03:05,960 epoch 22 - iter 12/26 - loss 0.87356945 - time (sec): 21.08 - sampl

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]

2023-12-22 14:04:45,416 DEV : loss 1.1836941242218018 - f1-score (micro avg)  0.4645





2023-12-22 14:04:46,450 ----------------------------------------------------------------------------------------------------
2023-12-22 14:04:48,115 epoch 23 - iter 2/26 - loss 0.84258556 - time (sec): 1.66 - samples/sec: 77.05 - lr: 0.000007 - momentum: 0.000000
2023-12-22 14:04:51,295 epoch 23 - iter 4/26 - loss 0.86359392 - time (sec): 4.84 - samples/sec: 52.87 - lr: 0.000007 - momentum: 0.000000
2023-12-22 14:04:54,020 epoch 23 - iter 6/26 - loss 0.89336904 - time (sec): 7.57 - samples/sec: 50.75 - lr: 0.000007 - momentum: 0.000000
2023-12-22 14:05:01,930 epoch 23 - iter 8/26 - loss 0.87433485 - time (sec): 15.48 - samples/sec: 33.08 - lr: 0.000007 - momentum: 0.000000
2023-12-22 14:05:07,618 epoch 23 - iter 10/26 - loss 0.88556617 - time (sec): 21.16 - samples/sec: 30.24 - lr: 0.000006 - momentum: 0.000000
2023-12-22 14:05:10,301 epoch 23 - iter 12/26 - loss 0.86934253 - time (sec): 23.85 - samples/sec: 32.21 - lr: 0.000006 - momentum: 0.000000
2023-12-22 14:05:13,059 epoch 23 - i

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]

2023-12-22 14:06:46,861 DEV : loss 1.2152578830718994 - f1-score (micro avg)  0.4306





2023-12-22 14:06:48,888 ----------------------------------------------------------------------------------------------------
2023-12-22 14:06:54,362 epoch 24 - iter 2/26 - loss 0.85151368 - time (sec): 5.47 - samples/sec: 23.41 - lr: 0.000005 - momentum: 0.000000
2023-12-22 14:06:56,322 epoch 24 - iter 4/26 - loss 0.87088320 - time (sec): 7.43 - samples/sec: 34.47 - lr: 0.000005 - momentum: 0.000000
2023-12-22 14:07:02,124 epoch 24 - iter 6/26 - loss 0.85572952 - time (sec): 13.23 - samples/sec: 29.03 - lr: 0.000005 - momentum: 0.000000
2023-12-22 14:07:04,742 epoch 24 - iter 8/26 - loss 0.86863051 - time (sec): 15.85 - samples/sec: 32.31 - lr: 0.000004 - momentum: 0.000000
2023-12-22 14:07:09,890 epoch 24 - iter 10/26 - loss 0.84756824 - time (sec): 21.00 - samples/sec: 30.48 - lr: 0.000004 - momentum: 0.000000
2023-12-22 14:07:11,889 epoch 24 - iter 12/26 - loss 0.85700406 - time (sec): 23.00 - samples/sec: 33.40 - lr: 0.000004 - momentum: 0.000000
2023-12-22 14:07:14,221 epoch 24 - 

100%|██████████| 80/80 [01:12<00:00,  1.10it/s]


2023-12-22 14:08:47,423 DEV : loss 1.1937644481658936 - f1-score (micro avg)  0.4464
2023-12-22 14:08:49,982 ----------------------------------------------------------------------------------------------------
2023-12-22 14:08:52,646 epoch 25 - iter 2/26 - loss 0.95695651 - time (sec): 2.66 - samples/sec: 48.18 - lr: 0.000003 - momentum: 0.000000
2023-12-22 14:08:57,508 epoch 25 - iter 4/26 - loss 0.90459253 - time (sec): 7.52 - samples/sec: 34.05 - lr: 0.000003 - momentum: 0.000000
2023-12-22 14:09:03,386 epoch 25 - iter 6/26 - loss 0.89393397 - time (sec): 13.40 - samples/sec: 28.66 - lr: 0.000002 - momentum: 0.000000
2023-12-22 14:09:05,756 epoch 25 - iter 8/26 - loss 0.88545369 - time (sec): 15.77 - samples/sec: 32.47 - lr: 0.000002 - momentum: 0.000000
2023-12-22 14:09:08,552 epoch 25 - iter 10/26 - loss 0.88947936 - time (sec): 18.56 - samples/sec: 34.48 - lr: 0.000002 - momentum: 0.000000
2023-12-22 14:09:14,120 epoch 25 - iter 12/26 - loss 0.88590672 - time (sec): 24.13 - sampl

100%|██████████| 80/80 [01:13<00:00,  1.09it/s]


2023-12-22 14:10:46,631 DEV : loss 1.1981905698776245 - f1-score (micro avg)  0.4495
2023-12-22 14:10:52,462 ----------------------------------------------------------------------------------------------------
2023-12-22 14:10:52,465 Testing using last state of model ...


100%|██████████| 110/110 [01:15<00:00,  1.45it/s]

2023-12-22 14:12:08,127 
Results:
- F-score (micro) 0.5344
- F-score (macro) 0.3488
- Accuracy 0.5344

By class:
              precision    recall  f1-score   support

     comment     0.9044    0.5515    0.6852      1476
     support     0.1190    0.4808    0.1908       104
        deny     0.0866    0.2200    0.1243       100
       query     0.2733    0.7121    0.3950        66

    accuracy                         0.5344      1746
   macro avg     0.3458    0.4911    0.3488      1746
weighted avg     0.7870    0.5344    0.6126      1746

2023-12-22 14:12:08,129 ----------------------------------------------------------------------------------------------------





In [None]:
add_previous_comment = False
add_post_title = True
add_depth = True

test_model_configuration_character_level_embeddings(samples_train, ground_truths, add_previous_comment, add_post_title, add_depth)

Sample example: Sentence[29]: "1 | “$MENTION$: The day #Ferguson cops told a dirty, bloody lie (via $MENTION$): $URL$ $URL$" → support (1.0)
2023-12-22 14:12:14,208 Reading data from .
2023-12-22 14:12:14,210 Train: train_fasttext_format.txt
2023-12-22 14:12:14,212 Dev: dev_fasttext_format.txt
2023-12-22 14:12:14,215 Test: test_fasttext_format.txt
2023-12-22 14:12:14,311 Initialized corpus . (label type name is 'class')
Corpus statistics:
 {
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 1653,
        "number_of_documents_per_class": {
            "support": 448,
            "comment": 448,
            "deny": 359,
            "query": 398
        },
        "number_of_tokens_per_tag": {},
        "number_of_tokens": {
            "total": 54376,
            "min": 4,
            "max": 486,
            "avg": 32.895341802782816
        }
    },
    "TEST": {
        "dataset": "TEST",
        "total_number_of_documents": 1746,
        "number_of_docume

0it [00:00, ?it/s]
1653it [00:01, 1037.11it/s]

2023-12-22 14:12:32,461 Dictionary created for label 'class' with 4 values: support (seen 448 times), comment (seen 448 times), query (seen 398 times), deny (seen 359 times)
2023-12-22 14:12:32,470 ----------------------------------------------------------------------------------------------------
2023-12-22 14:12:32,472 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): WordEmbeddings(
        'glove'
        (embedding): Embedding(400001, 100)
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear




2023-12-22 14:12:32,502 ----------------------------------------------------------------------------------------------------
2023-12-22 14:12:32,505 Computation:
2023-12-22 14:12:32,506  - compute on device: cuda:0
2023-12-22 14:12:32,508  - embedding storage: none
2023-12-22 14:12:32,509 ----------------------------------------------------------------------------------------------------
2023-12-22 14:12:32,512 Model training base path: "resources/taggers/rumoureval_character_level_embeddings"
2023-12-22 14:12:32,513 ----------------------------------------------------------------------------------------------------
2023-12-22 14:12:32,514 ----------------------------------------------------------------------------------------------------
2023-12-22 14:12:36,047 epoch 1 - iter 2/26 - loss 1.58149987 - time (sec): 3.53 - samples/sec: 36.25 - lr: 0.000001 - momentum: 0.000000
2023-12-22 14:12:40,293 epoch 1 - iter 4/26 - loss 1.52476788 - time (sec): 7.78 - samples/sec: 32.92 - lr: 0.000

100%|██████████| 80/80 [00:50<00:00,  1.57it/s]


2023-12-22 14:14:09,881 DEV : loss 1.3363587856292725 - f1-score (micro avg)  0.392
2023-12-22 14:14:10,574 ----------------------------------------------------------------------------------------------------
2023-12-22 14:14:15,730 epoch 2 - iter 2/26 - loss 1.37474078 - time (sec): 5.15 - samples/sec: 24.84 - lr: 0.000020 - momentum: 0.000000
2023-12-22 14:14:17,659 epoch 2 - iter 4/26 - loss 1.34606719 - time (sec): 7.08 - samples/sec: 36.14 - lr: 0.000022 - momentum: 0.000000
2023-12-22 14:14:19,132 epoch 2 - iter 6/26 - loss 1.32523507 - time (sec): 8.56 - samples/sec: 44.88 - lr: 0.000023 - momentum: 0.000000
2023-12-22 14:14:23,896 epoch 2 - iter 8/26 - loss 1.31525180 - time (sec): 13.32 - samples/sec: 38.44 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:14:27,287 epoch 2 - iter 10/26 - loss 1.31576439 - time (sec): 16.71 - samples/sec: 38.30 - lr: 0.000026 - momentum: 0.000000
2023-12-22 14:14:31,840 epoch 2 - iter 12/26 - loss 1.31940636 - time (sec): 21.26 - samples/sec: 

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:15:42,799 DEV : loss 1.249250888824463 - f1-score (micro avg)  0.4905
2023-12-22 14:15:43,488 ----------------------------------------------------------------------------------------------------
2023-12-22 14:15:46,543 epoch 3 - iter 2/26 - loss 1.27771026 - time (sec): 3.05 - samples/sec: 41.92 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:15:48,264 epoch 3 - iter 4/26 - loss 1.28262410 - time (sec): 4.77 - samples/sec: 53.62 - lr: 0.000041 - momentum: 0.000000
2023-12-22 14:15:52,865 epoch 3 - iter 6/26 - loss 1.28078500 - time (sec): 9.38 - samples/sec: 40.96 - lr: 0.000043 - momentum: 0.000000
2023-12-22 14:15:58,464 epoch 3 - iter 8/26 - loss 1.29837675 - time (sec): 14.97 - samples/sec: 34.19 - lr: 0.000044 - momentum: 0.000000
2023-12-22 14:16:00,849 epoch 3 - iter 10/26 - loss 1.30971632 - time (sec): 17.36 - samples/sec: 36.87 - lr: 0.000046 - momentum: 0.000000
2023-12-22 14:16:05,286 epoch 3 - iter 12/26 - loss 1.30479419 - time (sec): 21.80 - samples/sec: 

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:17:17,280 DEV : loss 1.298666000366211 - f1-score (micro avg)  0.5032
2023-12-22 14:17:19,293 ----------------------------------------------------------------------------------------------------
2023-12-22 14:17:24,045 epoch 4 - iter 2/26 - loss 1.24956751 - time (sec): 4.75 - samples/sec: 26.95 - lr: 0.000049 - momentum: 0.000000
2023-12-22 14:17:29,011 epoch 4 - iter 4/26 - loss 1.22838286 - time (sec): 9.72 - samples/sec: 26.35 - lr: 0.000049 - momentum: 0.000000
2023-12-22 14:17:31,214 epoch 4 - iter 6/26 - loss 1.22674098 - time (sec): 11.92 - samples/sec: 32.22 - lr: 0.000049 - momentum: 0.000000
2023-12-22 14:17:36,518 epoch 4 - iter 8/26 - loss 1.21441944 - time (sec): 17.22 - samples/sec: 29.73 - lr: 0.000049 - momentum: 0.000000
2023-12-22 14:17:37,664 epoch 4 - iter 10/26 - loss 1.21476496 - time (sec): 18.37 - samples/sec: 34.84 - lr: 0.000048 - momentum: 0.000000
2023-12-22 14:17:42,573 epoch 4 - iter 12/26 - loss 1.20886718 - time (sec): 23.28 - samples/sec:

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:18:52,181 DEV : loss 1.2114266157150269 - f1-score (micro avg)  0.5536
2023-12-22 14:18:53,745 ----------------------------------------------------------------------------------------------------
2023-12-22 14:18:58,494 epoch 5 - iter 2/26 - loss 1.16932207 - time (sec): 4.75 - samples/sec: 26.96 - lr: 0.000047 - momentum: 0.000000
2023-12-22 14:19:03,483 epoch 5 - iter 4/26 - loss 1.21122843 - time (sec): 9.74 - samples/sec: 26.29 - lr: 0.000047 - momentum: 0.000000
2023-12-22 14:19:08,933 epoch 5 - iter 6/26 - loss 1.21619729 - time (sec): 15.19 - samples/sec: 25.29 - lr: 0.000047 - momentum: 0.000000
2023-12-22 14:19:11,251 epoch 5 - iter 8/26 - loss 1.20991118 - time (sec): 17.50 - samples/sec: 29.25 - lr: 0.000046 - momentum: 0.000000
2023-12-22 14:19:18,760 epoch 5 - iter 10/26 - loss 1.18292454 - time (sec): 25.01 - samples/sec: 25.59 - lr: 0.000046 - momentum: 0.000000
2023-12-22 14:19:20,796 epoch 5 - iter 12/26 - loss 1.18232087 - time (sec): 27.05 - samples/sec

100%|██████████| 80/80 [00:51<00:00,  1.56it/s]


2023-12-22 14:20:30,231 DEV : loss 1.1022688150405884 - f1-score (micro avg)  0.5986
2023-12-22 14:20:31,418 ----------------------------------------------------------------------------------------------------
2023-12-22 14:20:36,352 epoch 6 - iter 2/26 - loss 1.07610965 - time (sec): 4.93 - samples/sec: 25.98 - lr: 0.000045 - momentum: 0.000000
2023-12-22 14:20:38,416 epoch 6 - iter 4/26 - loss 1.09176749 - time (sec): 6.99 - samples/sec: 36.61 - lr: 0.000045 - momentum: 0.000000
2023-12-22 14:20:40,081 epoch 6 - iter 6/26 - loss 1.11304864 - time (sec): 8.66 - samples/sec: 44.36 - lr: 0.000044 - momentum: 0.000000
2023-12-22 14:20:43,318 epoch 6 - iter 8/26 - loss 1.13214103 - time (sec): 11.89 - samples/sec: 43.05 - lr: 0.000044 - momentum: 0.000000
2023-12-22 14:20:45,641 epoch 6 - iter 10/26 - loss 1.12161030 - time (sec): 14.22 - samples/sec: 45.02 - lr: 0.000044 - momentum: 0.000000
2023-12-22 14:20:49,573 epoch 6 - iter 12/26 - loss 1.13866823 - time (sec): 18.15 - samples/sec:

100%|██████████| 80/80 [00:50<00:00,  1.58it/s]


2023-12-22 14:22:06,520 DEV : loss 1.1755751371383667 - f1-score (micro avg)  0.5489
2023-12-22 14:22:07,213 ----------------------------------------------------------------------------------------------------
2023-12-22 14:22:08,949 epoch 7 - iter 2/26 - loss 1.19697392 - time (sec): 1.73 - samples/sec: 73.89 - lr: 0.000043 - momentum: 0.000000
2023-12-22 14:22:12,018 epoch 7 - iter 4/26 - loss 1.11934018 - time (sec): 4.80 - samples/sec: 53.31 - lr: 0.000042 - momentum: 0.000000
2023-12-22 14:22:16,401 epoch 7 - iter 6/26 - loss 1.09954158 - time (sec): 9.18 - samples/sec: 41.81 - lr: 0.000042 - momentum: 0.000000
2023-12-22 14:22:18,101 epoch 7 - iter 8/26 - loss 1.10223140 - time (sec): 10.89 - samples/sec: 47.04 - lr: 0.000042 - momentum: 0.000000
2023-12-22 14:22:24,695 epoch 7 - iter 10/26 - loss 1.07500232 - time (sec): 17.48 - samples/sec: 36.62 - lr: 0.000042 - momentum: 0.000000
2023-12-22 14:22:29,613 epoch 7 - iter 12/26 - loss 1.07147045 - time (sec): 22.40 - samples/sec:

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:23:41,585 DEV : loss 1.390243649482727 - f1-score (micro avg)  0.388
2023-12-22 14:23:43,603 ----------------------------------------------------------------------------------------------------
2023-12-22 14:23:44,872 epoch 8 - iter 2/26 - loss 0.96075571 - time (sec): 1.27 - samples/sec: 101.12 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:23:46,370 epoch 8 - iter 4/26 - loss 1.00391334 - time (sec): 2.76 - samples/sec: 92.60 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:23:47,525 epoch 8 - iter 6/26 - loss 1.01475519 - time (sec): 3.92 - samples/sec: 97.98 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:23:50,160 epoch 8 - iter 8/26 - loss 1.01633751 - time (sec): 6.55 - samples/sec: 78.12 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:23:52,300 epoch 8 - iter 10/26 - loss 1.03235847 - time (sec): 8.69 - samples/sec: 73.61 - lr: 0.000040 - momentum: 0.000000
2023-12-22 14:23:53,436 epoch 8 - iter 12/26 - loss 1.04746261 - time (sec): 9.83 - samples/sec: 78.

100%|██████████| 80/80 [00:51<00:00,  1.57it/s]


2023-12-22 14:25:14,979 DEV : loss 1.1864145994186401 - f1-score (micro avg)  0.5118
2023-12-22 14:25:16,128 ----------------------------------------------------------------------------------------------------
2023-12-22 14:25:21,520 epoch 9 - iter 2/26 - loss 1.07271695 - time (sec): 5.39 - samples/sec: 23.75 - lr: 0.000038 - momentum: 0.000000
2023-12-22 14:25:26,243 epoch 9 - iter 4/26 - loss 1.03537884 - time (sec): 10.11 - samples/sec: 25.31 - lr: 0.000038 - momentum: 0.000000
2023-12-22 14:25:27,948 epoch 9 - iter 6/26 - loss 1.06527319 - time (sec): 11.82 - samples/sec: 32.49 - lr: 0.000038 - momentum: 0.000000
2023-12-22 14:25:29,912 epoch 9 - iter 8/26 - loss 1.04972199 - time (sec): 13.78 - samples/sec: 37.15 - lr: 0.000038 - momentum: 0.000000
2023-12-22 14:25:33,501 epoch 9 - iter 10/26 - loss 1.06042855 - time (sec): 17.37 - samples/sec: 36.84 - lr: 0.000038 - momentum: 0.000000
2023-12-22 14:25:36,077 epoch 9 - iter 12/26 - loss 1.05922374 - time (sec): 19.95 - samples/se

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:26:53,384 DEV : loss 1.2410218715667725 - f1-score (micro avg)  0.4858
2023-12-22 14:26:54,068 ----------------------------------------------------------------------------------------------------
2023-12-22 14:26:59,021 epoch 10 - iter 2/26 - loss 0.97893733 - time (sec): 4.95 - samples/sec: 25.85 - lr: 0.000036 - momentum: 0.000000
2023-12-22 14:27:03,587 epoch 10 - iter 4/26 - loss 0.94776025 - time (sec): 9.52 - samples/sec: 26.90 - lr: 0.000036 - momentum: 0.000000
2023-12-22 14:27:07,015 epoch 10 - iter 6/26 - loss 0.97839863 - time (sec): 12.94 - samples/sec: 29.67 - lr: 0.000036 - momentum: 0.000000
2023-12-22 14:27:11,704 epoch 10 - iter 8/26 - loss 0.98387637 - time (sec): 17.63 - samples/sec: 29.04 - lr: 0.000036 - momentum: 0.000000
2023-12-22 14:27:13,520 epoch 10 - iter 10/26 - loss 0.97534842 - time (sec): 19.45 - samples/sec: 32.91 - lr: 0.000035 - momentum: 0.000000
2023-12-22 14:27:15,024 epoch 10 - iter 12/26 - loss 0.99069890 - time (sec): 20.95 - sampl

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:28:25,711 DEV : loss 1.3754496574401855 - f1-score (micro avg)  0.3312
2023-12-22 14:28:26,904 ----------------------------------------------------------------------------------------------------
2023-12-22 14:28:32,989 epoch 11 - iter 2/26 - loss 1.07082325 - time (sec): 6.08 - samples/sec: 21.04 - lr: 0.000034 - momentum: 0.000000
2023-12-22 14:28:36,721 epoch 11 - iter 4/26 - loss 1.05283600 - time (sec): 9.81 - samples/sec: 26.08 - lr: 0.000034 - momentum: 0.000000
2023-12-22 14:28:39,158 epoch 11 - iter 6/26 - loss 1.04408850 - time (sec): 12.25 - samples/sec: 31.34 - lr: 0.000034 - momentum: 0.000000
2023-12-22 14:28:40,968 epoch 11 - iter 8/26 - loss 1.02980035 - time (sec): 14.06 - samples/sec: 36.41 - lr: 0.000033 - momentum: 0.000000
2023-12-22 14:28:42,833 epoch 11 - iter 10/26 - loss 1.01830514 - time (sec): 15.93 - samples/sec: 40.18 - lr: 0.000033 - momentum: 0.000000
2023-12-22 14:28:44,203 epoch 11 - iter 12/26 - loss 1.00190593 - time (sec): 17.30 - sampl

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:30:01,481 DEV : loss 1.2670068740844727 - f1-score (micro avg)  0.4464
2023-12-22 14:30:02,958 ----------------------------------------------------------------------------------------------------
2023-12-22 14:30:05,749 epoch 12 - iter 2/26 - loss 1.03170073 - time (sec): 2.79 - samples/sec: 45.89 - lr: 0.000032 - momentum: 0.000000
2023-12-22 14:30:07,848 epoch 12 - iter 4/26 - loss 0.97827770 - time (sec): 4.89 - samples/sec: 52.38 - lr: 0.000032 - momentum: 0.000000
2023-12-22 14:30:12,684 epoch 12 - iter 6/26 - loss 0.96312789 - time (sec): 9.72 - samples/sec: 39.49 - lr: 0.000031 - momentum: 0.000000
2023-12-22 14:30:18,031 epoch 12 - iter 8/26 - loss 0.97167029 - time (sec): 15.07 - samples/sec: 33.97 - lr: 0.000031 - momentum: 0.000000
2023-12-22 14:30:22,832 epoch 12 - iter 10/26 - loss 0.97328653 - time (sec): 19.87 - samples/sec: 32.21 - lr: 0.000031 - momentum: 0.000000
2023-12-22 14:30:25,322 epoch 12 - iter 12/26 - loss 0.98590162 - time (sec): 22.36 - sample

100%|██████████| 80/80 [00:51<00:00,  1.56it/s]


2023-12-22 14:31:38,454 DEV : loss 1.4559695720672607 - f1-score (micro avg)  0.3273
2023-12-22 14:31:39,329 ----------------------------------------------------------------------------------------------------
2023-12-22 14:31:41,745 epoch 13 - iter 2/26 - loss 1.00934470 - time (sec): 2.41 - samples/sec: 53.14 - lr: 0.000030 - momentum: 0.000000
2023-12-22 14:31:43,797 epoch 13 - iter 4/26 - loss 0.97463387 - time (sec): 4.46 - samples/sec: 57.39 - lr: 0.000029 - momentum: 0.000000
2023-12-22 14:31:47,580 epoch 13 - iter 6/26 - loss 0.95531115 - time (sec): 8.24 - samples/sec: 46.58 - lr: 0.000029 - momentum: 0.000000
2023-12-22 14:31:49,356 epoch 13 - iter 8/26 - loss 0.94262516 - time (sec): 10.02 - samples/sec: 51.10 - lr: 0.000029 - momentum: 0.000000
2023-12-22 14:31:50,987 epoch 13 - iter 10/26 - loss 0.95697823 - time (sec): 11.65 - samples/sec: 54.93 - lr: 0.000029 - momentum: 0.000000
2023-12-22 14:31:55,711 epoch 13 - iter 12/26 - loss 0.95599881 - time (sec): 16.38 - sample

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:33:07,711 DEV : loss 1.362725019454956 - f1-score (micro avg)  0.3588
2023-12-22 14:33:08,672 ----------------------------------------------------------------------------------------------------
2023-12-22 14:33:13,256 epoch 14 - iter 2/26 - loss 0.99820259 - time (sec): 4.58 - samples/sec: 27.97 - lr: 0.000027 - momentum: 0.000000
2023-12-22 14:33:15,216 epoch 14 - iter 4/26 - loss 1.00135350 - time (sec): 6.54 - samples/sec: 39.17 - lr: 0.000027 - momentum: 0.000000
2023-12-22 14:33:17,591 epoch 14 - iter 6/26 - loss 1.00936655 - time (sec): 8.91 - samples/sec: 43.10 - lr: 0.000027 - momentum: 0.000000
2023-12-22 14:33:22,564 epoch 14 - iter 8/26 - loss 1.00181603 - time (sec): 13.88 - samples/sec: 36.88 - lr: 0.000027 - momentum: 0.000000
2023-12-22 14:33:26,677 epoch 14 - iter 10/26 - loss 0.99355971 - time (sec): 18.00 - samples/sec: 35.56 - lr: 0.000027 - momentum: 0.000000
2023-12-22 14:33:28,509 epoch 14 - iter 12/26 - loss 0.97681101 - time (sec): 19.83 - samples

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:34:42,314 DEV : loss 1.2240471839904785 - f1-score (micro avg)  0.4614
2023-12-22 14:34:43,798 ----------------------------------------------------------------------------------------------------
2023-12-22 14:34:50,431 epoch 15 - iter 2/26 - loss 1.00190359 - time (sec): 6.63 - samples/sec: 19.30 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:34:54,633 epoch 15 - iter 4/26 - loss 0.97804514 - time (sec): 10.83 - samples/sec: 23.63 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:34:59,414 epoch 15 - iter 6/26 - loss 0.93336228 - time (sec): 15.61 - samples/sec: 24.59 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:35:01,456 epoch 15 - iter 8/26 - loss 0.94685525 - time (sec): 17.66 - samples/sec: 29.00 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:35:02,915 epoch 15 - iter 10/26 - loss 0.95887808 - time (sec): 19.12 - samples/sec: 33.48 - lr: 0.000025 - momentum: 0.000000
2023-12-22 14:35:06,011 epoch 15 - iter 12/26 - loss 0.95881536 - time (sec): 22.21 - samp

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:36:18,383 DEV : loss 1.3234866857528687 - f1-score (micro avg)  0.3943
2023-12-22 14:36:19,935 ----------------------------------------------------------------------------------------------------
2023-12-22 14:36:28,074 epoch 16 - iter 2/26 - loss 0.85295790 - time (sec): 8.14 - samples/sec: 15.73 - lr: 0.000023 - momentum: 0.000000
2023-12-22 14:36:31,712 epoch 16 - iter 4/26 - loss 0.86999840 - time (sec): 11.77 - samples/sec: 21.74 - lr: 0.000023 - momentum: 0.000000
2023-12-22 14:36:36,504 epoch 16 - iter 6/26 - loss 0.92038099 - time (sec): 16.57 - samples/sec: 23.18 - lr: 0.000023 - momentum: 0.000000
2023-12-22 14:36:38,259 epoch 16 - iter 8/26 - loss 0.92104743 - time (sec): 18.32 - samples/sec: 27.94 - lr: 0.000023 - momentum: 0.000000
2023-12-22 14:36:41,882 epoch 16 - iter 10/26 - loss 0.91352804 - time (sec): 21.94 - samples/sec: 29.16 - lr: 0.000022 - momentum: 0.000000
2023-12-22 14:36:45,734 epoch 16 - iter 12/26 - loss 0.91776581 - time (sec): 25.80 - samp

100%|██████████| 80/80 [00:51<00:00,  1.56it/s]

2023-12-22 14:37:56,650 DEV : loss 1.3048571348190308 - f1-score (micro avg)  0.4006





2023-12-22 14:37:57,786 ----------------------------------------------------------------------------------------------------
2023-12-22 14:38:01,179 epoch 17 - iter 2/26 - loss 0.88876584 - time (sec): 3.39 - samples/sec: 37.75 - lr: 0.000021 - momentum: 0.000000
2023-12-22 14:38:05,793 epoch 17 - iter 4/26 - loss 0.87008791 - time (sec): 8.00 - samples/sec: 31.98 - lr: 0.000021 - momentum: 0.000000
2023-12-22 14:38:08,063 epoch 17 - iter 6/26 - loss 0.86571241 - time (sec): 10.27 - samples/sec: 37.38 - lr: 0.000021 - momentum: 0.000000
2023-12-22 14:38:10,299 epoch 17 - iter 8/26 - loss 0.87264827 - time (sec): 12.51 - samples/sec: 40.93 - lr: 0.000020 - momentum: 0.000000
2023-12-22 14:38:15,271 epoch 17 - iter 10/26 - loss 0.87392492 - time (sec): 17.48 - samples/sec: 36.61 - lr: 0.000020 - momentum: 0.000000
2023-12-22 14:38:16,677 epoch 17 - iter 12/26 - loss 0.88449131 - time (sec): 18.89 - samples/sec: 40.66 - lr: 0.000020 - momentum: 0.000000
2023-12-22 14:38:18,546 epoch 17 - 

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:39:30,465 DEV : loss 1.2443901300430298 - f1-score (micro avg)  0.4259
2023-12-22 14:39:31,153 ----------------------------------------------------------------------------------------------------
2023-12-22 14:39:33,474 epoch 18 - iter 2/26 - loss 0.83765867 - time (sec): 2.32 - samples/sec: 55.20 - lr: 0.000019 - momentum: 0.000000
2023-12-22 14:39:39,614 epoch 18 - iter 4/26 - loss 0.83564968 - time (sec): 8.46 - samples/sec: 30.27 - lr: 0.000019 - momentum: 0.000000
2023-12-22 14:39:41,170 epoch 18 - iter 6/26 - loss 0.85716854 - time (sec): 10.01 - samples/sec: 38.35 - lr: 0.000018 - momentum: 0.000000
2023-12-22 14:39:45,891 epoch 18 - iter 8/26 - loss 0.88436182 - time (sec): 14.74 - samples/sec: 34.75 - lr: 0.000018 - momentum: 0.000000
2023-12-22 14:39:47,185 epoch 18 - iter 10/26 - loss 0.89450306 - time (sec): 16.03 - samples/sec: 39.93 - lr: 0.000018 - momentum: 0.000000
2023-12-22 14:39:49,547 epoch 18 - iter 12/26 - loss 0.90016387 - time (sec): 18.39 - sampl

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:41:02,209 DEV : loss 1.0698237419128418 - f1-score (micro avg)  0.5718
2023-12-22 14:41:02,902 ----------------------------------------------------------------------------------------------------
2023-12-22 14:41:08,731 epoch 19 - iter 2/26 - loss 0.87889734 - time (sec): 5.83 - samples/sec: 21.97 - lr: 0.000017 - momentum: 0.000000
2023-12-22 14:41:12,367 epoch 19 - iter 4/26 - loss 0.93214740 - time (sec): 9.46 - samples/sec: 27.05 - lr: 0.000017 - momentum: 0.000000
2023-12-22 14:41:14,398 epoch 19 - iter 6/26 - loss 0.93256105 - time (sec): 11.49 - samples/sec: 33.41 - lr: 0.000016 - momentum: 0.000000
2023-12-22 14:41:17,668 epoch 19 - iter 8/26 - loss 0.91749727 - time (sec): 14.76 - samples/sec: 34.68 - lr: 0.000016 - momentum: 0.000000
2023-12-22 14:41:20,632 epoch 19 - iter 10/26 - loss 0.91503950 - time (sec): 17.73 - samples/sec: 36.10 - lr: 0.000016 - momentum: 0.000000
2023-12-22 14:41:24,570 epoch 19 - iter 12/26 - loss 0.90901491 - time (sec): 21.67 - sampl

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:42:34,728 DEV : loss 1.17717444896698 - f1-score (micro avg)  0.4921
2023-12-22 14:42:36,691 ----------------------------------------------------------------------------------------------------
2023-12-22 14:42:39,574 epoch 20 - iter 2/26 - loss 0.87725842 - time (sec): 2.88 - samples/sec: 44.44 - lr: 0.000015 - momentum: 0.000000
2023-12-22 14:42:41,685 epoch 20 - iter 4/26 - loss 0.85591626 - time (sec): 4.99 - samples/sec: 51.29 - lr: 0.000014 - momentum: 0.000000
2023-12-22 14:42:43,482 epoch 20 - iter 6/26 - loss 0.85853582 - time (sec): 6.79 - samples/sec: 56.57 - lr: 0.000014 - momentum: 0.000000
2023-12-22 14:42:47,711 epoch 20 - iter 8/26 - loss 0.89748008 - time (sec): 11.02 - samples/sec: 46.47 - lr: 0.000014 - momentum: 0.000000
2023-12-22 14:42:50,629 epoch 20 - iter 10/26 - loss 0.89456344 - time (sec): 13.94 - samples/sec: 45.93 - lr: 0.000014 - momentum: 0.000000
2023-12-22 14:42:52,883 epoch 20 - iter 12/26 - loss 0.89511751 - time (sec): 16.19 - samples/

100%|██████████| 80/80 [00:51<00:00,  1.56it/s]


2023-12-22 14:44:08,657 DEV : loss 1.1807082891464233 - f1-score (micro avg)  0.4905
2023-12-22 14:44:09,844 ----------------------------------------------------------------------------------------------------
2023-12-22 14:44:14,312 epoch 21 - iter 2/26 - loss 0.81145531 - time (sec): 4.47 - samples/sec: 28.66 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:44:19,691 epoch 21 - iter 4/26 - loss 0.77386346 - time (sec): 9.85 - samples/sec: 26.00 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:44:22,711 epoch 21 - iter 6/26 - loss 0.81081106 - time (sec): 12.87 - samples/sec: 29.85 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:44:24,763 epoch 21 - iter 8/26 - loss 0.83369301 - time (sec): 14.92 - samples/sec: 34.32 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:44:26,809 epoch 21 - iter 10/26 - loss 0.84348020 - time (sec): 16.96 - samples/sec: 37.73 - lr: 0.000012 - momentum: 0.000000
2023-12-22 14:44:30,590 epoch 21 - iter 12/26 - loss 0.84152562 - time (sec): 20.74 - sampl

100%|██████████| 80/80 [00:50<00:00,  1.58it/s]


2023-12-22 14:45:42,248 DEV : loss 1.299103021621704 - f1-score (micro avg)  0.4022
2023-12-22 14:45:42,936 ----------------------------------------------------------------------------------------------------
2023-12-22 14:45:50,433 epoch 22 - iter 2/26 - loss 0.88154680 - time (sec): 7.49 - samples/sec: 17.08 - lr: 0.000010 - momentum: 0.000000
2023-12-22 14:45:52,636 epoch 22 - iter 4/26 - loss 0.84884430 - time (sec): 9.70 - samples/sec: 26.40 - lr: 0.000010 - momentum: 0.000000
2023-12-22 14:45:55,420 epoch 22 - iter 6/26 - loss 0.81849024 - time (sec): 12.48 - samples/sec: 30.76 - lr: 0.000010 - momentum: 0.000000
2023-12-22 14:45:58,523 epoch 22 - iter 8/26 - loss 0.80360480 - time (sec): 15.58 - samples/sec: 32.85 - lr: 0.000010 - momentum: 0.000000
2023-12-22 14:46:03,017 epoch 22 - iter 10/26 - loss 0.82220592 - time (sec): 20.08 - samples/sec: 31.88 - lr: 0.000010 - momentum: 0.000000
2023-12-22 14:46:08,353 epoch 22 - iter 12/26 - loss 0.83452878 - time (sec): 25.41 - sample

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:47:16,191 DEV : loss 1.2896922826766968 - f1-score (micro avg)  0.3967
2023-12-22 14:47:17,679 ----------------------------------------------------------------------------------------------------
2023-12-22 14:47:20,626 epoch 23 - iter 2/26 - loss 0.99353769 - time (sec): 2.94 - samples/sec: 43.48 - lr: 0.000008 - momentum: 0.000000
2023-12-22 14:47:24,925 epoch 23 - iter 4/26 - loss 0.86856511 - time (sec): 7.24 - samples/sec: 35.34 - lr: 0.000008 - momentum: 0.000000
2023-12-22 14:47:26,285 epoch 23 - iter 6/26 - loss 0.86752621 - time (sec): 8.60 - samples/sec: 44.63 - lr: 0.000008 - momentum: 0.000000
2023-12-22 14:47:29,965 epoch 23 - iter 8/26 - loss 0.86814918 - time (sec): 12.28 - samples/sec: 41.68 - lr: 0.000008 - momentum: 0.000000
2023-12-22 14:47:33,938 epoch 23 - iter 10/26 - loss 0.85037020 - time (sec): 16.26 - samples/sec: 39.37 - lr: 0.000007 - momentum: 0.000000
2023-12-22 14:47:37,683 epoch 23 - iter 12/26 - loss 0.85163615 - time (sec): 20.00 - sample

100%|██████████| 80/80 [00:51<00:00,  1.56it/s]


2023-12-22 14:48:52,746 DEV : loss 1.24203622341156 - f1-score (micro avg)  0.4385
2023-12-22 14:48:53,941 ----------------------------------------------------------------------------------------------------
2023-12-22 14:48:55,431 epoch 24 - iter 2/26 - loss 0.85939887 - time (sec): 1.49 - samples/sec: 86.07 - lr: 0.000006 - momentum: 0.000000
2023-12-22 14:49:00,791 epoch 24 - iter 4/26 - loss 0.89661387 - time (sec): 6.85 - samples/sec: 37.38 - lr: 0.000006 - momentum: 0.000000
2023-12-22 14:49:05,143 epoch 24 - iter 6/26 - loss 0.86918614 - time (sec): 11.20 - samples/sec: 34.29 - lr: 0.000006 - momentum: 0.000000
2023-12-22 14:49:06,418 epoch 24 - iter 8/26 - loss 0.87435813 - time (sec): 12.47 - samples/sec: 41.04 - lr: 0.000005 - momentum: 0.000000
2023-12-22 14:49:08,149 epoch 24 - iter 10/26 - loss 0.86684723 - time (sec): 14.21 - samples/sec: 45.05 - lr: 0.000005 - momentum: 0.000000
2023-12-22 14:49:13,280 epoch 24 - iter 12/26 - loss 0.86669873 - time (sec): 19.34 - samples

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:50:30,086 DEV : loss 1.229972004890442 - f1-score (micro avg)  0.444
2023-12-22 14:50:30,764 ----------------------------------------------------------------------------------------------------
2023-12-22 14:50:36,128 epoch 25 - iter 2/26 - loss 0.81852785 - time (sec): 5.36 - samples/sec: 23.87 - lr: 0.000004 - momentum: 0.000000
2023-12-22 14:50:38,497 epoch 25 - iter 4/26 - loss 0.82659948 - time (sec): 7.73 - samples/sec: 33.11 - lr: 0.000004 - momentum: 0.000000
2023-12-22 14:50:40,653 epoch 25 - iter 6/26 - loss 0.86939549 - time (sec): 9.89 - samples/sec: 38.84 - lr: 0.000003 - momentum: 0.000000
2023-12-22 14:50:42,223 epoch 25 - iter 8/26 - loss 0.88336577 - time (sec): 11.46 - samples/sec: 44.69 - lr: 0.000003 - momentum: 0.000000
2023-12-22 14:50:45,050 epoch 25 - iter 10/26 - loss 0.85374765 - time (sec): 14.28 - samples/sec: 44.80 - lr: 0.000003 - momentum: 0.000000
2023-12-22 14:50:51,847 epoch 25 - iter 12/26 - loss 0.85352317 - time (sec): 21.08 - samples/

100%|██████████| 80/80 [00:50<00:00,  1.59it/s]


2023-12-22 14:52:03,834 DEV : loss 1.237272024154663 - f1-score (micro avg)  0.4409
2023-12-22 14:52:07,961 ----------------------------------------------------------------------------------------------------
2023-12-22 14:52:07,968 Testing using last state of model ...


100%|██████████| 110/110 [00:56<00:00,  1.93it/s]

2023-12-22 14:53:04,901 
Results:
- F-score (micro) 0.5223
- F-score (macro) 0.3334
- Accuracy 0.5223

By class:
              precision    recall  f1-score   support

     comment     0.9123    0.5427    0.6805      1476
     support     0.1086    0.4615    0.1758       104
        deny     0.0645    0.1600    0.0920       100
       query     0.2640    0.7121    0.3852        66

    accuracy                         0.5223      1746
   macro avg     0.3374    0.4691    0.3334      1746
weighted avg     0.7914    0.5223    0.6056      1746

2023-12-22 14:53:04,903 ----------------------------------------------------------------------------------------------------





# Inference with our own samples

Now that we know the best performing configuration (a DeBERTa-v3 model with a linear classifier and samples consisting of `depth | post_title | reply`), we can create our own samples in order to test its behavior on specific cases.

In [None]:
# Download the model, saved after the last epoch (Note: this may not be available forever)
!gdown 1L9WM61gaPfZJbMWnD4SmAskEJhbU4fEq

In [27]:
sample_template = "{} | {} | {}"
classifier = TextClassifier.load('best_model_deberta.pt')

def get_prediction(post_title, reply, depth=1):
  sentence = Sentence(sample_template.format(depth, post_title, reply))
  classifier.predict(sentence)
  print(sentence.get_labels())

In [28]:
get_prediction("BREAKING NEWS! scientists confirm that microwaves cause heart problems!", "are you really sure about that?")

['Sentence[21]: "1 | BREAKING NEWS! scientists confirm that microwaves cause heart problems! | are you really sure about that?"'/'query' (0.9999)]


In [13]:
get_prediction("BREAKING NEWS! scientists confirm that microwaves cause heart problems!", "come on.. you know that's false")

['Sentence[22]: "1 | BREAKING NEWS! scientists confirm that microwaves cause heart problems! | come on.. you know that's false"'/'deny' (1.0)]


In [14]:
get_prediction("BREAKING NEWS! scientists confirm that microwaves cause heart problems!", "my uncle who works and NASA said that his third cousin's heart problems stopped when he sold his mocrowave")

['Sentence[34]: "1 | BREAKING NEWS! scientists confirm that microwaves cause heart problems! | my uncle who works and NASA said that his third cousin's heart problems stopped when he sold his mocrowave"'/'comment' (1.0)]


In [22]:
# DeBERTa's tokenizer actually supports emojis
# We can also simulate mentions (@username) and links by using the placeholders we used during training ($MENTION$ and $URL$)
get_prediction("BREAKING NEWS! scientists confirm that microwaves cause heart problems!", "$MENTION$ look at this 😂😂")

['Sentence[21]: "1 | BREAKING NEWS! scientists confirm that microwaves cause heart problems! | $MENTION$ look at this 😂😂"'/'comment' (0.9999)]


In [35]:
get_prediction("BREAKING NEWS! scientists confirm that microwaves cause heart problems!", "my best friend works at the FBI and he confirms its true!!!")

['Sentence[28]: "1 | BREAKING NEWS! scientists confirm that microwaves cause heart problems! | my best friend works at the FBI and he confirms its true!!!"'/'support' (0.9976)]
