## Finetune Flair

In [2]:
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
import pandas as pd

import torch

from flair.data import Corpus, Sentence
from flair.datasets import ClassificationCorpus
from flair.embeddings import (
    TransformerDocumentEmbeddings,
    FlairEmbeddings, 
    DocumentRNNEmbeddings
)
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from flair.training_utils import EvaluationMetric
from flair.visual.training_curves import Plotter

import tqdm

from torch.optim import AdamW
import logging

import flair
# Prefer the GPU whenever PyTorch can see one; otherwise run Flair on the CPU.
if torch.cuda.is_available():
    flair.device = torch.device("cuda")
else:
    flair.device = torch.device("cpu")
print(f"Using device: {flair.device}")

from collections import Counter

from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

Using device: cuda


In [3]:
import torch
# Report the installed PyTorch build and whether a CUDA device is usable.
print("PyTorch Version:", torch.__version__)
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)
if cuda_available:
    # The CUDA version line is only printed when CUDA is actually available.
    print("CUDA Version:", torch.version.cuda)

PyTorch Version: 2.2.1+cu121
CUDA Available: True
CUDA Version: 12.1


In [4]:
# Directory holding the train/dev/test files; it is created up front if missing.
flair_data_folder = "data/flair_data"
os.makedirs(flair_data_folder, exist_ok=True)
corpus_folder = Path(flair_data_folder)

# Full corpus: every document is labelled under the 'sentiment' label type.
full_split_files = {
    "train_file": 'train.txt',
    "dev_file": 'dev.txt',
    "test_file": 'test.txt',
}
corpus = ClassificationCorpus(corpus_folder, label_type='sentiment', **full_split_files)

# Split sizes of the full corpus
print(f"Number of training sentences: {len(corpus.train)}")
print(f"Number of validation sentences: {len(corpus.dev)}")
print(f"Number of test sentences: {len(corpus.test)}")

# Small corpus: same folder, but the *_small.txt files, used for quick smoke tests.
small_data_folder = "data/flair_data"
small_split_files = {
    "train_file": "train_small.txt",
    "dev_file": 'dev_small.txt',
    "test_file": 'test_small.txt',
}
small_corpus = ClassificationCorpus(small_data_folder, label_type="sentiment", **small_split_files)

# Split sizes of the small corpus
print(f"Number of training sentences: {len(small_corpus.train)}")
print(f"Number of validation sentences: {len(small_corpus.dev)}")
print(f"Number of test sentences: {len(small_corpus.test)}")


2024-12-05 11:42:53,689 Reading data from data\flair_data
2024-12-05 11:42:53,689 Train: data\flair_data\train.txt
2024-12-05 11:42:53,689 Dev: data\flair_data\dev.txt
2024-12-05 11:42:53,690 Test: data\flair_data\test.txt
2024-12-05 11:42:54,355 Initialized corpus data\flair_data (label type name is 'sentiment')
Number of training sentences: 22500
Number of validation sentences: 2500
Number of test sentences: 25000
2024-12-05 11:42:54,356 Reading data from data\flair_data
2024-12-05 11:42:54,356 Train: data\flair_data\train_small.txt
2024-12-05 11:42:54,356 Dev: data\flair_data\dev_small.txt
2024-12-05 11:42:54,357 Test: data\flair_data\test_small.txt
2024-12-05 11:42:54,359 Initialized corpus data/flair_data (label type name is 'sentiment')
Number of training sentences: 55
Number of validation sentences: 25
Number of test sentences: 48


In [5]:
# Collect the 'sentiment' label vocabulary that the classifiers below are built with.
label_dict = corpus.make_label_dictionary(
    label_type='sentiment',
)
print(label_dict)

2024-12-05 11:42:54,515 Computing label dictionary. Progress:


0it [00:00, ?it/s]
22500it [00:55, 407.80it/s]

2024-12-05 11:43:49,694 Dictionary created for label 'sentiment' with 2 values: POS (seen 11298 times), NEG (seen 11202 times)
Dictionary with 2 tags: POS, NEG





In [6]:
def evaluate_model(classifier, test_dataset):
    """
    Evaluate a Flair classifier on a labelled dataset and report binary sentiment metrics.

    Each sentence's gold "sentiment" label is compared with the classifier's
    prediction. Predictions are stored under a separate "predicted" label type so
    the gold annotation on the sentence is left untouched. Labels are mapped to
    integers (NEG -> 0, POS -> 1) before being handed to scikit-learn; precision,
    recall and F1 are computed for the POS class.

    Args:
        classifier (TextClassifier): The trained Flair classifier.
        test_dataset (Dataset): Iterable of Flair sentences carrying a gold
            "sentiment" label.

    Returns:
        dict: Keys "accuracy", "precision", "recall", "f1_score" and
            "classification_report". An empty dict is returned when the dataset
            yields no sentences or when a label outside NEG/POS is encountered.
    """
    # Label type under which predictions are written. Keeping it distinct from
    # the gold "sentiment" type means predict() cannot replace the gold label
    # and we never depend on the ordering of sentence.labels.
    predicted_label_type = "predicted"

    # Label mapping (insertion order defines the row order of the report)
    label_mapping = {"NEG": 0, "POS": 1}

    true_labels = []
    predicted_labels = []

    # Iterate over the test dataset with tqdm progress bar
    for sentence in tqdm.tqdm(test_dataset, desc="Evaluating", leave=True):
        sentence.to(flair.device)

        # Gold label, read before prediction
        true_labels.append(sentence.get_label("sentiment").value)

        # Predicted label, read back from its dedicated label type
        classifier.predict(sentence, label_name=predicted_label_type)
        predicted_labels.append(sentence.get_label(predicted_label_type).value)

    # Nothing to score: return the same empty result used for mapping errors
    # instead of passing empty arrays to the metric functions.
    if not true_labels:
        print("No sentences to evaluate.")
        return {}

    # Verify label consistency
    print("True Labels Sample:", true_labels[:5])
    print("Predicted Labels Sample:", predicted_labels[:5])
    print("True Label Distribution:", Counter(true_labels))
    print("Predicted Label Distribution:", Counter(predicted_labels))

    # Map labels to numeric values for sklearn
    try:
        true_labels_mapped = [label_mapping[label] for label in true_labels]
        predicted_labels_mapped = [label_mapping[label] for label in predicted_labels]
    except KeyError as e:
        print(f"Label mapping error: {e}. Ensure all labels are in {label_mapping}.")
        return {}

    # Verify mapped labels
    print("Mapped True Labels Sample:", true_labels_mapped[:5])
    print("Mapped Predicted Labels Sample:", predicted_labels_mapped[:5])

    # Calculate metrics (POS is treated as the positive class)
    accuracy = accuracy_score(true_labels_mapped, predicted_labels_mapped)
    precision = precision_score(true_labels_mapped, predicted_labels_mapped, pos_label=1, zero_division=0)
    recall = recall_score(true_labels_mapped, predicted_labels_mapped, pos_label=1, zero_division=0)
    f1 = f1_score(true_labels_mapped, predicted_labels_mapped, pos_label=1, zero_division=0)

    # Full classification report. `labels` is passed explicitly so the report
    # still has one row per class (and does not raise on a target_names length
    # mismatch) when only one class occurs in the gold and predicted labels.
    classification_rep = classification_report(
        true_labels_mapped,
        predicted_labels_mapped,
        labels=list(label_mapping.values()),
        target_names=list(label_mapping.keys()),
        zero_division=0,
    )

    # Print metrics
    print(f"\nAccuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_rep)

    # Return metrics as a dictionary
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "classification_report": classification_rep,
    }

## Flair embeddings: smoke test on the small dataset

In [11]:
# Pre-trained Flair language-model embeddings ("news-forward" and "news-backward").
forward_embedding, backward_embedding = (
    FlairEmbeddings(model_name) for model_name in ("news-forward", "news-backward")
)

# Document-level embedding: an RNN over the two Flair embeddings only.
rnn_settings = dict(
    hidden_size=256,                # adjust to the available compute
    reproject_words=True,           # project the word embeddings into a new space first
    reproject_words_dimension=256,  # size of that projected space
)
document_embeddings = DocumentRNNEmbeddings(
    embeddings=[forward_embedding, backward_embedding],
    **rnn_settings,
)

# Sentiment classifier on top of the document embedding.
flair_classifier = TextClassifier(
    label_type="sentiment",
    label_dictionary=label_dict,
    embeddings=document_embeddings,
)


In [8]:
# Emit INFO-level log records
logging.basicConfig(level=logging.INFO)

# Trainer for the Flair-embedding classifier on the small corpus (smoke test)
trainer = ModelTrainer(flair_classifier, small_corpus)

# Fine-tuning options, collected in one place
fine_tune_options = dict(
    base_path='flair_flair_small_test_model',  # directory for the model and logs
    learning_rate=5e-5,                        # learning rate
    mini_batch_size=8,                         # sentences per mini-batch
    max_epochs=3,                              # number of epochs
    embeddings_storage_mode='none',
    optimizer=AdamW,                           # optimizer class used by the trainer
    save_final_model=True,                     # save the final model
    save_model_each_k_epochs=1,                # save a checkpoint every epoch
    create_file_logs=True,                     # save logs to a file
    create_loss_file=True,                     # save loss values to a file
    use_final_model_for_eval=False,            # the log shows the best-epoch model being loaded for the final test
)

# Fine-tune the model
trainer.fine_tune(**fine_tune_options)


2024-12-04 18:53:56,071 ----------------------------------------------------------------------------------------------------
2024-12-04 18:53:56,071 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=4096, out_features=256, bias=True)
    (rnn): GRU(256, 256, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-12-04 18:54:17,774 epoch 1 - iter 1/7 - loss 0.69338685 - time (sec): 21.70 - samples/sec: 0.37 - lr: 0.000000 - momentum: 0.000000
2024-12-04 18:54:34,932 epoch 1 - iter 2/7 - loss 0.71438470 - time (sec): 38.85 - samples/sec: 0.41 - lr: 0.000025 - momentum: 0.000000
2024-12-04 18:54:50,813 epoch 1 - iter 3/7 - loss 0.70975542 - time (sec): 54.74 - samples/sec: 0.44 - lr: 0.000050 - momentum: 0.000000
2024-12-04 18:55:03,183 epoch 1 - iter 4/7 - loss 0.69591980 - time (sec): 67.10 - samples/sec: 0.48 - lr: 0.000048 - momentum: 0.000000
2024-12-04 18:55:08,366 epoch 1 - iter 5/7 - loss 0.69491612 - time (sec): 72.29 - samples/sec: 0.55 - lr: 0.000045 - momentum: 0.000000
2024-12-04 18:55:16,929 epoch 1 - iter 6/7 - loss 0.69524315 - time (sec): 80.85 - samples/sec: 0.59 - lr: 0.000043 - momentum: 0.000000
2024-12-04 18:55:37,075 epoch 1 - iter 7/7 - loss 0.68840644 - time (sec): 101.00 - samples/sec: 0.54 - lr: 0.000040 - momentum: 0.000000
2024-12-04 18:55:37,076 ----------------

100%|██████████| 2/2 [00:28<00:00, 14.15s/it]

2024-12-04 18:56:05,563 DEV : loss 0.6889662742614746 - f1-score (micro avg)  0.52
2024-12-04 18:56:05,598 saving best model





2024-12-04 18:56:05,783 ----------------------------------------------------------------------------------------------------
2024-12-04 18:56:26,566 epoch 2 - iter 1/7 - loss 0.66968471 - time (sec): 20.78 - samples/sec: 0.38 - lr: 0.000038 - momentum: 0.000000
2024-12-04 18:56:36,561 epoch 2 - iter 2/7 - loss 0.67560032 - time (sec): 30.78 - samples/sec: 0.52 - lr: 0.000036 - momentum: 0.000000
2024-12-04 18:56:49,392 epoch 2 - iter 3/7 - loss 0.68243100 - time (sec): 43.61 - samples/sec: 0.55 - lr: 0.000033 - momentum: 0.000000
2024-12-04 18:56:58,120 epoch 2 - iter 4/7 - loss 0.68006256 - time (sec): 52.34 - samples/sec: 0.61 - lr: 0.000031 - momentum: 0.000000
2024-12-04 18:57:05,295 epoch 2 - iter 5/7 - loss 0.67606875 - time (sec): 59.51 - samples/sec: 0.67 - lr: 0.000029 - momentum: 0.000000
2024-12-04 18:57:22,388 epoch 2 - iter 6/7 - loss 0.67890920 - time (sec): 76.61 - samples/sec: 0.63 - lr: 0.000026 - momentum: 0.000000
2024-12-04 18:57:44,074 epoch 2 - iter 7/7 - loss 0.6

100%|██████████| 2/2 [00:28<00:00, 14.11s/it]

2024-12-04 18:58:12,480 DEV : loss 0.6882741451263428 - f1-score (micro avg)  0.52
2024-12-04 18:58:12,514 ----------------------------------------------------------------------------------------------------





2024-12-04 18:58:34,363 epoch 3 - iter 1/7 - loss 0.64284366 - time (sec): 21.85 - samples/sec: 0.37 - lr: 0.000021 - momentum: 0.000000
2024-12-04 18:58:41,430 epoch 3 - iter 2/7 - loss 0.66006476 - time (sec): 28.92 - samples/sec: 0.55 - lr: 0.000019 - momentum: 0.000000
2024-12-04 18:58:58,659 epoch 3 - iter 3/7 - loss 0.66148293 - time (sec): 46.14 - samples/sec: 0.52 - lr: 0.000017 - momentum: 0.000000
2024-12-04 18:59:18,973 epoch 3 - iter 4/7 - loss 0.66381870 - time (sec): 66.46 - samples/sec: 0.48 - lr: 0.000014 - momentum: 0.000000
2024-12-04 18:59:31,865 epoch 3 - iter 5/7 - loss 0.67462469 - time (sec): 79.35 - samples/sec: 0.50 - lr: 0.000012 - momentum: 0.000000
2024-12-04 18:59:47,719 epoch 3 - iter 6/7 - loss 0.67824361 - time (sec): 95.20 - samples/sec: 0.50 - lr: 0.000010 - momentum: 0.000000
2024-12-04 18:59:56,358 epoch 3 - iter 7/7 - loss 0.67673341 - time (sec): 103.84 - samples/sec: 0.53 - lr: 0.000007 - momentum: 0.000000
2024-12-04 18:59:56,359 ----------------

100%|██████████| 2/2 [00:28<00:00, 14.09s/it]

2024-12-04 19:00:24,754 DEV : loss 0.686703085899353 - f1-score (micro avg)  0.56
2024-12-04 19:00:24,789 saving best model





2024-12-04 19:00:25,076 ----------------------------------------------------------------------------------------------------
2024-12-04 19:00:25,077 Loading model from best epoch ...


100%|██████████| 3/3 [01:05<00:00, 21.76s/it]

2024-12-04 19:01:30,524 
Results:
- F-score (micro) 0.5
- F-score (macro) 0.395
- Accuracy 0.5

By class:
              precision    recall  f1-score   support

         NEG     0.5238    0.8462    0.6471        26
         POS     0.3333    0.0909    0.1429        22

    accuracy                         0.5000        48
   macro avg     0.4286    0.4685    0.3950        48
weighted avg     0.4365    0.5000    0.4160        48

2024-12-04 19:01:30,524 ----------------------------------------------------------------------------------------------------





{'test_score': 0.5}

In [9]:
# Best checkpoint from the small-corpus Flair run; update this if the path or filename is different.
saved_model_path = "flair_flair_small_test_model/best-model.pt"

# Restore the classifier and move it to the active device.
flair_small_classifier = TextClassifier.load(saved_model_path)
flair_small_classifier = flair_small_classifier.to(flair.device)

# Score it on the small test split.
results = evaluate_model(flair_small_classifier, small_corpus.test)

Evaluating: 100%|██████████| 48/48 [01:33<00:00,  1.94s/it]

True Labels Sample: ['NEG', 'NEG', 'NEG', 'NEG', 'NEG']
Predicted Labels Sample: ['NEG', 'NEG', 'NEG', 'NEG', 'NEG']
True Label Distribution: Counter({'NEG': 26, 'POS': 22})
Predicted Label Distribution: Counter({'NEG': 42, 'POS': 6})
Mapped True Labels Sample: [0, 0, 0, 0, 0]
Mapped Predicted Labels Sample: [0, 0, 0, 0, 0]

Accuracy: 0.5000
Precision: 0.3333
Recall: 0.0909
F1 Score: 0.1429

Classification Report:
              precision    recall  f1-score   support

         NEG       0.52      0.85      0.65        26
         POS       0.33      0.09      0.14        22

    accuracy                           0.50        48
   macro avg       0.43      0.47      0.39        48
weighted avg       0.44      0.50      0.42        48






## Flair embeddings: training on the full dataset

In [12]:
# Emit INFO-level log records
logging.basicConfig(level=logging.INFO)

# Trainer for the Flair-embedding classifier on the full corpus.
# NOTE(review): `flair_classifier` is the same object the small-corpus smoke test
# trains; if that cell ran in this kernel, training here continues from those
# weights. Re-run the classifier construction cell first if a fresh start is intended.
trainer = ModelTrainer(flair_classifier, corpus)

# Fine-tuning options, collected in one place
fine_tune_options = dict(
    # NOTE(review): the directory name says "transformer" although this run trains
    # the Flair-embedding classifier; the evaluation cell loads from the same
    # path, so rename both together or not at all.
    base_path='flair_transformer_model',  # directory for the model and logs
    learning_rate=5e-5,                   # learning rate
    mini_batch_size=8,                    # sentences per mini-batch
    max_epochs=5,                         # number of epochs
    embeddings_storage_mode='gpu',
    optimizer=AdamW,                      # optimizer class used by the trainer
    save_final_model=True,                # save the final model
    save_model_each_k_epochs=1,           # save a checkpoint every epoch
    create_file_logs=True,                # save logs to a file
    create_loss_file=True,                # save loss values to a file
    use_final_model_for_eval=False,       # the log shows the best-epoch model being loaded for the final test
)

# Fine-tune the model
trainer.fine_tune(**fine_tune_options)

2024-12-04 19:23:03,117 ----------------------------------------------------------------------------------------------------
2024-12-04 19:23:03,118 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
    )
    (word_reprojection_map): Linear(in_features=4096, out_features=256, bias=True)
    (rnn): GRU(256, 256, batch_first=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.0, inplace=False)
  (locked_dropout): LockedDropout(p=0

100%|██████████| 157/157 [24:14<00:00,  9.26s/it]

2024-12-04 20:32:49,182 DEV : loss 0.5398534536361694 - f1-score (micro avg)  0.7296





2024-12-04 20:32:55,899 saving best model
2024-12-04 20:32:56,044 ----------------------------------------------------------------------------------------------------
2024-12-04 20:43:00,450 epoch 2 - iter 281/2813 - loss 0.56741641 - time (sec): 604.41 - samples/sec: 3.72 - lr: 0.000049 - momentum: 0.000000
2024-12-04 20:50:01,202 epoch 2 - iter 562/2813 - loss 0.56086464 - time (sec): 1025.16 - samples/sec: 4.39 - lr: 0.000049 - momentum: 0.000000
2024-12-04 20:56:53,066 epoch 2 - iter 843/2813 - loss 0.54737712 - time (sec): 1437.02 - samples/sec: 4.69 - lr: 0.000048 - momentum: 0.000000
2024-12-04 21:03:19,606 epoch 2 - iter 1124/2813 - loss 0.54221408 - time (sec): 1823.56 - samples/sec: 4.93 - lr: 0.000048 - momentum: 0.000000
2024-12-04 21:08:58,776 epoch 2 - iter 1405/2813 - loss 0.54341581 - time (sec): 2162.73 - samples/sec: 5.20 - lr: 0.000047 - momentum: 0.000000
2024-12-04 21:14:56,166 epoch 2 - iter 1686/2813 - loss 0.53647269 - time (sec): 2520.12 - samples/sec: 5.35 - l

100%|██████████| 157/157 [30:57<00:00, 11.83s/it] 

2024-12-04 22:10:17,480 DEV : loss 0.48584309220314026 - f1-score (micro avg)  0.7808





2024-12-04 22:10:23,991 saving best model
2024-12-04 22:10:24,219 ----------------------------------------------------------------------------------------------------
2024-12-04 22:15:41,810 epoch 3 - iter 281/2813 - loss 0.50164425 - time (sec): 317.59 - samples/sec: 7.08 - lr: 0.000044 - momentum: 0.000000
2024-12-04 22:20:43,911 epoch 3 - iter 562/2813 - loss 0.49488215 - time (sec): 619.69 - samples/sec: 7.26 - lr: 0.000043 - momentum: 0.000000
2024-12-04 22:25:53,694 epoch 3 - iter 843/2813 - loss 0.48452236 - time (sec): 929.47 - samples/sec: 7.26 - lr: 0.000043 - momentum: 0.000000
2024-12-04 22:31:21,616 epoch 3 - iter 1124/2813 - loss 0.48220646 - time (sec): 1257.40 - samples/sec: 7.15 - lr: 0.000042 - momentum: 0.000000
2024-12-04 22:36:44,266 epoch 3 - iter 1405/2813 - loss 0.48683788 - time (sec): 1580.05 - samples/sec: 7.11 - lr: 0.000042 - momentum: 0.000000
2024-12-04 22:42:36,763 epoch 3 - iter 1686/2813 - loss 0.48298356 - time (sec): 1932.54 - samples/sec: 6.98 - lr:

100%|██████████| 157/157 [31:20<00:00, 11.97s/it]

2024-12-04 23:37:48,166 DEV : loss 0.44944584369659424 - f1-score (micro avg)  0.8032





2024-12-04 23:37:55,052 saving best model
2024-12-04 23:37:55,272 ----------------------------------------------------------------------------------------------------
2024-12-04 23:42:54,870 epoch 4 - iter 281/2813 - loss 0.45180363 - time (sec): 299.60 - samples/sec: 7.50 - lr: 0.000038 - momentum: 0.000000
2024-12-04 23:48:15,175 epoch 4 - iter 562/2813 - loss 0.45040981 - time (sec): 619.90 - samples/sec: 7.25 - lr: 0.000038 - momentum: 0.000000
2024-12-04 23:53:23,181 epoch 4 - iter 843/2813 - loss 0.45444247 - time (sec): 927.91 - samples/sec: 7.27 - lr: 0.000037 - momentum: 0.000000
2024-12-04 23:58:33,535 epoch 4 - iter 1124/2813 - loss 0.45829335 - time (sec): 1238.26 - samples/sec: 7.26 - lr: 0.000037 - momentum: 0.000000
2024-12-05 00:04:10,761 epoch 4 - iter 1405/2813 - loss 0.45855153 - time (sec): 1575.49 - samples/sec: 7.13 - lr: 0.000036 - momentum: 0.000000
2024-12-05 00:09:40,754 epoch 4 - iter 1686/2813 - loss 0.45490447 - time (sec): 1905.48 - samples/sec: 7.08 - lr:

100%|██████████| 157/157 [18:16<00:00,  6.99s/it]

2024-12-05 00:48:50,565 DEV : loss 0.46021169424057007 - f1-score (micro avg)  0.8092





2024-12-05 00:48:57,058 saving best model
2024-12-05 00:48:57,225 ----------------------------------------------------------------------------------------------------
2024-12-05 00:53:21,823 epoch 5 - iter 281/2813 - loss 0.43247064 - time (sec): 264.60 - samples/sec: 8.50 - lr: 0.000033 - momentum: 0.000000
2024-12-05 00:57:42,689 epoch 5 - iter 562/2813 - loss 0.42157177 - time (sec): 525.46 - samples/sec: 8.56 - lr: 0.000032 - momentum: 0.000000
2024-12-05 01:02:09,619 epoch 5 - iter 843/2813 - loss 0.42211544 - time (sec): 792.39 - samples/sec: 8.51 - lr: 0.000032 - momentum: 0.000000
2024-12-05 01:06:28,720 epoch 5 - iter 1124/2813 - loss 0.42720328 - time (sec): 1051.49 - samples/sec: 8.55 - lr: 0.000031 - momentum: 0.000000
2024-12-05 01:10:51,207 epoch 5 - iter 1405/2813 - loss 0.42549192 - time (sec): 1313.98 - samples/sec: 8.55 - lr: 0.000031 - momentum: 0.000000
2024-12-05 01:14:54,121 epoch 5 - iter 1686/2813 - loss 0.43022526 - time (sec): 1556.90 - samples/sec: 8.66 - lr:

100%|██████████| 157/157 [06:55<00:00,  2.65s/it]

2024-12-05 01:39:53,353 DEV : loss 0.4273413419723511 - f1-score (micro avg)  0.7988





2024-12-05 01:39:59,715 ----------------------------------------------------------------------------------------------------
2024-12-05 01:40:03,464 ----------------------------------------------------------------------------------------------------
2024-12-05 01:40:03,464 Exiting from training early.
2024-12-05 01:40:03,465 Saving model ...
2024-12-05 01:40:03,678 Done.
2024-12-05 01:40:03,679 ----------------------------------------------------------------------------------------------------
2024-12-05 01:40:03,679 Loading model from best epoch ...


  1%|          | 11/1563 [00:15<36:29,  1.41s/it]


KeyboardInterrupt: 

In [None]:
# Best checkpoint from the full-corpus Flair run; update this if the path or filename is different.
saved_model_path = "flair_transformer_model/best-model.pt"

# Restore the classifier and move it to the active device.
flair_classifier = TextClassifier.load(saved_model_path)
flair_classifier = flair_classifier.to(flair.device)

# Score it on the full test split.
results = evaluate_model(flair_classifier, corpus.test)

## Transformer classifier: smoke test on the small dataset

In [7]:
# Document embedding backed by a transformer model.
transformer_settings = dict(
    model="roberta-base",  # transformer model of choice
    fine_tune=True,        # fine-tune the transformer model
    layers="-1",           # use the last layer for the representation
)
transformer_embedding = TransformerDocumentEmbeddings(**transformer_settings)

# Sentiment classifier on top of the transformer embedding, moved to the active device.
transformer_classifier = TextClassifier(
    embeddings=transformer_embedding,
    label_dictionary=label_dict,
    label_type="sentiment",
)
transformer_classifier = transformer_classifier.to(flair.device)

In [12]:
# Emit INFO-level log records
logging.basicConfig(level=logging.INFO)

# Trainer for the transformer classifier on the small corpus (smoke test)
trainer = ModelTrainer(transformer_classifier, small_corpus)

# Fine-tuning options, collected in one place
fine_tune_options = dict(
    base_path='flair_transformer_small_test_model',  # directory for the model and logs
    learning_rate=5e-5,                              # learning rate for fine-tuning
    mini_batch_size=8,                               # small batch size for the transformer
    max_epochs=2,                                    # number of epochs
    embeddings_storage_mode='gpu',
    optimizer=AdamW,                                 # optimizer class used by the trainer
    save_final_model=True,                           # save the final model
    save_model_each_k_epochs=1,                      # save a checkpoint every epoch
    create_file_logs=True,                           # save logs to a file
    create_loss_file=True,                           # save loss values to a file
    use_final_model_for_eval=False,                  # the log shows the best-epoch model being loaded for the final test
)

# Fine-tune the model
trainer.fine_tune(**fine_tune_options)


2024-12-04 19:10:38,394 ----------------------------------------------------------------------------------------------------
2024-12-04 19:10:38,395 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-12-04 19:10:44,409 epoch 1 - iter 1/7 - loss 1.04363453 - time (sec): 6.00 - samples/sec: 1.33 - lr: 0.000000 - momentum: 0.000000
2024-12-04 19:10:47,945 epoch 1 - iter 2/7 - loss 0.90796769 - time (sec): 9.54 - samples/sec: 1.68 - lr: 0.000050 - momentum: 0.000000
2024-12-04 19:10:53,608 epoch 1 - iter 3/7 - loss 0.79996492 - time (sec): 15.20 - samples/sec: 1.58 - lr: 0.000046 - momentum: 0.000000
2024-12-04 19:10:57,539 epoch 1 - iter 4/7 - loss 0.77897805 - time (sec): 19.13 - samples/sec: 1.67 - lr: 0.000043 - momentum: 0.000000
2024-12-04 19:11:03,182 epoch 1 - iter 5/7 - loss 0.77820672 - time (sec): 24.77 - samples/sec: 1.61 - lr: 0.000039 - momentum: 0.000000
2024-12-04 19:11:08,492 epoch 1 - iter 6/7 - loss 0.79851161 - time (sec): 30.09 - samples/sec: 1.60 - lr: 0.000036 - momentum: 0.000000
2024-12-04 19:11:12,155 epoch 1 - iter 7/7 - loss 0.79783030 - time (sec): 33.75 - samples/sec: 1.63 - lr: 0.000032 - momentum: 0.000000
2024-12-04 19:11:12,156 -------------------

100%|██████████| 2/2 [00:03<00:00,  1.88s/it]

2024-12-04 19:11:16,578 DEV : loss 0.6676883697509766 - f1-score (micro avg)  0.64
2024-12-04 19:11:16,612 saving best model





2024-12-04 19:11:17,265 ----------------------------------------------------------------------------------------------------
2024-12-04 19:11:21,959 epoch 2 - iter 1/7 - loss 0.53675771 - time (sec): 4.69 - samples/sec: 1.70 - lr: 0.000029 - momentum: 0.000000
2024-12-04 19:11:27,532 epoch 2 - iter 2/7 - loss 0.59745705 - time (sec): 10.26 - samples/sec: 1.56 - lr: 0.000025 - momentum: 0.000000
2024-12-04 19:11:32,873 epoch 2 - iter 3/7 - loss 0.61329925 - time (sec): 15.61 - samples/sec: 1.54 - lr: 0.000021 - momentum: 0.000000
2024-12-04 19:11:38,124 epoch 2 - iter 4/7 - loss 0.65516320 - time (sec): 20.86 - samples/sec: 1.53 - lr: 0.000018 - momentum: 0.000000
2024-12-04 19:11:43,435 epoch 2 - iter 5/7 - loss 0.66054441 - time (sec): 26.17 - samples/sec: 1.53 - lr: 0.000014 - momentum: 0.000000
2024-12-04 19:11:49,424 epoch 2 - iter 6/7 - loss 0.65232339 - time (sec): 32.16 - samples/sec: 1.49 - lr: 0.000011 - momentum: 0.000000
2024-12-04 19:11:52,102 epoch 2 - iter 7/7 - loss 0.66

100%|██████████| 2/2 [00:03<00:00,  1.84s/it]

2024-12-04 19:11:56,426 DEV : loss 0.6994249224662781 - f1-score (micro avg)  0.36





2024-12-04 19:11:57,119 ----------------------------------------------------------------------------------------------------
2024-12-04 19:11:57,120 Loading model from best epoch ...


100%|██████████| 3/3 [00:06<00:00,  2.30s/it]

2024-12-04 19:12:06,212 
Results:
- F-score (micro) 0.5625
- F-score (macro) 0.3996
- Accuracy 0.5625

By class:
              precision    recall  f1-score   support

         NEG     0.5532    1.0000    0.7123        26
         POS     1.0000    0.0455    0.0870        22

    accuracy                         0.5625        48
   macro avg     0.7766    0.5227    0.3996        48
weighted avg     0.7580    0.5625    0.4257        48

2024-12-04 19:12:06,212 ----------------------------------------------------------------------------------------------------





{'test_score': 0.5625}

In [13]:
# Best checkpoint from the small-corpus transformer run; update this if the path or filename is different.
saved_model_path = "flair_transformer_small_test_model/best-model.pt"

# Restore the classifier and move it to the active device.
transformer_small_classifier = TextClassifier.load(saved_model_path)
transformer_small_classifier = transformer_small_classifier.to(flair.device)

# Score it on the small test split.
results = evaluate_model(transformer_small_classifier, small_corpus.test)

Evaluating: 100%|██████████| 48/48 [00:03<00:00, 13.25it/s]

True Labels Sample: ['NEG', 'NEG', 'NEG', 'NEG', 'NEG']
Predicted Labels Sample: ['NEG', 'NEG', 'NEG', 'NEG', 'NEG']
True Label Distribution: Counter({'NEG': 26, 'POS': 22})
Predicted Label Distribution: Counter({'NEG': 47, 'POS': 1})
Mapped True Labels Sample: [0, 0, 0, 0, 0]
Mapped Predicted Labels Sample: [0, 0, 0, 0, 0]

Accuracy: 0.5625
Precision: 1.0000
Recall: 0.0455
F1 Score: 0.0870

Classification Report:
              precision    recall  f1-score   support

         NEG       0.55      1.00      0.71        26
         POS       1.00      0.05      0.09        22

    accuracy                           0.56        48
   macro avg       0.78      0.52      0.40        48
weighted avg       0.76      0.56      0.43        48






## Transformer classifier: training on the full dataset

In [7]:
# Emit INFO-level log records
logging.basicConfig(level=logging.INFO)

# Trainer for the transformer classifier on the full corpus.
# NOTE(review): `transformer_classifier` is the same object the small-corpus smoke
# test trains; if that cell ran in this kernel, training here continues from those
# weights. Re-run the classifier construction cell first if a fresh start is intended.
trainer = ModelTrainer(transformer_classifier, corpus)

# Fine-tuning options, collected in one place
fine_tune_options = dict(
    base_path='flair_roberta_transformer_model',  # directory for the model and logs
    learning_rate=5e-5,                           # learning rate for fine-tuning
    mini_batch_size=8,                            # small batch size for the transformer
    max_epochs=5,                                 # number of epochs
    embeddings_storage_mode='gpu',
    optimizer=AdamW,                              # optimizer class used by the trainer
    save_final_model=True,                        # save the final model
    save_model_each_k_epochs=1,                   # save a checkpoint every epoch
    create_file_logs=True,                        # save logs to a file
    create_loss_file=True,                        # save loss values to a file
    use_final_model_for_eval=False,               # NOTE(review): presumably evaluates the best-epoch model, as in the small runs — confirm
)

# Fine-tune the model
trainer.fine_tune(**fine_tune_options)


2024-12-05 10:54:03,697 ----------------------------------------------------------------------------------------------------
2024-12-05 10:54:03,698 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_feat

  attn_output = torch.nn.functional.scaled_dot_product_attention(


2024-12-05 10:54:52,777 epoch 1 - iter 281/2813 - loss 0.55673593 - time (sec): 49.07 - samples/sec: 45.82 - lr: 0.000010 - momentum: 0.000000
2024-12-05 10:55:40,834 epoch 1 - iter 562/2813 - loss 0.48512997 - time (sec): 97.12 - samples/sec: 46.29 - lr: 0.000020 - momentum: 0.000000
2024-12-05 10:56:28,486 epoch 1 - iter 843/2813 - loss 0.46255406 - time (sec): 144.78 - samples/sec: 46.58 - lr: 0.000030 - momentum: 0.000000
2024-12-05 10:57:16,357 epoch 1 - iter 1124/2813 - loss 0.44995912 - time (sec): 192.65 - samples/sec: 46.68 - lr: 0.000040 - momentum: 0.000000
2024-12-05 10:58:04,398 epoch 1 - iter 1405/2813 - loss 0.43937684 - time (sec): 240.69 - samples/sec: 46.70 - lr: 0.000050 - momentum: 0.000000
2024-12-05 10:58:52,167 epoch 1 - iter 1686/2813 - loss 0.43399941 - time (sec): 288.46 - samples/sec: 46.76 - lr: 0.000049 - momentum: 0.000000
2024-12-05 10:59:39,828 epoch 1 - iter 1967/2813 - loss 0.43466675 - time (sec): 336.12 - samples/sec: 46.82 - lr: 0.000048 - momentum:

100%|██████████| 157/157 [00:25<00:00,  6.13it/s]

2024-12-05 11:02:27,397 DEV : loss 0.257555216550827 - f1-score (micro avg)  0.92





2024-12-05 11:02:33,898 saving best model
2024-12-05 11:02:35,092 ----------------------------------------------------------------------------------------------------
2024-12-05 11:03:23,040 epoch 2 - iter 281/2813 - loss 0.30794823 - time (sec): 47.95 - samples/sec: 46.88 - lr: 0.000043 - momentum: 0.000000
2024-12-05 11:04:10,863 epoch 2 - iter 562/2813 - loss 0.28643736 - time (sec): 95.77 - samples/sec: 46.95 - lr: 0.000042 - momentum: 0.000000
2024-12-05 11:04:58,139 epoch 2 - iter 843/2813 - loss 0.28425419 - time (sec): 143.05 - samples/sec: 47.15 - lr: 0.000041 - momentum: 0.000000
2024-12-05 11:05:45,386 epoch 2 - iter 1124/2813 - loss 0.26957155 - time (sec): 190.29 - samples/sec: 47.25 - lr: 0.000040 - momentum: 0.000000
2024-12-05 11:06:32,702 epoch 2 - iter 1405/2813 - loss 0.27634417 - time (sec): 237.61 - samples/sec: 47.30 - lr: 0.000039 - momentum: 0.000000
2024-12-05 11:07:19,994 epoch 2 - iter 1686/2813 - loss 0.27581874 - time (sec): 284.90 - samples/sec: 47.34 - lr

100%|██████████| 157/157 [00:25<00:00,  6.04it/s]

2024-12-05 11:10:55,868 DEV : loss 0.46414676308631897 - f1-score (micro avg)  0.918





2024-12-05 11:11:02,823 ----------------------------------------------------------------------------------------------------
2024-12-05 11:11:50,782 epoch 3 - iter 281/2813 - loss 0.16105546 - time (sec): 47.96 - samples/sec: 46.88 - lr: 0.000032 - momentum: 0.000000
2024-12-05 11:12:38,259 epoch 3 - iter 562/2813 - loss 0.16029461 - time (sec): 95.43 - samples/sec: 47.11 - lr: 0.000031 - momentum: 0.000000
2024-12-05 11:13:25,502 epoch 3 - iter 843/2813 - loss 0.16207369 - time (sec): 142.68 - samples/sec: 47.27 - lr: 0.000030 - momentum: 0.000000
2024-12-05 11:14:13,163 epoch 3 - iter 1124/2813 - loss 0.15758802 - time (sec): 190.34 - samples/sec: 47.24 - lr: 0.000029 - momentum: 0.000000
2024-12-05 11:15:00,483 epoch 3 - iter 1405/2813 - loss 0.15653109 - time (sec): 237.66 - samples/sec: 47.29 - lr: 0.000028 - momentum: 0.000000
2024-12-05 11:15:48,244 epoch 3 - iter 1686/2813 - loss 0.16046691 - time (sec): 285.42 - samples/sec: 47.26 - lr: 0.000027 - momentum: 0.000000
2024-12-05

100%|██████████| 157/157 [00:26<00:00,  6.00it/s]

2024-12-05 11:19:24,708 DEV : loss 0.4041275084018707 - f1-score (micro avg)  0.9308





2024-12-05 11:19:31,375 saving best model
2024-12-05 11:19:32,480 ----------------------------------------------------------------------------------------------------
2024-12-05 11:20:20,812 epoch 4 - iter 281/2813 - loss 0.08424391 - time (sec): 48.33 - samples/sec: 46.51 - lr: 0.000021 - momentum: 0.000000
2024-12-05 11:21:08,211 epoch 4 - iter 562/2813 - loss 0.07993533 - time (sec): 95.73 - samples/sec: 46.97 - lr: 0.000020 - momentum: 0.000000
2024-12-05 11:21:54,955 epoch 4 - iter 843/2813 - loss 0.07658078 - time (sec): 142.47 - samples/sec: 47.34 - lr: 0.000019 - momentum: 0.000000
2024-12-05 11:22:42,856 epoch 4 - iter 1124/2813 - loss 0.08071680 - time (sec): 190.37 - samples/sec: 47.23 - lr: 0.000018 - momentum: 0.000000
2024-12-05 11:23:30,963 epoch 4 - iter 1405/2813 - loss 0.08696325 - time (sec): 238.48 - samples/sec: 47.13 - lr: 0.000017 - momentum: 0.000000
2024-12-05 11:24:19,891 epoch 4 - iter 1686/2813 - loss 0.08438743 - time (sec): 287.41 - samples/sec: 46.93 - lr

100%|██████████| 157/157 [00:25<00:00,  6.14it/s]

2024-12-05 11:27:56,410 DEV : loss 0.3604690432548523 - f1-score (micro avg)  0.942





2024-12-05 11:28:03,112 saving best model
2024-12-05 11:28:04,161 ----------------------------------------------------------------------------------------------------
2024-12-05 11:28:51,516 epoch 5 - iter 281/2813 - loss 0.03934556 - time (sec): 47.35 - samples/sec: 47.47 - lr: 0.000010 - momentum: 0.000000
2024-12-05 11:29:38,058 epoch 5 - iter 562/2813 - loss 0.04708834 - time (sec): 93.89 - samples/sec: 47.88 - lr: 0.000009 - momentum: 0.000000
2024-12-05 11:30:24,683 epoch 5 - iter 843/2813 - loss 0.05039754 - time (sec): 140.52 - samples/sec: 47.99 - lr: 0.000008 - momentum: 0.000000
2024-12-05 11:31:12,197 epoch 5 - iter 1124/2813 - loss 0.04746460 - time (sec): 188.03 - samples/sec: 47.82 - lr: 0.000007 - momentum: 0.000000
2024-12-05 11:32:00,227 epoch 5 - iter 1405/2813 - loss 0.04636933 - time (sec): 236.06 - samples/sec: 47.61 - lr: 0.000006 - momentum: 0.000000
2024-12-05 11:32:47,304 epoch 5 - iter 1686/2813 - loss 0.04505450 - time (sec): 283.14 - samples/sec: 47.64 - lr

100%|██████████| 157/157 [00:25<00:00,  6.14it/s]

2024-12-05 11:36:24,884 DEV : loss 0.4084957540035248 - f1-score (micro avg)  0.9412





2024-12-05 11:36:32,431 ----------------------------------------------------------------------------------------------------
2024-12-05 11:36:32,433 Loading model from best epoch ...


100%|██████████| 1563/1563 [04:15<00:00,  6.13it/s]


2024-12-05 11:40:51,445 
Results:
- F-score (micro) 0.9429
- F-score (macro) 0.9429
- Accuracy 0.9429

By class:
              precision    recall  f1-score   support

         POS     0.9317    0.9559    0.9437     12500
         NEG     0.9547    0.9299    0.9422     12500

    accuracy                         0.9429     25000
   macro avg     0.9432    0.9429    0.9429     25000
weighted avg     0.9432    0.9429    0.9429     25000

2024-12-05 11:40:51,446 ----------------------------------------------------------------------------------------------------


{'test_score': 0.94292}

In [8]:
# 10-epoch variant of the fine-tuning run; results go to their own directory.
#
# NOTE(review): `transformer_classifier` is the same object that the 5-epoch
# training cell above hands to its trainer. Unless the classifier is re-created
# in between, this run would start from already fine-tuned weights. The recorded
# log starts at loss ~0.58, which looks like a fresh model - presumably the model
# cell was re-run before this one. Confirm, and rebuild the classifier before
# this cell so that Restart & Run All reproduces the recorded numbers.

# Ask the root logger for INFO-level output while training.
# (logging.basicConfig is a no-op if the root logger already has handlers.)
logging.basicConfig(level=logging.INFO)

# Trainer over the full corpus.
trainer = ModelTrainer(transformer_classifier, corpus)

# The call is left as the cell's last expression so its return value is displayed.
trainer.fine_tune(
    # --- where results go ---
    base_path='flair_roberta_transformer_10_model',
    save_final_model=True,           # also keep the model as it is after the last epoch
    save_model_each_k_epochs=1,      # write a checkpoint after every epoch
    create_file_logs=True,           # mirror the training log to a file
    create_loss_file=True,           # record loss values in a file
    # --- optimisation ---
    optimizer=AdamW,
    learning_rate=5e-5,              # peak value: the logged lr ramps up towards 0.00005, then decays
    mini_batch_size=8,               # sentences per mini-batch
    max_epochs=10,
    # NOTE(review): presumably keeps computed embeddings on the GPU between uses;
    # confirm this is useful when the transformer itself is being fine-tuned.
    embeddings_storage_mode='gpu',
    # --- final evaluation ---
    use_final_model_for_eval=False,  # test with the best checkpoint ("Loading model from best epoch ...")
)


2024-12-05 11:43:51,397 ----------------------------------------------------------------------------------------------------
2024-12-05 11:43:51,398 Model: "TextClassifier(
  (embeddings): TransformerDocumentEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50266, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_feat

  attn_output = torch.nn.functional.scaled_dot_product_attention(


2024-12-05 11:44:39,316 epoch 1 - iter 281/2813 - loss 0.58373917 - time (sec): 47.91 - samples/sec: 46.92 - lr: 0.000005 - momentum: 0.000000
2024-12-05 11:45:27,015 epoch 1 - iter 562/2813 - loss 0.47220784 - time (sec): 95.61 - samples/sec: 47.03 - lr: 0.000010 - momentum: 0.000000
2024-12-05 11:46:14,462 epoch 1 - iter 843/2813 - loss 0.44223183 - time (sec): 143.05 - samples/sec: 47.14 - lr: 0.000015 - momentum: 0.000000
2024-12-05 11:47:02,329 epoch 1 - iter 1124/2813 - loss 0.42621069 - time (sec): 190.92 - samples/sec: 47.10 - lr: 0.000020 - momentum: 0.000000
2024-12-05 11:47:50,144 epoch 1 - iter 1405/2813 - loss 0.41301586 - time (sec): 238.74 - samples/sec: 47.08 - lr: 0.000025 - momentum: 0.000000
2024-12-05 11:48:38,050 epoch 1 - iter 1686/2813 - loss 0.40821100 - time (sec): 286.64 - samples/sec: 47.06 - lr: 0.000030 - momentum: 0.000000
2024-12-05 11:49:25,752 epoch 1 - iter 1967/2813 - loss 0.40049963 - time (sec): 334.34 - samples/sec: 47.07 - lr: 0.000035 - momentum:

100%|██████████| 157/157 [00:26<00:00,  6.00it/s]

2024-12-05 11:52:15,965 DEV : loss 0.7565402388572693 - f1-score (micro avg)  0.8396





2024-12-05 11:52:23,490 saving best model
2024-12-05 11:52:24,363 ----------------------------------------------------------------------------------------------------
2024-12-05 11:53:13,511 epoch 2 - iter 281/2813 - loss 0.32835130 - time (sec): 49.15 - samples/sec: 45.74 - lr: 0.000049 - momentum: 0.000000
2024-12-05 11:54:01,231 epoch 2 - iter 562/2813 - loss 0.32176834 - time (sec): 96.87 - samples/sec: 46.41 - lr: 0.000049 - momentum: 0.000000
2024-12-05 11:54:48,502 epoch 2 - iter 843/2813 - loss 0.33887666 - time (sec): 144.14 - samples/sec: 46.79 - lr: 0.000048 - momentum: 0.000000
2024-12-05 11:55:36,311 epoch 2 - iter 1124/2813 - loss 0.33466865 - time (sec): 191.95 - samples/sec: 46.85 - lr: 0.000048 - momentum: 0.000000
2024-12-05 11:56:22,962 epoch 2 - iter 1405/2813 - loss 0.33086620 - time (sec): 238.60 - samples/sec: 47.11 - lr: 0.000047 - momentum: 0.000000
2024-12-05 11:57:09,412 epoch 2 - iter 1686/2813 - loss 0.31990122 - time (sec): 285.05 - samples/sec: 47.32 - lr

100%|██████████| 157/157 [00:25<00:00,  6.19it/s]

2024-12-05 12:00:42,742 DEV : loss 0.32830020785331726 - f1-score (micro avg)  0.9332





2024-12-05 12:00:49,260 saving best model
2024-12-05 12:00:49,717 ----------------------------------------------------------------------------------------------------
2024-12-05 12:01:37,252 epoch 3 - iter 281/2813 - loss 0.18962833 - time (sec): 47.53 - samples/sec: 47.29 - lr: 0.000044 - momentum: 0.000000
2024-12-05 12:02:24,812 epoch 3 - iter 562/2813 - loss 0.20125464 - time (sec): 95.09 - samples/sec: 47.28 - lr: 0.000043 - momentum: 0.000000
2024-12-05 12:03:12,134 epoch 3 - iter 843/2813 - loss 0.20365776 - time (sec): 142.41 - samples/sec: 47.35 - lr: 0.000043 - momentum: 0.000000
2024-12-05 12:04:00,423 epoch 3 - iter 1124/2813 - loss 0.21156227 - time (sec): 190.70 - samples/sec: 47.15 - lr: 0.000042 - momentum: 0.000000
2024-12-05 12:04:46,660 epoch 3 - iter 1405/2813 - loss 0.21561870 - time (sec): 236.94 - samples/sec: 47.44 - lr: 0.000042 - momentum: 0.000000
2024-12-05 12:05:33,888 epoch 3 - iter 1686/2813 - loss 0.21712826 - time (sec): 284.17 - samples/sec: 47.46 - lr

100%|██████████| 157/157 [00:25<00:00,  6.16it/s]

2024-12-05 12:09:07,952 DEV : loss 0.3040253520011902 - f1-score (micro avg)  0.9356





2024-12-05 12:09:14,812 saving best model
2024-12-05 12:09:15,343 ----------------------------------------------------------------------------------------------------
2024-12-05 12:10:02,921 epoch 4 - iter 281/2813 - loss 0.12074864 - time (sec): 47.58 - samples/sec: 47.25 - lr: 0.000038 - momentum: 0.000000
2024-12-05 12:10:50,157 epoch 4 - iter 562/2813 - loss 0.13219092 - time (sec): 94.81 - samples/sec: 47.42 - lr: 0.000038 - momentum: 0.000000
2024-12-05 12:11:37,081 epoch 4 - iter 843/2813 - loss 0.14922086 - time (sec): 141.74 - samples/sec: 47.58 - lr: 0.000037 - momentum: 0.000000
2024-12-05 12:12:23,143 epoch 4 - iter 1124/2813 - loss 0.16284989 - time (sec): 187.80 - samples/sec: 47.88 - lr: 0.000037 - momentum: 0.000000
2024-12-05 12:13:09,798 epoch 4 - iter 1405/2813 - loss 0.16361103 - time (sec): 234.45 - samples/sec: 47.94 - lr: 0.000036 - momentum: 0.000000
2024-12-05 12:13:56,641 epoch 4 - iter 1686/2813 - loss 0.17118494 - time (sec): 281.30 - samples/sec: 47.95 - lr

100%|██████████| 157/157 [00:25<00:00,  6.09it/s]

2024-12-05 12:17:30,810 DEV : loss 0.4105227291584015 - f1-score (micro avg)  0.9356





2024-12-05 12:17:37,383 ----------------------------------------------------------------------------------------------------
2024-12-05 12:18:24,508 epoch 5 - iter 281/2813 - loss 0.10495093 - time (sec): 47.12 - samples/sec: 47.70 - lr: 0.000033 - momentum: 0.000000
2024-12-05 12:19:11,754 epoch 5 - iter 562/2813 - loss 0.10767603 - time (sec): 94.37 - samples/sec: 47.64 - lr: 0.000032 - momentum: 0.000000
2024-12-05 12:19:58,018 epoch 5 - iter 843/2813 - loss 0.11383192 - time (sec): 140.63 - samples/sec: 47.95 - lr: 0.000032 - momentum: 0.000000
2024-12-05 12:20:44,566 epoch 5 - iter 1124/2813 - loss 0.11603135 - time (sec): 187.18 - samples/sec: 48.04 - lr: 0.000031 - momentum: 0.000000
2024-12-05 12:21:30,844 epoch 5 - iter 1405/2813 - loss 0.12203325 - time (sec): 233.46 - samples/sec: 48.15 - lr: 0.000031 - momentum: 0.000000
2024-12-05 12:22:16,933 epoch 5 - iter 1686/2813 - loss 0.12418234 - time (sec): 279.55 - samples/sec: 48.25 - lr: 0.000030 - momentum: 0.000000
2024-12-05

100%|██████████| 157/157 [00:26<00:00,  6.03it/s]

2024-12-05 12:25:49,475 DEV : loss 0.35577234625816345 - f1-score (micro avg)  0.9376





2024-12-05 12:25:56,216 saving best model
2024-12-05 12:25:56,727 ----------------------------------------------------------------------------------------------------
2024-12-05 12:26:43,379 epoch 6 - iter 281/2813 - loss 0.06666695 - time (sec): 46.65 - samples/sec: 48.19 - lr: 0.000027 - momentum: 0.000000
2024-12-05 12:27:30,278 epoch 6 - iter 562/2813 - loss 0.07470844 - time (sec): 93.55 - samples/sec: 48.06 - lr: 0.000027 - momentum: 0.000000
2024-12-05 12:28:16,821 epoch 6 - iter 843/2813 - loss 0.07611373 - time (sec): 140.09 - samples/sec: 48.14 - lr: 0.000026 - momentum: 0.000000
2024-12-05 12:29:04,165 epoch 6 - iter 1124/2813 - loss 0.08337160 - time (sec): 187.44 - samples/sec: 47.97 - lr: 0.000026 - momentum: 0.000000
2024-12-05 12:29:51,207 epoch 6 - iter 1405/2813 - loss 0.08263908 - time (sec): 234.48 - samples/sec: 47.94 - lr: 0.000025 - momentum: 0.000000
2024-12-05 12:30:37,847 epoch 6 - iter 1686/2813 - loss 0.08722331 - time (sec): 281.12 - samples/sec: 47.98 - lr

100%|██████████| 157/157 [00:25<00:00,  6.17it/s]


2024-12-05 12:34:09,861 DEV : loss 0.38221585750579834 - f1-score (micro avg)  0.9376
2024-12-05 12:34:16,741 ----------------------------------------------------------------------------------------------------
2024-12-05 12:35:02,455 epoch 7 - iter 281/2813 - loss 0.07888148 - time (sec): 45.71 - samples/sec: 49.18 - lr: 0.000022 - momentum: 0.000000
2024-12-05 12:35:48,471 epoch 7 - iter 562/2813 - loss 0.06912298 - time (sec): 91.73 - samples/sec: 49.01 - lr: 0.000021 - momentum: 0.000000
2024-12-05 12:36:34,191 epoch 7 - iter 843/2813 - loss 0.07083683 - time (sec): 137.45 - samples/sec: 49.07 - lr: 0.000021 - momentum: 0.000000
2024-12-05 12:37:20,014 epoch 7 - iter 1124/2813 - loss 0.06223230 - time (sec): 183.27 - samples/sec: 49.06 - lr: 0.000020 - momentum: 0.000000
2024-12-05 12:38:05,926 epoch 7 - iter 1405/2813 - loss 0.06322601 - time (sec): 229.18 - samples/sec: 49.04 - lr: 0.000019 - momentum: 0.000000
2024-12-05 12:38:52,003 epoch 7 - iter 1686/2813 - loss 0.06497332 - 

100%|██████████| 157/157 [00:25<00:00,  6.13it/s]

2024-12-05 12:42:23,696 DEV : loss 0.37941882014274597 - f1-score (micro avg)  0.9436





2024-12-05 12:42:30,323 saving best model
2024-12-05 12:42:30,826 ----------------------------------------------------------------------------------------------------
2024-12-05 12:43:18,447 epoch 8 - iter 281/2813 - loss 0.04706603 - time (sec): 47.62 - samples/sec: 47.21 - lr: 0.000016 - momentum: 0.000000
2024-12-05 12:44:04,907 epoch 8 - iter 562/2813 - loss 0.04148793 - time (sec): 94.08 - samples/sec: 47.79 - lr: 0.000016 - momentum: 0.000000
2024-12-05 12:44:50,845 epoch 8 - iter 843/2813 - loss 0.03838554 - time (sec): 140.02 - samples/sec: 48.17 - lr: 0.000015 - momentum: 0.000000
2024-12-05 12:45:35,972 epoch 8 - iter 1124/2813 - loss 0.03788182 - time (sec): 185.14 - samples/sec: 48.57 - lr: 0.000014 - momentum: 0.000000
2024-12-05 12:46:22,105 epoch 8 - iter 1405/2813 - loss 0.04548763 - time (sec): 231.28 - samples/sec: 48.60 - lr: 0.000014 - momentum: 0.000000
2024-12-05 12:47:08,158 epoch 8 - iter 1686/2813 - loss 0.04615122 - time (sec): 277.33 - samples/sec: 48.64 - lr

100%|██████████| 157/157 [00:25<00:00,  6.18it/s]


2024-12-05 12:50:38,157 DEV : loss 0.42150649428367615 - f1-score (micro avg)  0.9412
2024-12-05 12:50:44,746 ----------------------------------------------------------------------------------------------------
2024-12-05 12:51:31,240 epoch 9 - iter 281/2813 - loss 0.03191176 - time (sec): 46.49 - samples/sec: 48.35 - lr: 0.000011 - momentum: 0.000000
2024-12-05 12:52:17,132 epoch 9 - iter 562/2813 - loss 0.02966251 - time (sec): 92.38 - samples/sec: 48.67 - lr: 0.000010 - momentum: 0.000000
2024-12-05 12:53:02,815 epoch 9 - iter 843/2813 - loss 0.03044990 - time (sec): 138.07 - samples/sec: 48.85 - lr: 0.000009 - momentum: 0.000000
2024-12-05 12:53:48,809 epoch 9 - iter 1124/2813 - loss 0.03087598 - time (sec): 184.06 - samples/sec: 48.85 - lr: 0.000009 - momentum: 0.000000
2024-12-05 12:54:35,002 epoch 9 - iter 1405/2813 - loss 0.02867330 - time (sec): 230.25 - samples/sec: 48.82 - lr: 0.000008 - momentum: 0.000000
2024-12-05 12:55:20,682 epoch 9 - iter 1686/2813 - loss 0.02895149 - 

100%|██████████| 157/157 [00:25<00:00,  6.17it/s]

2024-12-05 12:58:51,635 DEV : loss 0.42656537890434265 - f1-score (micro avg)  0.9448





2024-12-05 12:58:58,271 saving best model
2024-12-05 12:58:58,832 ----------------------------------------------------------------------------------------------------
2024-12-05 12:59:44,968 epoch 10 - iter 281/2813 - loss 0.01800937 - time (sec): 46.13 - samples/sec: 48.73 - lr: 0.000005 - momentum: 0.000000
2024-12-05 13:00:31,129 epoch 10 - iter 562/2813 - loss 0.01297621 - time (sec): 92.30 - samples/sec: 48.71 - lr: 0.000004 - momentum: 0.000000
2024-12-05 13:01:17,366 epoch 10 - iter 843/2813 - loss 0.01387568 - time (sec): 138.53 - samples/sec: 48.68 - lr: 0.000004 - momentum: 0.000000
2024-12-05 13:02:03,700 epoch 10 - iter 1124/2813 - loss 0.01678401 - time (sec): 184.87 - samples/sec: 48.64 - lr: 0.000003 - momentum: 0.000000
2024-12-05 13:02:49,472 epoch 10 - iter 1405/2813 - loss 0.01468636 - time (sec): 230.64 - samples/sec: 48.73 - lr: 0.000003 - momentum: 0.000000
2024-12-05 13:03:35,408 epoch 10 - iter 1686/2813 - loss 0.01589973 - time (sec): 276.57 - samples/sec: 48.7

100%|██████████| 157/157 [00:25<00:00,  6.14it/s]

2024-12-05 13:07:07,265 DEV : loss 0.4370265603065491 - f1-score (micro avg)  0.9456





2024-12-05 13:07:13,963 saving best model
2024-12-05 13:07:14,951 ----------------------------------------------------------------------------------------------------
2024-12-05 13:07:14,953 Loading model from best epoch ...


100%|██████████| 1563/1563 [04:13<00:00,  6.15it/s]


2024-12-05 13:11:31,168 
Results:
- F-score (micro) 0.9432
- F-score (macro) 0.9432
- Accuracy 0.9432

By class:
              precision    recall  f1-score   support

         POS     0.9408    0.9460    0.9434     12500
         NEG     0.9457    0.9405    0.9431     12500

    accuracy                         0.9432     25000
   macro avg     0.9433    0.9432    0.9432     25000
weighted avg     0.9433    0.9432    0.9432     25000

2024-12-05 13:11:31,168 ----------------------------------------------------------------------------------------------------


{'test_score': 0.94324}

In [None]:
# Reload a saved RoBERTa classifier and score it on the full test split.
# This path is the base_path of the 5-epoch run; the 10-epoch run wrote to
# 'flair_roberta_transformer_10_model' instead.
saved_model_path = "flair_roberta_transformer_model/best-model.pt"  # change this if the checkpoint lives under another path or filename

# Deserialize first, then move onto the device selected in the imports cell.
# Note that this rebinds `transformer_classifier` to the loaded model.
transformer_classifier = TextClassifier.load(saved_model_path)
transformer_classifier = transformer_classifier.to(flair.device)

# `evaluate_model` is defined elsewhere in this notebook; whatever it returns is kept in `results`.
results = evaluate_model(transformer_classifier, corpus.test)