## Finetune Flair

In [2]:
import os
from pathlib import Path
from sklearn.model_selection import train_test_split
import pandas as pd

import torch

from flair.data import Corpus, Sentence
from flair.datasets import ClassificationCorpus
from flair.embeddings import (
    WordEmbeddings,
    FlairEmbeddings,
    DocumentRNNEmbeddings,
    TransformerWordEmbeddings,
    StackedEmbeddings,
)
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from flair.training_utils import EvaluationMetric
from flair.visual.training_curves import Plotter

import flair
# Force CPU execution. Flair expects a torch.device here (its trainer reads
# `flair.device.type`), so a plain string such as 'cpu' is not sufficient.
flair.device = torch.device('cpu')

from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def read_imdb(data_dir: str, is_train: bool):
    '''
    Read one split of the IMDb review dataset and format it for Flair.

    parameters:
    - data_dir (str): path of the dataset root folder, which contains the
      `train` and `test` folders (each with `neg` and `pos` sub-folders)
    - is_train (bool): when True, access train folder. else access test folder.

    return:
    - list of str, one entry per review file, formatted as
      "__label__NEG <text>" or "__label__POS <text>" with newlines in the
      review replaced by spaces. All `neg` reviews come first, then all `pos`
      reviews; inside each group the reviews are ordered by file name.
    '''
    data_folder = 'train' if is_train else 'test'
    data = []

    for label_folder in ['neg', 'pos']:
        path = os.path.join(data_dir, data_folder, label_folder)
        label = '__label__NEG' if label_folder == 'neg' else '__label__POS'

        # os.listdir() returns entries in arbitrary, filesystem-dependent
        # order. Sorting makes the returned list deterministic, so that a
        # seeded shuffle/split applied by the caller is reproducible.
        for file in sorted(os.listdir(path)):
            with open(os.path.join(path, file), 'r', encoding='utf-8') as f:
                # One review per output line: flatten embedded newlines
                text = f.read().replace('\n', ' ')
            data.append(f"{label} {text}")
    return data

In [4]:
# Root of the IMDb dataset; read_imdb() looks for train/ and test/ below it
data_dir = "../data/aclImdb"

# Load the test reviews, then hold out 10% of the training reviews as a
# validation set (fixed random_state so the split is repeatable)
test_data = read_imdb(data_dir, is_train=False)
train_data, dev_data = train_test_split(
    read_imdb(data_dir, is_train=True),
    test_size=0.1,
    random_state=42,
)

# Report the size of every split
print(f"Number of training samples: {len(train_data)}")
print(f"Number of validation samples: {len(dev_data)}")
print(f"Number of test samples: {len(test_data)}")

# Show a single formatted training sample (index 1) as a sanity check of the
# "__label__X <text>" layout
print(train_data[1])

Number of training samples: 22500
Number of validation samples: 2500
Number of test samples: 25000
__label__NEG On the surface the idea of Omen 4 was good. It's nice to see that the devil child could be a girl. In fact, sometimes, as in the Exorcist, when girls are possessed or are devilry it's very effective. But in Omen 4, it stunk.<br /><br />Delia does not make me think that she could be a devil child, rather she is a child with issues. Issues that maybe only a therapist, rather then a priest could help. She does not look scary or devilish. Rather, she looks sulky and moody.<br /><br />This film had potential and if it was made by the same people who had made the previous three films it could of worked. But it's rather insulting really to make a sequel to one of the most favoured horror trilogies, as a made for TV movie special.<br /><br />On so many levels it lets down. It's cheap looking, the acting is hammish and the effects are typical of a TV drama. The characters do not bring

In [5]:
# Folder that receives the Flair-formatted split files; created together with
# any missing parents, and left alone when it already exists
flair_data_folder = "../data/flair_data"
Path(flair_data_folder).mkdir(parents=True, exist_ok=True)

# Save data to files
def write_list_to_file(data_list, file_path):
    """Write each item of `data_list` to `file_path`, one item per line.

    The file is opened in write mode with UTF-8 encoding, so existing content
    is replaced. Every item is formatted as text and followed by a newline.
    """
    lines = (f"{entry}\n" for entry in data_list)
    with open(file_path, 'w', encoding='utf-8') as out_file:
        out_file.writelines(lines)

# Persist every split under the file name the corpus loader is pointed at
for split_file, split_lines in (
    ('train.txt', train_data),
    ('dev.txt', dev_data),
    ('test.txt', test_data),
):
    write_list_to_file(split_lines, os.path.join(flair_data_folder, split_file))

In [6]:
# Folder holding the train/dev/test text files written earlier in this notebook
corpus_folder = Path(flair_data_folder)

# Build the Flair corpus from the three split files. Each line in those files
# starts with a "__label__NEG" / "__label__POS" prefix followed by the review
# text; the labels are stored under the label type name 'sentiment'.
corpus = ClassificationCorpus(
    corpus_folder,
    train_file='train.txt',
    dev_file='dev.txt',
    test_file='test.txt',
    label_type='sentiment'
)

# Sanity check: the sizes should match the split sizes printed when the data
# was loaded
print(f"Number of training sentences: {len(corpus.train)}")
print(f"Number of validation sentences: {len(corpus.dev)}")
print(f"Number of test sentences: {len(corpus.test)}")

2024-11-29 18:15:47,537 Reading data from ../data/flair_data
2024-11-29 18:15:47,538 Train: ../data/flair_data/train.txt
2024-11-29 18:15:47,538 Dev: ../data/flair_data/dev.txt
2024-11-29 18:15:47,538 Test: ../data/flair_data/test.txt
2024-11-29 18:15:48,009 Initialized corpus ../data/flair_data (label type name is 'sentiment')
Number of training sentences: 22500
Number of validation sentences: 2500
Number of test sentences: 25000


In [7]:
# Collect the 'sentiment' labels that occur in the corpus. The resulting
# dictionary is handed to the classifier so it knows its output classes.
label_dict = corpus.make_label_dictionary(label_type='sentiment')
print(label_dict)

2024-11-29 18:15:48,013 Computing label dictionary. Progress:


0it [00:00, ?it/s]
22500it [00:40, 550.92it/s]

2024-11-29 18:16:28,876 Dictionary created for label 'sentiment' with 2 values: POS (seen 11298 times), NEG (seen 11202 times)
Dictionary with 2 tags: POS, NEG





In [8]:
# Word-level embeddings: forward and backward Flair language-model embeddings
# plus DistilBERT transformer embeddings
flair_forward_embedding = FlairEmbeddings('news-forward')
flair_backward_embedding = FlairEmbeddings('news-backward')
transformer_word_embeddings = TransformerWordEmbeddings('distilbert-base-uncased')

# List of word embeddings handed to the document embedding below (the model
# printout of the training run shows them wrapped in a StackedEmbeddings)
embeddings = [
    flair_forward_embedding,
    flair_backward_embedding,
    transformer_word_embeddings,
]

# Create document embeddings from word embeddings: an RNN with hidden size 256
# runs over the word vectors of a review.
# NOTE(review): reproject_words / reproject_words_dimension presumably project
# each word vector to 256 dimensions before the RNN — confirm in the Flair docs.
document_embeddings = DocumentRNNEmbeddings(
    embeddings=embeddings,
    hidden_size=256,
    reproject_words=True,
    reproject_words_dimension=256,
)



In [9]:
# Text classifier on top of the document embeddings. `label_type` uses the
# same 'sentiment' name as the corpus and the label dictionary.
classifier = TextClassifier(
    document_embeddings,
    label_dictionary=label_dict,
    label_type='sentiment'
)

In [10]:
# NOTE(review): these imports sit in the middle of the notebook; consider
# moving them to the import cell at the top so all dependencies are visible
# in one place.
from torch.optim import AdamW
import logging

# Configure the root logger to emit records at INFO level and above
logging.basicConfig(level=logging.INFO)

# Initialize the trainer with the model to train and the corpus to train on
trainer = ModelTrainer(classifier, corpus)

# Fine-tune the model.
# In the recorded run (22,500 training sentences, mini_batch_size=4) one epoch
# was 5,625 iterations and throughput on CPU was about 0.3 samples/sec, so a
# full run takes a very long time.
trainer.fine_tune(
    base_path='flair_model',             # Directory to save the model and logs
    learning_rate=5e-5,                  # Learning rate for fine-tuning
    mini_batch_size=4,                   # Smaller batch size for transformers
    max_epochs=3,                        # Number of epochs
    embeddings_storage_mode='none',      # presumably: do not keep computed embeddings in memory — confirm in Flair docs
    optimizer=AdamW,                     # Optimizer suited for transformers
    save_final_model=True,               # Save the final model
    save_model_each_k_epochs=1,          # Save model checkpoint every epoch
    create_file_logs=True,               # Save logs to a file
    create_loss_file=True,               # Save loss values to a file
)

2024-11-29 18:16:55,572 ----------------------------------------------------------------------------------------------------
2024-11-29 18:16:55,573 Model: "TextClassifier(
  (embeddings): DocumentRNNEmbeddings(
    (embeddings): StackedEmbeddings(
      (list_embedding_0): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_1): FlairEmbeddings(
        (lm): LanguageModel(
          (drop): Dropout(p=0.05, inplace=False)
          (encoder): Embedding(300, 100)
          (rnn): LSTM(100, 2048)
        )
      )
      (list_embedding_2): TransformerWordEmbeddings(
        (model): DistilBertModel(
          (embeddings): Embeddings(
            (word_embeddings): Embedding(30523, 768)
            (position_embeddings): Embedding(512, 768)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dr

  scaler = torch.cuda.amp.GradScaler(enabled=use_amp and flair.device.type != "cpu")


2024-11-29 20:31:42,247 epoch 1 - iter 562/5625 - loss 0.58804855 - time (sec): 8086.67 - samples/sec: 0.28 - lr: 0.000017 - momentum: 0.000000
2024-11-29 22:40:59,115 epoch 1 - iter 1124/5625 - loss 0.53251435 - time (sec): 15843.54 - samples/sec: 0.28 - lr: 0.000033 - momentum: 0.000000
2024-11-30 00:44:33,174 epoch 1 - iter 1686/5625 - loss 0.52635825 - time (sec): 23257.59 - samples/sec: 0.29 - lr: 0.000050 - momentum: 0.000000
2024-11-30 02:46:54,666 epoch 1 - iter 2248/5625 - loss 0.52355535 - time (sec): 30599.09 - samples/sec: 0.29 - lr: 0.000048 - momentum: 0.000000
2024-11-30 04:53:32,622 epoch 1 - iter 2810/5625 - loss 0.50514976 - time (sec): 38197.04 - samples/sec: 0.29 - lr: 0.000046 - momentum: 0.000000
2024-11-30 06:53:54,399 epoch 1 - iter 3372/5625 - loss 0.49047850 - time (sec): 45418.82 - samples/sec: 0.30 - lr: 0.000044 - momentum: 0.000000
2024-11-30 08:52:12,843 epoch 1 - iter 3934/5625 - loss 0.48356723 - time (sec): 52517.26 - samples/sec: 0.30 - lr: 0.000043 -

100%|██████████| 157/157 [58:13<00:00, 22.25s/it]

2024-11-30 15:50:25,834 DEV : loss 0.31884515285491943 - f1-score (micro avg)  0.904





2024-11-30 15:50:30,416 ----------------------------------------------------------------------------------------------------
2024-11-30 15:53:39,044 ----------------------------------------------------------------------------------------------------
2024-11-30 15:53:39,044 Exiting from training early.
2024-11-30 15:53:39,045 Saving model ...
2024-11-30 15:53:39,510 Done.
2024-11-30 15:53:39,511 ----------------------------------------------------------------------------------------------------
2024-11-30 15:53:39,511 Testing using last state of model ...


  0%|          | 1/1563 [00:21<9:21:17, 21.56s/it]


KeyboardInterrupt: 

In [11]:
# Load the saved model from the training output directory ('flair_model').
# NOTE(review): Flair's trainer usually names its files with hyphens
# (e.g. final-model.pt / best-model.pt) — confirm that this exact file exists
# before relying on it.
model_path = "flair_model/best_model.pt"  # Adjust this to the path of your saved model
classifier = TextClassifier.load(model_path)

In [None]:
def evaluate_model(classifier, test_dataset):
    """
    Evaluate a Flair classifier on a given test dataset.

    Every sentence is classified one at a time. The gold label is read from
    the "sentiment" label type; the prediction is stored under the separate
    label name "predicted", so the gold annotations on the sentences are not
    overwritten and the function can safely be run more than once on the same
    dataset.

    Args:
        classifier (TextClassifier): The trained Flair classifier.
        test_dataset (Dataset): The test dataset, an iterable of Flair
            sentences that carry a gold "sentiment" label ("POS" or "NEG").

    Returns:
        dict: Dictionary containing evaluation metrics with the keys
            "accuracy", "precision", "recall", "f1_score" (precision, recall
            and F1 are computed for the "POS" class) and
            "classification_report" (the formatted per-class text report).
    """
    # The notebook's import cell does not import tqdm, so bring it into scope
    # here; tqdm.auto picks the notebook or console progress bar as needed.
    from tqdm.auto import tqdm

    # Label name under which predictions are stored (kept apart from the gold
    # "sentiment" labels)
    predicted_label_name = "predicted"
    # Explicit class order for the report. Without `labels=`, sklearn sorts
    # the classes alphabetically (NEG, POS), so passing only
    # target_names=["POS", "NEG"] would swap the row captions.
    class_names = ["POS", "NEG"]

    true_labels = []
    predicted_labels = []

    # Iterate over test dataset with tqdm progress bar
    for sentence in tqdm(test_dataset, desc="Evaluating", leave=True):
        # True label
        true_labels.append(sentence.get_label("sentiment").value)

        # Predicted label, written under its own label name
        classifier.predict(sentence, label_name=predicted_label_name)
        predicted_labels.append(sentence.get_label(predicted_label_name).value)

    # Calculate metrics (binary metrics are reported for the POS class)
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels, pos_label="POS")
    recall = recall_score(true_labels, predicted_labels, pos_label="POS")
    f1 = f1_score(true_labels, predicted_labels, pos_label="POS")

    # Full classification report with rows in the order given by class_names
    classification_rep = classification_report(
        true_labels,
        predicted_labels,
        labels=class_names,
        target_names=class_names,
    )

    # Print results
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_rep)

    # Return metrics as a dictionary
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "classification_report": classification_rep,
    }


In [1]:
from flair.datasets import ClassificationCorpus

# Load the corpus (adjust the path to your dataset)
corpus = ClassificationCorpus(flair_data_folder, test_file="test.txt", label_type="sentiment")

# Access the test sentences
test_sentences = corpus.test

# Evaluate the model on the test split loaded above. The previous call passed
# `small_corpus.test`, but `small_corpus` is never defined in this notebook,
# so it would fail with a NameError on a fresh kernel.
results = evaluate_model(classifier, test_sentences)

ModuleNotFoundError: No module named 'flair'