# **Model Tuning**

In [None]:
# Required packages

!pip install -U transformers
!pip install -U accelerate
!pip install -U datasets
!pip install -U bertviz
!pip install -U umap-learn
!pip install -U seaborn
!pip install -U evaluate
!pip install -U emoji

Collecting symspellpy
  Downloading symspellpy-6.7.8-py3-none-any.whl.metadata (3.9 kB)
Collecting editdistpy>=0.1.3 (from symspellpy)
  Downloading editdistpy-0.1.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Downloading symspellpy-6.7.8-py3-none-any.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m80.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading editdistpy-0.1.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.1/144.1 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: editdistpy, symspellpy
Successfully installed editdistpy-0.1.5 symspellpy-6.7.8
Collecting emoji
  Downloading emoji-2.14.0-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.0-py3-none-any.whl (586 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

# Data Preprocessing

In [None]:
import pandas as pd
import re

def load_dataset(file_path, label):
    """
    Load the dataset from a file and add a numeric label to each entry.

    Args:
    - file_path (str): Path to the dataset file containing text data. Each line in the file
      should represent one entry (e.g., a tweet or text sample).
    - label (int): Numeric label to associate with all entries in the dataset. Typically,
      1 for positive sentiment and 0 for negative sentiment.

    Returns:
    - pd.DataFrame: A Pandas DataFrame with the following columns:
        - 'text': The textual data loaded from the file.
        - 'label': The numeric label provided, applied to all entries in the dataset.
    """
    # Open the file at the given path with UTF-8 encoding and read all lines into a list.
    with open(file_path, 'r', encoding='utf-8') as file:
        tweets = file.readlines()

    # Create a DataFrame from the list of tweets with a single column named 'text'.
    df = pd.DataFrame(tweets, columns=['text'])

    # Add a new column 'label' to the DataFrame and assign the provided label value to all rows.
    df['label'] = label

    # Return the resulting DataFrame.
    return df

def clean_tweet(tweet):
    """
    Clean a single tweet by removing unnecessary parts such as placeholders, special characters,
    and normalizing text.

    Args:
    - tweet (str): The raw tweet text.

    Returns:
    - str: The cleaned tweet text.
    """
    tweet = re.sub(r"<user>", "", tweet)  # Remove <user>
    tweet = re.sub(r"<url>", "", tweet)  # Remove <url>
    tweet = re.sub(r"\.{2,}", "", tweet)  # Remove ellipses (two or more dots)
    tweet = re.sub(r"\s+", " ", tweet)  # Normalize whitespace
    tweet = re.sub(r"\b\d+(?:st|nd|rd|th)?\b", "", tweet)  # Remove numbers and ordinals
    return tweet.strip()

# Paths to your datasets
# Specify file paths for negative and positive sentiment training data.
train_neg_path = 'drive/MyDrive/Colab Notebooks/train_neg_full.txt'  # Path to negative tweets dataset
train_pos_path = 'drive/MyDrive/Colab Notebooks/train_pos_full.txt'  # Path to positive tweets dataset

# Load datasets with numeric labels
# Assign label 0 to negative tweets and label 1 to positive tweets using the `load_dataset` function.
df_neg = load_dataset(train_neg_path, 0)  # Negative tweets
df_pos = load_dataset(train_pos_path, 1)  # Positive tweets

# Combine datasets
# Concatenate the positive and negative datasets into a single DataFrame for easier processing.
df = pd.concat([df_neg, df_pos], ignore_index=True)

# Clean tweets
# Apply the `clean_tweet` function to each tweet in the 'text' column of the DataFrame.
df['text'] = df['text'].apply(clean_tweet)

# Remove duplicates
# Drop duplicate entries based on the 'text' column to avoid redundant data.
df = df.drop_duplicates(subset='text').copy()

# Add a label_name column based on the label
# Create a new column 'label_name' to map numeric labels to human-readable categories.
# Label 1 (positive sentiment) maps to "positive", and label 0 (negative sentiment) maps to "negative".
df['label_name'] = df['label'].apply(lambda x: 'positive' if x == 1 else 'negative')

In [None]:
df['label_name'].value_counts()

Unnamed: 0_level_0,count
label_name,Unnamed: 1_level_1
sad,90744
happy,90013


# Train-test Split

In [None]:
from sklearn.model_selection import train_test_split

# Set a fixed random_state for reproducibility
RANDOM_STATE = 42

# Split the dataset into training and testing sets
# 30% of the data is reserved for testing, while the remaining 70% is used for training.
train, test = train_test_split(df, test_size=0.3, random_state=RANDOM_STATE)

# Further split the test set into validation and test sets
# One-third of the test set is used for validation, while the remaining two-thirds are kept as the final test set.
test, validation = train_test_split(test, test_size=1/3, random_state=RANDOM_STATE)

# Print the shapes of the resulting datasets (train, test, and validation)
train.shape, test.shape, validation.shape

((126529, 4), (36152, 4), (18076, 4))

In [None]:
from datasets import Dataset, DatasetDict

# Create a DatasetDict object to organize the data into train, test, and validation splits
dataset = DatasetDict({
    'train': Dataset.from_pandas(train, preserve_index=False),
    'test': Dataset.from_pandas(test, preserve_index=False),
    'validation': Dataset.from_pandas(validation, preserve_index=False)
})

# Display the DatasetDict object
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_name', 'Words per Tweet'],
        num_rows: 126529
    })
    test: Dataset({
        features: ['text', 'label', 'label_name', 'Words per Tweet'],
        num_rows: 36152
    })
    validation: Dataset({
        features: ['text', 'label', 'label_name', 'Words per Tweet'],
        num_rows: 18076
    })
})

# Tokenization of the Data

In [None]:
from transformers import AutoTokenizer

# Specify the pre-trained model checkpoint to use for tokenization
# Specify if BERT or BETweet
model_ckpt = "vinai/bertweet-base"
# model_ckpt = "bert-base-uncased"

# Load the tokenizer associated with the specified model checkpoint
# The tokenizer will handle tokenizing raw text into input IDs and tokens suitable for the model.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

In [None]:
def tokenize(batch):
    """
    Tokenize a batch of text data using the pre-loaded tokenizer.

    Args:
    - batch (dict): A batch of data containing a 'text' key with raw text to tokenize.

    Returns:
    - dict: Tokenized output including input IDs, attention masks, and other relevant fields.
    """
    # Use the tokenizer to process the text data in the batch
    # - padding=True ensures all sequences in the batch are padded to the same length.
    # - truncation=True ensures that sequences longer than the model's max length are truncated.
    temp = tokenizer(batch['text'], padding=True, truncation=True)

    return temp

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'input_ids': [[0, 1508, 839, 53, 5379, 5417, 34, 13101, 9, 16271, 57, 1982, 202, 156, 2], [0, 16, 6, 161, 1462, 136, 77, 4, 8210, 2193, 44499, 2, 1, 1, 1]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]}


In [None]:
# Apply the `tokenize` function to the entire dataset
emotion_encoded = dataset.map(tokenize, batched=True, batch_size=None)

Map:   0%|          | 0/126529 [00:00<?, ? examples/s]

Map:   0%|          | 0/36152 [00:00<?, ? examples/s]

Map:   0%|          | 0/18076 [00:00<?, ? examples/s]

In [None]:
# Create a mapping from label names (e.g., 'positive', 'negative') to their numeric IDs
# Iterate through the training dataset to extract label names and their corresponding numeric labels.
label2id = {x['label_name']: x['label'] for x in dataset['train']}

# Create the reverse mapping: from numeric IDs to label names
# Swap the keys and values from the `label2id` dictionary to create `id2label`.
id2label = {v: k for k, v in label2id.items()}

# Hyper-parameter Tuning

In [None]:
from transformers import AutoModel
import torch

In [None]:
# Use the Hugging Face `AutoModel` class to load the model associated with the specified checkpoint.
# This initializes the model, ready for further fine-tuning or inference tasks.
model = AutoModel.from_pretrained(model_ckpt)
model

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

In [None]:
from transformers import AutoModelForSequenceClassification, AutoConfig, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
from itertools import product
import torch

# Model configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define hyperparameter grid
# A grid of hyperparameters to search through, including:
# - learning rates
# - batch sizes
# - number of epochs
# - weight decay for regularization
learning_rates = [2e-5, 3e-5, 5e-5]
batch_sizes = [32, 64]
num_epochs = [2, 3]
weight_decays = [0.01, 0.1]

# Generate all possible combinations of the hyperparameters using Cartesian product
hyperparameter_combinations = list(product(learning_rates, batch_sizes, num_epochs, weight_decays))

# Metric computation function
def compute_metrics(pred):
    """
    Compute evaluation metrics for the model: accuracy and F1-score.

    Args:
    - pred: Predictions output from the model during evaluation.

    Returns:
    - dict: A dictionary containing accuracy and weighted F1-score.
    """
    labels = pred.label_ids  # True labels
    preds = pred.predictions.argmax(-1)  # Predicted class labels from logits
    f1 = f1_score(labels, preds, average="weighted")  # Weighted F1-score
    acc = accuracy_score(labels, preds)  # Accuracy
    return {"accuracy": acc, "f1": f1}

# Initialize a list to store evaluation results for each hyperparameter combination
results = []

# Iterate over all combinations of hyperparameters
for lr, batch_size, epochs, weight_decay in hyperparameter_combinations:
    print(f"Training with: lr={lr}, batch_size={batch_size}, epochs={epochs}, weight_decay={weight_decay}")

    # Define training arguments for the current hyperparameter combination
    training_args = TrainingArguments(
        output_dir=f"./results_lr_{lr}_bs_{batch_size}_ep_{epochs}_wd_{weight_decay}",  # Directory for logs and outputs
        overwrite_output_dir=True,  # Overwrite output directory if it exists
        num_train_epochs=epochs,  # Number of training epochs
        learning_rate=lr,  # Learning rate for optimization
        per_device_train_batch_size=batch_size,  # Batch size per device for training
        per_device_eval_batch_size=batch_size,  # Batch size per device for evaluation
        weight_decay=weight_decay,  # Weight decay for regularization
        evaluation_strategy="epoch",  # Evaluate at the end of each epoch
        save_strategy="no",  # Do not save checkpoints to reduce overhead
        disable_tqdm=False,  # Display progress bars during training
        logging_dir=f"./logs_lr_{lr}_bs_{batch_size}_ep_{epochs}_wd_{weight_decay}",  # Directory for logging
        load_best_model_at_end=False  # Do not load the best model automatically
    )

    # Initialize model configuration with label mappings
    config = AutoConfig.from_pretrained(model_ckpt, label2id=label2id, id2label=id2label)

    # Load the pre-trained model for sequence classification
    model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, config=config).to(device)

    # Initialize the Trainer for training and evaluation
    trainer = Trainer(
        model=model,
        args=training_args,  # Training arguments
        compute_metrics=compute_metrics,  # Function to compute accuracy and F1-score
        train_dataset=emotion_encoded['train'],  # Training dataset
        eval_dataset=emotion_encoded['validation'],  # Validation dataset
        tokenizer=tokenizer  # Tokenizer used for preprocessing
    )

    # Train the model with the current hyperparameter combination
    trainer.train()

    # Evaluate the model on the validation dataset
    eval_results = trainer.evaluate()
    eval_results["lr"] = lr  # Add learning rate to the results
    eval_results["batch_size"] = batch_size  # Add batch size
    eval_results["epochs"] = epochs  # Add number of epochs
    eval_results["weight_decay"] = weight_decay  # Add weight decay value
    results.append(eval_results)  # Store the results for this combination

# Print all evaluation results for each hyperparameter combination
for result in results:
    print(result)

# Find the best combination based on the highest F1-score
best_result = max(results, key=lambda x: x["eval_f1"])
print("Best hyperparameters:", best_result)

Training with: lr=2e-05, batch_size=32, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2597,0.263632,0.894335,0.894271
2,0.1898,0.269222,0.896105,0.896103


Training with: lr=2e-05, batch_size=32, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2606,0.263114,0.894612,0.894543
2,0.1903,0.268124,0.896935,0.896934


Training with: lr=2e-05, batch_size=32, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2652,0.273056,0.892675,0.892658
2,0.1957,0.274645,0.89605,0.89605
3,0.1393,0.322439,0.894833,0.894833


Training with: lr=2e-05, batch_size=32, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2642,0.274204,0.892675,0.892646
2,0.1958,0.273048,0.894058,0.894059
3,0.1388,0.322774,0.893395,0.893395


Training with: lr=2e-05, batch_size=64, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.271,0.25248,0.894335,0.894286
2,0.2009,0.260867,0.896493,0.896492


Training with: lr=2e-05, batch_size=64, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2723,0.252665,0.895386,0.895337
2,0.2022,0.26043,0.897212,0.897211


Training with: lr=2e-05, batch_size=64, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2734,0.254676,0.894169,0.89412
2,0.2063,0.2707,0.895331,0.89533
3,0.1552,0.29251,0.893782,0.893781


Training with: lr=2e-05, batch_size=64, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2722,0.254954,0.894003,0.893955
2,0.2048,0.268958,0.894999,0.894999
3,0.1549,0.293574,0.892952,0.89295


Training with: lr=3e-05, batch_size=32, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.265,0.270348,0.893505,0.89348
2,0.1793,0.274996,0.896603,0.896602


Training with: lr=3e-05, batch_size=32, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2672,0.274303,0.892399,0.892334
2,0.1808,0.275802,0.895939,0.895937


Training with: lr=3e-05, batch_size=32, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2709,0.294387,0.891624,0.891604
2,0.1909,0.276693,0.893561,0.893548
3,0.1213,0.362563,0.892565,0.892565


Training with: lr=3e-05, batch_size=32, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2702,0.293525,0.890518,0.890508
2,0.1924,0.278701,0.895552,0.895539
3,0.1211,0.358146,0.890739,0.890739


Training with: lr=3e-05, batch_size=64, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2756,0.254598,0.894335,0.894303
2,0.1896,0.264653,0.895165,0.895165


Training with: lr=3e-05, batch_size=64, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2749,0.254196,0.893671,0.89364
2,0.19,0.265372,0.895773,0.895773


Training with: lr=3e-05, batch_size=64, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.277,0.258868,0.894058,0.89402
2,0.1971,0.279549,0.892897,0.892893
3,0.1312,0.320513,0.892288,0.892287


Training with: lr=3e-05, batch_size=64, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2776,0.259322,0.893561,0.893523
2,0.1968,0.281939,0.893561,0.893555
3,0.132,0.319484,0.893339,0.893338


Training with: lr=5e-05, batch_size=32, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2764,0.293822,0.887807,0.88777
2,0.1776,0.289257,0.89428,0.89428


Training with: lr=5e-05, batch_size=32, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2794,0.313095,0.889688,0.889648
2,0.1808,0.288895,0.892454,0.892448


Training with: lr=5e-05, batch_size=32, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2857,0.328106,0.885815,0.885813
2,0.197,0.29771,0.890462,0.890453
3,0.1149,0.387029,0.887752,0.887752


Training with: lr=5e-05, batch_size=32, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2842,0.317071,0.884322,0.884303
2,0.1963,0.300978,0.889909,0.889905
3,0.1076,0.385695,0.89096,0.890956


Training with: lr=5e-05, batch_size=64, epochs=2, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.29,0.274814,0.891126,0.891109
2,0.1962,0.268483,0.894446,0.894446


Training with: lr=5e-05, batch_size=64, epochs=2, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2838,0.264836,0.891126,0.89111
2,0.1815,0.277486,0.892288,0.892286


Training with: lr=5e-05, batch_size=64, epochs=3, weight_decay=0.01


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.287,0.277447,0.89002,0.890014
2,0.195,0.285183,0.893173,0.893173
3,0.1099,0.353133,0.890739,0.890739


Training with: lr=5e-05, batch_size=64, epochs=3, weight_decay=0.1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2857,0.277775,0.890407,0.890406
2,0.1911,0.284213,0.893948,0.893945
3,0.111,0.354364,0.891403,0.891403


{'eval_loss': 0.2692217230796814, 'eval_accuracy': 0.8961053330382828, 'eval_f1': 0.8961025563902978, 'eval_runtime': 15.0646, 'eval_samples_per_second': 1199.896, 'eval_steps_per_second': 37.505, 'epoch': 2.0, 'lr': 2e-05, 'batch_size': 32, 'epochs': 2, 'weight_decay': 0.01}
{'eval_loss': 0.2681236267089844, 'eval_accuracy': 0.8969351626466032, 'eval_f1': 0.8969336343359718, 'eval_runtime': 15.042, 'eval_samples_per_second': 1201.699, 'eval_steps_per_second': 37.561, 'epoch': 2.0, 'lr': 2e-05, 'batch_size': 32, 'epochs': 2, 'weight_decay': 0.1}
{'eval_loss': 0.3224394917488098, 'eval_accuracy': 0.8948329276388581, 'eval_f1': 0.8948329414790966, 'eval_runtime': 15.0679, 'eval_samples_per_second': 1199.638, 'eval_steps_per_second': 37.497, 'epoch': 3.0, 'lr': 2e-05, 'batch_size': 32, 'epochs': 3, 'weight_decay': 0.01}
{'eval_loss': 0.3227744996547699, 'eval_accuracy': 0.8933945563177694, 'eval_f1': 0.8933945416357014, 'eval_runtime': 15.0447, 'eval_samples_per_second': 1201.489, 'eval_s