<a href="https://colab.research.google.com/github/Hillascher5/nlp-tweets-sentiment-analysis/blob/main/Bertweet_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Try a specific model for the task

In [1]:
# # Needed for Google Colab
# !pip install --quiet evaluate transformers optuna datasets nltk scikit-learn
# !pip install numpy==1.26.4

In [2]:
%env CUDA_LAUNCH_BLOCKING=1

from wordcloud import WordCloud, STOPWORDS
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from tqdm import tqdm
from collections import Counter
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from nltk.stem import WordNetLemmatizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer
from datasets import Dataset
from torch.utils.data import DataLoader, TensorDataset
from optuna.pruners import MedianPruner
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F
import os
import re
import string
import time
import glob
import nltk
import evaluate
import transformers
import torch
import optuna
import wandb
wandb.login()
# API key - 0cbd7fe3cffd71df993b30edb4fa0db94f114413

os.environ["WANDB_PROJECT"] = "tweet-sentiment-classification_split_to_test_maxl_256_bertweet"
os.environ["WANDB_INIT_TIMEOUT"] = "180"

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

env: CUDA_LAUNCH_BLOCKING=1
Mounted at /content/drive


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhillas[0m ([33mhillas-tel-aviv-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [3]:
# Load data
df_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/nlp_project/Data/Corona_NLP_train.csv', encoding='latin1')
df_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/nlp_project/Data/Corona_NLP_test.csv', encoding='latin1')

In [4]:
# Merge and shuffle for better stratified splits
df_full = pd.concat([df_train, df_test], ignore_index=True)
df_full = df_full.sample(frac=1.0, random_state=42).reset_index(drop=True)

In [5]:
# Try without pre-processing
is_preprocessed = "no_preprocess"
df_full["clean_text"] = df_full["OriginalTweet"]

In [6]:
# Choose pretrained models
bert_model_name = "vinai/bertweet-base"

sentiment_labels = df_full['Sentiment'].unique()
n_labels = len(sentiment_labels)

# Load BERT tokenizer and model
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
bert_model = AutoModelForSequenceClassification.from_pretrained(bert_model_name, num_labels=n_labels)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Mapping sentiments to unique numeric IDs
unique_labels = sorted(df_full["Sentiment"].unique())
label2id = {label: idx for idx, label in enumerate(unique_labels)}
df_full["label"] = df_full["Sentiment"].map(label2id)

In [8]:
# Stratified split: 70% train, 15% val, 15% test
train_val_df, test_df = train_test_split(df_full, test_size=0.15, stratify=df_full["label"], random_state=42)
train_df, val_df = train_test_split(train_val_df, test_size=0.1765, stratify=train_val_df["label"], random_state=42)

# Confirm sizes
print("Train size:", len(train_df))
print("Val size:", len(val_df))
print("Test size:", len(test_df))

Train size: 31466
Val size: 6745
Test size: 6744


In [9]:
train_subset_df, _ = train_test_split(
    train_df[["clean_text", "label"]],
    train_size=2000,
    stratify=train_df["label"],
    random_state=42
)

val_subset_df, _ = train_test_split(
    val_df[["clean_text", "label"]],
    train_size=500,
    stratify=val_df["label"],
    random_state=42
)

In [10]:
# Tokenize function
def tokenize_function_bert(examples):
    return bert_tokenizer(examples["clean_text"], truncation=True, padding='max_length', max_length=128)

In [11]:
# Convert DataFrame to Hugging Face Dataset
hf_subset_train = Dataset.from_pandas(train_subset_df)
hf_subset_val = Dataset.from_pandas(val_subset_df)

hf_train = Dataset.from_pandas(train_df[["clean_text", "label"]])
hf_val = Dataset.from_pandas(val_df[["clean_text", "label"]])
hf_test = Dataset.from_pandas(test_df[["clean_text", "label"]])

In [12]:
# Tokenize subsets
# Tokenize for BERT
tokenized_bert_train_sub = hf_subset_train.map(tokenize_function_bert, batched=True)
tokenized_bert_train_sub.set_format("torch", columns=["input_ids", "attention_mask", "label"])

tokenized_bert_val_sub = hf_subset_val.map(tokenize_function_bert, batched=True)
tokenized_bert_val_sub.set_format("torch", columns=["input_ids", "attention_mask", "label"])

model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [13]:
# Tokenize full dataset
# Tokenize for BERT
tokenized_bert_train = hf_train.map(tokenize_function_bert, batched=True)
tokenized_bert_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])

tokenized_bert_val = hf_val.map(tokenize_function_bert, batched=True)
tokenized_bert_val.set_format("torch", columns=["input_ids", "attention_mask", "label"])

tokenized_bert_test = hf_test.map(tokenize_function_bert, batched=True)
tokenized_bert_test.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Map:   0%|          | 0/31466 [00:00<?, ? examples/s]

Map:   0%|          | 0/6745 [00:00<?, ? examples/s]

Map:   0%|          | 0/6744 [00:00<?, ? examples/s]

In [14]:
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
        "f1_macro": f1_metric.compute(predictions=predictions, references=labels, average="macro")["f1"]
    }

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [15]:
def build_trainer(model_checkpoint, trial, run_prefix, train_dataset, val_dataset):
    # Sample hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    num_epochs = trial.suggest_int("num_train_epochs", 2, 5)
    n_samples = len(train_dataset)

    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=5)
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

    run_name = f"{run_prefix}-ep{num_epochs}-lr{learning_rate}-bs{batch_size}-samples{n_samples}-run{int(time.time())}-{is_preprocessed}"

    args = TrainingArguments(
        output_dir=f"./results/{run_prefix}/{run_name}",
        disable_tqdm=True,
        fp16=True,
        eval_strategy="epoch",
        save_strategy="epoch",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs,
        learning_rate=learning_rate,
        weight_decay=0.01,
        label_smoothing_factor=0.1,
        load_best_model_at_end=True,
        save_total_limit=1,
        logging_strategy="epoch",
        logging_dir=f"./logs/{run_prefix}/{run_name}",
        run_name=run_name,
        report_to="wandb",
        metric_for_best_model="f1_macro",
        greater_is_better=True
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    return trainer

In [16]:
def objective_bert(trial):
    trainer = build_trainer(
        model_checkpoint="vinai/bertweet-base",
        trial=trial,
        run_prefix="bertweet",
        train_dataset=tokenized_bert_train_sub,
        val_dataset=tokenized_bert_val_sub
    )
    trainer.train()
    eval_result = trainer.evaluate()
    return eval_result["eval_f1_macro"]

In [17]:
# !huggingface-cli login

In [18]:
study_bert = optuna.create_study(direction="maximize",
                                 pruner=MedianPruner(n_startup_trials=2, n_warmup_steps=1),
                                 study_name=f"bertweet_study_stratify_{is_preprocessed}",
                                 storage=f"sqlite:////content/drive/MyDrive/Colab Notebooks/nlp_project/optuna/bertweet_study_stratify_maxl_256_{is_preprocessed}.db",
                                 load_if_exists=True)
study_bert.optimize(objective_bert, n_trials=5)
wandb.finish()

[I 2025-08-07 20:21:18,350] Using an existing study with name 'bertweet_study_stratify_no_preprocess' instead of creating a new one.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(


  return forward_call(*args, **kwargs)


{'loss': 1.5527, 'grad_norm': 4.736123561859131, 'learning_rate': 8.558019834262654e-06, 'epoch': 1.0}
{'eval_loss': 1.4529064893722534, 'eval_accuracy': 0.37, 'eval_f1_macro': 0.2626007933374692, 'eval_runtime': 1.4985, 'eval_samples_per_second': 333.67, 'eval_steps_per_second': 42.042, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 1.3716, 'grad_norm': 19.645160675048828, 'learning_rate': 5.720540554993145e-06, 'epoch': 2.0}
{'eval_loss': 1.3430989980697632, 'eval_accuracy': 0.43, 'eval_f1_macro': 0.439498694203324, 'eval_runtime': 1.4634, 'eval_samples_per_second': 341.678, 'eval_steps_per_second': 43.051, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 1.2279, 'grad_norm': 8.976165771484375, 'learning_rate': 2.883061275723637e-06, 'epoch': 3.0}
{'eval_loss': 1.29836106300354, 'eval_accuracy': 0.468, 'eval_f1_macro': 0.4820684386607307, 'eval_runtime': 1.468, 'eval_samples_per_second': 340.596, 'eval_steps_per_second': 42.915, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'loss': 1.1221, 'grad_norm': 9.144913673400879, 'learning_rate': 3.418649734059648e-08, 'epoch': 4.0}
{'eval_loss': 1.2887773513793945, 'eval_accuracy': 0.478, 'eval_f1_macro': 0.4862483770186217, 'eval_runtime': 1.468, 'eval_samples_per_second': 340.608, 'eval_steps_per_second': 42.917, 'epoch': 4.0}
{'train_runtime': 99.5901, 'train_samples_per_second': 80.329, 'train_steps_per_second': 10.041, 'train_loss': 1.318555938720703, 'epoch': 4.0}


  return forward_call(*args, **kwargs)
[I 2025-08-07 20:23:02,037] Trial 8 finished with value: 0.4862483770186217 and parameters: {'learning_rate': 1.1395499113532161e-05, 'batch_size': 8, 'num_train_epochs': 4}. Best is trial 5 with value: 0.5541241030428423.


{'eval_loss': 1.2887773513793945, 'eval_accuracy': 0.478, 'eval_f1_macro': 0.4862483770186217, 'eval_runtime': 1.508, 'eval_samples_per_second': 331.561, 'eval_steps_per_second': 41.777, 'epoch': 4.0}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(
  return forward_call(*args, **kwargs)


{'loss': 1.5369, 'grad_norm': 2.8598074913024902, 'learning_rate': 2.614946934674516e-05, 'epoch': 1.0}
{'eval_loss': 1.441977858543396, 'eval_accuracy': 0.396, 'eval_f1_macro': 0.35704547096155576, 'eval_runtime': 0.4909, 'eval_samples_per_second': 1018.596, 'eval_steps_per_second': 32.595, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 1.3684, 'grad_norm': 6.337177753448486, 'learning_rate': 1.317768534009205e-05, 'epoch': 2.0}
{'eval_loss': 1.3560354709625244, 'eval_accuracy': 0.418, 'eval_f1_macro': 0.390127078957162, 'eval_runtime': 0.483, 'eval_samples_per_second': 1035.201, 'eval_steps_per_second': 33.126, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 1.1958, 'grad_norm': 5.364632606506348, 'learning_rate': 2.0590133343893827e-07, 'epoch': 3.0}
{'eval_loss': 1.2755074501037598, 'eval_accuracy': 0.508, 'eval_f1_macro': 0.5167669357439615, 'eval_runtime': 0.4967, 'eval_samples_per_second': 1006.732, 'eval_steps_per_second': 32.215, 'epoch': 3.0}
{'train_runtime': 27.8535, 'train_samples_per_second': 215.413, 'train_steps_per_second': 6.786, 'train_loss': 1.3670257083953372, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
[I 2025-08-07 20:23:32,496] Trial 9 finished with value: 0.5167669357439615 and parameters: {'learning_rate': 3.8915352019959334e-05, 'batch_size': 32, 'num_train_epochs': 3}. Best is trial 5 with value: 0.5541241030428423.


{'eval_loss': 1.2755074501037598, 'eval_accuracy': 0.508, 'eval_f1_macro': 0.5167669357439615, 'eval_runtime': 0.5179, 'eval_samples_per_second': 965.472, 'eval_steps_per_second': 30.895, 'epoch': 3.0}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(
  return forward_call(*args, **kwargs)


{'loss': 1.5491, 'grad_norm': 3.1209685802459717, 'learning_rate': 9.174645368618376e-06, 'epoch': 1.0}
{'eval_loss': 1.4534187316894531, 'eval_accuracy': 0.412, 'eval_f1_macro': 0.3537822526737761, 'eval_runtime': 1.4857, 'eval_samples_per_second': 336.539, 'eval_steps_per_second': 42.404, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 1.3591, 'grad_norm': 8.684828758239746, 'learning_rate': 4.596479017012399e-06, 'epoch': 2.0}
{'eval_loss': 1.3068957328796387, 'eval_accuracy': 0.484, 'eval_f1_macro': 0.4893098688750862, 'eval_runtime': 1.4789, 'eval_samples_per_second': 338.088, 'eval_steps_per_second': 42.599, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 1.2144, 'grad_norm': 6.60981559753418, 'learning_rate': 3.66253308128478e-08, 'epoch': 3.0}
{'eval_loss': 1.2752561569213867, 'eval_accuracy': 0.518, 'eval_f1_macro': 0.512939372182835, 'eval_runtime': 1.4813, 'eval_samples_per_second': 337.545, 'eval_steps_per_second': 42.531, 'epoch': 3.0}
{'train_runtime': 71.8472, 'train_samples_per_second': 83.511, 'train_steps_per_second': 10.439, 'train_loss': 1.374183146158854, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
[I 2025-08-07 20:24:52,089] Trial 10 finished with value: 0.512939372182835 and parameters: {'learning_rate': 1.3734499054817927e-05, 'batch_size': 8, 'num_train_epochs': 3}. Best is trial 5 with value: 0.5541241030428423.


{'eval_loss': 1.2752561569213867, 'eval_accuracy': 0.518, 'eval_f1_macro': 0.512939372182835, 'eval_runtime': 3.1609, 'eval_samples_per_second': 158.182, 'eval_steps_per_second': 19.931, 'epoch': 3.0}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(
  return forward_call(*args, **kwargs)


{'loss': 1.4781, 'grad_norm': 6.656697750091553, 'learning_rate': 3.135272473701958e-05, 'epoch': 1.0}
{'eval_loss': 1.319658637046814, 'eval_accuracy': 0.428, 'eval_f1_macro': 0.368789087500671, 'eval_runtime': 1.4618, 'eval_samples_per_second': 342.048, 'eval_steps_per_second': 43.098, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 1.1793, 'grad_norm': 20.40317726135254, 'learning_rate': 2.3569246711578027e-05, 'epoch': 2.0}
{'eval_loss': 1.1965771913528442, 'eval_accuracy': 0.558, 'eval_f1_macro': 0.5600685724555445, 'eval_runtime': 1.4707, 'eval_samples_per_second': 339.985, 'eval_steps_per_second': 42.838, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 0.9464, 'grad_norm': 9.63680648803711, 'learning_rate': 1.5754509738243137e-05, 'epoch': 3.0}
{'eval_loss': 1.2349578142166138, 'eval_accuracy': 0.584, 'eval_f1_macro': 0.5889810527462627, 'eval_runtime': 1.4645, 'eval_samples_per_second': 341.422, 'eval_steps_per_second': 43.019, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'loss': 0.7447, 'grad_norm': 32.66359329223633, 'learning_rate': 7.971031712801587e-06, 'epoch': 4.0}
{'eval_loss': 1.2148420810699463, 'eval_accuracy': 0.626, 'eval_f1_macro': 0.6299721483675741, 'eval_runtime': 1.4659, 'eval_samples_per_second': 341.087, 'eval_steps_per_second': 42.977, 'epoch': 4.0}


  return forward_call(*args, **kwargs)


{'loss': 0.6068, 'grad_norm': 1.0710707902908325, 'learning_rate': 1.562947394666978e-07, 'epoch': 5.0}
{'eval_loss': 1.3234320878982544, 'eval_accuracy': 0.63, 'eval_f1_macro': 0.6351333750181567, 'eval_runtime': 1.4824, 'eval_samples_per_second': 337.286, 'eval_steps_per_second': 42.498, 'epoch': 5.0}
{'train_runtime': 118.8239, 'train_samples_per_second': 84.158, 'train_steps_per_second': 10.52, 'train_loss': 0.9910513793945313, 'epoch': 5.0}


  return forward_call(*args, **kwargs)
[I 2025-08-07 20:26:54,295] Trial 11 finished with value: 0.6351333750181567 and parameters: {'learning_rate': 3.907368486667445e-05, 'batch_size': 8, 'num_train_epochs': 5}. Best is trial 11 with value: 0.6351333750181567.


{'eval_loss': 1.3234320878982544, 'eval_accuracy': 0.63, 'eval_f1_macro': 0.6351333750181567, 'eval_runtime': 1.4852, 'eval_samples_per_second': 336.652, 'eval_steps_per_second': 42.418, 'epoch': 5.0}


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(
  return forward_call(*args, **kwargs)


{'loss': 1.5242, 'grad_norm': 4.786707401275635, 'learning_rate': 1.3563298058309618e-05, 'epoch': 1.0}
{'eval_loss': 1.3823292255401611, 'eval_accuracy': 0.392, 'eval_f1_macro': 0.367350331153808, 'eval_runtime': 1.4597, 'eval_samples_per_second': 342.54, 'eval_steps_per_second': 43.16, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 1.2642, 'grad_norm': 12.96672534942627, 'learning_rate': 6.808667551183315e-06, 'epoch': 2.0}
{'eval_loss': 1.2555299997329712, 'eval_accuracy': 0.504, 'eval_f1_macro': 0.5157064358435346, 'eval_runtime': 1.4744, 'eval_samples_per_second': 339.127, 'eval_steps_per_second': 42.73, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 1.0791, 'grad_norm': 4.396622180938721, 'learning_rate': 5.403704405701043e-08, 'epoch': 3.0}
{'eval_loss': 1.2245572805404663, 'eval_accuracy': 0.518, 'eval_f1_macro': 0.5271964168414943, 'eval_runtime': 1.4778, 'eval_samples_per_second': 338.336, 'eval_steps_per_second': 42.63, 'epoch': 3.0}
{'train_runtime': 72.449, 'train_samples_per_second': 82.817, 'train_steps_per_second': 10.352, 'train_loss': 1.289187296549479, 'epoch': 3.0}


  return forward_call(*args, **kwargs)
[I 2025-08-07 20:28:13,423] Trial 12 finished with value: 0.5271964168414943 and parameters: {'learning_rate': 2.0263891521378913e-05, 'batch_size': 8, 'num_train_epochs': 3}. Best is trial 11 with value: 0.6351333750181567.


{'eval_loss': 1.2245572805404663, 'eval_accuracy': 0.518, 'eval_f1_macro': 0.5271964168414943, 'eval_runtime': 3.1984, 'eval_samples_per_second': 156.326, 'eval_steps_per_second': 19.697, 'epoch': 3.0}


0,1
eval/accuracy,▁▃▄▄▄▂▂▅▅▂▄▅▅▃▆▇███▂▅▅▅
eval/f1_macro,▁▄▅▅▅▃▃▆▆▃▅▆▆▃▇▇███▃▆▆▆
eval/loss,█▅▄▄▄█▅▃▃█▄▃▃▄▁▂▁▄▄▆▃▂▂
eval/runtime,▄▄▄▄▄▁▁▁▁▄▄▄█▄▄▄▄▄▄▄▄▄█
eval/samples_per_second,▂▂▂▂▂███▇▂▂▂▁▂▂▂▂▂▂▂▂▂▁
eval/steps_per_second,█████▅▅▅▄███▁█████████▁
train/epoch,▁▁▃▃▅▅▆▆▆▁▁▃▃▅▅▅▁▁▃▃▅▅▅▁▁▃▃▅▆▆███▁▁▃▃▅▅▅
train/global_step,▂▂▄▄▅▅▇▇▇▁▁▁▁▂▂▂▂▂▄▄▅▅▅▂▂▄▄▅▇▇███▂▂▄▄▅▅▅
train/grad_norm,▂▅▃▃▁▂▂▁▃▂▂▅▃█▁▂▄▂
train/learning_rate,▃▂▂▁▇▄▁▃▂▁█▆▅▃▁▄▃▁

0,1
eval/accuracy,0.518
eval/f1_macro,0.5272
eval/loss,1.22456
eval/runtime,3.1984
eval/samples_per_second,156.326
eval/steps_per_second,19.697
total_flos,394677213696000.0
train/epoch,3.0
train/global_step,750.0
train/grad_norm,4.39662


In [19]:
best_trial_bert = study_bert.best_trial
print('Bert best trial on subset:')
print(best_trial_bert.params)

Bert best trial on subset:
{'learning_rate': 3.907368486667445e-05, 'batch_size': 8, 'num_train_epochs': 5}


In [20]:
best_params_bert = best_trial_bert.params
run_name_bert = f"bertweet_final_stratify_{is_preprocessed}-ep{best_params_bert['num_train_epochs']}-lr{best_params_bert['learning_rate']:.1e}-bs{best_params_bert['batch_size']}"
wandb.init(project="tweet-sentiment-classification_split_to_test_maxl_256_bertweet", name=run_name_bert, reinit=True)

final_trainer_bert = build_trainer(
    model_checkpoint="vinai/bertweet-base",
    trial=best_trial_bert,
    run_prefix=f"bertweet_final_stratify_{is_preprocessed}",
    train_dataset=tokenized_bert_train,
    val_dataset=tokenized_bert_val
)
final_trainer_bert.train()
final_trainer_bert.evaluate(tokenized_bert_test)
wandb.finish()



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
  trainer = Trainer(
  return forward_call(*args, **kwargs)


{'loss': 1.0036, 'grad_norm': 17.193601608276367, 'learning_rate': 3.127285311927076e-05, 'epoch': 1.0}
{'eval_loss': 0.891286313533783, 'eval_accuracy': 0.7755374351371386, 'eval_f1_macro': 0.7867367440949316, 'eval_runtime': 19.5579, 'eval_samples_per_second': 344.873, 'eval_steps_per_second': 43.154, 'epoch': 1.0}


  return forward_call(*args, **kwargs)


{'loss': 0.75, 'grad_norm': 0.1666225641965866, 'learning_rate': 2.3464075528477814e-05, 'epoch': 2.0}
{'eval_loss': 0.8648982048034668, 'eval_accuracy': 0.7915492957746478, 'eval_f1_macro': 0.7998869147246579, 'eval_runtime': 19.6388, 'eval_samples_per_second': 343.452, 'eval_steps_per_second': 42.976, 'epoch': 2.0}


  return forward_call(*args, **kwargs)


{'loss': 0.6491, 'grad_norm': 0.07685968279838562, 'learning_rate': 1.565132501599024e-05, 'epoch': 3.0}
{'eval_loss': 0.7952856421470642, 'eval_accuracy': 0.8363232023721275, 'eval_f1_macro': 0.8429263146752417, 'eval_runtime': 19.622, 'eval_samples_per_second': 343.748, 'eval_steps_per_second': 43.013, 'epoch': 3.0}


  return forward_call(*args, **kwargs)


{'loss': 0.5617, 'grad_norm': 185.20480346679688, 'learning_rate': 7.838574503502663e-06, 'epoch': 4.0}
{'eval_loss': 0.758899986743927, 'eval_accuracy': 0.8668643439584878, 'eval_f1_macro': 0.872030255297679, 'eval_runtime': 19.5636, 'eval_samples_per_second': 344.772, 'eval_steps_per_second': 43.141, 'epoch': 4.0}


  return forward_call(*args, **kwargs)


{'loss': 0.4997, 'grad_norm': 0.07513552159070969, 'learning_rate': 2.7810451862401745e-08, 'epoch': 5.0}
{'eval_loss': 0.7617508172988892, 'eval_accuracy': 0.8668643439584878, 'eval_f1_macro': 0.8713936014635906, 'eval_runtime': 19.5733, 'eval_samples_per_second': 344.602, 'eval_steps_per_second': 43.12, 'epoch': 5.0}
{'train_runtime': 1685.8542, 'train_samples_per_second': 93.324, 'train_steps_per_second': 11.668, 'train_loss': 0.6928059254555636, 'epoch': 5.0}


  return forward_call(*args, **kwargs)


{'eval_loss': 0.769802451133728, 'eval_accuracy': 0.8640272835112692, 'eval_f1_macro': 0.8680775747664612, 'eval_runtime': 19.6094, 'eval_samples_per_second': 343.917, 'eval_steps_per_second': 42.99, 'epoch': 5.0}


0,1
eval/accuracy,▁▂▆███
eval/f1_macro,▁▂▆███
eval/loss,█▇▃▁▁▂
eval/runtime,▁█▇▁▂▅
eval/samples_per_second,█▁▂█▇▃
eval/steps_per_second,█▁▂▇▇▂
train/epoch,▁▁▃▃▅▅▆▆████
train/global_step,▁▁▃▃▅▅▆▆████
train/grad_norm,▂▁▁█▁
train/learning_rate,█▆▅▃▁

0,1
eval/accuracy,0.86403
eval/f1_macro,0.86808
eval/loss,0.7698
eval/runtime,19.6094
eval/samples_per_second,343.917
eval/steps_per_second,42.99
total_flos,1.034909433846528e+16
train/epoch,5.0
train/global_step,19670.0
train/grad_norm,0.07514


In [21]:
final_trainer_bert.save_model(f"models/w_test_split/bertweet_final_stratify_{is_preprocessed}")
bert_tokenizer.save_pretrained(f"models/w_test_split/bertweet_final_stratify_{is_preprocessed}")
!cp -r models/w_test_split/bertweet_final_stratify_{is_preprocessed} "/content/drive/MyDrive/Colab Notebooks/nlp_project/models/w_test_split/bertweet_best_model_stratify_maxl_256_{is_preprocessed}"