In [1]:
# change this if running locally
!pip install datasets
# !pip install --force-reinstall --no-deps git+https://github.com/huggingface/transformers
!pip install scikit-multilearn
!pip install optuna
!pip install triton

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import pandas as pd
import numpy as np
import nltk
import torch
import datasets
from datasets import Dataset, DatasetDict
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification
from typing import Union
from skmultilearn.model_selection import iterative_train_test_split
import optuna
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
MODEL_NAME = "bertabaporu-large-uncased"
MODEL_PATH = "pablocosta/" + MODEL_NAME

In [3]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [5]:
# change this if running locally
#file = 'public_data/train/track_b/ptbr.csv'
file = 'ptbr.csv'
data = pd.read_csv(file)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2226 entries, 0 to 2225
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        2226 non-null   object
 1   text      2226 non-null   object
 2   Anger     2226 non-null   int64 
 3   Disgust   2226 non-null   int64 
 4   Fear      2226 non-null   int64 
 5   Joy       2226 non-null   int64 
 6   Sadness   2226 non-null   int64 
 7   Surprise  2226 non-null   int64 
dtypes: int64(6), object(2)
memory usage: 139.2+ KB


In [6]:
data.head()

Unnamed: 0,id,text,Anger,Disgust,Fear,Joy,Sadness,Surprise
0,ptbr_train_track_a_00001,"minha vó me disse que era frango e eu comi, ti...",0,0,0,0,1,0
1,ptbr_train_track_a_00002,Está e a nossa deputada Benedita linda guerrei...,0,0,0,1,0,0
2,ptbr_train_track_a_00003,só falta as roupas kkkkkkkkkkk,0,0,0,1,0,0
3,ptbr_train_track_a_00004,Eu tmb. Comecei a sair de casa agora (fui pela...,0,0,0,0,1,0
4,ptbr_train_track_a_00005,Peço a Deus que nossos dirigentes tenham realm...,0,0,0,0,0,0


### Pré-processamento dos dados
1. Case folding
2. Remover stop words
3. Remover acentuação e pontuação


In [7]:
stopwords = nltk.corpus.stopwords.words('portuguese')
stopwords[:20]

['a',
 'à',
 'ao',
 'aos',
 'aquela',
 'aquelas',
 'aquele',
 'aqueles',
 'aquilo',
 'as',
 'às',
 'até',
 'com',
 'como',
 'da',
 'das',
 'de',
 'dela',
 'delas',
 'dele']

In [8]:
data["clean_text"] = data["text"].copy()
# Case folding
data["clean_text"] = data["clean_text"].str.lower()
# Remover stopwords
data["clean_text"] = data["clean_text"].replace({r"\b" + stopword + r"\b": "" for stopword in stopwords}, regex=True)
# Remover pontuação
data["clean_text"] = data["clean_text"].str.replace(r"[\.!?\\\-.,]", "", regex=True)
data["clean_text"] = data["clean_text"].str.replace(r"\s+", " ", regex=True)
data["clean_text"] = data["clean_text"].str.strip()

In [9]:
data["text"], data["clean_text"]

(0       minha vó me disse que era frango e eu comi, ti...
 1       Está e a nossa deputada Benedita linda guerrei...
 2                          só falta as roupas kkkkkkkkkkk
 3       Eu tmb. Comecei a sair de casa agora (fui pela...
 4       Peço a Deus que nossos dirigentes tenham realm...
                               ...                        
 2221              Eu acho que o CAP vai surpreender hein.
 2222    23:59 - Lula sabia de toda a corrupção no seu ...
 2223    O Brasil precisa URGENTE de pessoas sérias e c...
 2224    Sera que só eu acho que ta passando da hora de...
 2225                                     falta só 2 porra
 Name: text, Length: 2226, dtype: object,
 0          vó disse frango comi gosto frango mto inocente
 1       deputada benedita linda guerreira parabéns juntos
 2                                falta roupas kkkkkkkkkkk
 3       tmb comecei sair casa agora ( primeira vez cin...
 4       peço deus dirigentes realmente iluminação toma...
              

### Adicionar emoção neutra

Para casos que não tem nenhuma emoção na linha, adicionamos uma nova emoção: neutro

In [10]:
BASE_EMOTIONS = [ data.columns[i] for i in range(2, data.shape[1]-1)]
BASE_EMOTIONS
base_emotions = True
if not base_emotions:
    data["Neutral"] = 0
    no_emotions_mask = data[BASE_EMOTIONS].sum(axis=1) == 0
    data.loc[no_emotions_mask, "Neutral"] = 1
    EMOTIONS = BASE_EMOTIONS + ["Neutral"]
else:
    EMOTIONS = BASE_EMOTIONS


In [11]:
data[EMOTIONS].sum()

Unnamed: 0,0
Anger,718
Disgust,75
Fear,109
Joy,581
Sadness,322
Surprise,153


### Iterative train test split
Uma vez que o dataset é desbalanceado, precisamos garantir que os dados de treino e teste tenham proporções similares de cada classe. Entretanto, já que o nosso problema é multi classe, utilizar o *train_test_split* do scikit-learn não funciona, uma vez que ele não lida bem com problemas multi classe, pois nesse tipo de problema há muitas combinações de classe possíveis. Sendo assim, utilizamos a função *iterative_train_test_split* que se propõe a resolver esse problema

In [12]:
def concat_X_y(X: np.array, y: np.array, columns: list[str]) -> pd.DataFrame:
    concatted_np = np.concatenate((X, y), axis=1)
    concatted = pd.DataFrame(concatted_np, columns=columns)
    return concatted


def train_test_val_split(
        data: pd.DataFrame,
        feature_label: str,
        targets_labels: list[str],
        seed: int = None
    ) -> Union[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    if seed is not None:
        np.random.seed(seed)
    X = data[feature_label].to_numpy().reshape(-1, 1)
    y = data[targets_labels].to_numpy()

    X_train, y_train, X_test, y_test = iterative_train_test_split(
        X,
        y,
        test_size=0.2,
    )
    X_test, y_test, X_val, y_val = iterative_train_test_split(
        X_test,
        y_test,
        test_size=0.5,
    )
    columns = [feature_label] + targets_labels

    train = concat_X_y(X_train, y_train, columns)
    test = concat_X_y(X_test, y_test, columns)
    val = concat_X_y(X_val, y_val, columns)

    return train, test, val

train, test, val = train_test_val_split(data, "clean_text", EMOTIONS, seed=42)


### Preparação para a estrutura do Huggingface

In [13]:
def create_dataset_dict(train: pd.DataFrame, val: pd.DataFrame, test: pd.DataFrame) -> DatasetDict:
    train_dataset = Dataset.from_pandas(train)
    val_dataset = Dataset.from_pandas(val)
    test_dataset = Dataset.from_pandas(test)

    return DatasetDict({
        'train': train_dataset,
        'validation': val_dataset,
        'test': test_dataset
    })

dataset = create_dataset_dict(train, val, test)
print(dataset)
dataset["train"][0]


DatasetDict({
    train: Dataset({
        features: ['clean_text', 'Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise'],
        num_rows: 1780
    })
    validation: Dataset({
        features: ['clean_text', 'Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise'],
        num_rows: 223
    })
    test: Dataset({
        features: ['clean_text', 'Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise'],
        num_rows: 223
    })
})


{'clean_text': 'vó disse frango comi gosto frango mto inocente',
 'Anger': 0,
 'Disgust': 0,
 'Fear': 0,
 'Joy': 0,
 'Sadness': 1,
 'Surprise': 0}

In [14]:
labels = dataset['train'].column_names
labels.remove('clean_text')
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
print(labels)
print(id2label)
print(label2id)

['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise']
{0: 'Anger', 1: 'Disgust', 2: 'Fear', 3: 'Joy', 4: 'Sadness', 5: 'Surprise'}
{'Anger': 0, 'Disgust': 1, 'Fear': 2, 'Joy': 3, 'Sadness': 4, 'Surprise': 5}


In [15]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando o dispositivo: {device}")

Usando o dispositivo: cuda


In [16]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
def tokenize_data(example):
  text = example["clean_text"]
  example_labels = {k: example[k] for k in example.keys() if k in labels}
  labels_dict = [0. for i in range(len(labels))]
  for label, value in example_labels.items():
       label_id = label2id[label]
       labels_dict[label_id] = float(value)
  encoding = tokenizer(text, truncation=True, max_length=128)
  encoding["labels"] = labels_dict

  return encoding
tokenized_dataset = dataset.map(tokenize_data, remove_columns=dataset['train'].column_names)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/504k [00:00<?, ?B/s]

Map:   0%|          | 0/1780 [00:00<?, ? examples/s]

Map:   0%|          | 0/223 [00:00<?, ? examples/s]

Map:   0%|          | 0/223 [00:00<?, ? examples/s]

In [17]:
example = tokenized_dataset['train'][0]
print(example.keys())

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'labels'])


In [18]:
tokenizer.decode(example['input_ids'])

'[CLS] vo disse frango comi gosto frango mto inocente [SEP]'

In [19]:
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

### Finetuning do modelo

In [20]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(MODEL_PATH,
                                    problem_type="multi_label_classification",
                                    num_labels=len(labels),
                                    id2label=id2label,
                                    label2id=label2id)

In [21]:
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH,
                                    problem_type="multi_label_classification",
                                    num_labels=len(labels),
                                    id2label=id2label,
                                    label2id=label2id)
model.to(device)

pytorch_model.bin:   0%|          | 0.00/1.48G [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(64000, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1

In [22]:
def optuna_hp_space(trial):

    return {

        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),

        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [8, 16, 32]),

        "weight_decay": trial.suggest_float("weight_decay", 0.0, 0.3),

        "warmup_steps": trial.suggest_float("warmup_steps", 100, 1000),

    }

In [23]:
batch_size = 8
metric_name = "f1"

args = TrainingArguments(
    MODEL_NAME + "-multisentiment-portuguese",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    save_total_limit=3,
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    label_names=["labels"],
    warmup_steps=100,
    torch_compile=True,
    optim="adafactor"
    #push_to_hub=True,
)

The speedups for torchdynamo mostly come wih GPU Ampere or higher and which is not detected here.


In [24]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, precision_score, recall_score
from transformers import EvalPrediction

def sigmoid(x):
   return 1/(1 + np.exp(-x))

# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    predictions = sigmoid(predictions)
    y_pred = (predictions > 0.5).astype(int).reshape(-1)
    y_true = labels.astype(int).reshape(-1)

    f1_macro_average = f1_score(y_true=y_true, y_pred=y_pred, average="macro")
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average="macro")
    recall = recall_score(y_true, y_pred, average="macro")

    metrics = {
        'f1': f1_macro_average,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall
    }
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds,
        labels=p.label_ids)
    return result

In [25]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [26]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=tokenized_dataset["train"],  # Subsample para treino rápido
    eval_dataset=tokenized_dataset["validation"],    # Subsample para validação
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
best_trials = trainer.hyperparameter_search(

    direction=["maximize"],

    backend="optuna",

    hp_space=optuna_hp_space,

    n_trials=10,

)

[I 2025-01-05 19:53:53,687] A new study created in memory with name: no-name-84a4276d-4902-44f1-a2e0-8ff7b49c7304
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.647105,0.489566,0.622571,0.51256,0.52252
2,No log,0.486433,0.534562,0.84006,0.606352,0.534548
3,0.615000,0.374081,0.460266,0.852765,0.426383,0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-01-05 20:06:16,360] Trial 0 finished with value: 2.2394142184484194 and parameters: {'learning_rate': 1.6280667014816993e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.26520600029366936, 'warmup_steps': 894.7653967028104}. Best is trial 0 with value: 2.2394142184484194.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁██
eval/f1,▄█▁
eval/loss,█▄▁
eval/precision,▄█▁
eval/recall,▆█▁
eval/runtime,█▁▁
eval/samples_per_second,▁██
eval/steps_per_second,▁██
train/epoch,▁▅▅██
train/global_step,▁▅▅██

0,1
eval/accuracy,0.85277
eval/f1,0.46027
eval/loss,0.37408
eval/precision,0.42638
eval/recall,0.5
eval/runtime,1.8495
eval/samples_per_second,120.576
eval/steps_per_second,15.14
total_flos,345088654705872.0
train/epoch,3.0


[34m[1mwandb[0m: Currently logged in as: [33mvictorelitt[0m ([33mvictorelitt-ufrgs-universidade-federal-do-rio-grande-do-sul[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.519752,0.54922,0.810164,0.566657,0.544317
2,No log,0.356701,0.465502,0.853513,0.926702,0.502538
3,No log,0.310882,0.720318,0.893871,0.857686,0.673192


[I 2025-01-05 20:13:58,117] Trial 1 finished with value: 3.145067092629867 and parameters: {'learning_rate': 1.4149765034351786e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.2328911601781654, 'warmup_steps': 742.3259328978805}. Best is trial 1 with value: 3.145067092629867.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁▅█
eval/f1,▃▁█
eval/loss,█▃▁
eval/precision,▁█▇
eval/recall,▃▁█
eval/runtime,▃█▁
eval/samples_per_second,▆▁█
eval/steps_per_second,▆▁█
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.89387
eval/f1,0.72032
eval/loss,0.31088
eval/precision,0.85769
eval/recall,0.67319
eval/runtime,1.853
eval/samples_per_second,120.345
eval/steps_per_second,15.111
total_flos,437176367107920.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.643335,0.493446,0.62855,0.51464,0.526026
2,No log,0.474793,0.52752,0.844544,0.619283,0.530877
3,0.610200,0.370853,0.460266,0.852765,0.426383,0.5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-01-05 20:20:13,479] Trial 2 finished with value: 2.2394142184484194 and parameters: {'learning_rate': 1.4714557058485391e-06, 'per_device_train_batch_size': 8, 'weight_decay': 0.00010552730842281476, 'warmup_steps': 754.2626418680469}. Best is trial 1 with value: 3.145067092629867.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁██
eval/f1,▄█▁
eval/loss,█▄▁
eval/precision,▄█▁
eval/recall,▇█▁
eval/runtime,▁██
eval/samples_per_second,█▁▁
eval/steps_per_second,█▁▁
train/epoch,▁▅▅██
train/global_step,▁▅▅██

0,1
eval/accuracy,0.85277
eval/f1,0.46027
eval/loss,0.37085
eval/precision,0.42638
eval/recall,0.5
eval/runtime,1.8631
eval/samples_per_second,119.693
eval/steps_per_second,15.029
total_flos,345088654705872.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.699355,0.442857,0.523169,0.504198,0.508335
2,No log,0.65802,0.483473,0.602392,0.51363,0.525387
3,No log,0.58805,0.529132,0.710762,0.53014,0.542731


[I 2025-01-05 20:24:47,405] Trial 3 finished with value: 2.3127651014467645 and parameters: {'learning_rate': 2.444579796720014e-06, 'per_device_train_batch_size': 32, 'weight_decay': 0.25298561674418496, 'warmup_steps': 553.4666972832314}. Best is trial 1 with value: 3.145067092629867.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁▄█
eval/f1,▁▄█
eval/loss,█▅▁
eval/precision,▁▄█
eval/recall,▁▄█
eval/runtime,▁▇█
eval/samples_per_second,█▂▁
eval/steps_per_second,█▂▁
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.71076
eval/f1,0.52913
eval/loss,0.58805
eval/precision,0.53014
eval/recall,0.54273
eval/runtime,1.8629
eval/samples_per_second,119.705
eval/steps_per_second,15.03
total_flos,553516464868224.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.276997,0.766994,0.901345,0.837955,0.72797
2,No log,0.247014,0.799519,0.913303,0.864911,0.76018
3,0.302800,0.253915,0.80935,0.913303,0.848598,0.781179


[I 2025-01-05 20:30:58,769] Trial 4 finished with value: 3.3524299868730605 and parameters: {'learning_rate': 6.794496228684008e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.19008383482645178, 'warmup_steps': 442.7626081014312}. Best is trial 4 with value: 3.3524299868730605.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁██
eval/f1,▁▆█
eval/loss,█▁▃
eval/precision,▁█▄
eval/recall,▁▅█
eval/runtime,▁▅█
eval/samples_per_second,█▄▁
eval/steps_per_second,█▄▁
train/epoch,▁▅▅██
train/global_step,▁▅▅██

0,1
eval/accuracy,0.9133
eval/f1,0.80935
eval/loss,0.25392
eval/precision,0.8486
eval/recall,0.78118
eval/runtime,1.8878
eval/samples_per_second,118.128
eval/steps_per_second,14.832
total_flos,345088654705872.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.703203,0.437021,0.513453,0.502382,0.504738


[I 2025-01-05 20:32:17,952] Trial 5 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.51345
eval/f1,0.43702
eval/loss,0.7032
eval/precision,0.50238
eval/recall,0.50474
eval/runtime,1.8877
eval/samples_per_second,118.136
eval/steps_per_second,14.833
train/epoch,1.0
train/global_step,56.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.707598,0.434561,0.506726,0.503563,0.507094


[I 2025-01-05 20:33:30,683] Trial 6 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.50673
eval/f1,0.43456
eval/loss,0.7076
eval/precision,0.50356
eval/recall,0.50709
eval/runtime,1.8892
eval/samples_per_second,118.038
eval/steps_per_second,14.821
train/epoch,1.0
train/global_step,56.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.309617,0.746619,0.893871,0.818854,0.708889
2,No log,0.269361,0.769914,0.907324,0.8771,0.720977
3,0.334500,0.262854,0.766994,0.901345,0.837955,0.72797


[I 2025-01-05 20:39:35,286] Trial 7 finished with value: 3.234264834907643 and parameters: {'learning_rate': 1.0325095421637412e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.007189648754308741, 'warmup_steps': 113.2097005516447}. Best is trial 4 with value: 3.3524299868730605.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█▅
eval/f1,▁█▇
eval/loss,█▂▁
eval/precision,▁█▃
eval/recall,▁▅█
eval/runtime,▁▅█
eval/samples_per_second,█▄▁
eval/steps_per_second,█▄▁
train/epoch,▁▅▅██
train/global_step,▁▅▅██

0,1
eval/accuracy,0.90135
eval/f1,0.76699
eval/loss,0.26285
eval/precision,0.83796
eval/recall,0.72797
eval/runtime,1.8861
eval/samples_per_second,118.234
eval/steps_per_second,14.846
total_flos,709318564622448.0
train/epoch,3.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.682487,0.467399,0.562033,0.513782,0.526922
2,No log,0.600945,0.52108,0.690583,0.525785,0.539299


[I 2025-01-05 20:42:22,648] Trial 8 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at pablocosta/bertabaporu-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


0,1
eval/accuracy,▁█
eval/f1,▁█
eval/loss,█▁
eval/precision,▁█
eval/recall,▁█
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁█
train/global_step,▁█

0,1
eval/accuracy,0.69058
eval/f1,0.52108
eval/loss,0.60095
eval/precision,0.52578
eval/recall,0.5393
eval/runtime,1.8538
eval/samples_per_second,120.291
eval/steps_per_second,15.104
train/epoch,2.0
train/global_step,112.0


Epoch,Training Loss,Validation Loss,F1,Accuracy,Precision,Recall
1,No log,0.274696,0.782207,0.905082,0.839575,0.74696
2,No log,0.248552,0.788045,0.908819,0.85419,0.749151
3,No log,0.239367,0.809743,0.911809,0.839961,0.786602


[I 2025-01-05 20:47:27,418] Trial 9 finished with value: 3.348113582795965 and parameters: {'learning_rate': 7.760850400076917e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.20024635783849384, 'warmup_steps': 191.85358096809546}. Best is trial 4 with value: 3.3524299868730605.


In [28]:
best_trials

BestRun(run_id='4', objective=3.3524299868730605, hyperparameters={'learning_rate': 6.794496228684008e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.19008383482645178, 'warmup_steps': 442.7626081014312}, run_summary=None)