In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModel
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np
import pandas as pd
import os
from models import columns,vectorize_data
from resultsAnalyse import drawConfusionMatrix
import torch
from sklearn.metrics import f1_score
def filter_inadequada(example):
    return example["INADEQUADA"] == 0

ds = load_dataset("higopires/RePro-categories-multilabel")
print(len(ds["train"]))
#remove INADEQUADA examples
ds = ds.filter(filter_inadequada)
print(len(ds["train"]))

8002
7674


In [2]:


print(ds["train"].features)
#run_blitr(train_df,val_df,test_df)


{'review_text': Value(dtype='string', id=None), 'ENTREGA': Value(dtype='int64', id=None), 'OUTROS': Value(dtype='int64', id=None), 'PRODUTO': Value(dtype='int64', id=None), 'CONDICOESDERECEBIMENTO': Value(dtype='int64', id=None), 'INADEQUADA': Value(dtype='int64', id=None), 'ANUNCIO': Value(dtype='int64', id=None)}


In [None]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Convert logits to probabilities and then to binary predictions
    predictions = (torch.sigmoid(torch.tensor(logits)) > 0.5).int().numpy()
    
    # Calculate sample-wise F1 score
    f1 = f1_score(labels, predictions, average='micro', zero_division=0)
    
    return {'f1': float(f1)}

def preprocess_function(sample):
    model_name = "distilbert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Tokenize text
    tokenized = tokenizer(sample["review_text"], truncation=True, padding=True)
    
    # Convert binary label columns to a list (e.g., [1, 0, 1, 0, 0, 0])
    labels = []
    for i in range(len(sample["review_text"])):
        label_row = [
            float(sample["ENTREGA"][i]),
            float(sample["OUTROS"][i]),
            float(sample["PRODUTO"][i]),
            float(sample["CONDICOESDERECEBIMENTO"][i]),
            float(sample["ANUNCIO"][i])
        ]
        labels.append(label_row)
    
    tokenized["labels"] = labels
    return tokenized

model_name = "Qwen/Qwen3-8B"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5,problem_type="multi_label_classification", ignore_mismatched_sizes=True )

tokenized_dataset = ds.map(preprocess_function, batched=True)



Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
from sklearn.metrics import f1_score
small_pred = np.array([[0, 1, 0], [1, 0, 1]])
small_true = np.array([[0, 1, 0], [1, 0, 0]])
print(f1_score(small_true, small_pred, average='samples'))

0.8333333333333333


In [5]:
print(tokenized_dataset["train"]["labels"][2])

[1.0, 0.0, 1.0, 0.0, 0.0]


In [6]:
training_args = TrainingArguments(
    output_dir="./resultsTransformer",
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=50,
    weight_decay=0.01,
    eval_strategy="epoch", # run validation at the end of each epoch
    save_strategy="epoch",
    load_best_model_at_end=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

#trainer.evaluate()

In [7]:
trainer.train()

  0%|          | 0/24000 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.30067911744117737, 'eval_f1': 0.8002847988608046, 'eval_runtime': 6.2824, 'eval_samples_per_second': 151.533, 'eval_steps_per_second': 9.55, 'epoch': 1.0}
{'loss': 0.3943, 'grad_norm': 2.059049367904663, 'learning_rate': 9.791666666666666e-06, 'epoch': 1.04}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.23694944381713867, 'eval_f1': 0.8448821667252903, 'eval_runtime': 5.7273, 'eval_samples_per_second': 166.221, 'eval_steps_per_second': 10.476, 'epoch': 2.0}
{'loss': 0.266, 'grad_norm': 2.1203715801239014, 'learning_rate': 9.583333333333335e-06, 'epoch': 2.08}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.1994788497686386, 'eval_f1': 0.8766475160527205, 'eval_runtime': 5.696, 'eval_samples_per_second': 167.136, 'eval_steps_per_second': 10.534, 'epoch': 3.0}
{'loss': 0.2168, 'grad_norm': 2.5911598205566406, 'learning_rate': 9.375000000000001e-06, 'epoch': 3.12}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.17579224705696106, 'eval_f1': 0.8952827032452325, 'eval_runtime': 5.6293, 'eval_samples_per_second': 169.115, 'eval_steps_per_second': 10.658, 'epoch': 4.0}
{'loss': 0.1755, 'grad_norm': 0.7673338055610657, 'learning_rate': 9.166666666666666e-06, 'epoch': 4.17}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.15951097011566162, 'eval_f1': 0.9037629037629038, 'eval_runtime': 6.1098, 'eval_samples_per_second': 155.815, 'eval_steps_per_second': 9.82, 'epoch': 5.0}
{'loss': 0.143, 'grad_norm': 3.809091567993164, 'learning_rate': 8.958333333333334e-06, 'epoch': 5.21}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.1578001081943512, 'eval_f1': 0.9135721327637201, 'eval_runtime': 5.8517, 'eval_samples_per_second': 162.688, 'eval_steps_per_second': 10.253, 'epoch': 6.0}
{'loss': 0.1194, 'grad_norm': 4.3509650230407715, 'learning_rate': 8.750000000000001e-06, 'epoch': 6.25}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.1597176194190979, 'eval_f1': 0.9134583744652847, 'eval_runtime': 5.5935, 'eval_samples_per_second': 170.198, 'eval_steps_per_second': 10.727, 'epoch': 7.0}
{'loss': 0.0986, 'grad_norm': 2.0115041732788086, 'learning_rate': 8.541666666666666e-06, 'epoch': 7.29}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.15835259854793549, 'eval_f1': 0.9141716566866267, 'eval_runtime': 5.6946, 'eval_samples_per_second': 167.177, 'eval_steps_per_second': 10.536, 'epoch': 8.0}
{'loss': 0.0841, 'grad_norm': 5.647084712982178, 'learning_rate': 8.333333333333334e-06, 'epoch': 8.33}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.16756638884544373, 'eval_f1': 0.912885061278569, 'eval_runtime': 5.7647, 'eval_samples_per_second': 165.144, 'eval_steps_per_second': 10.408, 'epoch': 9.0}
{'loss': 0.0702, 'grad_norm': 0.7456362843513489, 'learning_rate': 8.125000000000001e-06, 'epoch': 9.38}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.17170484364032745, 'eval_f1': 0.9150630391506305, 'eval_runtime': 5.6782, 'eval_samples_per_second': 167.658, 'eval_steps_per_second': 10.567, 'epoch': 10.0}
{'loss': 0.0588, 'grad_norm': 5.202874660491943, 'learning_rate': 7.916666666666667e-06, 'epoch': 10.42}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.1825377643108368, 'eval_f1': 0.9170476812540823, 'eval_runtime': 5.9294, 'eval_samples_per_second': 160.557, 'eval_steps_per_second': 10.119, 'epoch': 11.0}
{'loss': 0.0469, 'grad_norm': 5.313999652862549, 'learning_rate': 7.708333333333334e-06, 'epoch': 11.46}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.19301646947860718, 'eval_f1': 0.9131286740692358, 'eval_runtime': 5.8105, 'eval_samples_per_second': 163.842, 'eval_steps_per_second': 10.326, 'epoch': 12.0}
{'loss': 0.0401, 'grad_norm': 0.565346896648407, 'learning_rate': 7.500000000000001e-06, 'epoch': 12.5}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.19742557406425476, 'eval_f1': 0.9168036829990135, 'eval_runtime': 5.6199, 'eval_samples_per_second': 169.397, 'eval_steps_per_second': 10.676, 'epoch': 13.0}
{'loss': 0.0307, 'grad_norm': 4.053928375244141, 'learning_rate': 7.291666666666667e-06, 'epoch': 13.54}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2065037041902542, 'eval_f1': 0.9150757077024357, 'eval_runtime': 5.6558, 'eval_samples_per_second': 168.323, 'eval_steps_per_second': 10.609, 'epoch': 14.0}
{'loss': 0.0258, 'grad_norm': 0.7137948274612427, 'learning_rate': 7.083333333333335e-06, 'epoch': 14.58}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.220008984208107, 'eval_f1': 0.9135963482230192, 'eval_runtime': 5.5525, 'eval_samples_per_second': 171.454, 'eval_steps_per_second': 10.806, 'epoch': 15.0}
{'loss': 0.021, 'grad_norm': 0.13749469816684723, 'learning_rate': 6.875e-06, 'epoch': 15.62}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2366972267627716, 'eval_f1': 0.9162851536952257, 'eval_runtime': 6.3046, 'eval_samples_per_second': 151.001, 'eval_steps_per_second': 9.517, 'epoch': 16.0}
{'loss': 0.0182, 'grad_norm': 4.1881327629089355, 'learning_rate': 6.666666666666667e-06, 'epoch': 16.67}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.22586171329021454, 'eval_f1': 0.9197510645266951, 'eval_runtime': 6.3169, 'eval_samples_per_second': 150.708, 'eval_steps_per_second': 9.498, 'epoch': 17.0}
{'loss': 0.0143, 'grad_norm': 0.1319257616996765, 'learning_rate': 6.458333333333334e-06, 'epoch': 17.71}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.23990999162197113, 'eval_f1': 0.9179030662710187, 'eval_runtime': 6.3196, 'eval_samples_per_second': 150.643, 'eval_steps_per_second': 9.494, 'epoch': 18.0}
{'loss': 0.0111, 'grad_norm': 1.7699700593948364, 'learning_rate': 6.25e-06, 'epoch': 18.75}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.25622040033340454, 'eval_f1': 0.9136855923859534, 'eval_runtime': 6.3909, 'eval_samples_per_second': 148.961, 'eval_steps_per_second': 9.388, 'epoch': 19.0}
{'loss': 0.0102, 'grad_norm': 0.17616479098796844, 'learning_rate': 6.041666666666667e-06, 'epoch': 19.79}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2759998142719269, 'eval_f1': 0.9156547423695438, 'eval_runtime': 6.2889, 'eval_samples_per_second': 151.377, 'eval_steps_per_second': 9.541, 'epoch': 20.0}
{'loss': 0.0089, 'grad_norm': 0.13357315957546234, 'learning_rate': 5.833333333333334e-06, 'epoch': 20.83}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.26846858859062195, 'eval_f1': 0.919344262295082, 'eval_runtime': 6.4096, 'eval_samples_per_second': 148.527, 'eval_steps_per_second': 9.361, 'epoch': 21.0}
{'loss': 0.0074, 'grad_norm': 0.6644874215126038, 'learning_rate': 5.625e-06, 'epoch': 21.88}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2913794219493866, 'eval_f1': 0.9164490861618799, 'eval_runtime': 6.3168, 'eval_samples_per_second': 150.71, 'eval_steps_per_second': 9.499, 'epoch': 22.0}
{'loss': 0.006, 'grad_norm': 5.216739654541016, 'learning_rate': 5.416666666666667e-06, 'epoch': 22.92}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2784186005592346, 'eval_f1': 0.9199607971251225, 'eval_runtime': 6.4025, 'eval_samples_per_second': 148.691, 'eval_steps_per_second': 9.371, 'epoch': 23.0}
{'loss': 0.0064, 'grad_norm': 5.707949161529541, 'learning_rate': 5.208333333333334e-06, 'epoch': 23.96}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.2942940592765808, 'eval_f1': 0.9187256176853056, 'eval_runtime': 6.3043, 'eval_samples_per_second': 151.007, 'eval_steps_per_second': 9.517, 'epoch': 24.0}
{'loss': 0.0042, 'grad_norm': 4.982077121734619, 'learning_rate': 5e-06, 'epoch': 25.0}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.30610379576683044, 'eval_f1': 0.9133550488599349, 'eval_runtime': 6.4021, 'eval_samples_per_second': 148.701, 'eval_steps_per_second': 9.372, 'epoch': 25.0}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3093714714050293, 'eval_f1': 0.9189012426422499, 'eval_runtime': 6.2883, 'eval_samples_per_second': 151.391, 'eval_steps_per_second': 9.541, 'epoch': 26.0}
{'loss': 0.0029, 'grad_norm': 0.03750540688633919, 'learning_rate': 4.791666666666668e-06, 'epoch': 26.04}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.32064634561538696, 'eval_f1': 0.9139465875370919, 'eval_runtime': 6.3086, 'eval_samples_per_second': 150.906, 'eval_steps_per_second': 9.511, 'epoch': 27.0}
{'loss': 0.0033, 'grad_norm': 0.18524640798568726, 'learning_rate': 4.583333333333333e-06, 'epoch': 27.08}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3283829987049103, 'eval_f1': 0.9177008491182234, 'eval_runtime': 6.3006, 'eval_samples_per_second': 151.096, 'eval_steps_per_second': 9.523, 'epoch': 28.0}
{'loss': 0.0043, 'grad_norm': 0.12713496387004852, 'learning_rate': 4.3750000000000005e-06, 'epoch': 28.12}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.34010863304138184, 'eval_f1': 0.9184605348988911, 'eval_runtime': 6.3924, 'eval_samples_per_second': 148.926, 'eval_steps_per_second': 9.386, 'epoch': 29.0}
{'loss': 0.0032, 'grad_norm': 0.03294110298156738, 'learning_rate': 4.166666666666667e-06, 'epoch': 29.17}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.33464106917381287, 'eval_f1': 0.9167489305692662, 'eval_runtime': 6.3936, 'eval_samples_per_second': 148.898, 'eval_steps_per_second': 9.384, 'epoch': 30.0}
{'loss': 0.0031, 'grad_norm': 0.1323094218969345, 'learning_rate': 3.958333333333333e-06, 'epoch': 30.21}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.34489119052886963, 'eval_f1': 0.9194127243066884, 'eval_runtime': 6.3262, 'eval_samples_per_second': 150.485, 'eval_steps_per_second': 9.484, 'epoch': 31.0}
{'loss': 0.0026, 'grad_norm': 0.10728933662176132, 'learning_rate': 3.7500000000000005e-06, 'epoch': 31.25}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.34137865900993347, 'eval_f1': 0.9203947368421053, 'eval_runtime': 6.2901, 'eval_samples_per_second': 151.35, 'eval_steps_per_second': 9.539, 'epoch': 32.0}
{'loss': 0.0023, 'grad_norm': 0.038007110357284546, 'learning_rate': 3.5416666666666673e-06, 'epoch': 32.29}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.353713721036911, 'eval_f1': 0.9171597633136094, 'eval_runtime': 6.3912, 'eval_samples_per_second': 148.954, 'eval_steps_per_second': 9.388, 'epoch': 33.0}
{'loss': 0.0021, 'grad_norm': 0.013202503323554993, 'learning_rate': 3.3333333333333333e-06, 'epoch': 33.33}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.35973018407821655, 'eval_f1': 0.9178617992177315, 'eval_runtime': 6.2858, 'eval_samples_per_second': 151.452, 'eval_steps_per_second': 9.545, 'epoch': 34.0}
{'loss': 0.0017, 'grad_norm': 0.08519119024276733, 'learning_rate': 3.125e-06, 'epoch': 34.38}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.34476765990257263, 'eval_f1': 0.9210439378923027, 'eval_runtime': 6.322, 'eval_samples_per_second': 150.586, 'eval_steps_per_second': 9.491, 'epoch': 35.0}
{'loss': 0.0019, 'grad_norm': 0.050912994891405106, 'learning_rate': 2.916666666666667e-06, 'epoch': 35.42}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.36458683013916016, 'eval_f1': 0.9214590864278672, 'eval_runtime': 6.2902, 'eval_samples_per_second': 151.346, 'eval_steps_per_second': 9.539, 'epoch': 36.0}
{'loss': 0.0016, 'grad_norm': 0.04523950442671776, 'learning_rate': 2.7083333333333334e-06, 'epoch': 36.46}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.36418747901916504, 'eval_f1': 0.9204882876938304, 'eval_runtime': 6.3753, 'eval_samples_per_second': 149.326, 'eval_steps_per_second': 9.411, 'epoch': 37.0}
{'loss': 0.0016, 'grad_norm': 0.01727208122611046, 'learning_rate': 2.5e-06, 'epoch': 37.5}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.36317721009254456, 'eval_f1': 0.918421052631579, 'eval_runtime': 6.285, 'eval_samples_per_second': 151.472, 'eval_steps_per_second': 9.547, 'epoch': 38.0}
{'loss': 0.0011, 'grad_norm': 0.013157936744391918, 'learning_rate': 2.2916666666666666e-06, 'epoch': 38.54}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3760256767272949, 'eval_f1': 0.9186885245901639, 'eval_runtime': 6.331, 'eval_samples_per_second': 150.372, 'eval_steps_per_second': 9.477, 'epoch': 39.0}
{'loss': 0.0014, 'grad_norm': 0.06720146536827087, 'learning_rate': 2.0833333333333334e-06, 'epoch': 39.58}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.36513447761535645, 'eval_f1': 0.9204470742932283, 'eval_runtime': 6.3958, 'eval_samples_per_second': 148.847, 'eval_steps_per_second': 9.381, 'epoch': 40.0}
{'loss': 0.0009, 'grad_norm': 0.6533024311065674, 'learning_rate': 1.8750000000000003e-06, 'epoch': 40.62}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3758693337440491, 'eval_f1': 0.9177631578947367, 'eval_runtime': 6.2894, 'eval_samples_per_second': 151.367, 'eval_steps_per_second': 9.54, 'epoch': 41.0}
{'loss': 0.001, 'grad_norm': 0.015847591683268547, 'learning_rate': 1.6666666666666667e-06, 'epoch': 41.67}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3833604156970978, 'eval_f1': 0.9190600522193212, 'eval_runtime': 6.2831, 'eval_samples_per_second': 151.519, 'eval_steps_per_second': 9.549, 'epoch': 42.0}
{'loss': 0.001, 'grad_norm': 0.5189116597175598, 'learning_rate': 1.4583333333333335e-06, 'epoch': 42.71}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3784738779067993, 'eval_f1': 0.9190071848465056, 'eval_runtime': 6.3084, 'eval_samples_per_second': 150.911, 'eval_steps_per_second': 9.511, 'epoch': 43.0}
{'loss': 0.0006, 'grad_norm': 0.2212093323469162, 'learning_rate': 1.25e-06, 'epoch': 43.75}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3728553056716919, 'eval_f1': 0.9220651101611312, 'eval_runtime': 6.3166, 'eval_samples_per_second': 150.715, 'eval_steps_per_second': 9.499, 'epoch': 44.0}
{'loss': 0.0005, 'grad_norm': 0.052110906690359116, 'learning_rate': 1.0416666666666667e-06, 'epoch': 44.79}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3766786456108093, 'eval_f1': 0.9216650278597182, 'eval_runtime': 6.3349, 'eval_samples_per_second': 150.278, 'eval_steps_per_second': 9.471, 'epoch': 45.0}
{'loss': 0.001, 'grad_norm': 0.07765387743711472, 'learning_rate': 8.333333333333333e-07, 'epoch': 45.83}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.374664843082428, 'eval_f1': 0.9209576910462447, 'eval_runtime': 6.3625, 'eval_samples_per_second': 149.626, 'eval_steps_per_second': 9.43, 'epoch': 46.0}
{'loss': 0.0007, 'grad_norm': 0.0116405189037323, 'learning_rate': 6.25e-07, 'epoch': 46.88}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.37504488229751587, 'eval_f1': 0.9219672131147542, 'eval_runtime': 6.2859, 'eval_samples_per_second': 151.45, 'eval_steps_per_second': 9.545, 'epoch': 47.0}
{'loss': 0.0005, 'grad_norm': 0.001859614159911871, 'learning_rate': 4.1666666666666667e-07, 'epoch': 47.92}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3811575770378113, 'eval_f1': 0.9188481675392669, 'eval_runtime': 6.3243, 'eval_samples_per_second': 150.531, 'eval_steps_per_second': 9.487, 'epoch': 48.0}
{'loss': 0.0005, 'grad_norm': 0.24017959833145142, 'learning_rate': 2.0833333333333333e-07, 'epoch': 48.96}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3784990906715393, 'eval_f1': 0.9189366590088612, 'eval_runtime': 6.4108, 'eval_samples_per_second': 148.498, 'eval_steps_per_second': 9.359, 'epoch': 49.0}
{'loss': 0.0004, 'grad_norm': 0.2580961287021637, 'learning_rate': 0.0, 'epoch': 50.0}


  0%|          | 0/60 [00:00<?, ?it/s]

{'eval_loss': 0.3775757849216461, 'eval_f1': 0.9207497533706018, 'eval_runtime': 6.3821, 'eval_samples_per_second': 149.167, 'eval_steps_per_second': 9.401, 'epoch': 50.0}
{'train_runtime': 11509.2287, 'train_samples_per_second': 33.338, 'train_steps_per_second': 2.085, 'train_loss': 0.03995643497010072, 'epoch': 50.0}


TrainOutput(global_step=24000, training_loss=0.03995643497010072, metrics={'train_runtime': 11509.2287, 'train_samples_per_second': 33.338, 'train_steps_per_second': 2.085, 'total_flos': 5.08287835559016e+16, 'train_loss': 0.03995643497010072, 'epoch': 50.0})

In [8]:
y_pred = trainer.predict(tokenized_dataset["test"])
y_pred.metrics

  0%|          | 0/61 [00:00<?, ?it/s]

{'test_loss': 0.16220751404762268,
 'test_f1': 0.9070753179002283,
 'test_runtime': 5.7191,
 'test_samples_per_second': 168.909,
 'test_steps_per_second': 10.666}

In [None]:
trainer.save_model("finetunedTransformer_1")

: 