In [None]:
!pip install transformers datasets peft accelerate optuna wandb evaluate scikit-learn -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/247.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
import torch
import numpy as np
import evaluate
import optuna
import wandb
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from peft import IA3Config, get_peft_model, TaskType

# Log in to Weights & Biases up front; the runs themselves are created later via wandb.init.
wandb.login()




True

In [None]:

# Folder holding the pre-split CSV files.
# NOTE(review): the path looks like a Colab Google Drive mount; the mount step is not in this cell.
data_path_prefix = "/content/drive/MyDrive/Banking77_Project/data/"

# One CSV per split, named after the split itself.
data_files = {
    split: os.path.join(data_path_prefix, f"{split}.csv")
    for split in ("train", "validation", "test")
}

dataset = load_dataset("csv", data_files=data_files)

# Label vocabulary: the distinct intent strings of the training split, sorted so that the
# string <-> integer id mapping is stable from one run to the next.
label_names = sorted(set(dataset["train"]["intent"]))
id2label = dict(enumerate(label_names))
label2id = {label: idx for idx, label in id2label.items()}
num_labels = len(label_names)


In [None]:
# Sanity check after loading: show each split with its columns and row count.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'intent', 'text_cleaned'],
        num_rows: 8002
    })
    validation: Dataset({
        features: ['text', 'intent', 'text_cleaned'],
        num_rows: 2001
    })
    test: Dataset({
        features: ['text', 'intent', 'text_cleaned'],
        num_rows: 3080
    })
})


In [None]:
# Checkpoint identifier; reused below for the tokenizer and for every model that is loaded.
model_name = "roberta-base"
# Tokenizer matching the checkpoint; used by preprocess() and handed to the Trainer.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(example):
    """Tokenize the ``"text"`` field of ``example`` with the notebook-level tokenizer.

    The text is truncated and padded to a fixed length of 64 tokens, so every
    encoded row has the same length. Returns whatever the tokenizer returns.
    """
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 64,
    }
    return tokenizer(example["text"], **encoding_options)

# Tokenize in batches rather than one example per call. preprocess() forwards
# example["text"] straight to the tokenizer, which also accepts a list of strings, and
# since every row is padded/truncated to the same fixed length the encoded rows do not
# depend on how they are batched — the result is the same, only faster.
dataset = dataset.map(preprocess, batched=True)
# Add an integer "label" column by looking each intent string up in label2id.
dataset = dataset.map(lambda e: {"label": [label2id[l] for l in e["intent"]]}, batched=True)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/8002 [00:00<?, ? examples/s]

Map:   0%|          | 0/2001 [00:00<?, ? examples/s]

Map:   0%|          | 0/3080 [00:00<?, ? examples/s]

Map:   0%|          | 0/8002 [00:00<?, ? examples/s]

Map:   0%|          | 0/2001 [00:00<?, ? examples/s]

Map:   0%|          | 0/3080 [00:00<?, ? examples/s]

In [None]:
# Metric objects loaded once at notebook level; compute_metrics() calls .compute() on them.
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Turn a ``(logits, labels)`` pair into accuracy and weighted-F1 scores.

    Args:
        eval_pred: A two-element iterable ``(logits, labels)``.

    Returns:
        dict with keys ``"accuracy"`` and ``"f1"`` (F1 uses ``average="weighted"``).
    """
    raw_scores, gold = eval_pred
    # The predicted class is the index of the largest score along the last axis.
    predicted = np.argmax(raw_scores, axis=-1)

    accuracy_result = accuracy.compute(predictions=predicted, references=gold)
    f1_result = f1.compute(predictions=predicted, references=gold, average="weighted")

    return {"accuracy": accuracy_result["accuracy"], "f1": f1_result["f1"]}


In [None]:
def objective(trial):
    """Optuna objective: fine-tune an IA3-adapted classifier and score it on validation.

    Samples learning rate, batch size, epoch count and weight decay from ``trial``,
    trains a fresh ``model_name`` sequence classifier wrapped with IA3 adapters on
    ``dataset["train"]``, and evaluates on ``dataset["validation"]``.

    Side effects:
        * Opens one W&B run per trial in project ``banking77-ia3-optuna`` and always
          closes it, even when training fails (the run is then marked as failed).
        * Writes Trainer output under ``./results/trial_<n>`` and logs under ``./logs``.

    Args:
        trial: The ``optuna`` trial used to suggest hyperparameters.

    Returns:
        The final validation accuracy (``eval_results["eval_accuracy"]``).
    """
    # Suggest hyperparams
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    num_epochs = trial.suggest_int("num_train_epochs", 3, 7)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)

    # Fixed seed so every trial starts from the same randomly initialised classifier head;
    # otherwise differences between trials mix hyperparameter effects with init noise.
    seed = 42

    # W&B init per trial
    wandb.init(
        project="banking77-ia3-optuna",
        name=f"trial_{trial.number}",
        reinit=True,
        config={
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "num_epochs": num_epochs,
            "weight_decay": weight_decay
        }
    )

    # Assume failure until the trial has completed, so a crashed trial is not
    # recorded as a successful run and never leaves a run open for the next trial.
    exit_code = 1
    try:
        # Seed *before* building the model: the classification head is newly
        # initialised at load time, which happens before the Trainer is created.
        torch.manual_seed(seed)

        # Build model
        base_model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
            id2label=id2label,
            label2id=label2id
        )
        ia3_config = IA3Config(task_type=TaskType.SEQ_CLS, target_modules=["query", "value"])
        model = get_peft_model(base_model, ia3_config)

        # Training args
        training_args = TrainingArguments(
            output_dir=f"./results/trial_{trial.number}",
            eval_strategy="epoch",
            save_strategy="no",  # no checkpoints during the search
            learning_rate=learning_rate,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=num_epochs,
            weight_decay=weight_decay,
            seed=seed,
            logging_dir="./logs",
            report_to=["wandb"]
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            # `tokenizer=` is deprecated on Trainer; `processing_class` is its replacement.
            processing_class=tokenizer,
            compute_metrics=compute_metrics,
        )

        trainer.train()
        eval_results = trainer.evaluate()

        wandb.log(eval_results)
        exit_code = 0
        return eval_results["eval_accuracy"]
    finally:
        wandb.finish(exit_code=exit_code)


In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable when the
# notebook is re-run; an unseeded sampler proposes different values every time.
study = optuna.create_study(
    direction="maximize",  # objective() returns validation accuracy
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

print("Best Trial:")
print(study.best_trial.params)


[I 2025-08-22 08:51:10,512] A new study created in memory with name: no-name-98355eb9-d9f8-4e7a-94c5-db16d17b554d


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.2776,4.131026,0.068966,0.038914
2,3.716,2.934323,0.490255,0.448305
3,2.3926,1.702281,0.64068,0.614771
4,1.6555,1.310027,0.708146,0.693115
5,1.4237,1.2199,0.717141,0.704071


0,1
epoch,▁
eval/accuracy,▁▆▇███
eval/f1,▁▅▇███
eval/loss,█▅▂▁▁▁
eval/runtime,▁▃▇▆▇█
eval/samples_per_second,█▆▂▃▂▁
eval/steps_per_second,█▆▂▃▂▁
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,5.0
eval/accuracy,0.71714
eval/f1,0.70407
eval/loss,1.2199
eval/runtime,6.6625
eval/samples_per_second,300.337
eval/steps_per_second,18.912
eval_accuracy,0.71714
eval_f1,0.70407
eval_loss,1.2199


[I 2025-08-22 08:56:40,307] Trial 0 finished with value: 0.7171414292853573 and parameters: {'learning_rate': 0.0001730151117875226, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.0681725169429884}. Best is trial 0 with value: 0.7171414292853573.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.2969,4.208556,0.045477,0.014915
2,4.0137,3.627589,0.37931,0.338319
3,3.0727,2.30983,0.562219,0.530146
4,2.0552,1.555021,0.66067,0.637725
5,1.5711,1.255523,0.712644,0.699345
6,1.3607,1.126093,0.731634,0.721012
7,1.2736,1.089893,0.73913,0.729049


0,1
epoch,▁
eval/accuracy,▁▄▆▇████
eval/f1,▁▄▆▇████
eval/loss,█▇▄▂▁▁▁▁
eval/runtime,▂▇▅▄█▂█▁
eval/samples_per_second,▇▂▄▅▁▇▁█
eval/steps_per_second,▇▂▄▅▁▇▁█
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,7.0
eval/accuracy,0.73913
eval/f1,0.72905
eval/loss,1.08989
eval/runtime,6.5573
eval/samples_per_second,305.158
eval/steps_per_second,19.215
eval_accuracy,0.73913
eval_f1,0.72905
eval_loss,1.08989


[I 2025-08-22 09:04:08,361] Trial 1 finished with value: 0.7391304347826086 and parameters: {'learning_rate': 0.00013217439430404193, 'batch_size': 16, 'num_train_epochs': 7, 'weight_decay': 0.010321813929659529}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.3206,4.288956,0.026487,0.006108
2,4.2824,4.263036,0.053973,0.020285
3,4.2582,4.252604,0.05997,0.019911


0,1
epoch,▁
eval/accuracy,▁▇██
eval/f1,▁███
eval/loss,█▃▁▁
eval/runtime,█▁▃▃
eval/samples_per_second,▁█▆▆
eval/steps_per_second,▁█▆▆
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,3.0
eval/accuracy,0.05997
eval/f1,0.01991
eval/loss,4.2526
eval/runtime,6.604
eval/samples_per_second,302.998
eval/steps_per_second,19.079
eval_accuracy,0.05997
eval_f1,0.01991
eval_loss,4.2526


[I 2025-08-22 09:07:25,797] Trial 2 finished with value: 0.05997001499250375 and parameters: {'learning_rate': 5.9457204665127934e-05, 'batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 0.05657950484507462}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,4.325657,0.014493,0.001937
2,4.331300,4.314332,0.01949,0.002121
3,4.331300,4.307908,0.018491,0.001339
4,4.310700,4.30368,0.031484,0.009525
5,4.310700,4.300929,0.033483,0.009814
6,4.304000,4.299388,0.036482,0.009212
7,4.304000,4.298932,0.035482,0.008347


0,1
epoch,▁
eval/accuracy,▁▃▂▆▇███
eval/f1,▁▂▁███▇▇
eval/loss,█▅▃▂▂▁▁▁
eval/runtime,▁▅█▁▆▄█▃
eval/samples_per_second,█▄▁█▃▅▁▆
eval/steps_per_second,█▄▁█▃▅▁▆
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,7.0
eval/accuracy,0.03548
eval/f1,0.00835
eval/loss,4.29893
eval/runtime,6.4884
eval/samples_per_second,308.397
eval/steps_per_second,9.71
eval_accuracy,0.03548
eval_f1,0.00835
eval_loss,4.29893


[I 2025-08-22 09:14:39,152] Trial 3 finished with value: 0.03548225887056472 and parameters: {'learning_rate': 1.3535559090876356e-05, 'batch_size': 32, 'num_train_epochs': 7, 'weight_decay': 0.048502677322007096}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.3223,4.291667,0.025987,0.007396
2,4.2842,4.26129,0.043978,0.024354
3,4.2496,4.22966,0.049475,0.023212
4,4.2206,4.20553,0.073963,0.040535
5,4.1978,4.19622,0.058971,0.026721


0,1
epoch,▁
eval/accuracy,▁▄▄█▆▆
eval/f1,▁▅▄█▅▅
eval/loss,█▆▃▂▁▁
eval/runtime,█▁▆▇▄█
eval/samples_per_second,▁█▃▂▅▁
eval/steps_per_second,▁█▃▂▅▁
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,5.0
eval/accuracy,0.05897
eval/f1,0.02672
eval/loss,4.19622
eval/runtime,6.6469
eval/samples_per_second,301.042
eval/steps_per_second,18.956
eval_accuracy,0.05897
eval_f1,0.02672
eval_loss,4.19622


[I 2025-08-22 09:20:02,568] Trial 4 finished with value: 0.05897051474262868 and parameters: {'learning_rate': 5.070886975503095e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.04398270933695974}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,4.287428,0.038981,0.012552
2,4.298000,4.245709,0.064468,0.02357
3,4.298000,4.19076,0.058471,0.028812
4,4.194700,4.132328,0.116942,0.069725
5,4.194700,4.088405,0.132434,0.088121
6,4.095100,4.072013,0.148926,0.102379


0,1
epoch,▁
eval/accuracy,▁▃▂▆▇██
eval/f1,▁▂▂▅▇██
eval/loss,█▇▅▃▂▁▁
eval/runtime,▁▇▇▆▇▂█
eval/samples_per_second,█▂▂▃▂▇▁
eval/steps_per_second,█▂▂▃▂▇▁
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,6.0
eval/accuracy,0.14893
eval/f1,0.10238
eval/loss,4.07201
eval/runtime,6.5664
eval/samples_per_second,304.732
eval/steps_per_second,9.594
eval_accuracy,0.14893
eval_f1,0.10238
eval_loss,4.07201


[I 2025-08-22 09:26:16,216] Trial 5 finished with value: 0.1489255372313843 and parameters: {'learning_rate': 8.519700368385158e-05, 'batch_size': 32, 'num_train_epochs': 6, 'weight_decay': 0.000914476882323978}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,4.204118,0.049975,0.020533
2,No log,3.783585,0.273863,0.220124
3,No log,2.879159,0.46077,0.416584
4,3.590500,2.077201,0.591704,0.56245
5,3.590500,1.709463,0.642179,0.621144
6,3.590500,1.611534,0.656672,0.637058


0,1
epoch,▁
eval/accuracy,▁▄▆▇███
eval/f1,▁▃▅▇███
eval/loss,█▇▄▂▁▁▁
eval/runtime,▁▁▄▇█▅▃
eval/samples_per_second,██▅▂▁▄▆
eval/steps_per_second,██▅▂▁▄▆
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,6.0
eval/accuracy,0.65667
eval/f1,0.63706
eval/loss,1.61153
eval/runtime,6.0396
eval/samples_per_second,331.312
eval/steps_per_second,5.298
eval_accuracy,0.65667
eval_f1,0.63706
eval_loss,1.61153


[I 2025-08-22 09:32:10,497] Trial 6 finished with value: 0.656671664167916 and parameters: {'learning_rate': 0.00033235127675082955, 'batch_size': 64, 'num_train_epochs': 6, 'weight_decay': 0.04857177553098746}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.3303,4.30931,0.024488,0.003786
2,4.3052,4.294888,0.029985,0.006943
3,4.2933,4.284803,0.03948,0.014037
4,4.2842,4.277291,0.048476,0.016095
5,4.2759,4.272192,0.044978,0.012081
6,4.2741,4.270479,0.047476,0.017654


0,1
epoch,▁
eval/accuracy,▁▃▅█▇██
eval/f1,▁▃▆▇▅██
eval/loss,█▅▄▂▁▁▁
eval/runtime,▁▅▄▄█▃▇
eval/samples_per_second,█▄▅▅▁▆▂
eval/steps_per_second,█▄▅▅▁▆▂
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,6.0
eval/accuracy,0.04748
eval/f1,0.01765
eval/loss,4.27048
eval/runtime,6.7407
eval/samples_per_second,296.851
eval/steps_per_second,18.692
eval_accuracy,0.04748
eval_f1,0.01765
eval_loss,4.27048


[I 2025-08-22 09:38:41,782] Trial 7 finished with value: 0.047476261869065464 and parameters: {'learning_rate': 2.4093268046675206e-05, 'batch_size': 16, 'num_train_epochs': 6, 'weight_decay': 0.031623273139924524}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.3151,4.274888,0.04048,0.017797
2,4.2482,4.20006,0.06047,0.023675
3,4.1551,4.093758,0.101449,0.075803
4,4.0483,3.997467,0.198901,0.158541
5,3.9714,3.958402,0.201899,0.156067


0,1
epoch,▁
eval/accuracy,▁▂▄███
eval/f1,▁▁▄███
eval/loss,█▆▄▂▁▁
eval/runtime,▇▆█▄▅▁
eval/samples_per_second,▂▃▁▅▄█
eval/steps_per_second,▂▃▁▅▄█
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,5.0
eval/accuracy,0.2019
eval/f1,0.15607
eval/loss,3.9584
eval/runtime,6.6273
eval/samples_per_second,301.931
eval/steps_per_second,19.012
eval_accuracy,0.2019
eval_f1,0.15607
eval_loss,3.9584


[I 2025-08-22 09:44:08,905] Trial 8 finished with value: 0.20189905047476261 and parameters: {'learning_rate': 7.755930950069219e-05, 'batch_size': 16, 'num_train_epochs': 5, 'weight_decay': 0.06134784330032274}. Best is trial 1 with value: 0.7391304347826086.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.3162,4.277278,0.037481,0.018536
2,4.2572,4.218113,0.071964,0.025208
3,4.1909,4.156759,0.071464,0.041655
4,4.1419,4.131039,0.093953,0.063004


0,1
epoch,▁
eval/accuracy,▁▅▅██
eval/f1,▁▂▅██
eval/loss,█▅▂▁▁
eval/runtime,▁█▁▇▂
eval/samples_per_second,█▁█▂▇
eval/steps_per_second,█▁█▂▇
eval_accuracy,▁
eval_f1,▁
eval_loss,▁

0,1
epoch,4.0
eval/accuracy,0.09395
eval/f1,0.063
eval/loss,4.13104
eval/runtime,6.6595
eval/samples_per_second,300.474
eval/steps_per_second,18.92
eval_accuracy,0.09395
eval_f1,0.063
eval_loss,4.13104


[I 2025-08-22 09:48:33,266] Trial 9 finished with value: 0.09395302348825588 and parameters: {'learning_rate': 7.613237902003231e-05, 'batch_size': 16, 'num_train_epochs': 4, 'weight_decay': 0.0422396494741149}. Best is trial 1 with value: 0.7391304347826086.


Best Trial:
{'learning_rate': 0.00013217439430404193, 'batch_size': 16, 'num_train_epochs': 7, 'weight_decay': 0.010321813929659529}


In [None]:
# Retrain once with the best hyperparameters found by the study, keep the best
# checkpoint (by validation metrics) and report metrics on the held-out test split.
best_params = study.best_trial.params

# Open an explicit W&B run in the same project as the search trials. Without this the
# Trainer's W&B callback creates its own run, which is then never finished.
wandb.init(
    project="banking77-ia3-optuna",
    name="final_best_params",
    reinit=True,
    config=best_params,
)

# Assume failure until training and evaluation have completed.
exit_code = 1
try:
    # Seed before building the model: the classification head is newly initialised at
    # load time, so an unseeded run does not reproduce the score of the best trial.
    torch.manual_seed(42)

    base_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id
    )
    ia3_config = IA3Config(task_type=TaskType.SEQ_CLS, target_modules=["query", "value"])
    model = get_peft_model(base_model, ia3_config)

    final_args = TrainingArguments(
        output_dir="./final_results",
        eval_strategy="epoch",
        save_strategy="epoch",  # must match eval_strategy for load_best_model_at_end
        learning_rate=best_params["learning_rate"],
        per_device_train_batch_size=best_params["batch_size"],
        per_device_eval_batch_size=best_params["batch_size"],
        num_train_epochs=best_params["num_train_epochs"],
        weight_decay=best_params["weight_decay"],
        load_best_model_at_end=True,
        seed=42,
        logging_dir="./logs_final",
        report_to=["wandb"]
    )

    final_trainer = Trainer(
        model=model,
        args=final_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"],
        # `tokenizer=` is deprecated on Trainer; `processing_class` is its replacement.
        processing_class=tokenizer,
        compute_metrics=compute_metrics,
    )

    final_trainer.train()
    # Metrics keep the default "eval_" key prefix even though this is the test split.
    results = final_trainer.evaluate(dataset["test"])
    exit_code = 0
finally:
    wandb.finish(exit_code=exit_code)

print("Final Test Results:", results)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  final_trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,4.2957,4.204616,0.044478,0.014498
2,4.016,3.641601,0.372314,0.330674
3,3.1082,2.353835,0.554723,0.517513
4,2.0945,1.582701,0.650175,0.626654
5,1.5985,1.27389,0.704648,0.691441
6,1.3817,1.14034,0.727636,0.716337
7,1.2913,1.103214,0.734633,0.72376


Final Test Results: {'eval_loss': 1.1605722904205322, 'eval_accuracy': 0.7253246753246754, 'eval_f1': 0.7075864702367034, 'eval_runtime': 10.192, 'eval_samples_per_second': 302.199, 'eval_steps_per_second': 18.937, 'epoch': 7.0}
