In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from datasets import Dataset,load_dataset
import ast
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import precision_recall_fscore_support
from torch.utils.data import DataLoader
from transformers import default_data_collator
import torch
import optuna
import shutil
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nlbse-25-dataset/NLBSE_Dataset_Python.csv
/kaggle/input/nlbse-25-dataset/NLBSE_Dataset_Pharo.csv
Using device: cuda


In [2]:
df = pd.read_csv('/kaggle/input/nlbse-25-dataset/NLBSE_Dataset_Pharo.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17508 entries, 0 to 17507
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   class             17508 non-null  object
 1   comment_sentence  17506 non-null  object
 2   labels            17508 non-null  object
 3   types             17508 non-null  object
dtypes: object(4)
memory usage: 547.2+ KB


In [4]:
df.head(10)
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("Wandb")
import wandb

# Replace YOUR_API_KEY with your actual API key
wandb.login(key=secret_value_0)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [5]:
df.drop_duplicates(subset=['comment_sentence'], keep='first', inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13649 entries, 0 to 17505
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   class             13649 non-null  object
 1   comment_sentence  13648 non-null  object
 2   labels            13649 non-null  object
 3   types             13649 non-null  object
dtypes: object(4)
memory usage: 533.2+ KB


In [6]:
null_rows = df[df['comment_sentence'].isnull()]

print("Rows with null values in 'comment_sentence':")
print(null_rows)

Rows with null values in 'comment_sentence':
                        class comment_sentence           labels types
13253  TSpartaCompositeFilter              NaN  [0 1 0 0 0 0 0]   trk


In [7]:
df_cleaned = df.dropna(subset=['comment_sentence'])
print("DataFrame shape after removing nulls:", df_cleaned.shape)
df_cleaned.info()

DataFrame shape after removing nulls: (13648, 4)
<class 'pandas.core.frame.DataFrame'>
Index: 13648 entries, 0 to 17505
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   class             13648 non-null  object
 1   comment_sentence  13648 non-null  object
 2   labels            13648 non-null  object
 3   types             13648 non-null  object
dtypes: object(4)
memory usage: 533.1+ KB


In [None]:
pattern = r'\*|//'
rows_with_pattern = df_cleaned.apply(lambda row: row.astype(str).str.contains(pattern).any(), axis=1)

# Count rows with patterns
num_rows_with_pattern = rows_with_pattern.sum()
print(f"\nNumber of rows containing patterns: {num_rows_with_pattern}")

# Remove `//` or `*` from all columns
df_cleaned = df_cleaned.replace(pattern, '', regex=True)


Number of rows containing patterns: 21


In [9]:
df = df_cleaned
df['combo'] = df['comment_sentence'] +"  |  "+  df['class']
pharo_dataset = Dataset.from_pandas(df)
# Split the dataset into train and validation subsets
train_test_split = pharo_dataset.train_test_split(test_size=0.2, seed=42)

# Extract train and validation datasets
pharo_train = train_test_split['train']
pharo_test = train_test_split['test']
pharo_labels = ['Keyimplementationpoints', 'Example', 'Responsibilities', 'Classreferences', 'Intent', 'Keymessages', 'Collaborators']

In [10]:
# Use Hugging Face's default data collator
data_collator = default_data_collator
num_labels = len(pharo_labels)

# Load model
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        "FacebookAI/roberta-base",
        num_labels=num_labels,
        problem_type="multi_label_classification",
    ).to(device)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-base")

# Tokenize dataset
def tokenize_function(examples):
    return tokenizer(examples["combo"], truncation=True, padding="max_length", max_length=128)
tokenized_train = pharo_train.map(tokenize_function, batched=True)
tokenized_test = pharo_test.map(tokenize_function, batched=True)

# Convert labels to tensors
def encode_labels(examples):
    if isinstance(examples['labels'], str):
        examples["labels"]=examples["labels"].replace(" ", ",")
        labels = ast.literal_eval(examples['labels'])
    else:
        labels = examples['labels']
    # Convert labels to tensors
    labels = torch.tensor(labels, dtype=torch.float32)
    return {'labels': labels}
    
tokenized_train = tokenized_train.map(encode_labels)
tokenized_test = tokenized_test.map(encode_labels)

# Format datasets for PyTorch
tokenized_train.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_test.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])


# train_dataloader = DataLoader(tokenized_train, batch_size=32, shuffle=True)
# test_dataloader = DataLoader(tokenized_test, batch_size=32, shuffle=False)
def clear_directory(directory):
    """Removes all contents inside a directory."""
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)  # Remove file or symbolic link
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)  # Remove directory
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")

# Define evaluation metrics
def compute_metrics(pred):
    logits, labels = pred
    print(f"Logits Shape: {logits.shape}, Labels Shape: {labels.shape}")
    # Apply sigmoid to logits to convert to probabilities
    probs = 1 / (1 + np.exp(-logits))  # Sigmoid function
    preds = (probs > 0.5).astype(int)  # Threshold for multi-label classification
    # Compute metrics
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="micro")
    if f1 == 0:
        print("F1 score is zero. Resetting the trial.")
        raise optuna.exceptions.TrialPruned()  # Prune the trial to reset it
    return {"precision": precision, "recall": recall, "f1": f1}
    
def optuna_objective(trial):
    temp_dir = "./temp_results"
    if os.path.exists(temp_dir):
        clear_directory(temp_dir)
    else:
        os.makedirs(temp_dir)
    temp_dir = "./temp_logs"
    if os.path.exists(temp_dir):
        clear_directory(temp_dir)
    else:
        os.makedirs(temp_dir)
    
    # Define hyperparameter search space
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
    weight_decay = trial.suggest_float("weight_decay", 0.01, 0.1)
    batch_size = trial.suggest_categorical("batch_size", [4,8,16, 32])

    # Initialize Trainer with current trial parameters
    training_args = TrainingArguments(
        output_dir="./temp_results",  # Temporary directory for checkpoints
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=5,
        weight_decay=weight_decay,
        logging_dir="./temp_logs",
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        save_total_limit=3,  # Keep only the latest checkpoint
    )

    trainer = Trainer(
        model_init=model_init,  # This makes sure we load the base model at each trial
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
    )

    # Perform training
    trainer.train()
    # Evaluate on the validation set
    eval_results = trainer.evaluate()
    print("Evaluation Results:", eval_results)
    eval_loss = eval_results["eval_loss"]
    # Optuna will minimize this
    return eval_loss

# Run Optuna search
study = optuna.create_study(direction="minimize")
study.optimize(optuna_objective, n_trials=16)

# Display best hyperparameters
print("Best Hyperparameters:", study.best_params)
print("Best Evaluation Loss:", study.best_value)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/10918 [00:00<?, ? examples/s]

Map:   0%|          | 0/2730 [00:00<?, ? examples/s]

Map:   0%|          | 0/10918 [00:00<?, ? examples/s]

Map:   0%|          | 0/2730 [00:00<?, ? examples/s]

[I 2024-12-06 09:28:21,846] A new study created in memory with name: no-name-733f0848-1a81-4bf2-a466-48774d4ddf18
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Currently logged in as: [33mhimel6087[0m ([33mhimel6087-bangladesh-university-of-engineering-and-techn[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.18.7
[34m[1mwan

Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1264,0.106638,0.907682,0.871281,0.889109
2,0.0456,0.054973,0.956493,0.945666,0.951049
3,0.0305,0.032509,0.97846,0.969599,0.974009
4,0.0129,0.02277,0.980316,0.982536,0.981425
5,0.0059,0.021518,0.982892,0.984799,0.983845


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 09:56:22,564] Trial 0 finished with value: 0.02151818946003914 and parameters: {'learning_rate': 2.124272908877653e-05, 'weight_decay': 0.08479406198320488, 'batch_size': 4}. Best is trial 0 with value: 0.02151818946003914.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.02151818946003914, 'eval_precision': 0.9828921885087153, 'eval_recall': 0.9847994825355757, 'eval_f1': 0.9838449111470112, 'eval_runtime': 23.4925, 'eval_samples_per_second': 116.207, 'eval_steps_per_second': 14.558, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1418,0.118198,0.898462,0.849935,0.873525
2,0.063,0.051739,0.965347,0.94599,0.95557
3,0.0265,0.03394,0.972258,0.974774,0.973514
4,0.0083,0.024258,0.979053,0.982536,0.980791
5,0.0068,0.020193,0.981029,0.98674,0.983876


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 10:15:58,247] Trial 1 finished with value: 0.02019253745675087 and parameters: {'learning_rate': 3.9453133511375956e-05, 'weight_decay': 0.07778731941647414, 'batch_size': 8}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.02019253745675087, 'eval_precision': 0.9810289389067524, 'eval_recall': 0.9867399741267788, 'eval_f1': 0.9838761689777491, 'eval_runtime': 15.7124, 'eval_samples_per_second': 173.749, 'eval_steps_per_second': 10.883, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-12-06 10:19:47,953] Trial 2 pruned. 
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
F1 score is zero. Resetting the trial.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1692,0.141681,0.894309,0.818241,0.854585
2,0.0904,0.078639,0.932705,0.927878,0.930285
3,0.0524,0.0514,0.968494,0.954398,0.961394
4,0.0288,0.035608,0.97819,0.971863,0.975016
5,0.0208,0.032311,0.980475,0.97445,0.977453


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 10:39:25,023] Trial 3 finished with value: 0.032311249524354935 and parameters: {'learning_rate': 1.5813461468429627e-05, 'weight_decay': 0.09495404622746004, 'batch_size': 8}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.032311249524354935, 'eval_precision': 0.9804751057598438, 'eval_recall': 0.9744501940491591, 'eval_f1': 0.9774533657745337, 'eval_runtime': 15.6893, 'eval_samples_per_second': 174.004, 'eval_steps_per_second': 10.899, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-12-06 10:42:22,061] Trial 4 pruned. 
  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
F1 score is zero. Resetting the trial.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1964,0.167288,0.869581,0.791397,0.828649
2,0.112,0.096752,0.934701,0.884217,0.908759
3,0.073,0.070708,0.947472,0.933376,0.940371
4,0.048,0.05333,0.97203,0.955369,0.963627
5,0.0411,0.048936,0.970193,0.957956,0.964036


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 11:01:56,691] Trial 5 finished with value: 0.048935774713754654 and parameters: {'learning_rate': 1.0593451825445046e-05, 'weight_decay': 0.09135294187021471, 'batch_size': 8}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.048935774713754654, 'eval_precision': 0.9701932525384868, 'eval_recall': 0.9579560155239327, 'eval_f1': 0.9640358014646053, 'eval_runtime': 15.6512, 'eval_samples_per_second': 174.428, 'eval_steps_per_second': 10.926, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.2857,0.212956,0.816907,0.69696,0.752182
2,0.1645,0.138498,0.903226,0.796895,0.846735
3,0.108,0.102436,0.9209,0.873545,0.896598
4,0.0717,0.076027,0.937913,0.918499,0.928105
5,0.0443,0.058267,0.952412,0.932083,0.942138


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 11:17:06,102] Trial 6 finished with value: 0.05826733633875847 and parameters: {'learning_rate': 0.00011766584594005819, 'weight_decay': 0.06605183753111833, 'batch_size': 16}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.05826733633875847, 'eval_precision': 0.9524124256444151, 'eval_recall': 0.9320827943078913, 'eval_f1': 0.9421379535796012, 'eval_runtime': 13.5516, 'eval_samples_per_second': 201.452, 'eval_steps_per_second': 6.346, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss


  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-12-06 11:22:36,144] Trial 7 pruned. 


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
F1 score is zero. Resetting the trial.


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1779,0.146947,0.851294,0.840556,0.845891
2,0.0877,0.067634,0.935889,0.939521,0.937702
3,0.0396,0.0389,0.961265,0.963131,0.962197
4,0.0196,0.030249,0.971318,0.974774,0.973043
5,0.009,0.022761,0.980323,0.982859,0.981589


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 11:37:44,744] Trial 8 finished with value: 0.022761063650250435 and parameters: {'learning_rate': 0.00010004266056593575, 'weight_decay': 0.06372072988613298, 'batch_size': 16}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.022761063650250435, 'eval_precision': 0.9803225806451613, 'eval_recall': 0.9828589909443726, 'eval_f1': 0.9815891472868217, 'eval_runtime': 13.5265, 'eval_samples_per_second': 201.826, 'eval_steps_per_second': 6.358, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1715,0.126172,0.90317,0.838616,0.869696
2,0.0871,0.066961,0.939602,0.930789,0.935175
3,0.0372,0.039523,0.969874,0.968305,0.969089
4,0.0219,0.026386,0.980645,0.983182,0.981912
5,0.0125,0.022597,0.983242,0.98674,0.984988


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 11:52:55,150] Trial 9 finished with value: 0.02259724773466587 and parameters: {'learning_rate': 3.6815006433745734e-05, 'weight_decay': 0.039352140885171125, 'batch_size': 16}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.02259724773466587, 'eval_precision': 0.9832420238478892, 'eval_recall': 0.9867399741267788, 'eval_f1': 0.9849878934624697, 'eval_runtime': 13.5869, 'eval_samples_per_second': 200.928, 'eval_steps_per_second': 6.33, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.3278,0.153258,0.883261,0.795278,0.836964
2,0.1172,0.080128,0.945044,0.906533,0.925388
3,0.0553,0.048249,0.964251,0.959573,0.961906
4,0.0379,0.035247,0.977539,0.971216,0.974367
5,0.0227,0.029155,0.981144,0.976067,0.978599


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 12:05:43,919] Trial 10 finished with value: 0.029155168682336807 and parameters: {'learning_rate': 4.3950268764728664e-05, 'weight_decay': 0.011243471528243773, 'batch_size': 32}. Best is trial 1 with value: 0.02019253745675087.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.029155168682336807, 'eval_precision': 0.9811443433029909, 'eval_recall': 0.9760672703751617, 'eval_f1': 0.9785992217898832, 'eval_runtime': 10.8828, 'eval_samples_per_second': 250.855, 'eval_steps_per_second': 3.951, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.123,0.111753,0.903467,0.859638,0.881008
2,0.0438,0.048171,0.964497,0.9489,0.956635
3,0.0299,0.027917,0.978951,0.977684,0.978317
4,0.011,0.019334,0.985451,0.98577,0.98561
5,0.0049,0.018149,0.987391,0.98771,0.987551


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 12:33:46,420] Trial 11 finished with value: 0.018149441108107567 and parameters: {'learning_rate': 2.3485145874743088e-05, 'weight_decay': 0.07551498903953874, 'batch_size': 4}. Best is trial 11 with value: 0.018149441108107567.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.018149441108107567, 'eval_precision': 0.9873908826382153, 'eval_recall': 0.9877102199223803, 'eval_f1': 0.9875505254648342, 'eval_runtime': 23.5002, 'eval_samples_per_second': 116.169, 'eval_steps_per_second': 14.553, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1247,0.108112,0.904078,0.874838,0.889218
2,0.0387,0.050127,0.958428,0.954398,0.956409
3,0.022,0.030619,0.975783,0.977361,0.976571
4,0.0085,0.022882,0.981959,0.98577,0.983861
5,0.0017,0.017998,0.988361,0.98868,0.988521


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 13:01:48,626] Trial 12 finished with value: 0.01799764856696129 and parameters: {'learning_rate': 4.093856031376536e-05, 'weight_decay': 0.07293433154507087, 'batch_size': 4}. Best is trial 12 with value: 0.01799764856696129.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.01799764856696129, 'eval_precision': 0.988360814742968, 'eval_recall': 0.9886804657179818, 'eval_f1': 0.9885206143896524, 'eval_runtime': 23.5625, 'eval_samples_per_second': 115.862, 'eval_steps_per_second': 14.515, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1192,0.116247,0.886867,0.867076,0.87686
2,0.0424,0.046444,0.964297,0.952135,0.958177
3,0.0248,0.026878,0.981559,0.981242,0.981401
4,0.0105,0.021958,0.984168,0.985123,0.984645
5,0.0045,0.020115,0.987051,0.986093,0.986572


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 13:29:53,618] Trial 13 finished with value: 0.020115207880735397 and parameters: {'learning_rate': 2.4819730380901434e-05, 'weight_decay': 0.05354972308768808, 'batch_size': 4}. Best is trial 12 with value: 0.01799764856696129.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.020115207880735397, 'eval_precision': 0.9870508255098738, 'eval_recall': 0.9860931435963778, 'eval_f1': 0.9865717521436661, 'eval_runtime': 23.5882, 'eval_samples_per_second': 115.736, 'eval_steps_per_second': 14.499, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.1382,0.130153,0.89318,0.830207,0.860543
2,0.0506,0.05454,0.950016,0.940492,0.94523
3,0.033,0.035188,0.976167,0.967012,0.971568
4,0.0156,0.026313,0.979664,0.981565,0.980614
5,0.0031,0.024528,0.982859,0.982859,0.982859


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 13:57:58,461] Trial 14 finished with value: 0.024528326466679573 and parameters: {'learning_rate': 5.3982150171411363e-05, 'weight_decay': 0.06753031593746794, 'batch_size': 4}. Best is trial 12 with value: 0.01799764856696129.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.024528326466679573, 'eval_precision': 0.9828589909443726, 'eval_recall': 0.9828589909443726, 'eval_f1': 0.9828589909443726, 'eval_runtime': 23.5251, 'eval_samples_per_second': 116.046, 'eval_steps_per_second': 14.538, 'epoch': 5.0}


  learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 5e-4)
  trainer = Trainer(
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.123,0.107477,0.913255,0.851229,0.881152
2,0.0375,0.052705,0.956536,0.946636,0.95156
3,0.0289,0.027716,0.976759,0.978655,0.977706
4,0.0085,0.024628,0.978401,0.981565,0.979981
5,0.0036,0.020461,0.982287,0.986417,0.984347


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Logits Shape: (2730, 7), Labels Shape: (2730, 7)


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


[I 2024-12-06 14:26:01,761] Trial 15 finished with value: 0.020461106672883034 and parameters: {'learning_rate': 2.463639904897267e-05, 'weight_decay': 0.05352072025572893, 'batch_size': 4}. Best is trial 12 with value: 0.01799764856696129.


Logits Shape: (2730, 7), Labels Shape: (2730, 7)
Evaluation Results: {'eval_loss': 0.020461106672883034, 'eval_precision': 0.9822866344605475, 'eval_recall': 0.9864165588615783, 'eval_f1': 0.9843472648055511, 'eval_runtime': 23.5518, 'eval_samples_per_second': 115.915, 'eval_steps_per_second': 14.521, 'epoch': 5.0}
Best Hyperparameters: {'learning_rate': 4.093856031376536e-05, 'weight_decay': 0.07293433154507087, 'batch_size': 4}
Best Evaluation Loss: 0.01799764856696129
