---

### Install necessary pip packages

In [None]:
!pip install transformers
!pip install datasets --upgrade
!pip install torch
!pip install evaluate

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

### Imports

In [None]:
import os
import pandas as pd
import evaluate
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import Dataset, ClassLabel
import warnings
warnings.filterwarnings("ignore")


### Train file download

In [None]:
!wget https://github.com/food-hazard-detection-semeval-2025/food-hazard-detection-semeval-2025.github.io/blob/main/data/incidents_train.csv

--2024-11-24 17:14:20--  https://github.com/food-hazard-detection-semeval-2025/food-hazard-detection-semeval-2025.github.io/blob/main/data/incidents_train.csv
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘incidents_train.csv’

incidents_train.csv     [ <=>                ] 166.87K  --.-KB/s    in 0.008s  

2024-11-24 17:14:20 (20.3 MB/s) - ‘incidents_train.csv’ saved [170876]



## Model Training Template

### Default Hyperparameters for Baseline Model

The default hyperparameters used in the `train_xlm_roberta` function are as follows:

- **learning_rate**: 1e-5  
  - The learning rate controls the step size during training, determining how much the model's weights are updated with each iteration.

- **num_train_epochs**: 1  
  - Number of training epochs. A single epoch is used by default to get a quick baseline result.
  
- **weight_decay**: 0.01  
  - A small weight decay value is used to prevent overfitting and encourage regularization during training.
  
- **per_device_train_batch_size**: 10  
  - The batch size used for training on each device. A value of 10 is used by default for efficient memory usage.

- **per_device_eval_batch_size**: 10  
  - The batch size used for evaluation on each device, set to the same as the training batch size for simplicity.

- **stratify**: True  
  - By default, stratified sampling is used during the train-test split, ensuring that the class distribution is preserved in both the training and test sets.

These default settings provide a reasonable starting point for model training and will be used as our baseline model for comparison.

In [None]:
def train_xlm_roberta(version, x_column, y_column, stratify=True, learning_rate=1e-5, num_train_epochs=1,
                      weight_decay=0.01, per_device_train_batch_size=10, per_device_eval_batch_size=10,
                      data_path="/content/incidents_train.csv", output_root="/content"):
  """Fine-tune `FacebookAI/xlm-roberta-base` as a single-label text classifier and save the results.

  The CSV at `data_path` is split 80/20 (random_state=42) into a training and a
  held-out set. The model is trained with the Hugging Face `Trainer`, evaluated
  once per epoch, and the checkpoint with the best macro F1 is reloaded at the end.

  Args:
    version: Name of the run; artifacts are written to `<output_root>/<version>/`.
    x_column: Name of the CSV column holding the input text.
    y_column: Name of the CSV column holding the target label.
    stratify: If True, the split is stratified on the encoded label.
      scikit-learn raises a ValueError when a class has fewer than two samples.
    learning_rate: Learning rate passed to `TrainingArguments`.
    num_train_epochs: Number of training epochs.
    weight_decay: Weight decay passed to `TrainingArguments`.
    per_device_train_batch_size: Training batch size per device.
    per_device_eval_batch_size: Evaluation batch size per device.
    data_path: Path of the training CSV (defaults to the Google Colab working directory).
    output_root: Directory under which the `<version>` folder is created.

  Returns:
    None. Side effects: prints the device and the classification report, and writes
    the model, tokenizer, `classification_report.txt` and `training_metrics.csv`
    into `<output_root>/<version>/`.
  """
  model_name = "FacebookAI/xlm-roberta-base"

  # Disable W&B logging. The environment variable is kept for backward compatibility;
  # `report_to="none"` further down switches the Trainer's logging integrations off.
  os.environ["WANDB_MODE"] = "disabled"

  # Load training data from a CSV file; rows without text or label cannot be tokenized/encoded
  train = pd.read_csv(data_path).dropna(subset=[x_column, y_column])

  # Select the input text column and encode the target labels as integer ids
  label_encoder = LabelEncoder()
  train = train.assign(
      input_text=train[x_column].astype(str),
      label=label_encoder.fit_transform(train[y_column]),
  )

  # Create label2id and id2label mappings for model compatibility
  label2id = {label: idx for idx, label in enumerate(label_encoder.classes_)}
  id2label = {idx: label for label, idx in label2id.items()}

  # Train-test split, optionally stratified to maintain the class distribution
  split_kwargs = {'test_size': 0.2, 'random_state': 42}
  if stratify:
    split_kwargs['stratify'] = train['label']
  train_df, test_df = train_test_split(train[['input_text', 'label']], **split_kwargs)

  # Convert pandas dataframes to Hugging Face's Dataset format for efficient processing
  train_dataset = Dataset.from_pandas(train_df)
  test_dataset = Dataset.from_pandas(test_df)

  # Load the XLM-RoBERTa tokenizer
  tokenizer = AutoTokenizer.from_pretrained(model_name)

  # Define a tokenization function (padding is left to the data collator)
  def tokenize_function(example):
      return tokenizer(example['input_text'], truncation=True)

  # Tokenize the datasets
  train_dataset = train_dataset.map(tokenize_function, batched=True)
  test_dataset = test_dataset.map(tokenize_function, batched=True)

  # Set up a data collator to pad inputs dynamically to the longest sequence in each batch
  data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

  # Load XLM-RoBERTa model for sequence classification with the correct number of output labels
  num_labels = len(label_encoder.classes_)

  model = AutoModelForSequenceClassification.from_pretrained(
      model_name,
      num_labels=num_labels,
      id2label=id2label,
      label2id=label2id
  )

  # Check if GPU is available and assign the device accordingly (CUDA for GPU, CPU otherwise)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print("Model loaded on: ", device)

  model.to(device)

  # Load the F1 metric
  f1_metric = evaluate.load("f1")

  # Report the macro-averaged F1 at every evaluation
  def compute_metrics(eval_pred):
      logits, labels = eval_pred
      predictions = np.argmax(logits, axis=-1)

      # Compute the F1 macro average
      f1 = f1_metric.compute(predictions=predictions, references=labels, average="macro")
      return {"f1": f1["f1"]}

  # Newer transformers releases name this argument `eval_strategy`, older ones
  # `evaluation_strategy`; use whichever the installed version declares.
  if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {}):
    eval_strategy_key = "eval_strategy"
  else:
    eval_strategy_key = "evaluation_strategy"

  # Set up the training arguments with key hyperparameters for optimization
  training_args = TrainingArguments(
      output_dir="/results",
      save_strategy="epoch",
      learning_rate=learning_rate,
      per_device_train_batch_size=per_device_train_batch_size,
      per_device_eval_batch_size=per_device_eval_batch_size,
      num_train_epochs=num_train_epochs,
      weight_decay=weight_decay,
      load_best_model_at_end=True,
      # Without these two, the "best" checkpoint is chosen by evaluation loss, not by macro F1
      metric_for_best_model="f1",
      greater_is_better=True,
      report_to="none",
      **{eval_strategy_key: "epoch"},
  )

  # Initialize the Trainer with the model, training arguments, dataset, and metrics
  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=train_dataset,
      eval_dataset=test_dataset,
      tokenizer=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics
  )

  # Train the model
  trainer.train()

  # Evaluate the model on the held-out split
  predictions = trainer.predict(test_dataset)
  preds = predictions.predictions.argmax(-1)
  y_true = test_df['label'].to_numpy()

  # Generate a classification report that shows the original class names instead of
  # integer ids. Only classes occurring in the held-out labels or the predictions are
  # listed, which is the same set scikit-learn reports by default.
  present_ids = np.unique(np.concatenate([y_true, preds]))
  present_names = [str(label_encoder.classes_[idx]) for idx in present_ids]
  clr = classification_report(y_true, preds, labels=present_ids, target_names=present_names, zero_division=1)
  print(clr)

  # Save the trained model and tokenizer to a directory for future use
  model_dir = os.path.join(output_root, version)
  trainer.save_model(model_dir)  # Creates a folder for the model
  tokenizer.save_pretrained(model_dir)

  # Save the classification report to a txt document to be loaded later
  with open(os.path.join(model_dir, 'classification_report.txt'), 'w', encoding='utf-8') as file:
    file.write(clr)

  # Save the training metrics to a csv document. The log history is flattened so that
  # each key keeps its most recently logged value (i.e. the values of the last epoch).
  training_metrics = {'Epoch': 'epoch','Training Loss': 'train_loss', 'Validation Loss': 'eval_loss', 'F1': 'eval_f1'}
  latest_logs = {}
  for log_entry in trainer.state.log_history:
    latest_logs.update(log_entry)
  training_data = [{metric: latest_logs.get(metric_alias) for metric, metric_alias in training_metrics.items()}]
  pd.DataFrame(training_data).to_csv(os.path.join(model_dir, 'training_metrics.csv'), index=False)

### Baseline Model with Default Hyperparameters

If the function `train_xlm_roberta` is called without specifying any hyperparameters, it will automatically use the default values defined in the function. These default settings serve as our first baseline model for multi-class classification (each sample receives exactly one label). By using this baseline, we can evaluate the model's initial performance before experimenting with hyperparameter tuning or more advanced configurations.

In [None]:
# Baseline run with the default hyperparameters: `title` -> `hazard-category`
train_xlm_roberta(
    version='vanilla_xlm_roberta_with_title_on_hazard_category',
    x_column='title',
    y_column='hazard-category',
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


Epoch,Training Loss,Validation Loss,F1
1,No log,1.001753,0.181833


Training Report
   Epoch  Training Loss  Validation Loss        F1
0    1.0         1.3746         1.001753  0.181833

Classification Report
                                precision    recall  f1-score   support

                     allergens       0.63      0.94      0.75       371
                    biological       0.71      0.87      0.78       348
                      chemical       0.00      0.00      0.00        57
food additives and flavourings       0.00      0.00      0.00         5
                foreign bodies       0.47      0.15      0.23       112
                         fraud       1.00      0.03      0.05        74
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.00      0.00      0.00        11
                  other hazard       0.00      0.00      0.00        27
              packaging defect       0.00      0.00      0.00        11

                      accuracy                           0.66    

In [None]:
# Baseline run with the default hyperparameters: `text` -> `hazard-category`
train_xlm_roberta(
    version='vanilla_xlm_roberta_with_text_on_hazard_category',
    x_column='text',
    y_column='hazard-category',
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


Epoch,Training Loss,Validation Loss,F1
1,No log,0.59567,0.25707


Training Report
   Epoch  Training Loss  Validation Loss       F1
0    1.0       1.061735          0.59567  0.25707

Classification Report
                                precision    recall  f1-score   support

                     allergens       0.92      0.99      0.95       371
                    biological       0.92      0.99      0.95       348
                      chemical       1.00      0.02      0.03        57
food additives and flavourings       0.00      0.00      0.00         5
                foreign bodies       0.46      0.99      0.63       112
                         fraud       0.00      0.00      0.00        74
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.00      0.00      0.00        11
                  other hazard       0.00      0.00      0.00        27
              packaging defect       0.00      0.00      0.00        11

                      accuracy                           0.81      

### Conclusion on F1 Scores

Although the F1 scores were not optimal, training on the **text** column resulted in a higher macro F1 score than training on the **title** column (0.257 vs. 0.182). Therefore, we will proceed with the **text** column in the next steps and focus on improving the model's performance further.


# Hyperparameter Tuning for XLM-RoBERTa

In the process of fine-tuning the XLM-RoBERTa model for sequence classification, careful selection of hyperparameters is essential to achieve the best F1 score. Rigorous tests led to the selection of the following hyperparameters:

- **Learning Rate (`learning_rate`) = 5e-5**:  
  A learning rate of **5e-5** is commonly used for providing a good balance between fast convergence and stability.

- **Per Device Train Batch Size (`per_device_train_batch_size`) = 16**:  
  **16** is ideal for most GPUs with 12GB or more memory, balancing speed and memory usage.

- **Per Device Eval Batch Size (`per_device_eval_batch_size`) = 16**:  
  The eval batch size is typically set to the same value as the training batch size to maintain consistency during evaluation. **16** works well for most use cases, but can be adjusted based on GPU memory.

- **Number of Epochs (`num_train_epochs`) = 3**:  
  Transformer models usually converge quickly, and **3 epochs** is often sufficient.

- **Weight Decay (`weight_decay`) = 0.01**:  
  **0.01** is a standard value that helps regularize the model and prevent overfitting, without restricting learning too much.


### Final Hyperparameters:
```python
learning_rate = 5e-5
per_device_train_batch_size = 16
per_device_eval_batch_size = 16
num_train_epochs = 3
weight_decay = 0.01
gradient_accumulation_steps = 2  # NOTE: listed here, but not passed to `train_xlm_roberta` in the calls below
```


In [None]:
# Tuned run: `text` -> `hazard-category`
train_xlm_roberta(
    version='tuned_xlm_roberta_with_text_on_hazard_category',
    x_column='text',
    y_column='hazard-category',
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


Epoch,Training Loss,Validation Loss,F1
1,No log,0.656123,0.278706
2,0.762400,0.360906,0.441355
3,0.762400,0.318179,0.471375


Training Report
   Epoch  Training Loss  Validation Loss        F1
0    3.0         0.6118         0.318179  0.471375

Classification Report
                                precision    recall  f1-score   support

                     allergens       0.93      0.99      0.96       371
                    biological       0.97      0.98      0.98       348
                      chemical       0.77      0.95      0.85        57
food additives and flavourings       0.00      0.00      0.00         5
                foreign bodies       0.97      0.94      0.95       112
                         fraud       0.66      0.62      0.64        74
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.00      0.00      0.00        11
                  other hazard       0.38      0.30      0.33        27
              packaging defect       0.00      0.00      0.00        11

                      accuracy                           0.91    

In [None]:
# Tuned run: `title` -> `hazard`, with a plain (non-stratified) train/test split
train_xlm_roberta(
    version='tuned_xlm_roberta_with_title_on_hazard_type',
    x_column='title',
    y_column='hazard',
    stratify=False,
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,2.704217,0.046347
2,2.949100,2.349106,0.092155
3,2.949100,2.242849,0.091621


              precision    recall  f1-score   support

           0       1.00      0.00      0.00         2
           1       1.00      0.00      0.00         1
           3       1.00      0.00      0.00         1
           4       1.00      0.00      0.00         3
           5       1.00      0.00      0.00        13
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         1
           8       1.00      0.00      0.00         1
           9       1.00      0.00      0.00         3
          10       1.00      0.00      0.00         1
          11       1.00      0.00      0.00         2
          12       1.00      0.00      0.00         1
          13       1.00      0.00      0.00         4
          14       1.00      0.00      0.00         6
          15       1.00      0.00      0.00         1
          16       1.00      0.00      0.00         1
          17       0.36      0.32      0.34        41
          18       1.00    

In [None]:
# Tuned run: `text` -> `hazard`, with a plain (non-stratified) train/test split
train_xlm_roberta(
    version='tuned_xlm_roberta_with_text_on_hazard_type',
    x_column='text',
    y_column='hazard',
    stratify=False,
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,1.750746,0.088827
2,2.214900,1.35252,0.136426
3,2.214900,1.250915,0.156247


              precision    recall  f1-score   support

           0       1.00      0.00      0.00         2
           1       1.00      0.00      0.00         1
           3       1.00      0.00      0.00         1
           4       1.00      0.00      0.00         3
           5       0.44      0.85      0.58        13
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         1
           8       1.00      0.00      0.00         1
           9       1.00      1.00      1.00         3
          10       1.00      0.00      0.00         1
          11       1.00      0.00      0.00         2
          12       1.00      0.00      0.00         1
          13       1.00      0.00      0.00         4
          14       1.00      0.00      0.00         6
          15       1.00      0.00      0.00         1
          16       1.00      0.00      0.00         1
          17       0.79      0.90      0.84        41
          18       1.00    

### Transition to Product Category and Type Prediction

We have completed the tests and got our results for the hazard category and type prediction task. Now, we will proceed to predict the product category and type.

In [None]:
# Baseline run with the default hyperparameters: `title` -> `product-category`
train_xlm_roberta(
    version='vanilla_xlm_roberta_with_title_on_product_category',
    x_column='title',
    y_column='product-category',
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,2.301218,0.026916


              precision    recall  f1-score   support

           0       1.00      0.00      0.00        12
           1       0.17      0.11      0.13       134
           2       1.00      0.00      0.00        42
           3       1.00      0.00      0.00        34
           4       1.00      0.00      0.00        26
           5       1.00      0.00      0.00         4
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         2
           8       1.00      0.00      0.00         1
           9       1.00      0.00      0.00       107
          10       1.00      0.00      0.00        25
          11       1.00      0.00      0.00         2
          12       1.00      0.00      0.00        44
          13       0.30      0.97      0.46       287
          14       1.00      0.00      0.00        27
          15       1.00      0.00      0.00        52
          16       1.00      0.00      0.00        11
          17       1.00    

In [None]:
# Baseline run with the default hyperparameters: `text` -> `product-category`
train_xlm_roberta(
    version='vanilla_xlm_roberta_with_text_on_product_category',
    x_column='text',
    y_column='product-category',
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,2.365748,0.020008


              precision    recall  f1-score   support

           0       1.00      0.00      0.00        12
           1       1.00      0.00      0.00       134
           2       1.00      0.00      0.00        42
           3       1.00      0.00      0.00        34
           4       1.00      0.00      0.00        26
           5       1.00      0.00      0.00         4
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         2
           8       1.00      0.00      0.00         1
           9       1.00      0.00      0.00       107
          10       1.00      0.00      0.00        25
          11       1.00      0.00      0.00         2
          12       1.00      0.00      0.00        44
          13       0.28      1.00      0.44       287
          14       1.00      0.00      0.00        27
          15       1.00      0.00      0.00        52
          16       1.00      0.00      0.00        11
          17       1.00    

### Conclusion on F1 Scores

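After evaluating the performance of both columns, we found that neither column was a good predictor for the product category with the default hyperparameters. Since the title column achieved a marginally higher macro F1 score (0.027 vs. 0.020), we will focus on the title column moving forward for this task.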

In [None]:
# Tuned run: `title` -> `product-category`
train_xlm_roberta(
    version='tuned_xlm_roberta_with_title_on_product_category',
    x_column='title',
    y_column='product-category',
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,1.798661,0.135227
2,1.920000,1.415187,0.308686
3,1.920000,1.278915,0.342645


              precision    recall  f1-score   support

           0       1.00      0.00      0.00        12
           1       0.65      0.83      0.73       134
           2       0.54      0.76      0.63        42
           3       0.56      0.15      0.23        34
           4       0.51      0.69      0.59        26
           5       1.00      0.00      0.00         4
           6       1.00      0.00      0.00         1
           7       1.00      0.00      0.00         2
           8       1.00      0.00      0.00         1
           9       0.64      0.78      0.70       107
          10       1.00      0.00      0.00        25
          11       1.00      0.00      0.00         2
          12       0.77      0.77      0.77        44
          13       0.83      0.86      0.85       287
          14       0.83      0.70      0.76        27
          15       0.53      0.71      0.61        52
          16       1.00      0.00      0.00        11
          17       1.00    

In [None]:
# Tuned run: `title` -> `product`, with a plain (non-stratified) train/test split
train_xlm_roberta(
    version='tuned_xlm_roberta_with_title_on_product_type',
    x_column='title',
    y_column='product',
    stratify=False,
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

Map:   0%|          | 0/4065 [00:00<?, ? examples/s]

Map:   0%|          | 0/1017 [00:00<?, ? examples/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on:  cuda


A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.


Epoch,Training Loss,Validation Loss,F1
1,No log,6.258984,0.000209
2,6.245200,6.154792,0.001029


In [None]:
# Tuned run: `text` -> `product`, with a plain (non-stratified) train/test split
train_xlm_roberta(
    version='tuned_xlm_roberta_with_text_on_product_type',
    x_column='text',
    y_column='product',
    stratify=False,
    learning_rate=5e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

[Back to the main notebook](Food_Hazard_Detection_Challenge.ipynb)