## **Machine Learning Algorithms Benchmark - Titles**

This is the application of benchmarking algorithms to the provided titles of the training dataset.

Import some required python modules

In [1]:
# Importing some packages

import pandas as pd
import numpy as np
import spacy
import torch
from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_recall_fscore_support,
    accuracy_score,
)
from sklearn.utils.class_weight import (
    compute_sample_weight,
    compute_class_weight
)
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import (
    train_test_split,
    GridSearchCV,
    StratifiedKFold,
)
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AdamW,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
)

# Loading the English spaCy model. The notebook only reads token flags
# (stop word, punctuation, whitespace, ASCII) and lemmas, so the dependency
# parser and the named entity recognizer are disabled to speed up processing.
eng_model = spacy.load("en_core_web_sm", disable=["parser", "ner"])

Load the training data set and process the columns

In [2]:
# Reading the 'food incidents' training dataset, discarding the columns that
# are not used and renaming the hyphenated label columns, in a single chain
food_dataset = (
    pd.read_csv(
        "https://raw.githubusercontent.com/food-hazard-detection-semeval-2025/food-hazard-detection-semeval-2025.github.io/refs/heads/main/data/incidents_train.csv"
    )
    .drop(columns=["Unnamed: 0", "year", "month", "day", "country", "text"])
    .rename(
        columns={
            "hazard-category": "hazard_category",
            "product-category": "product_category",
        }
    )
)

food_dataset.sample(2)

Unnamed: 0,title,hazard_category,product_category,hazard,product
2215,valley innovative services recalls meat and po...,biological,"meat, egg and dairy products",listeria monocytogenes,other types of meat
100,Pagasa—Dried Mackeral,chemical,seafood,toxin,mackerel


Encode all the classes into numeric values

In [3]:
# Dictionary to hold the label encoders, keyed by the encoded column name
encoders = {}

# The target columns are listed explicitly: deriving them from the current
# columns of the frame would, on a re-run of this cell, also encode the
# "*_encoded" (and processed title) columns created by earlier runs
label_columns = ["hazard_category", "product_category", "hazard", "product"]

# Iterating through the labels
for label in label_columns:

    # Encoding labels into numeric values
    label_encoder = LabelEncoder()
    food_dataset[f"{label}_encoded"] = label_encoder.fit_transform(food_dataset[label])

    # Storing the label encoder so the numeric classes can be mapped back later
    encoders[f"{label}_encoded"] = label_encoder

food_dataset.sample(2)

Unnamed: 0,title,hazard_category,product_category,hazard,product,hazard_category_encoded,product_category_encoded,hazard_encoded,product_encoded
23,Recall Notification: FSIS-022-97,biological,"meat, egg and dairy products",listeria spp,sausage,1,13,56,825
188,Azzura Gelati—Azzura Kisses,allergens,ices and desserts,other not classified allergen hazards,ice cream,0,12,76,530


Process the titles by removing redundant text

In [4]:
def process_title(title):
    """
    Clean a title by dropping the tokens that carry no useful signal.

    The title is run through the spaCy English model; stop words, punctuation,
    whitespace tokens and non-ASCII tokens are discarded and every remaining
    token is replaced by its lower-cased lemma.

    :param title: Title to process
    :return: The processed title, with the kept lemmas joined by single spaces
    """

    def is_informative(token):
        # Kept unless it is a stop word, punctuation, whitespace or non-ASCII
        return not (
            token.is_stop
            or token.is_punct
            or token.is_space
            or not token.is_ascii
        )

    # Lower-cased lemmas of the surviving tokens, in their original order
    kept_lemmas = [
        token.lemma_.lower() for token in eng_model(title) if is_informative(token)
    ]

    return " ".join(kept_lemmas)

# Processing the titles
processed_titles = food_dataset["title"].apply(process_title)

# Placing the processed titles right after the raw ones. When the cell is
# re-run the column already exists, so it is overwritten in place instead of
# being inserted again (a second insert of the same name raises an error).
if "title_processed" in food_dataset.columns:
    food_dataset["title_processed"] = processed_titles
else:
    food_dataset.insert(1, "title_processed", processed_titles)

food_dataset.sample(2)

Unnamed: 0,title,title_processed,hazard_category,product_category,hazard,product,hazard_category_encoded,product_category_encoded,hazard_encoded,product_encoded
4865,Co-Op Clearview brand Artesian Water recalled ...,co op clearview brand artesian water recall po...,biological,non-alcoholic beverages,moulds,bottled water,1,14,64,88
318,Woolworths—Bread soft wholemeal loaf 800g,woolworth bread soft wholemeal loaf 800 g,foreign bodies,cereals and bakery products,metal fragment,bread,4,1,57,92


#### **Classification Task - Hazard Categories**

Keep only the columns 'title_processed' and 'hazard_category_encoded' from the initial dataset

In [None]:
# Model input (processed title) together with the encoded hazard category
hazard_category_dataset = food_dataset.loc[
    :, ["title_processed", "hazard_category_encoded"]
]

hazard_category_dataset.sample(2)

**Logistic Regression**

Below is the main function for training and implementing the Logistic Regression classifier. It performs nested cross validation for model and hyperparameter selection.

In [27]:
def train_evaluate_model(dataset, label, outer_cv, inner_cv, pipeline, param_grid):
    """
    Evaluate a pipeline on the processed titles with nested cross validation.

    For every outer fold a grid search, scored by macro f1 on the inner folds,
    is fitted on the training part; its best estimator then predicts the
    held-out part and the classification report of that fold is printed.

    :param dataset: Dataset holding the "title_processed" column and the label column
    :param label: Name of the label column to predict
    :param outer_cv: Outer cross validation splitter
    :param inner_cv: Inner cross validation splitter used by the grid search
    :param pipeline: Pipeline to tune
    :param param_grid: Hyperparameter grid to search
    :return: Macro f1 scores and best hyperparameters, one entry per outer fold
    """

    # Inputs and targets are looked up once and sliced per fold
    titles = dataset["title_processed"]
    targets = dataset[label]

    # Per-fold results collected across the outer cross validation
    fold_scores = []
    fold_params = []

    for fit_rows, holdout_rows in outer_cv.split(titles, targets):

        # Hyperparameter tuning on the training part of the current fold
        search = GridSearchCV(
            estimator=pipeline,
            param_grid=param_grid,
            scoring="f1_macro",
            cv=inner_cv,
            n_jobs=-1,
            verbose=0,
        )
        search.fit(titles.iloc[fit_rows], targets.iloc[fit_rows])

        # Predicting the held-out part with the best model found
        holdout_targets = targets.iloc[holdout_rows]
        holdout_pred = search.best_estimator_.predict(titles.iloc[holdout_rows])

        # Keeping the macro f1 score and the hyperparameters of the best model
        fold_scores.append(f1_score(holdout_targets, holdout_pred, average="macro"))
        fold_params.append(search.best_params_)

        print(classification_report(holdout_targets, holdout_pred, zero_division=0))

    return fold_scores, fold_params

Below are all the steps for training and evaluating the Logistic Regression model for hazard categories

- Setting the desired pipeline
- Setting the hyperparameter grid for tuning
- Setting the outer and inner cross validation
- Training and evaluating the model

In [28]:
# Pipeline: TF-IDF features followed by a logistic regression classifier
pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("logreg", LogisticRegression()),
    ]
)

# Hyperparameter grid; every key has the form "<pipeline step>__<parameter>"
param_grid = dict(
    tfidf__min_df=[1, 3],
    tfidf__max_df=[0.8, 1.0],
    tfidf__ngram_range=[(1, 1), (1, 2)],
    logreg__penalty=["l1", "l2"],
    logreg__solver=["liblinear"],
    logreg__max_iter=[1000, 2500],
    logreg__C=[10, 20],
    logreg__class_weight=[None, "balanced"],
)

# Outer 3-fold cross validation: evaluates on different train-test splits
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Inner 2-fold cross validation: used for hyperparameter tuning
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Running the nested cross validation for the hazard categories
haz_cat_macro_f1_scores, haz_cat_hyperparameters = train_evaluate_model(
    dataset=hazard_category_dataset,
    label="hazard_category_encoded",
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    pipeline=pipeline,
    param_grid=param_grid,
)



              precision    recall  f1-score   support

           0       0.85      0.89      0.87       618
           1       0.85      0.89      0.87       581
           2       0.79      0.67      0.73        95
           3       0.60      0.38      0.46         8
           4       0.81      0.73      0.77       187
           5       0.66      0.69      0.67       124
           6       1.00      1.00      1.00         1
           7       0.53      0.56      0.54        18
           8       0.71      0.39      0.50        44
           9       0.89      0.44      0.59        18

    accuracy                           0.82      1694
   macro avg       0.77      0.66      0.70      1694
weighted avg       0.82      0.82      0.82      1694

              precision    recall  f1-score   support

           0       0.86      0.90      0.88       618
           1       0.88      0.91      0.90       580
           2       0.74      0.64      0.69        96
           3       0.57 



Below are the mean and the standard deviation of the macro f1 across the cross-validation and hyperparameter tuning processes

In [31]:
# Distribution of the macro f1 over the outer folds
haz_cat_f1_array = np.asarray(haz_cat_macro_f1_scores)
print("Cross-Validation Macro f1:")
print("Mean:", haz_cat_f1_array.mean())
print("Standard Deviation:", haz_cat_f1_array.std())

# Hyperparameters of the outer fold with the highest macro f1
haz_cat_best_fold = int(haz_cat_f1_array.argmax())
print("\nThe hyperparameters which gives the best results are:")
print(haz_cat_hyperparameters[haz_cat_best_fold])

Cross-Validation Macro f1:
Mean: 0.6498653671126114
Standard Deviation: 0.039251246439398764

The hyperparameters which gives the best results are:
{'logreg__C': 20, 'logreg__class_weight': 'balanced', 'logreg__max_iter': 1000, 'logreg__penalty': 'l1', 'logreg__solver': 'liblinear', 'tfidf__max_df': 1.0, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 1)}


**BERT - RoBERTa**

Define a function to split the dataset into training, validation and testing datasets using stratification to maintain the distribution of classes in them.

In [None]:
def split_dataset(dataset, label, percentage):
    """
    Split a data set into training, validation and testing data sets.

    A stratified split first holds out ``percentage`` of the rows; the held-out
    rows are then divided in half, again stratified, into the validation and
    the testing data sets.

    :param dataset: Dataset to split
    :param label: Label column to use for stratification
    :param percentage: Fraction of the data held out for validation and testing together
    :return: Tuple with the training, validation and testing data sets
    """

    # Splitting the dataset that was passed in. Splitting a fixed notebook
    # variable instead would hand every task the same frame, which does not
    # contain the label column of the other tasks.
    train_set, temp_set = train_test_split(
        dataset,
        test_size=percentage,
        stratify=dataset[label],
        random_state=42,
    )

    # Halving the held-out rows into validation and testing data sets
    val_set, test_set = train_test_split(
        temp_set, test_size=0.5, random_state=42, stratify=temp_set[label]
    )

    return train_set, val_set, test_set


Define a function to calculate class weights for the training dataset to be used by the classifier to pay more attention to minority classes.

In [None]:
def compute_weight(train_set, label, device=None):
    """
    Compute balanced class weights for the labels of the training set.

    :param train_set: Training set
    :param label: Label to use for computing class weights
    :param device: Device to place the weights on; when omitted, "cuda" is
        used if it is available and "cpu" otherwise
    :return: Class weights as a float tensor, one weight per class present in
        the training set, ordered by ascending class value
    """

    labels = train_set[label].values

    # Computing class weights (np.unique already returns the classes sorted)
    class_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.unique(labels),
        y=labels,
    )

    # Falling back to the CPU keeps the function usable on machines without a GPU
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # Converting weights to a tensor and moving it to the selected device
    class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

    return class_weights


Define a function to tokenize the data based on the BERT tokenizer

In [None]:
def tokenize_data(train_set, val_set, test_set, label, model):
    """
    Tokenize the processed titles of every split and attach the labels.

    :param train_set: Training set
    :param val_set: Validation set
    :param test_set: Testing set
    :param label: Label to use
    :param model: Name of the pretrained model whose tokenizer is loaded
    :return: Tokenized training, validation and testing Datasets
    """

    # Setting up the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model)

    def build_dataset(split):
        # Tokenizing the titles, truncated/padded to a fixed length of 128
        encodings = tokenizer(
            split["title_processed"].tolist(),
            truncation=True,
            padding="max_length",
            max_length=128,
            return_tensors="pt",
        )

        # Attaching the labels as a PyTorch tensor
        encodings["labels"] = torch.tensor(split[label].tolist())

        # Converting to a Dataset
        return Dataset.from_dict(encodings)

    train_dataset = build_dataset(train_set)
    val_dataset = build_dataset(val_set)
    test_dataset = build_dataset(test_set)

    return train_dataset, val_dataset, test_dataset

Define a function that computes the metrics used to evaluate the classifier against the validation dataset at each epoch.

In [None]:
def compute_metrics(eval_pred):
    """
    Compute the evaluation metrics of the model from its raw predictions.

    :param eval_pred: Pair made of the logits and the true labels
    :return: Dictionary with the accuracy and the macro-averaged precision,
        recall and f1
    """

    # Unpacking the logits and the true labels
    logits, labels = eval_pred

    # The predicted class is the position of the largest logit of each row
    predictions = torch.tensor(logits).argmax(dim=1).numpy()

    # Macro-averaged scores; classes without predictions count as 0
    macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
        labels, predictions, average="macro", zero_division=0
    )

    metrics = {
        "accuracy": accuracy_score(labels, predictions),
        "precision": macro_precision,
        "recall": macro_recall,
        "f1": macro_f1,
    }

    return metrics

Define a class that overrides the way the classifier calculates the loss. Here it takes into account the weights of the classes for the cross entropy loss function.

In [None]:
class WeightedTrainer(Trainer):
    """
    Trainer whose loss is a class-weighted cross entropy, used to handle the
    imbalanced classes.

    The weights can be given at construction time. When they are not, the
    notebook-level ``class_weights`` variable is read the first time a loss is
    computed and then kept on the trainer.
    """

    def __init__(self, *args, class_weights=None, **kwargs):
        """
        :param args: Positional arguments forwarded to ``Trainer``
        :param class_weights: Optional tensor with one weight per class
        :param kwargs: Keyword arguments forwarded to ``Trainer``
        """

        super().__init__(*args, **kwargs)
        self._loss_class_weights = class_weights

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """
        Compute the loss using the class weights to handle the imbalanced
        classes.

        :param model: The model
        :param inputs: The inputs; the "labels" entry is removed from it
        :param return_outputs: Whether to return outputs
        :param kwargs: Keyword arguments (accepted and ignored)
        :return: The loss, or the pair (loss, outputs) when return_outputs is set
        """

        # Extracting labels
        labels = inputs.pop("labels")

        # Passing inputs and getting results
        outputs = model(**inputs)
        logits = outputs.logits

        # Pinning the weights to this trainer on first use: the notebook
        # recomputes the global `class_weights` for every task, and a later
        # task may have a different number of classes
        if self._loss_class_weights is None:
            self._loss_class_weights = class_weights

        # Weighted cross entropy; the weights are moved to the device of the
        # logits so that both tensors live on the same device
        loss_fct = torch.nn.CrossEntropyLoss(
            weight=self._loss_class_weights.to(logits.device)
        )

        # Calculating
        loss = loss_fct(logits, labels)

        return (loss, outputs) if return_outputs else loss

Define a function to train and evaluate the model

In [None]:
def train_model(train_dataset, val_dataset, model_name, number_classes):
    """
    Fine-tune a pretrained sequence classification model.

    The model is evaluated and checkpointed after every epoch, training stops
    early after 3 evaluations without improvement, and the checkpoint with the
    best validation f1 is loaded at the end.

    :param train_dataset: Training dataset
    :param val_dataset: Validation dataset
    :param model_name: Model name
    :param number_classes: Number of classes
    :return: The trainer holding the trained model (not the bare model)
    """

    # Setting up the model to use
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=number_classes
    )

    # Moving it to the GPU when one is available, otherwise keeping it on the CPU
    model.to("cuda" if torch.cuda.is_available() else "cpu")

    # Setting up the training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        learning_rate=5e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        weight_decay=0.01,
        lr_scheduler_type="linear",
        warmup_ratio=0.1,
        num_train_epochs=10,
        report_to="none",
        save_total_limit=1,
    )

    # Setting up the trainer of the model
    trainer = WeightedTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )

    # Training model
    trainer.train()

    return trainer

Define a function to test the trained model on unseen data

In [None]:
def test_model(model, test_dataset):

    # Running model on test dataset
    predictions = model.predict(test_dataset)

    # Getting the predicted labels
    test_logits = predictions.predictions
    labels_pred = torch.argmax(torch.tensor(test_logits), dim=1).numpy()

    # Getting the actual labels
    labels_actual = predictions.label_ids

    return labels_pred, labels_actual

Below, all the steps for training the BERT model are presented

- Separation of the initial dataset
- Calculation of the weights of the classes
- Data tokenization
- Training the model

In [7]:
# Splitting
train_set, val_set, test_set = split_dataset(
    hazard_category_dataset, "hazard_category_encoded", 0.5
)

# Calculating class weights
class_weights = compute_weight(train_set, "hazard_category_encoded")

# Tokenizing
train_dataset, val_dataset, test_dataset = tokenize_data(
    train_set, val_set, test_set, "hazard_category_encoded", "roberta-base"
)

# Number of output classes, derived from the encoded labels of the full
# dataset instead of being hard-coded
haz_cat_number_classes = hazard_category_dataset["hazard_category_encoded"].nunique()

# Training
haz_cat_bert_model = train_model(
    train_dataset, val_dataset, "roberta-base", haz_cat_number_classes
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.62471,0.653543,0.396172,0.349604,0.341084
2,No log,1.424069,0.754331,0.556865,0.43161,0.441211
3,No log,1.301945,0.774803,0.564301,0.485232,0.486757
4,1.490100,1.437505,0.798425,0.630505,0.509813,0.547826
5,1.490100,1.560661,0.788189,0.590545,0.51191,0.532419
6,1.490100,1.752986,0.779528,0.504816,0.509034,0.487839
7,0.608000,1.949122,0.790551,0.557098,0.504176,0.510074


Now, we test the trained model on the unseen data produced by the initial split

In [8]:
# Predicting the held-out test split with the fine-tuned model
labels_pred, labels_actual = test_model(
    model=haz_cat_bert_model, test_dataset=test_dataset
)

# Printing the classification report under its heading
print("Evaluating on Test Dataset")
haz_cat_test_report = classification_report(
    y_true=labels_actual, y_pred=labels_pred, zero_division=0
)
print(haz_cat_test_report)

Evaluating on Test Dataset
              precision    recall  f1-score   support

           0       0.86      0.85      0.86       464
           1       0.84      0.91      0.87       435
           2       0.76      0.65      0.70        72
           3       0.67      0.67      0.67         6
           4       0.76      0.79      0.77       141
           5       0.68      0.55      0.61        93
           6       0.00      0.00      0.00         1
           7       0.50      0.31      0.38        13
           8       0.52      0.48      0.50        33
           9       0.64      0.69      0.67        13

    accuracy                           0.81      1271
   macro avg       0.62      0.59      0.60      1271
weighted avg       0.81      0.81      0.81      1271



#### **Classification Task - Product Categories**

Keep only the columns 'title_processed' and 'product_category_encoded' from the initial dataset

In [None]:
# Model input (processed title) together with the encoded product category
product_category_dataset = food_dataset.loc[
    :, ["title_processed", "product_category_encoded"]
]

product_category_dataset.sample(2)

**Logistic Regression**

Below are all the steps for training and evaluating the Logistic Regression model for product categories

- Setting the desired pipeline
- Setting the hyperparameter grid for tuning
- Setting the outer and inner cross validation
- Training and evaluating the model

<br>

We use the previous predefined method to train and evaluate the classifier

In [None]:
# Pipeline: TF-IDF features followed by a logistic regression classifier
pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("logreg", LogisticRegression()),
    ]
)

# Hyperparameter grid; every key has the form "<pipeline step>__<parameter>"
param_grid = dict(
    tfidf__min_df=[1, 3],
    tfidf__max_df=[0.8, 1.0],
    tfidf__ngram_range=[(1, 1), (1, 2)],
    logreg__penalty=["l1", "l2"],
    logreg__solver=["liblinear"],
    logreg__max_iter=[1000, 2500],
    logreg__C=[10, 20],
    logreg__class_weight=[None, "balanced"],
)

# Outer 3-fold cross validation: evaluates on different train-test splits
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Inner 2-fold cross validation: used for hyperparameter tuning
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Running the nested cross validation for the product categories
prod_cat_macro_f1_scores, prod_cat_hyperparameters = train_evaluate_model(
    dataset=product_category_dataset,
    label="product_category_encoded",
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    pipeline=pipeline,
    param_grid=param_grid,
)

Below are the mean and the standard deviation of the macro f1 across the cross-validation and hyperparameter tuning processes

In [None]:
# Distribution of the macro f1 over the outer folds
prod_cat_f1_array = np.asarray(prod_cat_macro_f1_scores)
print("Cross-Validation Macro f1:")
print("Mean:", prod_cat_f1_array.mean())
print("Standard Deviation:", prod_cat_f1_array.std())

# Hyperparameters of the outer fold with the highest macro f1
prod_cat_best_fold = int(prod_cat_f1_array.argmax())
print("The hyperparameters which gives the best results are:")
print(prod_cat_hyperparameters[prod_cat_best_fold])

**BERT - RoBERTa**

Below, all the steps for training the BERT model for the product categories are presented

- Separation of the initial dataset
- Calculation of the weights of the classes
- Data tokenization
- Training the model

<br>

We use all the previous predefined methods to train and evaluate the classifier

In [None]:
# Splitting
train_set, val_set, test_set = split_dataset(
    product_category_dataset, "product_category_encoded", 0.5
)

# Calculating class weights
class_weights = compute_weight(train_set, "product_category_encoded")

# Tokenizing
train_dataset, val_dataset, test_dataset = tokenize_data(
    train_set, val_set, test_set, "product_category_encoded", "roberta-base"
)

# Number of output classes, derived from the encoded labels of the full
# dataset instead of being hard-coded
prod_cat_number_classes = product_category_dataset["product_category_encoded"].nunique()

# Training
prod_cat_bert_model = train_model(
    train_dataset, val_dataset, "roberta-base", prod_cat_number_classes
)

Now, we test the trained model on the unseen data produced by the initial split

In [None]:
# Predicting the held-out test split with the fine-tuned model
labels_pred, labels_actual = test_model(
    model=prod_cat_bert_model, test_dataset=test_dataset
)

# Printing the classification report under its heading
print("Evaluating on Test Dataset")
prod_cat_test_report = classification_report(
    y_true=labels_actual, y_pred=labels_pred, zero_division=0
)
print(prod_cat_test_report)

#### **Classification Task - Hazard Vectors**

Keep only the columns 'title_processed' and 'hazard_encoded' from the initial dataset

In [None]:
# Model input (processed title) together with the encoded hazard
hazard_dataset = food_dataset.loc[:, ["title_processed", "hazard_encoded"]]

hazard_dataset.sample(2)

**Logistic Regression**

Below, all the steps for training and evaluating the Logistic Regression model for hazard vectors are presented

- Setting the desired pipeline
- Setting the hyperparameter grid for tuning
- Setting the outer and inner cross validation
- Training and evaluating the model

<br>

We use the previous predefined method to train and evaluate the classifier

In [None]:
# Pipeline: TF-IDF features followed by a logistic regression classifier
pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("logreg", LogisticRegression()),
    ]
)

# Hyperparameter grid; every key has the form "<pipeline step>__<parameter>"
param_grid = dict(
    tfidf__min_df=[1, 3],
    tfidf__max_df=[0.8, 1.0],
    tfidf__ngram_range=[(1, 1), (1, 2)],
    logreg__penalty=["l1", "l2"],
    logreg__solver=["liblinear"],
    logreg__max_iter=[1000, 2500],
    logreg__C=[10, 20],
    logreg__class_weight=[None, "balanced"],
)

# Outer 3-fold cross validation: evaluates on different train-test splits
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Inner 2-fold cross validation: used for hyperparameter tuning
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Running the nested cross validation for the hazard vectors
haz_macro_f1_scores, haz_hyperparameters = train_evaluate_model(
    dataset=hazard_dataset,
    label="hazard_encoded",
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    pipeline=pipeline,
    param_grid=param_grid,
)

Below are the mean and the standard deviation of the macro f1 across the cross-validation and hyperparameter tuning processes

In [None]:
# Distribution of the macro f1 over the outer folds
haz_f1_array = np.asarray(haz_macro_f1_scores)
print("Cross-Validation Macro f1:")
print("Mean:", haz_f1_array.mean())
print("Standard Deviation:", haz_f1_array.std())

# Hyperparameters of the outer fold with the highest macro f1
haz_best_fold = int(haz_f1_array.argmax())
print("The hyperparameters which gives the best results are:")
print(haz_hyperparameters[haz_best_fold])

**BERT - RoBERTa**

Below, all the steps for training the BERT model for the hazard vectors are presented

- Separation of the initial dataset
- Calculation of the weights of the classes
- Data tokenization
- Training the model

<br>

We use all the previous predefined methods to train and evaluate the classifier

In [None]:
# Splitting
# NOTE(review): both stratified splits need enough samples of every class;
# very rare hazards may make them fail - confirm on the full dataset
train_set, val_set, test_set = split_dataset(
    hazard_dataset, "hazard_encoded", 0.5
)

# Calculating class weights
# NOTE(review): weights are computed only for the classes present in
# train_set; if a class is missing there the weight vector is shorter than
# the number of model outputs - confirm
class_weights = compute_weight(train_set, "hazard_encoded")

# Tokenizing
train_dataset, val_dataset, test_dataset = tokenize_data(
    train_set, val_set, test_set, "hazard_encoded", "roberta-base"
)

# Number of output classes, derived from the encoded labels of the full
# dataset. The hazard labels go well beyond 21 (e.g. 56 and 76 in the samples
# shown above), so a fixed value of 22 is too small for this task.
haz_number_classes = hazard_dataset["hazard_encoded"].nunique()

# Training
haz_bert_model = train_model(
    train_dataset, val_dataset, "roberta-base", haz_number_classes
)

Now, we test the trained model on the unseen data produced by the initial split

In [None]:
# Predicting the held-out test split with the fine-tuned model
labels_pred, labels_actual = test_model(
    model=haz_bert_model, test_dataset=test_dataset
)

# Printing the classification report under its heading
print("Evaluating on Test Dataset")
haz_test_report = classification_report(
    y_true=labels_actual, y_pred=labels_pred, zero_division=0
)
print(haz_test_report)

#### **Classification Task - Product Vectors**

Keep only the columns 'title_processed' and 'product_encoded' from the initial dataset

In [None]:
# Model input (processed title) together with the encoded product
product_dataset = food_dataset.loc[:, ["title_processed", "product_encoded"]]

product_dataset.sample(2)

**Logistic Regression**

Below, all the steps for training and evaluating the Logistic Regression model for product vectors are presented

- Setting the desired pipeline
- Setting the hyperparameter grid for tuning
- Setting the outer and inner cross validation
- Training and evaluating the model

<br>

We use the previous predefined method to train and evaluate the classifier

In [None]:
# Pipeline: TF-IDF features followed by a logistic regression classifier
pipeline = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("logreg", LogisticRegression()),
    ]
)

# Hyperparameter grid; every key has the form "<pipeline step>__<parameter>"
param_grid = dict(
    tfidf__min_df=[1, 3],
    tfidf__max_df=[0.8, 1.0],
    tfidf__ngram_range=[(1, 1), (1, 2)],
    logreg__penalty=["l1", "l2"],
    logreg__solver=["liblinear"],
    logreg__max_iter=[1000, 2500],
    logreg__C=[10, 20],
    logreg__class_weight=[None, "balanced"],
)

# Outer 3-fold cross validation: evaluates on different train-test splits
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Inner 2-fold cross validation: used for hyperparameter tuning
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

# Running the nested cross validation for the product vectors
prod_macro_f1_scores, prod_hyperparameters = train_evaluate_model(
    dataset=product_dataset,
    label="product_encoded",
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    pipeline=pipeline,
    param_grid=param_grid,
)

Below are the mean and the standard deviation of the macro f1 across the cross-validation and hyperparameter tuning processes

In [None]:
# Distribution of the macro f1 over the outer folds
prod_f1_array = np.asarray(prod_macro_f1_scores)
print("Cross-Validation Macro f1:")
print("Mean:", prod_f1_array.mean())
print("Standard Deviation:", prod_f1_array.std())

# Hyperparameters of the outer fold with the highest macro f1
prod_best_fold = int(prod_f1_array.argmax())
print("The hyperparameters which gives the best results are:")
print(prod_hyperparameters[prod_best_fold])

**BERT - RoBERTa**

Below, all the steps for training the BERT model for the product vectors are presented

- Separation of the initial dataset
- Calculation of the weights of the classes
- Data tokenization
- Training the model

<br>

We use all the previous predefined methods to train and evaluate the classifier

In [None]:
# Splitting
# NOTE(review): both stratified splits need enough samples of every class;
# very rare products may make them fail - confirm on the full dataset
train_set, val_set, test_set = split_dataset(
    product_dataset, "product_encoded", 0.5
)

# Calculating class weights
# NOTE(review): weights are computed only for the classes present in
# train_set; if a class is missing there the weight vector is shorter than
# the number of model outputs - confirm
class_weights = compute_weight(train_set, "product_encoded")

# Tokenizing
train_dataset, val_dataset, test_dataset = tokenize_data(
    train_set, val_set, test_set, "product_encoded", "roberta-base"
)

# Number of output classes, derived from the encoded labels of the full
# dataset. The product labels go well beyond 21 (e.g. 825 and 530 in the
# samples shown above), so a fixed value of 22 is too small for this task.
prod_number_classes = product_dataset["product_encoded"].nunique()

# Training
prod_bert_model = train_model(
    train_dataset, val_dataset, "roberta-base", prod_number_classes
)

Now, we test the trained model on the unseen data produced by the initial split

In [None]:
# Predicting the held-out test split with the fine-tuned model
labels_pred, labels_actual = test_model(
    model=prod_bert_model, test_dataset=test_dataset
)

# Printing the classification report under its heading
print("Evaluating on Test Dataset")
prod_test_report = classification_report(
    y_true=labels_actual, y_pred=labels_pred, zero_division=0
)
print(prod_test_report)