In [1]:
# Install required packages
!pip install transformers datasets accelerate
!pip install transformers[torch]



In [5]:
# Import libraries
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer, TrainingArguments, AdamW
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn as nn
import os


In [6]:
# Download datasets
!wget -P /kaggle/working -nc "https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_train.csv"
!wget -P /kaggle/working -nc "https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_validation.csv"


  pid, fd = os.forkpty()


File '/kaggle/working/final_cleaned_train.csv' already there; not retrieving.

File '/kaggle/working/final_cleaned_validation.csv' already there; not retrieving.



In [7]:
# Load the training data
df = pd.read_csv('/kaggle/working/final_cleaned_train.csv')

# Inspect the dataframe
df.head()


Unnamed: 0,year,month,day,country,title,text,hazard-category,product-category,hazard,product
0,1994,1,7,us,Recall Notification: FSIS-024-94,Date Opened: Date Closed: Name: GERHARD'S NAPA...,biological,"meat, egg and dairy products",listeria monocytogenes,smoked sausage
1,1994,3,10,us,Recall Notification: FSIS-033-94,Date Opened: Date Closed: Name: WIMMER'S MEAT ...,biological,"meat, egg and dairy products",listeria spp,sausage
2,1994,3,28,us,Recall Notification: FSIS-014-94,Date Opened: Date Closed: Name: WILLOW FOODS I...,biological,"meat, egg and dairy products",listeria monocytogenes,ham slices
3,1994,4,3,us,Recall Notification: FSIS-009-94,Date Opened: Date Closed: M Name: OSCAR MAYER ...,foreign bodies,"meat, egg and dairy products",plastic fragment,thermal processed pork meat
4,1994,7,1,us,Recall Notification: FSIS-001-94,Date Opened: Date Closed: Name: TYSON FOODS Im...,foreign bodies,"meat, egg and dairy products",plastic fragment,chicken breast


In [8]:
# Data preprocessing

# Drop unnecessary columns for training
df = df[['text', 'hazard-category', 'product-category', 'hazard', 'product']]

# Drop rows with missing values
df.dropna(inplace=True)

# Initialize label encoders
hazard_category_encoder = LabelEncoder()
product_category_encoder = LabelEncoder()
hazard_encoder = LabelEncoder()
product_encoder = LabelEncoder()

# Fit the encoders
hazard_category_encoder.fit(df['hazard-category'])
product_category_encoder.fit(df['product-category'])
hazard_encoder.fit(df['hazard'])
product_encoder.fit(df['product'])

# Transform the labels
df['hazard-category'] = hazard_category_encoder.transform(df['hazard-category'])
df['product-category'] = product_category_encoder.transform(df['product-category'])
df['hazard'] = hazard_encoder.transform(df['hazard'])
df['product'] = product_encoder.transform(df['product'])

# Split the data into train and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

print(f"Number of training samples: {len(train_df)}")
print(f"Number of validation samples: {len(val_df)}")


Number of training samples: 4772
Number of validation samples: 1194


In [9]:
# Define the FoodHazardDataset class
class FoodHazardDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, hazard_categories, product_categories, hazards, products):
        self.encodings = encodings
        self.hazard_categories = hazard_categories
        self.product_categories = product_categories
        self.hazards = hazards
        self.products = products

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['hazard_category_labels'] = torch.tensor(self.hazard_categories[idx])
        item['product_category_labels'] = torch.tensor(self.product_categories[idx])
        item['hazard_labels'] = torch.tensor(self.hazards[idx])
        item['product_labels'] = torch.tensor(self.products[idx])
        return item

    def __len__(self):
        return len(self.hazard_categories)


In [10]:
# Define the number of unique labels for each category
num_hazard_category_labels = len(hazard_category_encoder.classes_)
num_product_category_labels = len(product_category_encoder.classes_)
num_hazard_labels = len(hazard_encoder.classes_)
num_product_labels = len(product_encoder.classes_)


In [11]:
from transformers import AutoModel
import torch.nn as nn

class TransformerForFoodHazardClassification(nn.Module):
    def __init__(self, model_name, num_labels_dict):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        # Uncomment the line below if you want to use dropout
        # self.dropout = nn.Dropout(self.transformer.config.hidden_dropout_prob)

        hidden_size = self.transformer.config.hidden_size

        # Classifiers for the four labels
        self.hazard_category_classifier = nn.Linear(hidden_size, num_labels_dict['hazard_category'])
        self.product_category_classifier = nn.Linear(hidden_size, num_labels_dict['product_category'])
        self.hazard_classifier = nn.Linear(hidden_size, num_labels_dict['hazard'])
        self.product_classifier = nn.Linear(hidden_size, num_labels_dict['product'])

        # Loss function
        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None,
                hazard_category_labels=None, product_category_labels=None,
                hazard_labels=None, product_labels=None):
        # Check if the model supports token_type_ids
        if "token_type_ids" in self.transformer.forward.__code__.co_varnames:
            outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        else:
            # For DistilBERT and similar models that do not accept token_type_ids
            outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Select pooled output for models like BERT and DeBERTa, or use CLS token for others
        if hasattr(outputs, 'pooler_output'):
            pooled_output = outputs.pooler_output
        else:
            pooled_output = outputs.last_hidden_state[:, 0, :]  # CLS token

        # Apply dropout if using
        # pooled_output = self.dropout(pooled_output)

        # Predict the four labels
        hazard_category_logits = self.hazard_category_classifier(pooled_output)
        product_category_logits = self.product_category_classifier(pooled_output)
        hazard_logits = self.hazard_classifier(pooled_output)
        product_logits = self.product_classifier(pooled_output)

        loss = None
        if hazard_category_labels is not None and product_category_labels is not None \
           and hazard_labels is not None and product_labels is not None:
            # Compute loss for each task
            hazard_category_loss = self.loss_fct(hazard_category_logits, hazard_category_labels)
            product_category_loss = self.loss_fct(product_category_logits, product_category_labels)
            hazard_loss = self.loss_fct(hazard_logits, hazard_labels)
            product_loss = self.loss_fct(product_logits, product_labels)

            # Aggregate losses
            loss = hazard_category_loss + product_category_loss + hazard_loss + product_loss

        # Return the loss and logits
        output = (hazard_category_logits, product_category_logits, hazard_logits, product_logits)
        return ((loss,) + output) if loss is not None else output


In [12]:
# Define the compute_metrics function to calculate both accuracy and average F1 score across all labels

def compute_metrics(pred):

    labels = pred.label_ids

    preds = pred.predictions



    # Unpack labels and predictions for each task

    hazard_category_labels = labels[0]

    product_category_labels = labels[1]

    hazard_labels = labels[2]

    product_labels = labels[3]



    hazard_category_preds = preds[0].argmax(-1)

    product_category_preds = preds[1].argmax(-1)

    hazard_preds = preds[2].argmax(-1)

    product_preds = preds[3].argmax(-1)



    # Compute accuracy for each task (can be used separately if needed)

    hazard_category_acc = accuracy_score(hazard_category_labels, hazard_category_preds)

    product_category_acc = accuracy_score(product_category_labels, product_category_preds)

    hazard_acc = accuracy_score(hazard_labels, hazard_preds)

    product_acc = accuracy_score(product_labels, product_preds)



    # Compute F1 score for each task

    hazard_category_f1 = f1_score(hazard_category_labels, hazard_category_preds, average='weighted')

    product_category_f1 = f1_score(product_category_labels, product_category_preds, average='weighted')

    hazard_f1 = f1_score(hazard_labels, hazard_preds, average='weighted')

    product_f1 = f1_score(product_labels, product_preds, average='weighted')



    # Compute average F1 score across all tasks

    avg_f1 = (hazard_category_f1 + product_category_f1 + hazard_f1 + product_f1) / 4



    # Optionally, you can also compute average accuracy across tasks if needed

    avg_acc = (hazard_category_acc + product_category_acc + hazard_acc + product_acc) / 4



    # Return a dictionary with both accuracy and average F1 score

    return {

        'hazard_category_acc': hazard_category_acc,

        'product_category_acc': product_category_acc,

        'hazard_acc': hazard_acc,

        'product_acc': product_acc,

        'avg_accuracy': avg_acc,

        'avg_f1': avg_f1

    }

In [13]:
# Define the data collator
def data_collator(batch):
    return {
        'input_ids': torch.stack([x['input_ids'] for x in batch]),
        'attention_mask': torch.stack([x['attention_mask'] for x in batch]),
        'hazard_category_labels': torch.tensor([x['hazard_category_labels'] for x in batch]),
        'product_category_labels': torch.tensor([x['product_category_labels'] for x in batch]),
        'hazard_labels': torch.tensor([x['hazard_labels'] for x in batch]),
        'product_labels': torch.tensor([x['product_labels'] for x in batch]),
    }


In [14]:
# Disable W&B entirely
os.environ["WANDB_DISABLED"] = "true"


In [15]:
# Function to train and save a model
def train_and_save_model(model_name, output_dir):
    """
    Trains and saves a model (only the final model after the last epoch).
    
    Args:
    - model_name: the pre-trained model name or path.
    - output_dir: directory to save the model
    """
    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the text data
    train_texts = train_df['text'].tolist()
    train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)

    val_texts = val_df['text'].tolist()
    val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)

    # Prepare the datasets
    train_dataset = FoodHazardDataset(
        train_encodings,
        train_df['hazard-category'].tolist(),
        train_df['product-category'].tolist(),
        train_df['hazard'].tolist(),
        train_df['product'].tolist()
    )

    val_dataset = FoodHazardDataset(
        val_encodings,
        val_df['hazard-category'].tolist(),
        val_df['product-category'].tolist(),
        val_df['hazard'].tolist(),
        val_df['product'].tolist()
    )

    # Define the number of labels
    num_labels_dict = {
        'hazard_category': num_hazard_category_labels,
        'product_category': num_product_category_labels,
        'hazard': num_hazard_labels,
        'product': num_product_labels
    }

    # Initialize the model
    model = TransformerForFoodHazardClassification(model_name, num_labels_dict)

    # Move the model to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    model.to(device)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=8,  # Train for 8 epochs
        per_device_train_batch_size=8,  # Adjust based on your GPU memory
        per_device_eval_batch_size=8,
        evaluation_strategy="epoch",
        save_strategy="no",  # Do not save after each epoch
        logging_dir='./logs',
        logging_steps=10,
        warmup_steps=500,
        weight_decay=0.01,
        report_to=[]  # Disable W&B logging
    )

    # Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
        optimizers=(AdamW(model.parameters(), lr=1e-5), None),
    )

    # Train the model
    trainer.train()

    # Save the model only after the last epoch (epoch 8)
    if model_name == 'allenai/scibert_scivocab_uncased':
        # Save the model only after the last epoch (epoch 8)
        state_dict = {k: v.contiguous() if isinstance(v, torch.Tensor) else v for k, v in model.state_dict().items()}
        torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
    else:
        trainer.save_model(output_dir)

    # Evaluate the model
    eval_results = trainer.evaluate()
    print(f"Evaluation results for {model_name}:")
    print(eval_results)

    # Clear GPU memory
    del model
    torch.cuda.empty_cache()

    return eval_results  # Return evaluation results instead of the trainer


In [16]:
trainer_scibert = train_and_save_model('allenai/scibert_scivocab_uncased', 'scibert_scivocab_uncased-model')

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/228k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/442M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Hazard Acc,Product Acc,Avg Accuracy,Avg F1
1,15.0707,14.560096,0.649079,0.30402,0.326633,0.030988,0.32768,0.2168
2,11.8951,11.918249,0.878559,0.359296,0.580402,0.037688,0.463987,0.388071
3,11.131,10.921135,0.890285,0.409548,0.664154,0.043551,0.501884,0.437529
4,9.9269,10.376544,0.89531,0.456449,0.680905,0.049414,0.520519,0.465037
5,9.4734,10.028104,0.90201,0.489112,0.697655,0.073702,0.54062,0.485218
6,9.0116,9.835027,0.90536,0.5,0.69933,0.086265,0.547739,0.4923
7,8.7498,9.70709,0.90536,0.509213,0.70938,0.100503,0.556114,0.501549
8,9.3127,9.668348,0.907873,0.518425,0.711055,0.106365,0.56093,0.506012


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


ValueError: You are trying to save a non contiguous tensor: `transformer.encoder.layer.0.attention.self.query.weight` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

In [13]:
# Train Bert Large uncased
trainer_bert_large = train_and_save_model('bert-base-uncased', 'bert-base-uncased-model')


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Hazard Acc,Product Acc,Avg Accuracy,Avg F1
1,15.9225,15.637586,0.59129,0.28727,0.175879,0.005025,0.264866,0.172268
2,13.1257,12.940312,0.847571,0.340034,0.386097,0.041876,0.403894,0.316606
3,11.8667,11.654268,0.877722,0.39196,0.515075,0.057789,0.460637,0.38777
4,10.8388,11.045493,0.882747,0.469849,0.586265,0.070352,0.502303,0.437414
5,10.267,10.678938,0.88861,0.498325,0.623116,0.071189,0.52031,0.459726
6,9.7166,10.452283,0.887772,0.509213,0.637353,0.075377,0.527429,0.467604
7,9.482,10.307032,0.890285,0.520938,0.643216,0.078727,0.533291,0.476083
8,10.1692,10.258956,0.889447,0.532663,0.646566,0.079564,0.53706,0.479919


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for bert-base-uncased:
{'eval_loss': 10.258955955505371, 'eval_hazard_category_acc': 0.8894472361809045, 'eval_product_category_acc': 0.5326633165829145, 'eval_hazard_acc': 0.6465661641541038, 'eval_product_acc': 0.07956448911222781, 'eval_avg_accuracy': 0.5370603015075378, 'eval_avg_f1': 0.4799187051760568, 'eval_runtime': 21.6337, 'eval_samples_per_second': 55.192, 'eval_steps_per_second': 3.467, 'epoch': 8.0}


In [14]:
# Train DeBERTa Large
trainer_deberta_large = train_and_save_model('microsoft/deberta-base', 'deberta-base-model')


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Hazard Acc,Product Acc,Avg Accuracy,Avg F1
1,13.7404,12.877097,0.762982,0.336683,0.380235,0.032663,0.378141,0.291854
2,10.8096,10.785297,0.871859,0.386097,0.634841,0.045226,0.484506,0.424018
3,9.927,9.669526,0.889447,0.505863,0.691792,0.092127,0.544807,0.491236
4,8.646,9.033434,0.896985,0.610553,0.714405,0.134003,0.588987,0.541592
5,8.1057,8.571469,0.907035,0.649079,0.732831,0.164992,0.613484,0.567313
6,7.6191,8.331431,0.907873,0.675042,0.742044,0.180067,0.626256,0.579581
7,7.1345,8.186049,0.907873,0.690117,0.744556,0.18928,0.632956,0.587398
8,7.6997,8.124866,0.907873,0.69263,0.747069,0.194305,0.635469,0.590822


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for microsoft/deberta-base:
{'eval_loss': 8.124865531921387, 'eval_hazard_category_acc': 0.9078726968174204, 'eval_product_category_acc': 0.6926298157453936, 'eval_hazard_acc': 0.7470686767169179, 'eval_product_acc': 0.19430485762144054, 'eval_avg_accuracy': 0.635469011725293, 'eval_avg_f1': 0.5908215708833717, 'eval_runtime': 39.1956, 'eval_samples_per_second': 30.463, 'eval_steps_per_second': 1.913, 'epoch': 8.0}


In [16]:
# Train XLM-Roberta Large
trainer_xlm_roberta_large = train_and_save_model('xlm-roberta-base', 'xlm-roberta-base-model')


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Hazard Acc,Product Acc,Avg Accuracy,Avg F1
1,15.7866,15.34369,0.554439,0.28727,0.131491,0.030988,0.251047,0.150584
2,13.034,12.950892,0.836683,0.319933,0.396985,0.030988,0.396147,0.306388
3,12.2218,11.950312,0.856784,0.340871,0.458124,0.029313,0.421273,0.34968
4,11.2026,11.433931,0.877722,0.350921,0.4866,0.031826,0.436767,0.36458
5,10.6873,11.04658,0.88526,0.378559,0.585427,0.036013,0.471315,0.406737
6,10.2061,10.776897,0.89196,0.394472,0.610553,0.041876,0.484715,0.419708
7,10.0512,10.63416,0.896147,0.419598,0.622278,0.041876,0.494975,0.434758
8,10.6075,10.580544,0.89531,0.422948,0.623116,0.044389,0.496441,0.436061


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for xlm-roberta-base:
{'eval_loss': 10.580544471740723, 'eval_hazard_category_acc': 0.8953098827470687, 'eval_product_category_acc': 0.42294807370184256, 'eval_hazard_acc': 0.6231155778894473, 'eval_product_acc': 0.04438860971524288, 'eval_avg_accuracy': 0.4964405360134003, 'eval_avg_f1': 0.4360610710129702, 'eval_runtime': 24.3299, 'eval_samples_per_second': 49.075, 'eval_steps_per_second': 3.083, 'epoch': 8.0}


In [35]:
trainer_distilbert_base = train_and_save_model('distilbert-base-uncased', 'distilbert-base-uncased-model')

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Hazard Acc,Product Acc,Avg Accuracy,Avg F1
1,14.9493,14.557254,0.617253,0.288107,0.258794,0.030988,0.298786,0.192012
2,11.9798,11.921219,0.855946,0.355946,0.562814,0.036851,0.452889,0.381099
3,11.0261,10.794368,0.869347,0.447236,0.656616,0.065327,0.509631,0.445182
4,9.9123,10.284127,0.88191,0.50335,0.680067,0.088777,0.538526,0.481421
5,9.5216,9.956043,0.881072,0.530988,0.68928,0.103853,0.551298,0.494834
6,8.9805,9.737551,0.880235,0.551926,0.695142,0.120603,0.561977,0.506725
7,8.7005,9.614624,0.880235,0.565327,0.700168,0.123116,0.567211,0.514524
8,9.3325,9.571804,0.880235,0.571189,0.701843,0.126466,0.569933,0.516511


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for distilbert-base-uncased:
{'eval_loss': 9.57180404663086, 'eval_hazard_category_acc': 0.8802345058626466, 'eval_product_category_acc': 0.5711892797319933, 'eval_hazard_acc': 0.7018425460636516, 'eval_product_acc': 0.12646566164154105, 'eval_avg_accuracy': 0.5699329983249581, 'eval_avg_f1': 0.5165108017850886, 'eval_runtime': 12.4311, 'eval_samples_per_second': 96.049, 'eval_steps_per_second': 6.033, 'epoch': 8.0}


In [8]:
# Import libraries
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer, TrainingArguments, AdamW
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn as nn
import os


In [12]:
# Load the test data (validation data is actually the test data)
test_df = pd.read_csv('/kaggle/working/final_cleaned_validation.csv')

# Drop unnecessary columns
test_df = test_df[['text']]
test_texts = test_df['text'].tolist()


In [13]:
!pip install safetensors

  pid, fd = os.forkpty()




In [15]:
from torch.utils.data import DataLoader, TensorDataset
from safetensors.torch import load_file
import numpy as np
import torch

def get_model_logits(model_name, model_dir, test_texts, batch_size=8):
    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the test data
    test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors='pt')

    # Convert tokenized inputs to a TensorDataset
    test_dataset = TensorDataset(test_encodings['input_ids'], test_encodings['attention_mask'])

    # Use DataLoader to load the data in batches
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    # Define the number of labels
    num_labels_dict = {
        'hazard_category': num_hazard_category_labels,
        'product_category': num_product_category_labels,
        'hazard': num_hazard_labels,
        'product': num_product_labels
    }

    # Initialize the model
    model = TransformerForFoodHazardClassification(model_name, num_labels_dict)

    # Load the model state dict from model.safetensors
    state_dict = load_file(f"{model_dir}/model.safetensors")
    model.load_state_dict(state_dict)

    # Move model to the GPUs (using DataParallel for multiple GPUs)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Default to cuda:0
    model = torch.nn.DataParallel(model, device_ids=[0, 1])  # Use both GPU 0 and GPU 1
    model.to(device)

    # Initialize dictionaries to accumulate logits
    all_hazard_category_logits = []
    all_product_category_logits = []
    all_hazard_logits = []
    all_product_logits = []

    with torch.no_grad():
        model.eval()
        for batch in test_dataloader:
            input_ids, attention_mask = [b.to(device) for b in batch]
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Unpack logits and move them to CPU
            hazard_category_logits, product_category_logits, hazard_logits, product_logits = outputs
            all_hazard_category_logits.append(hazard_category_logits.cpu().numpy())
            all_product_category_logits.append(product_category_logits.cpu().numpy())
            all_hazard_logits.append(hazard_logits.cpu().numpy())
            all_product_logits.append(product_logits.cpu().numpy())

    # Concatenate all logits from batches along the batch dimension (axis=0)
    hazard_category_logits_concat = np.concatenate(all_hazard_category_logits, axis=0)
    product_category_logits_concat = np.concatenate(all_product_category_logits, axis=0)
    hazard_logits_concat = np.concatenate(all_hazard_logits, axis=0)
    product_logits_concat = np.concatenate(all_product_logits, axis=0)

    # Free GPU memory by deleting model and test_encodings
    del model
    del test_encodings

    # Clear CUDA cache
    torch.cuda.empty_cache()

    # Return logits as a structured dictionary
    return {
        'hazard_category': hazard_category_logits_concat,
        'product_category': product_category_logits_concat,
        'hazard': hazard_logits_concat,
        'product': product_logits_concat
    }


In [16]:
# Get logits from Bert Large
bert_large_logits = get_model_logits('bert-base-uncased', 'bert-base-uncased-model', test_texts)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


In [17]:
# After evaluating the model
torch.cuda.empty_cache()


In [18]:
# Get logits from DeBERTa Large
deberta_large_logits = get_model_logits('microsoft/deberta-base', 'deberta-base-model', test_texts)


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

In [19]:
# After evaluating the model
torch.cuda.empty_cache()


In [20]:
# Get logits from XLM-Roberta Large
xlm_roberta_large_logits = get_model_logits('xlm-roberta-base', 'xlm-roberta-base-model', test_texts)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

In [36]:
# After evaluating the model
torch.cuda.empty_cache()


In [37]:
distilbert_base_logits = get_model_logits('distilbert-base-uncased', 'distilbert-base-uncased-model', test_texts)

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


In [38]:
# After evaluating the model
torch.cuda.empty_cache()


In [39]:
# Average the logits
hazard_category_logits_avg = (distilbert_base_logits['hazard_category'] + deberta_large_logits['hazard_category'] + bert_large_logits['hazard_category']) / 3
product_category_logits_avg = (distilbert_base_logits['product_category'] + deberta_large_logits['product_category'] + bert_large_logits['product_category']) / 3
hazard_logits_avg = (distilbert_base_logits['hazard'] + deberta_large_logits['hazard'] + bert_large_logits['hazard']) / 3
product_logits_avg = (distilbert_base_logits['product'] + deberta_large_logits['product'] + bert_large_logits['product']) / 3

In [40]:
# Get predicted labels
hazard_category_preds = np.argmax(hazard_category_logits_avg, axis=1)
product_category_preds = np.argmax(product_category_logits_avg, axis=1)
hazard_preds = np.argmax(hazard_logits_avg, axis=1)
product_preds = np.argmax(product_logits_avg, axis=1)

In [41]:
print(hazard_category_preds)
print(hazard_category_encoder.classes_)  # Print the classes the encoder was trained on


[1 1 1 0 4 4 1 9 4 1 4 9 2 2 4 2 0 1 1 2 1 0 4 0 0 4 0 1 0 0 1 1 0 2 0 4 1
 4 1 0 0 9 1 5 4 0 1 0 0 4 4 4 5 4 4 0 0 1 2 4 4 1 0 1 1 1 1 1 1 4 0 5 0 1
 1 1 0 0 1 0 4 1 0 2 1 1 0 0 1 1 1 0 0 4 0 0 4 1 1 0 1 1 1 1 0 1 0 9 0 0 4
 1 0 0 1 0 2 1 1 1 1 1 1 1 1 0 0 2 5 4 1 1 1 0 0 0 0 1 0 0 2 5 1 0 5 0 5 4
 0 1 1 1 0 0 5 0 0 0 0 0 0 1 0 1 1 2 2 0 1 1 4 0 1 1 0 5 0 1 0 5 4 1 1 1 1
 4 5 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 4 0 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 5 0 1
 0 0 4 4 5 1 1 1 0 1 4 0 5 0 1 1 0 0 1 1 7 1 1 0 0 0 1 1 4 1 0 1 1 4 2 2 0
 1 1 1 1 1 1 1 1 1 0 4 0 0 0 4 5 1 5 0 0 4 0 0 0 0 0 1 0 0 0 1 1 0 0 5 0 4
 0 0 0 5 0 1 0 1 4 0 1 4 1 2 1 0 1 0 0 1 1 0 1 4 0 9 4 4 5 2 1 5 9 0 5 0 0
 1 1 1 0 1 0 4 0 5 2 0 0 1 0 0 1 0 0 0 4 1 0 0 4 0 1 4 4 0 0 1 0 0 5 0 0 1
 0 4 1 0 1 1 0 1 0 0 1 4 0 5 0 0 4 1 2 0 9 0 0 0 4 0 0 0 4 1 0 0 1 0 0 1 1
 1 1 0 1 0 1 1 1 0 0 1 0 1 1 0 1 1 4 4 0 0 1 0 0 5 0 0 0 0 0 1 1 0 0 1 2 5
 5 1 0 5 0 0 1 1 0 0 1 1 1 0 1 0 0 1 4 0 0 1 9 2 4 1 1 2 1 0 0 0 1 2 4 0 0
 0 4 1 0 0 0 4 0 2 1 0 1 

In [42]:
print(hazard_category_logits_avg.shape)


(565, 10)


In [43]:
# Decode the predicted labels using the label encoders
hazard_category_labels = hazard_category_encoder.inverse_transform(hazard_category_preds)
product_category_labels = product_category_encoder.inverse_transform(product_category_preds)
hazard_labels = hazard_encoder.inverse_transform(hazard_preds)
product_labels = product_encoder.inverse_transform(product_preds)

In [44]:
# Create a DataFrame for the predictions
output_df = pd.DataFrame({
    'hazard-category': hazard_category_labels,
    'product-category': product_category_labels,
    'hazard': hazard_labels,
    'product': product_labels
})

# Save the output DataFrame to a CSV file
output_df.to_csv('test_predictions_ensemble.csv', index=False)

# For subtask 1 (hazard-category and product-category)
subtask1_df = output_df[['hazard-category', 'product-category']]
subtask1_df.to_csv('subtask1_predictions_ensemble.csv', index=True)

# For subtask 2 (hazard and product)
subtask2_df = output_df[['hazard', 'product']]
subtask2_df.to_csv('subtask2_predictions_ensemble.csv', index=True)


In [45]:
# Analyze the predictions
print("Hazard Category Predictions:")
print(subtask1_df['hazard-category'].value_counts())

print("\nProduct Category Predictions:")
print(subtask1_df['product-category'].value_counts())

print("\nHazard Predictions:")
print(subtask2_df['hazard'].value_counts())

print("\nProduct Predictions:")
print(subtask2_df['product'].value_counts())


Hazard Category Predictions:
hazard-category
allergens               226
biological              200
foreign bodies           64
chemical                 32
fraud                    32
packaging defect          9
organoleptic aspects      1
other hazard              1
Name: count, dtype: int64

Product Category Predictions:
product-category
meat, egg and dairy products                         179
cereals and bakery products                          123
fruits and vegetables                                 76
nuts, nut products and seeds                          34
prepared dishes and snacks                            29
non-alcoholic beverages                               23
soups, broths, sauces and condiments                  23
cocoa and cocoa preparations, coffee and tea          20
ices and desserts                                     20
seafood                                               20
dietetic foods, food supplements, fortified foods      5
herbs and spices              