In [1]:
# Import libraries
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from transformers import AutoTokenizer, AutoModel, AutoConfig, Trainer, TrainingArguments, AdamW
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch.nn as nn
import os


In [2]:
# Download datasets
!wget -P /kaggle/working -nc "https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_train.csv"
# !wget -P /kaggle/working -nc "https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_combined_augmented.csv"
!wget -P /kaggle/working -nc "https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_validation.csv"


  pid, fd = os.forkpty()


--2024-11-30 10:00:00--  https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_train.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12659880 (12M) [text/plain]
Saving to: '/kaggle/working/final_cleaned_train.csv'


2024-11-30 10:00:00 (160 MB/s) - '/kaggle/working/final_cleaned_train.csv' saved [12659880/12659880]

--2024-11-30 10:00:02--  https://raw.githubusercontent.com/HammadxSaj/Sem-Eval-Task10-Dataset/refs/heads/main/final_cleaned_validation.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response.

In [3]:
# Load the training data
df = pd.read_csv('/kaggle/working/final_cleaned_train.csv')

# Inspect the dataframe
df.head()


Unnamed: 0,year,month,day,country,title,text,hazard-category,product-category,hazard,product
0,1994,1,7,us,Recall Notification: FSIS-024-94,Date Opened: Date Closed: Name: GERHARD'S NAPA...,biological,"meat, egg and dairy products",listeria monocytogenes,smoked sausage
1,1994,3,10,us,Recall Notification: FSIS-033-94,Date Opened: Date Closed: Name: WIMMER'S MEAT ...,biological,"meat, egg and dairy products",listeria spp,sausage
2,1994,3,28,us,Recall Notification: FSIS-014-94,Date Opened: Date Closed: Name: WILLOW FOODS I...,biological,"meat, egg and dairy products",listeria monocytogenes,ham slices
3,1994,4,3,us,Recall Notification: FSIS-009-94,Date Opened: Date Closed: M Name: OSCAR MAYER ...,foreign bodies,"meat, egg and dairy products",plastic fragment,thermal processed pork meat
4,1994,7,1,us,Recall Notification: FSIS-001-94,Date Opened: Date Closed: Name: TYSON FOODS Im...,foreign bodies,"meat, egg and dairy products",plastic fragment,chicken breast


In [4]:
# Data preprocessing

# Drop unnecessary columns for training
df = df[['text', 'hazard-category', 'product-category']]

# Drop rows with missing values
df.dropna(inplace=True)

# Initialize label encoders
hazard_category_encoder = LabelEncoder()
product_category_encoder = LabelEncoder()
# hazard_encoder = LabelEncoder()
# product_encoder = LabelEncoder()

# Fit the encoders
hazard_category_encoder.fit(df['hazard-category'])
product_category_encoder.fit(df['product-category'])
# hazard_encoder.fit(df['hazard'])
# product_encoder.fit(df['product'])

# Transform the labels
df['hazard-category'] = hazard_category_encoder.transform(df['hazard-category'])
df['product-category'] = product_category_encoder.transform(df['product-category'])
# df['hazard'] = hazard_encoder.transform(df['hazard'])
# df['product'] = product_encoder.transform(df['product'])

# Split the data into train and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

print(f"Number of training samples: {len(train_df)}")
print(f"Number of validation samples: {len(val_df)}")


Number of training samples: 4772
Number of validation samples: 1194


In [5]:
# Define the FoodHazardDataset class
class FoodHazardDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, hazard_categories, product_categories):
        self.encodings = encodings
        self.hazard_categories = hazard_categories
        self.product_categories = product_categories
        # self.hazards = hazards
        # self.products = products

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['hazard_category_labels'] = torch.tensor(self.hazard_categories[idx])
        item['product_category_labels'] = torch.tensor(self.product_categories[idx])
        # item['hazard_labels'] = torch.tensor(self.hazards[idx])
        # item['product_labels'] = torch.tensor(self.products[idx])
        return item

    def __len__(self):
        return len(self.hazard_categories)


In [6]:
# Define the number of unique labels for each category
num_hazard_category_labels = len(hazard_category_encoder.classes_)
num_product_category_labels = len(product_category_encoder.classes_)
# num_hazard_labels = len(hazard_encoder.classes_)
# num_product_labels = len(product_encoder.classes_)


In [7]:
from transformers import AutoModel
import torch.nn as nn

class TransformerForFoodHazardClassification(nn.Module):
    def __init__(self, model_name, num_labels_dict):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        # Uncomment the line below if you want to use dropout
        # self.dropout = nn.Dropout(self.transformer.config.hidden_dropout_prob)

        hidden_size = self.transformer.config.hidden_size

        # Classifiers for the four labels
        self.hazard_category_classifier = nn.Linear(hidden_size, num_labels_dict['hazard_category'])
        self.product_category_classifier = nn.Linear(hidden_size, num_labels_dict['product_category'])
        # self.hazard_classifier = nn.Linear(hidden_size, num_labels_dict['hazard'])
        # self.product_classifier = nn.Linear(hidden_size, num_labels_dict['product'])

        # Loss function
        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None,
                hazard_category_labels=None, product_category_labels=None):
        # Check if the model supports token_type_ids
        if "token_type_ids" in self.transformer.forward.__code__.co_varnames:
            outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        else:
            # For DistilBERT and similar models that do not accept token_type_ids
            outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Select pooled output for models like BERT and DeBERTa, or use CLS token for others
        if hasattr(outputs, 'pooler_output'):
            pooled_output = outputs.pooler_output
        else:
            pooled_output = outputs.last_hidden_state[:, 0, :]  # CLS token

        # Apply dropout if using
        # pooled_output = self.dropout(pooled_output)

        # Predict the four labels
        hazard_category_logits = self.hazard_category_classifier(pooled_output)
        product_category_logits = self.product_category_classifier(pooled_output)
        # hazard_logits = self.hazard_classifier(pooled_output)
        # product_logits = self.product_classifier(pooled_output)

        loss = None
        if hazard_category_labels is not None and product_category_labels is not None:
            # Compute loss for each task
            hazard_category_loss = self.loss_fct(hazard_category_logits, hazard_category_labels)
            product_category_loss = self.loss_fct(product_category_logits, product_category_labels)
            # hazard_loss = self.loss_fct(hazard_logits, hazard_labels)
            # product_loss = self.loss_fct(product_logits, product_labels)

            # Aggregate losses
            loss = hazard_category_loss + product_category_loss

        # Return the loss and logits
        output = (hazard_category_logits, product_category_logits)
        return ((loss,) + output) if loss is not None else output


In [8]:
# Define the compute_metrics function to calculate both accuracy and average F1 score across all labels

def compute_metrics(pred):

    labels = pred.label_ids

    preds = pred.predictions



    # Unpack labels and predictions for each task

    hazard_category_labels = labels[0]

    product_category_labels = labels[1]

    # hazard_labels = labels[2]

    # product_labels = labels[3]



    hazard_category_preds = preds[0].argmax(-1)

    product_category_preds = preds[1].argmax(-1)

    # hazard_preds = preds[2].argmax(-1)

    # product_preds = preds[3].argmax(-1)



    # Compute accuracy for each task (can be used separately if needed)

    hazard_category_acc = accuracy_score(hazard_category_labels, hazard_category_preds)

    product_category_acc = accuracy_score(product_category_labels, product_category_preds)

    # hazard_acc = accuracy_score(hazard_labels, hazard_preds)

    # product_acc = accuracy_score(product_labels, product_preds)



    # Compute F1 score for each task

    hazard_category_f1 = f1_score(hazard_category_labels, hazard_category_preds, average='weighted')

    product_category_f1 = f1_score(product_category_labels, product_category_preds, average='weighted')

    # hazard_f1 = f1_score(hazard_labels, hazard_preds, average='weighted')

    # product_f1 = f1_score(product_labels, product_preds, average='weighted')



    # Compute average F1 score across all tasks

    avg_f1 = (hazard_category_f1 + product_category_f1) / 4



    # Optionally, you can also compute average accuracy across tasks if needed

    avg_acc = (hazard_category_acc + product_category_acc) / 4



    # Return a dictionary with both accuracy and average F1 score

    return {

        'hazard_category_acc': hazard_category_acc,

        'product_category_acc': product_category_acc,

        'avg_accuracy': avg_acc,

        'avg_f1': avg_f1

    }

In [9]:
# Define the data collator
def data_collator(batch):
    return {
        'input_ids': torch.stack([x['input_ids'] for x in batch]),
        'attention_mask': torch.stack([x['attention_mask'] for x in batch]),
        'hazard_category_labels': torch.tensor([x['hazard_category_labels'] for x in batch]),
        'product_category_labels': torch.tensor([x['product_category_labels'] for x in batch]),
    }


In [32]:
# Function to train and save a model
def train_and_save_model(model_name, output_dir):
    """
    Trains and saves a model (only the final model after the last epoch).
    
    Args:
    - model_name: the pre-trained model name or path.
    - output_dir: directory to save the model
    """
    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the text data
    train_texts = train_df['text'].tolist()
    train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512)

    val_texts = val_df['text'].tolist()
    val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512)

    # Prepare the datasets
    train_dataset = FoodHazardDataset(
        train_encodings,
        train_df['hazard-category'].tolist(),
        train_df['product-category'].tolist()
        # train_df['hazard'].tolist(),
        # train_df['product'].tolist()
    )

    val_dataset = FoodHazardDataset(
        val_encodings,
        val_df['hazard-category'].tolist(),
        val_df['product-category'].tolist()
        # val_df['hazard'].tolist(),
        # val_df['product'].tolist()
    )

    # Define the number of labels
    num_labels_dict = {
        'hazard_category': num_hazard_category_labels,
        'product_category': num_product_category_labels
        # 'hazard': num_hazard_labels,
        # 'product': num_product_labels
    }

    # Initialize the model
    model = TransformerForFoodHazardClassification(model_name, num_labels_dict)

    # Move the model to GPU if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    model.to(device)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=8,  # Train for 8 epochs
        per_device_train_batch_size=16,  # Adjust based on your GPU memory
        per_device_eval_batch_size=16,
        evaluation_strategy="epoch",
        save_strategy="no",  # Do not save after each epoch
        logging_dir='./logs',
        logging_steps=10,
        warmup_steps=500,
        weight_decay=0.01,
        report_to=[]  # Disable W&B logging
    )

    # Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
        optimizers=(AdamW(model.parameters(), lr=1e-5), None),
    )

    # Train the model
    trainer.train()

    # Save the model only after the last epoch (epoch 8)
    if model_name == 'allenai/scibert_scivocab_uncased':
        # Save the model only after the last epoch (epoch 8)
        state_dict = {k: v.contiguous() if isinstance(v, torch.Tensor) else v for k, v in model.state_dict().items()}
        torch.save(state_dict, os.path.join(output_dir, "scibert_weights"))
    else:
        trainer.save_model(output_dir)

    # Evaluate the model
    eval_results = trainer.evaluate()
    print(f"Evaluation results for {model_name}:")
    print(eval_results)

    # Clear GPU memory
    del model
    torch.cuda.empty_cache()

    return eval_results  # Return evaluation results instead of the trainer


In [11]:
# # Plotting function
# def plot_training_metrics(metrics, output_dir):
#     epochs = []
#     train_loss = []
#     eval_loss = []
#     eval_accuracy = []
#     eval_f1 = []

#     for log in metrics:
#         if 'epoch' in log:
#             epochs.append(log['epoch'])
#         if 'loss' in log:
#             train_loss.append(log['loss'])
#         if 'eval_loss' in log:
#             eval_loss.append(log['eval_loss'])
#         if 'eval_accuracy' in log:
#             eval_accuracy.append(log['eval_accuracy'])
#         if 'eval_f1' in log:
#             eval_f1.append(log['eval_f1'])

#     # Plot Loss
#     plt.figure(figsize=(10, 6))
#     plt.plot(epochs, train_loss, label='Train Loss', marker='o')
#     plt.plot(epochs, eval_loss, label='Eval Loss', marker='x')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss')
#     plt.title('Loss vs Epochs')
#     plt.legend()
#     plt.grid(True)
#     plt.savefig(f"{output_dir}/loss_vs_epochs.png")
#     plt.show()

#     # Plot Accuracy
#     plt.figure(figsize=(10, 6))
#     plt.plot(epochs, eval_accuracy, label='Eval Accuracy', marker='o')
#     plt.xlabel('Epoch')
#     plt.ylabel('Accuracy')
#     plt.title('Accuracy vs Epochs')
#     plt.legend()
#     plt.grid(True)
#     plt.savefig(f"{output_dir}/accuracy_vs_epochs.png")
#     plt.show()

#     # Plot F1 Score
#     plt.figure(figsize=(10, 6))
#     plt.plot(epochs, eval_f1, label='Eval F1 Score', marker='x')
#     plt.xlabel('Epoch')
#     plt.ylabel('F1 Score')
#     plt.title('F1 Score vs Epochs')
#     plt.legend()
#     plt.grid(True)
#     plt.savefig(f"{output_dir}/f1_score_vs_epochs.png")
#     plt.show()

In [29]:
torch.cuda.empty_cache()

In [13]:
# Train DeBERTa Large
trainer_deberta_base = train_and_save_model('microsoft/deberta-base', 'deberta-base-model')

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Avg Accuracy,Avg F1
1,3.5809,3.254996,0.659129,0.31407,0.2433,0.188807
2,2.2586,2.152755,0.855946,0.524288,0.345059,0.323262
3,1.5793,1.572194,0.894472,0.645729,0.38505,0.37389
4,1.1742,1.419254,0.897822,0.680905,0.394682,0.389779
5,0.972,1.329947,0.896147,0.720268,0.404104,0.398983
6,0.73,1.301678,0.899497,0.724456,0.405988,0.401012
7,0.6663,1.275241,0.906198,0.724456,0.407663,0.404075
8,0.5955,1.271792,0.902848,0.731156,0.408501,0.404945


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for microsoft/deberta-base:
{'eval_loss': 1.2717922925949097, 'eval_hazard_category_acc': 0.9028475711892797, 'eval_product_category_acc': 0.7311557788944724, 'eval_avg_accuracy': 0.408500837520938, 'eval_avg_f1': 0.40494515026030603, 'eval_runtime': 39.8434, 'eval_samples_per_second': 29.967, 'eval_steps_per_second': 1.882, 'epoch': 8.0}


In [None]:
# # After training DeBERTa model
# metrics_deberta = trainer_deberta_base.state.log_history
# plot_training_metrics(metrics_deberta, 'deberta-base-model')

In [14]:
torch.cuda.empty_cache()

In [16]:
trainer_distilbert_base = train_and_save_model('distilbert-base-uncased', 'distilbert-base-uncased-model')

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Avg Accuracy,Avg F1
1,4.2239,4.082211,0.531826,0.286432,0.204564,0.138292
2,3.2716,3.172018,0.703518,0.328308,0.257956,0.210897
3,2.5937,2.563595,0.860134,0.386097,0.311558,0.275712
4,2.1857,2.135347,0.869347,0.526801,0.349037,0.329341
5,2.0081,1.887733,0.875209,0.576214,0.362856,0.345559
6,1.5226,1.779248,0.877722,0.603015,0.370184,0.355818
7,1.27,1.718544,0.884422,0.621441,0.376466,0.364454
8,1.5091,1.698771,0.88861,0.627303,0.378978,0.367598


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for distilbert-base-uncased:
{'eval_loss': 1.6987708806991577, 'eval_hazard_category_acc': 0.8886097152428811, 'eval_product_category_acc': 0.6273031825795645, 'eval_avg_accuracy': 0.3789782244556114, 'eval_avg_f1': 0.3675979446969274, 'eval_runtime': 12.3662, 'eval_samples_per_second': 96.554, 'eval_steps_per_second': 3.073, 'epoch': 8.0}


In [22]:
# # metrics_distilbert = trainer_distilbert_base.state.log_history
# plot_training_metrics(trainer_distilbert_base, 'distilbert-base-uncased-model')

In [33]:
torch.cuda.empty_cache()

In [34]:
trainer_scibert = train_and_save_model('allenai/scibert_scivocab_uncased', 'scibert_scivocab_uncased-model')

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Hazard Category Acc,Product Category Acc,Avg Accuracy,Avg F1
1,4.2391,4.080337,0.60469,0.28727,0.22299,0.15334
2,3.0185,2.989182,0.81407,0.335008,0.28727,0.244808
3,2.4956,2.509274,0.878559,0.403685,0.320561,0.286823
4,2.2407,2.197078,0.883585,0.473199,0.339196,0.311013
5,1.9614,1.970347,0.887772,0.557789,0.36139,0.341576
6,1.6041,1.881533,0.889447,0.571189,0.365159,0.346247
7,1.3276,1.794378,0.89196,0.612228,0.376047,0.361425
8,1.488,1.780465,0.891122,0.620603,0.377931,0.363556


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation results for allenai/scibert_scivocab_uncased:
{'eval_loss': 1.7804646492004395, 'eval_hazard_category_acc': 0.8911222780569514, 'eval_product_category_acc': 0.6206030150753769, 'eval_avg_accuracy': 0.3779313232830821, 'eval_avg_f1': 0.36355608186465943, 'eval_runtime': 20.9991, 'eval_samples_per_second': 56.86, 'eval_steps_per_second': 1.81, 'epoch': 8.0}


In [35]:
# # After training SciBERT model
# metrics_scibert = trainer_scibert.state.log_history
# plot_training_metrics(metrics_scibert, 'scibert_scivocab_uncased-model')

In [36]:
torch.cuda.empty_cache()

In [37]:
# Load the test data (validation data is actually the test data)
test_df = pd.read_csv('/kaggle/working/final_cleaned_validation.csv')

# Drop unnecessary columns
test_df = test_df[['text']]
test_texts = test_df['text'].tolist()


In [16]:
from torch.utils.data import DataLoader, TensorDataset
from safetensors.torch import load_file
import numpy as np
import torch

def get_model_logits(model_name, model_dir, test_texts, batch_size=8):
    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize the test data
    test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=512, return_tensors='pt')

    # Convert tokenized inputs to a TensorDataset
    test_dataset = TensorDataset(test_encodings['input_ids'], test_encodings['attention_mask'])

    # Use DataLoader to load the data in batches
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    # Define the number of labels
    num_labels_dict = {
        'hazard_category': num_hazard_category_labels,
        'product_category': num_product_category_labels
    }

    # Initialize the model
    model = TransformerForFoodHazardClassification(model_name, num_labels_dict)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the model state dict from model.safetensors
    if model_name == "allenai/scibert_scivocab_uncased":
        state_dict = torch.load(f"{model_dir}/scibert_weights", map_location=device)
        model.load_state_dict(state_dict)
    else:
        state_dict = load_file(f"{model_dir}/model.safetensors")
        model.load_state_dict(state_dict)

    # Move model to the GPUs (using DataParallel for multiple GPUs)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # Default to cuda:0
    model = torch.nn.DataParallel(model, device_ids=[0, 1])  # Use both GPU 0 and GPU 1
    model.to(device)

    # Initialize dictionaries to accumulate logits
    all_hazard_category_logits = []
    all_product_category_logits = []

    with torch.no_grad():
        model.eval()
        for batch in test_dataloader:
            input_ids, attention_mask = [b.to(device) for b in batch]
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Unpack logits and move them to CPU
            hazard_category_logits, product_category_logits = outputs
            all_hazard_category_logits.append(hazard_category_logits.cpu().numpy())
            all_product_category_logits.append(product_category_logits.cpu().numpy())
            # all_hazard_logits.append(hazard_logits.cpu().numpy())
            # all_product_logits.append(product_logits.cpu().numpy())

    # Concatenate all logits from batches along the batch dimension (axis=0)
    hazard_category_logits_concat = np.concatenate(all_hazard_category_logits, axis=0)
    product_category_logits_concat = np.concatenate(all_product_category_logits, axis=0)
    # hazard_logits_concat = np.concatenate(all_hazard_logits, axis=0)
    # product_logits_concat = np.concatenate(all_product_logits, axis=0)

    # Free GPU memory by deleting model and test_encodings
    del model
    del test_encodings

    # Clear CUDA cache
    torch.cuda.empty_cache()

    # Return logits as a structured dictionary
    return {
        'hazard_category': hazard_category_logits_concat,
        'product_category': product_category_logits_concat
    }


In [11]:
# After evaluating the model
torch.cuda.empty_cache()


In [3]:
# Get logits from DeBERTa Large
deberta_base_logits = get_model_logits('microsoft/deberta-base', 'deberta-base-model', test_texts)


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


In [4]:
# After evaluating the model
torch.cuda.empty_cache()


In [5]:
distilbert_base_logits = get_model_logits('distilbert-base-uncased', 'distilbert-base-uncased-model', test_texts)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [14]:
# After evaluating the model
torch.cuda.empty_cache()


In [17]:
scibert_logits = get_model_logits('allenai/scibert_scivocab_uncased', 'scibert_scivocab_uncased-model', test_texts)

  state_dict = torch.load(f"{model_dir}/scibert_weights", map_location=device)


In [21]:
# Average the logits
hazard_category_logits_avg = (distilbert_base_logits['hazard_category'] + deberta_base_logits['hazard_category'] + scibert_logits['hazard_category']) / 3
product_category_logits_avg = (distilbert_base_logits['product_category'] + deberta_base_logits['product_category'] + scibert_logits['product_category']) / 3
# hazard_logits_avg = (distilbert_base_logits['hazard'] + deberta_large_logits['hazard']) / 2
# product_logits_avg = (distilbert_base_logits['product'] + deberta_large_logits['product']) / 2

In [22]:
# Get predicted labels
hazard_category_preds = np.argmax(hazard_category_logits_avg, axis=1)
product_category_preds = np.argmax(product_category_logits_avg, axis=1)
# hazard_preds = np.argmax(hazard_logits_avg, axis=1)
# product_preds = np.argmax(product_logits_avg, axis=1)

In [23]:
# Decode the predicted labels using the label encoders
hazard_category_labels = hazard_category_encoder.inverse_transform(hazard_category_preds)
product_category_labels = product_category_encoder.inverse_transform(product_category_preds)
# hazard_labels = hazard_encoder.inverse_transform(hazard_preds)
# product_labels = product_encoder.inverse_transform(product_preds)

In [24]:
# Create a DataFrame for the predictions
output_df = pd.DataFrame({
    'hazard-category': hazard_category_labels,
    'product-category': product_category_labels
})

# Save the output DataFrame to a CSV file
output_df.to_csv('test_predictions_ensemble.csv', index=False)

# For subtask 1 (hazard-category and product-category)
subtask1_df = output_df[['hazard-category', 'product-category']]
subtask1_df.to_csv('subtask1_predictions_ensemble.csv', index=True)


In [25]:
# Analyze the predictions
print("Hazard Category Predictions:")
print(subtask1_df['hazard-category'].value_counts())

print("\nProduct Category Predictions:")
print(subtask1_df['product-category'].value_counts())

Hazard Category Predictions:
hazard-category
allergens                         222
biological                        197
foreign bodies                     64
fraud                              34
chemical                           31
packaging defect                    7
other hazard                        5
organoleptic aspects                4
food additives and flavourings      1
Name: count, dtype: int64

Product Category Predictions:
product-category
meat, egg and dairy products                         171
cereals and bakery products                           90
fruits and vegetables                                 71
prepared dishes and snacks                            42
nuts, nut products and seeds                          37
seafood                                               26
soups, broths, sauces and condiments                  26
ices and desserts                                     23
non-alcoholic beverages                               23
cocoa and cocoa preparatio