<a href="https://colab.research.google.com/github/SushovitNanda/SemEval-Food-Hazards/blob/main/PyTorch_FFNN_RNN_LSTM_Bi_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizer, DistilBertModel
import copy
import warnings
warnings.filterwarnings("ignore")

In [14]:
# Load and preprocess data
train_df = pd.read_csv('incidents_labelled.csv')

# Combine title and text for the input feature
train_df['combined_text'] = train_df['title'] + " " + train_df['text']

# Label encode the target variable
label_encoder = LabelEncoder()
train_df['label'] = label_encoder.fit_transform(train_df['hazard-category'])
num_classes = len(label_encoder.classes_)

# Split the data
train_data, test_data = train_test_split(train_df, test_size=0.2, random_state=42)

# Tokenizer setup
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
max_length = 128

# Custom Dataset class
class HazardDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data.iloc[idx]['combined_text']
        label = self.data.iloc[idx]['label']

        # Tokenize the text
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Create data loaders
train_dataset = HazardDataset(train_data, tokenizer, max_length)
test_dataset = HazardDataset(test_data, tokenizer, max_length)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# FFNN

In [10]:
# Define the FFNN model
class FFNNModel(nn.Module):
    def __init__(self, hidden_size, num_classes):
        super(FFNNModel, self).__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.fc1 = nn.Linear(self.distilbert.config.hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        outputs = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # [CLS] token output
        x = self.fc1(pooled_output)
        x = self.relu(x)
        x = self.dropout(x)
        return self.fc2(x)

# Initialize the model, loss function, and optimizer
model = FFNNModel(hidden_size=64, num_classes=num_classes)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Convert class weights to float32
class_weights = torch.tensor([1.0 / count for count in train_df['hazard-category'].value_counts().values], dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0
best_model_state = copy.deepcopy(model.state_dict())

# Training function
def train_model(model, data_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask).float()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(data_loader)

# Evaluation function
def eval_model(model, data_loader):
    model.eval()
    total_loss = 0
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    return total_loss / len(data_loader), predictions, true_labels

# Train and evaluate with early stopping
epochs = 10
for epoch in range(epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer)
    val_loss, _, _ = eval_model(model, test_loader)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Check for early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

# Load the best model state before evaluation
model.load_state_dict(best_model_state)

# Get predictions and evaluate
_, preds, true_labels = eval_model(model, test_loader)
print(classification_report(true_labels, preds, target_names=label_encoder.classes_))

Epoch 1/10, Train Loss: 1.9620, Validation Loss: 1.6676
Epoch 2/10, Train Loss: 1.4763, Validation Loss: 1.2725
Epoch 3/10, Train Loss: 1.0837, Validation Loss: 1.2677
Epoch 4/10, Train Loss: 1.0457, Validation Loss: 1.3293
Epoch 5/10, Train Loss: 0.8943, Validation Loss: 1.2568
Epoch 6/10, Train Loss: 0.8132, Validation Loss: 1.3132
Epoch 7/10, Train Loss: 0.7231, Validation Loss: 1.2737
Early stopping triggered
                                precision    recall  f1-score   support

                     allergens       0.92      0.92      0.92       377
                    biological       0.99      0.86      0.92       398
                      chemical       0.75      0.67      0.71       107
food additives and flavourings       0.00      0.00      0.00         7
                foreign bodies       0.95      0.73      0.83       166
                         fraud       0.71      0.44      0.54        77
                     migration       0.00      0.00      0.00         1
      

# RNN

In [12]:
# Define the RNN-based model
class RNNModel(nn.Module):
    def __init__(self, hidden_size, rnn_hidden_size, num_classes):
        super(RNNModel, self).__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.rnn = nn.RNN(input_size=self.distilbert.config.hidden_size,
                          hidden_size=rnn_hidden_size,
                          num_layers=1,
                          batch_first=True)
        self.fc = nn.Linear(rnn_hidden_size, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        # Get DistilBERT embeddings
        bert_output = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = bert_output.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Pass through RNN
        rnn_output, hidden = self.rnn(last_hidden_state)  # RNN outputs hidden state (batch_size, seq_len, rnn_hidden_size)

        # Use the last hidden state from the RNN
        rnn_output = self.dropout(hidden[-1])  # Shape: (batch_size, rnn_hidden_size)

        # Fully connected layer
        output = self.fc(rnn_output)  # Shape: (batch_size, num_classes)
        return output

# Initialize the model, loss function, and optimizer
model = RNNModel(hidden_size=64, rnn_hidden_size=128, num_classes=num_classes)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Convert class weights to float32
class_weights = torch.tensor([1.0 / count for count in train_df['hazard-category'].value_counts().values], dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0
best_model_state = copy.deepcopy(model.state_dict())

# Training function
def train_model(model, data_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask).float()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(data_loader)

# Evaluation function
def eval_model(model, data_loader):
    model.eval()
    total_loss = 0
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    return total_loss / len(data_loader), predictions, true_labels

# Train and evaluate with early stopping
epochs = 10
for epoch in range(epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer)
    val_loss, _, _ = eval_model(model, test_loader)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Check for early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

# Load the best model state before evaluation
model.load_state_dict(best_model_state)

# Get predictions and evaluate
_, preds, true_labels = eval_model(model, test_loader)
print(classification_report(true_labels, preds, target_names=label_encoder.classes_))

Epoch 1/10, Train Loss: 1.9508, Validation Loss: 1.6845
Epoch 2/10, Train Loss: 1.4659, Validation Loss: 1.5280
Epoch 3/10, Train Loss: 1.1060, Validation Loss: 0.9919
Epoch 4/10, Train Loss: 0.8757, Validation Loss: 1.1561
Epoch 5/10, Train Loss: 0.7052, Validation Loss: 1.0534
Early stopping triggered
                                precision    recall  f1-score   support

                     allergens       0.94      0.80      0.86       377
                    biological       0.99      0.82      0.90       398
                      chemical       0.80      0.63      0.70       107
food additives and flavourings       0.00      0.00      0.00         7
                foreign bodies       0.95      0.77      0.85       166
                         fraud       0.37      0.35      0.36        77
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.67      0.15      0.25        13
                  other hazard       0.19     

# LSTM

In [11]:
# Define the LSTM-based model
class LSTMModel(nn.Module):
    def __init__(self, hidden_size, lstm_hidden_size, num_classes):
        super(LSTMModel, self).__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.lstm = nn.LSTM(input_size=self.distilbert.config.hidden_size,
                            hidden_size=lstm_hidden_size,
                            num_layers=1,
                            batch_first=True)
        self.fc = nn.Linear(lstm_hidden_size, num_classes)
        self.dropout = nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        # Get DistilBERT embeddings
        bert_output = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = bert_output.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Pass through LSTM
        lstm_output, (hidden, cell) = self.lstm(last_hidden_state)  # LSTM outputs hidden state (batch_size, seq_len, lstm_hidden_size)

        # Get the hidden state from the last LSTM layer
        lstm_output = self.dropout(hidden[-1])  # Shape: (batch_size, lstm_hidden_size)

        # Fully connected layer
        output = self.fc(lstm_output)  # Shape: (batch_size, num_classes)
        return output

# Initialize the model, loss function, and optimizer
model = LSTMModel(hidden_size=64, lstm_hidden_size=128, num_classes=num_classes)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Convert class weights to float32
class_weights = torch.tensor([1.0 / count for count in train_df['hazard-category'].value_counts().values], dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0
best_model_state = copy.deepcopy(model.state_dict())

# Training function
def train_model(model, data_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask).float()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(data_loader)

# Evaluation function
def eval_model(model, data_loader):
    model.eval()
    total_loss = 0
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    return total_loss / len(data_loader), predictions, true_labels

# Train and evaluate with early stopping
epochs = 10
for epoch in range(epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer)
    val_loss, _, _ = eval_model(model, test_loader)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Check for early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

# Load the best model state before evaluation
model.load_state_dict(best_model_state)

# Get predictions and evaluate
_, preds, true_labels = eval_model(model, test_loader)
print(classification_report(true_labels, preds, target_names=label_encoder.classes_))

Epoch 1/10, Train Loss: 1.9815, Validation Loss: 1.9264
Epoch 2/10, Train Loss: 1.8057, Validation Loss: 1.6028
Epoch 3/10, Train Loss: 1.5937, Validation Loss: 1.6121
Epoch 4/10, Train Loss: 1.5272, Validation Loss: 1.5873
Epoch 5/10, Train Loss: 1.4465, Validation Loss: 1.5525
Epoch 6/10, Train Loss: 1.7132, Validation Loss: 1.6746
Epoch 7/10, Train Loss: 1.7125, Validation Loss: 1.7144
Early stopping triggered
                                precision    recall  f1-score   support

                     allergens       0.74      0.86      0.80       377
                    biological       0.99      0.59      0.74       398
                      chemical       0.00      0.00      0.00       107
food additives and flavourings       0.00      0.00      0.00         7
                foreign bodies       0.00      0.00      0.00       166
                         fraud       0.88      0.27      0.42        77
                     migration       0.00      0.00      0.00         1
      

# Bidirectional LSTM

In [16]:
# Define the Bidirectional LSTM-based model
class BiLSTMModel(nn.Module):
    def __init__(self, hidden_size, lstm_hidden_size, num_classes):
        super(BiLSTMModel, self).__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.bilstm = nn.LSTM(
            input_size=self.distilbert.config.hidden_size,
            hidden_size=lstm_hidden_size,
            num_layers=1,
            batch_first=True,
            bidirectional=True
        )
        self.fc = nn.Linear(lstm_hidden_size * 2, num_classes)  # Multiply by 2 for bidirectional output
        self.dropout = nn.Dropout(0.3)

    def forward(self, input_ids, attention_mask):
        # Get DistilBERT embeddings
        bert_output = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = bert_output.last_hidden_state  # Shape: (batch_size, seq_len, hidden_size)

        # Pass through bidirectional LSTM
        lstm_output, (hidden, cell) = self.bilstm(last_hidden_state)  # lstm_output shape: (batch_size, seq_len, lstm_hidden_size * 2)

        # Concatenate the forward and backward hidden states from the last layer of the LSTM
        lstm_output = self.dropout(torch.cat((hidden[-2], hidden[-1]), dim=1))  # Shape: (batch_size, lstm_hidden_size * 2)

        # Fully connected layer
        output = self.fc(lstm_output)  # Shape: (batch_size, num_classes)
        return output

# Initialize the model, loss function, and optimizer
model = BiLSTMModel(hidden_size=64, lstm_hidden_size=128, num_classes=num_classes)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Convert class weights to float32
class_weights = torch.tensor([1.0 / count for count in train_df['hazard-category'].value_counts().values], dtype=torch.float32)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0
best_model_state = copy.deepcopy(model.state_dict())

# Training function
def train_model(model, data_loader, criterion, optimizer):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask).float()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(data_loader)

# Evaluation function
def eval_model(model, data_loader):
    model.eval()
    total_loss = 0
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, preds = torch.max(outputs, dim=1)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    return total_loss / len(data_loader), predictions, true_labels

# Train and evaluate with early stopping
epochs = 10
for epoch in range(epochs):
    train_loss = train_model(model, train_loader, criterion, optimizer)
    val_loss, _, _ = eval_model(model, test_loader)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Check for early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

# Load the best model state before evaluation
model.load_state_dict(best_model_state)

# Get predictions and evaluate
_, preds, true_labels = eval_model(model, test_loader)
print(classification_report(true_labels, preds, target_names=label_encoder.classes_))

Epoch 1/10, Train Loss: 1.8725, Validation Loss: 1.4803
Epoch 2/10, Train Loss: 1.1924, Validation Loss: 1.2809
Epoch 3/10, Train Loss: 0.9557, Validation Loss: 1.1131
Epoch 4/10, Train Loss: 0.7003, Validation Loss: 1.1368
Epoch 5/10, Train Loss: 0.5582, Validation Loss: 1.1529
Early stopping triggered
                                precision    recall  f1-score   support

                     allergens       0.95      0.78      0.86       377
                    biological       1.00      0.71      0.83       398
                      chemical       0.85      0.56      0.67       107
food additives and flavourings       0.00      0.00      0.00         7
                foreign bodies       0.99      0.72      0.84       166
                         fraud       0.38      0.56      0.45        77
                     migration       0.00      0.00      0.00         1
          organoleptic aspects       0.00      0.00      0.00        13
                  other hazard       0.12     