In [None]:
#DEMO SCRIPT - loads a trained model and runs inference on a test set

import os

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

Model and embedding matrix loaded successfully!


Map:   0%|          | 0/4688 [00:00<?, ? examples/s]

Predictions saved to: data\predictionsDemo.csv


In [None]:
########################################
# 1. Define model
########################################

import torch.nn as nn
import torch.nn.functional as F

class SimpleAttention(nn.Module):
    """Attention pooling that collapses BiLSTM outputs into one vector per sequence.

    Every timestep is scored with ``v(tanh(W(h_t)))``; the scores are turned
    into weights with a softmax over the sequence axis and used to form a
    weighted sum of the timestep vectors.

    Args:
        hidden_dim: Per-direction LSTM hidden size. The layers operate on
            ``2 * hidden_dim`` features (both directions concatenated).
    """

    def __init__(self, hidden_dim):
        super().__init__()
        feature_dim = 2 * hidden_dim
        self.hidden_dim = hidden_dim
        # Attribute names `W` and `v` are part of the saved state_dict keys.
        self.W = nn.Linear(feature_dim, feature_dim)
        self.v = nn.Linear(feature_dim, 1, bias=False)

    def forward(self, lstm_outputs, mask=None):
        """Pool ``lstm_outputs`` of shape (B, L, 2H) into a (B, 2H) context.

        Args:
            lstm_outputs: BiLSTM outputs, shape (B, L, 2H).
            mask: Optional (B, L) tensor; positions equal to 0 are excluded
                from the softmax by giving them a very negative score.

        Returns:
            Tensor of shape (B, 2H): the attention-weighted sum over L.
        """
        # One scalar score per timestep: (B, L, 2H) -> (B, L, 1) -> (B, L)
        energies = self.v(torch.tanh(self.W(lstm_outputs))).squeeze(-1)

        if mask is not None:
            # Masked positions end up with ~0 weight after the softmax.
            energies = energies.masked_fill(mask == 0, -1e9)

        weights = torch.softmax(energies, dim=-1)                 # (B, L)
        # (B, 1, L) @ (B, L, 2H) -> (B, 1, 2H)
        pooled = torch.bmm(weights.unsqueeze(1), lstm_outputs)
        return pooled.squeeze(1)                                  # (B, 2H)


class CustomBiLSTMModel(nn.Module):
    """Bidirectional-LSTM sequence classifier with optional attention pooling.

    Token ids are embedded, passed through a (possibly multi-layer) BiLSTM and
    reduced to one vector per sequence: by ``SimpleAttention`` when
    ``use_attention`` is true, otherwise by concatenating the top layer's
    final forward and backward hidden states. A linear head maps that vector
    to ``num_labels`` logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding width (also the LSTM input size).
        hidden_dim: Per-direction LSTM hidden size.
        num_labels: Number of output classes.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between stacked LSTM layers. Only passed to the LSTM
            when ``num_layers > 1`` (it has no effect on a single layer).
        use_attention: Pool with ``SimpleAttention`` instead of final states.
        use_focal_loss, gamma, label_smoothing: Stored on the instance only;
            ``forward`` in this class does not compute a loss.
        embedding_matrix: Optional ``(vocab_size, embed_dim)`` array-like used
            to initialise the embedding weights.

    Raises:
        ValueError: If ``embedding_matrix`` does not have shape
            ``(vocab_size, embed_dim)``.
    """

    def __init__(self, 
                 vocab_size,
                 embed_dim=300,
                 hidden_dim=256, 
                 num_labels=2,
                 num_layers=2, 
                 dropout=0.3, 
                 use_attention=True,
                 use_focal_loss=False, 
                 gamma=2.0, 
                 label_smoothing=0.0,
                 embedding_matrix=None):
        super().__init__()
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.num_labels = num_labels

        self.use_attention = use_attention
        self.use_focal_loss = use_focal_loss
        self.gamma = gamma
        self.label_smoothing = label_smoothing

        # Embedding layer (index 0 is treated as padding)
        self.embedding = nn.Embedding(self.vocab_size, self.embed_dim, padding_idx=0)
        if embedding_matrix is not None:
            pretrained = torch.as_tensor(embedding_matrix)
            expected_shape = (self.vocab_size, self.embed_dim)
            # copy_ broadcasts its source, so a wrongly shaped matrix such as
            # (1, embed_dim) would otherwise be accepted silently.
            if tuple(pretrained.shape) != expected_shape:
                raise ValueError(
                    f"embedding_matrix has shape {tuple(pretrained.shape)}, "
                    f"expected {expected_shape}"
                )
            with torch.no_grad():
                self.embedding.weight.copy_(pretrained)

        # BiLSTM. Inter-layer dropout is meaningless for a single layer and
        # makes PyTorch emit a UserWarning, so only pass it when stacked.
        lstm_dropout = self.dropout if self.num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=self.embed_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            dropout=lstm_dropout,
            batch_first=True,
            bidirectional=True
        )

        # Optional attention
        if self.use_attention:
            self.attn = SimpleAttention(self.hidden_dim)

        # Classification head
        self.classifier = nn.Linear(2 * self.hidden_dim, self.num_labels)

    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        """Compute class logits for a batch of token-id sequences.

        Args:
            input_ids: Integer tensor of token ids, shape (B, L). Required.
            attention_mask: Optional (B, L) tensor; positions equal to 0 have
                their embeddings zeroed and are excluded from attention.
            labels: Accepted for interface compatibility; not used here.
            **kwargs: Extra batch fields (e.g. tokenizer outputs the model
                does not consume) are accepted and ignored.

        Returns:
            dict with a single key ``"logits"`` of shape (B, num_labels).

        Raises:
            ValueError: If ``input_ids`` is None.
        """
        if input_ids is None:
            raise ValueError("input_ids must be provided")

        # Embeddings, with masked positions zeroed out
        embeds = self.embedding(input_ids)
        if attention_mask is not None:
            expand_mask = attention_mask.unsqueeze(-1).float()
            embeds = embeds * expand_mask

        # LSTM: outputs (B, L, 2H), h shape (2*num_layers, B, H)
        lstm_outputs, (h, _) = self.lstm(embeds)

        if self.use_attention:
            context = self.attn(lstm_outputs, mask=attention_mask)
        else:
            # Final forward and backward states of the top LSTM layer.
            # NOTE(review): sequences are not packed, so these states are
            # computed over the padded positions as well.
            h_forward = h[-2]
            h_backward = h[-1]
            context = torch.cat((h_forward, h_backward), dim=-1)

        logits = self.classifier(context)
        return {"logits": logits}

In [None]:
########################################
# 2. Load the Trained Model Checkpoint
########################################

# Build the path with os.path.join so the notebook also runs on Linux/macOS,
# where a backslash is an ordinary filename character, not a separator.
BEST_MODEL_PATH = os.path.join("data", "taskB", "ED_B_Model.pt")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code. Only load checkpoints from a trusted source.
# It is kept here because the checkpoint stores more than the state_dict
# (hyperparameters and the embedding matrix).
checkpoint = torch.load(
    BEST_MODEL_PATH, 
    map_location=device, 
    weights_only=False
)

# Fail early, naming every missing entry at once.
missing_keys = {"hyperparams", "embedding_matrix", "model_state_dict"} - set(checkpoint)
if missing_keys:
    raise KeyError(
        f"Checkpoint {BEST_MODEL_PATH} is missing expected entries: {sorted(missing_keys)}"
    )

# Extract hyperparams & embedding matrix from the checkpoint
hyperparams = checkpoint["hyperparams"]
embedding_matrix_tensor = checkpoint["embedding_matrix"].to(device)

# Re-instantiate the model with the same architecture it was trained with
loaded_model = CustomBiLSTMModel(
    vocab_size      = hyperparams["vocab_size"],
    embed_dim       = hyperparams["embed_dim"],
    hidden_dim      = hyperparams["hidden_dim"],
    num_labels      = hyperparams["num_labels"],
    num_layers      = hyperparams["num_layers"],
    dropout         = hyperparams["dropout"],
    use_attention   = hyperparams["use_attention"],
    use_focal_loss  = hyperparams["use_focal_loss"],
    gamma           = hyperparams["gamma"],
    label_smoothing = hyperparams["label_smoothing"],
    embedding_matrix=embedding_matrix_tensor
)

# Restore the trained weights, move to the target device and switch to
# evaluation mode.
loaded_model.load_state_dict(checkpoint["model_state_dict"])
loaded_model.to(device)
loaded_model.eval()

print("Model and embedding matrix loaded successfully!")

In [None]:
########################################
# 3. Load and Preprocess Test Data
########################################
# Build paths with os.path.join so they resolve on any OS (a literal
# backslash is not a path separator on Linux/macOS).
TEST_PATH = os.path.join("data", "test.csv")
OUTPUT_PATH = os.path.join("data", "predictionsDemo.csv")

# Load the test CSV and normalise the text column names. A chained rename
# (rather than inplace=True) keeps this cell safe to re-run.
test_df = pd.read_csv(TEST_PATH).rename(
    columns={"Claim": "claim", "Evidence": "evidence"}
)

# Fail early with a clear message if the expected text columns are absent.
missing_columns = [col for col in ("claim", "evidence") if col not in test_df.columns]
if missing_columns:
    raise KeyError(f"{TEST_PATH} is missing required column(s): {missing_columns}")

# Empty CSV cells are read as NaN (a float), which the tokenizer cannot
# encode; treat them as empty strings.
test_df[["claim", "evidence"]] = test_df[["claim", "evidence"]].fillna("").astype(str)

# preserve_index=False keeps the pandas index out of the dataset columns.
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)

TOKENIZER_NAME = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

def tokenize_function(examples):
    """Encode a batch of (claim, evidence) pairs with the module-level tokenizer.

    Args:
        examples: Mapping with ``"claim"`` and ``"evidence"`` entries, as
            supplied by ``Dataset.map``.

    Returns:
        The tokenizer's output for the paired inputs, truncated and padded
        to exactly 128 tokens.
    """
    claims, evidences = examples["claim"], examples["evidence"]
    encoding_options = dict(truncation=True, padding="max_length", max_length=128)
    return tokenizer(claims, evidences, **encoding_options)

# Tokenize in batches, then drop the raw text columns, which are no longer
# needed once the encodings exist.
encoded_test = (
    test_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(["claim", "evidence"])
)

# Have the dataset return PyTorch tensors when indexed
encoded_test.set_format("torch")

In [None]:
########################################
# 4. Run Inference on the Test Set
########################################
# No shuffle argument is passed, so batches follow the dataset order and the
# collected predictions line up with the rows of the test set.
test_loader = DataLoader(encoded_test, batch_size=8)

all_preds = []

loaded_model.eval()
with torch.no_grad():  # inference only: no gradients needed
    for batch in test_loader:
        # Put every field of the batch on the model's device
        batch = {name: tensor.to(device) for name, tensor in batch.items()}

        outputs = loaded_model(**batch)
        logits = outputs["logits"]            # (B, num_labels)
        preds = logits.argmax(dim=1)          # highest-scoring class per row
        all_preds += preds.cpu().tolist()



In [None]:
########################################
# 5. Save Predictions
########################################
# Wrap the collected class indices in a single-column frame
test_pred_df = pd.DataFrame(dict(prediction=all_preds))

# Write the predictions with a header row and without the pandas index
test_pred_df.to_csv(OUTPUT_PATH, header=True, index=False)
print(f"Predictions saved to: {OUTPUT_PATH}")