In [10]:
# ----------------------------------------------------------------------------
# DEMO SCRIPT - loads a trained DeBERTa model and runs inference on a test set
# ----------------------------------------------------------------------------

import torch
import pandas as pd
import numpy as np

from torch.utils.data import DataLoader
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

import torch.nn as nn
import torch.nn.functional as F

class CustomDebertaModel(nn.Module):
    """Thin wrapper around a Hugging Face sequence-classification model.

    The wrapped network is stored as ``self.model`` and ``forward`` returns
    its logits inside a plain dictionary.

    Args:
        model_name: Identifier or path handed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        num_labels: Number of output classes requested from the HF model.
        use_focal_loss: Recorded on the instance; not read by ``forward``.
        gamma: Recorded on the instance; not read by ``forward``.
        label_smoothing: Recorded on the instance; not read by ``forward``.
    """

    def __init__(
        self,
        model_name,
        num_labels=2,
        use_focal_loss=False,
        gamma=2.0,
        label_smoothing=0.0
    ):
        super().__init__()

        # Keep the configuration on the instance exactly as it was passed in.
        self.num_labels, self.use_focal_loss = num_labels, use_focal_loss
        self.gamma, self.label_smoothing = gamma, label_smoothing

        # Build the underlying HF classifier with the requested head size.
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        self.model = backbone

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Run the wrapped model and return ``{"logits": <model logits>}``.

        Any additional keyword arguments are forwarded unchanged to the
        wrapped model.
        """
        hf_output = self.model(
            input_ids=input_ids, attention_mask=attention_mask, **kwargs
        )
        return {"logits": hf_output.logits}

In [11]:
# (2) Load the Trained Model Checkpoint
# Forward slashes resolve on Windows, Linux and macOS alike; a
# backslash-separated literal only resolves on Windows.
BEST_MODEL_PATH = "data/taskC/best_deberta_model.pt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): torch.load unpickles the file, so only open checkpoints that
# come from a trusted source. If the checkpoint holds nothing but tensors and
# plain Python containers, consider passing weights_only=True - confirm first.
checkpoint = torch.load(
    BEST_MODEL_PATH,
    map_location=device
)

# The checkpoint is read as a dict holding the trained weights and the
# hyperparameters needed to rebuild the model.
state_dict   = checkpoint["model_state_dict"]
hyperparams  = checkpoint["hyperparams"]

# These two determine the architecture, so they are required.
model_name       = hyperparams["model_name"]
num_labels       = hyperparams["num_labels"]

# The loss settings are only stored on the model and never read by its
# forward pass, so a checkpoint that omits them can still be used for
# inference. The fallbacks mirror the CustomDebertaModel constructor defaults.
use_focal_loss   = hyperparams.get("use_focal_loss", False)
gamma            = hyperparams.get("gamma", 2.0)
label_smoothing  = hyperparams.get("label_smoothing", 0.0)

# Re-instantiate the custom model
loaded_model = CustomDebertaModel(
    model_name=model_name,
    num_labels=num_labels,
    use_focal_loss=use_focal_loss,
    gamma=gamma,
    label_smoothing=label_smoothing
)

# Load the trained weights, then switch to eval mode for inference
loaded_model.load_state_dict(state_dict)
loaded_model.to(device)
loaded_model.eval()
print("DeBERTa model loaded successfully!")

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DeBERTa model loaded successfully!


In [12]:
# (3) Load and Preprocess Test Data
# Forward slashes are accepted on every platform (Windows included), whereas
# backslash separators only resolve on Windows; elsewhere the output would be
# written to a file whose name literally contains the backslash.
TEST_PATH = "data/dev.csv"
OUTPUT_PATH = "data/predictions_demo_deberta.csv"

# Build the frame in one chain instead of mutating it in place:
#   - errors="ignore" makes the drop a no-op when there is no 'label' column
#   - the rename maps the CSV headers onto the lowercase names that
#     tokenize_function reads
test_df = (
    pd.read_csv(TEST_PATH)
    .drop(columns=["label"], errors="ignore")
    .rename(columns={"Claim": "claim", "Evidence": "evidence"})
)

test_dataset = Dataset.from_pandas(test_df)

# The tokenizer is loaded by the model name stored in the checkpoint
# hyperparameters, so it matches the model being evaluated.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    """Tokenize claim/evidence pairs with the notebook-level ``tokenizer``.

    Args:
        examples: Mapping with "claim" and "evidence" entries; both are
            passed to the tokenizer as the first and second text inputs.

    Returns:
        Whatever the tokenizer returns for the pair, with truncation enabled
        and padding to a fixed length of 128.
    """
    claims, evidences = examples["claim"], examples["evidence"]
    encoding_options = dict(truncation=True, padding="max_length", max_length=128)
    return tokenizer(claims, evidences, **encoding_options)

# Tokenize and drop every raw column in the same pass. Removing only
# "claim" and "evidence" would leave any other CSV column in the dataset,
# and the inference loop forwards every remaining field to the model.
test_dataset = test_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=test_dataset.column_names,
)

# Convert to PyTorch Tensors
test_dataset.set_format("torch")




Map:   0%|          | 0/5926 [00:00<?, ? examples/s]

In [13]:
# (4) Inference Loop
# Batches are served in dataset order (no shuffling is requested), so the
# collected predictions line up with the rows of the test set.
test_loader = DataLoader(test_dataset, batch_size=8)
all_preds = []

# Gradients are not needed for prediction; eval mode is set again here so the
# cell can be re-run on its own.
loaded_model.eval()
with torch.no_grad():
    for batch in test_loader:
        # Place every field of the batch on the same device as the model
        batch = {name: field.to(device) for name, field in batch.items()}

        outputs = loaded_model(**batch)
        logits = outputs["logits"]

        # Highest-scoring class index per example
        preds = logits.argmax(dim=1)
        all_preds += preds.cpu().tolist()

In [14]:
# (5) Save Predictions
# One row per prediction under a single "prediction" column; the header is
# written and the row index is not.
prediction_columns = {"prediction": all_preds}
test_pred_df = pd.DataFrame(prediction_columns)
test_pred_df.to_csv(OUTPUT_PATH, header=True, index=False)
print(f"Predictions saved to {OUTPUT_PATH}")

Predictions saved to data\predictions_demo_deberta.csv
