# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import DistilBertModel, AdamW
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Dataloaders

In [2]:
train_DB_inputs = torch.load('intermediates/DB_inputs.pt')
dev_DB_inputs = torch.load('intermediates/DB_dev_inputs.pt')

train_labels = torch.load('intermediates/labels.pt')
dev_labels = torch.load('intermediates/labels_dev.pt')

In [3]:
class CustomDataset(Dataset):
    def __init__(self, dB_inputs, labels):
        self.dB_inputs = dB_inputs
        self.labels = labels

    def __len__(self):
        return len(self.dB_inputs['input_ids'])

    def __getitem__(self, idx):
        input_ids = self.dB_inputs['input_ids'][idx]
        attention_mask = self.dB_inputs['attention_mask'][idx]
        label = self.labels[idx]
        
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': label
        }

In [4]:
train_custom_dataset = CustomDataset(dB_inputs=train_DB_inputs, labels=train_labels)
dev_custom_dataset = CustomDataset(dB_inputs=dev_DB_inputs, labels=dev_labels)

train_dataloader = DataLoader(train_custom_dataset, batch_size=10, shuffle=True)
dev_dataloader = DataLoader(dev_custom_dataset, batch_size=10)

# Loading Pretrained and Finetuning model

In [5]:
distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')

In [6]:
train_dataset = pd.read_csv('cleaned_dataset.csv')
dev_dataset = pd.read_csv('cleaned_dev_dataset.csv')

In [7]:
class CNNLSTMClassifier(nn.Module):
    def __init__(self, distilbert, cnn_out_channels=64, lstm_hidden_dim=64, num_classes=2):
        super(CNNLSTMClassifier, self).__init__()
        self.distilbert = distilbert
        self.cnn = nn.Conv1d(in_channels=768, out_channels=cnn_out_channels, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(cnn_out_channels, lstm_hidden_dim, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_dim, num_classes)
        
    def forward(self, input_ids, attention_mask):
        with torch.set_grad_enabled(self.distilbert.training):
            distilbert_output = self.distilbert(input_ids=input_ids, attention_mask=attention_mask)
            embeddings = distilbert_output.last_hidden_state.permute(0, 2, 1)  # (batch, embed_dim, seq_len)
        
        cnn_out = self.cnn(embeddings)
        
        lstm_out, _ = self.lstm(cnn_out.permute(0, 2, 1))  # (batch, seq_len, lstm_hidden_dim)
        
        logits = self.fc(lstm_out[:, -1, :])  # Use last hidden state for classification
        return logits

In [8]:
model = CNNLSTMClassifier(distilbert)

In [9]:
model.train()

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=2e-5) #learing rate used by baseline from COLING 2025

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
model.to(device)

def train_model(model, dataloader, criterion, optimizer, num_epochs=1, accumulation_steps=10):
    for epoch in range(num_epochs):
        total_loss = 0
        for i, batches in enumerate(tqdm(dataloader)):
            input_ids = batches['input_ids'].to(device)
            attention_mask = batches['attention_mask'].to(device)
            labels = batches['labels'].to(device)
            
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            loss.backward()
            
            if (i + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
            
            total_loss += loss.item()
        
        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}')

    return model



Using device: cuda


In [10]:
def evaluate_model(model, dataloader):
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batches in tqdm(dataloader):
            input_ids = batches['input_ids'].to(device)
            attention_mask = batches['attention_mask'].to(device)
            labels = batches['labels'].to(device)
            
            outputs = model(input_ids, attention_mask)
            _, preds = torch.max(outputs, dim=1)
            
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_score(true_labels, predictions)
    print(f'Validation Accuracy: {accuracy:.4f}')

In [11]:
DB_model = train_model(model, train_dataloader, criterion, optimizer)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 61077/61077 [5:02:40<00:00,  3.36it/s]  

Epoch 1/1, Loss: 0.2886





In [12]:
torch.save(DB_model, 'intermediates/DB_model.pt')

In [13]:
evaluate_model(model, dev_dataloader)

100%|██████████| 26176/26176 [42:21<00:00, 10.30it/s]

Validation Accuracy: 0.9113



