# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import DistilBertModel, AdamW, AutoModel, ElectraModel
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


# Dataloaders

In [2]:
class CustomDataset(Dataset):
    """Map-style dataset pairing tokenizer-style inputs with per-example labels.

    Args:
        inputs: Mapping that is indexed with the keys ``'input_ids'`` and
            ``'attention_mask'``; each value is indexed by example position.
        labels: Sequence indexed by example position.
    """

    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        # The number of examples is taken from the 'input_ids' entry only.
        token_ids = self.inputs['input_ids']
        return len(token_ids)

    def __getitem__(self, idx):
        """Return a dict with 'input_ids', 'attention_mask' and 'labels' for example ``idx``."""
        sample = {field: self.inputs[field][idx] for field in ('input_ids', 'attention_mask')}
        sample['labels'] = self.labels[idx]
        return sample

# Creating Model Definition

In [19]:
class CNNLSTMClassifier(nn.Module):
    """Transformer encoder followed by a Conv1d -> LSTM -> Linear classification head.

    NOTE(review): a later cell in this notebook defines another class with the
    same name, which shadows this one when cells are run top to bottom.

    Args:
        tinybert: Encoder module. It is called with ``input_ids`` and
            ``attention_mask`` keywords and must return an object exposing
            ``last_hidden_state``.
        cnn_out_channels: Number of Conv1d output channels (also the LSTM input size).
        lstm_hidden_dim: LSTM hidden size (also the Linear input size).
        num_classes: Number of output logits.
        in_channels: Width of the encoder's hidden states fed to the Conv1d.
            Defaults to 768 (the value that used to be hard-coded); pass the
            encoder's real hidden size for other backbones.
    """

    def __init__(self, tinybert, cnn_out_channels=64, lstm_hidden_dim=64, num_classes=2, in_channels=768):
        super(CNNLSTMClassifier, self).__init__()
        self.tinybert = tinybert
        # kernel_size=3 with padding=1 keeps the sequence length unchanged.
        self.cnn = nn.Conv1d(in_channels=in_channels, out_channels=cnn_out_channels, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(cnn_out_channels, lstm_hidden_dim, batch_first=True)
        self.fc = nn.Linear(lstm_hidden_dim, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return logits computed from the LSTM output at the last sequence position."""
        # Encoder gradients are tracked only while the encoder itself is in
        # training mode; putting it in eval mode effectively freezes it.
        with torch.set_grad_enabled(self.tinybert.training):
            encoder_output = self.tinybert(input_ids=input_ids, attention_mask=attention_mask)
            embeddings = encoder_output.last_hidden_state.permute(0, 2, 1)  # (batch, embed_dim, seq_len)

        cnn_out = self.cnn(embeddings)

        lstm_out, _ = self.lstm(cnn_out.permute(0, 2, 1))  # (batch, seq_len, lstm_hidden_dim)

        # NOTE(review): the last position is used regardless of attention_mask;
        # if inputs are right-padded this is a padding position - confirm intent.
        logits = self.fc(lstm_out[:, -1, :])
        return logits

In [3]:
class CNNLSTMClassifier(nn.Module):
    """Encoder -> Conv1d -> LSTM -> Linear sequence classifier.

    Args:
        bert: Encoder module called with ``input_ids`` / ``attention_mask``
            keywords; its result must expose ``last_hidden_state``.
        in_channels: Width of the encoder hidden states (Conv1d input channels).
        cnn_out_channels: Conv1d output channels, which is also the LSTM input size.
        lstm_hidden_dim: LSTM hidden size, which is also the Linear input size.
        num_classes: Number of logits produced per example.
    """

    def __init__(self, bert, in_channels, cnn_out_channels=64, lstm_hidden_dim=64, num_classes=2):
        super().__init__()
        self.bert = bert
        # A width-3 kernel with one step of padding keeps the sequence length.
        self.cnn = nn.Conv1d(in_channels, cnn_out_channels, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(input_size=cnn_out_channels, hidden_size=lstm_hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=lstm_hidden_dim, out_features=num_classes)

    def forward(self, input_ids, attention_mask):
        """Compute class logits from the LSTM output at the final sequence position."""
        # Gradients flow into the encoder only while it is in training mode.
        track_encoder_grads = self.bert.training
        with torch.set_grad_enabled(track_encoder_grads):
            encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
            channels_first = encoded.last_hidden_state.transpose(1, 2)  # (batch, embed_dim, seq_len)

        conv_features = self.cnn(channels_first)

        # Back to (batch, seq_len, channels) for the batch-first LSTM.
        sequence_out, _state = self.lstm(conv_features.transpose(1, 2))

        final_step = sequence_out[:, -1, :]
        return self.fc(final_step)

# Training and Evaluation

In [4]:
# Cleaned train / dev splits, read as DataFrames from the working directory.
train_dataset, dev_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ('cleaned_dataset.csv', 'cleaned_dev_dataset.csv')
)

In [5]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=1, accumulation_steps=10, device='cuda'):
    """Train ``model`` with gradient accumulation and return it.

    The model's train/eval mode is left exactly as the caller set it; this
    function never calls ``model.train()``.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning logits.
        dataloader: Sized iterable of dict batches with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        criterion: Loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: Object providing ``step()`` and ``zero_grad()``.
        num_epochs: Number of passes over ``dataloader``.
        accumulation_steps: Number of batches whose gradients are averaged
            into one optimizer step.
        device: Device the batch tensors are moved to.

    Returns:
        The same ``model`` object.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be >= 1")

    num_batches = len(dataloader)
    # Size of the incomplete accumulation window at the end of an epoch (0 if none).
    tail = num_batches % accumulation_steps
    tail_start = num_batches - tail

    # Start from clean gradients even if a previous (interrupted) run left some behind.
    optimizer.zero_grad()

    for epoch in range(num_epochs):
        total_loss = 0.0
        for i, batches in enumerate(tqdm(dataloader)):
            input_ids = batches['input_ids'].to(device)
            attention_mask = batches['attention_mask'].to(device)
            labels = batches['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            # Average (rather than sum) the gradients over the accumulation
            # window so the step size does not scale with accumulation_steps.
            window = tail if i >= tail_start else accumulation_steps
            (loss / window).backward()

            # Also step on the final batch so a trailing partial window is
            # applied instead of being dropped or leaking into the next epoch.
            if (i + 1) % accumulation_steps == 0 or (i + 1) == num_batches:
                optimizer.step()
                optimizer.zero_grad()

            # Report the unscaled per-batch loss.
            total_loss += loss.item()

        avg_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}')

    return model

In [6]:
def evaluate_model(model, dataloader, device):
    """Run inference over ``dataloader`` and print a classification report.

    Puts ``model`` in eval mode (and leaves it there) and disables gradient
    tracking while predicting.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning
            per-class scores with the class dimension at index 1.
        dataloader: Iterable of dict batches with the keys ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'``.
        device: Device the model inputs are moved to.

    Returns:
        None. The report is printed.
    """
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batches in tqdm(dataloader):
            input_ids = batches['input_ids'].to(device)
            attention_mask = batches['attention_mask'].to(device)

            outputs = model(input_ids, attention_mask)
            preds = outputs.argmax(dim=1)

            predictions.extend(preds.cpu().numpy())
            # Labels are only consumed on the host, so they skip the device round trip.
            true_labels.extend(batches['labels'].cpu().numpy())

    # Print the header only after the progress bar has finished so it stays
    # directly above the report in the cell output.
    print("Classification Report:\n")
    report = classification_report(true_labels, predictions, digits=4)
    print(report)

In [7]:
def test_model(model, dataloader, device):
    model.eval()
    predictions, true_labels = [], []
    print("Classification Report:\n")
    with torch.no_grad():
        for batches in tqdm(dataloader):
            input_ids = batches['input_ids'].to(device)
            attention_mask = batches['attention_mask'].to(device)
            labels = batches['labels'].to(device)
            
            outputs = model(input_ids, attention_mask)
            _, preds = torch.max(outputs, dim=1)
            
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    
    report = classification_report(true_labels, predictions, digits=4)
    print(report)
    return predictions, true_labels

# DistilBert

In [14]:
# Load the cached DistilBERT inputs and the labels for each split.
# weights_only is passed explicitly so the cell behaves the same on PyTorch
# releases where the default is True, and so no FutureWarning is emitted.
# SECURITY: weights_only=False unpickles arbitrary objects - only load files you created.
# NOTE(review): switch to weights_only=True if these files hold only tensors / plain containers.
train_DB_inputs = torch.load('intermediates/DB_inputs.pt', weights_only=False)
dev_DB_inputs = torch.load('intermediates/DB_dev_inputs.pt', weights_only=False)
test_DB_inputs = torch.load('intermediates/DB_test_inputs.pt', weights_only=False)

train_labels = torch.load('intermediates/labels.pt', weights_only=False)
dev_labels = torch.load('intermediates/labels_dev.pt', weights_only=False)
test_labels = torch.load('intermediates/labels_test.pt', weights_only=False)

  train_DB_inputs = torch.load('intermediates/DB_inputs.pt')
  dev_DB_inputs = torch.load('intermediates/DB_dev_inputs.pt')
  test_DB_inputs = torch.load('intermediates/DB_test_inputs.pt')
  train_labels = torch.load('intermediates/labels.pt')
  dev_labels = torch.load('intermediates/labels_dev.pt')
  test_labels = torch.load('intermediates/labels_test.pt')


In [15]:
# Pair each split's DistilBERT inputs with its labels.
train_custom_dataset, dev_custom_dataset, test_custom_dataset = (
    CustomDataset(inputs=split_inputs, labels=split_labels)
    for split_inputs, split_labels in (
        (train_DB_inputs, train_labels),
        (dev_DB_inputs, dev_labels),
        (test_DB_inputs, test_labels),
    )
)

# Only the training loader shuffles; dev / test keep their stored order.
train_dataloader = DataLoader(dataset=train_custom_dataset, batch_size=10, shuffle=True)
dev_dataloader = DataLoader(dataset=dev_custom_dataset, batch_size=10)
test_dataloader = DataLoader(dataset=test_custom_dataset, batch_size=10)

In [16]:
# Pretrained DistilBERT encoder; the next cell wraps it with in_channels=768.
distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')

In [17]:
# Pick the accelerator first; the model is moved onto it at the end of the cell.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

# Wrap the DistilBERT encoder with the CNN-LSTM head and enable training mode.
DB_model = CNNLSTMClassifier(distilbert, in_channels=768)
DB_model.train()

criterion = nn.CrossEntropyLoss()
# Learning rate used by the baseline from COLING 2025.
optimizer = AdamW(DB_model.parameters(), lr=2e-5)

# Last expression: moves the model and echoes its architecture as the cell output.
DB_model.to(device)

Using device: cuda


CNNLSTMClassifier(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Line

In [None]:
# Fine-tune for one epoch, stepping the optimizer every 10 batches.
# Long-running: the recorded run below took about 5 hours for 61,077 batches.
DB_model = train_model(DB_model, train_dataloader, criterion, optimizer, num_epochs=1, accumulation_steps=10, device=device)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 61077/61077 [5:02:40<00:00,  3.36it/s]  

Epoch 1/1, Loss: 0.2886





In [None]:
# Persist the whole module object (not just a state_dict); reloading it needs
# the CNNLSTMClassifier class to be defined in the loading session.
# NOTE(review): assumes the `models/` directory already exists - confirm.
torch.save(DB_model, 'models/DB_model.pt')

In [30]:
# The checkpoint is a fully pickled nn.Module, which cannot be restored with
# weights_only=True (the default from PyTorch 2.6), so request full unpickling
# explicitly. CNNLSTMClassifier must already be defined in this session.
# SECURITY: this executes pickle code - only load checkpoints you created.
DB_model = torch.load('models/DB_model.pt', weights_only=False)

  DB_model = torch.load('models/DB_model.pt')


In [33]:
# Dev-set metrics for the DistilBERT classifier.
# NOTE(review): `dev_dataloader` is rebound in every model section of this
# notebook; make sure the DistilBert loader cell was the last one executed.
evaluate_model(DB_model, dev_dataloader, device)

Classification Report:



  0%|          | 0/26176 [00:00<?, ?it/s]

100%|██████████| 26176/26176 [1:22:24<00:00,  5.29it/s]


              precision    recall  f1-score   support

           0     0.9033    0.8553    0.8786     98328
           1     0.9157    0.9449    0.9300    163430

    accuracy                         0.9113    261758
   macro avg     0.9095    0.9001    0.9043    261758
weighted avg     0.9110    0.9113    0.9107    261758



In [38]:
# Test-set metrics for the DistilBERT classifier; keeps predictions and labels for later use.
# NOTE(review): `test_dataloader` is rebound in every model section of this
# notebook; make sure the DistilBert loader cell was the last one executed.
DB_preds, DB_true_labels = test_model(DB_model, test_dataloader, device)

Classification Report:



100%|██████████| 7395/7395 [23:32<00:00,  5.24it/s]


              precision    recall  f1-score   support

           0     0.8106    0.6062    0.6937     34675
           1     0.7156    0.8749    0.7873     39266

    accuracy                         0.7489     73941
   macro avg     0.7631    0.7406    0.7405     73941
weighted avg     0.7601    0.7489    0.7434     73941



# TinyBert

In [11]:
# Load the cached TinyBERT inputs and the labels for each split.
# weights_only is passed explicitly so the cell behaves the same on PyTorch
# releases where the default is True, and so no FutureWarning is emitted.
# SECURITY: weights_only=False unpickles arbitrary objects - only load files you created.
# NOTE(review): switch to weights_only=True if these files hold only tensors / plain containers.
train_TB_inputs = torch.load('intermediates/TB_inputs.pt', weights_only=False)
dev_TB_inputs = torch.load('intermediates/TB_dev_inputs.pt', weights_only=False)
test_TB_inputs = torch.load('intermediates/TB_test_inputs.pt', weights_only=False)

train_labels = torch.load('intermediates/labels.pt', weights_only=False)
dev_labels = torch.load('intermediates/labels_dev.pt', weights_only=False)
test_labels = torch.load('intermediates/labels_test.pt', weights_only=False)

In [12]:
# Pair each split's TinyBERT inputs with its labels.
train_custom_dataset, dev_custom_dataset, test_custom_dataset = (
    CustomDataset(inputs=split_inputs, labels=split_labels)
    for split_inputs, split_labels in (
        (train_TB_inputs, train_labels),
        (dev_TB_inputs, dev_labels),
        (test_TB_inputs, test_labels),
    )
)

# Only the training loader shuffles; dev / test keep their stored order.
train_dataloader = DataLoader(dataset=train_custom_dataset, batch_size=10, shuffle=True)
dev_dataloader = DataLoader(dataset=dev_custom_dataset, batch_size=10)
test_dataloader = DataLoader(dataset=test_custom_dataset, batch_size=10)

In [15]:
# Pretrained TinyBERT encoder; the next cell wraps it with in_channels=312.
tinybert = AutoModel.from_pretrained("huawei-noah/TinyBERT_General_4L_312D")

In [16]:
# Pick the accelerator first; the model is moved onto it at the end of the cell.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

# Wrap the TinyBERT encoder with the CNN-LSTM head and enable training mode.
TB_model = CNNLSTMClassifier(tinybert, in_channels=312)
TB_model.train()

criterion = nn.CrossEntropyLoss()
# Learning rate used by the baseline from COLING 2025.
optimizer = AdamW(TB_model.parameters(), lr=2e-5)

# Last expression: moves the model and echoes its architecture as the cell output.
TB_model.to(device)



Using device: cuda


CNNLSTMClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 312, padding_idx=0)
      (position_embeddings): Embedding(512, 312)
      (token_type_embeddings): Embedding(2, 312)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-3): 4 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=312, out_features=312, bias=True)
              (key): Linear(in_features=312, out_features=312, bias=True)
              (value): Linear(in_features=312, out_features=312, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=312, out_features=312, bias=True)
              (LayerNorm): LayerNorm((312,), eps=1e-12, elementwi

In [None]:
# Fine-tune for one epoch, stepping the optimizer every 10 batches.
# Long-running: the recorded run below took about 2 h 52 min for 61,077 batches.
TB_model = train_model(TB_model, train_dataloader, criterion, optimizer, num_epochs=1, accumulation_steps=10, device=device)

100%|██████████| 61077/61077 [2:51:55<00:00,  5.92it/s]  

Epoch 1/1, Loss: 0.3766





In [None]:
# Persist the whole module object (not just a state_dict); reloading it needs
# the CNNLSTMClassifier class to be defined in the loading session.
# NOTE(review): assumes the `models/` directory already exists - confirm.
torch.save(TB_model, 'models/TB_model.pt')

In [20]:
# The checkpoint is a fully pickled nn.Module, which cannot be restored with
# weights_only=True (the default from PyTorch 2.6), so request full unpickling
# explicitly. CNNLSTMClassifier must already be defined in this session.
# SECURITY: this executes pickle code - only load checkpoints you created.
TB_model = torch.load('models/TB_model.pt', weights_only=False)

In [None]:
# Dev-set metrics for the TinyBERT classifier.
# NOTE(review): `dev_dataloader` is rebound in every model section of this
# notebook; make sure the TinyBert loader cell was the last one executed.
evaluate_model(TB_model, dev_dataloader, device)

100%|██████████| 26176/26176 [26:23<00:00, 16.53it/s]


Classification Report:
               precision    recall  f1-score   support

           0     0.8366    0.8328    0.8347     98328
           1     0.8997    0.9021    0.9009    163430

    accuracy                         0.8761    261758
   macro avg     0.8681    0.8674    0.8678    261758
weighted avg     0.8760    0.8761    0.8760    261758



In [22]:
# Test-set metrics for the TinyBERT classifier; keeps predictions and labels for later use.
# NOTE(review): `test_dataloader` is rebound in every model section of this
# notebook; make sure the TinyBert loader cell was the last one executed.
TB_preds, TB_true_labels = test_model(TB_model, test_dataloader, device)

Classification Report:



100%|██████████| 7395/7395 [03:43<00:00, 33.16it/s]


              precision    recall  f1-score   support

           0     0.7583    0.5596    0.6440     34675
           1     0.6842    0.8425    0.7551     39266

    accuracy                         0.7098     73941
   macro avg     0.7212    0.7010    0.6995     73941
weighted avg     0.7189    0.7098    0.7030     73941



# Electra

In [8]:
# Load the cached ELECTRA inputs and the labels for each split.
# weights_only is passed explicitly so the cell behaves the same on PyTorch
# releases where the default is True, and so no FutureWarning is emitted.
# SECURITY: weights_only=False unpickles arbitrary objects - only load files you created.
# NOTE(review): switch to weights_only=True if these files hold only tensors / plain containers.
train_EL_inputs = torch.load('intermediates/EL_inputs.pt', weights_only=False)
dev_EL_inputs = torch.load('intermediates/EL_dev_inputs.pt', weights_only=False)
test_EL_inputs = torch.load('intermediates/EL_test_inputs.pt', weights_only=False)

train_labels = torch.load('intermediates/labels.pt', weights_only=False)
dev_labels = torch.load('intermediates/labels_dev.pt', weights_only=False)
test_labels = torch.load('intermediates/labels_test.pt', weights_only=False)

In [9]:
# Pair each split's ELECTRA inputs with its labels.
train_custom_dataset, dev_custom_dataset, test_custom_dataset = (
    CustomDataset(inputs=split_inputs, labels=split_labels)
    for split_inputs, split_labels in (
        (train_EL_inputs, train_labels),
        (dev_EL_inputs, dev_labels),
        (test_EL_inputs, test_labels),
    )
)

# Only the training loader shuffles; dev / test keep their stored order.
train_dataloader = DataLoader(dataset=train_custom_dataset, batch_size=10, shuffle=True)
dev_dataloader = DataLoader(dataset=dev_custom_dataset, batch_size=10)
test_dataloader = DataLoader(dataset=test_custom_dataset, batch_size=10)

In [10]:
# Pretrained ELECTRA-small discriminator encoder; the next cell wraps it with in_channels=256.
electra = ElectraModel.from_pretrained('google/electra-small-discriminator')

In [11]:
# Pick the accelerator first; the model is moved onto it at the end of the cell.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

# Wrap the ELECTRA encoder with the CNN-LSTM head and enable training mode.
EL_model = CNNLSTMClassifier(electra, in_channels=256)
EL_model.train()

criterion = nn.CrossEntropyLoss()
# Learning rate used by the baseline from COLING 2025.
optimizer = AdamW(EL_model.parameters(), lr=2e-5)

# Last expression: moves the model and echoes its architecture as the cell output.
EL_model.to(device)



Using device: cuda


CNNLSTMClassifier(
  (bert): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(30522, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (embeddings_project): Linear(in_features=128, out_features=256, bias=True)
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0-11): 12 x ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=2

In [None]:
# Fine-tune for one epoch, stepping the optimizer every 10 batches.
# Long-running: the recorded run below took about 2 h 17 min for 61,077 batches.
EL_model = train_model(EL_model, train_dataloader, criterion, optimizer, num_epochs=1, accumulation_steps=10, device=device) #136m 53.1s

100%|██████████| 61077/61077 [2:16:53<00:00,  7.44it/s]  

Epoch 1/1, Loss: 0.3171





In [15]:
# Persist the whole module object (not just a state_dict); reloading it needs
# the CNNLSTMClassifier class to be defined in the loading session.
# NOTE(review): assumes the `models/` directory already exists - confirm.
torch.save(EL_model, 'models/EL_model.pt')

In [16]:
# Dev-set metrics for the ELECTRA classifier.
# NOTE(review): `dev_dataloader` is rebound in every model section of this
# notebook; make sure the Electra loader cell was the last one executed.
evaluate_model(EL_model, dev_dataloader, device)

Classification Report:



100%|██████████| 26176/26176 [19:31<00:00, 22.35it/s]


              precision    recall  f1-score   support

           0     0.9291    0.6536    0.7674     98328
           1     0.8231    0.9700    0.8906    163430

    accuracy                         0.8511    261758
   macro avg     0.8761    0.8118    0.8290    261758
weighted avg     0.8629    0.8511    0.8443    261758



In [17]:
# Test-set metrics for the ELECTRA classifier; keeps predictions and labels for later use.
# NOTE(review): the `AB_` prefix does not match the `EL_` naming used in the rest of
# this section - possibly a leftover; rename if nothing downstream depends on it.
AB_preds, AB_true_labels = test_model(EL_model, test_dataloader, device)

Classification Report:



100%|██████████| 7395/7395 [05:31<00:00, 22.28it/s]


              precision    recall  f1-score   support

           0     0.8379    0.3740    0.5171     34675
           1     0.6287    0.9361    0.7522     39266

    accuracy                         0.6725     73941
   macro avg     0.7333    0.6550    0.6347     73941
weighted avg     0.7268    0.6725    0.6420     73941

