In [1]:
import transformers
import torch
import pandas as pd
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, Trainer, TrainingArguments, BartForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput
from torch.optim import Adam
from accelerate import Accelerator
import wandb
from tqdm import tqdm
from torch.optim import AdamW
import torch.nn as nn 

import torch
from tqdm import tqdm
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from nltk.translate.meteor_score import meteor_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
import numpy as np
import nltk
from nltk.tokenize import word_tokenize

In [2]:
!nvidia-smi

Mon Apr 14 16:56:40 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A5000               Off | 00000000:01:00.0 Off |                  Off |
| 30%   26C    P8              15W / 230W |      3MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
from evaluate import load
from rouge_score import rouge_scorer
from bert_score import score as bert_score

# meteor = load("meteor")
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
accelerator = Accelerator()

In [4]:
!wandb login --relogin --verify f59d448beb3315f3efbc5a0a80d9d2c346926308

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/dhruv/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnoel22338[0m ([33mnlp_project_team[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
training_data = pd.read_csv('../Dataset/train.csv')
testing_data = pd.read_csv('../Dataset/test.csv')
validation_data = pd.read_csv('../Dataset/validation.csv')

In [6]:
columns = training_data.columns
print(columns)

Index(['hatespeech', 'csType', 'counterspeech', 'Suggest', 'Relevance',
       'Aggressive', 'Complexity', 'Comments', 'source', 'claim',
       'centralTopic', 'speakerIntent', 'targetGroup', 'relevantPowerDynamics',
       'hatespeechImplication', 'targetGroupEmotionalReaction',
       'targetGroupCognitiveReaction', 'hatespeechOffensiveness', 'id',
       'is_high_quality', 'hs_id', 'hatespeechTarget', 'powerDynamics',
       'prompt_offensiveness', 'prompt_target_group', 'prompt_speaker_intent',
       'prompt_power_dynamics', 'prompt_implication',
       'prompt_emotional_reaction', 'prompt_cognitive_reaction',
       'prompt_cs_generation'],
      dtype='object')


In [7]:
class DialoGPTDataset(Dataset):
    def __init__(self, data):
        self.data = data.reset_index(drop=True)
        self.tokenizer = AutoTokenizer.from_pretrained("GroNLP/hateBERT")
        self.bart_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")

        # Intent label mapping
        self.categories = {
            'informative': 0,
            'questioning': 1,
            'denouncing': 2,
            'positive': 3
        }

        # Create a mapping from hate speech → list of intent labels
        self.intent_map = (
            data.groupby("hatespeech")["csType"]
            .apply(lambda x: [self.categories[t.lower()] for t in x.unique()])
            .to_dict()
        )

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        # Tokenize hate speech
        hate_inputs = self.tokenizer(
            row["hatespeech"],
            return_tensors='pt',
            max_length=128,
            truncation=True,
            padding="max_length"
        )

        # Tokenize counterspeech
        counter_inputs = self.bart_tokenizer(
            row["counterspeech"],
            return_tensors='pt',
            max_length=128,
            truncation=True,
            padding="max_length"
        )

        intent_id = torch.tensor(self.categories[row["csType"].lower()], dtype=torch.long)
        all_intents = self.intent_map[row["hatespeech"]]  #Look up all intents for this hate speech

        return {
            'input_ids': hate_inputs['input_ids'].squeeze(0),
            'attention_mask': hate_inputs['attention_mask'].squeeze(0),
            'counter_speech': counter_inputs['input_ids'].squeeze(0),
            'intent_id': intent_id,
            'raw_text': row["hatespeech"],
            'all_intents': all_intents  
        }

    def __len__(self):
        return len(self.data)


def custom_collate_fn(batch):
    input_ids = torch.stack([item['input_ids'] for item in batch])
    attention_mask = torch.stack([item['attention_mask'] for item in batch])
    counter_speech = torch.stack([item['counter_speech'] for item in batch])
    intent_id = torch.stack([item['intent_id'] for item in batch])
    all_intents = [item['all_intents'] for item in batch]
    raw_inputs = [item['raw_text'] for item in batch]

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'counter_speech': counter_speech,
        'intent_id': intent_id,
        'all_intents': all_intents,
        'raw_inputs':raw_inputs
    }



In [8]:
train_dataset = DialoGPTDataset(training_data)
test_dataset = DialoGPTDataset(testing_data)
validation_dataset = DialoGPTDataset(validation_data)

print(len(train_dataset))
print(len(test_dataset))
print(len(validation_dataset))

print(train_dataset[0])


9532
2971
1470
{'input_ids': tensor([  101,  2672,  1996,  4895,  2071,  2831,  2000,  2216,  4004,  1998,
         3060,  3741,  3625,  2005,  3938,  1009,  1997,  1996, 10796,  1999,
         1996, 17401,  2612,  1997, 22604,  2006,  2023, 14636,  2055,  4785,
         2689,  1012,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,

In [9]:
class FeatureEncoder(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, next_input):
        super(FeatureEncoder, self).__init__()
        self.model = AutoModel.from_pretrained('GroNLP/hateBERT')
        self.hidden_dim = hidden_dim
        self.output_size = next_input

        self.informative_head = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim, self.output_size),
            torch.nn.ReLU(),
        )

        self.questioning_head = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim, self.output_size),
            torch.nn.ReLU(),
        )

        self.denouncing_head = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim, self.output_size),
            torch.nn.ReLU(),
        )

        self.positive_head = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim, self.output_size),
            torch.nn.ReLU(),
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        hate_speech_h = outputs.last_hidden_state[:, 0, :]

        informative_e = self.informative_head(hate_speech_h)
        questioning_e = self.questioning_head(hate_speech_h)
        denouncing_e = self.denouncing_head(hate_speech_h)
        positive_e = self.positive_head(hate_speech_h)

        return informative_e, questioning_e, denouncing_e, positive_e, hate_speech_h

### Linear Fusion Mechanism in CounterSpeechNetwork

In [10]:
class CounterSpeechNetwork(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, encoder_output, max_length):
        super(CounterSpeechNetwork, self).__init__()

        self.feature_encoder = FeatureEncoder(input_dim, hidden_dim, encoder_output)

        self.informative_decoder = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
        self.questioning_decoder = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
        self.denouncing_decoder = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
        self.positive_decoder = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

        self.informative_fusion = torch.nn.Linear(hidden_dim + encoder_output, self.informative_decoder.config.d_model)
        self.questioning_fusion = torch.nn.Linear(hidden_dim + encoder_output, self.questioning_decoder.config.d_model)
        self.denouncing_fusion = torch.nn.Linear(hidden_dim + encoder_output, self.denouncing_decoder.config.d_model)
        self.positive_fusion = torch.nn.Linear(hidden_dim + encoder_output, self.positive_decoder.config.d_model)

        self.tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
        self.max_length = max_length

    def forward(self, input_ids, attention_mask, intent_id, counter_speech=None):
        informative_e, questioning_e, denouncing_e, positive_e, hate_speech_h = self.feature_encoder(input_ids, attention_mask)

        batch_size = input_ids.size(0)

        fused = torch.zeros(batch_size, 1, self.informative_decoder.config.d_model, device=input_ids.device)
    
        for i in range(batch_size):
            if intent_id[i] == 0:
                fused[i] = self.informative_fusion(torch.cat((hate_speech_h[i], informative_e[i]), dim=-1)).unsqueeze(0)
            elif intent_id[i] == 1:
                fused[i] = self.questioning_fusion(torch.cat((hate_speech_h[i], questioning_e[i]), dim=-1)).unsqueeze(0)
            elif intent_id[i] == 2:
                fused[i] = self.denouncing_fusion(torch.cat((hate_speech_h[i], denouncing_e[i]), dim=-1)).unsqueeze(0)
            elif intent_id[i] == 3:
                fused[i] = self.positive_fusion(torch.cat((hate_speech_h[i], positive_e[i]), dim=-1)).unsqueeze(0)
            else:
                raise ValueError(f"Invalid intent_id: {intent_id[i]}")

        if counter_speech is not None:
            losses = []
            for i in range(batch_size):
                if intent_id[i] == 0:
                    output = self.informative_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0))
                elif intent_id[i] == 1:
                    output = self.questioning_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0))
                elif intent_id[i] == 2:
                    output = self.denouncing_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0))
                elif intent_id[i] == 3:
                    output = self.positive_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0))
                losses.append(output.loss)
            avg_loss = sum(losses) / len(losses)  # Average loss across the batch
            return None, avg_loss  # No decoded text during training
        else:
            decoded_texts = []
            for i in range(batch_size):
                if intent_id[i] == 0:
                    output = self.informative_decoder.generate(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), max_length=self.max_length, num_beams=4, early_stopping=True)
                elif intent_id[i] == 1:
                    output = self.questioning_decoder.generate(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), max_length=self.max_length, num_beams=4, early_stopping=True)
                elif intent_id[i] == 2:
                    output = self.denouncing_decoder.generate(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), max_length=self.max_length, num_beams=4, early_stopping=True)
                elif intent_id[i] == 3:
                    output = self.positive_decoder.generate(encoder_outputs=BaseModelOutput(last_hidden_state=fused[i].unsqueeze(0)), max_length=self.max_length, num_beams=4, early_stopping=True)
                decoded_texts.append(self.tokenizer.decode(output[0], skip_special_tokens=True))
            return decoded_texts, None  # Decoded text during inference, no loss
    def judge_responses(self,input_ids, attention_mask,counter_speech=None):
        
        informative_e, questioning_e, denouncing_e, positive_e, hate_speech_h = self.feature_encoder(input_ids, attention_mask)

        batch_size = input_ids.size(0)
        output_dict = {'informative':[],'questioning':[],'denouncing':[],'positive':[]}
        informative_fused = torch.zeros(batch_size, 1, self.informative_decoder.config.d_model, device=input_ids.device)
        questioning_fused = torch.zeros(batch_size, 1, self.informative_decoder.config.d_model, device=input_ids.device)
        denouncing_fused = torch.zeros(batch_size, 1, self.informative_decoder.config.d_model, device=input_ids.device)
        positive_fused = torch.zeros(batch_size, 1, self.informative_decoder.config.d_model, device=input_ids.device)
        
    
        for i in range(batch_size):
            informative_fused[i] = self.informative_fusion(torch.cat((hate_speech_h[i], informative_e[i]), dim=-1)).unsqueeze(0)
            questioning_fused[i] = self.questioning_fusion(torch.cat((hate_speech_h[i], questioning_e[i]), dim=-1)).unsqueeze(0)
            denouncing_fused[i] = self.denouncing_fusion(torch.cat((hate_speech_h[i], denouncing_e[i]), dim=-1)).unsqueeze(0)
            positive_fused[i] = self.positive_fusion(torch.cat((hate_speech_h[i], positive_e[i]), dim=-1)).unsqueeze(0)


            output_dict['informative'].append(self.informative_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=informative_fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0)))
            output_dict['questioning'].append(self.questioning_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=questioning_fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0)))
            output_dict['denouncing'].append(self.denouncing_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=denouncing_fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0)))
            output_dict['positive'].append(self.positive_decoder(encoder_outputs=BaseModelOutput(last_hidden_state=positive_fused[i].unsqueeze(0)), labels=counter_speech[i].unsqueeze(0)))


        return output_dict

In [11]:
model = CounterSpeechNetwork(input_dim=128, hidden_dim=768, encoder_output=256, max_length=50)
optimizer = AdamW(model.parameters(), lr=5e-5)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn= custom_collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
validation_dataloader = DataLoader(validation_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

epochs = 10

for epoch in range(epochs):
    model.train()
    total_train_loss = 0.0

    # Wrap train_dataloader with tqdm for training progress
    train_loop = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs} [Train]", leave=False)
    for batch in train_loop:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        counter_speech = batch['counter_speech'].to(device)
        intent_ids = batch['intent_id'].to(device)

        optimizer.zero_grad()

        # Process the entire batch at once
        _, loss = model(input_ids, attention_mask, intent_ids, counter_speech)

        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

        # Update tqdm with current batch loss
        train_loop.set_postfix({'batch_loss': loss.item(), 'avg_loss': total_train_loss / (train_loop.n + 1)})

    avg_train_loss = total_train_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}/{epochs} | Train Loss: {avg_train_loss:.4f}")

    # Validation
    model.eval()
    total_val_loss = 0.0

    # Wrap validation_dataloader with tqdm for validation progress
    val_loop = tqdm(validation_dataloader, desc=f"Epoch {epoch + 1}/{epochs} [Validation]", leave=False)
    with torch.no_grad():
        for batch in val_loop:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            counter_speech = batch['counter_speech'].to(device)
            intent_ids = batch['intent_id'].to(device)

            # Process the entire batch at once
            _, loss = model(input_ids, attention_mask, intent_ids, counter_speech)

            total_val_loss += loss.item()

            # Update tqdm with current batch loss
            val_loop.set_postfix({'batch_loss': loss.item(), 'avg_loss': total_val_loss / (val_loop.n + 1)})

    avg_val_loss = total_val_loss / len(validation_dataloader)
    print(f"Epoch {epoch + 1}/{epochs} | Validation Loss: {avg_val_loss:.4f}")

                                                                                                                                                                                                    

Epoch 1/10 | Train Loss: 2.1134


                                                                                                                                                                                                    

Epoch 1/10 | Validation Loss: 0.7529


                                                                                                                                                                                                    

Epoch 2/10 | Train Loss: 0.7974


                                                                                                                                                                                                    

Epoch 2/10 | Validation Loss: 0.7116


                                                                                                                                                                                                    

Epoch 3/10 | Train Loss: 0.7171


                                                                                                                                                                                                    

Epoch 3/10 | Validation Loss: 0.6950


                                                                                                                                                                                                    

Epoch 4/10 | Train Loss: 0.6578


                                                                                                                                                                                                    

Epoch 4/10 | Validation Loss: 0.6852


                                                                                                                                                                                                    

Epoch 5/10 | Train Loss: 0.6061


                                                                                                                                                                                                    

Epoch 5/10 | Validation Loss: 0.6786


                                                                                                                                                                                                    

Epoch 6/10 | Train Loss: 0.5604


                                                                                                                                                                                                    

Epoch 6/10 | Validation Loss: 0.6788


                                                                                                                                                                                                    

Epoch 7/10 | Train Loss: 0.5169


                                                                                                                                                                                                    

Epoch 7/10 | Validation Loss: 0.6864


                                                                                                                                                                                                    

Epoch 8/10 | Train Loss: 0.4765


                                                                                                                                                                                                    

Epoch 8/10 | Validation Loss: 0.6965


                                                                                                                                                                                                    

Epoch 9/10 | Train Loss: 0.4376


                                                                                                                                                                                                    

Epoch 9/10 | Validation Loss: 0.7042


                                                                                                                                                                                                    

Epoch 10/10 | Train Loss: 0.4006


                                                                                                                                                                                                    

Epoch 10/10 | Validation Loss: 0.7238




In [12]:
torch.save(model.state_dict(), 'HateBERT_linear_fusion_final.pth')

### Evaluation Script

In [13]:
test_dataset = DialoGPTDataset(testing_data)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)

In [14]:
def evaluate_with_judge_model(model, test_dataloader, device):
    categories = {
        'informative': 0,
        'questioning': 1,
        'denouncing': 2,
        'positive': 3
    }

    model.to(device)
    model.eval()

    from transformers import AutoTokenizer, AutoModelForCausalLM
    judge_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
    judge_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small").to(device)
    judge_model.eval()
    print("Judge model ready!")

    total_samples = 0
    correct_predictions = 0

    test_loop = tqdm(test_dataloader, desc="Test Evaluation", leave=True)
    with torch.no_grad():
        for batch in test_loop:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            counter_speech = batch['counter_speech'].to(device)
            raw_inputs = batch['raw_inputs']
            all_intents = batch['all_intents']  # list of gold intents per sample (as int indices)

            outputs = model.judge_responses(input_ids, attention_mask, counter_speech)

            for i in range(len(input_ids)):
                intent_scores = {}

                for intent in ["informative", "questioning", "denouncing", "positive"]:
                    response = model.tokenizer.decode(
                        torch.argmax(outputs[intent][i].logits, dim=-1)[0],
                        skip_special_tokens=True
                    )

                    # Prompting for a rating
                    prompt = (
                        f"Hate speech: {raw_inputs[i]}\n"
                        f"Proposed counterspeech (intent: {intent}): {response}\n\n"
                        f"On a scale of 1 to 10, how appropriate and effective is this counterspeech in response to the hate speech? Just respond with a number."
                    )

                    judge_input = judge_tokenizer.encode(prompt, return_tensors='pt').to(device)
                    output_ids = judge_model.generate(judge_input, max_new_tokens=10, pad_token_id=judge_tokenizer.eos_token_id)
                    score_text = judge_tokenizer.decode(output_ids[0], skip_special_tokens=True)

                    try:
                        # Extract the first number in response (robust to "I would give it a 7" etc.)
                        score = next((float(s) for s in score_text.split() if s.replace('.', '', 1).isdigit()), 0)
                        score = max(0, min(score, 10))  # Clamp between 0 and 10
                    except:
                        score = 0  # Fallback score if judge LM fails

                    intent_scores[intent] = score

                # Pick best scoring intent
                best_intent = max(intent_scores, key=intent_scores.get)
                best_intent_idx = categories[best_intent]

                if best_intent_idx in all_intents[i]:
                    correct_predictions += 1
                total_samples += 1

            test_loop.set_postfix({'accuracy': correct_predictions / total_samples if total_samples else 0})

    final_accuracy = correct_predictions / total_samples if total_samples else 0
    return final_accuracy

In [15]:
# # Download required data for METEOR
# nltk.download('wordnet')
# nltk.download('punkt')

# Initialize model
model = CounterSpeechNetwork(input_dim=128, hidden_dim=768, encoder_output=256, max_length=50)
model.load_state_dict(torch.load("HateBERT_linear_fusion_final.pth", map_location="cuda" if torch.cuda.is_available() else "cpu"))
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluation loop
test_predictions = []
test_references = []

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
meteor_scores = []
cosine_sims = []

test_loop = tqdm(test_dataloader, desc="Test Evaluation", leave=True)

with torch.no_grad():
    for batch in test_loop:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        counter_speech = batch['counter_speech'].to(device)  # Reference texts
        intent_ids = batch['intent_id'].to(device)

        predictions, logits = model(input_ids, attention_mask, intent_ids)
        
        # Decode predictions and references
        pred_texts = predictions
        ref_texts = [model.tokenizer.decode(cs, skip_special_tokens=True) for cs in counter_speech]

        test_predictions.extend(pred_texts)
        test_references.extend(ref_texts)

        # Compute METEOR and Cosine Similarity
        for pred, ref in zip(pred_texts, ref_texts):
            score = meteor_score([ref.split()], pred.split())
            meteor_scores.append(score)

            # Cosine similarity using simple TF representation
            pred_vec = model.tokenizer(pred, return_tensors="pt", padding=True, truncation=True)['input_ids'].float()
            ref_vec = model.tokenizer(ref, return_tensors="pt", padding=True, truncation=True)['input_ids'].float()

            pred_vec = normalize(torch.sum(pred_vec, dim=1).numpy().reshape(1, -1))
            ref_vec = normalize(torch.sum(ref_vec, dim=1).numpy().reshape(1, -1))
            cos_sim = cosine_similarity(pred_vec, ref_vec)[0][0]
            cosine_sims.append(cos_sim)


  model.load_state_dict(torch.load("HateBERT_linear_fusion_final.pth", map_location="cuda" if torch.cuda.is_available() else "cpu"))
Test Evaluation:   0%|                                                                                                                                                       | 0/93 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Test Evaluation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 93/93 [10:13<00:00,  6.60s/it]


In [None]:
# Compute BERTScore
P, R, F1 = bert_score(test_predictions, test_references, lang="en", verbose=True)

# Compute ROUGE
rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}
for pred, ref in zip(test_predictions, test_references):
    scores = scorer.score(ref, pred)
    rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
    rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
    rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

avg_rouge1 = sum(rouge_scores['rouge1']) / len(rouge_scores['rouge1'])
avg_rouge2 = sum(rouge_scores['rouge2']) / len(rouge_scores['rouge2'])
avg_rougeL = sum(rouge_scores['rougeL']) / len(rouge_scores['rougeL'])
avg_meteor = sum(meteor_scores) / len(meteor_scores)
avg_cosine = sum(cosine_sims) / len(cosine_sims)

# Compute category (intent) accuracy
intent_accuracy = evaluate_with_judge_model(model,test_dataloader,device)

# Display results
print(f"\n=== Evaluation Metrics ===")
print(f"Total Predictions: {len(test_predictions)}")
print(f"BERTScore - Precision: {P.mean():.4f}, Recall: {R.mean():.4f}, F1: {F1.mean():.4f}")
print(f"ROUGE - Rouge-1: {avg_rouge1:.4f}, Rouge-2: {avg_rouge2:.4f}, Rouge-L: {avg_rougeL:.4f}")
print(f"METEOR: {avg_meteor:.4f}")
print(f"Cosine Similarity: {avg_cosine:.4f}")
print(f"Category Accuracy (Intent): {intent_accuracy:.4f}")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/71 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/47 [00:00<?, ?it/s]

done in 10.16 seconds, 292.32 sentences/sec
Judge model ready!


Test Evaluation:   0%|                                                                                                                                                       | 0/93 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [None]:
# Save test predictions to a text file
model_name = "HateBERT-linear-fusion"
txt_filename = f"predictions_{model_name}.txt"
with open(txt_filename, "w", encoding="utf-8") as f:
    for pred in test_predictions:
        f.write(pred.strip() + "\n")

print(f"📄 Saved predictions to {txt_filename}")


In [None]:
# Total Predictions: 2971
# BERTScore - Precision: 0.8688, Recall: 0.8705, F1: 0.8696
# ROUGE - Rouge-1: 0.2504, Rouge-2: 0.0644, Rouge-L: 0.1752
# METEOR: 0.1543
# Cosine Similarity: 1.0000
# Category Accuracy (Intent): 0.7519