In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!pip install --upgrade transformers
!pip install rouge_score
!pip install bert_score
!pip install evaluate



In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from transformers import Seq2SeqTrainingArguments
from datasets import Dataset
import evaluate

# Hugging Face transformers and datasets libraries
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForSeq2Seq
)


# For reproducibility
import random
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7fab7c994830>

In [None]:

# Data analysis

file_path = "/content/drive/MyDrive/colab/DIALOCONAN.csv"  
df = pd.read_csv(file_path)

print("Dataset Info:")
print(df.info())


print("\nFirst 5 entries:")
print(df.head())


print("\n--- Detailed Analysis ---")

print("\nTurn Type Distribution:")
print(df['type'].value_counts())


print("\nTarget Distribution:")
print(df['TARGET'].value_counts())


print("\nSource Distribution:")
print(df['source'].value_counts())


num_dialogues = df['dialogue_id'].nunique()
print("\nNumber of unique dialogues:", num_dialogues)

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16625 entries, 0 to 16624
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   text         16625 non-null  object
 1   TARGET       16625 non-null  object
 2   dialogue_id  16625 non-null  int64 
 3   turn_id      16625 non-null  int64 
 4   type         16625 non-null  object
 5   source       16625 non-null  object
dtypes: int64(2), object(4)
memory usage: 779.4+ KB
None

First 5 entries:
                                                text    TARGET  dialogue_id  \
0  We’ve just imported 20k ticking time bombs fro...  MIGRANTS            0   
1  Surely people who also risked their lives to h...  MIGRANTS            0   
2  Our own must always come first. We have at lea...  MIGRANTS            0   
3  We also have at least 1 million empty homes. T...  MIGRANTS            0   
4  Our soldiers are left to rot on our streets wh...  MIGRANTS            0   



In [None]:


def create_input_output(df):
    inputs = []
    targets = []

    
    grouped = df.groupby('dialogue_id')

    for dialogue_id, group in grouped:
        
        group = group.sort_values('turn_id')
        
        for i in range(1, len(group)):
            if group.iloc[i]['type'] == 'CN' and group.iloc[i-1]['type'] == 'HS':

                dialogue_history = group.iloc[:i]['text'].tolist()
                input_text = " [SEP] ".join(dialogue_history)
                inputs.append(input_text)
                targets.append(group.iloc[i]['text'])
    return inputs, targets

inputs, targets = create_input_output(df)
print("\nNumber of training pairs generated:", len(inputs))


data_df = pd.DataFrame({'input_text': inputs, 'target_text': targets})


Number of training pairs generated: 8309


In [None]:
#Spliting the dataset into training and validation sets.

train_df, val_df = train_test_split(data_df, test_size=0.1, random_state=42)
print("\nTrain samples:", len(train_df))
print("Validation samples:", len(val_df))


train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)



Train samples: 7478
Validation samples: 831


In [None]:


from transformers import BartTokenizer, BartForConditionalGeneration

model_name = "facebook/bart-large"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Defining maximum sequence lengths for inputs and outputs.
max_input_length = 512
max_target_length = 128




In [None]:
# Preprocessing function

def preprocess_function(examples):
    
    inputs = examples["input_text"]
    targets = examples["target_text"]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True)

   
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=max_target_length, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


train_dataset = train_dataset.map(preprocess_function, batched=True)
val_dataset = val_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/7478 [00:00<?, ? examples/s]



Map:   0%|          | 0/831 [00:00<?, ? examples/s]

In [None]:

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [None]:
# Training arguments
from transformers import Seq2SeqTrainingArguments



training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=5e-5,
    predict_with_generate=True,
    eval_accumulation_steps=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.001,
    save_total_limit=1,
    logging_dir="./logs",
    logging_steps=50,
    report_to=[]  
)

In [None]:
# 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    # compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [None]:
# Training the model
import torch
torch.cuda.empty_cache()
trainer.train()
model.save_pretrained("/content/drive/MyDrive/colab/dialoconan_counterspeech_model")
tokenizer.save_pretrained("/content/drive/MyDrive/colab/dialoconan_counterspeech_model")



Epoch,Training Loss,Validation Loss
1,2.4984,2.417




('/content/drive/MyDrive/colab/dialoconan_counterspeech_model/tokenizer_config.json',
 '/content/drive/MyDrive/colab/dialoconan_counterspeech_model/special_tokens_map.json',
 '/content/drive/MyDrive/colab/dialoconan_counterspeech_model/vocab.json',
 '/content/drive/MyDrive/colab/dialoconan_counterspeech_model/merges.txt',
 '/content/drive/MyDrive/colab/dialoconan_counterspeech_model/added_tokens.json')

In [None]:


import gc
import torch
from tqdm.auto import tqdm
from transformers import BartForConditionalGeneration, BartTokenizer
import evaluate
import pandas as pd


gc.collect()
torch.cuda.empty_cache()


model_dir = "/content/drive/MyDrive/colab/dialoconan_counterspeech_model"
model     = BartForConditionalGeneration.from_pretrained(model_dir)
tokenizer = BartTokenizer.from_pretrained(model_dir)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()


val_texts  = val_dataset["input_text"]
val_labels = val_dataset["target_text"]
n_samples  = len(val_texts)
print(f"Total validation samples: {n_samples}")


inputs = tokenizer(
    val_texts,
    padding=True,
    truncation=True,
    return_tensors="pt"
)
input_ids      = inputs["input_ids"]
attention_mask = inputs["attention_mask"]


batch_size    = 16    
decoded_preds = []

for start in tqdm(range(0, n_samples, batch_size),
                  desc="Generating",
                  total=(n_samples + batch_size - 1)//batch_size):
    end = min(start + batch_size, n_samples)
    batch_ids   = input_ids[start:end].to(device)
    batch_mask  = attention_mask[start:end].to(device)

    with torch.no_grad():
        gen_ids = model.generate(
            input_ids=batch_ids,
            attention_mask=batch_mask,
            max_length=60,
            num_beams=6,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            length_penalty=1.0,
            early_stopping=True
        )

    
    batch_texts = tokenizer.batch_decode(gen_ids, skip_special_tokens=True)
    decoded_preds.extend(batch_texts)

   
    print(f"  • Processed samples {start + 1}–{end}/{n_samples}")

    # Free GPU memory for next iteration
    del batch_ids, batch_mask, gen_ids
    torch.cuda.empty_cache()


assert len(decoded_preds) == n_samples, "Decoded preds length mismatch!"


bleu  = evaluate.load("bleu")
rouge = evaluate.load("rouge")

bleu_result  = bleu.compute(predictions=decoded_preds, references=val_labels)
rouge_result = rouge.compute(predictions=decoded_preds, references=val_labels)


print("\n Evaluation Results: \n")
print(f"BLEU Score:    {bleu_result['bleu']:.4f}")
print(f"ROUGE-1 F1:    {rouge_result['rouge1']:.4f}")
print(f"ROUGE-2 F1:    {rouge_result['rouge2']:.4f}")
print(f"ROUGE-L F1:    {rouge_result['rougeL']:.4f}")

# Save the predictions to a CSV file
out_df = pd.DataFrame({
    "input_text":  val_texts,
    "target_text": val_labels,
    "prediction":  decoded_preds
})
out_df.to_csv("/content/drive/MyDrive/colab/generated_results.csv", index=False)
print("Saved predictions to /content/drive/MyDrive/colab/generated_results.csv")


Total validation samples: 831


Generating:   0%|          | 0/104 [00:00<?, ?it/s]

  • Processed samples 1–8/831
  • Processed samples 9–16/831
  • Processed samples 17–24/831
  • Processed samples 25–32/831
  • Processed samples 33–40/831
  • Processed samples 41–48/831
  • Processed samples 49–56/831
  • Processed samples 57–64/831
  • Processed samples 65–72/831
  • Processed samples 73–80/831
  • Processed samples 81–88/831
  • Processed samples 89–96/831
  • Processed samples 97–104/831
  • Processed samples 105–112/831
  • Processed samples 113–120/831
  • Processed samples 121–128/831
  • Processed samples 129–136/831
  • Processed samples 137–144/831
  • Processed samples 145–152/831
  • Processed samples 153–160/831
  • Processed samples 161–168/831
  • Processed samples 169–176/831
  • Processed samples 177–184/831
  • Processed samples 185–192/831
  • Processed samples 193–200/831
  • Processed samples 201–208/831
  • Processed samples 209–216/831
  • Processed samples 217–224/831
  • Processed samples 225–232/831
  • Processed samples 233–240/831
  • Proc

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]


📊 Evaluation Results:
BLEU Score:    0.0107
ROUGE-1 F1:    0.1458
ROUGE-2 F1:    0.0192
ROUGE-L F1:    0.1116
✅ Saved predictions to /content/drive/MyDrive/colab/generated_results.csv
