In [1]:
import torch
import pandas as pd
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments, DataCollatorForSeq2Seq

In [None]:
import torch
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained BART checkpoint and its tokenizer.
# The checkpoint id lives in a single constant so the tokenizer and the model
# are always loaded from the same checkpoint.
MODEL_NAME = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)
model = BartForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

# Read the train / validation / test CSV splits.
train_df = pd.read_csv("/kaggle/input/intentconan2/train.csv")
val_df = pd.read_csv("/kaggle/input/intentconan2/validation.csv")
test_df = pd.read_csv("/kaggle/input/intentconan2/test.csv")

# Quick look at the first rows of the training split.
print("Train Dataset Sample:\n", train_df.head())


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Train Dataset Sample:
                                           hatespeech       csType  \
0  Maybe the UN could talk to those asian and afr...  Informative   
1  Maybe the UN could talk to those asian and afr...  Questioning   
2  Maybe the UN could talk to those asian and afr...   Denouncing   
3  Maybe the UN could talk to those asian and afr...     Positive   
4  Juice Jews are worse that nukes. Caption to a ...  Informative   

                                       counterspeech Suggest  Relevance  \
0  The us is the second most polluting country in...       3        4.0   
1  Doesn't everyone on the planet have a responsi...                3.0   
2  The world would be a better place if people we...       1        1.0   
3  You're right, ocean pollution is one of the ma...       3        4.0   
4  Anti-semitism is a serious problem that we nee...       2        3.0   

   Aggressive  Complexity  Comments source  \
0         2.0         3.0       NaN  Human   
1         2.0      

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


In [None]:
def preprocess_function(examples, max_input_length=512, max_target_length=250, padding="max_length"):
    """Tokenize a batch of hate-speech / intent pairs and their counterspeech targets.

    Every source text is built as "Hate Speech: {hatespeech} Intent: {csType}";
    the matching "counterspeech" entry is the target. Uses the module-level
    ``tokenizer``.

    Args:
        examples: Batch mapping with parallel "hatespeech", "csType" and
            "counterspeech" sequences.
        max_input_length: Truncation (and, with max-length padding, padding)
            length for the source texts. Defaults to 512.
        max_target_length: Same, for the target texts. Defaults to 250.
        padding: Padding strategy forwarded to the tokenizer for both sources
            and targets. Defaults to "max_length".

    Returns:
        The tokenizer output for the source texts with an added "labels"
        entry: the target token ids, where every padding token id has been
        replaced by -100.
    """
    sources = [
        f"Hate Speech: {hs} Intent: {intent}"
        for hs, intent in zip(examples["hatespeech"], examples["csType"])
    ]
    targets = examples["counterspeech"]

    model_inputs = tokenizer(sources, max_length=max_input_length, truncation=True, padding=padding)
    labels = tokenizer(targets, max_length=max_target_length, truncation=True, padding=padding)

    # Padding positions are replaced with -100 so they do not count as targets
    # in the loss computation.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token == pad_id else token for token in sequence]
        for sequence in labels["input_ids"]
    ]
    return model_inputs

# Wrap each pandas split in a Hugging Face Dataset.
train_dataset, val_dataset, test_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, val_df, test_df)
)

# Tokenize every split in batches and collect the results under their split names.
tokenized_datasets = DatasetDict(
    {
        split_name: split.map(preprocess_function, batched=True)
        for split_name, split in (
            ("train", train_dataset),
            ("validation", val_dataset),
            ("test", test_dataset),
        )
    }
)


Map:   0%|          | 0/9532 [00:00<?, ? examples/s]

Map:   0%|          | 0/1470 [00:00<?, ? examples/s]

Map:   0%|          | 0/2971 [00:00<?, ? examples/s]

In [None]:

# Training configuration: evaluate and checkpoint once per epoch for 3 epochs.
training_args = TrainingArguments(
    # Output locations and reporting
    output_dir="./bart_finetuned_samples",
    logging_dir="./logs",
    report_to="none",
    # Schedule
    num_train_epochs=3,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,  # Keep only last checkpoint
    logging_steps=100,
    # Optimisation
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    fp16=torch.cuda.is_available(),  # mixed precision only when CUDA is available
)

# Collator that assembles seq2seq batches for the Trainer.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)

# Fine-tune, then write the final model to disk.
trainer.train()
trainer.save_model("./bart_finetuned_model_samples")


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,1.7743,1.878842
2,1.4028,1.839502
3,1.1212,1.834578




In [None]:
import torch
import pandas as pd
from tqdm import tqdm
from transformers import BartForConditionalGeneration, BartTokenizer, pipeline

# Directory holding the fine-tuned model.
MODEL_PATH = "./bart_finetuned_model_samples"

# Reload tokenizer and model from disk, placing the model on the GPU when present.
tokenizer = BartTokenizer.from_pretrained(MODEL_PATH)
model = BartForConditionalGeneration.from_pretrained(MODEL_PATH)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Load the test split and add an empty column that will receive the generations.
test_df = pd.read_csv("/kaggle/input/intentconan2/test.csv")
test_df["BART_counterspeech"] = ""

# Text-to-text generation pipeline around the reloaded model
# (device 0 = first GPU, -1 = CPU).
bart_generator = pipeline(
    "text2text-generation",
    tokenizer=tokenizer,
    model=model,
    device=0 if torch.cuda.is_available() else -1,
)


def generate_counterspeech_bart(hate_speech, intent):
    """Generate one counterspeech reply with the fine-tuned BART pipeline.

    Args:
        hate_speech: The hate-speech text to respond to.
        intent: The counterspeech intent label (the dataset's ``csType`` value).

    Returns:
        The stripped generated text; "No response generated." when the
        pipeline returns nothing; "" when generation raises (the error is
        printed and swallowed so one bad row does not abort a long run).
    """
    # Build the source text in exactly the format used for fine-tuning
    # (see preprocess_function). The model was never trained on an
    # instruction-style prompt, so any other wording is out-of-distribution.
    prompt = f"Hate Speech: {hate_speech} Intent: {intent}"

    try:
        response = bart_generator(
            prompt,
            max_length=100,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.0,
            do_sample=True,
            # Cut over-long inputs instead of failing inside the model.
            truncation=True,
        )

        if not response:
            return "No response generated."
        return response[0]["generated_text"].strip()

    except Exception as e:
        print("Error generating response with BART:", e)
        return ""


print(f"\nGenerating counterspeech using BART for {len(test_df)} samples...")

# Pair the two input columns positionally so the loop does not depend on the
# frame having a default 0..n-1 index, and write the whole column in one go.
test_df["BART_counterspeech"] = [
    generate_counterspeech_bart(hate_speech, intent)
    for hate_speech, intent in tqdm(
        zip(test_df["hatespeech"], test_df["csType"]),
        total=len(test_df),
        desc="Processing Hate Speech",
    )
]

print("\nGenerated Counterspeech Samples:")
print(test_df[["hatespeech", "csType", "BART_counterspeech"]])

# Persist the test split together with the generated column.
output_file = "/kaggle/working/test_counterspeech.csv"
test_df.to_csv(output_file, index=False)

print(f"\nCounterspeech generation complete! Results saved to {output_file}")


Device set to use cuda:0



Generating counterspeech using BART for 10 samples...


Processing Hate Speech:   0%|          | 10/2971 [00:10<49:44,  1.01s/it] You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing Hate Speech: 100%|██████████| 2971/2971 [50:44<00:00,  1.02s/it]


Generated Counterspeech Samples:
                                             hatespeech       csType  \
0     Women are basically childlike, they remain thi...  Informative   
1     Women are basically childlike, they remain thi...   Denouncing   
2     Women are basically childlike, they remain thi...     Positive   
3     Minorities vote to strip wealth and power from...  Informative   
4     Minorities vote to strip wealth and power from...  Questioning   
...                                                 ...          ...   
2966  The Holocaust never happened. It was a coverup...  Informative   
2967  We cannot coexist with millions of Muslims in ...     Positive   
2968  Women are basically childlike, they remain thi...  Informative   
2969  Women do not have equal rights, they are subhu...  Questioning   
2970  Women have no place in our society, its a mens...  Questioning   

                                     BART_counterspeech  
0     I understand that you may have some s




In [6]:
%pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13
Note: you may need to restart the kernel to use updated packages.


In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

def calculate_bleu(reference, candidate, smoothing_function=None):
    """Compute the sentence-level BLEU score of ``candidate`` against ``reference``.

    Both texts are tokenized by whitespace splitting.

    Args:
        reference: Reference text, or a missing value (NaN / None).
        candidate: Generated text, or a missing value (NaN / None).
        smoothing_function: Optional smoothing callable forwarded to NLTK's
            ``sentence_bleu`` (e.g. a ``SmoothingFunction().methodN``). The
            default ``None`` keeps the unsmoothed score.

    Returns:
        0.0 when either text is missing or has no tokens, otherwise the value
        returned by ``sentence_bleu``.
    """
    if pd.isna(reference) or pd.isna(candidate):  # handling missing values
        return 0.0

    reference_tokens = reference.split()
    candidate_tokens = candidate.split()

    # Empty or whitespace-only text has no n-grams to match; score it 0
    # directly instead of handing an empty token list to NLTK.
    if not reference_tokens or not candidate_tokens:
        return 0.0

    return sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=smoothing_function,
    )

In [3]:
# Load the CSV that holds the BART generations to be scored.
# NOTE(review): presumably a re-uploaded copy of the generation cell's output - confirm.
bart_results_csv = "/kaggle/input/bart-cs/test_bart_counterspeech.csv"
test_df = pd.read_csv(bart_results_csv)

In [None]:
# Score every generated reply against the reference counterspeech of the same row.
test_df["bleu_bart"] = [
    calculate_bleu(reference, candidate)
    for reference, candidate in zip(test_df["counterspeech"], test_df["BART_counterspeech"])
]

# Average of the per-row scores.
mean_bleu_bart = test_df["bleu_bart"].mean()
print(f"📌 **Mean BLEU Score (BART):** {mean_bleu_bart:.4f}")

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


📌 **Mean BLEU Score (BART):** 0.2862


In [None]:
from bert_score import score

# Replace missing texts with empty strings and convert both columns to plain lists.
bart_outputs = test_df["BART_counterspeech"].fillna("").tolist()
references = test_df["counterspeech"].fillna("").tolist()

# Generated texts are passed first, references second; the three returned
# values are unpacked as precision, recall and F1.
P_bart, R_bart, F1_bart = score(bart_outputs, references, lang="en", verbose=True)

# Store the per-row scores next to the texts they belong to.
for column, values in (
    ("bert_p_bart", P_bart),
    ("bert_r_bart", R_bart),
    ("bert_f1_bart", F1_bart),
):
    test_df[column] = values.tolist()

# Mean F1 over all rows.
mean_bert_f1_bart = F1_bart.mean().item()
print(f"📌 **Mean BERT-F1 Score (BART):** {mean_bert_f1_bart:.4f}")


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/93 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/47 [00:00<?, ?it/s]

done in 32.61 seconds, 91.11 sentences/sec
📌 **Mean BERT-F1 Score (BART):** 0.8726
