In [None]:
!pip install -q transformers datasets evaluate bert_score detoxify

# BLOOMZ WITHOUT FINE-TUNING

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset, DatasetDict
import numpy as np
from evaluate import load

# 1. Load and prepare dataset
dataset = load_dataset("Rhma/DIALOCONAN")
small_dataset = dataset["train"].select(range(3500))

# Split train data into train/validation BY DIALOGUE, not by row.
# A row-level train_test_split shuffles individual turns, so turns of the same
# dialogue end up in both splits (leakage) and every reconstructed history is
# missing the turns that went to the other split.
dialogue_ids = sorted(set(small_dataset["dialogue_id"]))
shuffled_dialogue_ids = np.random.default_rng(42).permutation(dialogue_ids).tolist()
# Roughly 15% of the dialogues (not of the rows) are held out for validation.
num_val_dialogues = max(1, round(0.15 * len(dialogue_ids)))
val_dialogue_ids = set(shuffled_dialogue_ids[:num_val_dialogues])

train_val = {
    "train": small_dataset.filter(lambda row: row["dialogue_id"] not in val_dialogue_ids),
    "test": small_dataset.filter(lambda row: row["dialogue_id"] in val_dialogue_ids),
}
dataset = DatasetDict({
    "train": train_val["train"],
    "validation": train_val["test"]
})

# 2. Group turns by dialogue_id
def group_dialogues(examples):
    """Collapse a batch of turn-level rows into one record per dialogue.

    Args:
        examples: column-oriented batch with the parallel lists "dialogue_id",
            "turn_id", "text", "type" and "TARGET".

    Returns:
        {"dialogues": [...]}, where each entry holds the "dialogue_id", its
        "turns" (dicts with "text", "type", "target") ordered by turn_id, and a
        dialogue-level "target" copied from the first turn.
    """
    rows = list(zip(examples["dialogue_id"],
                    examples["turn_id"],
                    examples["text"],
                    examples["type"],
                    examples["TARGET"]))
    # Order by dialogue first, then by position inside the dialogue.
    rows.sort(key=lambda row: (row[0], row[1]))

    # Dicts keep insertion order, so dialogues come out in sorted-id order.
    turns_by_dialogue = {}
    for dlg_id, _turn_id, text, turn_type, target in rows:
        turns_by_dialogue.setdefault(dlg_id, []).append(
            {"text": text, "type": turn_type, "target": target}
        )

    grouped = [
        {"dialogue_id": dlg_id, "turns": turns, "target": turns[0]["target"]}
        for dlg_id, turns in turns_by_dialogue.items()
        if dlg_id is not None  # rows without a dialogue id are never emitted
    ]
    return {"dialogues": grouped}

# Group turn rows into dialogues.
# batch_size=None hands each split to group_dialogues as ONE batch. With a fixed
# batch size (previously 1000) grouping only happened inside each batch, so a
# dialogue whose rows straddled a batch boundary was emitted as several fragments.
processed_dataset = dataset.map(
    group_dialogues,
    batched=True,
    remove_columns=dataset["train"].column_names,
    batch_size=None
)

# 3. Create conversation history for each CN turn
def create_conversation_history(examples):
    """Turn grouped dialogues into (history, counter-narrative) pairs.

    For every turn whose "type" is "CN", one example is emitted: "input" is the
    text of all earlier turns of that dialogue joined with " [SEP] " (empty when
    there are none) and "target" is the CN turn's own text.

    Args:
        examples: batch with a "dialogues" list; each dialogue has a "turns"
            list of dicts carrying "text" and "type".

    Returns:
        Dict with the parallel lists "input" and "target".
    """
    contexts, responses = [], []
    for dialogue in examples["dialogues"]:
        utterances = [turn["text"] for turn in dialogue["turns"]]
        for position, turn in enumerate(dialogue["turns"]):
            if turn["type"] != "CN":
                continue
            # Everything said before this counter-narrative is its context.
            contexts.append(" [SEP] ".join(utterances[:position]))
            responses.append(utterances[position])
    return {"input": contexts, "target": responses}

# Expand each grouped dialogue into one (history, counter-narrative) pair per CN turn.
final_dataset = processed_dataset.map(
    create_conversation_history, batched=True, remove_columns=["dialogues"]
)

# 4. Load pretrained BLOOMZ model and tokenizer
model_name = "bigscience/bloomz-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Half-precision weights, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# 5. Generation function (inference only)
def generate_counterspeech(dialogue_history):
    """Generate a counter-narrative reply for a dialogue history.

    Uses the notebook-level `model` and `tokenizer`.

    Args:
        dialogue_history: either a list of turn strings (oldest first) or a
            single string in which the turns are already joined with " [SEP] "
            (the format of the dataset's "input" column). A plain string used
            to be joined character by character, which corrupted the prompt.

    Returns:
        The decoded continuation only; the prompt tokens are stripped so the
        text can be compared directly against reference counter-narratives.
    """
    if isinstance(dialogue_history, str):
        history_text = dialogue_history
    else:
        history_text = " [SEP] ".join(dialogue_history)
    input_text = history_text + " [ANS] "

    device = model.device
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Make sure dropout layers are disabled while sampling.
    model.eval()
    with torch.inference_mode():
        # Keyword arguments + explicit attention mask: pad and EOS share an id,
        # so the mask cannot be inferred reliably from the input ids.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            num_beams=5,
            repetition_penalty=2.0,
            early_stopping=True
        )
    # A causal LM returns prompt + continuation; keep only the new tokens.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# 6. Example usage
# Three consecutive hate-speech turns used as a smoke test for the generator.
sample_dialogue = [
    "You people are ruining our country!", "Immigrants are stealing our jobs!",
    "We should send them all back!",
]
print("\nGenerated counterspeech:")
sample_reply = generate_counterspeech(sample_dialogue)
print(sample_reply)


# BLOOMZ WITH INSTRUCTION FINE TUNING

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset, DatasetDict
from peft import LoraConfig, get_peft_model, TaskType
import numpy as np
from evaluate import load

# 1. Load and prepare dataset
dataset = load_dataset("Rhma/DIALOCONAN")
small_dataset = dataset["train"].select(range(3500))

# Split BY DIALOGUE, not by row: a row-level train_test_split shuffles single
# turns, which leaks turns of validation dialogues into training and leaves
# every reconstructed history with holes.
dialogue_ids = sorted(set(small_dataset["dialogue_id"]))
shuffled_dialogue_ids = np.random.default_rng(42).permutation(dialogue_ids).tolist()
# Roughly 15% of the dialogues (not of the rows) are held out for validation.
num_val_dialogues = max(1, round(0.15 * len(dialogue_ids)))
val_dialogue_ids = set(shuffled_dialogue_ids[:num_val_dialogues])

train_val = {
    "train": small_dataset.filter(lambda row: row["dialogue_id"] not in val_dialogue_ids),
    "test": small_dataset.filter(lambda row: row["dialogue_id"] in val_dialogue_ids),
}
dataset = DatasetDict({
    "train": train_val["train"],
    "validation": train_val["test"]
})

# 2. Group turns by dialogue_id
def group_dialogues(examples):
    """Collapse a batch of turn-level rows into one record per dialogue.

    Args:
        examples: column-oriented batch with the parallel lists "dialogue_id",
            "turn_id", "text", "type" and "TARGET".

    Returns:
        {"dialogues": [...]}, where each entry holds the "dialogue_id", its
        "turns" (dicts with "text", "type", "target") ordered by turn_id, and a
        dialogue-level "target" copied from the first turn.
    """
    rows = list(zip(examples["dialogue_id"],
                    examples["turn_id"],
                    examples["text"],
                    examples["type"],
                    examples["TARGET"]))
    # Order by dialogue first, then by position inside the dialogue.
    rows.sort(key=lambda row: (row[0], row[1]))

    # Dicts keep insertion order, so dialogues come out in sorted-id order.
    turns_by_dialogue = {}
    for dlg_id, _turn_id, text, turn_type, target in rows:
        turns_by_dialogue.setdefault(dlg_id, []).append(
            {"text": text, "type": turn_type, "target": target}
        )

    grouped = [
        {"dialogue_id": dlg_id, "turns": turns, "target": turns[0]["target"]}
        for dlg_id, turns in turns_by_dialogue.items()
        if dlg_id is not None  # rows without a dialogue id are never emitted
    ]
    return {"dialogues": grouped}

# Group turn rows into dialogues.
# batch_size=None hands each split to group_dialogues as ONE batch. With a fixed
# batch size (previously 1000) grouping only happened inside each batch, so a
# dialogue whose rows straddled a batch boundary was emitted as several fragments.
processed_dataset = dataset.map(
    group_dialogues,
    batched=True,
    remove_columns=dataset["train"].column_names,
    batch_size=None
)

# 3. Create conversation history for each CN turn
def create_conversation_history(examples):
    """Turn grouped dialogues into (history, counter-narrative) pairs.

    For every turn whose "type" is "CN", one example is emitted: "input" is the
    text of all earlier turns of that dialogue joined with " [SEP] " (empty when
    there are none) and "target" is the CN turn's own text.

    Args:
        examples: batch with a "dialogues" list; each dialogue has a "turns"
            list of dicts carrying "text" and "type".

    Returns:
        Dict with the parallel lists "input" and "target".
    """
    contexts, responses = [], []
    for dialogue in examples["dialogues"]:
        utterances = [turn["text"] for turn in dialogue["turns"]]
        for position, turn in enumerate(dialogue["turns"]):
            if turn["type"] != "CN":
                continue
            # Everything said before this counter-narrative is its context.
            contexts.append(" [SEP] ".join(utterances[:position]))
            responses.append(utterances[position])
    return {"input": contexts, "target": responses}

# Expand each grouped dialogue into one (history, counter-narrative) pair per CN
# turn; the nested "dialogues" column is dropped once it has been flattened.
final_dataset = processed_dataset.map(
    create_conversation_history, batched=True, remove_columns=["dialogues"]
)

# 4. Instruction tuning formatting
def format_instruction(examples):
    """Wrap each dialogue history in the instruction prompt template.

    Args:
        examples: batch with the parallel lists "input" (joined dialogue
            history) and "target" (reference counter-narrative).

    Returns:
        Dict with "input" (templated prompts) and "target" (unchanged
        references), both limited to the pairs present in both columns.
    """
    pairs = list(zip(examples["input"], examples["target"]))
    # Fixed system preamble; generate_counterspeech builds the same prefix at
    # inference time, so the two must stay in sync.
    preamble = (
        "[INST] <<SYS>>\n"
        "You are a helpful assistant that generates fact-based counterspeech."
        "<<SYS>>\n"
    )
    prompts = [preamble + f"{history} [/INST]" for history, _ in pairs]
    references = [reference for _, reference in pairs]
    return {"input": prompts, "target": references}

# Apply the instruction template. format_instruction returns its results under
# the same "input"/"target" names as the columns being replaced.
instruction_dataset = final_dataset.map(
    format_instruction, batched=True, remove_columns=["input", "target"]
)

# 5. Tokenization
model_name = "bigscience/bloomz-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    """Tokenize prompt + target pairs into fixed-length causal-LM examples.

    Uses the notebook-level `tokenizer`.

    Args:
        examples: batch with the parallel lists "input" (prompt) and "target"
            (reference counter-narrative).

    Returns:
        The tokenizer output (padded/truncated to 128 tokens) plus "labels":
        a copy of the input ids in which every padding position is replaced by
        -100 so the loss ignores it.
    """
    inputs = [inp + " " + tgt for inp, tgt in zip(examples["input"], examples["target"])]
    model_inputs = tokenizer(
        inputs,
        max_length=128,
        truncation=True,
        padding="max_length"
    )
    # The pad token is the EOS token, so padding cannot be recognised by token
    # id; the attention mask is the only reliable marker. Without this masking
    # the loss is dominated by predicting padding.
    model_inputs["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Tokenize both splits; the raw text columns are not needed by the Trainer.
tokenized_datasets = instruction_dataset.map(
    preprocess_function, batched=True, remove_columns=["input", "target"]
)

# 6. LoRA
# Rank-16 adapters on the "query_key_value" projection modules.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["query_key_value"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# 7. TrainingArguments
training_args = TrainingArguments(
    output_dir="./bloomz-lora-instruction",
    # optimisation
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=True,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    eval_accumulation_steps=1,
    # evaluate and checkpoint on the same 500-step cadence
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    # logging
    logging_strategy="steps",
    logging_steps=10,
    report_to="none",
)

# 9. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

print("Starting instruction tuning training...")
trainer.train()

# 10. Generation function (for inference)
def generate_counterspeech(dialogue_history):
    """Generate a counter-narrative with the instruction-tuned model.

    Uses the notebook-level `model` and `tokenizer` and the same instruction
    template that format_instruction applies to the training data.

    Args:
        dialogue_history: either a list of turn strings (oldest first) or a
            single string in which the turns are already joined with " [SEP] "
            (the format of the dataset's "input" column). A plain string used
            to be joined character by character, which corrupted the prompt.

    Returns:
        The decoded continuation only; the prompt tokens are stripped so the
        text can be compared directly against reference counter-narratives.
    """
    if isinstance(dialogue_history, str):
        history_text = dialogue_history
    else:
        history_text = " [SEP] ".join(dialogue_history)
    input_text = "[INST] <<SYS>>\nYou are a helpful assistant that generates fact-based counterspeech.<<SYS>>\n" + history_text + " [/INST] "

    device = model.device
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Training leaves the model in train mode; switch to eval so the LoRA
    # dropout is not applied while generating.
    model.eval()
    with torch.inference_mode():
        # Keyword arguments + explicit attention mask: pad and EOS share an id,
        # so the mask cannot be inferred reliably from the input ids.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            num_beams=5,
            repetition_penalty=2.0,
            early_stopping=True
        )
    # A causal LM returns prompt + continuation; keep only the new tokens.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# Example usage
# Three consecutive hate-speech turns used as a smoke test for the tuned model.
sample_dialogue = [
    "You people are ruining our country!", "Immigrants are stealing our jobs!",
    "We should send them all back!",
]
print("\nGenerated counterspeech after fine-tuning:")
sample_reply = generate_counterspeech(sample_dialogue)
print(sample_reply)


In [None]:
# Install dependencies if not already installed
!pip install -q evaluate detoxify tqdm
!pip install rouge_score bert_score
from evaluate import load
from detoxify import Detoxify
from tqdm import tqdm
import numpy as np
import math

# Load metrics
rouge = load("rouge")
bertscore = load("bertscore")

# Use first 100 samples. select() avoids decoding the whole validation split
# just to keep its first rows, and copes with splits shorter than 100.
validation_split = final_dataset["validation"]
eval_subset = validation_split.select(range(min(100, len(validation_split))))
inputs = list(eval_subset["input"])
targets = list(eval_subset["target"])

# Generate predictions
print("Generating counter speech...")
generated = []
for text in tqdm(inputs, desc="Generating"):
    # "input" stores the history as ONE string joined with " [SEP] ", while
    # generate_counterspeech joins a list of turns. Passing the raw string made
    # it join individual characters, so split it back into turns first.
    response = generate_counterspeech(text.split(" [SEP] "))
    generated.append(response)

In [None]:
# BERTScore: similarity between generations and references, scored with a
# DistilBERT encoder and averaged over the evaluated samples.
print("Calculating BERTScore...")
bertscore_result = bertscore.compute(
    predictions=generated, references=targets, model_type="distilbert-base-uncased"
)
mean_bertscore_f1 = np.mean(bertscore_result['f1'])
print(f"BERTScore F1: {mean_bertscore_f1:.4f}")

# ROUGE: n-gram overlap between generations and references.
print("Calculating ROUGE...")
rouge_result = rouge.compute(references=targets, predictions=generated)
print(f"ROUGE-1 F1: {rouge_result['rouge1']:.4f}")
print(f"ROUGE-2 F1: {rouge_result['rouge2']:.4f}")
print(f"ROUGE-L F1: {rouge_result['rougeL']:.4f}")

# Perplexity
print("Calculating Perplexity...")
def calculate_perplexity(texts, lm=None, tok=None, max_length=512):
    """Token-level perplexity of `texts` under a causal language model.

    The previous implementation was a placeholder that added log(1.0) == 0 for
    every word and therefore always reported a perplexity of exactly 1.0.

    Args:
        texts: iterable of strings to score.
        lm: causal LM returning an object with a mean `.loss` when called with
            `input_ids` and `labels`. Defaults to the notebook-level `model`.
        tok: tokenizer matching `lm`. Defaults to the notebook-level `tokenizer`.
        max_length: texts are truncated to this many tokens before scoring.

    Returns:
        exp(mean negative log-likelihood per predicted token), or float('inf')
        when no text contains at least two tokens to score.
    """
    import torch  # local import: this cell does not import torch itself

    lm = model if lm is None else lm
    tok = tokenizer if tok is None else tok

    total_nll = 0.0
    total_tokens = 0
    lm.eval()
    for text in texts:
        if not text or not text.strip():
            continue
        encoded = tok(text, return_tensors="pt", truncation=True, max_length=max_length)
        input_ids = encoded["input_ids"].to(lm.device)
        # The first token has no left context, so n tokens give n - 1 predictions.
        num_predicted = input_ids.shape[1] - 1
        if num_predicted < 1:
            continue
        with torch.inference_mode():
            mean_nll = float(lm(input_ids=input_ids, labels=input_ids).loss)
        # .loss is a per-token mean; re-weight it so long texts count more.
        total_nll += mean_nll * num_predicted
        total_tokens += num_predicted
    return math.exp(total_nll / total_tokens) if total_tokens > 0 else float('inf')

perplexity_result = calculate_perplexity(generated)
print(f"Perplexity: {perplexity_result:.4f}")

# Toxicity
print("Calculating Toxicity...")
# Build the Detoxify classifier once; constructing it inside the loop reloaded
# the checkpoint for every single prediction.
toxicity_model = Detoxify('original')
toxicity_scores = [toxicity_model.predict(pred)['toxicity'] for pred in tqdm(generated, desc="Toxicity")]
# Guard the empty case so np.mean does not warn on an empty list.
avg_toxicity = np.mean(toxicity_scores) if toxicity_scores else float("nan")
print(f"Avg. Toxicity Score: {avg_toxicity:.4f}")

# BLOOMZ WITH PREFIX OR PROMPT TUNING

In [None]:
!pip install peft

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset, DatasetDict
import numpy as np
from evaluate import load
from peft import PrefixTuningConfig, get_peft_model, TaskType

# 1. Load and prepare dataset
dataset = load_dataset("Rhma/DIALOCONAN")
small_dataset = dataset["train"].select(range(3500))

# Split BY DIALOGUE, not by row: a row-level train_test_split shuffles single
# turns, which leaks turns of validation dialogues into training and leaves
# every reconstructed history with holes.
dialogue_ids = sorted(set(small_dataset["dialogue_id"]))
shuffled_dialogue_ids = np.random.default_rng(42).permutation(dialogue_ids).tolist()
# Roughly 15% of the dialogues (not of the rows) are held out for validation.
num_val_dialogues = max(1, round(0.15 * len(dialogue_ids)))
val_dialogue_ids = set(shuffled_dialogue_ids[:num_val_dialogues])

train_val = {
    "train": small_dataset.filter(lambda row: row["dialogue_id"] not in val_dialogue_ids),
    "test": small_dataset.filter(lambda row: row["dialogue_id"] in val_dialogue_ids),
}
dataset = DatasetDict({
    "train": train_val["train"],
    "validation": train_val["test"]
})

# 2. Group turns by dialogue_id
def group_dialogues(examples):
    """Collapse a batch of turn-level rows into one record per dialogue.

    Args:
        examples: column-oriented batch with the parallel lists "dialogue_id",
            "turn_id", "text", "type" and "TARGET".

    Returns:
        {"dialogues": [...]}, where each entry holds the "dialogue_id", its
        "turns" (dicts with "text", "type", "target") ordered by turn_id, and a
        dialogue-level "target" copied from the first turn.
    """
    rows = list(zip(examples["dialogue_id"],
                    examples["turn_id"],
                    examples["text"],
                    examples["type"],
                    examples["TARGET"]))
    # Order by dialogue first, then by position inside the dialogue.
    rows.sort(key=lambda row: (row[0], row[1]))

    # Dicts keep insertion order, so dialogues come out in sorted-id order.
    turns_by_dialogue = {}
    for dlg_id, _turn_id, text, turn_type, target in rows:
        turns_by_dialogue.setdefault(dlg_id, []).append(
            {"text": text, "type": turn_type, "target": target}
        )

    grouped = [
        {"dialogue_id": dlg_id, "turns": turns, "target": turns[0]["target"]}
        for dlg_id, turns in turns_by_dialogue.items()
        if dlg_id is not None  # rows without a dialogue id are never emitted
    ]
    return {"dialogues": grouped}

# Group turn rows into dialogues.
# batch_size=None hands each split to group_dialogues as ONE batch. With a fixed
# batch size (previously 1000) grouping only happened inside each batch, so a
# dialogue whose rows straddled a batch boundary was emitted as several fragments.
processed_dataset = dataset.map(
    group_dialogues,
    batched=True,
    remove_columns=dataset["train"].column_names,
    batch_size=None
)

# 3. Create conversation history for each CN turn
def create_conversation_history(examples):
    """Turn grouped dialogues into (history, counter-narrative) pairs.

    For every turn whose "type" is "CN", one example is emitted: "input" is the
    text of all earlier turns of that dialogue joined with " [SEP] " (empty when
    there are none) and "target" is the CN turn's own text.

    Args:
        examples: batch with a "dialogues" list; each dialogue has a "turns"
            list of dicts carrying "text" and "type".

    Returns:
        Dict with the parallel lists "input" and "target".
    """
    contexts, responses = [], []
    for dialogue in examples["dialogues"]:
        utterances = [turn["text"] for turn in dialogue["turns"]]
        for position, turn in enumerate(dialogue["turns"]):
            if turn["type"] != "CN":
                continue
            # Everything said before this counter-narrative is its context.
            contexts.append(" [SEP] ".join(utterances[:position]))
            responses.append(utterances[position])
    return {"input": contexts, "target": responses}

# Expand each grouped dialogue into one (history, counter-narrative) pair per CN turn.
final_dataset = processed_dataset.map(
    create_conversation_history, batched=True, remove_columns=["dialogues"]
)

# 4. Load pretrained BLOOMZ model and tokenizer
model_name = "bigscience/bloomz-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# 5. PEFT: PrefixTuning
# 30 trainable virtual tokens; the encoder width is taken from the base model.
peft_config = PrefixTuningConfig(
    num_virtual_tokens=30,  # can be tuned
    encoder_hidden_size=model.config.hidden_size,
    task_type=TaskType.CAUSAL_LM,
)
# --- OR PromptTuning ---
# peft_config = PromptTuningConfig(
#     task_type=TaskType.CAUSAL_LM,
#     num_virtual_tokens=30
# )
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# 6. Tokenization for Trainer
def preprocess_function(examples):
    """Tokenize "history [ANS] target" strings into fixed-length LM examples.

    Uses the notebook-level `tokenizer`.

    Args:
        examples: batch with the parallel lists "input" (joined dialogue
            history) and "target" (reference counter-narrative).

    Returns:
        The tokenizer output (padded/truncated to 128 tokens) plus "labels":
        a copy of the input ids in which every padding position is replaced by
        -100 so the loss ignores it.
    """
    inputs = [inp + " [ANS] " + tgt for inp, tgt in zip(examples["input"], examples["target"])]
    model_inputs = tokenizer(
        inputs,
        max_length=128,
        truncation=True,
        padding="max_length"
    )
    # The pad token is the EOS token, so padding cannot be recognised by token
    # id; the attention mask is the only reliable marker. Without this masking
    # the loss is dominated by predicting padding.
    model_inputs["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Tokenize both splits; the raw text columns are not needed by the Trainer.
tokenized_datasets = final_dataset.map(
    preprocess_function, batched=True, remove_columns=["input", "target"]
)

# 7. TrainingArguments and Trainer
training_args = TrainingArguments(
    output_dir="./bloomz-prefix-tuning",
    # optimisation
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=True,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    eval_accumulation_steps=1,
    # evaluate and checkpoint on the same 500-step cadence
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    # logging
    logging_strategy="steps",
    logging_steps=10,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
)

print("Starting PrefixTuning training...")
trainer.train()

# 8. Generation function (for inference)
def generate_counterspeech(dialogue_history):
    """Generate a counter-narrative with the prefix-tuned model.

    Uses the notebook-level `model` and `tokenizer`.

    Args:
        dialogue_history: either a list of turn strings (oldest first) or a
            single string in which the turns are already joined with " [SEP] "
            (the format of the dataset's "input" column). A plain string used
            to be joined character by character, which corrupted the prompt.

    Returns:
        The decoded continuation only; the prompt tokens are stripped so the
        text can be compared directly against reference counter-narratives.
    """
    if isinstance(dialogue_history, str):
        history_text = dialogue_history
    else:
        history_text = " [SEP] ".join(dialogue_history)
    input_text = history_text + " [ANS] "

    device = model.device
    inputs = tokenizer(input_text, return_tensors="pt", max_length=128, truncation=True).to(device)
    prompt_length = inputs["input_ids"].shape[1]

    # Training leaves the model in train mode; switch to eval before sampling.
    model.eval()
    with torch.inference_mode():
        # NOTE: inputs must be passed by keyword. PEFT's generate() for
        # prompt-learning adapters (prefix/prompt tuning) forwards only keyword
        # arguments to the base model, so a positional input_ids is not used.
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=128,
            num_beams=5,
            repetition_penalty=2.0,
            early_stopping=True
        )
    # A causal LM returns prompt + continuation; keep only the new tokens.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# 9. Example usage
# Three consecutive hate-speech turns used as a smoke test for the tuned model.
sample_dialogue = [
    "You people are ruining our country!", "Immigrants are stealing our jobs!",
    "We should send them all back!",
]
print("\nGenerated counterspeech after PrefixTuning:")
sample_reply = generate_counterspeech(sample_dialogue)
print(sample_reply)


In [None]:
# Install dependencies if not already installed
!pip install -q evaluate detoxify tqdm
!pip install rouge_score bert_score
from evaluate import load
from detoxify import Detoxify
from tqdm import tqdm
import numpy as np
import math

# Load metrics
rouge = load("rouge")
bertscore = load("bertscore")

# Use first 100 samples. select() avoids decoding the whole validation split
# just to keep its first rows, and copes with splits shorter than 100.
validation_split = final_dataset["validation"]
eval_subset = validation_split.select(range(min(100, len(validation_split))))
inputs = list(eval_subset["input"])
targets = list(eval_subset["target"])

# Generate predictions
print("Generating counter speech...")
generated = []
for text in tqdm(inputs, desc="Generating"):
    # "input" stores the history as ONE string joined with " [SEP] ", while
    # generate_counterspeech joins a list of turns. Passing the raw string made
    # it join individual characters, so split it back into turns first.
    response = generate_counterspeech(text.split(" [SEP] "))
    generated.append(response)

In [None]:
# BERTScore: similarity between generations and references, scored with a
# DistilBERT encoder and averaged over the evaluated samples.
print("Calculating BERTScore...")
bertscore_result = bertscore.compute(
    predictions=generated, references=targets, model_type="distilbert-base-uncased"
)
mean_bertscore_f1 = np.mean(bertscore_result['f1'])
print(f"BERTScore F1: {mean_bertscore_f1:.4f}")

# ROUGE: n-gram overlap between generations and references.
print("Calculating ROUGE...")
rouge_result = rouge.compute(references=targets, predictions=generated)
print(f"ROUGE-1 F1: {rouge_result['rouge1']:.4f}")
print(f"ROUGE-2 F1: {rouge_result['rouge2']:.4f}")
print(f"ROUGE-L F1: {rouge_result['rougeL']:.4f}")

# Perplexity
print("Calculating Perplexity...")
def calculate_perplexity(texts, lm=None, tok=None, max_length=512):
    """Token-level perplexity of `texts` under a causal language model.

    The previous implementation was a placeholder that added log(1.0) == 0 for
    every word and therefore always reported a perplexity of exactly 1.0.

    Args:
        texts: iterable of strings to score.
        lm: causal LM returning an object with a mean `.loss` when called with
            `input_ids` and `labels`. Defaults to the notebook-level `model`.
        tok: tokenizer matching `lm`. Defaults to the notebook-level `tokenizer`.
        max_length: texts are truncated to this many tokens before scoring.

    Returns:
        exp(mean negative log-likelihood per predicted token), or float('inf')
        when no text contains at least two tokens to score.
    """
    import torch  # local import: this cell does not import torch itself

    lm = model if lm is None else lm
    tok = tokenizer if tok is None else tok

    total_nll = 0.0
    total_tokens = 0
    lm.eval()
    for text in texts:
        if not text or not text.strip():
            continue
        encoded = tok(text, return_tensors="pt", truncation=True, max_length=max_length)
        input_ids = encoded["input_ids"].to(lm.device)
        # The first token has no left context, so n tokens give n - 1 predictions.
        num_predicted = input_ids.shape[1] - 1
        if num_predicted < 1:
            continue
        with torch.inference_mode():
            mean_nll = float(lm(input_ids=input_ids, labels=input_ids).loss)
        # .loss is a per-token mean; re-weight it so long texts count more.
        total_nll += mean_nll * num_predicted
        total_tokens += num_predicted
    return math.exp(total_nll / total_tokens) if total_tokens > 0 else float('inf')

perplexity_result = calculate_perplexity(generated)
print(f"Perplexity: {perplexity_result:.4f}")

# Toxicity
print("Calculating Toxicity...")
# Build the Detoxify classifier once; constructing it inside the loop reloaded
# the checkpoint for every single prediction.
toxicity_model = Detoxify('original')
toxicity_scores = [toxicity_model.predict(pred)['toxicity'] for pred in tqdm(generated, desc="Toxicity")]
# Guard the empty case so np.mean does not warn on an empty list.
avg_toxicity = np.mean(toxicity_scores) if toxicity_scores else float("nan")
print(f"Avg. Toxicity Score: {avg_toxicity:.4f}")