Generating counter-narrative pairs with both Gemini and GPT

In [None]:
import os
import pandas as pd
import openai
import google.generativeai as genai
from tqdm import tqdm

# SECURITY: API keys must never be committed to source. The keys that were
# previously hard-coded here have been exposed and should be revoked and
# rotated. Credentials are now read from the environment only; set
# OPENAI_API_KEY and GOOGLE_API_KEY before running this cell (e.g. via the
# shell, a secrets manager, or an untracked .env file).
_missing_keys = [
    name for name in ("OPENAI_API_KEY", "GOOGLE_API_KEY") if not os.getenv(name)
]
if _missing_keys:
    # Fail fast with a clear message instead of sending unauthenticated requests.
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

openai.api_key = os.getenv("OPENAI_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Few-shot examples: each row supplies a 'toxic_text' / 'counter_narrative' pair.
examples_df = pd.read_excel("counter_narrative.xlsx")

# Only the first 100 rows are processed. The explicit copy makes the frame
# independent of the one returned by read_excel, so the column assignments
# performed later do not write into a slice of another frame.
input_df = pd.read_excel("test.xlsx").head(100).copy()

# Build the few-shot prompt prefix in a single join rather than repeated
# string concatenation inside a loop.
prompt_prefix = "".join(
    f"Toxic: {toxic}\nCounter: {counter}\n\n"
    for toxic, counter in zip(
        examples_df['toxic_text'], examples_df['counter_narrative']
    )
)

def generate_with_gpt(toxic_text):
    """Ask the OpenAI chat model for a counter-narrative to ``toxic_text``.

    The module-level few-shot ``prompt_prefix`` is prepended to the query.

    Returns:
        The stripped content of the first returned choice. If anything in
        the request or response handling raises, the exception is not
        propagated; a string of the form ``"GPT Error: <exception>"`` is
        returned instead.
    """
    prompt = f"{prompt_prefix}Toxic: {toxic_text}\nCounter:"
    request_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 150,
    }
    try:
        completion = openai.ChatCompletion.create(**request_kwargs)
        first_message = completion['choices'][0]['message']
        return first_message['content'].strip()
    except Exception as err:
        # Best-effort: the caller stores the error text in place of a result.
        return f"GPT Error: {err}"

def generate_with_gemini(toxic_text):
    """Ask the Gemini model for a counter-narrative to ``toxic_text``.

    The module-level few-shot ``prompt_prefix`` is prepended to the query.

    Returns:
        The stripped ``text`` of the generated response. If model creation,
        generation, or response handling raises, the exception is not
        propagated; a string of the form ``"Gemini Error: <exception>"`` is
        returned instead.
    """
    prompt = f"{prompt_prefix}Toxic: {toxic_text}\nCounter:"
    try:
        gemini = genai.GenerativeModel('gemini-pro')
        reply = gemini.generate_content(prompt)
        return reply.text.strip()
    except Exception as err:
        # Best-effort: the caller stores the error text in place of a result.
        return f"Gemini Error: {err}"

gpt_outputs, gemini_outputs = [], []

# Query both models for every input row, GPT first and then Gemini, keeping
# the two output lists aligned with the rows of input_df.
for text in tqdm(input_df['toxic_text']):
    gpt_reply = generate_with_gpt(text)
    gpt_outputs.append(gpt_reply)
    gemini_reply = generate_with_gemini(text)
    gemini_outputs.append(gemini_reply)

# Attach one new column per model, then write the combined table to disk.
for column_name, column_values in (
    ('gpt_counter_narrative', gpt_outputs),
    ('gemini_counter_narrative', gemini_outputs),
):
    input_df[column_name] = column_values

input_df.to_excel("generated_counters_100.xlsx", index=False)


In [None]:
import re
import random
import string
import time
from typing import List

# === Step 1: Sample Hate Speech Dataset ===
# Hard-coded example inputs for the demo pipeline; the main driver passes
# this list to process_batch().
hate_speech_samples = [
    "All immigrants are ruining our country.",
    "Women are too emotional to be leaders.",
    "People from that religion are terrorists.",
    "Gay people are unnatural.",
    "Disabled people are a burden on society."
]

# === Step 2: Text Preprocessing Function ===
def preprocess_text(text: str) -> str:
    """Normalize ``text`` for downstream processing.

    Steps, in order: lowercase, remove every character in
    ``string.punctuation``, collapse each run of whitespace to a single
    space, and strip leading/trailing whitespace.

    Punctuation is removed *before* whitespace is collapsed; doing it the
    other way round leaves double spaces wherever a punctuation mark stood
    between two spaces (e.g. ``"a , b"``).

    Args:
        text: The raw input string.

    Returns:
        The normalized string (possibly empty).
    """
    lowered = text.lower()
    without_punctuation = lowered.translate(
        str.maketrans('', '', string.punctuation)
    )
    return re.sub(r'\s+', ' ', without_punctuation).strip()

# === Step 3: Dummy Counter-Narrative Generator ===
def generate_counter_narrative(text: str) -> str:
    """Return a placeholder counter-narrative.

    This is a stand-in for a real model: ``text`` is accepted but not
    inspected, and one of five fixed template sentences is picked
    uniformly at random using the ``random`` module.
    """
    canned_responses = [
        "I believe in treating everyone with respect, regardless of their background.",
        "We should judge people by their actions, not stereotypes.",
        "Every individual deserves equal rights and opportunities.",
        "Hate has no place in a compassionate society.",
        "Inclusivity makes our communities stronger, not weaker.",
    ]
    return random.choice(canned_responses)

# === Step 4: Explainable Model Simulation ===
def explain_model(text: str) -> str:
    """Return a fixed three-line explanation that quotes ``text``.

    The first line embeds ``text`` in single quotes; the remaining two
    lines are constant. Lines are separated by newlines, with no trailing
    newline.
    """
    explanation_lines = (
        f"Identified bias in text: '{text}'.",
        "Reason: Presence of stereotypical or discriminatory language.",
        "Suggested counter-narrative: Promote empathy, equality, and facts.",
    )
    return "\n".join(explanation_lines)

# === Step 5: Batch Processing Function ===
def process_batch(texts: List[str]) -> List[dict]:
    """Run every text through the demo pipeline and collect the results.

    For each input a progress line is printed, the text is preprocessed,
    and a counter-narrative and explanation are produced from the
    preprocessed form. The function sleeps for one second after each item.

    Args:
        texts: The raw input strings.

    Returns:
        One dict per input, in order, with the keys ``"original"``,
        ``"preprocessed"``, ``"counter_narrative"`` and ``"explanation"``.
    """
    total = len(texts)
    records = []
    for position, raw_text in enumerate(texts, start=1):
        print(f"Processing {position}/{total}: {raw_text}")
        cleaned = preprocess_text(raw_text)
        record = {
            "original": raw_text,
            "preprocessed": cleaned,
            "counter_narrative": generate_counter_narrative(cleaned),
            "explanation": explain_model(cleaned),
        }
        records.append(record)
        time.sleep(1)  # simulate model latency
    return records

# === Step 6: Display Results ===
def display_results(results: List[dict]):
    """Print a header followed by a labelled report for each result.

    Every entry of ``results`` must provide the keys ``"original"``,
    ``"preprocessed"``, ``"counter_narrative"`` and ``"explanation"``.
    Each entry is printed as four ``Label: value`` lines followed by a
    50-dash separator line.
    """
    labelled_fields = (
        ("Original Text", "original"),
        ("Preprocessed", "preprocessed"),
        ("Counter-Narrative", "counter_narrative"),
        ("Explanation", "explanation"),
    )
    separator = "-" * 50

    print("\n=== Counter-Narrative Generation Results ===\n")
    for entry in results:
        for label, key in labelled_fields:
            print(f"{label}: {entry[key]}")
        print(separator)

# === Main Driver Code ===
# Runs the demo pipeline only when this code is executed as the main module:
# the built-in samples are processed (process_batch sleeps one second per
# sample) and the collected results are then printed.
if __name__ == "__main__":
    print("Loading hate speech samples...\n")
    processed_results = process_batch(hate_speech_samples)
    display_results(processed_results)
