In [None]:
import openai
import torch
import re
import os
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tqdm import tqdm


In [None]:

!git clone https://github.com/Vicomtech/hate-speech-dataset.git

dataset_path = "hate-speech-dataset/sampled_test" 
annotations_path = "hate-speech-dataset/annotations_metadata.csv" 
annotations_df = pd.read_csv(annotations_path)
annotations_df = annotations_df[["file_id", "label"]]
label_map = {"noHate": 0, "hate": 1}
annotations_df["label"] = annotations_df["label"].map(label_map)
label_dict = dict(zip(annotations_df["file_id"], annotations_df["label"]))


In [None]:

def load_text_files(folder_path, label_dict):
    """Load every labelled ``.txt`` file of a folder as a text/label record.

    Args:
        folder_path: Directory holding one ``<file_id>.txt`` file per sample.
        label_dict: Mapping from file id (file name without the ``.txt``
            extension) to its label.

    Returns:
        A list of ``{"text": ..., "label": ...}`` dicts ordered by file name.
        Entries that do not end in ``.txt`` or whose id is missing from
        ``label_dict`` are skipped. Text is read as UTF-8 and stripped of
        leading/trailing whitespace.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be read.
    """
    suffix = ".txt"
    dataset = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # sample order (and therefore any "first N" slice) reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing extension; str.replace() would also remove
        # ".txt" occurring in the middle of a name.
        file_id = filename[: -len(suffix)]
        if file_id not in label_dict:
            continue
        file_path = os.path.join(folder_path, filename)
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read().strip()
        dataset.append({"text": text, "label": label_dict[file_id]})
    return dataset

# Build the evaluation set from the sampled test folder, then report its size
# and show the first five records as a sanity check.
processed_ds = load_text_files(folder_path=dataset_path, label_dict=label_dict)
print(f"Loaded {len(processed_ds)} samples from {dataset_path}")
print("Sample data:", processed_ds[0:5])

def extract_yes_no(response):
    """Map a free-text answer to "Is it fair to say that?" onto a binary label.

    Args:
        response: The model's answer text.

    Returns:
        0 (fair -> not hate speech) when the answer contains the word "yes"
        or the phrase "it is fair"; 1 (unfair -> hate speech) otherwise.
        Explicit negatives ("no", "it is unfair", "it is not fair") and
        answers that cannot be interpreted both yield 1.
    """
    text = response.lower().strip()
    # Tokenise on letters rather than whitespace so punctuation glued to the
    # word ("Yes," / "Yes." / "yes!") does not hide it, while longer words
    # such as "yesterday" still do not count as "yes".
    words = set(re.findall(r"[a-z]+", text))
    if "yes" in words or "it is fair" in text:
        return 0  # fair -> not hate speech
    # Negative or unrecognised answer -> treated as unfair (hate speech).
    return 1

def _generate_continuation(prompt, model, tokenizer, max_new_tokens):
    """Return only the text the model generated after ``prompt``, stripped."""
    # Place the inputs on the model's own device instead of relying on a
    # notebook-level ``device`` variable.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # The output sequence starts with the prompt tokens. Slice them off by
    # length rather than removing the prompt with str.replace(): decoding does
    # not always reproduce the prompt text exactly, and a failed replace would
    # leak the whole prompt into the "answer".
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()


def think_twice(claim, model, tokenizer, verbose=True):
    """Classify a claim as fair (0) or unfair (1) with a three-step prompt chain.

    The model is asked, in order, to (1) name the most suspicious part of the
    claim, (2) provide a fact relevant to that part, and (3) answer whether it
    is fair to say the claim given that fact. The final answer is converted to
    a label by ``extract_yes_no``.

    Args:
        claim: Statement to classify.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        verbose: When True (default), print the intermediate outputs of each
            step. Pass False to keep long evaluation loops quiet.

    Returns:
        0 if the final answer is judged "yes / fair", 1 otherwise.

    NOTE(review): generation uses the model's default generation settings; if
    those enable sampling, predictions may differ between runs - confirm.
    """
    # Step 1: locate the part of the claim that most needs checking.
    prompt_suspicious = f"Find the most suspicious part of the statement:\n\n{claim}\n\nSuspicious Part:"
    suspicious_info = _generate_continuation(prompt_suspicious, model, tokenizer, max_new_tokens=20)
    if verbose:
        print("\n🔹 Suspicious Info:", suspicious_info)

    # Step 2: ask for a fact that can be used to verify that part.
    prompt_fact = f"Given the statement:\n\n{claim}\n\nThe most suspicious part is: {suspicious_info}\n\nProvide a relevant social or scientific fact to verify it:\nFact:"
    fact = _generate_continuation(prompt_fact, model, tokenizer, max_new_tokens=30)
    if verbose:
        print("🔹 Generated Fact:", fact)

    # Step 3: final yes/no judgement conditioned on the claim and the fact.
    final_prompt = f"Claim: {claim}\nThe claim mentions that {suspicious_info}.\nFact: {fact}\nQuestion: Is it fair to say that?\nAnswer:"
    response = _generate_continuation(final_prompt, model, tokenizer, max_new_tokens=5)
    if verbose:
        print("🔹 Final Answer:", response)
    return extract_yes_no(response)  # yes -> 0 (fair), no -> 1 (unfair)

# Read the Hugging Face access token from the environment instead of pasting
# it into the notebook, where it would be saved and shared with the file.
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or ""

# An empty string is not a usable token; passing None instead lets login()
# fall back to prompting for one.
login(token=HUGGINGFACE_TOKEN or None)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

print("Loading LLaMA-2-7b model...")
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Half precision on GPU, full precision on CPU; device_map="auto" lets the
# library decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto"
)

# Qualitative check: run the pipeline on the first five samples and show each
# prediction next to its ground-truth label.
demo_samples = processed_ds[:5]

print("\nProcessing demo samples:")
sample_number = 0
for demo_sample in demo_samples:
    sample_number += 1
    demo_text = demo_sample['text']
    print(f"\nSample {sample_number}:")
    print(f"Text: {demo_text}")
    is_hate = demo_sample['label'] == 1
    print(f"Ground Truth: {'Unfair (Hate)' if is_hate else 'Fair (Not Hate)'}")

    # Classify the sample with the three-step prompt chain.
    demo_prediction = think_twice(demo_text, model, tokenizer)
    predicted_hate = demo_prediction == 1
    print(f"Prediction: {'Unfair (Hate)' if predicted_hate else 'Fair (Not Hate)'}")

print("\nProcessing all samples for evaluation...")
predictions = []
ground_truth = []
failed_samples = 0

for sample in tqdm(processed_ds):
    # Resolve the label before the try block and append exactly once per
    # sample, so a failure can never leave predictions and ground_truth with
    # different lengths.
    label = int(sample['label'])
    try:
        prediction = think_twice(sample['text'], model, tokenizer)
    except Exception as e:
        print(f"Error processing sample: {e}")
        # Best-effort fallback: a sample that cannot be processed is counted
        # as class 1 (unfair / hate), and tallied so the bias is visible.
        prediction = 1
        failed_samples += 1
    predictions.append(prediction)
    ground_truth.append(label)

if failed_samples:
    print(f"Warning: {failed_samples} of {len(processed_ds)} samples failed and were counted as 'Unfair (Hate)'")

# Overall scores; the F1 score uses the 'weighted' average across classes.
accuracy = accuracy_score(ground_truth, predictions)
f1 = f1_score(ground_truth, predictions, average='weighted')

print("\nEvaluation Results:")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")

print("\nClassification Report:")
# Pin the label set explicitly: without it the report infers the classes from
# the data and raises when only one class is present, because two
# target_names are supplied.
print(classification_report(
    ground_truth,
    predictions,
    labels=[0, 1],
    target_names=["Fair (Not Hate)", "Unfair (Hate)"],
))

# Collect the per-sample outcome (text, label, prediction, hit/miss) into one
# table and write it to disk.
result_texts = [entry['text'] for entry in processed_ds]
result_matches = [truth == guess for truth, guess in zip(ground_truth, predictions)]

results_df = pd.DataFrame({
    'text': result_texts,
    'ground_truth': ground_truth,
    'prediction': predictions,
    'correct': result_matches,
})

results_df.to_csv('hate_speech_results.csv', index=False)
print("\nResults saved to hate_speech_results.csv")