In [1]:
!pip install transformers datasets torch sentence-transformers

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_c

In [37]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [38]:
import os
from getpass import getpass

from huggingface_hub import login

# Read the access token from the environment and prompt only when it is
# absent, so the secret never lives in the notebook source or its outputs.
# Any token that has ever been saved in a shared notebook should be treated
# as leaked and revoked from the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

In [49]:
# Checkpoint used to flag toxic wording in user messages.
model_name = "unitary/toxic-bert"

# Fetch the tokenizer and the matching sequence-classification weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Bundle both into a ready-to-call text-classification pipeline.
toxicity_detector = pipeline(task="text-classification", model=model, tokenizer=tokenizer)

Device set to use cpu


In [50]:
# Sentence encoder used to compare user text against known biased statements.
print("Loading semantic similarity model...")
sentence_model = SentenceTransformer(model_name_or_path="paraphrase-MiniLM-L6-v2")

Loading semantic similarity model...


In [51]:
# Reference statements expressing gender stereotypes. detect_gender_bias()
# compares the embedding of each user message against the embeddings of these
# sentences and treats a cosine similarity above its threshold as a bias signal.
bias_templates = [
    "Women are inferior to men.",
    "Men are better than women.",
    "Women should stay at home.",
    "Women are too emotional for leadership.",
    "Men are naturally better at technical jobs.",
    "Women are meant for household duties.",
    "Men shouldn't express emotions.",
    "Women are weaker than men.",
    "Men are more logical than women.",
    "Women are better caregivers than men.",
    "Men are breadwinners, women are homemakers.",
    "Girls are not good at math and science.",
    "Boys should not play with dolls.",
    "Women should be submissive to men.",
    "Men should be dominant over women."
]

# Encode the templates once up front so each query only has to embed the
# user's text; rows of bias_embeddings follow the order of bias_templates.
bias_embeddings = sentence_model.encode(bias_templates)

In [52]:
# Function to detect gender bias using multiple approaches
def detect_gender_bias(text, toxicity_threshold=0.5, similarity_threshold=0.5):
    """Score a message for gender bias using two independent signals.

    Signal 1 runs ``toxicity_detector`` and counts the message when its top
    label is ``'toxic'`` with a score above ``toxicity_threshold``. Note that
    this signal fires on any toxic text, not only gender-related toxicity.

    Signal 2 embeds the message with ``sentence_model`` and takes its highest
    cosine similarity against ``bias_embeddings``; it counts when that value
    exceeds ``similarity_threshold``.

    Each signal is best-effort: if one raises, the error is printed and the
    other signal still contributes.

    Args:
        text: The user message to analyse.
        toxicity_threshold: Minimum toxicity score for signal 1 to count.
        similarity_threshold: Minimum cosine similarity for signal 2 to count.

    Returns:
        ``(is_biased, bias_score)`` as a plain ``bool`` and ``float``.
        ``bias_score`` is the largest score among the signals that fired, or
        ``0.0`` when none did.
    """
    # Nothing to analyse: skip both model calls for blank input.
    if not text or not text.strip():
        return False, 0.0

    signal_scores = []

    # Approach 1: Check for toxicity
    try:
        # truncation=True keeps over-long messages within the model's input
        # limit instead of raising and silently losing this signal.
        toxicity_result = toxicity_detector(text, truncation=True)[0]
        if toxicity_result['label'] == 'toxic' and toxicity_result['score'] > toxicity_threshold:
            signal_scores.append(float(toxicity_result['score']))
    except Exception as e:
        print(f"Toxicity detection error: {e}")

    # Approach 2: Semantic similarity with bias templates
    try:
        text_embedding = sentence_model.encode([text])
        similarities = cosine_similarity(text_embedding, bias_embeddings)[0]
        # Convert the numpy scalar so callers always get a built-in float.
        max_similarity = float(similarities.max())

        if max_similarity > similarity_threshold:
            signal_scores.append(max_similarity)
    except Exception as e:
        print(f"Semantic similarity error: {e}")

    # The message is biased when at least one signal cleared its threshold.
    bias_score = max(signal_scores, default=0.0)
    return bool(signal_scores), bias_score

In [57]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

from functools import lru_cache


@lru_cache(maxsize=2)
def _load_response_model(model_name):
    """Load the tokenizer/model pair for ``model_name`` once and reuse it."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


def get_llm_response(user_input, is_biased, model_name="google/flan-t5-large"):
    """Generate the chatbot's reply to ``user_input``.

    Args:
        user_input: The message typed by the user; it is embedded verbatim
            in the prompt sent to the model.
        is_biased: Whether bias was detected in the message. Selects between
            a prompt that asks the model to counter the stereotype and a
            neutral, inclusive prompt.
        model_name: Seq2seq checkpoint used for generation. The loaded
            tokenizer and model are cached per name, so the checkpoint is
            loaded on the first call only rather than on every message.

    Returns:
        The decoded, whitespace-stripped model output. If loading or
        generation raises, the error is printed and a canned fallback reply
        matching ``is_biased`` is returned instead.
    """
    try:
        tokenizer, model = _load_response_model(model_name)

        # Create an ethical, task-specific prompt. The wording is kept
        # topic-neutral so it fits any stereotype, not only leadership.
        if is_biased:
            prompt = (
                f"The user said: \"{user_input}\"\n\n"
                "Your job is to explain why this statement is problematic and provide an empowering response. "
                "Avoid agreeing with stereotypes, and explain clearly why a person's abilities are not determined by gender. "
                "Provide a factual, thoughtful, and empathetic response promoting gender equality."
            )
        else:
            prompt = (
                f"The user said: \"{user_input}\"\n\n"
                "Respond with a thoughtful, respectful, and inclusive reply that avoids any kind of stereotypes."
            )

        # Tokenize and generate
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            # Unpack the whole encoding so the attention mask reaches
            # generate() together with the input ids.
            output = model.generate(
                **inputs,
                max_length=512,
                temperature=0.7,
                do_sample=True
            )

        # Decode and clean response
        return tokenizer.decode(output[0], skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Error generating response: {e}")
        if is_biased:
            return ("I noticed some potential gender bias in your message. "
                    "People of every gender bring valuable strengths to any role. "
                    "Teams thrive on diversity, and gender-diverse teams "
                    "often perform better due to varied perspectives and problem-solving approaches.")
        else:
            return "I'm here to help. Could you clarify your question so I can give a better response?"

In [58]:
def chatbot():
    """Run the interactive bias-aware chat loop until the user leaves.

    Each non-blank message is checked with ``detect_gender_bias`` and then
    answered with ``get_llm_response``. The loop ends when the user types
    ``exit`` (any casing, surrounding whitespace ignored), when the input
    stream is closed, or when the cell is interrupted.
    """
    print("\n===== Gender Bias-Aware Chatbot =====")
    print("(Type 'exit' to end the conversation)")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed input stream or an interrupted kernel ends the chat
            # cleanly instead of surfacing a traceback.
            print("\nConversation ended.")
            break

        # Nothing typed: prompt again rather than sending an empty message
        # through both models.
        if not user_input:
            continue

        if user_input.lower() == 'exit':
            break

        # Check for gender bias (the numeric score is intentionally not shown)
        is_biased, _ = detect_gender_bias(user_input)

        # If bias is detected, inform the user
        if is_biased:
            print("\n[Bias detected. Generating empowering response]")

        # Generate response using the LLM with the specified prompt
        response = get_llm_response(user_input, is_biased)

        # Print the response
        print(f"\nChatbot: {response}")

In [None]:
# Announce start-up, then hand control to the interactive chat loop.
startup_banner = "Initializing Gender Bias Detection and Response System..."
print(startup_banner)
chatbot()

Initializing Gender Bias Detection and Response System...

===== Gender Bias-Aware Chatbot =====
(Type 'exit' to end the conversation)

You: women are not good leaders

[Bias detected. Generating empowering response]

Chatbot: Women are more skilled at addressing, analyzing, and resolving tasks than men, and women are often able to express their ideas effectively.

You: women dont belong in tech

[Bias detected. Generating empowering response]

Chatbot: Women in tech are becoming an essential part of our economy, and the best leaders in this field are women.

You: i'm searching for a job

Chatbot: I am looking for a job in the city of california
