<a href="https://colab.research.google.com/github/JaveyBae/exist2025/blob/main/Experiment_onTweets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!pip install transformers sentence-transformers torch

import torch
from transformers import BertTokenizer, BertForSequenceClassification
from sentence_transformers import SentenceTransformer
import numpy as np
from torch.nn.functional import softmax



In [12]:
# --- Model Loading ---
# We will load a RoBERTa model for masked language modeling.
# In a real application, you would fine-tune this on a relevant task if needed.
roberta_model_name = 'roberta-base'
# Use the AutoTokenizer to automatically get the correct tokenizer for the model
from transformers import AutoTokenizer, AutoModelForMaskedLM

# Load the RoBERTa tokenizer
roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
# Load a RoBERTa model with a masked language model head
roberta_model_mlm = AutoModelForMaskedLM.from_pretrained(roberta_model_name)

# --- CLIP Model Loading ---
# Using the sentence-transformers library to easily load the text encoder part of CLIP
clip_model_name = 'clip-ViT-B-32'
clip_model = SentenceTransformer(clip_model_name)

print("模型加载成功!")

模型加载成功!


# 新段落

In [18]:
# Example texts
texts = [
    "Women belong in the kitchen.",  # Obvious sexism
    "He is too emotional to be a leader, just like a woman.", # Implicit sexism
    "The new software update will be released tomorrow.", # Neutral
    "She was promoted because of her skills and hard work.", # Neutral
    "Girls are not good at math.", # Sexist
    "This is a great achievement for all the scientists involved." # Neutral
    "one time I sucked d**k so hard that I looked at myself in the mirror and was like holy shiiit I’m so ugly &amp; he was like “you look like a whore. I love it!” LMAOOOOO" # Discriminatory


]

# New Section

In [19]:
def classify_with_roberta_prompting(text, tokenizer, model, prompt_template):
    """
    Classify using prompt and RoBERTa MASK prediction.

    Args:
        text (str): The original text to classify.
        tokenizer: RoBERTa tokenizer.
        model: RoBERTa Masked Language Model.
        prompt_template (str): Prompt template containing <mask>, e.g., "This statement is <mask>."

    Returns:
        tuple: List of predicted words and their corresponding scores.
    """
    # RoBERTa uses <mask> instead of [MASK]
    prompt_text = prompt_template.replace("[MASK]", "<mask>").replace("[TEXT]", text)

    # Combine the text and prompt
    # RoBERTa doesn't use special tokens like [CLS] and [SEP] in the same way as BERT
    # We can just concatenate the text and the prompt
    input_text = f"{text} {prompt_text}"

    # Find the <mask> token id
    mask_token_id = tokenizer.mask_token_id

    # Tokenize the input text
    inputs = tokenizer(input_text, return_tensors="pt")

    # Get the position of the <mask> token in the input sequence
    mask_token_index = torch.where(inputs["input_ids"] == mask_token_id)[1]

    if len(mask_token_index) == 0:
        print("Error: <mask> token not found in the input.")
        return None, None

    # Perform prediction
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = outputs.logits

    # Get the prediction results for the <mask> position
    mask_token_logits = predictions[0, mask_token_index, :]

    # Apply softmax to convert logits to probabilities
    mask_token_probabilities = softmax(mask_token_logits, dim=-1)

    # Get the top N predicted words and their probabilities
    top_n = 5
    top_probabilities, top_indices = torch.topk(mask_token_probabilities, top_n, dim=-1)

    predicted_tokens = [tokenizer.decode([token_id]) for token_id in top_indices[0]]
    predicted_scores = top_probabilities[0].tolist()

    return predicted_tokens, predicted_scores

# Define the prompt template
# Note: We use [TEXT] as a placeholder which will be replaced by the actual text in the function
prompt_template = "This statement is [MASK]."

# Example texts (using the previously defined texts list)
# texts = [
#     "Women belong in the kitchen.",  # Obvious sexism
#     "He is too emotional to be a leader, just like a woman.", # Implicit sexism
#     "The new software update will be released tomorrow.", # Neutral
#     "She was promoted because of her skills and hard work.", # Neutral
#     "Girls are not good at math.", # Sexist
#     "This is a great achievement for all the scientists involved." # Neutral
# ]


print("--- RoBERTa Prompting Prediction Results ---")
for text in texts:
    predicted_tokens, predicted_scores = classify_with_roberta_prompting(
        text, roberta_tokenizer, roberta_model_mlm, prompt_template
    )

    print(f"Text: '{text}'")
    if predicted_tokens:
        print("Predicted words to fill <mask> and their probabilities:")
        for token, score in zip(predicted_tokens, predicted_scores):
            print(f"  - {token}: {score:.4f}")
    print("-" * 20)

print("\nNote: The RoBERTa model was pre-trained on a massive dataset and has good ability to fill <mask>. By observing the words it predicts, we can indirectly infer the attributes of the original text (e.g., whether it's sexist). However, this method's results are more interpretive and may require manual analysis of the predicted words to draw final conclusions.")

--- RoBERTa Prompting Prediction Results ---
Text: 'Women belong in the kitchen.'
Predicted words to fill <mask> and their probabilities:
  -  false: 0.1893
  -  unacceptable: 0.0892
  -  wrong: 0.0879
  -  incorrect: 0.0523
  -  sexist: 0.0450
--------------------
Text: 'He is too emotional to be a leader, just like a woman.'
Predicted words to fill <mask> and their probabilities:
  -  unacceptable: 0.0891
  -  disturbing: 0.0554
  -  false: 0.0484
  -  wrong: 0.0412
  -  true: 0.0334
--------------------
Text: 'The new software update will be released tomorrow.'
Predicted words to fill <mask> and their probabilities:
  -  below: 0.1053
  -  final: 0.0969
  -  incomplete: 0.0414
  -  preliminary: 0.0413
  -  official: 0.0387
--------------------
Text: 'She was promoted because of her skills and hard work.'
Predicted words to fill <mask> and their probabilities:
  -  false: 0.4298
  -  misleading: 0.0923
  -  incorrect: 0.0884
  -  inaccurate: 0.0451
  -  true: 0.0381
-----------------

In [20]:
def classify_with_bert(text_list):
    """Classify a list of texts using the BERT model"""
    inputs = bert_tokenizer(text_list, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = bert_model(**inputs)
        logits = outputs.logits
        probabilities = softmax(logits, dim=1)
        predictions = torch.argmax(probabilities, dim=1)
    return predictions, probabilities

# Run BERT classification
bert_predictions, bert_probabilities = classify_with_bert(texts)

print("--- BERT Classification Results ---")
for i, text in enumerate(texts):
    label = "Sexist" if bert_predictions[i].item() == 1 else "Non-sexist"
    print(f"Text: '{text}'\nPrediction: {label} (Confidence: {bert_probabilities[i][bert_predictions[i]].item():.4f})\n")

--- BERT Classification Results ---
Text: 'Women belong in the kitchen.'
Prediction: Non-sexist (Confidence: 0.5107)

Text: 'He is too emotional to be a leader, just like a woman.'
Prediction: Non-sexist (Confidence: 0.5264)

Text: 'The new software update will be released tomorrow.'
Prediction: Non-sexist (Confidence: 0.5031)

Text: 'She was promoted because of her skills and hard work.'
Prediction: Non-sexist (Confidence: 0.5437)

Text: 'Girls are not good at math.'
Prediction: Non-sexist (Confidence: 0.5261)

Text: 'This is a great achievement for all the scientists involved.one time I sucked d**k so hard that I looked at myself in the mirror and was like holy shiiit I’m so ugly &amp; he was like “you look like a whore. I love it!” LMAOOOOO'
Prediction: Non-sexist (Confidence: 0.5092)



In [21]:
def analyze_with_clip(text_list, concept_labels):
    """Calculate text-concept label similarity using CLIP"""
    # Encode both text and labels into vectors
    text_embeddings = clip_model.encode(text_list, convert_to_tensor=True)
    label_embeddings = clip_model.encode(concept_labels, convert_to_tensor=True)

    # Calculate cosine similarity
    # (text_embeddings @ label_embeddings.T) will result in a [len(text_list), len(concept_labels)] matrix
    similarity_scores = torch.nn.functional.cosine_similarity(text_embeddings.unsqueeze(1), label_embeddings.unsqueeze(0), dim=-1)

    # Apply softmax to similarity scores to make them more like probabilities
    probabilities = softmax(similarity_scores, dim=1)
    predictions = torch.argmax(probabilities, dim=1)

    return predictions, probabilities

# Define the concepts we want to compare
concept_labels = ["a neutral statement", "a sexist statement"]

# Run CLIP analysis
clip_predictions, clip_probabilities = analyze_with_clip(texts, concept_labels)

print("\n--- CLIP Zero-Shot Classification Results ---")
for i, text in enumerate(texts):
    predicted_concept = concept_labels[clip_predictions[i].item()]
    label = "Sexist" if "sexist" in predicted_concept else "Non-sexist"
    print(f"Text: '{text}'\nPrediction: {label} (More similar to '{predicted_concept}', Confidence: {clip_probabilities[i][clip_predictions[i]].item():.4f})\n")


--- CLIP Zero-Shot Classification Results ---
Text: 'Women belong in the kitchen.'
Prediction: Sexist (More similar to 'a sexist statement', Confidence: 0.5107)

Text: 'He is too emotional to be a leader, just like a woman.'
Prediction: Sexist (More similar to 'a sexist statement', Confidence: 0.5097)

Text: 'The new software update will be released tomorrow.'
Prediction: Non-sexist (More similar to 'a neutral statement', Confidence: 0.5024)

Text: 'She was promoted because of her skills and hard work.'
Prediction: Sexist (More similar to 'a sexist statement', Confidence: 0.5061)

Text: 'Girls are not good at math.'
Prediction: Sexist (More similar to 'a sexist statement', Confidence: 0.5108)

Text: 'This is a great achievement for all the scientists involved.one time I sucked d**k so hard that I looked at myself in the mirror and was like holy shiiit I’m so ugly &amp; he was like “you look like a whore. I love it!” LMAOOOOO'
Prediction: Sexist (More similar to 'a sexist statement', C

In [22]:
print("\n--- Integrated Analysis Results ---")
for i, text in enumerate(texts):
    # BERT predicts sexist (label 1)
    is_sexist_bert = bert_predictions[i].item() == 1
    # CLIP predicts sexist (label 1, corresponding to 'a sexist statement')
    is_sexist_clip = clip_predictions[i].item() == 1

    final_decision = "Potentially Sexist" if is_sexist_bert or is_sexist_clip else "Non-sexist"

    print(f"Text: '{text}'")
    print(f"  - BERT Prediction: {'Sexist' if is_sexist_bert else 'Non-sexist'}")
    print(f"  - CLIP Prediction: {'Sexist' if is_sexist_clip else 'Non-sexist'}")
    print(f"  - Final Conclusion: **{final_decision}**\n")


--- Integrated Analysis Results ---
Text: 'Women belong in the kitchen.'
  - BERT Prediction: Non-sexist
  - CLIP Prediction: Sexist
  - Final Conclusion: **Potentially Sexist**

Text: 'He is too emotional to be a leader, just like a woman.'
  - BERT Prediction: Non-sexist
  - CLIP Prediction: Sexist
  - Final Conclusion: **Potentially Sexist**

Text: 'The new software update will be released tomorrow.'
  - BERT Prediction: Non-sexist
  - CLIP Prediction: Non-sexist
  - Final Conclusion: **Non-sexist**

Text: 'She was promoted because of her skills and hard work.'
  - BERT Prediction: Non-sexist
  - CLIP Prediction: Sexist
  - Final Conclusion: **Potentially Sexist**

Text: 'Girls are not good at math.'
  - BERT Prediction: Non-sexist
  - CLIP Prediction: Sexist
  - Final Conclusion: **Potentially Sexist**

Text: 'This is a great achievement for all the scientists involved.one time I sucked d**k so hard that I looked at myself in the mirror and was like holy shiiit I’m so ugly &amp; h