POLITICAL BIAS

In [35]:
from google.colab import drive
import pandas as pd
from IPython.display import display

In [36]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
csv_file_path = '/content/drive/MyDrive/Colab Notebooks/data/random_selected_summary.csv'

In [52]:
df = pd.read_csv(csv_file_path)

In [39]:
display(df.head())

Unnamed: 0,title,cleaned_article,gpt4_summary
0,Poland abortion: Polish women look for help in...,Poland abortion Polish women look for help in ...,After Poland enacted a near-total ban on abort...
1,Hong Kong Cardinal advocates for promoting a c...,Hong Kong Cardinal advocates for promoting a c...,Cardinal John Tong of Hong Kong urged Christia...
2,Sex Education in the Philippines - The Borgen ...,Sex Education in the Philippines - The Borgen ...,The article discusses the state of sex educati...
3,"When a fetal scan showed problems, she fled Id...","When a fetal scan showed problems, she fled Id...","Jillaine St. Michel, a 37-year-old mother from..."
4,Prioritizing the Neglected Areas of SRHR in Ca...,Prioritizing the Neglected Areas of SRHR in Ca...,The article discusses the need to prioritize n...


In [40]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="bucketresearch/politicalBiasBERT")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [41]:
# Load model directly
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("bucketresearch/politicalBiasBERT")
model = AutoModelForSequenceClassification.from_pretrained("bucketresearch/politicalBiasBERT")
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [42]:
def score_article(article):
    # Tokenize the input article
    inputs = tokenizer(article, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Get model predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Apply softmax to get probabilities
    probs = torch.softmax(outputs.logits, dim=1)

    # Get predicted class and probability
    predicted_class = torch.argmax(probs, dim=1).item()
    class_probabilities = probs.squeeze().tolist()

    return predicted_class, class_probabilities


In [43]:
# Append results to the list
      # [0] -> left
      # [1] -> center
      # [2] -> right
# Initialize a list to store results
results = []
# Iterate over the first 10 articles in the DataFrame
for _, article in df.head(10).iterrows():  # Using iterrows() to access each row
    # Score the cleaned article
    article_predicted_class, article_class_probabilities = score_article(article['cleaned_article'])

    # Score the summary
    summary_predicted_class, summary_class_probabilities = score_article(article['gpt4_summary'])

    # Append results to the list
    results.append({
        "Article Title": article['title'],  # Assuming you want to use the article's title
        "Article Predicted Class": article_predicted_class,
        "Article Class Probabilities": article_class_probabilities,
        "GPT Predicted Class": summary_predicted_class,
        "GPT Class Probabilities": summary_class_probabilities
    })

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Display the results in tabular format
display(df)


Unnamed: 0,Article Title,Article Predicted Class,Article Class Probabilities,GPT Predicted Class,GPT Class Probabilities
0,Poland abortion: Polish women look for help in...,0,"[0.9828202724456787, 0.012913397513329983, 0.0...",1,"[0.11445138603448868, 0.7803583741188049, 0.10..."
1,Hong Kong Cardinal advocates for promoting a c...,0,"[0.5691642761230469, 0.19896821677684784, 0.23...",2,"[0.11200925707817078, 0.0830753743648529, 0.80..."
2,Sex Education in the Philippines - The Borgen ...,0,"[0.4743711054325104, 0.3002409040927887, 0.225...",0,"[0.9959208965301514, 0.002078546676784754, 0.0..."
3,"When a fetal scan showed problems, she fled Id...",2,"[0.3486248254776001, 0.19945968687534332, 0.45...",1,"[0.3855026662349701, 0.47100964188575745, 0.14..."
4,Prioritizing the Neglected Areas of SRHR in Ca...,0,"[0.6540043950080872, 0.28962942957878113, 0.05...",0,"[0.9946122169494629, 0.00312062562443316, 0.00..."
5,Abortion policy activism heats up for Roe v. W...,0,"[0.9927558302879333, 0.004444750491529703, 0.0...",0,"[0.9956940412521362, 0.0019656834192574024, 0...."
6,Barbara Kruger’s 'Your Body is a Battleground'...,0,"[0.5994633436203003, 0.33239686489105225, 0.06...",0,"[0.9554882645606995, 0.027878064662218094, 0.0..."
7,Abortion 'practically banned in Turkey',0,"[0.9774123430252075, 0.019876575097441673, 0.0...",1,"[0.0974910780787468, 0.6852431297302246, 0.217..."
8,"Mitch McConnell, Senate Republicans Vote to St...",0,"[0.46743670105934143, 0.19386853277683258, 0.3...",0,"[0.9953905344009399, 0.002250072779133916, 0.0..."
9,DeSantis signs 15-week abortion ban into law d...,1,"[0.11015105247497559, 0.8716214895248413, 0.01...",1,"[0.13631711900234222, 0.445563942193985, 0.418..."


Hallucination

In [None]:
# # Use a pipeline as a high-level helper
# from transformers import pipeline

# pipe = pipeline("text-classification", model="vectara/hallucination_evaluation_model", trust_remote_code=True)

In [None]:
# from transformers import AutoModelForSequenceClassification, PreTrainedTokenizerFast
# import torch
# # Load model directly
# from transformers import AutoModelForSequenceClassification
# model = AutoModelForSequenceClassification.from_pretrained("vectara/hallucination_evaluation_model", trust_remote_code=True)

In [None]:
# import pandas as pd
# from transformers import AutoModelForSequenceClassification


# # Article text
# article_text = """
# Following a recent constitutional court ruling that imposes a near-total ban on abortion in Poland, widespread protests erupted, marking the largest demonstrations since the fall of communism in 1989. The protests, primarily organized by the grassroots initiative Women’s Strike, are characterized by several significant symbols. The red lightning bolt, designed by Ola Jasionowska, serves as a warning against depriving women of their rights, although critics have attempted to associate it with Nazi insignia. The slogan "Wypierdala," meaning "Fuck off," has been prominently used, often alongside the iconic Solidarity logo. Other notable symbols include coat hangers, which highlight the dangers of illegal abortions, black umbrellas symbolizing mourning and resistance, and the anchor symbol representing Poland's fight against occupation, which has been adapted to signify "Polish Women Fighting." These symbols reflect the deep-seated anger towards the ruling Law and Justice party and the Catholic Church's influence over reproductive rights in Poland, showcasing the protestors' determination to defend women's rights.
# """

# # Summary text
# summary_text = """
# Following a recent constitutional court ruling that imposes a near-total ban on abortion in Poland, widespread protests erupted, marking the largest demonstrations since the fall of communism in 1989. The protests, primarily organized by the grassroots initiative Women’s Strike, are characterized by several significant symbols, such as the red lightning bolt, the slogan "Wypierdala," coat hangers, black umbrellas, and an adapted anchor symbol. These symbols reflect the protestors' anger towards the ruling Law and Justice party and the Catholic Church's influence over reproductive rights, showcasing their determination to defend women's rights.
# """


# # Define the test data, List[Tuple[str, str]]
# pairs = [
#     (article_text, summary_text)
# ]

# # Step 1: Load the model
# model = AutoModelForSequenceClassification.from_pretrained('vectara/hallucination_evaluation_model', trust_remote_code=True)

# # Step 2: Use the model to predict
# # Here you may need to transform pairs to the required input format if necessary
# predictions = model.predict(pairs).numpy()  # Assuming predictions return an array-like structure

# # Prepare the predictions (0 means not hallucinated, 1 means hallucinated)
# # You might need to apply a threshold based on your model's outputs
# # For demonstration, let's assume values close to 0.5 are considered hallucinated (use your model's logic)
# predicted_labels = [1 if pred > 0.5 else 0 for pred in predictions]

# # Create a DataFrame to display results in tabular format
# results_df = pd.DataFrame({
#    'Prediction': predicted_labels
# })

# # Display the results
# print(results_df)
