### Imports

In [None]:
%pip install transformers
import pandas as pd
import random
from transformers import AutoTokenizer
import matplotlib
from tqdm import tqdm
from sklearn.metrics import f1_score
import ollama
from time import time

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the annotated articles from the CSV (path is relative to this notebook).
df = pd.read_csv(
    "../../script/data_annotation/Attack_scenarios_350_tokens.csv"
)
df  # bare expression so the notebook renders the table

In [None]:
# Keep only the columns used downstream and drop the neutral class.
# `.copy()` makes df_examples an independent frame so the column assignment
# below does not act on a slice of `df` (avoids SettingWithCopyWarning).
df_examples = df.loc[
    df['rounded_average_int'] != 'neither believes nor disbelieves',
    ['title', 'rounded_average_int', 'news_cleaned_350'],
].copy()

# Strip the 'completely ' prefix from labels that carry it
# (e.g. 'completely believes' -> 'believes').
df_examples['rounded_average_int'] = df_examples['rounded_average_int'].str.replace('completely ', '', regex=False)
df_examples

### Naive prompting

In [None]:
import ollama
import pandas as pd
from tqdm import tqdm

def classify_with_ollama(news_article: str, *, model: str = "llama3.1:8b") -> str:
    """
    Classify a single news article into 'believes' or 'disbelieves' using Ollama (naive, zero-shot).

    Args:
        news_article: Article text inserted verbatim into the prompt.
        model: Ollama model tag to query (keyword-only; defaults to the tag
            this notebook used originally).

    Returns:
        'believes' or 'disbelieves'. The first of those two words found in the
        lower-cased reply wins, so answers such as "Believes." or
        "Label: believes" are parsed correctly. If neither word appears the
        function falls back to 'disbelieves'; note that this fallback counts
        unparseable replies as 'disbelieves' predictions in any later metric.
    """
    # Local import: `re` is not imported by any cell that runs before this one.
    import re

    prompt = f"""
Read the following news article and classify it into exactly one category:

- believes
- disbelieves

Rules:
- Answer with ONLY the label, no explanation.

News: "{news_article}"

Label:
"""

    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    raw = (response.get("message", {}).get("content", "") or "").strip().lower()
    # 'disbelieves' is listed first only for readability; \b prevents
    # 'believes' from matching inside 'disbelieves'.
    match = re.search(r"\b(disbelieves|believes)\b", raw)
    if match is None:
        return "disbelieves"  # fallback default
    return match.group(1)


# ---- Apply to your DataFrame ----
# Register `.progress_apply` on pandas objects, then classify every article.
tqdm.pandas()
naive_predictions = df_examples["news_cleaned_350"].progress_apply(classify_with_ollama)
df_examples["belief_classification"] = naive_predictions

# Quick look at the first few article/prediction pairs.
print(df_examples[["news_cleaned_350", "belief_classification"]].head())


In [None]:
import pandas as pd
import numpy as np
from typing import Sequence, Tuple, Dict, Any
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix
)

def compute_label_metrics(
    df: pd.DataFrame,
    true_col: str,
    pred_col: str,
    classes: Sequence[str] = ("disbelieves", "believes"),
    nan_placeholder: str = "__nan__",       # what we map NaN/empty preds to
    normalize: bool = True                  # lowercase/strip both columns
) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """
    Compute classification metrics while KEEPING all rows.
    Any NaN/empty prediction is treated as a mismatch (mapped to `nan_placeholder`).

    Args:
      df: frame holding the gold and predicted labels (not mutated).
      true_col: name of the gold-label column.
      pred_col: name of the prediction column.
      classes: the real class labels, in the order used for the confusion
        matrix and for the per-class result columns. Any number of classes
        is supported.
      nan_placeholder: label substituted for missing predictions; it must not
        collide with a real class.
      normalize: when True, both columns are cast to str, stripped and
        lower-cased before comparison.

    Returns:
      results_df: 1-row DataFrame with, for every class `c` in `classes`:
        support_c, mis_c, correct_c, f1_c,
        plus accuracy, f1_macro, f1_weighted, total_samples
      details: dict containing confusion matrix, per-class report, counts, and
        `num_rows_with_nan_pred` (rows whose prediction ended up as the
        placeholder after cleaning).
    """
    labels = list(classes)

    # Copy to avoid mutating caller data
    d = df[[true_col, pred_col]].copy()

    if normalize:
        d[true_col] = d[true_col].astype(str).str.strip().str.lower()
        # astype(str) can turn missing values into the literal strings
        # "nan" / "none" / "<na>"; map those and empty strings to the placeholder.
        pred_norm = d[pred_col].astype(str).str.strip().str.lower()
        d[pred_col] = pred_norm.replace(
            {token: nan_placeholder for token in ("", "nan", "none", "<na>")}
        )

    # Real missing values (normalize=False, or a dtype that keeps NaN through
    # astype(str)) also become the placeholder.
    d[pred_col] = d[pred_col].fillna(nan_placeholder)

    y_true = d[true_col].values
    y_pred = d[pred_col].values

    # --- Metrics ---
    # Accuracy over ALL rows; placeholder ensures NaN preds are mismatches
    accuracy = accuracy_score(y_true, y_pred)

    # Per-class report (only over REAL classes, exclude placeholder from averaging)
    report = classification_report(
        y_true, y_pred, labels=labels, output_dict=True, zero_division=0
    )
    f1_per_class = {c: report[c]["f1-score"] for c in labels}

    # Macro / weighted across REAL classes
    f1_macro = f1_score(y_true, y_pred, labels=labels, average="macro", zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, labels=labels, average="weighted", zero_division=0)

    # Confusion matrix ONLY for REAL classes (rows=true, cols=pred)
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Supports from y_true (per real class)
    supports = pd.Series(y_true).value_counts().reindex(labels, fill_value=0)

    # Correct = diagonal; misclassified = everything else with that true label,
    # including predictions that fell outside `classes` (e.g. the placeholder).
    correct = {c: int(cm[i, i]) for i, c in enumerate(labels)}
    mis = {c: int(supports[c] - correct[c]) for c in labels}

    # Column order: all supports, all mis, all correct, accuracy, per-class F1,
    # then the aggregates.
    results: Dict[str, Any] = {}
    for c in labels:
        results[f"support_{c}"] = int(supports[c])
    for c in labels:
        results[f"mis_{c}"] = mis[c]
    for c in labels:
        results[f"correct_{c}"] = correct[c]
    results["accuracy"] = accuracy
    for c in labels:
        results[f"f1_{c}"] = f1_per_class[c]
    results["f1_macro"] = f1_macro
    results["f1_weighted"] = f1_weighted
    results["total_samples"] = len(d)

    details = {
        "confusion_matrix_labels": labels,
        "confusion_matrix": cm,
        "classification_report": report,
        "supports": supports.to_dict(),
        "correct": correct,
        "mis": mis,
        # Counted on the cleaned predictions so empty/"nan" strings are
        # included, matching how they are scored above.
        "num_rows_with_nan_pred": int((d[pred_col] == nan_placeholder).sum()),
    }

    return pd.DataFrame([results]), details


In [None]:
# Score the naive zero-shot predictions against the annotated labels.
results_df, details = compute_label_metrics(
    df_examples,
    true_col="rounded_average_int",
    pred_col="belief_classification",
    classes=("disbelieves", "believes"),  # change if your labels differ
)

print(results_df)  # one-row table of the metrics

# Label order first, then the confusion matrix (rows=true, cols=pred) in that order.
for key in ("confusion_matrix_labels", "confusion_matrix"):
    print(details[key])


### COLA

In [None]:
# COLA stage 1a: linguistic analysis of one article.
def linguist_analysis_with_ollama(news_text):
    """
    Ask the model, in the role of a linguist, to analyse `news_text`.

    Returns the stripped reply text, or "No response received" when the
    response carries no message content.
    """
    template = """
You are a linguist trained to analyze written language.

Accurately and concisely explain the linguistic elements in the following sentence, including:
- grammatical structure,
- tense and inflection,
- figurative or virtual speech,
- rhetorical devices,
- lexical choices,
and how these elements affect meaning.

Only provide linguistic analysis. Do not summarize the content or give irrelevant commentary.

Text: "{}"
"""
    prompt = template.format(news_text)

    chat_messages = [{"role": "user", "content": prompt}]
    # Adjust the model tag if needed, like "llama3:70b"
    reply = ollama.chat(model="llama3.1:8b", messages=chat_messages)

    content = reply.get("message", {}).get("content", "No response received")
    return content.strip()


In [None]:
# The notebook imports `from time import time`, so `time.time()` would raise
# AttributeError; use a monotonic timer imported locally instead.
from time import perf_counter

# Enable progress bar support for pandas
tqdm.pandas()

# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

# Track runtime
start_time = perf_counter()

# Apply the function with progress bar
df_examples['linguistic_analysis'] = df_examples[text_col].progress_apply(linguist_analysis_with_ollama)

# Print total time taken
end_time = perf_counter()
print(f"Total time taken: {end_time - start_time:.2f} seconds")

In [None]:
def expert_analysis_with_ollama(news_text):
    """
    Ask the model, in the role of a political scientist, to explain the key
    political elements of `news_text`.

    Returns the stripped reply text, or "No response received" when the
    response carries no message content.
    """
    role_instruction = (
        "You are a political scientist trained to analyze political content in text.\n\n"
        "Accurately and concisely explain the key elements in the following quote, such as:\n"
        "- characters\n"
        "- events\n"
        "- political parties\n"
        "- organizations\n"
        "- religious or ideological references\n\n"
        "Also explain their relevance or relationship to politics (if any). Do nothing else."
    )
    full_prompt = f"{role_instruction}\n\nText: \"{news_text}\""

    # or "llama3:70b" if you're using the larger version
    reply = ollama.chat(
        model="llama3:8b",
        messages=[{"role": "user", "content": full_prompt}],
    )

    message = reply.get("message", {})
    return message.get("content", "No response received").strip()


In [None]:
# The notebook imports `from time import time`, so `time.time()` would raise
# AttributeError; use a monotonic timer imported locally instead.
from time import perf_counter

tqdm.pandas()

# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

start_time = perf_counter()

df_examples['expert_analysis'] = df_examples[text_col].progress_apply(expert_analysis_with_ollama)

print(f"Total time taken: {perf_counter() - start_time:.2f} seconds")


In [None]:
def user_analysis_with_ollama(text):
    """
    Ask the model, in the role of a heavy social media user, to analyse `text`.

    Args:
        text: Article text. Non-string values are converted with `str()`.

    Returns:
        "No valid content provided" when `text` is missing (None / NaN / NA)
        or blank after stripping; otherwise the stripped model reply, or
        "No response received" when the response carries no message content.
    """
    # Missing values must be caught BEFORE str(): str(None) / str(nan) would
    # give the non-empty strings "None" / "nan" and be sent to the model.
    if not isinstance(text, str):
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return "No valid content provided"
        text = str(text)
    text = text.strip()

    # If the text is empty after cleaning, return a default response
    if not text:
        return "No valid content provided"

    instruction = (
        "You are a heavy social media user trained to analyze social content.\n\n"
        "Analyze the following sentence, focusing on:\n"
        "- the content\n"
        "- hashtags\n"
        "- Internet slang and colloquialisms\n"
        "- emotional tone\n"
        "- implied meaning\n\n"
        "Do nothing else."
    )

    # Construct the full prompt
    prompt = f"{instruction}\n\nText: \"{text}\""

    # Call the Ollama API
    response = ollama.chat(
        model="llama3:8b",
        messages=[{"role": "user", "content": prompt}]
    )

    # Return the response content or a fallback message
    return response.get("message", {}).get("content", "No response received").strip()

In [None]:
# The notebook imports `from time import time`, so `time.time()` would raise
# AttributeError; use a monotonic timer imported locally instead.
from time import perf_counter

tqdm.pandas()

# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

start_time = perf_counter()

df_examples['user_analysis'] = df_examples[text_col].progress_apply(user_analysis_with_ollama)

print(f"Total time taken: {perf_counter() - start_time:.2f} seconds")


In [None]:
import ollama

def get_completion(prompt, model="llama3:8b", temperature=0):
    """
    Use Ollama to generate a completion based on a given prompt.

    Args:
        prompt: Text sent as a single user message.
        model: Ollama model tag to query.
        temperature: Sampling temperature, forwarded to Ollama through the
            `options` argument. Previously this parameter was accepted but
            never passed on, so the model's own default was used.

    Returns:
        The stripped content of the model's reply.
    """
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": temperature},
    )
    return response['message']['content'].strip()

def believability_analysis(news, ling_response, expert_response, user_response, target):
    """
    Build the COLA debate prompt for one stance and return the model's reply.

    The prompt wraps the news text and the three analyses in distinct
    delimiters, explains those delimiters, and asks for the top three pieces
    of evidence regarding `target` (either 'believes' or 'disbelieves').
    The completion is obtained via `get_completion`.
    """
    # Delimited inputs: news, linguistic, expert and social-media analyses.
    delimited_inputs = (
        f"'''{news}'''\n"
        f"<<<{ling_response}>>>\n"
        f"[[[{expert_response}]]]\n"
        f"---{user_response}---\n\n"
    )

    # Legend telling the model what each delimiter pair contains.
    legend = (
        f"Based on the content provided:\n"
        f"- ''' ''' contains the news statement.\n"
        f"- <<< >>> contains linguistic cues (e.g., hedging, assertiveness).\n"
        f"- [[[ ]]] is the expert analysis (factual correctness, consistency, credibility).\n"
        f"- --- --- reflects the perception of a heavy social media user.\n\n"
    )

    # The stance-specific question.
    question = (
        f"Do you think the news statement is more aligned with the belief that it is **{target}**?\n"
        f"Identify the top three pieces of evidence from the above sources that justify your conclusion, "
        f"and explain why they support or undermine the believability of the news."
    )

    return get_completion(delimited_inputs + legend + question)



In [None]:
# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

# Arguments FOR believability, one model call per row.
df_examples['believes_response'] = df_examples.progress_apply(
    lambda row: believability_analysis(
        row[text_col],
        row['linguistic_analysis'],
        row['expert_analysis'],
        row['user_analysis'],
        target="believes"
    ),
    axis=1
)

In [None]:
# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

# Arguments AGAINST believability, one model call per row.
df_examples['disbelieves_response'] = df_examples.progress_apply(
    lambda row: believability_analysis(
        row[text_col],
        row['linguistic_analysis'],
        row['expert_analysis'],
        row['user_analysis'],
        target="disbelieves"
    ),
    axis=1
)

In [None]:
def final_judgement(news, believes_response, disbelieves_response):
    """
    Ask the model for a final A/B verdict on `news` given both sets of arguments.

    The prompt offers "A: Not Believable" and "B: Believable" and requests only
    the chosen option. Returns the stripped reply text as-is (no parsing here).
    """
    task = (
        f"Determine whether the following news statement is believable or not believable.\n\n"
        f"News: '''{news}'''\n\n"
    )
    arguments = (
        f"Judge this in relation to the following arguments:\n"
        f"Arguments that the statement is believable: {believes_response}\n"
        f"Arguments that the statement is not believable: {disbelieves_response}\n\n"
    )
    options = (
        f"Choose from:\n"
        f"A: Not Believable\nB: Believable\n\n"
        f"Constraint: Answer with only the option above that is most accurate and nothing else."
    )
    verdict_prompt = task + arguments + options

    reply = ollama.chat(
        model='llama3:8b',
        messages=[{"role": "user", "content": verdict_prompt}],
    )
    return reply['message']['content'].strip()


In [None]:
# Step 3: Apply final_judgement to decide the believability label

# df_examples as built earlier only carries 'news_cleaned_350'; fall back to it
# when no 'news_cleaned' column is present.
text_col = 'news_cleaned' if 'news_cleaned' in df_examples.columns else 'news_cleaned_350'

df_examples['final_response'] = df_examples.progress_apply(
    lambda row: final_judgement(
        row[text_col],
        row['believes_response'],
        row['disbelieves_response']
    ),
    axis=1
)

In [None]:
import re

# Extract only 'A' or 'B' from the noisy LLM response in 'final_response'.
responses = df_examples['final_response'].astype(str)

# Primary rule: a standalone UPPER-CASE option letter. Matching is deliberately
# case-sensitive: with IGNORECASE the English article "a" in a verbose reply
# ("this is a believable claim ... B") would be read as option A.
option_strict = responses.str.extract(r'\b([AB])\b', expand=False)

# Fallback: a reply that STARTS with a lower-case option letter followed by a
# delimiter or nothing else (e.g. "b", "a: not believable").
option_lower = responses.str.extract(r'^\W*([ab])\s*(?:[:.)\-]|$)', expand=False).str.upper()

df_examples['final_response_ab'] = option_strict.fillna(option_lower)

# Drop rows where extraction failed (i.e., no A or B found), and say how many,
# because every later cell that uses df_examples sees the reduced frame.
rows_before = len(df_examples)
df_examples = df_examples.dropna(subset=['final_response_ab']).copy()
print(f"Dropped {rows_before - len(df_examples)} of {rows_before} rows without a parseable A/B answer")


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Step 1: Convert final model outputs (e.g., A/B) to labels
def map_judgement_to_label(x):
    """
    Translate an A/B verdict into a class label.

    'A' -> 'disbelieves', 'B' -> 'believes' (surrounding whitespace and case
    are ignored). Anything else, including non-string input, -> 'unknown'.
    """
    if not isinstance(x, str):
        return 'unknown'
    option_to_label = {'A': 'disbelieves', 'B': 'believes'}
    return option_to_label.get(x.strip().upper(), 'unknown')

In [None]:


# Apply the label mapping to the extracted A/B option
df_examples['final_label'] = df_examples['final_response_ab'].apply(map_judgement_to_label)

# Step 2: Remove 'unknown' if any
df_filtered = df_examples[df_examples['final_label'] != 'unknown']

# Step 3: Evaluate
y_true = df_filtered['rounded_average_int']  # your actual column
y_pred = df_filtered['final_label']

# Pass `labels` explicitly so each row of the report is tied to its name.
# With `target_names` alone, scikit-learn pairs the names with the sorted
# labels it happens to find, which fails when only one class is present.
report_labels = ['believes', 'disbelieves']
print(classification_report(
    y_true, y_pred,
    labels=report_labels,
    target_names=report_labels,
    zero_division=0,
))


### In-context learning

In [None]:
# Function to generate the few-shot example block for one news article
def generate_prompt(df, row_index, limit, text_col='news_cleaned',
                    label_col='rounded_average_int', n_per_label=10):
    """
    Build the in-context examples string for the article at `row_index`.

    The target row is removed from `df` first so it can never appear among its
    own examples. For every distinct label, `n_per_label` texts are drawn:
    without replacement when enough exist, otherwise all of them plus random
    repeats. Labels are emitted in order of first appearance, each label's
    examples grouped together.

    Args:
        df: frame with one article per row.
        row_index: index label of the article being classified (dropped).
        limit: maximum number of CHARACTERS kept from each example text
            (plain string slicing, not tokenisation).
        text_col: column holding the article text.
        label_col: column holding the class label.
        n_per_label: number of examples emitted per label.

    Returns:
        A string of 'News: "<text>"\\nLabel: <label>\\n\\n' entries.
    """
    candidates = df.drop(row_index)

    entries = []
    # dropna(): a NaN label never equals itself, so it would give an empty pool.
    for label in candidates[label_col].dropna().unique():
        pool = (
            candidates.loc[candidates[label_col] == label, text_col]
            .dropna()
            .astype(str)
            .tolist()
        )
        if not pool:
            continue  # nothing to sample for this label

        if len(pool) >= n_per_label:
            # Take n random unique samples
            chosen = random.sample(pool, n_per_label)
        else:
            # Take all available, then duplicate randomly to reach n
            chosen = pool + random.choices(pool, k=n_per_label - len(pool))

        # The label is carried along with each sampled text. Looking it up
        # again by text equality would return the wrong label for a text that
        # occurs under more than one label.
        entries.extend(
            f'News: "{text[:limit]}"\nLabel: {label}\n\n' for text in chosen
        )

    return "".join(entries)


In [None]:
# Few-shot classification: the prompt embeds the pre-formatted examples.
def classify_with_ollama(news_article, example_str):
    """
    Classify `news_article` with the model, guided by `example_str`.

    Returns the model's stripped reply unchanged (no label parsing here), or
    "No response received" when the response carries no message content.
    """
    prompt = f"""
    You are an expert trained to classify news articles into 2 categories based on believability: 
    This news indicates either believing or not into the below defined categories. 
    For each news, label it as one of the following categories: 'disbelieves', 
    'believes'. I have provided a few examples below to guide you. 
    Only respond with the category, no explanation needed

    {example_str}

    Now classify the following news article:
    News: "{news_article}"
    Label:
    """

    ### change model here
    chat_messages = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model="llama3.1:8b", messages=chat_messages)

    content = reply.get("message", {}).get("content", "No response received")
    return content.strip()


In [None]:
res = []
token_limit = 350  # changed to 225/550/700 depending on the model token limit
# Cut-off applied to the article being classified. Both this and token_limit
# are CHARACTER counts (plain string slicing), despite the names.
query_char_limit = 300

# generate_prompt reads the text from a 'news_cleaned' column, but df_examples
# as built earlier only carries 'news_cleaned_350'. Expose that column under
# the expected name without touching df_examples itself.
if 'news_cleaned' in df_examples.columns:
    icl_pool = df_examples
else:
    icl_pool = df_examples.rename(columns={'news_cleaned_350': 'news_cleaned'})

for index, row in tqdm(icl_pool.iterrows(), total=len(icl_pool)):
    df_cur = row.to_frame().T  # keep the row as a one-row frame for pd.concat later
    # Examples are sampled from every row except the current one.
    gen_string = generate_prompt(icl_pool, index, token_limit)
    query_text = str(row['news_cleaned'])[:query_char_limit]
    df_cur['predicted_label'] = classify_with_ollama(query_text, gen_string)
    res.append(df_cur)

In [None]:
# Stack the one-row frames collected in `res` into a single table with a
# fresh 0..n-1 index.
df_all_preds = pd.concat(res, axis=0, ignore_index=True)

In [None]:
import re
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# Choose the single predicted column to evaluate
pred_col = 'predicted_label'  # <-- change this if needed
clean_col = f'final_response_cleaned_{pred_col}'

ordered_labels = ['disbelieves', 'believes']  # consistent ordering

# First of the two label words found in the lower-cased text; \b keeps
# 'believes' from matching inside 'disbelieves'.
label_pattern = r'\b(disbelieves|believes)\b'

# 1) Clean y_true (string labels)
y_true_str = (
    df_all_preds['rounded_average_int']
    .astype(str).str.lower()
    .str.extract(label_pattern, expand=False)
)

metrics = []

# 2) Clean the predicted column to canonical strings and evaluate
df_all_preds[clean_col] = (
    df_all_preds[pred_col]
    .astype(str).str.lower()
    .str.extract(label_pattern, expand=False)
)

# Evaluate only where both y_true and y_pred are present. Rows whose
# prediction could not be parsed are EXCLUDED from scoring, so n_scored can
# be smaller than the number of rows.
mask = y_true_str.notna() & df_all_preds[clean_col].notna()
y_t = y_true_str[mask]
y_p = df_all_preds.loc[mask, clean_col]
n_scored = int(mask.sum())

print(f"\n=== Classification Report for {pred_col} (n={n_scored} rows) ===")

if n_scored > 0:
    # The report is only meaningful (and only safe to compute) with data.
    print(classification_report(
        y_t, y_p,
        labels=ordered_labels,
        target_names=ordered_labels,
        digits=4,
        zero_division=0
    ))

    # Confusion matrix (rows=true, cols=pred) in ordered_labels order
    cm = confusion_matrix(y_t, y_p, labels=ordered_labels)
    # Supports (true counts)
    support_dis = int(cm[0, :].sum())
    support_bel = int(cm[1, :].sum())
    # Misclassifications per true class
    mis_dis = int(cm[0, 1])  # true disbelieves predicted as believes
    mis_bel = int(cm[1, 0])  # true believes predicted as disbelieves
    # Correct counts (diagonal)
    corr_dis = int(cm[0, 0])
    corr_bel = int(cm[1, 1])

    acc = accuracy_score(y_t, y_p)
    # Per-class F1 via labels + average=None: unlike pos_label=..., this does
    # not raise when one of the classes is absent from the scored rows.
    f1_dis, f1_bel = (
        float(score)
        for score in f1_score(y_t, y_p, labels=ordered_labels, average=None, zero_division=0)
    )
    # Averages are always taken over both classes, even if one is absent.
    f1_macro = f1_score(y_t, y_p, labels=ordered_labels, average='macro',    zero_division=0)
    f1_wt    = f1_score(y_t, y_p, labels=ordered_labels, average='weighted', zero_division=0)
else:
    print("No rows with both a parseable true label and a parseable prediction.")
    support_dis = support_bel = mis_dis = mis_bel = corr_dis = corr_bel = np.nan
    acc = f1_dis = f1_bel = f1_macro = f1_wt = np.nan

metrics.append({
    'variant': pred_col,
    'n_scored': n_scored,
    'support_disbelieves': support_dis,
    'support_believes': support_bel,
    'mis_disbelieves': mis_dis,
    'mis_believes': mis_bel,
    'correct_disbelieves': corr_dis,
    'correct_believes': corr_bel,
    'accuracy': acc,
    'f1_disbelieves': f1_dis,
    'f1_believes': f1_bel,
    'f1_macro': f1_macro,
    'f1_weighted': f1_wt
})

# 3) Summary table
metrics_df = pd.DataFrame(metrics).sort_values(['f1_macro', 'accuracy'], ascending=False)
print("\n=== Summary (sorted by macro-F1, then accuracy) ===")
print(metrics_df.to_string(index=False))

# Optionally save
# metrics_df.to_csv("metrics_summary_with_misclass_for_news_cleaned.csv", index=False)
