In [76]:
import sys
import os
from pathlib import Path

# Put the parent of the notebook's working directory at the front of the import
# path (only once), so the `src` package imported below can be resolved.
project_root = Path.cwd().parent
root_entry = os.fspath(project_root)
if sys.path.count(root_entry) == 0:
    sys.path[:0] = [root_entry]

from src.predictive_models import (
    load_datasets,
    build_frequency_model,
    predict_frequency_model,
    build_sensationalism_model,
    predict_sensationalism_model,
    build_malicious_account_model,
    predict_malicious_account_model,
    build_naive_realism_model,
    predict_naive_realism_model,
    map_sensationalism_from_counts,
    map_naive_realism_from_sentiment
)

import pandas as pd
import json
from sklearn.metrics import accuracy_score




# Ground Truth

In [158]:
# Labelled evaluation articles: statement text, one score column per factor
# (frequency_heuristic, malicious_account, sensationalism, naive_realism) and the link.
ground_truth = pd.read_csv(filepath_or_buffer='../data/ground_truth.csv')
ground_truth.head(n=5)

Unnamed: 0,statement,frequency_heuristic,malicious_account,sensationalism,naive_realism,link
0,A president steeped in constitutional lore mig...,2,0,2,2,https://www.cnn.com/2025/10/20/politics/trump-...
1,"Trump is 'committed' to $2,000 tariff dividend...",0,0,0,1,https://abc7.com/post/trump-is-committed-givin...
2,The longest US government shutdown in history ...,0,0,0,1,https://www.theguardian.com/us-news/2025/nov/1...
3,Add AP News as your preferred source to see mo...,0,0,0,0,https://apnews.com/article/redistricting-gerry...
4,"Key Points Prop 50 passed in California, appro...",1,0,1,0,https://azcapitoltimes.com/news/2025/11/12/pro...


In [78]:
# Show the full text of the first ground-truth statement (row label 0).
ground_truth.loc[0, "statement"]



# Predictive Models

In [80]:
# Locations of the three dataset splits, relative to the notebook's working directory.
train_path, val_path, test_path = (
    "../data/train_set.csv",
    "../data/val_set.csv",
    "../data/test_set.csv",
)
df_train, df_val, df_test = load_datasets(train_path, val_path, test_path)

## Frequency Heuristic

### LiarPLUS Accuracy

In [81]:
# Work on independent copies so that columns added for the frequency heuristic
# never land on the shared split frames.
train_freq, val_freq, test_freq = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [82]:
# Fit the frequency-heuristic model on the training copy and keep every object it returns.
(
    model,
    tfidf,
    count_vec,
    token_dict,
    buzzwords,
    label_encoder,
) = build_frequency_model(train_freq)

# The same fitted objects are passed, in the same order, for every prediction call.
freq_artifacts = (model, tfidf, count_vec, token_dict, buzzwords, label_encoder)
val_pred_freq, test_pred_freq = (
    predict_frequency_model(split, *freq_artifacts)
    for split in (val_freq, test_freq)
)


In [83]:
# Collapse the six label strings into three scores:
# 0 for "true"/"mostly-true", 1 for "half-true", 2 for everything else listed.
label_to_score = {
    **dict.fromkeys(("true", "mostly-true"), 0),
    "half-true": 1,
    **dict.fromkeys(("barely-true", "false", "pants-on-fire"), 2),
}

# Reference score per statement, derived from its label; any label missing from
# `label_to_score` falls back to 1.
for split_frame in (val_freq, test_freq):
    split_frame["frequency_heuristic"] = [
        label_to_score.get(label, 1) for label in split_frame["label"].tolist()
    ]

# Compare the derived reference scores with the model's predictions on each split.
freq_val_accuracy, freq_test_accuracy = (
    accuracy_score(reference["frequency_heuristic"], predicted["predicted_frequency_heuristic"])
    for reference, predicted in ((val_freq, val_pred_freq), (test_freq, test_pred_freq))
)


In [84]:
# Report the frequency-heuristic accuracy on both held-out splits.
for metric_label, metric_value in (
    ("Frequency Heuristic Val Accuracy:", freq_val_accuracy),
    ("Frequency Heuristic Test Accuracy:", freq_test_accuracy),
):
    print(metric_label, metric_value)

Frequency Heuristic Val Accuracy: 0.36751497005988026
Frequency Heuristic Test Accuracy: 0.3769633507853403


### Ground Truth Accuracy

In [85]:
# Run the frequency model built from the training split on the ground-truth articles.
freq_pred_ground_truth = predict_frequency_model(
    ground_truth,
    model,
    tfidf,
    count_vec,
    token_dict,
    buzzwords,
    label_encoder,
)

In [86]:
# Share of ground-truth articles whose predicted frequency score matches the label.
freq_accuracy = accuracy_score(
    y_true=ground_truth["frequency_heuristic"],
    y_pred=freq_pred_ground_truth["predicted_frequency_heuristic"],
)
print("Frequency Heuristic Ground Truth Accuracy:", freq_accuracy)

Frequency Heuristic Ground Truth Accuracy: 0.45


## Sensationalism

### LiarPLUS Accuracy

In [87]:
# Independent copies of each split for the sensationalism experiments.
train_sens, val_sens, test_sens = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [88]:
# Fit the sensationalism model on the training copy; the builder also returns
# the list of numeric features that the predictor needs.
sens_model, sens_numeric_features = build_sensationalism_model(train_sens)

# Predict on the validation split first, then on the test split.
val_pred_sens, test_pred_sens = (
    predict_sensationalism_model(split, sens_model, sens_numeric_features)
    for split in (val_sens, test_sens)
)


In [89]:
# Reference sensationalism score for every row, computed row-wise by the
# project's `map_sensationalism_from_counts` helper.
for split_sens in (val_sens, test_sens):
    split_sens["sensationalism"] = split_sens.apply(
        map_sensationalism_from_counts, axis="columns"
    )


In [90]:
# Agreement between the reference scores and the model's predictions on each split.
sens_val_accuracy, sens_test_accuracy = (
    accuracy_score(reference["sensationalism"], predicted['predicted_sensationalism'])
    for reference, predicted in ((val_sens, val_pred_sens), (test_sens, test_pred_sens))
)

for metric_label, metric_value in (
    ("Sensationalism Val Accuracy:", sens_val_accuracy),
    ("Sensationalism Test Accuracy:", sens_test_accuracy),
):
    print(metric_label, metric_value)

Sensationalism Val Accuracy: 0.5112275449101796
Sensationalism Test Accuracy: 0.5130890052356021


### Ground Truth Accuracy

In [91]:
# Run the sensationalism model built from the training split on the ground-truth articles.
sens_pred_ground_truth = predict_sensationalism_model(
    ground_truth,
    sens_model,
    sens_numeric_features,
)

In [92]:
# Share of ground-truth articles whose predicted sensationalism score matches the label.
sens_accuracy = accuracy_score(
    y_true=ground_truth["sensationalism"],
    y_pred=sens_pred_ground_truth["predicted_sensationalism"],
)
print("Sensationalism Ground Truth Accuracy:", sens_accuracy)

Sensationalism Ground Truth Accuracy: 0.55


## Malicious Account

### LiarPLUS Accuracy

In [93]:
# Independent copies of each split for the malicious-account experiments.
train_mal, val_mal, test_mal = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [94]:
# Fit the malicious-account model on the training copy and keep the three
# objects the builder returns for use at prediction time.
(
    mal_model,
    mal_tfidf,
    mal_le,
) = build_malicious_account_model(train_mal)

In [95]:
# Predict on the validation split first, then on the test split.
val_pred_mal, test_pred_mal = (
    predict_malicious_account_model(split, mal_model, mal_tfidf, mal_le)
    for split in (val_mal, test_mal)
)


In [96]:
# Collapse the six label strings into three scores:
# 0 for "true"/"mostly-true", 1 for "half-true", 2 for everything else listed.
label_to_score = {
    **dict.fromkeys(("true", "mostly-true"), 0),
    "half-true": 1,
    **dict.fromkeys(("barely-true", "false", "pants-on-fire"), 2),
}

# Reference score per statement, derived from its label; any label missing from
# `label_to_score` falls back to 1.
for split_mal in (val_mal, test_mal):
    split_mal["malicious_account"] = [
        label_to_score.get(label, 1) for label in split_mal["label"].tolist()
    ]


In [97]:
# Agreement between the reference scores and the model's predictions on each split.
mal_val_accuracy, mal_test_accuracy = (
    accuracy_score(reference["malicious_account"], predicted['predicted_malicious_account'])
    for reference, predicted in ((val_mal, val_pred_mal), (test_mal, test_pred_mal))
)

for metric_label, metric_value in (
    ("Malicious Account Val Accuracy:", mal_val_accuracy),
    ("Malicious Account Test Accuracy:", mal_test_accuracy),
):
    print(metric_label, metric_value)

Malicious Account Val Accuracy: 0.35254491017964074
Malicious Account Test Accuracy: 0.36350037397157814


### Ground Truth Accuracy

In [98]:
# Run the malicious-account model built from the training split on the ground-truth articles.
mal_pred_ground_truth = predict_malicious_account_model(
    ground_truth,
    mal_model,
    mal_tfidf,
    mal_le,
)

In [99]:
# Share of ground-truth articles whose predicted malicious-account score matches the label.
mal_accuracy = accuracy_score(
    y_true=ground_truth["malicious_account"],
    y_pred=mal_pred_ground_truth["predicted_malicious_account"],
)
print("Malicious Account Ground Truth Accuracy:", mal_accuracy)

Malicious Account Ground Truth Accuracy: 0.7


## Naive Realism

### LiarPLUS Accuracy

In [100]:
# Independent copies of each split for the naive-realism experiments.
train_nr, val_nr, test_nr = (
    split.copy() for split in (df_train, df_val, df_test)
)

In [101]:
# Fit the naive-realism model on the training copy; the builder also returns
# the list of numeric features that the predictor needs.
(
    naive_model,
    naive_numeric_features,
) = build_naive_realism_model(train_nr)


In [102]:
# Predict on the validation split first, then on the test split.
val_pred_nr, test_pred_nr = (
    predict_naive_realism_model(split, naive_model, naive_numeric_features)
    for split in (val_nr, test_nr)
)


In [103]:
# Reference naive-realism score for every statement, computed from the statement
# text by the project's `map_naive_realism_from_sentiment` helper.
for split_nr in (val_nr, test_nr):
    statements = split_nr["statement"]
    split_nr["naive_realism"] = statements.apply(map_naive_realism_from_sentiment)


In [104]:
# Agreement between the reference scores and the model's predictions on each split.
nr_val_accuracy, nr_test_accuracy = (
    accuracy_score(reference["naive_realism"], predicted['predicted_naive_realism'])
    for reference, predicted in ((val_nr, val_pred_nr), (test_nr, test_pred_nr))
)

for metric_label, metric_value in (
    ("Naive Realism Val Accuracy:", nr_val_accuracy),
    ("Naive Realism Test Accuracy:", nr_test_accuracy),
):
    print(metric_label, metric_value)

Naive Realism Val Accuracy: 0.6781437125748503
Naive Realism Test Accuracy: 0.6768885564697082


### Ground Truth Accuracy

In [105]:
# Run the naive-realism model built from the training split on the ground-truth articles.
nr_pred_ground_truth = predict_naive_realism_model(
    ground_truth,
    naive_model,
    naive_numeric_features,
)


In [106]:
# Share of ground-truth articles whose predicted naive-realism score matches the label.
nr_accuracy = accuracy_score(
    y_true=ground_truth["naive_realism"],
    y_pred=nr_pred_ground_truth["predicted_naive_realism"],
)
print("Naive Realism Ground Truth Accuracy:", nr_accuracy)

Naive Realism Ground Truth Accuracy: 0.4


# GenAI

## Base Prompt

In [157]:
# Saved base-prompt results: per article, a score, reason and confidence for
# each of the four factors plus an overall confidence.
base_df = pd.read_csv(filepath_or_buffer='../webapp/results/base_outputs.csv')
base_df.head(n=5)

Unnamed: 0,id,url,freq_score,freq_reason,freq_confidence,mal_score,mal_reason,mal_confidence,sens_score,sens_reason,sens_confidence,naive_score,naive_reason,naive_confidence,overall_confidence
0,e4fc0d08-70fb-47f3-b9f7-244fae6b6ac0,https://www.cnn.com/2025/10/20/politics/trump-...,2,The article consistently repeats the central c...,95,0,"The article itself, while highly partisan, fol...",90,2,"The text is saturated with dramatic, emotional...",100,2,"The article presents a single, un-nuanced pers...",95,95.0
1,dcc96fd3-d309-4e77-a521-9b8cd1d97947,https://abc7.com/post/trump-is-committed-givin...,0,The article reports on a specific political pr...,95,0,"The article cites multiple, credible, and name...",100,1,The author's tone is generally neutral and rep...,90,0,The article presents a balanced view by includ...,100,96.25
2,7044f906-1f3e-49be-af9c-9fbbc65b0a83,https://www.theguardian.com/us-news/2025/nov/1...,0,The article does not exhibit significant repet...,95,0,The article reads as a standard piece of polit...,98,1,The article uses some dramatic language to des...,90,0,The article presents a balanced view by includ...,99,95.5
3,2920dab3-fbc3-4f8e-b3e6-3ff6724f0915,https://apnews.com/article/redistricting-gerry...,0,The article's structure discusses the same top...,100,0,The article is attributed to AP News (Associat...,100,0,The article maintains a neutral and factual to...,95,0,The article presents a balanced view by report...,100,98.75
4,dc3c3657-889f-4578-baa9-117718901b0a,https://azcapitoltimes.com/news/2025/11/12/pro...,1,The article's central theme of a deepening pol...,90,0,The article attributes its information and quo...,95,1,The language is generally analytical but uses ...,85,0,The article actively works against a simplisti...,95,91.25


In [108]:
# Score the base-prompt outputs against the ground-truth labels.
#
# The element-wise comparisons below pair rows purely by position, and the two
# frames come from independently loaded CSV files. Confirm first that both list
# the same articles in the same order; otherwise the accuracies would silently
# compare scores that belong to different articles.
assert len(base_df) == len(ground_truth), (
    "base_df and ground_truth have a different number of rows"
)
assert (base_df["url"].to_numpy() == ground_truth["link"].to_numpy()).all(), (
    "base_df rows are not in the same article order as ground_truth"
)

# Fraction of articles where the model's score equals the labelled score, per factor.
freq_acc  = (ground_truth["frequency_heuristic"] == base_df["freq_score"]).mean()
mal_acc   = (ground_truth["malicious_account"] == base_df["mal_score"]).mean()
sens_acc  = (ground_truth["sensationalism"] == base_df["sens_score"]).mean()
naive_acc = (ground_truth["naive_realism"] == base_df["naive_score"]).mean()

In [109]:
# Report the per-factor accuracies computed in the previous cell.
for metric_label, metric_value in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(metric_label, metric_value)


Frequency heuristic accuracy: 0.6
Malicious account accuracy: 0.8
Sensationalism accuracy: 0.45
Naive Realism accuracy: 0.6


## Chain of Thought

In [156]:
# Saved chain-of-thought results: per article, a score, reason and confidence
# for each of the four factors plus an overall confidence.
cot_df = pd.read_csv(filepath_or_buffer='../webapp/results/cot_outputs.csv')
cot_df.head(n=5)

Unnamed: 0,id,url,freq_score,freq_reason,freq_confidence,mal_score,mal_reason,mal_confidence,sens_score,sens_reason,sens_confidence,naive_score,naive_reason,naive_confidence,overall_confidence
0,846eba04-9a07-4db2-8e2b-bd22b5e637ee,https://www.cnn.com/2025/10/20/politics/trump-...,2,The article heavily repeats the central narrat...,100,2,The article is a piece of speculative fiction ...,100,2,The article uses highly dramatic and exaggerat...,100,2,The article presents its critical perspective ...,100,100.0
1,fc5d28a2-d97d-4b17-97ad-897599641450,https://abc7.com/post/trump-is-committed-givin...,0,The article reports on a specific policy propo...,100,0,The sources cited are all credible and clearly...,100,1,While the article's own prose is neutral and o...,95,0,The article is a strong example of balanced re...,100,98.75
2,486dca50-6def-4ed3-b9eb-378771925237,https://www.theguardian.com/us-news/2025/nov/1...,0,The article reports on a singular event and it...,100,0,The article is a piece of standard political r...,100,1,While the headline is factual and the tone is ...,95,0,The article is well-balanced and explicitly pr...,100,98.75
3,adb23f07-5f8c-45a7-8bd6-8ebac3e55e91,https://apnews.com/article/redistricting-gerry...,0,The article introduces a central theme—mid-dec...,100,0,The article attributes information to credible...,100,1,The tone is largely informational and objectiv...,95,0,The article presents a nuanced situation by re...,90,96.25
4,c94d27e2-2d60-45ed-99f6-99edf4e4db0d,https://azcapitoltimes.com/news/2025/11/12/pro...,1,The article consistently repeats and reinforce...,95,0,The article is presented as a piece of politic...,100,0,The article maintains a neutral and analytical...,95,0,"The article is highly balanced and nuanced, ex...",100,97.5


In [111]:
# Score the chain-of-thought outputs against the ground-truth labels.
#
# The element-wise comparisons below pair rows purely by position, and the two
# frames come from independently loaded CSV files. Confirm first that both list
# the same articles in the same order; otherwise the accuracies would silently
# compare scores that belong to different articles.
assert len(cot_df) == len(ground_truth), (
    "cot_df and ground_truth have a different number of rows"
)
assert (cot_df["url"].to_numpy() == ground_truth["link"].to_numpy()).all(), (
    "cot_df rows are not in the same article order as ground_truth"
)

# Fraction of articles where the model's score equals the labelled score, per factor.
freq_acc  = (ground_truth["frequency_heuristic"] == cot_df["freq_score"]).mean()
mal_acc   = (ground_truth["malicious_account"] == cot_df["mal_score"]).mean()
sens_acc  = (ground_truth["sensationalism"] == cot_df["sens_score"]).mean()
naive_acc = (ground_truth["naive_realism"] == cot_df["naive_score"]).mean()

In [112]:
# Report the per-factor accuracies computed in the previous cell.
for metric_label, metric_value in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(metric_label, metric_value)

Frequency heuristic accuracy: 0.7
Malicious account accuracy: 0.5
Sensationalism accuracy: 0.35
Naive Realism accuracy: 0.45


## Fractal Chain of Thought

In [155]:
# Saved fractal chain-of-thought results: per article, a score, reason and
# confidence for each of the four factors plus an overall confidence.
fcot_df = pd.read_csv(filepath_or_buffer='../webapp/results/fcot_outputs.csv')
fcot_df.head(n=5)

Unnamed: 0,id,url,freq_score,freq_reason,freq_confidence,mal_score,mal_reason,mal_confidence,sens_score,sens_reason,sens_confidence,naive_score,naive_reason,naive_confidence,overall_confidence
0,57c0b9f9-9da2-4a91-adb6-9899a13eb906,https://www.cnn.com/2025/10/20/politics/trump-...,2,The article heavily and repeatedly centers on ...,100,0,I am disagreeing with the model's score of 2. ...,95,2,The article employs highly sensationalized and...,100,2,I am disagreeing with the model's score of 1 a...,90,96.25
1,da9de0e4-dd6e-4b9d-87d5-41599a7da785,https://abc7.com/post/trump-is-committed-givin...,1,I disagree with the model's score of 2. The ar...,95,0,I strongly disagree with the model's score of ...,100,1,I disagree with the model's score of 0. While ...,90,0,I disagree with the model's score of 1. The ar...,100,96.25
2,041aae66-36c3-41fd-8acd-7b44625aada4,https://www.theguardian.com/us-news/2025/nov/1...,1,The article exhibits moderate repetition by co...,95,0,"The article cites credible, though inherently ...",100,1,The article uses mildly emotional and dramatic...,90,0,The article provides a balanced and nuanced pr...,95,95.0
3,6b2c1669-a78a-40ed-ae64-c974c484f5ba,https://apnews.com/article/redistricting-gerry...,1,The article demonstrates moderate repetition b...,95,0,"The article is from 'AP News,' a credible and ...",100,0,The article maintains a neutral and objective ...,95,0,The article presents a balanced and multi-face...,90,95.0
4,eaef0763-a00b-47b2-a1c1-53501aa9f3de,https://azcapitoltimes.com/news/2025/11/12/pro...,1,"I rate this a 1, disagreeing with the model's ...",95,0,"I rate this a 0, disagreeing with the model's ...",100,0,"I rate this a 0, which aligns with the model's...",100,0,"I rate this a 0, disagreeing with the model's ...",100,98.75


In [114]:
# Score the fractal chain-of-thought outputs against the ground-truth labels.
#
# The element-wise comparisons below pair rows purely by position, and the two
# frames come from independently loaded CSV files. Confirm first that both list
# the same articles in the same order; otherwise the accuracies would silently
# compare scores that belong to different articles.
assert len(fcot_df) == len(ground_truth), (
    "fcot_df and ground_truth have a different number of rows"
)
assert (fcot_df["url"].to_numpy() == ground_truth["link"].to_numpy()).all(), (
    "fcot_df rows are not in the same article order as ground_truth"
)

# Fraction of articles where the model's score equals the labelled score, per factor.
freq_acc  = (ground_truth["frequency_heuristic"] == fcot_df["freq_score"]).mean()
mal_acc   = (ground_truth["malicious_account"] == fcot_df["mal_score"]).mean()
sens_acc  = (ground_truth["sensationalism"] == fcot_df["sens_score"]).mean()
naive_acc = (ground_truth["naive_realism"] == fcot_df["naive_score"]).mean()

In [115]:
# Report the per-factor accuracies computed in the previous cell.
for metric_label, metric_value in (
    ("Frequency heuristic accuracy:", freq_acc),
    ("Malicious account accuracy:", mal_acc),
    ("Sensationalism accuracy:", sens_acc),
    ("Naive Realism accuracy:", naive_acc),
):
    print(metric_label, metric_value)

Frequency heuristic accuracy: 0.55
Malicious account accuracy: 0.55
Sensationalism accuracy: 0.6
Naive Realism accuracy: 0.7
