# In [2]:
import os

import numpy as np
import pandas as pd
import requests
import streamlit as st
from joblib import load
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder

# -------------------------
# Load model + transformers
# -------------------------


@st.cache_resource
def load_models_and_encoders():
    """Load every model artifact the app needs and return them as a tuple.

    The function is wrapped in ``st.cache_resource``, so the artifacts are
    meant to be loaded once and reused rather than reloaded on each rerun.

    Returns:
        A 5-tuple, in this order:
        the object loaded from ``saved_model.pkl`` (main text classifier),
        the ``all-MiniLM-L6-v2`` sentence transformer,
        the object loaded from ``source_only_model.pkl``,
        the object loaded from ``source_encoder.pkl``,
        the object loaded from ``source_ohe.pkl``.
    """
    # Text pipeline: classifier plus the sentence-embedding model.
    text_classifier = load("saved_model.pkl")
    sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

    # Source-only pipeline: the generator is consumed left to right, so the
    # three files are read in the order listed.
    source_classifier, label_encoder, one_hot = (
        load(path)
        for path in (
            "source_only_model.pkl",
            "source_encoder.pkl",
            "source_ohe.pkl",
        )
    )

    return (
        text_classifier,
        sentence_model,
        source_classifier,
        label_encoder,
        one_hot,
    )

# Load all model artifacts at script start and bind them to module-level names.
# The tuple order must match the return order of load_models_and_encoders().
clf_text, encoder, clf_source, source_encoder, ohe = load_models_and_encoders()


# -------------------------
# Function: Fetch news articles
# -------------------------
# A TTL keeps "live" results from being served from cache indefinitely, and
# lets a cached failure ([]) expire instead of sticking for the whole session.
@st.cache_data(ttl=600)
def fetch_news_articles(api_key, query="misinformation", page_size=20):
    """Fetch English-language articles from the NewsAPI ``everything`` endpoint.

    Args:
        api_key: NewsAPI key, sent as the ``apiKey`` query parameter.
        query: Search phrase, sent as the ``q`` query parameter.
        page_size: Number of articles requested (``pageSize``).

    Returns:
        The list stored under ``"articles"`` in the JSON response, or an
        empty list when the request fails, the status code is not 200, or
        the body is not valid JSON. Failures are also reported to the user
        through ``st.error``.
    """
    # Let requests build the query string so user-typed text (spaces, '&',
    # '#', non-ASCII) is URL-encoded instead of corrupting the URL.
    params = {
        "q": query,
        "language": "en",
        "sortBy": "popularity",
        "pageSize": page_size,
        "apiKey": api_key,
    }

    try:
        # The timeout prevents a stalled connection from hanging the app.
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params=params,
            timeout=10,
        )
        if response.status_code == 200:
            return response.json().get("articles", [])
    except (requests.RequestException, ValueError):
        # Network failure or malformed JSON: fall through to the shared
        # error path. The exception text is deliberately not shown because
        # it can contain the request URL, which includes the API key.
        pass

    st.error("Failed to fetch articles.")
    return []



# -------------------------
# Streamlit App UI
# -------------------------
st.title("🧠 FactCheck Radar")
st.markdown("Prioritizing content for review using ML + Risk Scoring")

# NewsAPI credential. The NEWSAPI_KEY environment variable takes precedence so
# the key can be supplied or rotated without editing this file.
# NOTE(review): the literal fallback is kept only so existing deployments keep
# working; it is a credential committed to source and should be rotated and
# removed once NEWSAPI_KEY is configured everywhere.
API_KEY = os.environ.get("NEWSAPI_KEY") or "9f21f14a5e0d457bbb3304f9f7fade40"

# Sidebar inputs: search topic, how many ranked articles to show, and the
# button that triggers a fetch on this rerun.
st.sidebar.header("🔍 Live News Settings")
query = st.sidebar.text_input("Search topic", value="misinformation")
top_n = st.sidebar.slider("Top posts to review", 5, 50, 20)
fetch_now = st.sidebar.button("Fetch Live News")

# -------------------------
# Fetch, process, predict
# -------------------------
if fetch_now:
    # Request at least as many articles as the reviewer asked to see. The
    # slider allows up to 50, but the fetch default of 20 would otherwise cap
    # the ranked list at 20 rows.
    articles = fetch_news_articles(API_KEY, query, page_size=max(20, top_n))

    if articles:
        st.success(f"Fetched {len(articles)} articles on '{query}'")

        texts = []
        sources = []
        for article in articles:
            # Guard against missing or null fields: a null title used to
            # raise TypeError on concatenation, and a null description put
            # the literal text "None" into the statement that gets embedded.
            parts = (article.get("title"), article.get("description"))
            texts.append(". ".join(str(part) for part in parts if part))

            # Source label as reported by NewsAPI; fall back to a placeholder
            # when the source object or its name is absent.
            # NOTE(review): whether `ohe` accepts labels it was not fitted on
            # depends on how it was configured — confirm handle_unknown.
            source_info = article.get("source") or {}
            sources.append(source_info.get("name") or "Unknown")

        df = pd.DataFrame({"statement": texts, "source": sources})

        # Text-only score: embed each statement and take column 1 of
        # predict_proba (presumably the positive class — verify against
        # the training labels).
        X_embed = encoder.encode(df["statement"].tolist(), show_progress_bar=False)
        text_probs = clf_text.predict_proba(X_embed)[:, 1]

        # Source-only score. Double brackets keep a 2D DataFrame, which is
        # what the encoder's transform is given here.
        X_source = ohe.transform(df[["source"]])
        source_probs = clf_source.predict_proba(X_source)[:, 1]

        # Risk is the product of the two scores.
        df["P(x)"] = text_probs
        df["S(x)"] = source_probs
        df["Risk"] = df["P(x)"] * df["S(x)"]

        # Show the highest-risk articles first.
        st.subheader(f"📋 Top {top_n} Articles by Risk Score")
        top_posts = df.sort_values("Risk", ascending=False).head(top_n)
        st.dataframe(top_posts[["statement", "source", "P(x)", "S(x)", "Risk"]])

    else:
        st.warning("No articles found or API failed.")


# Output: 2025-04-23 11:17:03.402 No runtime found, using MemoryCacheStorageManager
