In [79]:
import os
import gradio as gr
import pandas as pd
import plotly.express as px
import re
from google.cloud import bigquery
import bigframes

# Setup
# Fully-qualified BigQuery identifiers used by every query in this file.
PROJECT_ID = "bias-buster-471818"
DATASET_ID = "bias_buster_dataset"
TABLE_ID = "news_articles_placeholder"
MODEL_ID = f"{PROJECT_ID}.{DATASET_ID}.gemini_flash_model"

# Environment is set before the clients below are constructed.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
# NOTE(review): the captured session log for this cell reports "No explicit
# location is set, so using location US", which suggests this variable is not
# picked up by bigframes -- confirm the intended location.
os.environ["BIGFRAMES_LOCATION"] = "us-central1"

# `client` runs SQL / load jobs; `bf` is the BigQuery DataFrames session.
client = bigquery.Client()
bf = bigframes.connect()

# Shared analyst prompt
# Instruction text placed ahead of the article excerpt(s) in every model call:
# generate_summary_for_articles() uses it for the batch summary and
# update_dashboard() uses it for the per-article analysis. parse_analysis()
# only looks for the headings of items 1-7 below.
ANALYST_PROMPT = """You are a media analyst tasked with identifying and documenting biases in news reporting. Based *only* on the provided excerpts, perform a detailed analysis and report on the following aspects:

1.  **Framing Bias:** How is the story being framed? What is the central narrative or perspective being promoted? Is there a particular angle (e.g., political, economic, social) that is being emphasized?
2.  **Confirmation Bias:** Are the excerpts selectively choosing information that confirms a pre-existing belief or hypothesis? Point to specific examples where this might be occurring.
3.  **Sensationalism:** Is the language used overly dramatic or inflammatory? Identify any use of hyperbole, emotionally charged words, or shocking imagery intended to grab the reader's attention rather than inform them.
4.  **Selection Bias:** What information seems to be included or excluded? Are certain voices, perspectives, or facts missing? How does this selective inclusion/exclusion shape the overall message?
5.  **Placement Bias:** If applicable, consider the placement of information within the excerpts. Is a particular point of view given more prominence by being placed at the beginning or end of a paragraph or article?
6.  **Omission Bias:** What facts or perspectives are left out entirely? Are key details or counter-arguments missing that would provide a more complete picture?
7.  **Propaganda and Spin:** Is there evidence of a deliberate attempt to manipulate public opinion? Look for signs of "spin" - the strategic presentation of information to favor a particular outcome or point of view.
8.  **Sentiment and Tone:** Describe the overall sentiment and tone of the excerpts. Is it positive, negative, neutral, or something else? Is the tone consistent, or does it shift?
9.  **Social Implications:** Based on the biases identified, what are the potential social implications of this type of reporting? How might this coverage influence public perception, policy decisions, or social cohesion?

Provide your analysis in a structured, point-by-point format, citing specific phrases or sentences from the provided text to support each of your findings. Do not introduce any outside information or personal opinions."""

# Get Unique Topics
def get_unique_topics():
    """Return the sorted, de-duplicated top-level topics of the articles table.

    Every non-empty element of the repeated ``topics`` column is fetched, and
    only the part before the first ``'->'`` separator is kept.

    Returns:
        list[str]: unique top-level topic names in ascending order.
    """
    query = f"""
    SELECT DISTINCT topic
      FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`,
           UNNEST(topics) AS topic
     WHERE topic IS NOT NULL
       AND LENGTH(TRIM(topic)) > 0
    """
    topic_rows = client.query(query).to_dataframe()
    raw_topics = topic_rows['topic'].dropna().unique().tolist()

    top_level = set()
    for raw in raw_topics:
        # Keep only the text in front of the first '->'.
        head, _, _ = raw.partition('->')
        top_level.add(head)
    return sorted(top_level)

# Dropdown choices are resolved once, when this module/cell is executed.
unique_topics = get_unique_topics()
# First topic is the initial selection; empty string when there are no topics.
default_topic = next(iter(unique_topics), "")

# Summarize the selected excerpts as a batch
def generate_summary_for_articles(df):
    """Ask the model for one combined bias analysis of all articles in ``df``.

    The first 500 characters of each article's ``trimmed_text`` are joined
    into a single prompt (prefixed with ``ANALYST_PROMPT``) and sent through
    ``ML.GENERATE_TEXT`` as a query parameter.

    Args:
        df: pandas DataFrame with a ``trimmed_text`` column.

    Returns:
        str: the generated analysis, or ``"No summary available."`` when there
        is no usable text or the model returned nothing.
    """
    # NULL article text comes back as None/NaN; str.join() raises TypeError on
    # non-string items, so drop those rows before building the prompt.
    snippets = df['trimmed_text'].dropna().astype(str).str[:500].tolist()
    if not snippets:
        return "No summary available."

    joined = "\n\n---\n\n".join(snippets)
    full_prompt = f"{ANALYST_PROMPT}\n\nExcerpts:\n\n{joined}"

    # The prompt contains arbitrary article text, so it is passed as a bound
    # parameter rather than interpolated into the SQL string.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("prompt", "STRING", full_prompt)
        ]
    )
    sql = f"""
    SELECT ml_generate_text_llm_result AS summary
      FROM ML.GENERATE_TEXT(
        MODEL `{MODEL_ID}`,
        (SELECT @prompt AS prompt),
        STRUCT(TRUE AS flatten_json_output)
      )
    """
    out = client.query(sql, job_config=job_config).to_dataframe()
    if out.empty:
        return "No summary available."

    summary = out['summary'].iloc[0]
    # A NULL result would otherwise be rendered as the literal text "None".
    if pd.isna(summary) or not str(summary).strip():
        return "No summary available."
    return summary

# Run Gemini Flash on a temp table
def run_generate_text(input_df, prompt_col, temp_table_name):
    """Run ``ML.GENERATE_TEXT`` once per row of ``input_df``.

    The ``trimmed_text`` column and the prompt column (renamed to ``prompt``)
    are loaded into ``{PROJECT_ID}.{DATASET_ID}.{temp_table_name}``, replacing
    any existing contents, and the model is then run over that table.

    Args:
        input_df: pandas DataFrame containing ``trimmed_text`` and ``prompt_col``.
        prompt_col: name of the column holding the prompt text.
        temp_table_name: table name (inside the dataset) used for staging.

    Returns:
        pandas.DataFrame with columns ``trimmed_text`` and ``result``.
    """
    table_ref = f"{PROJECT_ID}.{DATASET_ID}.{temp_table_name}"
    staged = input_df[['trimmed_text', prompt_col]].rename(
        columns={prompt_col: 'prompt'}
    )

    # WRITE_TRUNCATE overwrites whatever a previous run left in the table.
    load_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
    load_job = client.load_table_from_dataframe(
        staged, table_ref, job_config=load_config
    )
    load_job.result()  # block until the upload has finished

    query = f"""
    SELECT trimmed_text,
           ml_generate_text_llm_result AS result
      FROM ML.GENERATE_TEXT(
        MODEL `{MODEL_ID}`,
        TABLE `{table_ref}`,
        STRUCT(TRUE AS flatten_json_output)
      )
    """
    return client.query(query).to_dataframe()

# Robust parse: catch bolded headings (colon inside or outside the bold
# markers) as well as plain headings.
_BIAS_KEYS = (
    "Framing Bias", "Confirmation Bias", "Sensationalism",
    "Selection Bias", "Placement Bias", "Omission Bias",
    "Propaganda and Spin",
)

# Per key: (bold heading pattern, plain heading pattern), compiled once.
# The bold pattern accepts both "**Key**: text" and "**Key:** text"; the
# latter is the form the analyst prompt itself uses for its headings.
_BIAS_PATTERNS = {
    key: (
        re.compile(
            rf"\*\*{re.escape(key)}(?:\*\*:|:\*\*)\s*([^\n]+)", re.IGNORECASE
        ),
        re.compile(rf"{re.escape(key)}:\s*([^\n]+)", re.IGNORECASE),
    )
    for key in _BIAS_KEYS
}


def parse_analysis(text):
    """Extract per-bias findings from a model analysis and score them.

    For each known bias heading the first line of text following the heading
    is taken as the finding. Findings that are empty or start with "none"
    (case-insensitive) are treated as "not detected".

    Args:
        text: the model's analysis; non-string input is treated as empty.

    Returns:
        tuple[str, float]: a markdown bullet list of detected biases (or a
        single "No biases detected" bullet) and a score from 0 to 10 equal to
        the fraction of bias types detected, rounded to one decimal.
    """
    if not isinstance(text, str):
        text = ""

    bullets = []
    for key in _BIAS_KEYS:
        bold_pattern, plain_pattern = _BIAS_PATTERNS[key]
        # Prefer a bolded heading; fall back to a plain "Key: ..." heading.
        match = bold_pattern.search(text) or plain_pattern.search(text)
        if not match:
            continue
        finding = match.group(1).strip()
        if finding and not finding.lower().startswith("none"):
            bullets.append(f"- **{key}**: {finding}")

    score = round(len(bullets) / len(_BIAS_KEYS) * 10, 1)
    if not bullets:
        bullets = ["- **No biases detected**"]
    return "\n".join(bullets), score

# Sentiment Distribution
def generate_sentiment_plot(grid, topic):
    """Build a histogram of the ``Sentiment`` column, one colour per value.

    Args:
        grid: DataFrame with a ``Sentiment`` column.
        topic: topic name shown in the chart title.

    Returns:
        The Plotly figure, 400 px high, using the dark template.
    """
    chart_title = f"Sentiment Distribution for '{topic}'"
    figure = px.histogram(
        grid,
        x='Sentiment',
        color='Sentiment',
        title=chart_title,
        template="plotly_dark",
    )
    figure.update_layout(height=400)
    return figure

# Forecast Bias Score
def generate_forecast_plot(grid, topic):
    """Forecast the bias score over time and plot it with its interval bounds.

    Rows without a ``Published`` value are ignored and missing bias scores
    count as 0. The history is uploaded through the BigQuery DataFrames
    session and forecast 10 steps ahead.

    Args:
        grid: DataFrame with ``Published`` and ``Bias Score`` columns.
        topic: topic name shown in the chart title.

    Returns:
        A Plotly line figure of the forecast with dotted upper/lower bounds,
        or a placeholder figure when fewer than two dated rows exist.
    """
    df2 = grid.dropna(subset=['Published']).copy()
    # Decide locally whether there is enough history; this avoids uploading
    # the frame and issuing a remote row count just to discard the result.
    if len(df2) < 2:
        return px.scatter(title="Not enough data points for forecasting.")

    df2['Published'] = pd.to_datetime(df2['Published'])
    df2['bias_score'] = df2['Bias Score'].fillna(0)
    history = df2[['Published', 'bias_score']].sort_values('Published')

    bf_df = bf.read_pandas(history)
    fc = bf_df.ai.forecast("Published", "bias_score", horizon=10).to_pandas()
    # Row order of the downloaded result is not relied upon: a line chart
    # connects points in row order, so sort by time explicitly.
    fc = fc.sort_values('forecast_timestamp')

    fig = px.line(
        fc, x='forecast_timestamp', y='forecast_value',
        title=f"Forecasted Bias Score for '{topic}'", template="plotly_dark"
    )
    bounds = (
        ('prediction_interval_upper_bound', 'Upper Bound'),
        ('prediction_interval_lower_bound', 'Lower Bound'),
    )
    for column, label in bounds:
        fig.add_scatter(
            x=fc['forecast_timestamp'],
            y=fc[column],
            mode='lines', line=dict(dash='dot', color='lightgray'),
            name=label
        )
    fig.update_layout(height=400)
    return fig

# Bias Score Heatmap
def generate_bias_heatmap(grid, topic):
    """Build a density heatmap of ``Bias Score`` against ``Sentiment``.

    Args:
        grid: DataFrame with ``Sentiment`` and ``Bias Score`` columns.
        topic: topic name shown in the chart title.

    Returns:
        The Plotly figure (3 x-bins, 10 y-bins, red colour scale, 400 px high).
    """
    heatmap_options = dict(
        x='Sentiment',
        y='Bias Score',
        nbinsx=3,
        nbinsy=10,
        title=f"Bias Score Heatmap for '{topic}'",
        template="plotly_dark",
        color_continuous_scale='reds',
    )
    figure = px.density_heatmap(grid, **heatmap_options)
    figure.update_layout(height=400)
    return figure

# Bias Score Bar Chart
def generate_bias_bar_chart(grid, topic):
    """Build a bar chart with one bar per article title showing its bias score.

    Args:
        grid: DataFrame with ``Title`` and ``Bias Score`` columns.
        topic: topic name shown in the chart title.

    Returns:
        The Plotly figure, 400 px high, with x tick labels rotated by -45 degrees.
    """
    chart_title = f"Bias Score per Article in '{topic}'"
    figure = px.bar(
        grid,
        x='Title',
        y='Bias Score',
        title=chart_title,
        template="plotly_dark",
    )
    figure.update_layout(height=400, xaxis_tickangle=-45)
    return figure

# Main Dashboard Logic
def update_dashboard(topic):
    """Recompute every dashboard output for the selected topic.

    Loads up to 10 matching article rows, asks the model for a batch summary
    and a per-article analysis, parses the analyses into bias bullets and
    scores, and builds the four charts.

    Args:
        topic: topic text to match (case-insensitive substring) against each
            article's topics. ``None`` is treated as an empty string, which
            matches every topic.

    Returns:
        tuple: ``(summary_markdown, grid_dataframe, sentiment_plot,
        forecast_plot, heatmap_plot, bar_chart_plot)``.
    """
    topic = "" if topic is None else str(topic)

    # 1) Load & dedupe
    # The topic is bound as a query parameter: it originates from the UI, so
    # interpolating it into the SQL text would allow injection and would break
    # on values containing a quote.
    sql = f"""
    SELECT title, author, published, sentiment,
           LEFT(text, 3000) AS trimmed_text
      FROM `{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}`,
           UNNEST(topics) AS topic_name
     WHERE LOWER(topic_name) LIKE LOWER(CONCAT('%', @topic, '%'))
     LIMIT 10
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("topic", "STRING", topic)
        ]
    )
    df = client.query(sql, job_config=job_config).to_dataframe()
    # Rows without text cannot be analysed and would yield NaN prompts below.
    df = df.dropna(subset=['trimmed_text'])
    df = df.drop_duplicates(subset=['trimmed_text']).reset_index(drop=True)

    if df.empty:
        empty_cols = ['Title', 'Author', 'Published', 'Sentiment', 'Biases', 'Bias Score']
        return (
            "No summary available for these articles.",
            pd.DataFrame(columns=empty_cols),
            px.scatter(title="No data"),
            px.scatter(title="No data"),
            px.scatter(title="No data"),
            px.scatter(title="No data")
        )

    # 2) Batch summary
    summary_text = generate_summary_for_articles(df)
    summary_md = f"### Analysis of selected articles on '{topic}'\n\n{summary_text}"

    # 3) Per-article analysis
    df['analysis_prompt'] = ANALYST_PROMPT + "\n\nExcerpt:\n\n" + df['trimmed_text']
    analysis_df = run_generate_text(df, 'analysis_prompt', 'tmp_analysis')

    # trimmed_text is unique after the dedupe above, so it serves as join key.
    merged = df.merge(
        analysis_df, on='trimmed_text', how='left'
    ).rename(columns={'result': 'analysis_output'})

    # 4) Parse biases & scores
    parsed = merged['analysis_output'].fillna("").map(parse_analysis)
    merged['Biases'], merged['Bias Score'] = zip(*parsed)

    # 5) Build grid
    grid = merged[[
        'title', 'author', 'published', 'sentiment', 'Biases', 'Bias Score'
    ]].copy()
    grid.columns = ['Title', 'Author', 'Published', 'Sentiment', 'Biases', 'Bias Score']

    # 6) Generate visuals
    sent_plot = generate_sentiment_plot(grid, topic)
    fc_plot = generate_forecast_plot(grid, topic)
    hm_plot = generate_bias_heatmap(grid, topic)
    bc_plot = generate_bias_bar_chart(grid, topic)

    return summary_md, grid, sent_plot, fc_plot, hm_plot, bc_plot

# Gradio UI
# Layout: title, topic dropdown, summary, results table, then a 2x2 grid of
# charts. Both the dropdown change event and the initial page load call
# update_dashboard and fill the same six output components.
with gr.Blocks(theme=gr.themes.Soft(), title="Bias Intelligence Dashboard") as demo:
    gr.Markdown("# <div style='text-align: center; font-size: 2.5em; font-weight: bold;'>Bias Intelligence Dashboard</div>")
    gr.Markdown("<div style='text-align: center; font-size: 1.2em; color: #666;'>See Beyond the Story: Bias Under the Lens. Powered by BigQuery & Gemini</div>")

    # Choices and initial value are passed by keyword so the call does not
    # depend on the component's positional parameter order.
    topic_selector = gr.Dropdown(
        choices=unique_topics,
        value=default_topic,
        label="Select Topic",
    )
    summary_out = gr.Markdown()
    ai_output_grid = gr.Dataframe(
        headers=["Title", "Author", "Published", "Sentiment", "Biases", "Bias Score"],
        interactive=False
    )

    with gr.Row():
        with gr.Column():
            sentiment_plot_out = gr.Plot()
        with gr.Column():
            heatmap_plot_out = gr.Plot()

    with gr.Row():
        with gr.Column():
            bar_chart_plot_out = gr.Plot()
        with gr.Column():
            forecast_plot_out = gr.Plot()

    # Order must match the tuple returned by update_dashboard:
    # (summary, grid, sentiment, forecast, heatmap, bar chart).
    dashboard_outputs = [
        summary_out,
        ai_output_grid,
        sentiment_plot_out,
        forecast_plot_out,
        heatmap_plot_out,
        bar_chart_plot_out,
    ]

    topic_selector.change(
        fn=update_dashboard,
        inputs=[topic_selector],
        outputs=dashboard_outputs,
    )

    # Populate the dashboard for the default topic when the page first loads.
    demo.load(
        fn=lambda: update_dashboard(default_topic),
        outputs=dashboard_outputs,
    )

if __name__ == "__main__":
    demo.launch()


No explicit location is set, so using location US for the session.



It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6fafe40106018d14a0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
