In [7]:
#!/usr/bin/env python3
"""
AI Evaluator for Fake News + Wikipedia Fact Check
Reads JSON outputs from both systems and asks Claude to summarize and judge reliability.
Outputs a concise ~100-word summary with reasoning.
"""

from dotenv import load_dotenv
import os
import boto3
import json

# -------------------- Load Environment --------------------
# Load variables from the local ".env" file at import time, i.e. before
# ask_claude() builds its boto3 client further down.
# NOTE(review): presumably this supplies the AWS credentials boto3 needs — confirm.
load_dotenv(".env")

# -------------------- Claude Agent --------------------
def ask_claude(
    prompt,
    system=None,
    *,
    model_id="anthropic.claude-3-haiku-20240307-v1:0",
    region_name="us-east-1",
    temperature=0,
    max_tokens=500,
):
    """
    Send a prompt to Claude via AWS Bedrock and return the response text.

    Args:
        prompt: User message text sent as the single conversation turn.
        system: Optional system prompt. When None, a default evaluator
            instruction (neutral judge, ~100-word summary) is used.
        model_id: Bedrock model identifier passed as ``modelId``.
        region_name: AWS region used to create the bedrock-runtime client.
        temperature: Sampling temperature forwarded in ``inferenceConfig``.
        max_tokens: Token limit forwarded as ``maxTokens``.

    Returns:
        The text of the model's reply. If the reply contains several text
        blocks they are concatenated in order; an empty string is returned
        when the reply has no text block at all.

    Exceptions raised while creating the client or calling ``converse`` are
    not caught here and propagate to the caller.
    """
    if system is None:
        system = (
            "You are a neutral evaluator that judges the reliability of text based on multiple AI systems. "
            "When Wikipedia fact-check entries are NEUTRAL or NOT_FOUND, give them lesser weight "
            "because recent news may not be reflected on Wikipedia. "
            "Focus on synthesizing all evidence, prioritize FakeNewsDetector outputs when Wikipedia is inconclusive, "
            "and provide a concise summary (~100 words) of reliability and reasoning."
        )

    client = boto3.client("bedrock-runtime", region_name=region_name)

    response = client.converse(
        modelId=model_id,
        messages=[{"role": "user", "content": [{"text": prompt}]}],
        system=[{"text": system}],
        inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
    )

    # The reply content is a list of blocks; only blocks carrying a "text"
    # key are text. Joining them avoids a KeyError when the first block is
    # not text and keeps any text that follows the first block.
    content_blocks = response["output"]["message"]["content"]
    return "".join(block["text"] for block in content_blocks if "text" in block)

# -------------------- Evaluator --------------------
def evaluate_from_files(fake_news_file: str, fact_check_file: str):
    """
    Load the FakeNewsDetector and WikipediaFactChecker JSON result files,
    embed both in a single prompt, and return Claude's summary.

    Returns None (after printing the error) when either file cannot be
    opened or parsed; otherwise returns whatever ask_claude() returns.
    """
    # Read both result files in order; any failure aborts the evaluation.
    parsed = []
    try:
        for path in (fake_news_file, fact_check_file):
            with open(path, "r", encoding="utf-8") as handle:
                parsed.append(json.load(handle))
    except Exception as e:
        print(f"Error reading JSON files: {e}")
        return None

    # Pretty-print each payload so it can be pasted into the prompt verbatim.
    detector_text, checker_text = [
        json.dumps(payload, indent=2, ensure_ascii=False) for payload in parsed
    ]

    # Assemble the instruction text around the two serialized payloads.
    prompt = f"""
You are given the outputs of two systems evaluating a news article:

1. FakeNewsDetector (BERT classifier):
{detector_text}

2. WikipediaFactChecker (retrieval + claim verification):
{checker_text}

Your task:
- Compare both outputs and identify agreements or contradictions.
- Give lesser weight to Wikipedia entries marked as NEUTRAL or NOT_FOUND because recent news may not be reflected there.
- Provide a concise summary (~100 words) explaining the reliability of the news article and your reasoning.
- End the summary with a clear reliability judgment: HIGH, MEDIUM, or LOW.
"""

    # Hand the finished prompt to the model and pass its answer through.
    return ask_claude(prompt)

# -------------------- Main --------------------
if __name__ == "__main__":
    # Result files written by the two upstream pipeline stages
    fake_news_file, fact_check_file = (
        "fake_news_analysis_results.json",
        "context_fact_check_results.json",
    )

    print(f"Evaluating '{fake_news_file}' and '{fact_check_file}' using AI agent...\n")
    evaluation = evaluate_from_files(fake_news_file, fact_check_file)

    # A None or empty result means the files could not be read or no text came back.
    if not evaluation:
        print("Evaluation failed.")
    else:
        print("=== AI EVALUATION SUMMARY (~100 words) ===")
        print(evaluation)


Evaluating 'fake_news_analysis_results.json' and 'context_fact_check_results.json' using AI agent...

=== AI EVALUATION SUMMARY (~100 words) ===
The FakeNewsDetector system classified the news article as "REAL" with a high confidence of 0.999810516834259, indicating that the article is likely to be truthful. The WikipediaFactChecker, on the other hand, found the claim in the article to be "REFUTED" with a confidence of 0.9092686093479515, citing "Little support in Singapore" as the evidence.

Given the high confidence of the FakeNewsDetector and the fact that the WikipediaFactChecker's verdict is marked as "REFUTED" rather than "NEUTRAL" or "NOT_FOUND," I would give more weight to the FakeNewsDetector's assessment. The recent nature of the news article may not be fully reflected in the Wikipedia entry, and the FakeNewsDetector's strong classification suggests the article is reliable.

In summary, the news article appears to be HIGHLY reliable based on the FakeNewsDetector's classificat