In [112]:
from dotenv import load_dotenv
import os
import pandas as pd
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
serper_api_key = os.getenv("SERPER_API_KEY")
os.environ["USER_AGENT"] = "factcheck/1.0"



In [None]:
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.utilities import GoogleSerperAPIWrapper
import json
from pprint import pprint

import warnings
warnings.filterwarnings('ignore')

In [114]:
fallacies_df= pd.read_csv("fallacies.csv")
fallacies_list = "\n".join(
    f"{row['fine_class']}: {row['definition']}" for _, row in fallacies_df.iterrows()
)

In [115]:
#Defining the model
llm = ChatOpenAI(model="llama3-70b-8192", temperature=0, api_key=groq_api_key, base_url="https://api.groq.com/openai/v1", max_completion_tokens=1024)

In [124]:
template1 = """
You are a neutral fact-checking communications analyst. Your tasks are:
(1) Summarize the article in five clear sentences (neutral, specific, no hype).
(2) Extract and verify check-worthy claims.
(3) Flag logical fallacies from the provided list.

Rules:
- Do not speculate or make things up.
- Cite reputable, independent sources with short quotes and URLs.
- If evidence is insufficient or conflicting, mark the claim as "Unverifiable" or "Needs context".
- Use the provided fallacies list only; if none apply, say "None found".
- Show only final answers. No hidden reasoning.

Article to analyze:
{content}

Fallacy reference list:
{fallacies_list}

## Tasks
1. **Summary**: Five-sentence neutral summary.
2. **Claim extraction**: List 5–10 key factual claims.
3. **Verification**: For each claim, give a verdict: True | False | Misleading | Needs context | Unverifiable.
   - Provide 1–3 short supporting/contradicting quotes with URLs and source dates.
   - Note if event dates match or conflict.
4. **Fallacies**: List matching fallacies (by name) for each claim.
5. **Red flags**: Note sensational language, anonymous sourcing, or inconsistencies (if any).
6. **Confidence**: Provide an overall confidence score between 0 and 1.

IMPORTANT: Respond ONLY with valid JSON. No additional text before or after. Use this exact structure:

{{
  "summary": "Five sentences summary here...",
  "claims": [
    {{
      "claim": "Specific claim text",
      "verdict": "True|False|Misleading|Needs context|Unverifiable",
      "evidence": [
        {{"quote": "Short quote", "url": "https://example.com", "source": "Source name", "published_date": "YYYY-MM-DD", "matches_event_date": true}}
      ],
      "fallacies": ["Fallacy name or None found"],
      "notes": "Brief notes if needed"
    }}
  ],
  "red_flags": ["Flag 1", "Flag 2"],
  "confidence": 0.8
}}
""" 

In [117]:
chain1 = LLMChain(
    llm=llm,
    prompt=PromptTemplate(
        template=template1,
        input_variables=["content", "fallacies_list"] 
    )
)

In [118]:
template2 = """You are an ethics professor reviewing a news article SUMMARY. Be succinct and easy to read, but ground your critique in core ethics principles (fairness, non-maleficence, duty of care, transparency). Use ONLY the fallacy names/definitions provided below. If no fallacy applies, say "None found" and explain why.

Article summary: {summary}

Fallacies to consider:
{fallacies_list}

Provide EXACTLY:
1) Most impactful fallacy: <name from list or "None found">
2) Why this could mislead readers: <1–3 sentences, plain language>
3) Counterfactual/counterpoint: <one plausible alternative interpretation for why this fallacy (or appearance of it) might be present>

Constraints:
- Do not invent facts beyond the summary and fallacy list.
- No step-by-step reasoning; show final answers only.
- Keep the total response under 120 words.

Professor:"""

In [119]:
chain2= LLMChain(
    llm=llm,
    prompt=PromptTemplate(
        template=template2,
        input_variables=["summary","fallacies_list"]
    )
)

In [120]:
search = GoogleSerperAPIWrapper(
    type="news",
    tbs="qdr:m1",  
    serper_api_key=serper_api_key
)

In [121]:
# Define search topic
search_topic = "global trade"

# Get search results and load with WebBaseLoader and loader.load
search_results = search.results(f"site:whitehouse.gov {search_topic}")
article_url = search_results['news'][0]['link']
article_title = search_results['news'][0]['title']
loader = WebBaseLoader(article_url)
article_text = ' '.join(loader.load()[0].page_content[:3000].split())

In [125]:

print("🔍 Running fact-checking analysis...")
summary = chain1.invoke({"content": article_text, "fallacies_list": fallacies_list})["text"]
analysis = chain2.invoke({"summary": summary, "fallacies_list": fallacies_list})["text"]

print("✅ Analysis complete!")

🔍 Running fact-checking analysis...
✅ Analysis complete!
✅ Analysis complete!


In [129]:
# Function to extract JSON from LLM response
def extract_json_from_response(response_text):
    """Extract JSON from LLM response, handling various formats"""
    import re
    
    # First try: direct JSON parsing
    try:
        return json.loads(response_text.strip())
    except json.JSONDecodeError:
        pass
    
    # Second try: find JSON between code blocks
    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response_text, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass
    
    # Third try: find JSON-like structure
    json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError:
            pass
    
    # If all fails, return structured error
    return {
        "summary": "JSON parsing failed - raw response included below",
        "claims": [],
        "red_flags": ["Failed to parse LLM response as JSON"],
        "confidence": 0.0,
        "raw_response": response_text
    }

# Test the JSON extraction
print("🧪 Testing JSON extraction...")

🧪 Testing JSON extraction...


In [128]:
def display_results(summary_json, ethics_analysis):
    print("=" * 80)
    print("📰 FACT-CHECKING REPORT")
    print("=" * 80)
    print(f"📄 Article: {article_title}")
    print(f"🔗 URL: {article_url}")
    print("=" * 80)
    
    # Extract JSON using our robust function
    data = extract_json_from_response(summary_json)
    
    print("\n📝 SUMMARY:")
    print("-" * 40)
    print(data.get('summary', 'No summary available'))
    
    print(f"\n🎯 CONFIDENCE SCORE: {data.get('confidence', 'N/A')}")
    
    print("\n🔍 CLAIMS ANALYSIS:")
    print("-" * 40)
    claims = data.get('claims', [])
    for i, claim in enumerate(claims, 1):
        print(f"\n{i}. CLAIM: {claim.get('claim', 'N/A')}")
        print(f"   VERDICT: {claim.get('verdict', 'N/A')}")
        
        fallacies = claim.get('fallacies', [])
        if fallacies and fallacies != ['None found']:
            print(f"   FALLACIES: {', '.join(fallacies)}")
        
        evidence = claim.get('evidence', [])
        if evidence:
            print("   EVIDENCE:")
            for j, ev in enumerate(evidence, 1):
                print(f"     {j}. \"{ev.get('quote', '')}\" - {ev.get('source', '')} ({ev.get('published_date', 'N/A')})")
        
        notes = claim.get('notes', '')
        if notes:
            print(f"   NOTES: {notes}")
    
    red_flags = data.get('red_flags', [])
    if red_flags:
        print("\n🚩 RED FLAGS:")
        print("-" * 40)
        for flag in red_flags:
            print(f"• {flag}")
    
    # Show raw response if JSON parsing failed
    if 'raw_response' in data:
        print("\n⚠️  RAW LLM RESPONSE:")
        print("-" * 40)
        print(data['raw_response'])
    
    print("\n🎓 ETHICS PROFESSOR REVIEW:")
    print("-" * 40)
    print(ethics_analysis)
    print("\n" + "=" * 80)

# Display the results
display_results(summary, analysis)

📰 FACT-CHECKING REPORT
📄 Article: Fact Sheet: President Donald J. Trump Further Modifies the Reciprocal Tariff Rates
🔗 URL: https://www.whitehouse.gov/fact-sheets/2025/07/fact-sheet-president-donald-j-trump-further-modifies-the-reciprocal-tariff-rates/

📝 SUMMARY:
----------------------------------------
['President Donald J. Trump signed an Executive Order modifying reciprocal tariff rates for certain countries to address US goods trade deficits.', "The order reflects the President's efforts to protect the US against foreign threats to national security and economy.", 'The President announced additional tariffs on April 2, and some countries have since agreed to trade deals and security agreements.', 'The modified tariff rates will apply to countries listed in Annex I of the Executive Order, while others will be subject to a 10% tariff.', "The President aims to strengthen America's position in the global market by addressing decades of failed trade policy."]

🎯 CONFIDENCE SCORE: 0.6



🧪 Testing JSON extraction...
