<a href="https://colab.research.google.com/github/Priyanshu-Singh-Rajput/TruthCheck-AI/blob/main/Fake_news_detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Overview


Step 1: Installation

In [None]:
# Core LangChain (provides the Tool / agent helpers imported in Step 2)
# NOTE(review): none of these installs pin a version — consider pinning for reproducible runs.
!pip install -q langchain

# NLP tools: spaCy
!pip install -q spacy

# IBM Watson SDK (Natural Language Understanding client)
!pip install -q ibm-watson

# Download the small English spaCy model loaded later via spacy.load("en_core_web_sm")
!python -m spacy download en_core_web_sm

# Install (or upgrade, via -U) the Google Generative AI SDK
!pip install -q -U google-generativeai

Step 2: Importing libraries

In [None]:
# Core utilities
import os
import re

# LangChain modular tools
from langchain.agents import Tool
from langchain.agents import initialize_agent, AgentType

# NLP processing
import spacy

# IBM Watson NLU
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions, EntitiesOptions

#google gen ai
import google.generativeai as genai

Step 3: Set up API keys

In [None]:

# Securely collect API credentials
import getpass

# getpass hides the secrets while they are typed; the service URL is not a
# secret, so a plain prompt is used for it. Every value is stripped because
# credentials pasted from a browser often carry stray whitespace or a trailing
# newline, which would otherwise make authentication fail.
watson_api_key = getpass.getpass("🔐 Enter your IBM Watson API Key: ").strip()
watson_url = input("🌍 Enter your IBM Watson Service URL: ").strip()
gemini_api_key = getpass.getpass("🔐 Enter your Gemini API Key: ").strip()

# Later cells read the Gemini key back from the environment.
os.environ["GEMINI_API_KEY"] = gemini_api_key

# Configure IBM Watson NLU client
authenticator = IAMAuthenticator(watson_api_key)
nlu = NaturalLanguageUnderstandingV1(
    version="2021-08-01",
    authenticator=authenticator
)
nlu.set_service_url(watson_url)

Step 4: Function to take input from the user

In [None]:
from ipywidgets import Textarea, Button, VBox, Output

# Input form: a text area, a submit button and an output pane for feedback.
ta = Textarea(
    placeholder="Paste your text here…",
    layout={'width': '500px', 'height': '200px'},
)
btn = Button(description="Submit")
out = Output()


def on_submit(b):
    """Echo the first 100 characters of the text area into the output pane.

    Args:
        b: The widget passed by the click callback; it is not used.
    """
    with out:
        print("Received:", ta.value[:100], "…")


# Wire the callback to the button, then render the three widgets stacked vertically.
btn.on_click(on_submit)
display(VBox([ta, btn, out]))

Step 5: Agent to analyze input

In [None]:
# Shared spaCy pipeline, loaded once and reused by the functions below.
nlp = spacy.load("en_core_web_sm")


def preprocess_merged(text: str) -> dict:
    """Clean ``text`` and annotate it with spaCy and Watson NLU.

    Args:
        text: The user-supplied text to analyse.

    Returns:
        A dict with four keys:
          - "raw_text": ``text`` with every character above U+FFFF removed
            (this covers most emoji); punctuation is kept.
          - "clean_text": space-joined, lower-cased lemmas of the alphabetic,
            non-stop-word tokens.
          - "keywords": sorted list of keyword strings returned by Watson.
          - "entities": sorted list of ``(text, label)`` tuples combining the
            Watson and spaCy entities, without duplicates.

    Raises:
        Any exception raised by ``nlu.analyze`` propagates to the caller.
    """
    # 1. Drop characters outside the Basic Multilingual Plane (most emoji live
    #    there). Punctuation is kept so sentence detection still works.
    raw_text = re.sub(r"[\U00010000-\U0010ffff]", "", text)

    # 2. Run spaCy on the raw text (sentence detection + entities).
    doc = nlp(raw_text)

    # Lemmatized, lowercase, no stop words — this is the Watson input.
    clean_tokens = [tok.lemma_.lower()
                    for tok in doc
                    if tok.is_alpha and not tok.is_stop]
    clean_text = " ".join(clean_tokens)

    # Entities found by spaCy, as (text, label) pairs.
    spacy_entities = {(ent.text, ent.label_) for ent in doc.ents}

    # 3. Watson NLU enrichment (on clean text). The call is skipped when
    #    nothing survived cleaning: there is nothing to analyse, and Watson is
    #    expected to reject an empty request.
    watson_keywords = set()
    watson_entities = set()
    if clean_text:
        resp = nlu.analyze(
            text=clean_text,
            features=Features(
                keywords=KeywordsOptions(limit=10),
                entities=EntitiesOptions(limit=10)
            )
        ).get_result()
        watson_keywords = {kw["text"] for kw in resp.get("keywords", [])}
        watson_entities = {(ent["text"], ent["type"])
                           for ent in resp.get("entities", [])}

    # 4. Merge & dedupe.
    # Keywords come from Watson only. The previous expression
    # `spacy_entities and watson_keywords or watson_keywords` always evaluated
    # to the Watson keywords, so that result is now stated directly.
    # Sorting gives a stable order; iterating a set of strings does not.
    merged_keywords = sorted(watson_keywords)
    merged_entities = sorted(watson_entities.union(spacy_entities))

    return {
        "raw_text": raw_text,         # ← use this for claim extraction
        "clean_text": clean_text,     # ← use this for Watson/embeddings
        "keywords": merged_keywords,
        "entities": merged_entities
    }

In [None]:
# Run the preprocessing pipeline on the submitted text and show each field.
result = preprocess_merged(ta.value)
for heading, field in (
    ("Raw Text:", "raw_text"),
    ("Clean Text:", "clean_text"),
    ("Keywords:", "keywords"),
    ("Entities:", "entities"),
):
    print(heading, result[field])

Step 6: Claim Extraction Agent

In [None]:
def extract_claims(text):
    """Return the sentences of ``text`` that look like factual claims.

    A sentence is kept when it is not a question and its syntactic root is a
    verb or an auxiliary.

    Args:
        text: Raw, punctuated text; sentence boundaries are detected by spaCy.

    Returns:
        A list of the matching sentences, stripped of surrounding whitespace,
        in document order.
    """
    doc = nlp(text)
    claims = []
    for sent in doc.sents:
        sentence = sent.text.strip()
        # Test the stripped text rather than the last token: in pasted text
        # the last token of a sentence can be a newline/whitespace token, which
        # would let questions slip through.
        if not sentence or sentence.endswith("?"):
            continue
        # spaCy tags copular and auxiliary roots ("The earth is flat") as AUX,
        # not VERB, so both are accepted.
        if any(tok.dep_ == "ROOT" and tok.pos_ in ("VERB", "AUX") for tok in sent):
            claims.append(sentence)
    return claims
# LangChain tool wrapper around extract_claims.
# The description tells the agent to pass raw text: extract_claims depends on
# spaCy sentence detection, which needs the original punctuation.
claim_extraction_tool = Tool(
    name="Claim Extraction Agent",
    func=extract_claims,
    description="Extracts factual claim-like sentences from raw, punctuated text."
)

In [None]:
# Call the extractor directly so its output can be inspected right away.
claims = extract_claims(result["raw_text"])

print("🔍 Extracted Claims:")
numbered_claims = enumerate(claims, start=1)
for i, claim in numbered_claims:
    print(f"{i}. {claim}")

Step 7: Search and Verdict Agent

In [None]:
# Authenticate the Gemini SDK with the key stored in the GEMINI_API_KEY environment variable.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
# Module-level model handle; verify_claim_with_gemini sends its prompts through it.
model = genai.GenerativeModel("gemini-1.5-flash")
def verify_claim_with_gemini(claim: str, keywords: list[str]) -> dict:
    """Ask the Gemini model for a verdict on a single claim.

    The prompt asks the model to label the claim as Supported, Refuted or
    Not Enough Evidence and to give a short justification.
    NOTE(review): the prompt asks the model to "search", but no search tool is
    passed in this call — confirm whether that is intended.

    Args:
        claim: The sentence to fact-check.
        keywords: Context keywords included in the prompt.

    Returns:
        A dict with "claim" (the input claim) and "verdict_response" (the
        model's text reply, or a fallback message when the reply has no text).
    """
    prompt = f"""
You are a fact-checking assistant.

Given the following claim and context keywords, search for up-to-date information and decide if the claim is:
-  Supported
-  Refuted
-  Not Enough Evidence

Respond in this format:
Verdict: <one of the above>
Justification: <one short paragraph based on your reasoning>

Claim: "{claim}"
Keywords: {', '.join(keywords)}

Please begin.
"""

    response = model.generate_content(prompt)
    try:
        verdict_text = response.text
    except ValueError as exc:
        # `response.text` raises ValueError when the response holds no text
        # part (for example when the reply was blocked). Report that for this
        # claim instead of aborting the verification of every other claim.
        verdict_text = (
            "Verdict: Not Enough Evidence\n"
            f"Justification: Gemini returned no text for this claim ({exc})."
        )
    return {
        "claim": claim,
        "verdict_response": verdict_text
    }

# Example use with outputs from previous steps.
# Claims are extracted from the same preprocessed snapshot (`result`) that
# produced the keywords, so both describe the same input even if the text area
# has been edited since preprocessing ran.
claims = extract_claims(result["raw_text"])
keywords = list(result["keywords"])

# Run verification
verified_claims = [verify_claim_with_gemini(claim, keywords) for claim in claims]

# Print results
for v in verified_claims:
    print("\n🧾 Claim:", v["claim"])
    print(v["verdict_response"])