In [1]:
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q openai
!pip install -q pandas
!pip install -q tqdm

In [2]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET

# Fetch the PIB RSS feed. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being handed to the XML parser.
rss_url = "https://www.pib.gov.in/RssMain.aspx?ModId=6&Lang=1&Regid=3"
response = requests.get(rss_url, timeout=30)
response.raise_for_status()
xml_data = response.content

# Parse XML
root = ET.fromstring(xml_data)

# Extract items, skipping entries whose <title> or <link> is missing or empty
# (findtext returns None for a missing element instead of raising).
titles = []
links = []

for item in root.findall(".//item"):
    title = (item.findtext("title") or "").strip()
    link = (item.findtext("link") or "").strip()
    if not title or not link:
        continue
    titles.append(title)
    links.append(link)

# Save to CSV
df = pd.DataFrame({'title': titles, 'link': links})
df.to_csv("verified_facts.csv", index=False)
print("Saved verified_facts.csv with", len(df), "entries")

Saved verified_facts.csv with 20 entries


In [3]:
# Headline strings that will be embedded as the fact corpus
facts = df["title"].to_list()

In [4]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd

# Read the saved headlines back from disk and keep the title column as a list
df = pd.read_csv("verified_facts.csv")
facts = df["title"].to_list()

# Sentence-embedding model used for both the facts and, later, the claims
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every headline and store the vectors as a float32 numpy array
embeddings = np.array(
    model.encode(facts, show_progress_bar=True),
    dtype="float32",
)

# Flat L2 index sized to the embedding width, filled with all fact vectors
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)
print(f"Indexed {len(facts)} verified PIB facts.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Indexed 20 verified PIB facts.


In [5]:
# Define a Function to Retrieve Top-k Matches
def retrieve_similar_facts(claim_text, top_k=3, min_score=0.7):
    """Return the indexed facts that are semantically close to ``claim_text``.

    The claim is embedded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; hits are mapped back to text through the
    module-level ``facts`` list.

    Args:
        claim_text: Text of the claim to look up.
        top_k: Maximum number of nearest neighbours requested from the index.
        min_score: Minimum similarity score a neighbour needs to be kept.

    Returns:
        A list of fact strings, in the order the index returned them, whose
        score is at least ``min_score``. Empty when nothing qualifies.
    """
    claim_embedding = model.encode([claim_text]).astype('float32')
    distances, indices = index.search(claim_embedding, top_k)

    valid_facts = []
    for dist, idx in zip(distances[0], indices[0]):
        # FAISS pads the result with index -1 when fewer than top_k vectors
        # are available; without this guard facts[-1] would silently return
        # the last fact in the list.
        if idx < 0:
            continue
        # IndexFlatL2 reports squared L2 distance, so 1 - d/2 equals cosine
        # similarity only for unit-length embeddings.
        # NOTE(review): assumes the model emits normalised vectors - confirm.
        score = 1 - dist / 2
        if score >= min_score:
            valid_facts.append(facts[idx])

    return valid_facts


In [None]:
# Install & Set OpenAI API Key
!pip install openai
import os

import openai
# Secure key handling
openai.api_key = os.getenv("API_KEY")

def classify_claim_with_gpt(claim, retrieved_facts):
    """Ask GPT-4 to judge ``claim`` against ``retrieved_facts``.

    Args:
        claim: Text of the claim being checked.
        retrieved_facts: Iterable of fact strings used as evidence. When it is
            empty no API call is made.

    Returns:
        A dict. With no facts it is a fixed "Unverifiable" result. Otherwise
        it is the model's reply parsed as JSON (the prompt asks for the keys
        "verdict", "evidence" and "reasoning"). If the request or the JSON
        parsing raises, a dict with verdict "Error" and the exception text in
        "reasoning" is returned instead of propagating the exception.
    """
    # Imported locally so the function does not rely on some other cell
    # having imported json before it is called.
    import json

    if not retrieved_facts:
        return {
            "verdict": "Unverifiable",
            "evidence": [],
            "reasoning": "No semantically similar facts were retrieved to verify the claim."
        }

    prompt = f"""
You are an AI fact-checking assistant.

Claim: "{claim}"

Retrieved Facts:
{chr(10).join(f"- {fact}" for fact in retrieved_facts)}

Classify the claim as one of: "Likely True", "Likely False", or "Unverifiable".
Provide JSON output with keys: "verdict", "evidence", and "reasoning".
Respond only with JSON.
"""

    request = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }

    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion.create was removed; the module-level
            # client exposes chat.completions.create and returns objects.
            response = openai.chat.completions.create(**request)
            output = response.choices[0].message.content
        else:
            # Legacy (<1.0) SDK: dict-style response.
            response = openai.ChatCompletion.create(**request)
            output = response['choices'][0]['message']['content']
        return json.loads(output)

    except Exception as e:
        # Deliberate best-effort: report the failure as a result instead of
        # interrupting the notebook run.
        return {
            "verdict": "Error",
            "evidence": [],
            "reasoning": f"GPT classification failed: {str(e)}"
        }




In [7]:
!pip install -q spacy
!python -m spacy download en_core_web_sm
import spacy

# Load spaCy
nlp = spacy.load("en_core_web_sm")

def extract_main_claim(text):
    """Return the longest noun chunk or named entity found in ``text``.

    The text is parsed with the module-level spaCy pipeline ``nlp``. Noun
    chunks are considered before named entities, so on a length tie the
    earlier candidate wins. When the parse yields no candidates the input
    text itself is returned. The result is whitespace-stripped.
    """
    parsed = nlp(text)

    candidates = [span.text for span in parsed.noun_chunks]
    candidates.extend(entity.text for entity in parsed.ents)

    if not candidates:
        return text.strip()
    return max(candidates, key=len).strip()

# Example claim run through the extractor
claim_input = (
    "The Indian government has announced free electricity "
    "to all farmers starting July 2025."
)
main_claim = extract_main_claim(claim_input)
print("Extracted Claim:", main_claim)


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Extracted Claim: The Indian government


In [8]:
# Imported at the top of the cell, before any call below that may need it
import json

# Extract key claim
main_claim = extract_main_claim(claim_input)

# Retrieve similar facts
retrieved = retrieve_similar_facts(main_claim)

# Classify using GPT
final_result = classify_claim_with_gpt(main_claim, retrieved)

# Print and save (explicit encoding so the file does not depend on the
# platform default)
print(json.dumps(final_result, indent=2))
with open("fact_check_output.json", "w", encoding="utf-8") as f:
    json.dump(final_result, f, indent=2)

{
  "verdict": "Unverifiable",
  "evidence": [],
  "reasoning": "No semantically similar facts were retrieved to verify the claim."
}
