In [None]:
!pip install newspaper3k
!pip install lxml_html_clean

Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.2.0-py3-none-any.whl.metadata (11 kB)
Collecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tinysegmenter==0.3 (from newspaper3k)
  Downloading tinysegmenter-0.3.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collec

In [None]:
import os
import urllib.request

import pandas as pd
import requests
from newspaper import Article
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from transformers import pipeline

# -------- Train TF-IDF + Logistic Regression --------
def train_model(fake_path="/content/Fake.csv", true_path="/content/True.csv", max_iter=1000):
    """Fit the fallback TF-IDF + logistic-regression fake-news classifier.

    Args:
        fake_path: CSV of fabricated articles; must contain a ``text`` column.
        true_path: CSV of genuine articles; must contain a ``text`` column.
        max_iter: Iteration budget handed to ``LogisticRegression``. The
            library default of 100 can stop before convergence on wide,
            sparse TF-IDF matrices, so a larger budget is used here.

    Returns:
        ``(vectorizer, model)``: the fitted ``TfidfVectorizer`` and the
        fitted ``LogisticRegression``. The model predicts ``0`` for fake
        and ``1`` for true.
    """
    fake = pd.read_csv(fake_path)
    true = pd.read_csv(true_path)

    # Label each source (0 = fake, 1 = true) and stack them. ignore_index keeps
    # the combined frame free of duplicate index labels; rows with a missing
    # text or label are dropped.
    df = pd.concat(
        [fake.assign(label=0), true.assign(label=1)],
        ignore_index=True,
    )[["text", "label"]].dropna()

    vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
    X = vectorizer.fit_transform(df["text"])
    y = df["label"]

    model = LogisticRegression(max_iter=max_iter)
    model.fit(X, y)

    return vectorizer, model

# Train the fallback classifier once, at notebook start-up.
vectorizer, tfidf_model = train_model()

# -------- Transformers Zero-Shot Classifier --------
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# -------- Google Fact Check --------
# Credentials must never be committed to a notebook. Export
# GOOGLE_FACT_CHECK_API_KEY before starting the kernel. Any key that was
# previously hard-coded here should be treated as leaked and rotated.
# With no key configured the value is "", and the fact-check request will not
# authenticate.
API_KEY = os.environ.get("GOOGLE_FACT_CHECK_API_KEY", "")

def google_fact_check(query, timeout=10):
    """Look up ``query`` in the Google Fact Check Tools claim-search API.

    Args:
        query: Claim text to search for.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A "Verified Fake" / "Verified True" verdict string taken from the
        first review whose textual rating is recognisably negative or
        positive, or ``None`` when no verdict is available (empty query, no
        API key, network/HTTP failure, or no conclusive rating).
    """
    if not query or not API_KEY:
        return None

    try:
        # Let requests build the query string so spaces, '&', '#', etc. in the
        # claim text are percent-encoded instead of corrupting the URL.
        res = requests.get(
            "https://factchecktools.googleapis.com/v1alpha1/claims:search",
            params={"query": query, "key": API_KEY},
            timeout=timeout,
        )
        if res.status_code != 200:
            return None
        claims = res.json().get("claims", [])
    except (requests.RequestException, ValueError):
        # Best effort: an unreachable service or an unparsable body means
        # "no verdict", so the caller can fall back to its other checks.
        return None

    # Negative markers are tested first so that ratings such as "Untrue" or
    # "Not true" are not misread as positive by the plain "true" check.
    negative_markers = ("fake", "false", "untrue", "not true")
    for c in claims:
        for review in c.get("claimReview", []):
            # textualRating may be absent or null in the payload.
            rating = (review.get("textualRating") or "").lower()
            if any(marker in rating for marker in negative_markers):
                return "❌ Verified Fake by Google Fact Check"
            elif "true" in rating:
                return "✅ Verified True by Google Fact Check"
    return None

# -------- Article Scraper --------
def scrape_article(url, timeout=15):
    """Download ``url`` and return the article body text extracted by newspaper.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The extracted article text, or ``""`` when the page cannot be
        fetched or parsed. The failure reason is printed so the user can
        see why nothing was extracted.
    """
    try:
        # A browser-like User-Agent is sent with the request.
        headers = {"User-Agent": "Mozilla/5.0"}
        req = urllib.request.Request(url, headers=headers)
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            html = response.read()

        article = Article(url)
        article.set_html(html)
        article.parse()
        return article.text
    except Exception as exc:
        # Still best-effort, but no longer a bare ``except``: KeyboardInterrupt
        # and SystemExit now propagate, and the cause is reported.
        print(f"⚠️ Could not extract article from URL: {exc}")
        return ""

# -------- Classify Text --------
def classify_text(text, threshold=0.75):
    """Label ``text`` as true or fake news.

    The zero-shot classifier is consulted first. Its score for the "True"
    label decides the verdict when it is at least ``threshold`` (true) or at
    most ``1 - threshold`` (fake). Scores in between are treated as
    inconclusive and the TF-IDF model's prediction is used instead.

    Args:
        text: Text to classify.
        threshold: Score at or above which the zero-shot result counts as
            true; at or below ``1 - threshold`` it counts as fake.

    Returns:
        A verdict string naming the model that produced it.
    """
    zero_shot = classifier(text, candidate_labels=["True", "False"])
    true_position = zero_shot["labels"].index("True")
    confidence_true = zero_shot["scores"][true_position]

    # Confident zero-shot verdicts short-circuit the TF-IDF fallback.
    if confidence_true >= threshold:
        return "✅ True News (via Transformer)"
    if confidence_true <= (1 - threshold):
        return "❌ Fake News (via Transformer)"

    # Inconclusive band: defer to the TF-IDF model (a prediction of 1 means true).
    features = vectorizer.transform([text])
    predicted_label = tfidf_model.predict(features)[0]
    if predicted_label == 1:
        return "✅ True News (via TF-IDF)"
    return "❌ Fake News (via TF-IDF)"

# -------- Main Pipeline --------
def check_news(text):
    """Return a verdict for ``text``.

    The Google Fact Check lookup takes precedence. When it produces no
    (or an empty) verdict, the result of ``classify_text`` is returned.
    """
    return google_fact_check(text) or classify_text(text)

# -------- Input & Output --------
# Banner: four lines emitted by a single print call.
print(
    "=" * 60,
    "📰 Fake News Detection Tool",
    "You can paste the news",
    "=" * 60,
    sep="\n",
)

user_input = input("🔹 Enter text or URL: ").strip()

# Input that looks like a URL is replaced by the text scraped from that page.
if user_input.startswith("http"):
    print("\n🌐 Extracting article content from URL...\n")
    user_input = scrape_article(user_input)

# Nothing to analyse: either the user typed nothing or scraping returned "".
if not user_input:
    print("⚠️ No valid input provided. Please try again.")
else:
    print("\n🔎 Running authenticity checks...\n")
    result = check_news(user_input)
    print("🧾 Final Verdict:", result)
    print("=" * 60)

Device set to use cpu


📰 Fake News Detection Tool
You can paste the news
🔹 Enter text or URL: https://economictimes.indiatimes.com/news/economy/foreign-trade/unplug-china-plug-into-india-centre-looks-to-attract-us-cos-planning-to-exit-china/articleshow/120356180.cms?from=mdr

🌐 Extracting article content from URL...


🔎 Running authenticity checks...

🧾 Final Verdict: ✅ True News (via Transformer)
