<a href="https://colab.research.google.com/github/ITwArRiOr001/Sitee/blob/main/NLPFakenewsAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
# 📦 Imports
import json
import os

import joblib
import numpy as np
import pandas as pd
import requests
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# ✅ Step 1: Preprocessing (using 'text' column)
# NOTE(review): `df` is not defined in this cell; it is expected to exist
# already (presumably a DataFrame loaded earlier in the notebook) with
# 'text' and 'label' columns — confirm before running this cell on its own.
X = df['text']
y = df['label']  # 1 = Real, 0 = Fake

# ✅ Step 2: Split dataset
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ✅ Step 3: TF-IDF
# English stop words are removed and terms occurring in more than 70% of the
# training documents are ignored (max_df=0.7). The vocabulary is fitted on the
# training split only; the test split is merely transformed with it, so no
# test-set statistics influence the features.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# ✅ Step 4: Train model
# Logistic regression on the TF-IDF features, with the solver allowed up to
# 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

# ✅ Step 5: Evaluate
# Print per-class precision / recall / F1 computed on the held-out split.
y_pred = model.predict(X_test_tfidf)
print(classification_report(y_test, y_pred))

# ✅ Step 6: Save model
# Persist both the classifier and the fitted vectorizer to the working
# directory; predict_news() reloads these exact file names.
joblib.dump(model, "news_fake_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# ✅ Step 7: Predict (local ML)
def predict_news(text):
    """Classify one news text with the classifier and vectorizer saved on disk.

    Both artifacts ("news_fake_model.pkl" and "tfidf_vectorizer.pkl") are
    re-read with ``joblib.load`` on every call, so the files must exist in the
    current working directory.

    Args:
        text: The news text to classify (a single string).

    Returns:
        ``"REAL"`` when the predicted label equals 1, otherwise ``"FAKE"``.
    """
    classifier = joblib.load("news_fake_model.pkl")
    tfidf = joblib.load("tfidf_vectorizer.pkl")
    # The vectorizer expects an iterable of documents, hence the one-item list;
    # [0] pulls out the single prediction.
    label = classifier.predict(tfidf.transform([text]))[0]
    if label == 1:
        return "REAL"
    return "FAKE"

# ✅ Step 8: Web-Based Semantic Verifier (NewsAPI)
# 🔑 The key is read from the NEWS_API_KEY environment variable so that a real
# key never has to be written into (and committed with) this file. The
# placeholder is only a fallback and is not a valid key: either set the
# environment variable or replace the placeholder with your NewsAPI key.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "your_newsapi_key_here")
# Sentence-embedding model used by web_verify_claim() to compare a claim with
# article summaries. It is instantiated once, at module level, and shared by
# every call.
semantic_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def search_news_articles(query):
    """Search NewsAPI's ``/v2/everything`` endpoint for articles about *query*.

    Requests up to five English articles sorted by relevancy, authenticated
    with the module-level ``NEWS_API_KEY``.

    Args:
        query: Free-text search string (for example, the claim being checked).

    Returns:
        A list of ``"<title>. <description>"`` strings, one per returned
        article that has a non-empty description (just the description when
        the title is missing). The list is empty when the response carries no
        "articles" entry, which includes JSON error payloads.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: (or a subclass) if the response body is not valid JSON.
    """
    response = requests.get(
        "https://newsapi.org/v2/everything",
        # Passing the values through ``params`` lets requests URL-encode them,
        # so characters such as "&", "#" or "+" inside the claim can no longer
        # truncate the query or inject extra query-string parameters.
        params={
            "q": query,
            "language": "en",
            "pageSize": 5,
            "sortBy": "relevancy",
            "apiKey": NEWS_API_KEY,
        },
        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        timeout=10,
    )
    data = response.json()

    summaries = []
    # "articles" may be absent (error payload) or null; treat both as empty.
    for article in data.get("articles") or []:
        description = article.get("description")
        if not description:
            continue
        title = article.get("title")
        # Avoid emitting the literal text "None. ..." when the title is null.
        summaries.append(f"{title}. {description}" if title else description)
    return summaries

def web_verify_claim(text, threshold=0.75):
    """Check a claim against news articles using embedding similarity.

    Fetches candidate articles with ``search_news_articles``, embeds the claim
    and every article summary with ``semantic_model``, and compares the claim
    with each summary by cosine similarity.

    Args:
        text: The claim to verify.
        threshold: Cosine-similarity score the best match must exceed for the
            claim to be reported as "REAL". Defaults to 0.75.

    Returns:
        A dict with keys "verdict", "reason" and "evidence". When at least one
        article was found it also contains "confidence" (the best similarity
        score rounded to 3 decimals). "verdict" is "REAL" when the best score
        is above ``threshold`` (evidence: the single best-matching article),
        otherwise "UNVERIFIED" (evidence: every article retrieved, or an empty
        list when none were found).
    """
    articles = search_news_articles(text)
    if not articles:
        return {"verdict": "UNVERIFIED", "reason": "No article found", "evidence": []}

    claim_embedding = semantic_model.encode(text, convert_to_tensor=True)
    article_embeddings = semantic_model.encode(articles, convert_to_tensor=True)

    # Row 0 holds the similarity of the (single) claim to each article.
    sim_scores = util.pytorch_cos_sim(claim_embedding, article_embeddings)[0]
    best_idx = int(sim_scores.argmax())
    top_score = float(sim_scores[best_idx])
    confidence = round(top_score, 3)

    # NOTE(review): a high similarity only shows that an article covers the
    # same topic; an article refuting the claim could score just as high.
    # Confirm that "REAL" is the intended reading of a close match.
    if top_score > threshold:
        return {
            "verdict": "REAL",
            "confidence": confidence,
            "reason": "Matched real source",
            "evidence": [articles[best_idx]],
        }
    return {
        "verdict": "UNVERIFIED",
        "confidence": confidence,
        "reason": "Low similarity to known sources",
        "evidence": articles,
    }

# ✅ Step 9: Unified checker
def unified_news_check(text):
    """Run both checkers on *text* and merge their results into one report.

    The local classifier (``predict_news``) is consulted first, then the
    web-based verifier (``web_verify_claim``).

    Args:
        text: The news text / claim to check.

    Returns:
        A dict with the keys "local_model_verdict", "web_verifier_verdict",
        "reason", "confidence" and "evidence". "confidence" and "evidence"
        are ``None`` when the web verifier's result does not include them.
    """
    local_verdict = predict_news(text)
    web = web_verify_claim(text)

    # Keys are inserted in the order they should appear when serialised.
    report = {"local_model_verdict": local_verdict}
    report["web_verifier_verdict"] = web["verdict"]
    report["reason"] = web["reason"]
    report["confidence"] = web.get("confidence")
    report["evidence"] = web.get("evidence")
    return report

# ✅ Step 10: Example test
if __name__ == "__main__":
    # Smoke test: run one sample claim through both the local classifier and
    # the web verifier. This performs a live HTTP request to NewsAPI and reads
    # the saved model files from disk.
    sample = "Iran and Israel is at War"
    result = unified_news_check(sample)
    # Pretty-print the combined report as indented JSON.
    print("[RESULT]", json.dumps(result, indent=2))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      4708
           1       0.98      0.99      0.98      4272

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980

[RESULT] {
  "local_model_verdict": "FAKE",
  "web_verifier_verdict": "UNVERIFIED",
  "reason": "No article found",
  "confidence": null,
  "evidence": []
}
