In [1]:
import pandas as pd
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [3]:
# Load datasets
true_df = pd.read_csv("True.csv")
fake_df = pd.read_csv("Fake.csv")

# Add labels
true_df['label'] = 1
fake_df['label'] = 0

# Combine datasets
df = pd.concat([true_df, fake_df], axis=0).sample(frac=1, random_state=42).reset_index(drop=True)

# Combine title and text
df['content'] = df['title'] + " " + df['text']


In [94]:
def clean_text(text):
    text = re.sub(r"http\S+", "", text)              # Remove URLs
    text = re.sub(r"[^a-zA-Z]", " ", text)            # Keep only letters
    text = text.lower()                               # Lowercase
    return text

df['content'] = df['content'].apply(clean_text)


In [96]:
X = df['content']
y = df['label']
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_df=0.7, min_df=5, stop_words='english', ngram_range=(1,2))
X = vectorizer.fit_transform(df['text'])
y = df['label']




In [97]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

X_train_text, X_test_text, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(max_df=0.7, min_df=5, stop_words='english', ngram_range=(1,2))
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)


In [112]:
from sklearn.naive_bayes import MultinomialNB

model = MultinomialNB()
model.fit(X_train, y_train)


In [114]:
from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9524498886414254
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.96      0.95      4669
           1       0.95      0.95      0.95      4311

    accuracy                           0.95      8980
   macro avg       0.95      0.95      0.95      8980
weighted avg       0.95      0.95      0.95      8980



In [122]:
import joblib

joblib.dump(model, "model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")


['tfidf_vectorizer.pkl']

In [120]:
import joblib

# Save the vectorizer
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# Confirm it was saved
import os
print("Files in current directory:", os.listdir())


Files in current directory: ['.ipynb_checkpoints', 'Fake.csv', 'fake_news_model.pkl', 'model.pkl', 'tfidf_vectorizer.pkl', 'True.csv', 'Untitled.ipynb', 'Untitled1.ipynb', 'vectorizer.pkl']


In [None]:
import joblib
import os

print("Current working directory:", os.getcwd())
print("Files:", os.listdir())

# Load the vectorizer
vectorizer = joblib.load("tfidf_vectorizer.pkl")


Current working directory: C:\Users\Ekta\fake news detection model
Files: ['.ipynb_checkpoints', 'Fake.csv', 'fake_news_model.pkl', 'model.pkl', 'tfidf_vectorizer.pkl', 'True.csv', 'Untitled.ipynb', 'Untitled1.ipynb', 'vectorizer.pkl']


In [130]:
model = joblib.load("model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")


In [51]:
def clean_text(text):
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"[^a-zA-Z]", " ", text)
    text = text.lower()
    return text


In [53]:
def predict_news(news_text):
    cleaned = clean_text(news_text)
    vectorized = vectorizer.transform([cleaned])
    prediction = model.predict(vectorized)[0]
    return "✅ Real News" if prediction == 1 else "❌ Fake News"


In [80]:
test_cases = [
    "Breaking: India wins T20 World Cup 2024 after thrilling final.",
    "NASA confirms aliens have contacted Earth through secret messages.",
    "Delhi government announces new metro line connecting outer ring road.",
]

for news in test_cases:
    print(f"\nNews: {news}")
    print("Prediction:", predict_news(news))



News: Breaking: India wins T20 World Cup 2024 after thrilling final.
Prediction: ✅ Real News

News: NASA confirms aliens have contacted Earth through secret messages.
Prediction: ❌ Fake News

News: Delhi government announces new metro line connecting outer ring road.
Prediction: ✅ Real News


In [29]:
def predict_news_prob(news_text):
    cleaned = clean_text(news_text)
    vectorized = vectorizer.transform([cleaned])
    proba = model.predict_proba(vectorized)[0]
    prediction = model.predict(vectorized)[0]
    return "✅ Real News" if prediction == 1 else "❌ Fake News", round(proba[prediction]*100, 2)


In [31]:
result, confidence = predict_news_prob(news)
print(f"Prediction: {result} (Confidence: {confidence}%)")


Prediction: ✅ Real News (Confidence: 50.61%)


In [82]:
test_cases = [
    # Real News
    "India launches its first solar mission Aditya-L1 from Sriharikota.",
    "Apple announces the iPhone 15 with USB-C port and AI upgrades.",
    "Nobel Prize in Physics awarded for research on quantum dots.",
    "Delhi government imposes restrictions due to rising pollution levels.",
    "ISRO to test reusable launch vehicle next year, says Chairman Somanath.",
    
    # Fake / Sensational News
    "Bill Gates admits vaccines are part of global population control.",
    "NASA confirms Earth will be dark for six days due to solar storm.",
    "COVID-19 vaccine turns people into zombies, says TikTok doctor.",
    "Aliens spotted helping with crop harvesting in rural Punjab.",
    "New law allows humans to legally marry robots in California."
]


In [84]:
for news in test_cases:
    label, confidence = predict_news_prob(news)
    print(f"\nNews: {news}")
    if confidence < 60:
        print(f"Prediction: {label} (Confidence: {confidence}%) — ⚠️ Low Confidence")
    else:
        print(f"Prediction: {label} (Confidence: {confidence}%)")



News: India launches its first solar mission Aditya-L1 from Sriharikota.
Prediction: ✅ Real News (Confidence: 83.49%)

News: Apple announces the iPhone 15 with USB-C port and AI upgrades.
Prediction: ❌ Fake News (Confidence: 54.8%) — ⚠️ Low Confidence

News: Nobel Prize in Physics awarded for research on quantum dots.
Prediction: ❌ Fake News (Confidence: 56.72%) — ⚠️ Low Confidence

News: Delhi government imposes restrictions due to rising pollution levels.
Prediction: ✅ Real News (Confidence: 91.36%)

News: ISRO to test reusable launch vehicle next year, says Chairman Somanath.
Prediction: ✅ Real News (Confidence: 63.62%)

News: Bill Gates admits vaccines are part of global population control.
Prediction: ❌ Fake News (Confidence: 68.96%)

News: NASA confirms Earth will be dark for six days due to solar storm.
Prediction: ❌ Fake News (Confidence: 69.18%)

News: COVID-19 vaccine turns people into zombies, says TikTok doctor.
Prediction: ❌ Fake News (Confidence: 79.3%)

News: Aliens spo