<a href="https://colab.research.google.com/github/Aditya2877/Aditya2877/blob/main/Faek%20News%20Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ========================================
# STEP 1: Install dependencies
# ========================================
!pip install gradio joblib scikit-learn

# ========================================
# STEP 2: Import libraries
# ========================================
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import gradio as gr

# ========================================
# STEP 3: Load Dataset
# (Upload Fake.csv and True.csv from Kaggle dataset)
# ========================================
from google.colab import files
true_df = pd.read_csv("/content/drive/MyDrive/True.csv")
fake_df = pd.read_csv("/content/drive/MyDrive/Fake.csv")

# Add labels
fake_df["label"] = "FAKE"
true_df["label"] = "REAL"

# Merge datasets
df = pd.concat([fake_df, true_df], axis=0).reset_index(drop=True)
df = df[["title", "text", "label"]]

print("Dataset shape:", df.shape)
print(df.head())

# ========================================
# STEP 4: Preprocessing
# ========================================
# Combine title + text for better context
df["content"] = df["title"] + " " + df["text"]

X = df["content"]
y = df["label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# ========================================
# STEP 5: Train Model
# ========================================
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Predictions
y_pred = model.predict(X_test_vec)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# ========================================
# STEP 6: Save Model & Vectorizer
# ========================================
joblib.dump(model, "fake_news_model.pkl")
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

# ========================================
# STEP 7: Build Gradio App
# ========================================
def predict_news(text):
    vec = vectorizer.transform([text])
    prediction = model.predict(vec)[0]
    return "✅ Real News" if prediction == "REAL" else "❌ Fake News"

demo = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=5, placeholder="Paste news article here..."),
    outputs="text",
    title="📰 Fake News Detector",
    description="Enter a news article to check if it's Fake or Real."
)

# Launch Gradio with public share link
demo.launch(share=True)


Dataset shape: (44898, 3)
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text label  
0  Donald Trump just couldn t wish all Americans ...  FAKE  
1  House Intelligence Committee Chairman Devin Nu...  FAKE  
2  On Friday, it was revealed that former Milwauk...  FAKE  
3  On Christmas day, Donald Trump announced that ...  FAKE  
4  Pope Francis used his annual Christmas Day mes...  FAKE  
Accuracy: 0.9859688195991091

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.99      0.99      0.99      4733
        REAL       0.98      0.99      0.99      4247

    accuracy                           0.99      8980



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# 1) Label balance
print(df['label'].value_counts())

# 2) Empty/very short texts?
df['content'] = (df['title'].fillna('') + ' ' + df['text'].fillna('')).str.strip()
print('Empty rows:', (df['content'].str.len()==0).sum())
print('Very short (<20 chars):', (df['content'].str.len()<20).sum())

# 3) After training: are predictions all one class?
import numpy as np
unique, counts = np.unique(y_pred, return_counts=True)
print(dict(zip(unique, counts)))


label
FAKE    23481
REAL    21417
Name: count, dtype: int64
Empty rows: 0
Very short (<20 chars): 0
{'FAKE': np.int64(4719), 'REAL': np.int64(4261)}


In [7]:
!pip install scikit-learn joblib gradio

import pandas as pd, numpy as np, joblib
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# --- Load your Drive CSVs (adjust paths if needed)
from google.colab import drive
drive.mount('/content/drive')

true_df = pd.read_csv("/content/drive/MyDrive/True.csv")
fake_df = pd.read_csv("/content/drive/MyDrive/Fake.csv")

true_df['label'] = 'REAL'
fake_df['label'] = 'FAKE'

df = pd.concat([true_df, fake_df], ignore_index=True)
df['content'] = (df['title'].fillna('') + ' ' + df['text'].fillna('')).str.strip()

# Drop empties
df = df[df['content'].str.len() > 20].reset_index(drop=True)

X = df['content']
y = df['label']

# Stratified split to keep FAKE/REAL balance
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Single pipeline (vectorizer + classifier together)
pipe = Pipeline([
    ('tfidf', TfidfVectorizer(
        stop_words='english',
        ngram_range=(1,2),
        max_df=0.9,
        min_df=5
    )),
    ('clf', LogisticRegression(
        max_iter=2000,
        class_weight='balanced',
        n_jobs=None
    ))
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

print('Confusion matrix:\n', confusion_matrix(y_test, y_pred))
print('\nReport:\n', classification_report(y_test, y_pred))

# Save ONE file (prevents “everything looks fake” due to mismatch)
joblib.dump(pipe, 'fake_news_pipeline.pkl')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Confusion matrix:
 [[4617   79]
 [  36 4248]]

Report:
               precision    recall  f1-score   support

        FAKE       0.99      0.98      0.99      4696
        REAL       0.98      0.99      0.99      4284

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



['fake_news_pipeline.pkl']

In [8]:
# Pick a few REAL/FAKE samples from the test set and see predictions
test_df = pd.DataFrame({'X': X_test, 'y': y_test}).reset_index(drop=True)

print('\nSample REAL predictions:')
print(test_df[test_df.y=='REAL'].sample(3, random_state=0).assign(pred=lambda d: pipe.predict(d.X)))

print('\nSample FAKE predictions:')
print(test_df[test_df.y=='FAKE'].sample(3, random_state=1).assign(pred=lambda d: pipe.predict(d.X)))



Sample REAL predictions:
                                                      X     y  pred
1638  Biden makes the case for Obama's Supreme Court...  REAL  REAL
983   Homeland Security not targeting Dreamers: Kell...  REAL  REAL
6790  Syrian government denies U.N. chemical attack ...  REAL  REAL

Sample FAKE predictions:
                                                      X     y  pred
6306  OOPS! HECKLERS FORCE HILLARY OFF Stage In LA A...  FAKE  FAKE
8075  One Group Of GOP Voters May Vote Blue For The ...  FAKE  FAKE
5519  SHOCK POLL: TRUMP GETS SUPPORT FROM BLACK VOTE...  FAKE  FAKE


In [9]:
import gradio as gr, joblib

pipe = joblib.load('fake_news_pipeline.pkl')

def predict_news(text):
    text = (text or '').strip()
    if len(text) < 30:
        return "⚠️ Please paste a longer article or headline+snippet."
    pred = pipe.predict([text])[0]
    return "✅ REAL" if pred == "REAL" else "❌ FAKE"

demo = gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=8, placeholder="Paste article text..."),
    outputs="text",
    title="📰 Fake News Detector",
    description="TF-IDF + Logistic Regression"
)
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://79876d76bcb248cb53.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


