In [1]:
# 📌 1. Install Required Libraries (Colab or Jupyter)
!pip install gradio yfinance --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
# 1. Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import gradio as gr

In [6]:
# 2. Load Dataset
df = pd.read_csv("fake_or_real_news.csv")

In [7]:
# 3. Initial Data Inspection
print(df.shape)

(6335, 4)


In [8]:
print(df.dtypes)

id        int64
title    object
text     object
label    object
dtype: object


In [9]:
print(df.isnull().sum())

id       0
title    0
text     0
label    0
dtype: int64


In [10]:
print(df['label'].value_counts())

label
REAL    3171
FAKE    3164
Name: count, dtype: int64


In [11]:
# 4. Preprocessing
def clean_text(text):
    text = text.lower()
    text = re.sub("https?://\S+", "", text)
    text = re.sub("<.*?>", "", text)
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub("\n", " ", text)
    return text

In [12]:
df['text'] = df['text'].astype(str).apply(clean_text)

In [13]:
# 5. Feature Engineering
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X = vectorizer.fit_transform(df['text'])
y = df['label']

In [14]:
# 6. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# 7. Model Training
model = PassiveAggressiveClassifier(max_iter=1000)
model.fit(X_train, y_train)

In [16]:
# 8. Model Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.9384372533543804

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.94      0.93      0.94       628
        REAL       0.93      0.94      0.94       639

    accuracy                           0.94      1267
   macro avg       0.94      0.94      0.94      1267
weighted avg       0.94      0.94      0.94      1267


Confusion Matrix:
 [[586  42]
 [ 36 603]]


In [17]:
# 9. Gradio Interface
def predict_news(news_text):
    cleaned = clean_text(news_text)
    vec = vectorizer.transform([cleaned])
    prediction = model.predict(vec)[0]
    return f"📰 The news is predicted to be: {prediction}"

In [18]:
gr.Interface(
    fn=predict_news,
    inputs=gr.Textbox(lines=10, placeholder="Enter news article text here..."),
    outputs=gr.Textbox(label="Prediction"),
    title="🧠 Fake News Detection",
    description="Enter a news article text to determine if it's REAL or FAKE."
).launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e8014e1a0db215750e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


