In [1]:
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [11]:
import pandas as pd
from google.colab import files

# Open Colab's upload widget; the result is keyed by uploaded file name.
uploaded = files.upload()

# Report the name and size of each uploaded file.
for fn, payload in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(payload)
    ))

# Load the first uploaded file by name and preview its first rows.
first_uploaded_name = list(uploaded)[0]
df = pd.read_csv(first_uploaded_name)
df.head()


Saving fake_news.csv to fake_news.csv
User uploaded file "fake_news.csv" with length 30696129 bytes


Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [12]:
import re
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

# English stopwords kept as a frozenset: the cleaning code checks every token
# with `in`, which is a linear scan on a list but a hash lookup on a set.
stop_words = frozenset(stopwords.words('english'))

# Function to clean text
def clean_text(text, stopword_list=None):
    """Normalise raw article text for vectorisation.

    The text is lowercased, every character outside a-z/A-Z is replaced by a
    space, the result is split on whitespace, stopwords are dropped, and the
    remaining words are joined with single spaces.

    Args:
        text: Raw text. Anything that is not a ``str`` (for example the float
            NaN pandas uses for a missing cell, or ``None``) is treated as
            empty text.
        stopword_list: Optional iterable of lowercase words to remove.
            Defaults to the notebook-level ``stop_words``.

    Returns:
        The cleaned text as a single string; ``""`` when nothing remains.
    """
    # A missing cell would otherwise fail on .lower() with AttributeError.
    if not isinstance(text, str):
        return ""

    if stopword_list is None:
        stopword_list = stop_words
    # Hash-based membership keeps the filter linear in the number of tokens.
    if not isinstance(stopword_list, (set, frozenset)):
        stopword_list = set(stopword_list)

    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    kept_words = [w for w in letters_only.split() if w not in stopword_list]
    return " ".join(kept_words)

# Add a cleaned copy of every article. Missing texts (NaN) are replaced by ''
# first so that clean_text always receives a string.
df['clean_text'] = df['text'].fillna('').apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
# Preview the frame again to check the newly added clean_text column.
df.head()


Unnamed: 0.1,Unnamed: 0,title,text,label,clean_text
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE,daniel greenfield shillman journalism fellow f...
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE,google pinterest digg linkedin reddit stumbleu...
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL,u secretary state john f kerry said monday sto...
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE,kaydee king kaydeeking november lesson tonight...
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL,primary day new york front runners hillary cli...


In [14]:
from sklearn.model_selection import train_test_split

# Features (X) and labels (y)
X = df['clean_text']  # Cleaned article text produced by clean_text
y = df['label']       # String class labels 'FAKE' / 'REAL' (not 0/1)

# Hold out 20% of the rows as a test set; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Size check
print("Training data size:", X_train.shape[0])
print("Testing data size:", X_test.shape[0])


Training data size: 5068
Testing data size: 1267


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Upper bound on the number of terms kept in the TF-IDF vocabulary.
MAX_TFIDF_FEATURES = 5000

vectorizer = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)

# Fit on the training text only; the test text is transformed with the
# already-fitted vectorizer.
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

print("TF-IDF vectorization done!")


TF-IDF vectorization done!


In [16]:
from sklearn.linear_model import LogisticRegression

# Create a logistic-regression classifier with default settings and fit it on
# the TF-IDF training vectors (fit returns the fitted estimator itself).
model = LogisticRegression().fit(X_train_vectors, y_train)

print("Model training complete!")


Model training complete!


In [17]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict labels for the held-out test vectors.
y_pred = model.predict(X_test_vectors)

# Compute the three summaries first, then print them in the same order.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)

print("\nConfusion Matrix:\n", test_confusion)

print("\nClassification Report:\n", test_report)


Accuracy: 0.9139700078926598

Confusion Matrix:
 [[582  46]
 [ 63 576]]

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.90      0.93      0.91       628
        REAL       0.93      0.90      0.91       639

    accuracy                           0.91      1267
   macro avg       0.91      0.91      0.91      1267
weighted avg       0.91      0.91      0.91      1267



In [18]:
# Function to predict news
def predict_news(text):
    """Classify one news text and return a human-readable verdict.

    The text goes through the same ``clean_text`` preprocessing used for the
    training data, is vectorised with the fitted ``vectorizer`` and classified
    by ``model``.

    Args:
        text: Raw news text.

    Returns:
        ``"REAL NEWS"`` or ``"FAKE NEWS"``.
    """
    # Reuse the training-time preprocessing instead of duplicating it inline.
    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    label = model.predict(vector)[0]

    # The model is trained on the string labels 'FAKE' / 'REAL', so comparing
    # the prediction with the integer 1 is never true and every input came
    # back as "FAKE NEWS". Accept the string label, and 1 for a 0/1 encoding.
    is_real = str(label).upper() == "REAL" or label == 1
    return "REAL NEWS" if is_real else "FAKE NEWS"

# Example: read one news text from the keyboard and print its predicted label.
# NOTE: input() waits for the user, so an unattended "Run All" pauses here.
input_news = input("Enter news text: ")
print("Prediction:", predict_news(input_news))


Enter news text: The government has announced a new education policy.
Prediction: FAKE NEWS


In [19]:
!pip install gradio




In [20]:
import gradio as gr

# Multi-line box where the user types or pastes the article text.
news_textbox = gr.Textbox(lines=7, placeholder="Type news here...")

# Connect the text box to predict_news; the returned string is shown as text.
iface = gr.Interface(
    fn=predict_news,
    inputs=news_textbox,
    outputs="text",
    title="Fake News Detector",
    description="Type any news article below and click Submit to see if it is FAKE or REAL",
)

# Start the app.
iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6ca756fe22a282ab29.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [38]:
def predict_news_final(text):
    """Classify one news text and report the verdict with its confidence.

    Args:
        text: Raw news text.

    Returns:
        A string of the form ``"✅ REAL NEWS | Confidence: 97.12%"`` or
        ``"❌ FAKE NEWS | Confidence: 88.40%"``, where the percentage is the
        model's predicted probability for the class it chose.
    """
    # Clean and vectorise exactly as the training data was prepared.
    vector = vectorizer.transform([clean_text(text)])

    pred = model.predict(vector)[0]
    prob = model.predict_proba(vector)[0]

    # predict_proba columns follow the order of model.classes_, so look up the
    # column of the predicted label instead of assuming a fixed position.
    confidence = prob[list(model.classes_).index(pred)] * 100

    # The labels are the strings 'FAKE' / 'REAL'; comparing with the integer 1
    # is never true, which reported every input as fake. Accept the string
    # label, and 1 for a 0/1 encoding.
    is_real = str(pred).upper() == "REAL" or pred == 1
    if is_real:
        return f"✅ REAL NEWS | Confidence: {confidence:.2f}%"
    return f"❌ FAKE NEWS | Confidence: {confidence:.2f}%"




In [22]:
# `example_news` is not assigned in any visible cell, so a fresh
# "Restart & Run All" would stop here with NameError. Provide a default
# example only when the name is missing, so an existing value is kept.
if "example_news" not in globals():
    example_news = "The government has announced a new education policy."

# Interface around predict_news_final, pre-filled with the example text.
iface = gr.Interface(
    fn=predict_news_final,
    inputs=gr.Textbox(lines=15, placeholder="Type your news article here...", value=example_news),
    outputs="text",
    title="Fake News Detector",
    description="Type or paste any news article. Click Submit to see if it is FAKE or REAL along with confidence."
)


In [23]:
# Start the Gradio app for the interface currently bound to `iface`.
iface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ea84bb56b34aca5e4e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [30]:
import csv

# Function to save flagged news
def save_flag(text, prediction):
    """Append one (text, prediction) row to flagged_news.csv.

    The file is opened in append mode in the current working directory.

    Returns:
        The confirmation message ``"Feedback saved!"``.
    """
    row = [text, prediction]
    with open("flagged_news.csv", "a", newline='', encoding='utf-8') as handle:
        csv.writer(handle).writerow(row)
    return "Feedback saved!"

# Main function
def predict_news_with_flag(text):
    """Return the verdict string produced by predict_news_final for `text`."""
    return predict_news_final(text)

# `example_news` is not assigned in any visible cell, so a fresh
# "Restart & Run All" would stop here with NameError. Provide a default
# example only when the name is missing, so an existing value is kept.
if "example_news" not in globals():
    example_news = "The government has announced a new education policy."

# Gradio interface with manual flagging.
# NOTE: save_flag is not passed to the interface; flagging here is configured
# only through the allow_flagging / flagging_options / flagging_dir arguments.
iface = gr.Interface(
    fn=predict_news_with_flag,
    inputs=gr.Textbox(lines=15, placeholder="Type news here...", value=example_news),
    outputs="text",
    title="Fake News Detector",
    description="Type or paste any news article. Click Submit to see if it is FAKE or REAL along with confidence.",
    allow_flagging="manual",          # Flag only when the user clicks
    flagging_options=["Incorrect Prediction"],  # Choice offered when flagging
    flagging_dir="flags"
)

iface.launch()




It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2df1dd883e6bb4f674.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [27]:
import os
import csv

# Step 1: Create the folder for flagged news if it does not exist yet.
# exist_ok=True makes this a single idempotent call with no gap between
# checking for the folder and creating it.
os.makedirs("flags", exist_ok=True)

# Step 2: Custom function run when flag is clicked
def save_flag(text, prediction, flag_option=""):
    """Append one flagged sample to flags/flagged_news.csv.

    Args:
        text: The news text the user submitted.
        prediction: The prediction that was shown for that text.
        flag_option: The flag reason chosen by the user. Defaults to ``""`` so
            that two-argument calls written for the earlier ``save_flag``
            definition in this notebook keep working.

    Returns:
        The confirmation message ``"⚑ Feedback saved! Thank you."``.
    """
    file_path = "flags/flagged_news.csv"
    # Create the target folder here so the function does not depend on the
    # folder-creation step having been run first.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, "a", newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([text, prediction, flag_option])
    return "⚑ Feedback saved! Thank you."


In [37]:
# Launch the interface currently bound to `iface` again.
iface.launch()

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://88a2aa7191b653244a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


