In [1]:
import re
import string

import numpy as np
import pandas as pd
import requests
from gensim.models import Word2Vec
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [2]:
import nltk
# Fetch every NLTK resource the preprocessing step relies on.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK releases
# load the tokenizer data for word_tokenize from that package; 'punkt' is kept
# so older installations keep working.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Loading datasets

In [3]:
# Load the fake-news articles (this frame is labelled 0 further down).
# NOTE(review): absolute, machine-specific Windows path - the cell only runs on
# a machine where exactly this location exists; consider a configurable data dir.
fake_df = pd.read_csv("C:\\Users\\hp\\Fake.csv\\Fake.csv")

In [4]:
# Load the real-news articles (this frame is labelled 1 further down).
# NOTE(review): absolute, machine-specific Windows path - the cell only runs on
# a machine where exactly this location exists; consider a configurable data dir.
true_df = pd.read_csv("C:\\Users\\hp\\True.csv")

In [5]:
# Preview the first rows of the real-news frame as loaded from disk.
true_df.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
# Preview the first rows of the fake-news frame as loaded from disk.
fake_df.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


# Checking for null data

In [7]:
# Count the missing values in each column of the real-news frame.
print("For True Dataset")
true_df.isnull().sum(axis=0)

For True Dataset


title      0
text       0
subject    0
date       0
dtype: int64

In [8]:
# Count the missing values in each column of the fake-news frame.
print("For Fake Dataset")
fake_df.isnull().sum(axis=0)

For Fake Dataset


title      0
text       0
subject    0
date       0
dtype: int64

# Labelling the data

In [9]:
# Tag every real-news row with the positive class expected by the classifier.
REAL_LABEL = 1
true_df['label'] = REAL_LABEL

In [10]:
# Tag every fake-news row with the negative class expected by the classifier.
FAKE_LABEL = 0
fake_df['label'] = FAKE_LABEL

In [11]:
# Preview the real-news frame again to confirm the new 'label' column.
true_df.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [12]:
# Preview the fake-news frame again to confirm the new 'label' column.
fake_df.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


# Combining Datasets

In [13]:
# Stack the fake rows on top of the real rows and renumber the index from 0.
df = pd.concat(
    objs=[fake_df, true_df],
    ignore_index=True,
)

Shuffling dataset

In [14]:
# Shuffle all rows so the two classes are interleaved, then renumber the index.
# A fixed random_state (the same 42 used for the train/test split below) keeps
# the row order identical on every Restart & Run All.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [15]:
# Build the model input: headline and body joined by a single space.
# Both columns are cast to str first so the concatenation never mixes types.
title_text = df["title"].astype(str)
body_text = df["text"].astype(str)
df["content"] = title_text + " " + body_text

In [16]:
# Preview the shuffled, combined frame including the new 'content' column.
df.head()

Unnamed: 0,title,text,subject,date,label,content
0,First Time in 30 Years: US Deploys B-52 Bomber...,21st Century Wire says Is Washington preparing...,Middle-east,"April 10, 2016",0,First Time in 30 Years: US Deploys B-52 Bomber...
1,Right-Wing Militias Preparing To Wage War If ...,"As Election Day approaches, the threat of viol...",News,"November 2, 2016",0,Right-Wing Militias Preparing To Wage War If ...
2,SHOCKING HYPOCRISY: THE MOST RACIST INDUSTRY I...,So much for tolerance and diversity How would ...,left-news,"Jul 29, 2015",0,SHOCKING HYPOCRISY: THE MOST RACIST INDUSTRY I...
3,Donald Trump Posts A Picture Of Himself With ...,Donald Trump is now so desperate to improve hi...,News,"January 10, 2017",0,Donald Trump Posts A Picture Of Himself With ...
4,‘Newtown’ Documentary Sheds New Light On Gun ...,"Newtown, Connecticut was the site of one of th...",News,"March 18, 2016",0,‘Newtown’ Documentary Sheds New Light On Gun ...


In [17]:
# Shared resources used by preprocess(): built once here so they are not
# re-created for every article.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))  # set -> fast membership tests

In [18]:
def preprocess(text):
    """Normalise raw article text into a list of lemmatised tokens.

    Steps, in order: lowercase; drop bracketed spans, URLs and HTML-like tags;
    strip punctuation; drop any word containing a digit; turn remaining
    non-word characters into spaces; tokenise; discard stopwords and tokens of
    two characters or fewer; lemmatise what is left.

    Args:
        text: The article text as a ``str``.

    Returns:
        A list of token strings (possibly empty).
    """
    # Lowercase the text
    text = text.lower()

    # Remove unwanted characters and patterns.
    # Raw strings are used so that regex escapes such as \S or \w are not
    # treated as (invalid) Python string escapes, which warn on current Python.
    text = re.sub(r'\[.*?\]', '', text)                # [bracketed] spans
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # URLs
    text = re.sub(r'<.*?>+', '', text)                 # HTML-like tags
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)               # words containing digits
    text = re.sub(r'\W', ' ', text)                    # leftover non-word chars

    # Tokenize, remove stopwords / very short tokens, then lemmatize
    tokens = word_tokenize(text)
    tokens = [
        lemmatizer.lemmatize(word)
        for word in tokens
        if word not in stop_words and len(word) > 2
    ]
    return tokens

Applying preprocessing

In [19]:
import string

In [20]:
# Tokenise every article; each entry of 'tokens' is the list preprocess() returns.
article_text = df['content']
df['tokens'] = article_text.apply(preprocess)

Preparing corpus for Word2Vec

In [22]:
# Word2Vec is fed one token list per article.
corpus = list(df['tokens'])

Training Word2Vec model

In [23]:
# Train 100-dimensional word embeddings on the tokenised articles.
# seed=42 fixes the random initialisation so repeated runs start from the same
# state (42 matches the seed used for the train/test split).
# NOTE(review): with workers=4 gensim's thread scheduling can still introduce
# small run-to-run differences; use workers=1 if bit-exact reproducibility matters.
w2v_model = Word2Vec(
    sentences=corpus,
    vector_size=100,
    window=5,
    min_count=2,   # ignore words that occur only once
    workers=4,
    seed=42,
)

Getting average embedding for each document

In [24]:
def get_avg_vector(tokens, model):
    """Return the mean embedding of the tokens known to ``model``.

    Tokens absent from ``model.wv`` are skipped. When no token is known
    (including an empty ``tokens``), a zero vector of length
    ``model.vector_size`` is returned instead.
    """
    known_vectors = []
    for tok in tokens:
        if tok in model.wv:
            known_vectors.append(model.wv[tok])

    if len(known_vectors) == 0:
        # Nothing to average: fall back to the all-zero vector.
        return np.zeros(model.vector_size)

    return np.mean(known_vectors, axis=0)

Creating feature vectors

In [25]:
# One averaged embedding per article, kept on the frame for inspection.
token_lists = df['tokens']
df['vector'] = token_lists.apply(
    lambda doc_tokens: get_avg_vector(doc_tokens, w2v_model)
)

# Stack the per-article vectors into a 2-D feature matrix; labels are the target.
X = np.vstack(df['vector'].values)
y = df['label'].values

Train/test split

In [26]:
# Hold out 20% of the articles for evaluation; stratify keeps the class ratio
# the same in both parts and random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

Training LR model

In [27]:
# Fit a logistic-regression classifier on the averaged embeddings.
# max_iter is raised to 1000 to give the solver room to converge.
lr = LogisticRegression(max_iter=1000)
lr.fit(X=X_train, y=y_train)

Predict & Evaluate

In [28]:
# Score the held-out articles and print per-class precision / recall / F1.
y_pred = lr.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.98      0.97      0.98      4696
           1       0.97      0.98      0.98      4284

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



# WEB SCRAPING

In [29]:
from bs4 import BeautifulSoup

In [30]:
def scrape_inshorts_articles(url="https://inshorts.com/en/read", timeout=10):
    """Download the Inshorts feed page and return its articles as plain text.

    Args:
        url: Page to scrape. Defaults to the English Inshorts feed.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook.

    Returns:
        A list of strings, one per news card, each ``"<headline> <body>"``.
        Cards missing either the headline or the body element are skipped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero articles.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    articles = []
    for card in soup.find_all("div", class_="news-card"):
        headline_tag = card.find("span", itemprop="headline")
        body_tag = card.find("div", itemprop="articleBody")
        # find() returns None when the element is absent; skip incomplete cards
        # rather than crashing on `.text`.
        if headline_tag is None or body_tag is None:
            continue
        title = headline_tag.text.strip()
        body = body_tag.text.strip()
        articles.append(f"{title} {body}")
    return articles

# PREDICTING LIVE NEWS

In [31]:
import requests

In [32]:
def predict_articles(articles):
    """Classify each article text and return display-ready results.

    Args:
        articles: Iterable of article strings.

    Returns:
        A list of ``(headline, label)`` tuples in input order. ``headline`` is
        the first 100 characters of the article, followed by "..." only when
        the text was actually truncated. ``label`` is "🟩 REAL" or "🟥 FAKE",
        or "⬜ UNKNOWN" when none of the article's tokens is in the Word2Vec
        vocabulary.
    """
    results = []
    for article in articles:
        tokens = preprocess(article)

        # Only add the ellipsis when something was cut off.
        headline = article if len(article) <= 100 else article[:100] + "..."

        if not any(token in w2v_model.wv for token in tokens):
            # get_avg_vector would return its all-zero fallback here, so a
            # prediction would not depend on the article text at all.
            label = "⬜ UNKNOWN"
        else:
            vec = get_avg_vector(tokens, w2v_model).reshape(1, -1)
            pred = lr.predict(vec)[0]
            label = "🟩 REAL" if pred == 1 else "🟥 FAKE"

        results.append((headline, label))
    return results

# Run predictions: scrape the live feed, classify it, and list the verdicts.
live_news = scrape_inshorts_articles()
results = predict_articles(live_news)

# Numbering starts at 1 for readability.
for i, (headline, label) in enumerate(results, start=1):
    print(f"{i}. {label} — {headline}")

In [33]:
# Smoke test of the full pipeline on a hand-typed input.
# NOTE(review): preprocess() drops tokens of two characters or fewer, so a
# one-letter input like this presumably leaves no tokens and the classifier is
# fed get_avg_vector's all-zero fallback - use a real sentence for a meaningful check.
test_article = "c"
tokens = preprocess(test_article)
vector = get_avg_vector(tokens, w2v_model).reshape(1, -1)
pred = lr.predict(vector)[0]
if pred == 1:
    label = "🟩 REAL"
else:
    label = "🟥 FAKE"
print(f"Test prediction: {label}")

Test prediction: 🟥 FAKE


# TKinter APP

In [34]:
import tkinter as tk
from tkinter import messagebox
import numpy as np

In [35]:
# --- GUI Setup ---
# Main application window: fixed 500x400 size on a light-grey background.
root = tk.Tk()
root.geometry("500x400")
root.title("📰 Fake News Detector")
root.config(bg="#f4f4f4")

In [36]:
# --- Input Label ---
# Prompt shown above the text box.
label = tk.Label(
    root,
    text="Enter a news article:",
    font=("Arial", 12, "bold"),
    bg="#f4f4f4",
)
label.pack(pady=10)

In [37]:
# --- Text Box ---
# Multi-line entry the user pastes the article into; read by predict_news().
text_input = tk.Text(
    root,
    height=10,
    width=55,
    font=("Arial", 10),
)
text_input.pack()

In [38]:
# --- Result Label ---
# The label displays whatever result_var holds; predict_news() updates both
# the text (via result_var) and the colour (via result_label.config).
result_var = tk.StringVar()
result_label = tk.Label(
    root,
    textvariable=result_var,
    font=("Arial", 14, "bold"),
    bg="#f4f4f4",
    fg="blue",
)
result_label.pack(pady=20)

In [39]:
# --- Prediction Logic ---
def predict_news():
    """Classify the text currently in the input box and show the verdict.

    Reads the article from ``text_input``, runs it through ``preprocess``,
    ``get_avg_vector`` and the trained ``lr`` classifier, then updates
    ``result_var`` / ``result_label``. A warning dialog is shown, and no
    verdict is displayed, when the box is empty or when none of the article's
    tokens is in the Word2Vec vocabulary.
    """
    article = text_input.get("1.0", tk.END).strip()
    if not article:
        messagebox.showwarning("Input Error", "Please enter some text.")
        return

    tokens = preprocess(article)
    if not any(token in w2v_model.wv for token in tokens):
        # get_avg_vector would return its all-zero fallback here, so the
        # "prediction" would not depend on what the user typed. Clear any stale
        # verdict and ask for more text instead.
        result_var.set("")
        messagebox.showwarning(
            "Input Error",
            "Not enough recognizable text to classify. Please enter a longer article.",
        )
        return

    vec = get_avg_vector(tokens, w2v_model).reshape(1, -1)
    prediction = lr.predict(vec)[0]

    if prediction == 1:
        result_var.set("🟩 This news is likely REAL.")
        result_label.config(fg="green")
    else:
        result_var.set("🟥 This news is likely FAKE.")
        result_label.config(fg="red")

In [40]:
# --- Predict Button ---
# Clicking the button runs predict_news() on the current text-box content.
predict_btn = tk.Button(
    root,
    text="Check",
    command=predict_news,
    bg="navy",
    fg="white",
    font=("Arial", 12, "bold"),
)
predict_btn.pack(pady=10)

In [None]:
# --- Start App ---
# Hand control to Tk's event loop so the window responds to clicks.
# NOTE(review): this call presumably does not return until the window is
# closed, so the cell keeps the kernel busy while the app is open - keep it last.
root.mainloop()