In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [2]:
fake_df=pd.read_csv(r"C:\project news\Fake.csv\Fake.csv")
real_df=pd.read_csv(r"C:\project news\True.csv\True.csv")
print(fake_df.head())

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  


In [3]:
fake_df["label"]=0
real_df["label"]=1


In [4]:
df=pd.concat([fake_df,real_df],axis=0)
df.sample(frac=1).reset_index(drop=True)
df.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [5]:
!pip install nltk




[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:

nltk.download("stopwords")
# Load stopwords
stop_words = set(stopwords.words("english"))

def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\[.*?\]', '', text)  # Remove text in brackets
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = re.sub(r'<.*?>+', '', text)  # Remove HTML tags
    text = re.sub(f"[{string.punctuation}]", "", text)  # Remove punctuation
    text = " ".join(word for word in text.split() if word not in stop_words)  # Remove stopwords
    return text




[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\krish\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
import string
df["text"] = df["text"].apply(clean_text)
print(df.head())  # Check first few rows after cleaning


                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  donald trump wish americans happy new year lea...    News   
1  house intelligence committee chairman devin nu...    News   
2  friday revealed former milwaukee sheriff david...    News   
3  christmas day donald trump announced would bac...    News   
4  pope francis used annual christmas day message...    News   

                date  label  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  


In [8]:

x=df["text"]
y=df["label"]
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
print(f"Training Samples:{len(x_train)}")
print(f"Testing Samples:{len(x_test)}")

Training Samples:35918
Testing Samples:8980


In [9]:

tfidf_vectorizer=TfidfVectorizer(max_df=0.95,min_df=2,stop_words='english')
x_train_tfidf = tfidf_vectorizer.fit_transform(x_train)
x_test_tfidf= tfidf_vectorizer.transform(x_test)
print("TF-IDF  transformation completed succesfully!")
print(f"shape of training Data:{x_train_tfidf.shape}")
print(f"shape of testing Data:{x_test_tfidf.shape}")

TF-IDF  transformation completed succesfully!
shape of training Data:(35918, 97835)
shape of testing Data:(8980, 97835)


In [10]:
nb_model=MultinomialNB()
nb_model.fit(x_train_tfidf,y_train)
y_pred=nb_model.predict(x_test_tfidf)
accuracy=accuracy_score(y_test,y_pred)
print(f"Naive Bayes Model Accuracy:{accuracy:.4f}")

Naive Bayes Model Accuracy:0.9379


In [11]:

sample_news = "Breaking: Government announces new tax reforms for businesses."
print("Cleaned Text:", clean_text(sample_news))
sample_tfidf = tfidf_vectorizer.transform([clean_text(sample_news)])
print("TF-IDF Shape:", sample_tfidf.shape)
prediction = nb_model.predict(sample_tfidf)[0]
print("Prediction:", "Fake News" if prediction == 1 else "Real News")


Cleaned Text: breaking government announces new tax reforms businesses
TF-IDF Shape: (1, 97835)
Prediction: Fake News


In [12]:
sample_news ="NASA successfully lands rover on Mars, begins exploration mission."
print("Cleaned Text:", clean_text(sample_news))
sample_tfidf = tfidf_vectorizer.transform([clean_text(sample_news)])
print("TF-IDF Shape:", sample_tfidf.shape)
prediction = nb_model.predict(sample_tfidf)[0]
print("Prediction:", "Fake News" if prediction == 1 else "Real News")

Cleaned Text: nasa successfully lands rover mars begins exploration mission
TF-IDF Shape: (1, 97835)
Prediction: Fake News
