In [2]:
import re
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.src.ops import LogicalAnd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import nltk
nltk.download('stopwords')

print(stopwords.words('english'))


[nltk_data] Downloading package stopwords to C:\Users\Nishan
[nltk_data]     Bhandari\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd", "he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is', 'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she

Data preprocessing

In [3]:
fake=pd.read_csv('Fake.csv')
true=pd.read_csv('True.csv')
fake.shape
true.shape

(21417, 4)

In [12]:
#Concat the dataset
fake['label']=1
true['label']=0
data = pd.concat([fake, true], axis=0).reset_index(drop=True)
data.head(5)



Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",1


In [13]:
#Count the number of missing values
data.isnull().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [15]:
#Merging the title and text
data['content']=data['title']+" "+data['text']
data['content']

0         Donald Trump Sends Out Embarrassing New Year’...
1         Drunk Bragging Trump Staffer Started Russian ...
2         Sheriff David Clarke Becomes An Internet Joke...
3         Trump Is So Obsessed He Even Has Obama’s Name...
4         Pope Francis Just Called Out Donald Trump Dur...
                               ...                        
44893    'Fully committed' NATO backs new U.S. approach...
44894    LexisNexis withdrew two products from Chinese ...
44895    Minsk cultural hub becomes haven from authorit...
44896    Vatican upbeat on possibility of Pope Francis ...
44897    Indonesia to buy $1.14 billion worth of Russia...
Name: content, Length: 44898, dtype: object

In [18]:
#Separting the data and label
X=data.drop(columns='label', axis=1)
y=data['label']
X,y

(                                                   title  \
 0       Donald Trump Sends Out Embarrassing New Year’...   
 1       Drunk Bragging Trump Staffer Started Russian ...   
 2       Sheriff David Clarke Becomes An Internet Joke...   
 3       Trump Is So Obsessed He Even Has Obama’s Name...   
 4       Pope Francis Just Called Out Donald Trump Dur...   
 ...                                                  ...   
 44893  'Fully committed' NATO backs new U.S. approach...   
 44894  LexisNexis withdrew two products from Chinese ...   
 44895  Minsk cultural hub becomes haven from authorities   
 44896  Vatican upbeat on possibility of Pope Francis ...   
 44897  Indonesia to buy $1.14 billion worth of Russia...   
 
                                                     text    subject  \
 0      Donald Trump just couldn t wish all Americans ...       News   
 1      House Intelligence Committee Chairman Devin Nu...       News   
 2      On Friday, it was revealed that former Mil

Stemming


In [19]:
port_stem=PorterStemmer()
def stemming(content):
    stemmed_content=re.sub("[^a-zA-z]","",content)
    stemmed_content=stemmed_content.lower()
    stemmed_content=stemmed_content.split()
    stemmed_content=[port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content=' '.join(stemmed_content)
    return stemmed_content

In [21]:
data['content']=data['content'].apply(stemming)
print(data['content'])

0        donaldtrumpsendsoutembarrassingnewyearsevemess...
1        drunkbraggingtrumpstafferstartedrussiancollusi...
2        sheriffdavidclarkebecomesaninternetjokeforthre...
3        trumpissoobsessedheevenhasobamasnamecodedintoh...
4        popefrancisjustcalledoutdonaldtrumpduringhisch...
                               ...                        
44893    fullycommittednatobacksnewusapproachonafghanis...
44894    lexisnexiswithdrewtwoproductsfromchinesemarket...
44895    minskculturalhubbecomeshavenfromauthoritiesmin...
44896    vaticanupbeatonpossibilityofpopefrancisvisitin...
44897    indonesiatobuybillionworthofrussianjetsjakarta...
Name: content, Length: 44898, dtype: object


In [23]:
#separating the data and label
X=data['content'].values
y=data['label'].values
print(X)

['donaldtrumpsendsoutembarrassingnewyearsevemessagethisisdisturbingdonaldtrumpjustcouldntwishallamericansahappynewyearandleaveitatthatinsteadhehadtogiveashoutouttohisenemieshatersandtheverydishonestfakenewsmediatheformerrealityshowstarhadjustonejobtodoandhecouldntdoitasourcountryrapidlygrowsstrongerandsmarteriwanttowishallofmyfriendssupportersenemieshatersandeventheverydishonestfakenewsmediaahappyandhealthynewyearpresidentangrypantstweetedwillbeagreatyearforamericaasourcountryrapidlygrowsstrongerandsmarteriwanttowishallofmyfriendssupportersenemieshatersandeventheverydishonestfakenewsmediaahappyandhealthynewyearwillbeagreatyearforamericadonaldjtrumprealdonaldtrumpdecembertrumpstweetwentdownaboutaswelllasyoudexpectwhatkindofpresidentsendsanewyearsgreetinglikethisdespicablepettyinfantilegibberishonlytrumphislackofdecencywontevenallowhimtoriseabovethegutterlongenoughtowishtheamericancitizensahappynewyearbishoptalbertswantalbertswandecembernoonelikesyoucalvincalvinstowelldecemberyourimpeach

In [25]:
print(y)
y.shape

[1 1 1 ... 0 0 0]


(44898,)

In [26]:
#converting textual data to numerical data
vectorizer=TfidfVectorizer()
vectorizer.fit(X)

X=vectorizer.transform(X)

In [27]:
print(X)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 64917 stored elements and shape (44898, 48121)>
  Coords	Values
  (0, 9137)	1.0
  (1, 9364)	1.0
  (2, 31550)	1.0
  (3, 37728)	1.0
  (4, 27080)	1.0
  (5, 28077)	1.0
  (6, 12865)	1.0
  (7, 38518)	1.0
  (8, 12147)	1.0
  (9, 44327)	1.0
  (10, 26205)	1.0
  (11, 45032)	1.0
  (12, 2384)	1.0
  (13, 44864)	1.0
  (14, 14525)	1.0
  (15, 36019)	1.0
  (16, 34303)	1.0
  (17, 22743)	1.0
  (18, 19135)	0.5773502691896258
  (18, 31902)	0.5773502691896258
  (18, 47709)	0.5773502691896258
  (19, 7560)	0.5773502691896258
  (19, 28846)	0.5773502691896258
  (19, 30504)	0.5773502691896258
  (20, 16633)	1.0
  :	:
  (44873, 17327)	1.0
  (44874, 7968)	1.0
  (44875, 40939)	1.0
  (44876, 26924)	1.0
  (44877, 27621)	1.0
  (44878, 13390)	1.0
  (44879, 3325)	1.0
  (44880, 30435)	1.0
  (44881, 34599)	1.0
  (44882, 1625)	1.0
  (44883, 10519)	1.0
  (44884, 42809)	1.0
  (44885, 10567)	1.0
  (44886, 39668)	1.0
  (44887, 42718)	1.0
  (44888, 21385)	1.0
  (44889,

In [29]:
#Splitting data into training and testing data
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,stratify=y,random_state=42)



Training the model:Logistic regression

In [30]:
model=LogisticRegression()
model.fit(X_train,y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


Accuracy score on training data

In [36]:
X_train_prediction=model.predict(X_train)
training_data_accuracy=accuracy_score(X_train_prediction,y_train)
print("Accuracy score of the training scores=",training_data_accuracy)

Accuracy score of the training scores= 0.9997215880616961


Making a prediction System


In [42]:
x_new=X_test[0]
prediction=model.predict(x_new)

print(prediction)

if (prediction[0]==[0]):
    print("The news is Real")
else:
    print("The news is Fake")


[0]
The news is Real


In [41]:
print(y_test[0])

1
