Importing Libraries

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings('ignore') 

Data Collection

In [None]:
df1=pd.read_csv("../data/Fake.csv")
df2=pd.read_csv("../data/True.csv")
df1["labels"]=0
df2["labels"]=1

News_data = pd.concat([df1,df2], axis=0).reset_index(drop=True)
News_data.head()

Unnamed: 0,title,text,subject,date,labels
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


Data Preparation

In [22]:
News_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44898 entries, 0 to 44897
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    44898 non-null  object
 1   text     44898 non-null  object
 2   subject  44898 non-null  object
 3   date     44898 non-null  object
 4   labels   44898 non-null  int64 
dtypes: int64(1), object(4)
memory usage: 1.7+ MB


In [23]:
News_data.describe()

Unnamed: 0,labels
count,44898.0
mean,0.477015
std,0.499477
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [24]:
News_data.isnull().sum()

title      0
text       0
subject    0
date       0
labels     0
dtype: int64

In [25]:
News_data.isna().sum()

title      0
text       0
subject    0
date       0
labels     0
dtype: int64

In [26]:
News_data=News_data.drop("date",axis=1)

In [27]:
News_data.head()

Unnamed: 0,title,text,subject,labels
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,0


In [28]:
News_data["contant"] = News_data["title"]+" "+News_data["subject"]
News_data["contant"].head()

0     Donald Trump Sends Out Embarrassing New Year’...
1     Drunk Bragging Trump Staffer Started Russian ...
2     Sheriff David Clarke Becomes An Internet Joke...
3     Trump Is So Obsessed He Even Has Obama’s Name...
4     Pope Francis Just Called Out Donald Trump Dur...
Name: contant, dtype: object

In [29]:
News_data.head()

Unnamed: 0,title,text,subject,labels,contant
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,0,Donald Trump Sends Out Embarrassing New Year’...
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,0,Drunk Bragging Trump Staffer Started Russian ...
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,0,Sheriff David Clarke Becomes An Internet Joke...
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,0,Trump Is So Obsessed He Even Has Obama’s Name...
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,0,Pope Francis Just Called Out Donald Trump Dur...


In [30]:
def stemming(content):
    stemmed_content=re.sub('[^a-zA-Z]',' ',content)
    stemmed_content=stemmed_content.lower()
    stemmed_content=word_tokenize(stemmed_content)
    stemmed_content=[word for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content=' '.join(stemmed_content)
    return stemmed_content

In [31]:
News_data["contant"]=News_data["contant"].apply(stemming)
News_data["contant"]

0        donald trump sends embarrassing new year eve m...
1        drunk bragging trump staffer started russian c...
2        sheriff david clarke becomes internet joke thr...
3        trump obsessed even obama name coded website i...
4        pope francis called donald trump christmas spe...
                               ...                        
44893    fully committed nato backs new u approach afgh...
44894    lexisnexis withdrew two products chinese marke...
44895     minsk cultural hub becomes authorities worldnews
44896    vatican upbeat possibility pope francis visiti...
44897    indonesia buy billion worth russian jets world...
Name: contant, Length: 44898, dtype: object

In [32]:
vectoizer=TfidfVectorizer()

X=vectoizer.fit_transform(News_data["contant"])
Y=News_data["labels"]

Train Test Split

In [33]:
x_train,x_test,y_train,y_test=train_test_split(X,Y, test_size=0.2,random_state=42)

Model Selection

In [34]:
Model=LogisticRegression()

In [35]:
Model.fit(x_train,y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [36]:
y_train_predict=Model.predict(x_train)
acc_scr=accuracy_score(y_train_predict,y_train)
f1=f1_score(y_train,y_train_predict)
print(acc_scr)
print(f1)

0.9999721588061696
0.9999708785928536


In [37]:
ypredict=Model.predict(x_test)
acc_scr=accuracy_score(ypredict,y_test)
f1=f1_score(ypredict,y_test)
print(acc_scr)
print(f1)

0.9998886414253898
0.9998822559755093


Predective Mechanism

In [38]:
input1=X[0]
input2=X[44870]

p1=Model.predict(input1)
p2=Model.predict(input2)

print(p1)
print(p2)

[0]
[1]


Experimental

In [39]:
Logistic_Regression = LogisticRegression()
KNeighbors_Classifier = KNeighborsClassifier()
Support_Vector_Classifier = SVC()
Random_Forest_Classifier = RandomForestClassifier()
Gradient_Boosting_Classifier = GradientBoostingClassifier()

Models=[Logistic_Regression,KNeighbors_Classifier,Support_Vector_Classifier,Random_Forest_Classifier,Gradient_Boosting_Classifier]


In [None]:
for i in Models:
    i.fit(x_train,y_train)
    y_train_predict=i.predict(x_train)
    acc_scr=accuracy_score(y_train_predict,y_train)
    print(i)
    print(acc_scr)

LogisticRegression()
0.9999721588061696
KNeighborsClassifier()
0.9543126009243277


In [None]:
for i in Models:
    ypredict=i.predict(x_test)
    acc_scr=accuracy_score(ypredict,y_test)
    print(i)
    print(acc_scr)

LogisticRegression()
0.9998886414253898
KNeighborsClassifier()
0.9173719376391982
SVC()
1.0
RandomForestClassifier()
0.9997772828507795
GradientBoostingClassifier()
1.0
