In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


In [3]:
data = pd.read_csv('fake_news.csv')

In [4]:
data.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [5]:
data.shape

(20800, 5)

In [6]:
data.info

<bound method DataFrame.info of           id                                              title  \
0          0  House Dem Aide: We Didn’t Even See Comey’s Let...   
1          1  FLYNN: Hillary Clinton, Big Woman on Campus - ...   
2          2                  Why the Truth Might Get You Fired   
3          3  15 Civilians Killed In Single US Airstrike Hav...   
4          4  Iranian woman jailed for fictional unpublished...   
...      ...                                                ...   
20795  20795  Rapper T.I.: Trump a ’Poster Child For White S...   
20796  20796  N.F.L. Playoffs: Schedule, Matchups and Odds -...   
20797  20797  Macy’s Is Said to Receive Takeover Approach by...   
20798  20798  NATO, Russia To Hold Parallel Exercises In Bal...   
20799  20799                          What Keeps the F-35 Alive   

                                          author  \
0                                  Darrell Lucus   
1                                Daniel J. Flynn   
2      

In [7]:
data.isnull().sum()

id           0
title      558
author    1957
text        39
label        0
dtype: int64

In [8]:
data.loc[200:300]

Unnamed: 0,id,title,author,text,label
200,200,Miami Beach Tries to Tame Its Most Raucous Str...,Lizette Alvarez,"MIAMI BEACH, Fla. — From rooftop bars and r...",0
201,201,Doctors Mysteriously Found Dead After Summit F...,Starkman,The medical community at large is a gasp as se...,1
202,202,"Donald Trump, the Unsinkable Candidate - The N...",Declan Walsh,This American presidential election is a dizzy...,0
203,203,Shocking New Mock Hillary Ad Campaign Warns Sh...,The Daily Sheeple,"\nA lot of people, especially millennials, are...",1
204,204,The Failure of US Democracy,,The Failure of US Democracy How The Oligarchs ...,1
...,...,...,...,...,...
296,296,Germany Reacts to Merkel-Trump Visit: ‘Could H...,Melissa Eddy,BERLIN — A strong relationship with the Uni...,0
297,297,Justin Rose Outduels Henrik Stenson for Golf G...,Karen Crouse,RIO DE JANEIRO — The lives of the world’s t...,0
298,298,Iceland’s Water Cure - The New York Times,Dan Kois,"On a frigid February day in Reykjavik, I stood...",0
299,299,US to Hold Off on Cyberwar With Russia Until A...,,US to Hold Off on Cyberwar With Russia Until A...,1


In [9]:
data = data.drop(['id'],axis=1)

In [10]:
data.head()

Unnamed: 0,title,author,text,label
0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [11]:
data = data.fillna('')

In [12]:
data.isnull().sum()

title     0
author    0
text      0
label     0
dtype: int64

In [13]:
data['content'] = data['author']+' '+data['title']+' '+data['text']

In [14]:
data = data.drop(['title','author','text'],axis = 1)

In [15]:
data.head()

Unnamed: 0,label,content
0,1,Darrell Lucus House Dem Aide: We Didn’t Even S...
1,0,"Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo..."
2,1,Consortiumnews.com Why the Truth Might Get You...
3,1,Jessica Purkiss 15 Civilians Killed In Single ...
4,1,Howard Portnoy Iranian woman jailed for fictio...


In [16]:
data['content'] = data['content'].apply(lambda x : " ".join(x.lower() for x in x.split()))

In [17]:
data['content'] = data['content'] = data['content'].str.replace('[^\w\s]','')

In [18]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Bhuvansai\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [19]:
from nltk.corpus import stopwords
stop = stopwords.words('english')
data['content'] = data['content'].apply(lambda x:" ".join(x for x in x.split() if x not in stop))

In [20]:
data.head()

Unnamed: 0,label,content
0,1,darrell lucus house dem aide: didn’t even see ...
1,0,"daniel j. flynn flynn: hillary clinton, big wo..."
2,1,consortiumnews.com truth might get fired truth...
3,1,jessica purkiss 15 civilians killed single us ...
4,1,howard portnoy iranian woman jailed fictional ...


In [21]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20800 entries, 0 to 20799
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   label    20800 non-null  int64 
 1   content  20800 non-null  object
dtypes: int64(1), object(1)
memory usage: 325.1+ KB


In [23]:
!pip install textblob



In [24]:
from nltk.stem import WordNetLemmatizer
from textblob import Word
nltk.download('wordnet')
data['content'] = data['content'].apply(lambda x:" ".join([Word(word).lemmatize() for word in x.split()]))

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Bhuvansai\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [25]:
data['content'].head()

0    darrell lucus house dem aide: didn’t even see ...
1    daniel j. flynn flynn: hillary clinton, big wo...
2    consortiumnews.com truth might get fired truth...
3    jessica purkiss 15 civilian killed single u ai...
4    howard portnoy iranian woman jailed fictional ...
Name: content, dtype: object

In [26]:
X = data[['content']]
y = data['label']

In [30]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 45,stratify = y)

In [31]:
print(X_test.shape)

(6240, 1)


In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vect = TfidfVectorizer(analyzer='word',token_pattern = r'\w{1,}',max_features = 5000)
tfidf_vect.fit(data['content'])
xtrain_tfidf = tfidf_vect.transform(X_train['content'])
xtest_tfidf = tfidf_vect.transform(X_test['content'])

'''Model Building'''

'''Passive Agressive classifier'''

In [46]:
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
model = PassiveAggressiveClassifier()
model.fit(xtrain_tfidf,y_train)
y_pred = model.predict(xtest_tfidf)
print("Accuracy:")
print(accuracy_score(y_test,y_pred))
print("Classification_report:")
print(classification_report(y_test,y_pred))
print("confusion matrix:")
print(confusion_matrix(y_test,y_pred))

Accuracy:
0.9621794871794872
Classification_report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      3116
           1       0.96      0.96      0.96      3124

    accuracy                           0.96      6240
   macro avg       0.96      0.96      0.96      6240
weighted avg       0.96      0.96      0.96      6240

confusion matrix:
[[2996  120]
 [ 116 3008]]


In [51]:
#MLP classifier

from sklearn.neural_network import MLPClassifier
mlpclf = MLPClassifier(hidden_layer_sizes=(256,64,16),
                       activation = 'relu',
                       solver = 'adam')
mlpclf.fit(xtrain_tfidf,y_train)
y_pred = mlpclf.predict(xtest_tfidf)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96      3116
           1       0.96      0.96      0.96      3124

    accuracy                           0.96      6240
   macro avg       0.96      0.96      0.96      6240
weighted avg       0.96      0.96      0.96      6240



In [52]:
print(confusion_matrix(y_test,y_pred))

[[3000  116]
 [ 123 3001]]


In [53]:
#saved the modelu
import pickle
pickle.dump(mlpclf,open('fakenews.pkl','wb'))

In [62]:
loaded_model = pickle.load(open('fakenews.pkl','rb'))
result = loaded_model.score(X_test,y_test)
print(result)



ValueError: could not convert string to float: 'michael munk getting away terrorism oregon email obama regime’s gingerly kid glove treatment white christian terrorist policy foresaw acquittal bundy gang. gang able conduct armed occupation oregon wildlife refuge full view nation 2014 armed confrontation fed bundy ranch nevada. armed federal marshal retreated battlefield one arrested year bundy pere, immune nevada, made unwise visit portland. charging terrorist narrow whimpy crime conspiring keep refuge worker away job difficult prove bos ordered stay away none tried go work. federal government determined fight terrorism could escorted employee terrorist line much firepower deemed necessary crime intended prevent. but, nevada, law enforcement even order terrorist drop gun (oregon offer “open carry” invitation) threaten arrest. instead, obama’s doj policy wait out, perhaps white life really matter. meant prosecution could present evidentiary proof actual weapon threat intimidation terrorist dictated weak charge. armed terrorist refuge roadblock charge entire platoon law enforcement officer vehicle tried run shot killed him. acquittal armed occupier consequence political culture. transmitted medium ( oregonian avoided calling used neutral label like “standoff”) culture decided white christian can’t terrorists– designation reserved black muslims. jury reflected statewide portland constituency evidently kicked lone dissenter quickly acquitted assumed acquired cultural norm. oregon suffer frightened governor brown whose response verdict was, “the occupation malheur reserve reflect oregon way respectfully working together resolve differences” isn’t special?'