In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [48]:
data = pd.read_csv('FakeNews.csv')

In [49]:
data.shape

(4009, 4)

In [50]:
data.columns

Index(['URLs', 'Headline', 'Body', 'Label'], dtype='object')

In [51]:
data.head(5)

Unnamed: 0,URLs,Headline,Body,Label
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1


In [52]:
# Select (row 2, column 0) in a single positional lookup. The chained form
# data.iloc[2][0] applies an integer key to a string-labelled Series, which
# depends on a positional fallback that pandas has deprecated.
print(data.iloc[2, 0])

https://www.nytimes.com/2017/10/09/us/politics/corkers-blast-at-trump-has-other-republicans-nodding-in-agreement.html?rref=collection%2Fsectioncollection%2Fpolitics



In [53]:
# Select (row 2, column 1) in a single positional lookup. The chained form
# data.iloc[2][1] applies an integer key to a string-labelled Series, which
# depends on a positional fallback that pandas has deprecated.
print(data.iloc[2, 1])

Trump’s Fight With Corker Jeopardizes His Legislative Agenda


In [54]:
# Select (row 2, column 2) in a single positional lookup. The chained form
# data.iloc[2][2] applies an integer key to a string-labelled Series, which
# depends on a positional fallback that pandas has deprecated.
print(data.iloc[2, 2])

The feud broke into public view last week when Mr. Corker said that Mr. Trump’s advisers were guarding against “chaos.” The president retaliated on Sunday by saying the retiring senator “didn’t have the guts to run” for another term. Mr. Corker responded on Twitter an hour later, saying that the White House had become an “adult day care center.”
He then unloaded in an interview with The New York Times, saying in public what many of his Republican colleagues say in private — that the president is dangerously erratic, treats his high office like “a reality show,” has to be contained by his staff and is reckless enough to put the country “on the path to World War III.”
Mr. Corker, a moderate by temperament as well as ideology, had measured his occasional criticisms of Mr. Trump for months in hopes of influencing his foreign policy, but evidently feels liberated now that he has decided not to run for a third term.
Few other incumbent Republicans rushed to the microphones to echo his commen

In [55]:
# Select (row 2, column 3) in a single positional lookup. The chained form
# data.iloc[2][3] applies an integer key to a string-labelled Series, which
# depends on a positional fallback that pandas has deprecated.
print(data.iloc[2, 3])

1


In [56]:
data.isnull().sum()

URLs         0
Headline     0
Body        21
Label        0
dtype: int64

In [57]:
# Remove every row that has a missing value in any column.
data = data.dropna(axis=0, how='any')

In [58]:
data.shape

(3988, 4)

In [59]:
data.isnull().sum()

URLs        0
Headline    0
Body        0
Label       0
dtype: int64

In [60]:
data.head()

Unnamed: 0,URLs,Headline,Body,Label
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1


In [61]:
# Combine headline and body into one text column, separated by a single space.
data['News'] = data['Headline'] + " " + data['Body']

In [62]:
data.head()

Unnamed: 0,URLs,Headline,Body,Label,News
0,http://www.bbc.com/news/world-us-canada-414191...,Four ways Bob Corker skewered Donald Trump,Image copyright Getty Images\nOn Sunday mornin...,1,Four ways Bob Corker skewered Donald Trump Ima...
1,https://www.reuters.com/article/us-filmfestiva...,Linklater's war veteran comedy speaks to moder...,"LONDON (Reuters) - “Last Flag Flying”, a comed...",1,Linklater's war veteran comedy speaks to moder...
2,https://www.nytimes.com/2017/10/09/us/politics...,Trump’s Fight With Corker Jeopardizes His Legi...,The feud broke into public view last week when...,1,Trump’s Fight With Corker Jeopardizes His Legi...
3,https://www.reuters.com/article/us-mexico-oil-...,Egypt's Cheiron wins tie-up with Pemex for Mex...,MEXICO CITY (Reuters) - Egypt’s Cheiron Holdin...,1,Egypt's Cheiron wins tie-up with Pemex for Mex...
4,http://www.cnn.com/videos/cnnmoney/2017/10/08/...,Jason Aldean opens 'SNL' with Vegas tribute,"Country singer Jason Aldean, who was performin...",1,Jason Aldean opens 'SNL' with Vegas tribute Co...


In [63]:
# Keep only 'Label' and the combined 'News' text; drop the three source columns.
data = data.drop(['URLs', 'Headline', 'Body'], axis=1)

In [64]:
data

Unnamed: 0,Label,News
0,1,Four ways Bob Corker skewered Donald Trump Ima...
1,1,Linklater's war veteran comedy speaks to moder...
2,1,Trump’s Fight With Corker Jeopardizes His Legi...
3,1,Egypt's Cheiron wins tie-up with Pemex for Mex...
4,1,Jason Aldean opens 'SNL' with Vegas tribute Co...
...,...,...
4003,0,CNN and Globalist Exposed - Steve Quayle and A...
4004,0,Trends to Watch Trends to Watch\n% of readers ...
4005,0,Trump Jr. Is Soon To Give A 30-Minute Speech F...
4007,1,China to accept overseas trial data in bid to ...


# Data Preprocessing

In [65]:
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

In [66]:
# Stop words are stored in a set so that each `w not in sw` membership test in
# data_cleaning is a hash lookup instead of a scan over a list.
sw = set(stopwords.words('english'))
# English Snowball stemmer shared by every call to data_cleaning.
ss = SnowballStemmer('english')

In [67]:
def data_cleaning(document):
    """Turn one raw text into a single string of filtered, stemmed tokens.

    The text is lower-cased and split with ``word_tokenize``. Tokens that are
    found in the module-level stop-word collection ``sw``, or that are at most
    one character long, are discarded. Each remaining token is stemmed with
    the module-level stemmer ``ss`` and the stems are joined with single
    spaces.

    Parameters
    ----------
    document : str
        Text to clean.

    Returns
    -------
    str
        The surviving stems separated by single spaces.
    """
    stems = []
    for token in word_tokenize(document.lower()):
        # Guard clause: skip one-character tokens and stop words.
        if len(token) <= 1 or token in sw:
            continue
        stems.append(ss.stem(token))
    return " ".join(stems)

In [68]:
# Try the cleaner on the first article still in the frame.
# `.iloc[0]` selects by position; `data['News'][0]` is a label lookup and would
# raise KeyError if the row labelled 0 had been removed by dropna(), because the
# index is not reset afterwards.
data_cleaning(data['News'].iloc[0])

"four way bob corker skewer donald trump imag copyright getti imag sunday morn donald trump went twitter tirad member parti n't exact huge news 's far first time presid turn rhetor cannon rank time howev attack particular bite person essenti call tennesse senat bob corker chair power senat foreign relat committe coward run re-elect said mr corker `` beg '' presid 's endors refus give wrong claim mr corker 's support iranian nuclear agreement polit accomplish unlik colleagu mr corker free worri immedi polit futur n't hold tongu skip twitter post senbobcork 's shame white hous becom adult day care center someon obvious miss shift morn senat bob corker senbobcork octob 2017 report n't end though spoke new york time realli let presid four choic quot tennesse senat 's interview time particular damn `` n't know presid tweet thing true know everyon know '' ca n't realli sugarcoat one mr corker flat-out say presid liar everyon know senat particular challeng mr trump 's insist unsuccess plead e

In [69]:
# Run data_cleaning on each article's text and store the result in a new column.
data['Cleaned_News'] = data['News'].map(data_cleaning)

In [70]:
data.head()

Unnamed: 0,Label,News,Cleaned_News
0,1,Four ways Bob Corker skewered Donald Trump Ima...,four way bob corker skewer donald trump imag c...
1,1,Linklater's war veteran comedy speaks to moder...,linklat 's war veteran comedi speak modern ame...
2,1,Trump’s Fight With Corker Jeopardizes His Legi...,trump fight corker jeopard legisl agenda feud ...
3,1,Egypt's Cheiron wins tie-up with Pemex for Mex...,egypt 's cheiron win tie-up pemex mexican onsh...
4,1,Jason Aldean opens 'SNL' with Vegas tribute Co...,jason aldean open snl vega tribut countri sing...


In [71]:
# Cleaned texts (features) and labels (targets) as NumPy arrays.
# `.to_numpy()` is the accessor pandas recommends over `.values`: it always
# returns an ndarray, whereas `.values` can return an extension array for
# extension-typed columns.
X = data['Cleaned_News'].to_numpy()
y = data['Label'].to_numpy()

In [72]:
from sklearn.model_selection import train_test_split

In [73]:
# Hold out 25% of the rows for testing.
# random_state pins the shuffle so the split, and every score computed from it,
# is the same on each run; stratify=y keeps the label proportions equal in the
# training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

In [74]:
X_train.shape

(2991,)

In [75]:
X_test.shape

(997,)

# Vectorization

In [76]:
# Bag of words
from sklearn.feature_extraction.text import CountVectorizer

In [77]:
cv = CountVectorizer(max_features=10000)

In [78]:
# Learn the vocabulary from the training texts only and build their count matrix.
# The result is left as the sparse matrix CountVectorizer returns: a dense
# 2991 x 10000 integer array would take hundreds of megabytes, and the
# scikit-learn classifiers used in this notebook accept sparse input directly.
X_train = cv.fit_transform(X_train)

In [79]:
X_train.shape

(2991, 10000)

In [None]:
X_test.shape

In [None]:
# Encode the test texts with the vocabulary already fitted on the training set
# (transform, not fit_transform, so nothing is learned from the test data).
# The matrix is kept sparse rather than expanded into a dense array; the
# classifier's predict/score methods accept sparse input.
X_test = cv.transform(X_test)

# Model Training

In [93]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression

In [94]:
# Classifier choice: logistic regression is the active model. MultinomialNB
# (imported in the previous cell) is kept below as a commented-out alternative.
# model = MultinomialNB()
model = LogisticRegression()

In [95]:
model.fit(X_train, y_train)

LogisticRegression()

# Prediction

In [96]:
X_test.shape

(997, 10000)

In [97]:
y_pred = model.predict(X_test)

In [98]:
y_pred[:5]

array([0, 0, 1, 0, 1])

In [99]:
y_test[:5]

array([0, 0, 1, 0, 1])

# Evaluation

In [100]:
np.sum(y_pred == y_test)

979

In [101]:
# Accuracy as the fraction of matching predictions, computed from the arrays
# themselves instead of from numbers copied out of an earlier cell's output,
# so the value stays correct whenever the split or the model changes.
np.mean(y_pred == y_test)

0.9819458375125376

In [102]:
model.score(X_test, y_test)

0.9819458375125376