# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import re
import string

from nltk.tokenize import word_tokenize
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression, PassiveAggressiveClassifier
from sklearn.tree import DecisionTreeClassifier

# Reading Dataset

In [2]:
fake_df = pd.read_csv("../input/fake-and-true-news-dataset/Fake-211023-185413.csv")
true_df = pd.read_csv("../input/fake-and-true-news-dataset/True-211023-185340.csv")

In [3]:
fake_df.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
true_df.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
fake_df.subject.value_counts()

News               9050
politics           6841
left-news          4459
Government News    1570
US_News             783
Middle-east         778
Name: subject, dtype: int64

In [6]:
true_df.subject.value_counts()

politicsNews    11272
worldnews       10145
Name: subject, dtype: int64

In [7]:
fake_df.shape

(23481, 4)

In [8]:
true_df.shape

(21417, 4)

In [9]:
fake_df['Target'] = 0
true_df['Target'] = 1

In [10]:
fake_df.head()

Unnamed: 0,title,text,subject,date,Target
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [11]:
true_df.head()

Unnamed: 0,title,text,subject,date,Target
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [12]:
df = pd.concat([fake_df, true_df], axis = 0)
df = df.sample(frac = 1)
df = df.reset_index(drop = True)
df = df.drop(['subject', 'date', 'title'], axis = 1)
print(df.columns)

Index(['text', 'Target'], dtype='object')


In [13]:
df.isnull().sum()

text      0
Target    0
dtype: int64

In [14]:
df

Unnamed: 0,text,Target
0,BARCELONA (Reuters) - Protesters clashed with ...,1
1,NEW YORK (Reuters) - Front-runner Donald Trump...,1
2,Career criminals professional grifters the ult...,0
3,After it was revealed in the leaked Access Hol...,0
4,After Friday s right-wing terror attack in Por...,0
...,...,...
44893,MEXICO CITY (Reuters) - Honduran opposition ca...,1
44894,There have recently been allegations claiming ...,0
44895,NEW YORK (Reuters) - The email Donald Trump’s ...,1
44896,The Army can t be bothered with defending or p...,0


# Tokenization

In [15]:
df['text'] = df['text'].apply(word_tokenize)

In [16]:
df.head()

Unnamed: 0,text,Target
0,"[BARCELONA, (, Reuters, ), -, Protesters, clas...",1
1,"[NEW, YORK, (, Reuters, ), -, Front-runner, Do...",1
2,"[Career, criminals, professional, grifters, th...",0
3,"[After, it, was, revealed, in, the, leaked, Ac...",0
4,"[After, Friday, s, right-wing, terror, attack,...",0


# Stemming

In [17]:
porter = SnowballStemmer("english")

In [18]:
def stem_it(text):
    return [porter.stem(word) for word in text]

In [19]:
df['text'] = df['text'].apply(stem_it)

In [20]:
df.head()

Unnamed: 0,text,Target
0,"[barcelona, (, reuter, ), -, protest, clash, w...",1
1,"[new, york, (, reuter, ), -, front-runn, donal...",1
2,"[career, crimin, profession, grifter, the, ult...",0
3,"[after, it, was, reveal, in, the, leak, access...",0
4,"[after, friday, s, right-w, terror, attack, in...",0


# Stopword Removal

In [21]:
def stop_it(text):
    dt = [word for word in text if len(word)>2]
    return dt

In [22]:
df['text'] = df['text'].apply(stop_it)

In [23]:
df.head()

Unnamed: 0,text,Target
0,"[barcelona, reuter, protest, clash, with, poli...",1
1,"[new, york, reuter, front-runn, donald, trump,...",1
2,"[career, crimin, profession, grifter, the, ult...",0
3,"[after, was, reveal, the, leak, access, hollyw...",0
4,"[after, friday, right-w, terror, attack, portl...",0


In [24]:
df['text'] = df['text'].apply(''.join)

# Splitting the Data

In [25]:
x = df.text
y = df.Target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 1)

# Vectorization

In [26]:
Tfidf = TfidfVectorizer(max_df=0.7)
tfidf_train = Tfidf.fit_transform(x_train)
tfidf_test = Tfidf.transform(x_test)

In [27]:
print(tfidf_train)

  (0, 138402)	0.7071067811865475
  (0, 158399)	0.7071067811865475
  (1, 99233)	0.7071067811865475
  (1, 207674)	0.7071067811865475
  (2, 101918)	0.16500068058122702
  (2, 144313)	0.16500068058122702
  (2, 262158)	0.16500068058122702
  (2, 250775)	0.16500068058122702
  (2, 254137)	0.16500068058122702
  (2, 257246)	0.16500068058122702
  (2, 36604)	0.16500068058122702
  (2, 51726)	0.15683801887743928
  (2, 82248)	0.31367603775487857
  (2, 60910)	0.29865533226715646
  (2, 281724)	0.1428485970419055
  (2, 75471)	0.15683801887743928
  (2, 57496)	0.15683801887743928
  (2, 96571)	0.15683801887743928
  (2, 169750)	0.15683801887743928
  (2, 279968)	0.15683801887743928
  (2, 46492)	0.16500068058122702
  (2, 120278)	0.16500068058122702
  (2, 173621)	0.10405475697195324
  (2, 38339)	0.16500068058122702
  (2, 228644)	0.15683801887743928
  :	:
  (33670, 250543)	0.22272314889924785
  (33670, 178923)	0.22272314889924785
  (33670, 201227)	0.22272314889924785
  (33670, 54194)	0.22272314889924785
  (33670

# Logistic Regression

In [28]:
lr = LogisticRegression(max_iter=900)
lr.fit(tfidf_train, y_train)
lr_pred = lr.predict(tfidf_test)
accuracy = round(accuracy_score(y_test, lr_pred), 2)*100
print('Accuracy', accuracy, '%')

Accuracy 86.0 %


# Decision Tree Classifier

In [29]:
dt = DecisionTreeClassifier()
dt.fit(tfidf_train, y_train)
dt_pred = dt.predict(tfidf_test)
accuracy = round(accuracy_score(y_test, dt_pred), 2)*100
print('Accuracy:', accuracy, '%')

Accuracy: 69.0 %


# PassiveAggressive Classifier

In [30]:
pa = PassiveAggressiveClassifier(max_iter = 1000)
pa.fit(tfidf_train, y_train)
pa_pred = pa.predict(tfidf_test)
accuracy = round(accuracy_score(y_test, pa_pred), 2)*100
print("Accuracy:", accuracy, "%")

Accuracy: 87.0 %


# Predictive Model

In [31]:
input_news = ['Republicans have had seven years to come up with a viable replacement for Obamacare but they failed miserably. After taking a victory lap for gifting the wealthy with a tax break on Wednesday, Donald Trump looked at the cameras and said,  We have essentially repealed Obamacare and we will come up with something that will be much better. Obamacare has been repealed in this bill,  he added. Well, like most things Trump says, that s just not true. But, if the former reality show star could have done that in order to eradicate former President Obama s signature legislation, he would have and without offering an alternative.Senate Majority Leader Mitch McConnell told NPR that  This has not been a very bipartisan year. I hope in the new year, we re going to pivot here and become more cooperative. An Obamacare repeal in 2018 is DOA. Well, we obviously were unable to completely repeal and replace with a 52-48 Senate,  the Kentucky Republican said.  We ll have to take a look at what that looks like with a 51-49 Senate. But I think we ll probably move on to other issues. NPR reports:McConnell hopes to focus instead on stabilizing the insurance marketplaces to keep premiums from skyrocketing in the early months of 2018, a promise he made to moderate Republican Sen. Susan Collins of Maine to get her support for the tax bill.On top of that McConnell broke with House Speaker Paul Ryan, R-Wis., on the approach to paring back spending on programs like Medicaid and food stamps. McConnell told NPR he is  not interested  in using Senate budget rules to allow Republicans to cut entitlements without consultation with Democrats. I think entitlement changes, to be sustained, almost always have to be bipartisan,  McConnell said.  The House may have a different agenda. If our Democratic friends in the Senate want to join us to tackle any kind of entitlement reform. I d be happy to take a look at it. This is coming from Mitch McConnell. He knows Donald Trump is destroying the GOP. It doesn t matter, Sen. McConnell. We still recall him saying that his  number one priority is making sure president Obama s a one-term president. Well, we re hoping that Trump doesn t last a full term. Funny how that works.Photo by Chip Somodevilla/Getty Images']
transformed_data = Tfidf.transform(input_news)

prediction = pa.predict(transformed_data)
if (prediction == 1):
    print("News is TRUE")
else:
    print("News is FALSE")

News is FALSE
