Importing Libraries

In [22]:
import pandas as pd
import numpy as np
import seaborn as snb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pickle
import re
import string

Importing Datasets

In [23]:
news_true = pd.read_csv("./datasets/True.csv")
news_fake = pd.read_csv("./datasets/Fake.csv")

In [24]:
news_fake["class"]=0
news_true["class"]=1

In [25]:
dataset = pd.concat([news_fake,news_true], axis=0)
dataset.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [26]:
data_only = dataset.drop(['title','subject','date'], axis=1)
data_only

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
...,...,...
21412,BRUSSELS (Reuters) - NATO allies on Tuesday we...,1
21413,"LONDON (Reuters) - LexisNexis, a provider of l...",1
21414,MINSK (Reuters) - In the shadow of disused Sov...,1
21415,MOSCOW (Reuters) - Vatican Secretary of State ...,1


In [27]:
data_only = data_only.sample(frac=1)
data_only

Unnamed: 0,text,class
1592,Ever since Donald Trump won the election (unfa...,0
9736,WASHINGTON (Reuters) - Republican presidential...,1
1198,WASHINGTON (Reuters) - U.S. President Donald T...,1
12319,MOSCOW (Reuters) - Russian President Vladimir ...,1
4307,The following statements were posted to the ve...,1
...,...,...
21221,TOKYO (Reuters) - Britain and Japan said on Th...,1
10707,WASHINGTON (Reuters) - Key senators said on Tu...,1
5676,(Reuters) - A Trump administration aide correc...,1
12487,"Well, this is rich Hillary Clinton had her hou...",0


In [28]:
data_only.reset_index(inplace=True)
data_only.drop(['index'], axis=1, inplace=True)
data_only.columns

Index(['text', 'class'], dtype='object')

Modifying the Text

In [29]:
def words(text):
    text = text.lower()
    text = re.sub('\[.*?\]','',text)
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://\S+|www\.\S+','',text)
    text = re.sub('<.*?>+','',text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n','',text)
    text = re.sub('\w*\d\w*','',text)
    return text

In [30]:
data_only['text'] = data_only['text'].apply(words)
data_only

Unnamed: 0,text,class
0,ever since donald trump won the election unfa...,0
1,washington reuters republican presidential...,1
2,washington reuters u s president donald t...,1
3,moscow reuters russian president vladimir ...,1
4,the following statements were posted to the ve...,1
...,...,...
44893,tokyo reuters britain and japan said on th...,1
44894,washington reuters key senators said on tu...,1
44895,reuters a trump administration aide correc...,1
44896,well this is rich hillary clinton had her hou...,0


Splitting the dataset


In [31]:
x = data_only['text']
y = data_only['class']

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

In [32]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
x_vec_train = vectorization.fit_transform(x_train)
x_vec_test = vectorization.transform(x_test)

In [33]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(x_vec_train,y_train)

In [34]:
LR_pred = LR.predict(x_vec_test)
LR.score(x_vec_test,y_test)

0.9871937639198218

In [35]:
print(classification_report(y_test,LR_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4639
           1       0.99      0.99      0.99      4341

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [36]:
from sklearn.tree import DecisionTreeClassifier
DTC = DecisionTreeClassifier()
DTC.fit(x_vec_train,y_train)

In [37]:
DTC_pred = DTC.predict(x_vec_test)
DTC.score(x_vec_test,y_test)

0.9968819599109131

In [38]:
print(classification_report(y_test,DTC_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4639
           1       1.00      1.00      1.00      4341

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980



In [39]:
def Output(n):
    if n==1:
        return 'True News'
    elif n==0:
        return 'Fake News'

def Predict(text):
    news = {"text":[text]}
    news = pd.DataFrame(news)
    news['text'] = news['text'].apply(words)
    x_test = news['text']
    x_test = vectorization.transform(x_test)
    #selected Decision Tree Classification
    pred = DTC.predict(x_test)
    print("Prediction:",Output(pred[0]))

Example News

In [None]:
TrueNewsEX = "GAZA, Dec 2 (Reuters) - Renewed fighting in Gaza stretched into a second day on Saturday after talks to extend a week-old truce with Hamas collapsed and mediators said Israeli bombardments were complicating attempts to again pause hostilities. Eastern areas of Khan Younis in southern Gaza came under intense bombardment as the truce deadline lapsed shortly after dawn on Friday, with columns of smoke rising into the sky, Reuters journalists in the city said. Residents took to the road with belongings heaped up in carts, searching for shelter further west. Israel said its ground, air and naval forces struck more than 200 'terror targets' in Gaza. By Friday evening, health officials in the coastal strip said Israeli strikes had killed 184 people, wounded at least 589 others and hit more than 20 houses."
FakeNewsEx = "Donald Trump really should have taken his staffers  advise and not tweeted about former national security adviser Michael Flynn because the former reality show star just implicated himself. I had to fire General Flynn because he lied to the Vice President and the FBI,  Trump tweeted.  He has pled guilty to those lies. It is a shame because his actions during the transition were lawful. There was nothing to hide! I had to fire General Flynn because he lied to the Vice President and the FBI. He has pled guilty to those lies. It is a shame because his actions during the transition were lawful. There was nothing to hide!  Donald J. Trump (@realDonaldTrump) December 2, 2017 Oh my god, he just admitted to obstruction of justice,  former Justice Department spokesman Matthew Miller tweeted."

Main

In [40]:
news = str(input())
Predict(news)

Prediction: Fake News
