In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import re
import string

In [2]:
fake_df = pd.read_csv('Fake.csv')
true_df = pd.read_csv('True.csv')

In [3]:
fake_df.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
true_df.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
fake_df['Fake or True'] = 0
true_df['Fake or True'] = 1

In [6]:
fake_df.shape, true_df.shape

((23481, 5), (21417, 5))

In [7]:
# Remove the last 10 data for test the model after training
fake_val = fake_df.tail(10)
for i in range(23480, 23470, - 1):
    fake_df.drop([i], axis = 0, inplace = True)

true_val = true_df.tail(10)
for i in range(21416, 21406, - 1):
    true_df.drop([i], axis = 0, inplace = True)

In [8]:
fake_df.shape, true_df.shape

((23471, 5), (21407, 5))

In [9]:
true_val.head()

Unnamed: 0,title,text,subject,date,Fake or True
21407,"Mata Pires, owner of embattled Brazil builder ...","SAO PAULO (Reuters) - Cesar Mata Pires, the ow...",worldnews,"August 22, 2017",1
21408,"U.S., North Korea clash at U.N. forum over nuc...",GENEVA (Reuters) - North Korea and the United ...,worldnews,"August 22, 2017",1
21409,"U.S., North Korea clash at U.N. arms forum on ...",GENEVA (Reuters) - North Korea and the United ...,worldnews,"August 22, 2017",1
21410,Headless torso could belong to submarine journ...,COPENHAGEN (Reuters) - Danish police said on T...,worldnews,"August 22, 2017",1
21411,North Korea shipments to Syria chemical arms a...,UNITED NATIONS (Reuters) - Two North Korean sh...,worldnews,"August 21, 2017",1


In [15]:
data_merge =  pd.concat([fake_df, true_df], axis = 0)
data_merge

Unnamed: 0,title,text,subject,date,Fake or True
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
...,...,...,...,...,...
21402,Exclusive: Trump's Afghan decision may increas...,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,worldnews,"August 22, 2017",1
21403,U.S. puts more pressure on Pakistan to help wi...,WASHINGTON (Reuters) - The United States sugge...,worldnews,"August 21, 2017",1
21404,Exclusive: U.S. to withhold up to $290 million...,WASHINGTON (Reuters) - The United States has d...,worldnews,"August 22, 2017",1
21405,Trump talks tough on Pakistan's 'terrorist' ha...,ISLAMABAD (Reuters) - Outlining a new strategy...,worldnews,"August 22, 2017",1


In [16]:
data_merge.columns

Index(['title', 'text', 'subject', 'date', 'Fake or True'], dtype='object')

In [17]:
# Remove unwanted columns
data = data_merge.drop(['title', 'subject', 'date'], axis = 1)

In [18]:
# Check for the null values
data.isnull().sum()

text            0
Fake or True    0
dtype: int64

In [19]:
# Random Shuffling
data = data.sample(frac = 1)

In [20]:
data.head()

Unnamed: 0,text,Fake or True
10581,WASHINGTON (Reuters) - Senate Majority Leader ...,1
6341,"NEW YORK (Reuters) - Anthony Scaramucci, a hed...",1
9779,WASHINGTON (Reuters) - House Speaker Paul Ryan...,1
4797,Following the mass exodus of Hispanic surrogat...,0
20477,PARIS (Reuters) - French president Emmanuel Ma...,1


In [21]:
data.reset_index(inplace = True)
data.drop(['index'], axis = 1, inplace = True)

In [22]:
data.columns

Index(['text', 'Fake or True'], dtype='object')

In [23]:
data.head()

Unnamed: 0,text,Fake or True
0,WASHINGTON (Reuters) - Senate Majority Leader ...,1
1,"NEW YORK (Reuters) - Anthony Scaramucci, a hed...",1
2,WASHINGTON (Reuters) - House Speaker Paul Ryan...,1
3,Following the mass exodus of Hispanic surrogat...,0
4,PARIS (Reuters) - French president Emmanuel Ma...,1


In [29]:
def process_text(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W", " ", text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation),'', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text

In [30]:
data['text'] = data['text'].apply(process_text)

In [34]:
X = data['text']
y = data['Fake or True']

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [46]:
# Text to vectors
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
train = vectorization.fit_transform(X_train)
test = vectorization.transform(X_test)

# Logistic Regression

In [54]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(train, y_train)

In [50]:
pred_lr = LR.predict(test)

In [51]:
LR.score(test, y_test)

0.9862745098039216

In [53]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5819
           1       0.99      0.98      0.99      5401

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



# Decision Tree

In [55]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(train, y_train)

In [56]:
pred_dt = DT.predict(test)

In [57]:
DT.score(test, y_test)

0.9954545454545455

In [58]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5819
           1       0.99      0.98      0.99      5401

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



# Random Forest

In [59]:
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier()
RF.fit(train, y_train)

In [60]:
pred_rf = RF.predict(test)

In [61]:
RF.score(test, y_test)

0.9889483065953654

In [62]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5819
           1       0.99      0.98      0.99      5401

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [72]:
def output_label(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"
    
def validation(news):
    # get the text part from the news
    validating_news = {'text':[news]}
    
    # convert to a data frame
    val_test = pd.DataFrame(validating_news)
    
    # Process the text
    val_test['text'] = val_test['text'].apply(process_text)
    new_x_test = val_test['text']
    
    # Text to vector
    vector_test = vectorization.transform(new_x_test)
    
    # Predicting
    pred_LR = LR.predict(vector_test)
    pred_DT = DT.predict(vector_test)
    pred_RF = RF.predict(vector_test)
    
    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nRF Prediction: {}".format(output_label(pred_LR[0]),
                                                                                       output_label(pred_DT[0]),
                                                                                       output_label(pred_RF[0])))

In [73]:
news = str(input())
validation(news)

,"JAKARTA (Reuters) - Indonesia will buy 11 Sukhoi fighter jets worth $1.14 billion from Russia in exchange for cash and Indonesian commodities, two cabinet ministers said on Tuesday. The Southeast Asian country has pledged to ship up to $570 million worth of commodities in addition to cash to pay for the Suhkoi SU-35 fighter jets, which are expected to be delivered in stages starting in two years. Indonesian Trade Minister Enggartiasto Lukita said in a joint statement with Defence Minister Ryamizard Ryacudu that details of the type and volume of commodities were  still being negotiated . Previously he had said the exports could include palm oil, tea, and coffee. The deal is expected to be finalised soon between Indonesian state trading company PT Perusahaan Perdangangan Indonesia and Russian state conglomerate Rostec. Russia is currently facing a new round of U.S.-imposed trade sanctions. Meanwhile, Southeast Asia s largest economy is trying to promote its palm oil products amid threa

In [74]:
news = str(input())
validation(news)

21st Century Wire says As 21WIRE predicted in its new year s look ahead, we have a new  hostage  crisis underway.Today, Iranian military forces report that two small riverine U.S. Navy boats were seized in Iranian waters, and are currently being held on Iran s Farsi Island in the Persian Gulf. A total of 10 U.S. Navy personnel, nine men and one woman, have been detained by Iranian authorities. NAVY STRAYED: U.S. Navy patrol boat in the Persian Gulf (Image Source: USNI)According to the Pentagon, the initial narrative is as follows: The sailors were on a training mission around noon ET when their boat experienced mechanical difficulty and drifted into Iranian-claimed waters and were detained by the Iranian Coast Guard, officials added. The story has since been slightly revised by White House spokesman Josh Earnest to follow this narrative:The 2 boats were traveling en route from Kuwait to Bahrain, when they were stopped and detained by the Iranians.According to USNI, search and rescue te