# Importing the Dependencies

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import re

In [2]:
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK stop-word corpus; when it is already installed the call only reports that it is up to date.
# NOTE(review): `stopwords` is not referenced by any cell visible in this notebook - confirm it is still needed.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Data Collection & Pre-Processing

In [3]:
# Read both source datasets (paths are relative to the notebook's working directory).
# True.csv is read first, then Fake.csv.
data_true, data_fake = (pd.read_csv(csv_name) for csv_name in ('True.csv', 'Fake.csv'))

In [4]:
# Per-column count of missing values in the fake-news frame.
data_fake.isna().sum()

title      0
text       0
subject    0
date       0
dtype: int64

In [5]:
# Per-column count of missing values in the true-news frame.
data_true.isna().sum()

title      0
text       0
subject    0
date       0
dtype: int64

# Label Encoding

In [6]:
# Target encoding: 1 marks rows loaded from True.csv, 0 marks rows loaded from Fake.csv.
# The columns are added in place on the loaded frames.
data_true['label'] = 1
data_fake['label'] = 0

In [7]:
# Stack the true and fake articles into a single frame.
data = pd.concat([data_true, data_fake], axis=0)

# Merge headline and body into one text field, then keep only `content` and `label`.
data['content'] = data['title'] + ' ' + data['text']
data = data.drop(['title', 'text', 'subject', 'date'], axis=1)

# Shuffle the rows so the two classes are interleaved. The seed is fixed (same value as the
# train/test split's random_state) so that Restart & Run All reproduces the same row order
# and therefore the same split and scores. reset_index(drop=True) discards the old index
# directly instead of materialising it as an 'index' column and dropping it afterwards.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
data.head()

Unnamed: 0,label,content
0,0,BOOM! NAVY SEAL VET DESTROYS Whiny Organizer O...
1,1,City of Oxford strips Aung San Suu Kyi of huma...
2,0,Arizona Democratic Party PERFECTLY Trolls Tru...
3,0,WATCH: DIAMOND AND SILK Rip On John Kerry Over...
4,0,DEPARTMENT STORE MANNEQUINS Changed to Give Na...


In [8]:
def generalise(text):
    """Normalise raw text before vectorisation.

    Lower-cases ``text`` and then replaces every character outside ``a``-``z``
    (digits, punctuation, whitespace, non-ASCII letters) with a single space.
    Replacement is one-for-one, so runs of spaces are not collapsed.
    """
    lowered = text.lower()
    return re.sub('[^a-z]', ' ', lowered)

In [9]:
# Clean every article with generalise(); the same function is used again at prediction time.
data['content'] = data['content'].map(generalise)

# Splitting the data into training data & test data

In [10]:
# Features are the cleaned article text; the target is the 0/1 label.
X, Y = data['content'], data['label']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
# for a given row order.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF vectoriser with default settings. It is fitted on the training text only;
# the test text is transformed with that already-fitted vectoriser, so nothing is
# learned from the test split. `vectorization` is reused by manual_testing().
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

# Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default hyperparameters, trained on the TF-IDF training matrix.
LR = LogisticRegression()
LR.fit(xv_train,y_train)

In [13]:
# Logistic-regression predictions for the vectorised test split.
Predict_LR = LR.predict(xv_test)

In [14]:
# Test accuracy of the logistic regression. Scoring the predictions already stored in
# Predict_LR gives the same value as LR.score(xv_test, y_test) without running predict()
# over the whole test matrix a second time.
accuracy_score(y_test, Predict_LR)

0.9853006681514477

# Decision Tree Classifier

In [15]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyperparameters. random_state is pinned because the tree
# permutes candidate features at each split; without a seed the fitted tree (and its
# score) can differ between runs.
DT = DecisionTreeClassifier(random_state=42)
DT.fit(xv_train, y_train)

In [16]:
# Decision-tree predictions for the vectorised test split.
Predict_DT = DT.predict(xv_test)

In [17]:
# Test accuracy of the decision tree. Scoring the predictions already stored in
# Predict_DT gives the same value as DT.score(xv_test, y_test) without running predict()
# over the whole test matrix a second time.
accuracy_score(y_test, Predict_DT)

0.9965478841870824

# Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters. random_state is pinned because bootstrap
# sampling and per-split feature selection are random; without a seed the fitted forest
# (and its score) differs between runs.
RF = RandomForestClassifier(random_state=42)
RF.fit(xv_train, y_train)

In [19]:
# Random-forest predictions for the vectorised test split.
Predict_RF = RF.predict(xv_test)

In [20]:
# Test accuracy of the random forest. Scoring the predictions already stored in
# Predict_RF gives the same value as RF.score(xv_test, y_test) without running predict()
# over the whole test matrix a second time.
accuracy_score(y_test, Predict_RF)

0.9867483296213808

# Building a Predictive System

In [21]:
def label(n):
    """Translate a numeric class into its display string.

    Returns "Fake News" when ``n`` equals 0, "Not a Fake News" when ``n``
    equals 1, and ``None`` for any other value.
    """
    if n == 0:
        return "Fake News"
    if n == 1:
        return "Not a Fake News"
    return None
        
def manual_testing(text):
    """Classify one article with all three fitted models and print the verdicts.

    ``text`` is cleaned with ``generalise``, vectorised with the already-fitted
    ``vectorization`` and passed to ``LR``, ``DT`` and ``RF``. The first
    prediction of each model is translated by ``label`` and printed.
    Returns ``None``.
    """
    # Wrap the single article in a one-row frame and clean it the same way as the training text.
    cleaned = pd.DataFrame({'content': [text]})['content'].apply(generalise)
    features = vectorization.transform(cleaned)

    lr_verdict = label(LR.predict(features)[0])
    dt_verdict = label(DT.predict(features)[0])
    rf_verdict = label(RF.predict(features)[0])

    # print() separates its arguments with a space; the embedded "\n" starts each model's line.
    print("LR Prediction: ", lr_verdict, "\nDT Prediction: ", dt_verdict, "\nRF Prediction: ", rf_verdict)

In [22]:
# Read one article from the user (input() already returns a str) and classify it.
news = input()
manual_testing(news)

 Donald Trump held a rally for Alabama Senate candidate and alleged pedophile Roy Moore in Pensacola, Florida on Friday night which he later claimed was  packed to the rafters  but the venue was barely half-filled with supporters. Outside of the rally, a liberal group targeted the former reality show star and Moore by using Ivanka Trump s own words.American Bridge used a mobile billboard featuring Ivanka Trump s criticism of Moore.  The truck displayed,  There s a special place in hell for people who prey on children  along with Trump s daughter s picture emblazoned across it.Happening now at @realDonaldTrump s rally: we re driving a mobile billboard promoting @IvankaTrump s condemnation of Roy Moore. https://t.co/XpDJYFZjzn #alsen pic.twitter.com/exQIAjZJ05  American Bridge (@American_Bridge) December 9, 2017The group s site says that the quote above and  I have no reason to doubt the victims  accounts  by Ivanka were blasted over a loudspeaker outside of the rally.Watch:We re in Pens

LR Prediction:  Fake News 
DT Prediction:  Fake News 
RF Prediction:  Fake News
