## DETECT IF NEWS FAKE OR REAL

### LIBRARIES

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import re
import string

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

### DATASETS

In [3]:
real=pd.read_csv('True.csv')
fake=pd.read_csv('Fake.csv')

### work

In [4]:
real['class']=1
fake['class']=0

In [5]:
real.shape, fake.shape

((21417, 5), (23481, 5))

In [6]:
fake_data_manual_testing = fake.tail(10)
for i in range(23480,23470,-1):
    fake.drop([i],axis = 0, inplace = True)


true_data_manual_testing = real.tail(10)
for i in range(21416,21406,-1):
    real.drop([i],axis = 0, inplace = True)


In [7]:
true_data_manual_testing['class']=1
fake_data_manual_testing['class']=0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  true_data_manual_testing['class']=1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fake_data_manual_testing['class']=0


In [9]:
merged=pd.concat([fake, real], axis = 0)
merged.head(20)

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",0
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",0
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",0
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",0
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",0


### Remove irrelevant features

In [10]:
merged.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [11]:
data=merged.drop(['title','subject','date'], axis = 1)

In [12]:
#shuffle
data = data.sample(frac = 1)

In [13]:
data.reset_index(inplace = True)
data.drop(['index'], axis = 1, inplace = True)

In [14]:
data.columns


Index(['text', 'class'], dtype='object')

In [15]:
data.head()

Unnamed: 0,text,class
0,BRUSSELS (Reuters) - Theresa May must deliver ...,1
1,VATICAN CITY (Reuters) - It is perhaps only a...,1
2,BEIJING (Reuters) - China said on Thursday its...,1
3,I think we are in a real moment like Nixon ??...,0
4,This is it this is our LAST CHANCE to turn the...,0


### Preprocessing

In [16]:
def func(text):
    text = text.lower()
    text = re.sub('\[.*?\]','',text)
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://\S+|www\.\S+','',text)
    text = re.sub('<.*?>+',b'',text)
    text = re.sub('[%s]' % re.escape(string.punctuation),'',text)
    text = re.sub('\w*\d\w*','',text)
    return text

In [17]:
data['text'] = data['text'].apply(func)

In [18]:
x = data['text']
y = data['class']

### Training

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.25)

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

###Logistic Regression

In [21]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(xv_train, y_train)
pred_lr = LR.predict(xv_test)

LR.score(xv_test, y_test)

0.9846702317290552

In [22]:
print (classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5911
           1       0.98      0.98      0.98      5309

    accuracy                           0.98     11220
   macro avg       0.98      0.98      0.98     11220
weighted avg       0.98      0.98      0.98     11220



###Decision Tree

In [23]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)
pred_dt = DT.predict(xv_test)
DT.score(xv_test, y_test)

0.9971479500891266

In [24]:
print (classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5911
           1       0.98      0.98      0.98      5309

    accuracy                           0.98     11220
   macro avg       0.98      0.98      0.98     11220
weighted avg       0.98      0.98      0.98     11220



### Gradient Boost

In [25]:
from sklearn.ensemble import GradientBoostingClassifier

GB = GradientBoostingClassifier(random_state = 0)
GB.fit(xv_train, y_train)
pred_gb = GB.predict(xv_test)
GB.score(xv_test, y_test)

0.9947415329768271

In [26]:
print(classification_report(y_test, pred_gb))

              precision    recall  f1-score   support

           0       1.00      0.99      0.99      5911
           1       0.99      1.00      0.99      5309

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



### Random Forest

In [27]:
from sklearn.ensemble import RandomForestClassifier

RF = RandomForestClassifier(random_state = 0)
RF.fit(xv_train, y_train)
pred_rf = RF.predict(xv_test)

RF.score(xv_test, y_test)

0.9864527629233512

In [28]:
print (classification_report(y_test, pred_rf))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5911
           1       0.99      0.98      0.99      5309

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



###TEST

In [29]:
def output_lable(n):
    if n==0:
        return "Fake News"
    elif n==1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test['text'] = new_def_test["text"].apply(func)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GB = GB.predict(new_xv_test)
    pred_RF = RF.predict(new_xv_test)

    return print("\n\nLR Predicition: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction:{}".format(output_lable(pred_LR[0]),
                                                                                                             output_lable(pred_DT[0]),
                                                                                                             output_lable(pred_GB[0]),
                                                                                                             output_lable(pred_RF[0])))

###MANUAL TEST

In [30]:
news = str(input())
manual_testing(news)

BALTIMORE—In what astronomers called an alarming yet compelling observation, NASA officials announced Thursday the discovery of potential life on Mars after a giant eyeball in the middle of the planet looked directly into the James Webb Space Telescope. “We are excited to share telescopic evidence of a colossal, audibly blinking eyeball on the surface of Mars, a finding that suggests extraterrestrial life may be present,” said NASA administrator Bill Nelson, explaining that visual signals from the Martian eyeball, dubbed Olympus Peeper, were first picked up when its large batty eyelashes were fluttering in the direction of their telescope’s sensor. “Further analysis is required to determine whether its unbroken eye contact was a threat, curiosity, or perhaps even flirtation.” At press time, NASA had reportedly delayed any plans to launch a mission to Mars after the planet began rapidly heading toward Earth.


LR Predicition: Fake News 
DT Prediction: Fake News 
GBC Prediction: Fake New