# Fake News Detection using Machine Learning

In [1]:
import pandas as pd
import numpy as np

### Reading CSV files

In [5]:
# Load the two article datasets; both paths are relative to the notebook's
# working directory, so the CSV files must sit next to the notebook.
true = pd.read_csv('True.csv')
fake = pd.read_csv('Fake.csv')

In [3]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


### Combining the 2 Dataframes

In [7]:
# Target encoding used throughout the notebook: 1 for rows from True.csv,
# 0 for rows from Fake.csv. The columns are added to the frames in place.
true['label'] = 1
fake['label'] = 0

In [8]:
true.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [9]:
fake.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [10]:
# Stack the fake rows on top of the genuine rows (row-wise is concat's default).
# Each frame keeps its own row labels, so index values repeat in the result.
news = pd.concat([fake, true])

In [11]:
news.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [12]:
news.tail()

Unnamed: 0,title,text,subject,date,label
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017",1


In [13]:
news.isnull().sum()

Unnamed: 0,0
title,0
text,0
subject,0
date,0
label,0


In [14]:
# Only the article body and its label are used from here on.
news = news.drop(columns=['title', 'subject', 'date'])

In [15]:
news.head()

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


### Shuffling the data to avoid ordering bias

In [16]:
# Shuffle every row (frac=1) so the two classes are interleaved rather than
# stacked. The fixed random_state makes the row order, and therefore every
# result derived from it, reproducible across kernel restarts.
news = news.sample(frac=1, random_state=42)

In [17]:
news.head()

Unnamed: 0,text,label
21923,"170619-N-AA175-092..SAN DIEGO (June 19, 2017) ...",0
3736,"Once again, Donald Trump threw a temper tantru...",0
15877,SEOUL/BEIJING (Reuters) - Chinese President Xi...,1
1390,While vice-president Mike Pence gave a commenc...,0
10263,It s bad enough to have crazed costumed charac...,0


In [22]:
# Give the shuffled rows a fresh positional index. The previous row labels are
# kept as a new column (no drop=True), so running this cell a second time adds
# yet another helper column on top of the first one.
news = news.reset_index()

In [23]:
news.head()

Unnamed: 0,level_0,index,text,label
0,0,21923,"170619-N-AA175-092..SAN DIEGO (June 19, 2017) ...",0
1,1,3736,"Once again, Donald Trump threw a temper tantru...",0
2,2,15877,SEOUL/BEIJING (Reuters) - Chinese President Xi...,1
3,3,1390,While vice-president Mike Pence gave a commenc...,0
4,4,10263,It s bad enough to have crazed costumed charac...,0


In [27]:
# Remove the helper columns left behind by reset_index(). A single run of that
# cell only creates 'index'; 'level_0' appears only if it was executed twice.
# errors='ignore' drops whichever of the two exist instead of raising KeyError,
# so the notebook survives a clean Restart & Run All.
news = news.drop(columns=['index', 'level_0'], errors='ignore')

In [28]:
news.head()

Unnamed: 0,text,label
0,"170619-N-AA175-092..SAN DIEGO (June 19, 2017) ...",0
1,"Once again, Donald Trump threw a temper tantru...",0
2,SEOUL/BEIJING (Reuters) - Chinese President Xi...,1
3,While vice-president Mike Pence gave a commenc...,0
4,It s bad enough to have crazed costumed charac...,0


### Cleaning the data

In [29]:
import re

In [31]:
def wordopt(text):
  """Normalize one article string for vectorization.

  The text is lowercased, then URLs, HTML tags, punctuation and digits are
  deleted (replaced by nothing, so words joined by punctuation are fused),
  and finally newline characters are turned into single spaces.

  Args:
    text: The raw article text as a str.

  Returns:
    The cleaned string. Whitespace other than newlines is left untouched, so
    runs of spaces and leading/trailing spaces can remain.
  """
  # Applied strictly in this order: URLs must go before punctuation is
  # stripped, otherwise the "://" and "." they are recognised by are lost.
  substitutions = (
      (r'https?://\S+|www\.\S+', ''),  # URLs
      (r'<.*?>', ''),                  # HTML tags
      (r'[^\w\s]', ''),                # punctuation
      (r'\d', ''),                     # digits
      (r'\n', ' '),                    # newline characters
  )

  cleaned = text.lower()
  for pattern, replacement in substitutions:
    cleaned = re.sub(pattern, replacement, cleaned)

  return cleaned

In [32]:
# Overwrite the raw article text with its cleaned form, one row at a time.
# The original text is not kept; reload the CSVs to get it back.
news['text'] = news['text'].apply(wordopt)

In [33]:
news['text']

Unnamed: 0,text
0,naasan diego june command master chief ret k...
1,once again donald trump threw a temper tantrum...
2,seoulbeijing reuters chinese president xi jin...
3,while vicepresident mike pence gave a commence...
4,it s bad enough to have crazed costumed charac...
...,...
44893,washington reuters a federal judge on friday ...
44894,beijing reuters a peaceful settlement of the ...
44895,watch as katy tur tries to hilariously convinc...
44896,doha reuters qatar has agreed to strengthen c...


### Logistic regression

In [34]:
# Features are the cleaned article bodies; the target is the 0/1 label column.
X, Y = news['text'], news['label']

In [35]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the articles for evaluation. random_state pins the split so
# the reported scores can be reproduced after a kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [36]:
from sklearn.feature_extraction.text import TfidfVectorizer

# The vectorizer is fitted on the training split only, so nothing about the
# held-out articles influences the learned vocabulary or weights.
vectorization = TfidfVectorizer()
Xv_train = vectorization.fit_transform(X_train)
# The test split is only transformed with the already-fitted vectorizer.
Xv_test = vectorization.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Train a default-configured logistic regression on the TF-IDF training matrix;
# fit() returns the estimator itself, so construction and training are chained.
LR = LogisticRegression().fit(Xv_train, Y_train)

# Predicted labels for the held-out split, reused by the classification report.
pred_lr = LR.predict(Xv_test)

In [47]:
LR.score(Xv_test, Y_test)

0.98634001484781

In [48]:
print(classification_report(Y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7084
           1       0.98      0.99      0.99      6386

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



### Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Tree construction involves randomness, so an unseeded tree can differ
# between runs. A fixed random_state keeps the fitted model and its score
# reproducible.
DTC = DecisionTreeClassifier(random_state=42)
DTC.fit(Xv_train, Y_train)

# Predicted labels for the held-out split, reused by the classification report.
pred_dtc = DTC.predict(Xv_test)

In [49]:
DTC.score(Xv_test, Y_test)

0.9958426132145508

In [50]:
print(classification_report(Y_test, pred_dtc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7084
           1       1.00      1.00      1.00      6386

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



### Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A random forest bootstraps rows and subsamples features; without a fixed
# random_state every run trains a different forest and reports a different score.
RFC = RandomForestClassifier(random_state=42)
RFC.fit(Xv_train, Y_train)

# Predicted labels for the held-out split, reused by the classification report.
pred_rfc = RFC.predict(Xv_test)

In [51]:
RFC.score(Xv_test, Y_test)

0.9881217520415738

In [52]:
print(classification_report(Y_test, pred_rfc))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7084
           1       0.99      0.99      0.99      6386

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



### Gradient Boosting Classifier

In [53]:
from sklearn.ensemble import GradientBoostingClassifier

# Seed the estimator so the fitted ensemble, and the score reported for it,
# can be reproduced after a kernel restart.
GBC = GradientBoostingClassifier(random_state=42)
GBC.fit(Xv_train, Y_train)

# Predicted labels for the held-out split, reused by the classification report.
pred_gbc = GBC.predict(Xv_test)

In [54]:
GBC.score(Xv_test, Y_test)

0.9951002227171493

In [55]:
print(classification_report(Y_test, pred_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      7084
           1       0.99      1.00      0.99      6386

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



### Manual testing of the predictive model

In [56]:
def output_label(n):
  """Translate a numeric class label into its display text.

  Args:
    n: Predicted class; 0 stands for fake and 1 for genuine articles.

  Returns:
    "Fake news" for 0, "Genuine news" for 1, and None for any other value.
  """
  return "Fake news" if n == 0 else ("Genuine news" if n == 1 else None)

In [70]:
def manual_testing(news):
  """Classify one raw article with every trained model and format the verdicts.

  Note that inside this function the parameter `news` shadows the notebook-level
  `news` DataFrame.

  Args:
    news: The raw text of a single article (str).

  Returns:
    A string that starts with two newlines and then lists one
    "<MODEL> Prediction: <verdict>" entry per model, in the order LR, DTC,
    GBC, RFC, separated by " \\n".
  """
  # Clean the article exactly like the training data, then project it with the
  # already-fitted vectorizer (transform expects an iterable of documents).
  cleaned = wordopt(news)
  features = vectorization.transform([cleaned])

  # One (display name, fitted estimator) pair per report line, in output order.
  models = (("LR", LR), ("DTC", DTC), ("GBC", GBC), ("RFC", RFC))
  verdicts = [
      "{} Prediction: {}".format(name, output_label(model.predict(features)[0]))
      for name, model in models
  ]

  return "\n\n" + " \n".join(verdicts)

In [61]:
# Read the article to classify from the prompt; input() already returns a str.
news_article = input()

Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down about as welll as you d expect.What kind of president sends a New Year s greeting like this despicable, petty, infantile gibberish? Only Trump! His lack of decency won t eve

In [71]:
print(manual_testing(news_article))



LR Prediction Fake news 
DTC Prediction: Fake news 
GBC Prediction: Fake news 
RFC Prediction: Fake news
