In [1]:
# Packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import re
import string
import warnings
warnings.filterwarnings('ignore', message='not allowed')

In [2]:
# Data Collection
fake_news = pd.read_csv("/content/drive/MyDrive/Fake.csv")
true_news = pd.read_csv("/content/drive/MyDrive/True.csv")

In [3]:
# Data Exploration
print("Fake news dataset:\n")
fake_news

Fake news dataset:



Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"
...,...,...,...,...
23476,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,"January 16, 2016"
23477,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,"January 16, 2016"
23478,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,"January 15, 2016"
23479,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,"January 14, 2016"


In [4]:
print("Shape:", fake_news.shape)
fake_news.info()

Shape: (23481, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23481 entries, 0 to 23480
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    23481 non-null  object
 1   text     23481 non-null  object
 2   subject  23481 non-null  object
 3   date     23481 non-null  object
dtypes: object(4)
memory usage: 733.9+ KB


In [5]:
print("True news dataset:\n")
true_news

True news dataset:



Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"
...,...,...,...,...
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017"
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017"
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017"
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017"


In [6]:
print("Shape:", true_news.shape)
true_news.info()

Shape: (21417, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21417 entries, 0 to 21416
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    21417 non-null  object
 1   text     21417 non-null  object
 2   subject  21417 non-null  object
 3   date     21417 non-null  object
dtypes: object(4)
memory usage: 669.4+ KB


In [7]:
print("Descriptive statistics for fake news dataset:")
print(fake_news.describe())

print("\nDescriptive statistics for true news dataset:")
print(true_news.describe())

Descriptive statistics for fake news dataset:
                                                    title   text subject  \
count                                               23481  23481   23481   
unique                                              17903  17455       6   
top     MEDIA IGNORES Time That Bill Clinton FIRED His...           News   
freq                                                    6    626    9050   

                date  
count          23481  
unique          1681  
top     May 10, 2017  
freq              46  

Descriptive statistics for true news dataset:
                                                    title  \
count                                               21417   
unique                                              20826   
top     Factbox: Trump fills top jobs for his administ...   
freq                                                   14   

                                                     text       subject  \
count                         

In [8]:
print("Missing values in fake news dataset:\n")
fake_news.isnull().sum()

Missing values in fake news dataset:



title      0
text       0
subject    0
date       0
dtype: int64

In [9]:
print("Missing values in true news dataset:\n")
true_news.isnull().sum()

Missing values in true news dataset:



title      0
text       0
subject    0
date       0
dtype: int64

In [10]:
# Data Preprocessing
fake_news["class"] = 0
true_news["class"] = 1

fake_news.shape, true_news.shape

((23481, 5), (21417, 5))

In [None]:
fake_manual_testing = fake_news.tail(10)
for i in range(23480,23470,-1):
    fake_news.drop([i], axis = 0, inplace = True)

true_manual_testing = true_news.tail(10)
for i in range(21416,21406,-1):
    true_news.drop([i], axis = 0, inplace = True)

fake_manual_testing["class"] = 0
true_manual_testing["class"] = 1

In [32]:
fake_news.shape, true_news.shape

((23471, 5), (21407, 5))

In [12]:
merge_news = pd.concat([fake_news, true_news], axis = 0 )
merge_news

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
...,...,...,...,...,...
21402,Exclusive: Trump's Afghan decision may increas...,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,worldnews,"August 22, 2017",1
21403,U.S. puts more pressure on Pakistan to help wi...,WASHINGTON (Reuters) - The United States sugge...,worldnews,"August 21, 2017",1
21404,Exclusive: U.S. to withhold up to $290 million...,WASHINGTON (Reuters) - The United States has d...,worldnews,"August 22, 2017",1
21405,Trump talks tough on Pakistan's 'terrorist' ha...,ISLAMABAD (Reuters) - Outlining a new strategy...,worldnews,"August 22, 2017",1


In [13]:
data = merge_news.drop(["title", "subject","date"], axis = 1)

data

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
...,...,...
21402,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,1
21403,WASHINGTON (Reuters) - The United States sugge...,1
21404,WASHINGTON (Reuters) - The United States has d...,1
21405,ISLAMABAD (Reuters) - Outlining a new strategy...,1


In [14]:
data.isnull().sum()

text     0
class    0
dtype: int64

In [15]:
data_shuffle = data.sample(frac = 1)
data_shuffle

Unnamed: 0,text,class
5908,BAGHDAD (Reuters) - An influential Shi’ite cle...,1
2512,When alleged president Donald Trump doesn t li...,0
18675,May 7th is likely going to be a day of clashes...,0
1007,A group of disabled Americans protesting the S...,0
15764,GENEVA (Reuters) - The United Nations human ri...,1
...,...,...
20937,"If we didn t know better, we d think the liber...",0
8199,Clearly riding the high off of his New Hampshi...,0
10287,WASHINGTON (Reuters) - The top U.S. military o...,1
5119,"BERKELEY, Calif./LANSING, Mich. (Reuters) - S...",1


In [16]:
data_shuffle.reset_index(inplace = True)
data_shuffle.drop(["index"], axis = 1, inplace = True)
data_shuffle

Unnamed: 0,text,class
0,BAGHDAD (Reuters) - An influential Shi’ite cle...,1
1,When alleged president Donald Trump doesn t li...,0
2,May 7th is likely going to be a day of clashes...,0
3,A group of disabled Americans protesting the S...,0
4,GENEVA (Reuters) - The United Nations human ri...,1
...,...,...
44873,"If we didn t know better, we d think the liber...",0
44874,Clearly riding the high off of his New Hampshi...,0
44875,WASHINGTON (Reuters) - The top U.S. military o...,1
44876,"BERKELEY, Calif./LANSING, Mich. (Reuters) - S...",1


In [17]:
def wordopt(t):
    tt = t.lower()
    t = re.sub('\[.*?\]', '', t)
    t = re.sub("\\W"," ",t)
    t = re.sub('https?://\S+|www\.\S+', '', t)
    t = re.sub('<.*?>+', '', t)
    t = re.sub('[%s]' % re.escape(string.punctuation), '', t)
    t = re.sub('\n', '', t)
    t = re.sub('\w*\d\w*', '', t)
    return t

data_shuffle["text"] = data_shuffle["text"].apply(wordopt)

In [18]:
x = data_shuffle["text"]
y = data_shuffle["class"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

In [19]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [20]:
# Model Building
LR = LogisticRegression()
LR.fit(xv_train,y_train)
pred_lr=LR.predict(xv_test)
LR.score(xv_test, y_test)

0.9866310160427807

In [21]:
print("\nClassification Report:\n")
print(classification_report(y_test, pred_lr))
lr_conf_matrix = confusion_matrix(y_test, pred_lr)
print("\nConfusion Matrix of Logistic Regression:\n")
print(lr_conf_matrix)


Classification Report:

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5905
           1       0.99      0.99      0.99      5315

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220


Confusion Matrix of Logistic Regression:

[[5826   79]
 [  71 5244]]


In [22]:
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)
pred_dt = DT.predict(xv_test)
DT.score(xv_test, y_test)

0.9962566844919786

In [23]:
print("\nClassification Report of Decision Tree Classifier:\n")
print(classification_report(y_test, pred_dt))
dt_conf_matrix = confusion_matrix(y_test, pred_dt)
print("\nConfusion Matrix of Decision Tree Classifier:\n")
print(dt_conf_matrix)


Classification Report of Decision Tree Classifier:

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      5905
           1       1.00      0.99      1.00      5315

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220


Confusion Matrix of Decision Tree Classifier:

[[5895   10]
 [  32 5283]]


In [24]:
GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)
pred_gbc = GBC.predict(xv_test)
GBC.score(xv_test, y_test)

0.9959001782531194

In [25]:
print("\nClassification Report of Gradient Boosting Claassifier:\n")
print(classification_report(y_test, pred_gbc))
gbc_conf_matrix = confusion_matrix(y_test, pred_gbc)
print("\nConfusion Matrix of Gradient Boosting Claassifier:\n")
print(gbc_conf_matrix)


Classification Report of Gradient Boosting Claassifier:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5905
           1       0.99      1.00      1.00      5315

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220


Confusion Matrix of Gradient Boosting Claassifier:

[[5878   27]
 [  19 5296]]


In [26]:
RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)
pred_rfc = RFC.predict(xv_test)
RFC.score(xv_test, y_test)

0.9898395721925134

In [27]:
print("\nClassification Report of Random Forest Classifier:\n")
print(classification_report(y_test, pred_rfc))
rfc_conf_matrix = confusion_matrix(y_test, pred_rfc)
print("\nConfusion Matrix of Random Forest Classifier:\n")
print(rfc_conf_matrix)


Classification Report of Random Forest Classifier:

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5905
           1       0.99      0.99      0.99      5315

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220


Confusion Matrix of Random Forest Classifier:

[[5859   46]
 [  68 5247]]


In [28]:
# Model Testing
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"

In [29]:
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(output_lable(pred_LR[0]), output_lable(pred_DT[0]), output_lable(pred_GBC[0]), output_lable(pred_RFC[0])))

In [30]:
news = str(input())
manual_testing(news)

21st Century Wire says It s a familiar theme. Whenever there is a dispute or a change of law, and two tribes go to war, there is normally only one real winner after the tribulation  the lawyers. Ars TechnicaIn late 2013, Yahoo was hit with six lawsuits over its practice of using automated scans of e-mail to produce targeted ads. The cases, which were consolidated in federal court, all argued that the privacy rights of non-Yahoo users, who  did not consent to Yahoo s interception and scanning of their emails,  were being violated by a multi-billion dollar company.Now, lawyers representing the plaintiffs are singing a different tune. Last week, they asked US District Judge Lucy Koh to accept a proposed settlement (PDF). Under the proposal, the massive class of non-Yahoo users won t get any payment, but the class lawyers at Girard Gibbs and Kaplan Fox intend to ask for up to $4 million in fees. (The ultimate amount of fees will be up to the judge, but Yahoo has agreed not to oppose any fe

In [31]:
news = str(input())
manual_testing(news)

MOSCOW (Reuters) - Vatican Secretary of State Cardinal Pietro Parolin said on Tuesday that there was  positive momentum  behind the idea of Pope Francis visiting Russia, but suggested there was more work to be done if it were to happen.  Parolin, speaking at a joint news conference in Moscow alongside Russian Foreign Minister Sergei Lavrov, did not give any date for such a possible visit. The Eastern and Western branches of Christianity split apart in 1054. The pope, leader of the world s 1.2 billion Catholics, is seeking to improve ties, and last year in Cuba held what was the first ever meeting between a Roman Catholic pope and a Russian Orthodox patriarch.  Parolin said he had also used his talks in the Russian capital to also raise certain difficulties faced by the Catholic Church in Russia. He said that Moscow and the Vatican disagreed about the plight of Christians in certain parts of the world. He did not elaborate. Parolin, who is due later on Tuesday to meet Patriarch Kirill, 