In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

In [2]:
df=pd.read_csv("Processed_Data1.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,class
0,0,Donald Trump just couldn t wish all Americans ...,0
1,1,House Intelligence Committee Chairman Devin Nu...,0
2,2,"On Friday, it was revealed that former Milwauk...",0
3,3,"On Christmas day, Donald Trump announced that ...",0
4,4,Pope Francis used his annual Christmas Day mes...,0


In [4]:
df.drop("Unnamed: 0",inplace=True,axis=1)

In [5]:
df

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
...,...,...
38633,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,1
38634,WASHINGTON (Reuters) - The United States sugge...,1
38635,WASHINGTON (Reuters) - The United States has d...,1
38636,ISLAMABAD (Reuters) - Outlining a new strategy...,1


In [6]:
df=df.sample(frac=1)

In [7]:
df.head()

Unnamed: 0,text,class
29072,"VIC/CERDANYOLA DEL VALLES, Spain (Reuters) - C...",1
6287,House Democrats did something extraordinary to...,0
29133,HANOI (Reuters) - Vietnam police on Wednesday ...,1
3849,The effects of a Donald Trump presidency are a...,0
14962,UPDATE: AND SO IT BEGINS A police officer hold...,0


In [8]:
df.reset_index(inplace=True)
df.drop(['index'],axis=1,inplace=True)

In [9]:
df.columns

Index(['text', 'class'], dtype='object')

In [10]:
df.head()

Unnamed: 0,text,class
0,"VIC/CERDANYOLA DEL VALLES, Spain (Reuters) - C...",1
1,House Democrats did something extraordinary to...,0
2,HANOI (Reuters) - Vietnam police on Wednesday ...,1
3,The effects of a Donald Trump presidency are a...,0
4,UPDATE: AND SO IT BEGINS A police officer hold...,0


In [11]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

  text = re.sub('\[.*?\]', '', text)
  text = re.sub('https?://\S+|www\.\S+', '', text)
  text = re.sub('\w*\d\w*', '', text)


In [12]:
df["text"]=df["text"].apply(wordopt)

Defining dependent and independent variables

In [14]:
x=df["text"]
y=df["class"]

Splitting training and testing

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

Converting text to vectors

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

Logistic Regression

In [20]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train,y_train)

In [21]:
pred_lr=LR.predict(xv_test)

In [22]:
LR.score(xv_test,y_test)

0.9841614906832298

In [23]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98      4409
           1       0.98      0.99      0.99      5251

    accuracy                           0.98      9660
   macro avg       0.98      0.98      0.98      9660
weighted avg       0.98      0.98      0.98      9660



Decision Tree classification

In [25]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

In [26]:
pred_dt=DT.predict(xv_test)

In [27]:
DT.score(xv_test,y_test)

0.9942028985507246

In [28]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4409
           1       0.99      1.00      0.99      5251

    accuracy                           0.99      9660
   macro avg       0.99      0.99      0.99      9660
weighted avg       0.99      0.99      0.99      9660



Gradiant Boosting Classifier

In [30]:
from sklearn.ensemble import GradientBoostingClassifier

GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)

In [31]:
pred_gbc=GBC.predict(xv_test)

In [32]:
GBC.score(xv_test,y_test)

0.9956521739130435

In [33]:
print(classification_report(y_test, pred_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      4409
           1       0.99      1.00      1.00      5251

    accuracy                           1.00      9660
   macro avg       1.00      1.00      1.00      9660
weighted avg       1.00      1.00      1.00      9660



Random Forest Classifier

In [35]:
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)

In [36]:
RFC.score(xv_test,y_test)

0.9820910973084886

In [37]:
print(classification_report(y_test, pred_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      4409
           1       0.99      1.00      1.00      5251

    accuracy                           1.00      9660
   macro avg       1.00      1.00      1.00      9660
weighted avg       1.00      1.00      1.00      9660



Model Testing

In [39]:
def output_lable(n):
    if n.count(0)>n.count(1):
        print("\nFake News\n")
    else:
        print("\nNot A Fake News\n")
    
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt) 
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    my_ls=[pred_LR[0],pred_DT[0],pred_GBC[0],pred_RFC[0]]
    output_lable(my_ls)

In [40]:
news = str(input())
manual_testing(news)

 Seven Iranians freed in the prisoner swap have not returned to Iran,"21st Century Wire says This week, the historic international Iranian Nuclear Deal was punctuated by a two-way prisoner swap between Washington and Tehran, but it didn t end quite the way everyone expected. On the Iranian side, one of the U.S. citizens who was detained in Iran, Nosratollah Khosravi-Roodsari, has stayed in Iran, but on the U.S. side   all 7 of the Iranians held in U.S. prisons DID NOT show up to their flight to Geneva for the prisoner exchange   with at least 3 electing to stay in the U.S  TEHRAN SIDE: In Iran, 5 U.S. prisoners were released, with 4 of them making their way to Germany via Switzerland.Will Robinson Daily MailNone of the Iranians freed in the prisoner swap have returned home and could still be in the United States, it has been reported.The seven former inmates, who were released as part of a deal with the Islamic republic, did not show up to get a flight to Geneva, Switzerland, where the


Fake News



In [77]:
import pickle
with open("model_GBC.pkl","wb") as f1:
    pickle.dump(GBC,f1)

In [79]:
with open("model_LR.pkl","wb") as f2:
    pickle.dump(LR,f2)
with open("model_DT.pkl","wb") as f3:
    pickle.dump(DT,f3)
with open("model_RFC.pkl","wb") as f4:
    pickle.dump(RFC,f4)