## Installing Necessary Libraries

In [56]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import string
import re
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from zipfile import ZipFile
from tqdm import tqdm
from sklearn.feature_extraction.text import CountVectorizer

In [57]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

In [58]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from google.colab import drive
lemmatizer = WordNetLemmatizer()
import warnings
warnings.filterwarnings('ignore')
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [59]:
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [60]:
with ZipFile('/content/drive/MyDrive/Machine Learning Practice/fake_news_detection.zip','r') as zf:
  for members in tqdm(zf.infolist(),desc='Extracting'):
      zf.extract(members,'/content/drive/MyDrive/Machine Learning Practice/News_detection')

Extracting: 100%|██████████| 2/2 [00:02<00:00,  1.01s/it]


## Loading the data

In [61]:
data_fake=pd.read_csv("/content/drive/MyDrive/Machine Learning Practice/News_detection/News _dataset/Fake.csv")
data_true=pd.read_csv("/content/drive/MyDrive/Machine Learning Practice/News_detection/News _dataset/True.csv")

### Data Preview

In [62]:
data_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [63]:
data_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [64]:
data_fake["class"]=0
data_true['class']=1

In [65]:
data_fake.shape, data_true.shape

((23481, 5), (21417, 5))

In [66]:
data=pd.concat([data_fake, data_true], axis = 0)
data.head(10)

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",0
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",0
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",0
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",0
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",0


In [67]:

data.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [68]:
data=data.drop(['title','subject','date'], axis = 1)

In [69]:
#count of missing values
data.isnull().sum()

Unnamed: 0,0
text,0
class,0


In [70]:
data = data.sample(frac = 1)

In [71]:
data.head()

Unnamed: 0,text,class
14316,"This man deserves a medal for his courage, his...",0
3308,(Reuters) - Former FBI Director James Comey on...,1
20780,Ginni Thomas is one of the most hard-working a...,0
2027,WASHINGTON (Reuters) - A U.S. court on Thursda...,1
20522,WASHINGTON (Reuters) - U.S. President Donald T...,1


In [72]:
data.reset_index(inplace = True)
data.drop(['index'], axis = 1, inplace = True)

In [73]:
data.columns

Index(['text', 'class'], dtype='object')

In [74]:
data.head()

Unnamed: 0,text,class
0,"This man deserves a medal for his courage, his...",0
1,(Reuters) - Former FBI Director James Comey on...,1
2,Ginni Thomas is one of the most hard-working a...,0
3,WASHINGTON (Reuters) - A U.S. court on Thursda...,1
4,WASHINGTON (Reuters) - U.S. President Donald T...,1


## Preprocessing Text

In [75]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]','',text)
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://\S+|www\.\S+','',text)
    text = re.sub('<.*?>+',b'',text)
    text = re.sub('[%s]' % re.escape(string.punctuation),'',text)
    text = re.sub('\w*\d\w*','',text)
    return text

In [76]:
tqdm.pandas()
data['text'] = data['text'].progress_apply(wordopt)

100%|██████████| 44898/44898 [00:28<00:00, 1560.14it/s]


#### Defining dependent and independent variable as x and y

In [77]:
x = data['text']
y = data['class']

## Training the model

In [78]:
x_train, x_test, y_train, y_test = train_test_split(x,y,train_size = 0.75)

### Extracting Features from the Text

#### Convert text to vectors

In [79]:
'''from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)'''

'from sklearn.feature_extraction.text import TfidfVectorizer\n\nvectorization = TfidfVectorizer()\nxv_train = vectorization.fit_transform(x_train)\nxv_test = vectorization.transform(x_test)'

In [80]:
vectorization = CountVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [81]:
#logisticRegression
logR=LogisticRegression()
logR.fit(xv_train, y_train)
y_pred=logR.predict(xv_test)
logR.score(xv_test, y_test)

0.9959910913140312

In [82]:
#Decision Tree Classifier
dtc=DecisionTreeClassifier()
dtc.fit(xv_train, y_train)
y_pred=dtc.predict(xv_test)
dtc.score(xv_test, y_test)

0.9965256124721603

In [83]:
#Random Forest Classifier
rfc=RandomForestClassifier()
rfc.fit(xv_train, y_train)
y_pred=rfc.predict(xv_test)
rfc.score(xv_test, y_test)

0.987706013363029

In [84]:
#MultinomialNB
nb=MultinomialNB()
nb.fit(xv_train, y_train)
y_pred=nb.predict(xv_test)
nb.score(xv_test, y_test)

0.9525167037861916

In [85]:
def output_lable(n):
    if n==0:
        return "Fake News"
    elif n==1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test['text'] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = logR.predict(new_xv_test)
    pred_DT = dtc.predict(new_xv_test)
    pred_GB = rfc.predict(new_xv_test)
    pred_RF = nb.predict(new_xv_test)

    return print("\n\nLR Predicition: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction:{}".format(output_lable(pred_LR[0]),
                                                                                                             output_lable(pred_DT[0]),
                                                                                                             output_lable(pred_GB[0]),
                                                                                                             output_lable(pred_RF[0])))

In [86]:
news = str(input())
manual_testing(news)

It s been two days since the Secret Service determined that there was no assassination attempt on Donald Trump s life, yet conservatives continue to harp about it.On Saturday night in Reno, an anti-Trump Republican attempted to hold up a sign. Trump supporters immediately surrounded the man. Someone in the crowd yelled  gun  and that was the excuse used to beat up the man until Secret Service intervened. Trump was rushed off stage.The man was quickly released after no gun was found and it became clear that there was never an assassination attempt.Let s repeat that. There was NO assassination attempt and this information has been available since early Sunday morning. But that hasn t stopped conservatives from posting about it as if Trump somehow heroically survived an attempt on his life.Not only did Bristol Palin post such bullshit on Sunday morning hours after the Secret Service made their determination, conservative white grievance host Tomi Lahren did the same thing on Monday mornin

In [87]:
news=str(input())
manual_testing(news)

DETROIT (Reuters) - Fiat Chrysler Automobiles (FCHA.MI) is ready to comply with the new rules on the auto industry the incoming administration of U.S. president-elect Donald Trump will introduce, the carmaker’s CEO Sergio Marchionne said on Monday. “We are waiting for the new rules and will adapt to them,” Marchionne said, speaking to reporters at the Detroit motor show. On Sunday Fiat Chrysler said it would invest $1 billion to modernize two plants in the U.S. Midwest and create 2,000 jobs. “The announcement to invest in the U.S. was already part of our plans,” Marchionne said, adding he had not met Trump since his election victory. Marchionne said he did not want to speak for the new U.S. President-to-be but added commercial barriers could be an “obstacle to development”. 


LR Predicition: Not A Fake News 
DT Prediction: Not A Fake News 
GBC Prediction: Not A Fake News 
RFC Prediction:Not A Fake News


In [100]:
pickle.dump(rfc,open("Fake_news_algorithm.pkl","wb"))

In [101]:
pickle.dump(logR,open("Fake_news_algorithm1.pkl","wb"))

In [106]:
pickle.dump(nb,open("Fake_news_algorithm2.pkl","wb"))

In [107]:
pickle.dump(vectorization,open("countVectorizer.pkl","wb"))

In [110]:
model=pickle.load(open("/content/Fake_news_algorithm2.pkl",'rb'))

In [111]:
model1=pickle.load(open("/content/countVectorizer.pkl",'rb'))

In [112]:
def output_lable(n):
    if n==0:
        return "Fake News"
    elif n==1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test['text'] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    print(new_x_test.shape)
    new_xv_test = model1.transform(new_x_test)
    print(new_xv_test.shape)
    pred_rfc = model.predict(new_xv_test)

    return print("\nRFC Prediction:{}".format(output_lable(pred_rfc[0])))

In [113]:
news = str(input())
manual_testing(news)

It s been two days since the Secret Service determined that there was no assassination attempt on Donald Trump s life, yet conservatives continue to harp about it.On Saturday night in Reno, an anti-Trump Republican attempted to hold up a sign. Trump supporters immediately surrounded the man. Someone in the crowd yelled  gun  and that was the excuse used to beat up the man until Secret Service intervened. Trump was rushed off stage.The man was quickly released after no gun was found and it became clear that there was never an assassination attempt.Let s repeat that. There was NO assassination attempt and this information has been available since early Sunday morning. But that hasn t stopped conservatives from posting about it as if Trump somehow heroically survived an attempt on his life.Not only did Bristol Palin post such bullshit on Sunday morning hours after the Secret Service made their determination, conservative white grievance host Tomi Lahren did the same thing on Monday mornin