In [1]:
# import libraries

import pandas as pd 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import re 
import string

In [2]:
# import Fake news and True News data 

df_fake = pd.read_csv('fake.csv')
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [3]:
df_fake.shape

(23481, 4)

In [4]:
# import true news data 

df_true = pd.read_csv('true.csv')
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
# add target columns 

df_fake['class'] = 0
df_true['class'] = 1

In [6]:
df_fake.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [7]:
df_true.head()

Unnamed: 0,title,text,subject,date,class
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [8]:
# shape of the data 

df_fake.shape , df_true.shape

((23481, 5), (21417, 5))

In [9]:
# drop unwanted Columns 

df_fake = df_fake.drop(['title' , 'subject' , 'date'] ,axis = 1)
df_true = df_true.drop(['title' , 'subject' , 'date'] ,axis = 1)

In [10]:
df_fake.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [11]:
df_true.head()

Unnamed: 0,text,class
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1
2,WASHINGTON (Reuters) - The special counsel inv...,1
3,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [12]:
# Extract the data from fake news for testing 10 Entries 

df_fake_manual_testing = df_fake.tail(10)

for i in range(23480 , 23470 , -1):
    df_fake.drop([i] ,axis = 0 , inplace = True)
    
    

In [13]:
df_fake_manual_testing

Unnamed: 0,text,class
23471,"21st Century Wire says This week, the historic...",0
23472,By Dady Chery and Gilbert MercierAll writers ...,0
23473,Vic Bishop Waking TimesOur reality is carefull...,0
23474,Paul Craig RobertsIn the last years of the 20t...,0
23475,Robert Fantina CounterpunchAlthough the United...,0
23476,21st Century Wire says As 21WIRE reported earl...,0
23477,21st Century Wire says It s a familiar theme. ...,0
23478,Patrick Henningsen 21st Century WireRemember ...,0
23479,21st Century Wire says Al Jazeera America will...,0
23480,21st Century Wire says As 21WIRE predicted in ...,0


In [14]:
# Extract the data from True news for testing 10 Entries 

df_true_manual_testing = df_true.tail(10)

for i in range(21416 , 21406 , -1):
    df_true.drop([i] ,axis = 0 , inplace = True)
    

In [15]:
df_true_manual_testing

Unnamed: 0,text,class
21407,"SAO PAULO (Reuters) - Cesar Mata Pires, the ow...",1
21408,GENEVA (Reuters) - North Korea and the United ...,1
21409,GENEVA (Reuters) - North Korea and the United ...,1
21410,COPENHAGEN (Reuters) - Danish police said on T...,1
21411,UNITED NATIONS (Reuters) - Two North Korean sh...,1
21412,BRUSSELS (Reuters) - NATO allies on Tuesday we...,1
21413,"LONDON (Reuters) - LexisNexis, a provider of l...",1
21414,MINSK (Reuters) - In the shadow of disused Sov...,1
21415,MOSCOW (Reuters) - Vatican Secretary of State ...,1
21416,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,1


In [16]:
df_fake_manual_testing.shape , df_true_manual_testing.shape

((10, 2), (10, 2))

In [17]:
# create manual dataset for testing 

df_manual_testing = pd.concat([df_fake_manual_testing ,df_true_manual_testing ] , axis = 0)

In [18]:
df_manual_testing

Unnamed: 0,text,class
23471,"21st Century Wire says This week, the historic...",0
23472,By Dady Chery and Gilbert MercierAll writers ...,0
23473,Vic Bishop Waking TimesOur reality is carefull...,0
23474,Paul Craig RobertsIn the last years of the 20t...,0
23475,Robert Fantina CounterpunchAlthough the United...,0
23476,21st Century Wire says As 21WIRE reported earl...,0
23477,21st Century Wire says It s a familiar theme. ...,0
23478,Patrick Henningsen 21st Century WireRemember ...,0
23479,21st Century Wire says Al Jazeera America will...,0
23480,21st Century Wire says As 21WIRE predicted in ...,0


In [19]:
df_manual_testing.to_csv('manual_test.csv' , index = False)

In [20]:
# Original data 

# concat the fake news and true news data 

df_merge = pd.concat([df_fake , df_true] , axis = 0)
df_merge.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [21]:
df_merge['class'].value_counts()

0    23471
1    21407
Name: class, dtype: int64

In [22]:
df_merge.columns

Index(['text', 'class'], dtype='object')

In [23]:
# check missing values 

df_merge.isnull().sum()

text     0
class    0
dtype: int64

In [24]:
df_merge

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0
...,...,...
21402,ON BOARD A U.S. MILITARY AIRCRAFT (Reuters) - ...,1
21403,WASHINGTON (Reuters) - The United States sugge...,1
21404,WASHINGTON (Reuters) - The United States has d...,1
21405,ISLAMABAD (Reuters) - Outlining a new strategy...,1


In [25]:
# shuffle data 

df = df_merge.sample(frac = 1)
df

Unnamed: 0,text,class
15064,Ever feel like a headline you re reading is to...,0
1931,Donald Trump is unwell.The beleaguered preside...,0
7579,A Zion Christian Church prophet named Alec Ndi...,0
1007,A group of disabled Americans protesting the S...,0
1295,If Ivanka Trump really cares about the LGBTQ c...,0
...,...,...
3829,WASHINGTON (Reuters) - The FBI’s acting head s...,1
10963,Someone needs to educate Ellen DeGeneres on Pr...,0
10421,Republican Senator Charles Grassley wants answ...,0
12872,We all know by now that Hillary Clinton will m...,0


In [26]:
# reset index 

df.reset_index(inplace = True)

In [27]:
df

Unnamed: 0,index,text,class
0,15064,Ever feel like a headline you re reading is to...,0
1,1931,Donald Trump is unwell.The beleaguered preside...,0
2,7579,A Zion Christian Church prophet named Alec Ndi...,0
3,1007,A group of disabled Americans protesting the S...,0
4,1295,If Ivanka Trump really cares about the LGBTQ c...,0
...,...,...,...
44873,3829,WASHINGTON (Reuters) - The FBI’s acting head s...,1
44874,10963,Someone needs to educate Ellen DeGeneres on Pr...,0
44875,10421,Republican Senator Charles Grassley wants answ...,0
44876,12872,We all know by now that Hillary Clinton will m...,0


In [28]:
# delete index column


df.drop('index' , axis = 1 ,inplace = True)

In [29]:
df

Unnamed: 0,text,class
0,Ever feel like a headline you re reading is to...,0
1,Donald Trump is unwell.The beleaguered preside...,0
2,A Zion Christian Church prophet named Alec Ndi...,0
3,A group of disabled Americans protesting the S...,0
4,If Ivanka Trump really cares about the LGBTQ c...,0
...,...,...
44873,WASHINGTON (Reuters) - The FBI’s acting head s...,1
44874,Someone needs to educate Ellen DeGeneres on Pr...,0
44875,Republican Senator Charles Grassley wants answ...,0
44876,We all know by now that Hillary Clinton will m...,0


In [30]:
# clean data 

def wordopt(text):
    
    text = text.lower()
    text = re.sub( "\[.*?\]", "" , text )
    text = re.sub( "\\W ",  "" , text)
    text = re.sub('https?://\S+|www\.S+' , "" , text)
    text = re.sub('<.*?>+' , "" , text)
    text = re.sub('[%s]'%re.escape(string.punctuation) , "" , text)
    text = re.sub('\n' ,"" , text )
    
    return text

In [31]:
df['text'] = df['text'].apply(wordopt)

In [32]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [33]:
# Example of re.sub

data = "hello %my name @is *junaid"
data
data = re.sub('%' , "", data)
data = re.sub('@' , "", data)
data = re.sub('\*' , "", data)
data

'hello my name is junaid'

In [34]:
# Seperate 

X = df['text']
y = df['class']

In [35]:
# split the data 

X_train , X_test , y_train , y_test = train_test_split(X,y , test_size =0.2 , random_state = 0)

In [36]:
len(X_train) , len(y_train)

(35902, 35902)

In [37]:
len(X_test) , len(y_test)

(8976, 8976)

In [38]:
# Convert text data 

from sklearn.feature_extraction.text import CountVectorizer

In [39]:
cv = CountVectorizer()

In [40]:
X_train = cv.fit_transform(X_train)
X_test = cv.transform(X_test)

In [41]:
# 1 - model Logistic Regression 

from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()

LR.fit(X_train , y_train)

pred_lr = LR.predict(X_test)

LR.score(X_test , y_test)*100

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


98.93048128342245

In [42]:
# 2 - Random Forest 

from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(random_state = 0)

RFC.fit(X_train , y_train)

pred_RFC = RFC.predict(X_test)

RFC.score(X_test , y_test)*100

97.42647058823529

In [43]:
# 3 - Naive Bayes 

from sklearn.naive_bayes import MultinomialNB

MN = MultinomialNB()

MN.fit(X_train , y_train)

pred_MN = MN.predict(X_test)

MN.score(X_test , y_test)*100

95.96702317290553

In [53]:

# 
def output_label(n):
        if n == 0:
            return "Fake News "
        else:
            return "True News"

        
        
# tes manual data 

def manual_testing(news):
    
    # convert into dataframe 
    
    testting_news = {"Text" : [news]}
    new_def_test = pd.DataFrame(testting_news)
    
    new_def_test['Text'] = new_def_test['Text'].apply(wordopt)
    
    # convert values 
    
    new_x_test = new_def_test['Text']
    new_x_test = cv.transform(new_x_test)
    
    # predict 
    #1 - lOGISTIC REGRESSION 
    yp_LR = LR.predict(new_x_test)
    #print("Logistic regression ",yp_LR)
    
    #2 - Random_forest
    
    yp_RFC = RFC.predict(new_x_test)
    #print("Random Forest Classifier ",yp_RFC)
    
    #3 - Naive Bayes 
    yp_MN = MN.predict(new_x_test)
    #print("Naive Bayes " ,yp_MN )
    
    print(f"\n\nLogistic Regression Prediction {output_label(yp_LR[0])}")
    print(f"\nRandom Forest Prediction {output_label(yp_RFC[0])}")
    print(f"\nNaive bayes Prediction {output_label(yp_MN[0])}")

In [58]:
news = input("Enter News :")



Enter News :New Delhi: The Trinamool has extended its contract with election strategists I-PAC, or Indian Political Action Committee, to 2026 after a successful partnership saw Mamata Banerjee sweep the April-May Assembly polls and return as Chief Minister of Bengal for a third straight term. This version of I-PAC, however, will not be led in day-to-day operations by master strategist Prashant Kishor, who guided the Trinamool (and, in Tamil Nadu, the DMK-Congress alliance) to victory over the BJP (and its southern ally, the AIADMK) and then told NDTV he wanted to "quit".  It will be interesting to see how well I-PAC and its new nine-member leadership team can function without Mr Kishor, and how efficiently it can win elections for Trinamool and its other clients.  The new contract says I-PAC will be involved in all state elections - panchayat and local body.  The contract extension will run till the next round of Assembly elections in Bengal, by which time key states, including UP, Guj

In [59]:
manual_testing(news)



Logistic Regression Prediction True News

Random Forest Prediction True News

Naive bayes Prediction True News
