# Fake News Detection

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import re
import string

In [17]:
fake_data = pd.read_csv("Fake.csv")
true_data = pd.read_csv("True.csv")

In [19]:
## Head will display only 5 row from the dataset
fake_data.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [20]:
true_data.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [21]:
## Shape Tells the total row and columns in dataset
fake_data.shape,true_data.shape

((23481, 4), (21417, 4))

In [22]:
## Adding a column to classify 0=Fake news and 1=True news
fake_data['class']=0
true_data["class"]=1

In [23]:
## Tail will show last 10 Row form the dataset
fake_data_manual_testing=fake_data.tail(10)
for i in range(23480,21470,-1):
    fake_data.drop([i],axis=0,inplace=True)
true_data_manual_testing=true_data.tail(10)
for i in range(21416,21406-1):
    true_data.drop([i],axis=0,inplace=True)
    

In [24]:
## Merging the two dataset into one by using concatenation
data_manual_testing=pd.concat([fake_data_manual_testing,true_data_manual_testing],axis=0)
data_manual_testing.to_csv("Manual_testing.csv")

In [25]:
## Merging the Fake and True news
data_merge=pd.concat([fake_data,true_data],axis=0)
data_merge.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [26]:
## Simply removing the title,subject and date from the dataset
df=data_merge.drop(["title","subject","date"],axis=1)
df.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [27]:
df=df.sample(frac=1)

In [28]:
df.head()

Unnamed: 0,text,class
5793,Thinking before you speak is basically a requi...,0
10829,"MARFA, Texas (Reuters) - U.S. Supreme Court Ju...",1
17063,The cover-ups and mistruths appear to be numer...,0
16427,MADRID (Reuters) - A statement by the leader o...,1
2626,Donald Trump should not be lashing out like th...,0


In [29]:
## Checking the null value in the dataset
df.isnull().sum()

text     0
class    0
dtype: int64

In [30]:
## Removing unwanted character form the dataset
def word_drop(text):
    text=text.lower()
    text=re.sub('\[.*?\]','',text)
    text=re.sub("\\W"," ",text)
    text=re.sub('https?://\S+|www\.\S+','',text)
    text=re.sub('<.*?>+','',text)
    text=re.sub('[%s]'% re.escape(string.punctuation),'',text)
    text=re.sub('\n','',text)
    text=re.sub('\w*\d\w*','',text)
    return text

In [34]:
## Applying the Above function to the dataset
df["text"]=df["text"].apply(word_drop)

In [33]:
df.head()

Unnamed: 0,text,class
5793,thinking before you speak is basically a requi...,0
10829,marfa texas reuters u s supreme court ju...,1
17063,the cover ups and mistruths appear to be numer...,0
16427,madrid reuters a statement by the leader o...,1
2626,donald trump should not be lashing out like th...,0


In [35]:
dependent_x =df["text"]
dependent_y=df["class"]

In [36]:
## Now Traning the dataset
x_train,x_test,y_train,y_test=train_test_split(dependent_x,dependent_y,test_size=.25)

In [39]:
## Vectorizing the text
from sklearn.feature_extraction.text import TfidfVectorizer

In [40]:
vectorization = TfidfVectorizer()
xv_train=vectorization.fit_transform(x_train)
xv_test=vectorization.transform(x_test)

# Logictic Regression

In [41]:
from sklearn.linear_model import LogisticRegression

In [42]:
LR = LogisticRegression()
LR.fit(xv_train,y_train)

LogisticRegression()

In [43]:
LR.score(xv_test,y_test)

0.9843312814773363

In [44]:
pred_LR = LR.predict(xv_test)

In [45]:
print(classification_report(y_test,pred_LR))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98      5364
           1       0.98      0.99      0.98      5358

    accuracy                           0.98     10722
   macro avg       0.98      0.98      0.98     10722
weighted avg       0.98      0.98      0.98     10722



# Decision Tree Classification

In [46]:
from sklearn.tree import DecisionTreeClassifier

In [49]:
DT = DecisionTreeClassifier()
DT.fit(xv_train,y_train)

DecisionTreeClassifier()

In [48]:
DT.score(xv_test,y_test)

0.9949636261891438

In [50]:
pred_DT=DT.predict(xv_test)

In [51]:
print(classification_report(y_test,pred_DT))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5364
           1       1.00      1.00      1.00      5358

    accuracy                           1.00     10722
   macro avg       1.00      1.00      1.00     10722
weighted avg       1.00      1.00      1.00     10722



# Gradient Bosting Classifier

In [52]:
from sklearn.ensemble import GradientBoostingClassifier

In [53]:
GBC =GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train,y_train)

GradientBoostingClassifier(random_state=0)

In [54]:
GBC.score(xv_test,y_test)

0.9954299570975564

In [55]:
pred_GBC=GBC.predict(xv_test)

In [58]:
print(classification_report(y_test,pred_GBC))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5345
           1       0.99      1.00      1.00      5377

    accuracy                           1.00     10722
   macro avg       1.00      1.00      1.00     10722
weighted avg       1.00      1.00      1.00     10722



# Random Forest Classifier

In [56]:
from sklearn.ensemble import RandomForestClassifier

In [57]:
RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train,y_train)

RandomForestClassifier(random_state=0)

In [58]:
pred_RFC=RFC.predict(xv_test)

In [59]:
print(classification_report(y_test,pred_RFC))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5364
           1       0.99      0.99      0.99      5358

    accuracy                           0.99     10722
   macro avg       0.99      0.99      0.99     10722
weighted avg       0.99      0.99      0.99     10722



# Manual Testing 

In [60]:
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"
    
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(word_drop) 
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(output_lable(pred_LR[0]), 
                                                                                                              output_lable(pred_DT[0]), 
                                                                                                              output_lable(pred_GBC[0]), 
                                                                                                              output_lable(pred_RFC[0])))

In [76]:
news = str(input())
manual_testing(news)

Paul Craig RobertsIn the last years of the 20th century fraud entered US foreign policy in a new way.  On false pretenses Washington dismantled Yugoslavia and Serbia in order to advance an undeclared agenda. In the 21st century this fraud multiplied many times. Afghanistan, Iraq, Somalia, and Libya were destroyed, and Iran and Syria would also have been destroyed if the President of Russia had not prevented it.  Washington is also behind the current destruction of Yemen, and Washington has enabled and financed the Israeli destruction of Palestine.  Additionally, Washington operated militarily within Pakistan without declaring war, murdering many women, children, and village elders under the guise of  combating terrorism.  Washington s war crimes rival those of any country in history.I have documented these crimes in my columns and books (Clarity Press). Anyone who still believes in the purity of Washington s foreign policy is a lost soul  Russia and China now have a strategic alliance 