# Fake News Detection

## 1- Import libraries.

In [1]:
import numpy as np
import pandas as pd
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

## 2- Read datasets.

In [2]:
df_fake = pd.read_csv('D://Projects/Fake News Detection/Data/Fake.csv')
df_true = pd.read_csv('D://Projects/Fake News Detection/Data/True.csv')

## 3- Show the first 5 rows of datasets.

In [3]:
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


## 4- Check the shape of the datasets.

In [5]:
df_fake.shape , df_true.shape

((23481, 4), (21417, 4))

## 5- Creat new column named 'class' which has the kind of text.

In [6]:
df_fake['class'] = 0
df_true['class'] = 1

## 6- Split the last 10 rows from the datasets to testing data.

In [7]:
df_fake_test = df_fake.tail(10)
for i in range(23480,23470,-1):
    df_fake.drop(i,axis=0,inplace=True)
df_true_test = df_true.tail(10)
for i in range(21416,21406,-1):
    df_true.drop(i,axis=0,inplace=True)

## 7- Merge testing data.

In [8]:
df_test = pd.concat([df_fake_test,df_true_test],axis=0)
df_test.to_csv('Test.csv')

## 8- Merge data.

In [9]:
df_merge = pd.concat([df_fake,df_true],axis=0)

## 9- Remove unimportant columns.

In [10]:
df = df_merge.drop(columns=['title','subject','date'])
df.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


## 10- Check NoN values in dataset.

In [11]:
df.isna().sum()

text     0
class    0
dtype: int64

## 11- Build text convert function.

In [12]:
def word_drop(text):
    text = text.lower()
    text = re.sub ('\[.*?\]', '', text)
    text = re.sub("\\W"," ", text)
    text= re.sub('https?://\S+|www\.\S+','', text)
    text= re.sub ('<.*?>+','', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text= re.sub('\n', '', text)
    text = re.sub('\w*\d\w*','', text)
    return text

## 12- Apply word_drop function on our dataset.

In [13]:
df['text'] = df['text'].apply(word_drop)

## 13- Split data into feature X and target y.

In [14]:
X = df['text']
y = df['class']

## 14- Split X , y into X_train, X_test, y_train, y_test.

In [15]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)

## 15- Fit vectorizer on the data.

In [16]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(X_train)
xv_test = vectorization.transform(X_test)

## 16- Build Logistic Regression model.

In [17]:
LR = LogisticRegression()
LR.fit(xv_train,y_train)
pred_LR = LR.predict(xv_test)
print(accuracy_score(y_test,pred_LR))
print(classification_report(y_test,pred_LR))

0.9876114081996435
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5894
           1       0.99      0.99      0.99      5326

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



## 18-Build Decidion Tree Classifier model.

In [18]:
DT = DecisionTreeClassifier()
DT.fit(xv_train,y_train)
pred_DT = DT.predict(xv_test)
print(accuracy_score(y_test,pred_DT))
print(classification_report(y_test,pred_DT))

0.9954545454545455
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5894
           1       1.00      1.00      1.00      5326

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



## 19- Build Gradient Boosting Classifier model.

In [19]:
GB = GradientBoostingClassifier()
GB.fit(xv_train,y_train)
pred_GB = GB.predict(xv_test)
print(accuracy_score(y_test,pred_GB))
print(classification_report(y_test,pred_GB))

0.9951871657754011
              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5894
           1       0.99      1.00      0.99      5326

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



## 20- Build Random Forest Classifier model.

In [20]:
RF = RandomForestClassifier()
RF.fit(xv_train,y_train)
pred_RF = RF.predict(xv_test)
print(accuracy_score(y_test,pred_RF))
print(classification_report(y_test,pred_RF))

0.9880570409982175
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5894
           1       0.99      0.99      0.99      5326

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [24]:
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "True News"
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test= pd.DataFrame (testing_news)
    new_def_test["text"] = new_def_test["text"].apply(word_drop)
    new_x_test= new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GB = GB.predict(new_xv_test)
    pred_RF = RF.predict(new_xv_test)
    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(output_lable(pred_LR),
                                                                                                              output_lable(pred_DT),
                                                                                                              output_lable(pred_GB),
                                                                                                              output_lable(pred_RF)))

In [25]:
news = str(input('Enter your news'))
manual_testing(news)

Enter your newsLONDON (Reuters) - LexisNexis, a provider of legal, regulatory and business information, said on Tuesday it had withdrawn two products from the Chinese market in March this year after it was asked to remove some content.  The issue of academic freedom in China hit the headlines this week after the leading British academic publisher,  Cambridge University Press, said it had complied with a request to block online access to some scholarly articles in China.  It later reversed its position.   Earlier this year LexisNexis Business Insight Solutions in China was asked to remove some content from its database,  LexisNexis said in a statement.  In March 2017, the company withdrew two products (Nexis and LexisNexis Academic) from the Chinese market.  LexisNexis is owned by information group Relx. 


LR Prediction: True News 
DT Prediction: True News 
GBC Prediction: True News 
RFC Prediction: True News
