# Fake News Authenticator

In [1]:
import pandas as  pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import re 
import string

### Loading the fake and real datasets

In [2]:
# Read both source CSVs into DataFrames: "Fake.csv" first, then "True.csv".
df_fake, df_true = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

In [3]:
df_fake.head(10);

In [4]:
df_true.head(10);

##### Inserting a column called "class" into the fake and real news datasets to categorize fake and true news.

In [5]:
# Label every row: 0 for the fake-news frame, 1 for the true-news frame.
df_fake = df_fake.assign(**{"class": 0})
df_true = df_true.assign(**{"class": 1})

##### Removing the last 10 rows from both datasets for manual testing

In [6]:
df_fake.shape, df_true.shape;

In [7]:
# Hold out the last MANUAL_TEST_ROWS rows of each frame for manual testing and
# remove exactly those rows from the data used for training.
# The split is positional (tail / iloc) rather than by hard-coded index labels,
# so the held-out rows and the removed rows are always the same rows,
# whatever the size of the CSV files.
MANUAL_TEST_ROWS = 10

df_fake_manual_testing = df_fake.tail(MANUAL_TEST_ROWS).copy()
df_fake = df_fake.iloc[:-MANUAL_TEST_ROWS].copy()

df_true_manual_testing = df_true.tail(MANUAL_TEST_ROWS).copy()
df_true = df_true.iloc[:-MANUAL_TEST_ROWS].copy()

In [8]:
# Stack the two hold-out frames (fake rows first, then true rows) and save them
# to "manual_test.csv" for later manual checks.
df_manual_testing = pd.concat([df_fake_manual_testing, df_true_manual_testing])
df_manual_testing.to_csv("manual_test.csv")

In [9]:
# Combine the remaining fake and true rows row-wise into a single frame.
df_marge = pd.concat([df_fake, df_true])
df_marge.head(10);

In [10]:
# Remove the columns that are not used as model input.
df = df_marge.drop(columns=["title", "subject", "date"])
df.head(10);

In [11]:
# Rebuilds the same hold-out frame as the earlier cell and writes it a second
# time, under the file name "manual_testing.csv".
df_manual_testing = pd.concat([df_fake_manual_testing, df_true_manual_testing])
df_manual_testing.to_csv("manual_testing.csv")

##### Shuffling the merged fake and true dataframe

In [12]:
# Shuffle all rows. The fixed seed makes the order reproducible on a fresh run.
# reset_index(drop=True) replaces the index labels carried over from the
# row-wise concat of the two frames with a clean 0..n-1 range.
df = df.sample(frac = 1, random_state = 42).reset_index(drop = True)

In [13]:
df.head(10);

In [14]:
df.isnull().sum();

#### The "title", "subject" and "date" columns are not required for detecting fake news, so they are dropped.

In [15]:
def word_drop(text):
    """Normalize a news text before it is vectorized.

    Steps, in order: lowercase; remove ``[...]`` spans; remove URLs
    (``http://``, ``https://`` or ``www.``); remove ``<...>`` tags; replace
    every remaining non-word character with a space; delete underscores;
    delete every token that contains a digit.

    URLs and tags are removed *before* non-word characters are blanked out:
    once ``:``, ``/``, ``.``, ``<`` and ``>`` have become spaces, those
    patterns can no longer match.

    Args:
        text: The raw text (a ``str``).

    Returns:
        The cleaned text. Runs of spaces left behind by removed characters
        are not collapsed.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Newlines are non-word characters too, so they also become spaces here.
    text = re.sub(r'\W', ' ', text)
    # After the previous step the only punctuation character left is "_"
    # (it counts as a word character); this removes it.
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

In [16]:
# Run the text-cleaning function over every article, element by element.
df["text"] = df["text"].map(word_drop)

In [17]:
df.head(10);

#### Defining the independent and dependent variables as x and y

In [18]:
# x holds the cleaned article text (model input); y holds the 0/1 class label (target).
x, y = df["text"], df["class"]

#### Splitting the dataset into training set and testing set.

In [19]:
# Hold out 25% of the rows for evaluation. The fixed seed makes the split
# reproducible on a fresh kernel.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=42)

#### Convert text to vectors

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [21]:
# Fit the TF-IDF vectorizer on the training texts only, then reuse the fitted
# vectorizer to transform the test texts. `vectrorization` is also read later
# by manual_testing() to transform user-supplied text.
vectrorization = TfidfVectorizer()
xv_train =  vectrorization.fit_transform(x_train)
xv_test = vectrorization.transform(x_test)

### 1. Logistic Regression

In [22]:
from sklearn.linear_model import LogisticRegression

In [23]:
# Create a logistic-regression model with default settings and train it on the
# vectorized training texts (fit returns the fitted estimator itself).
LR = LogisticRegression().fit(xv_train, y_train)

In [24]:
LR.score(xv_test, y_test);

In [25]:
pred_LR = LR.predict(xv_test)

In [26]:
print(classification_report(y_test, pred_LR));

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5843
           1       0.99      0.99      0.99      5377

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



### 2. Decision Tree Classification

In [27]:
from sklearn.tree import DecisionTreeClassifier

In [28]:
# Train a decision tree on the vectorized training texts. The seed is fixed,
# matching the GBC and RFC cells, so the fitted tree is the same on every run.
DT = DecisionTreeClassifier(random_state = 0)
DT.fit(xv_train, y_train);

In [29]:
DT.score(xv_test, y_test);

In [30]:
pred_DT = DT.predict(xv_test)

In [31]:
print(classification_report(y_test, pred_DT));

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5843
           1       1.00      1.00      1.00      5377

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



### 3. Gradient Boosting Classifier

In [32]:
from sklearn.ensemble import GradientBoostingClassifier

In [33]:
# Create a seeded gradient-boosting classifier and train it on the vectorized
# training texts (fit returns the fitted estimator itself).
GBC = GradientBoostingClassifier(random_state = 0).fit(xv_train, y_train)

In [34]:
# NOTE(review): this constructs a second, unfitted estimator and discards it;
# the trained GBC is not affected. Looks like a pasted output repr — confirm
# and remove.
GradientBoostingClassifier(random_state=0);

In [35]:
GBC.score(xv_test, y_test);

In [36]:
pred_GBC = GBC.predict(xv_test)

In [37]:
print(classification_report(y_test, pred_GBC));

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5843
           1       0.99      1.00      1.00      5377

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



### 4. Random Forest Classifier

In [38]:
from sklearn.ensemble import RandomForestClassifier

In [39]:
# Create a seeded random-forest classifier and train it on the vectorized
# training texts (fit returns the fitted estimator itself).
RFC = RandomForestClassifier(random_state = 0).fit(xv_train, y_train)

In [40]:
# NOTE(review): this constructs a second, unfitted estimator and discards it;
# the trained RFC is not affected. Looks like a pasted output repr — confirm
# and remove.
RandomForestClassifier(random_state=0);

In [41]:
RFC.score(xv_test, y_test);

In [42]:
pred_RFC = RFC.predict(xv_test)

In [43]:
print(classification_report(y_test, pred_RFC));

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5843
           1       0.99      0.99      0.99      5377

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



## Model Testing With Manual Entry

##### News

In [44]:
def output_lable(n):
    """Translate a predicted class value into its display label.

    Args:
        n: The predicted class (0 or 1).

    Returns:
        "Fake News" when ``n == 0``, "Not A Fake News" when ``n == 1``,
        and None for any other value.
    """
    for class_value, label in ((0, "Fake News"), (1, "Not A Fake News")):
        if n == class_value:
            return label
    return None
        
def manual_testing(news):
    """Classify one news text with all four trained models and print the verdicts.

    The text is cleaned with ``word_drop``, vectorized with the already-fitted
    ``vectrorization`` object, and passed to ``LR``, ``DT``, ``GBC`` and
    ``RFC`` in that order.

    Args:
        news: The article text to classify.

    Returns:
        None (the value returned by ``print``).
    """
    frame = pd.DataFrame({"text": [news]})
    frame["text"] = frame["text"].apply(word_drop)
    features = vectrorization.transform(frame["text"])

    # One single-row prediction per model, mapped to its display label.
    labels = [output_lable(model.predict(features)[0]) for model in (LR, DT, GBC, RFC)]

    template = "\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}"
    return print(template.format(*labels))


In [None]:
# Read one article from the input prompt (input() already returns a str) and
# print each model's verdict for it.
news = input()
manual_testing(news)