# Fake News Detection
This project uses various **Natural Language Processing Techniques** and **Machine Learning Algorithms** to classify fake news articles.

**Table of Contents**
1. Importing Libraries
2. Importing Dataset
3. Data Preprocessing
4. Feature Engineering
5. Building Machine Learning Models
6. Final Model Testing

# 1. Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

# 2. Importing Dataset
The dataset consists of two .csv files: one containing fake news and the other containing real news.
Each file is loaded into its own data frame.

In [None]:
# Load the two source files into separate frames: Fake.csv into df_fake, True.csv into df_true.
# NOTE(review): the relative "../input/..." paths look like a Kaggle input layout — adjust when running elsewhere.
df_fake = pd.read_csv("../input/fake-and-real-news-dataset/Fake.csv")
df_true = pd.read_csv("../input/fake-and-real-news-dataset/True.csv")

In [None]:
# Preview the first rows of the fake-news frame.
df_fake.head()

In [None]:
# Preview the first rows of the real-news frame.
df_true.head()

In [None]:
# (rows, columns) of the fake-news frame.
df_fake.shape


In [None]:
# (rows, columns) of the real-news frame.
df_true.shape

# 3. Data Preprocessing

In [None]:
# Add a "class" target column to both frames so every row stays labelled once
# the frames are merged: 0 marks fake news, 1 marks real news.
df_fake["class"] = 0
df_true["class"] = 1

In [None]:
# Check that the "class" column now appears in the fake-news frame.
df_fake.head()

In [None]:
# Check that the "class" column now appears in the real-news frame.
df_true.head()

In [None]:
# Hold out the last 10 rows of each frame for manual testing.
#
# The held-out rows are removed by their own index labels rather than by
# hard-coded row numbers, so this keeps working if the CSV files change size.
# .copy() makes each held-out frame independent of the frame it was cut from,
# so later column assignments on it do not touch (or warn about) the original.
df_fake_manual_testing = df_fake.tail(10).copy()
df_fake.drop(df_fake_manual_testing.index, axis = 0, inplace = True)

df_true_manual_testing = df_true.tail(10).copy()
df_true.drop(df_true_manual_testing.index, axis = 0, inplace = True)

In [None]:
# Shapes of both frames after the manual-testing rows were removed.
df_fake.shape, df_true.shape

In [None]:
# Set the target column on the manual-testing frames as well (0 = fake, 1 = real).
# Each frame is copied first: it was obtained with tail() from a larger frame,
# and assigning a column directly to such a slice triggers pandas'
# SettingWithCopyWarning.
df_fake_manual_testing = df_fake_manual_testing.copy()
df_fake_manual_testing["class"] = 0

df_true_manual_testing = df_true_manual_testing.copy()
df_true_manual_testing["class"] = 1

In [None]:
# Show up to 10 rows of the held-out fake-news frame.
df_fake_manual_testing.head(10)

In [None]:
# Show up to 10 rows of the held-out real-news frame.
df_true_manual_testing.head(10)

In [None]:
# Merge the held-out fake and real rows into one frame (axis=0 stacks rows)
# and save it to manual_testing.csv for later manual checks.
# NOTE(review): to_csv is called without an index argument, so the row index is
# presumably written as the first CSV column — confirm that is intended.
df_manual_testing = pd.concat([df_fake_manual_testing,df_true_manual_testing], axis = 0)
df_manual_testing.to_csv("manual_testing.csv")

In [None]:
# Merge the remaining fake and real rows into a single frame (axis=0 stacks
# rows: all of df_fake first, then all of df_true) and preview the first 10 rows.
df_merge = pd.concat([df_fake, df_true], axis =0 )
df_merge.head(10)

In [None]:
# List the column names of the merged frame.
df_merge.columns

In [None]:
# Discard the title, subject and date columns; the later cells only read
# the "text" and "class" columns.
df = df_merge.drop(columns = ["title", "subject", "date"])

In [None]:
# Count the missing values (NaN) in each remaining column.
df.isnull().sum()

In [None]:
# Randomly shuffle all rows (frac = 1 samples the whole frame) so fake and
# real articles are interleaved instead of stacked one class after the other.
# random_state fixes the permutation so re-running this cell yields the same
# order; 0 is the seed value already used for the ensemble models in this notebook.
df = df.sample(frac = 1, random_state = 0)

In [None]:
# Preview the shuffled frame; the row labels are now out of order.
df.head()

In [None]:
# Replace the shuffled row labels with a fresh sequential index.
# reset_index() keeps the old labels by adding them as a new "index" column,
# which is visible in the preview below.
df.reset_index(inplace = True)
df.head()

In [None]:
# Remove the "index" column that reset_index() added; the old row labels
# are not needed any more.
df.drop(columns = ["index"], inplace = True)

In [None]:
# Confirm which columns remain after the clean-up.
df.columns

In [None]:
# Preview the cleaned, shuffled and re-indexed frame.
df.head()

# 4. Feature Engineering

In [None]:
def wordopt(text):
    """Reduce raw text to lowercase alphabetic words separated by single spaces.

    The text is lowercased, every character outside a-z / A-Z (digits,
    punctuation, link symbols, accented letters, whitespace) is turned into a
    space, and the resulting runs of whitespace are collapsed. Leading and
    trailing whitespace is removed.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The cleaned text; an empty string if no letters remain.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    # split() with no argument discards empty pieces, so joining the words
    # again leaves exactly one space between them.
    return ' '.join(letters_only.split())

In [None]:
# Preview the raw article text before cleaning.
df["text"].head()

In [None]:
# Clean every article in place by running wordopt on each value of the "text" column.
df["text"] = df["text"].apply(wordopt)

In [None]:
# Preview the article text after cleaning.
df["text"].head()

# 5. Building Machine Learning Models

In [None]:
# x holds the cleaned article text (model input); y holds the 0/1 "class" label (target).
x = df["text"]
y = df["class"]

In [None]:
# Splitting Training and Testing
# Hold out 25% of the rows for testing. random_state pins the split so that
# the same input is always partitioned the same way; 0 is the seed value the
# Gradient Boosting and Random Forest models in this notebook already use.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

In [None]:
# Converting text to vectors

from sklearn.feature_extraction.text import TfidfVectorizer

# The vectorizer is fitted on the training text only; the test text is merely
# transformed with that fitted vectorizer, so nothing is learned from the test split.
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [None]:
# Logistic Regression

from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with default settings on the TF-IDF training vectors.
LR = LogisticRegression()
LR.fit(xv_train,y_train)

In [None]:
# Predict labels for the test vectors with the logistic-regression model.
pred_lr=LR.predict(xv_test)

In [None]:
# Score the logistic-regression model on the test split
# (for scikit-learn classifiers, score() reports mean accuracy).
LR.score(xv_test, y_test)

In [None]:
# Print the classification report comparing the true test labels with the
# logistic-regression predictions.
print(classification_report(y_test, pred_lr))

In [None]:
# Decision Tree Classification

from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the TF-IDF training vectors. random_state=0 matches
# the seed used for the Gradient Boosting and Random Forest models in this
# notebook, so the tree's randomised choices repeat from run to run.
DT = DecisionTreeClassifier(random_state=0)
DT.fit(xv_train, y_train)

In [None]:
# Predict labels for the test vectors with the decision-tree model.
pred_dt = DT.predict(xv_test)

In [None]:
# Score the decision-tree model on the test split
# (for scikit-learn classifiers, score() reports mean accuracy).
DT.score(xv_test, y_test)

In [None]:
# Print the classification report for the decision-tree predictions.
print(classification_report(y_test, pred_dt))

In [None]:
# Gradient Boosting Classifier

from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient-boosting classifier on the TF-IDF training vectors;
# random_state=0 seeds the model's random number generation.
GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)

In [None]:
# Predict labels for the test vectors with the gradient-boosting model.
pred_gbc = GBC.predict(xv_test)

In [None]:
# Score the gradient-boosting model on the test split
# (for scikit-learn classifiers, score() reports mean accuracy).
GBC.score(xv_test, y_test)

In [None]:
# Print the classification report for the gradient-boosting predictions.
print(classification_report(y_test, pred_gbc))

In [None]:
# Random Forest Classifier

from sklearn.ensemble import RandomForestClassifier

# Fit a random-forest classifier on the TF-IDF training vectors;
# random_state=0 seeds the model's random number generation.
RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)

In [None]:
# Predict labels for the test vectors with the random-forest model.
pred_rfc = RFC.predict(xv_test)

In [None]:
# Score the random-forest model on the test split
# (for scikit-learn classifiers, score() reports mean accuracy).
RFC.score(xv_test, y_test)

In [None]:
# Print the classification report for the random-forest predictions.
print(classification_report(y_test, pred_rfc))

# 6. Final Model Testing

In [None]:
def output_lable(n):
    """Translate a numeric class label into its display text.

    Parameters
    ----------
    n : int
        Predicted class: 0 for fake news, 1 for real news.

    Returns
    -------
    str or None
        "Fake News" for 0, "Not A Fake News" for 1, and None for any
        other value.
    """
    for label_code, description in ((0, "Fake News"), (1, "Not A Fake News")):
        if n == label_code:
            return description
    return None
    
def manual_testing(news):
    """Classify one article with every trained model and print the verdicts.

    The text is cleaned with ``wordopt``, vectorised with the already-fitted
    ``vectorization`` object, and passed to the LR, DT, GBC and RFC models.
    Each prediction is converted to display text with ``output_lable`` and the
    four results are printed together.

    Parameters
    ----------
    news : str
        Raw article text to classify.

    Returns
    -------
    None
        The result is only printed.
    """
    cleaned = pd.Series([news]).apply(wordopt)
    features = vectorization.transform(cleaned)

    # Same model order as the placeholders in the report template below.
    verdicts = [
        output_lable(model.predict(features)[0])
        for model in (LR, DT, GBC, RFC)
    ]

    report = "\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(*verdicts)
    print(report)

In [None]:
# Read one article from standard input and classify it with all four models.
# input() already returns a str, so no extra conversion is needed.
news = input()
manual_testing(news)

In [None]:
# Second manual check: read another article from standard input and classify it.
# input() already returns a str, so no extra conversion is needed.
news = input()
manual_testing(news)