# Fake news Detection


### Importing required libraries

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

### Inserting fake and real dataset

In [4]:
df_fake = pd.read_csv("Fake.csv")
df_true = pd.read_csv("True.csv")

In [5]:
df_fake["class"] = 0
df_true["class"] = 1

In [6]:
df_merge = pd.concat([df_fake, df_true], axis =0 )

In [7]:
df = df_merge.drop(["title", "subject","date"], axis = 1)
df = df.sample(frac = 1)

In [8]:
df.reset_index(inplace = True)
df.drop(["index"], axis = 1, inplace = True)

In [9]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

In [10]:
df["text"] = df["text"].apply(wordopt)

In [11]:
df.head()

Unnamed: 0,text,class
0,washington reuters u s representative ste...,1
1,since cnn reported that trump and obama had be...,0
2,after tuesday s semi coronation of donald trum...,0
3,,0
4,austin texas reuters a u s district judg...,1


### Training Model

In [12]:
x = df["text"]
y = df["class"]

In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

In [15]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression()
LR.fit(xv_train,y_train)
LR.score(xv_test, y_test)

0.986369710467706

In [19]:
from sklearn.tree import DecisionTreeClassifier
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)
DT.score(xv_test, y_test)

0.995456570155902

In [20]:
# Define estimators
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

estimator_list = [
    ('lr',LR),
    ('dt',DT),
     ]

# Build stack model
stack_model = StackingClassifier(
    estimators=estimator_list, final_estimator=LogisticRegression()
)


In [21]:
stack_model.fit(xv_train,y_train)
stack_model.score(xv_test,y_test)

0.9969710467706013

In [22]:
# save model
import pickle
filename = 'finalized_model.pkl'
pickle.dump(stack_model, open(filename, 'wb'))

In [17]:
import pickle
filename = 'vectorizer.pkl'
pickle.dump(vectorization, open(filename, 'wb'))