## Fake News Detection

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
# !pip install pandas # download and install pandas library

In [2]:
# Load the news dataset from the mounted Drive folder and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Internships/CodeClause/Fake News Detection/news.csv"
)
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(6335, 4)

In [4]:
# Count missing values per column; all zeros means no imputation is needed.
df.isna().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [5]:
# Target column: each article is tagged with a string class label.
labels = df["label"]
labels.head()

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df["text"],
    labels,
    random_state=20,
    test_size=0.2,
)

In [8]:
# Preview the first training texts; the index values are the row labels carried over from df.
x_train.head()

4741    NAIROBI, Kenya — President Obama spoke out Sun...
2089    Killing Obama administration rules, dismantlin...
4074    Dean Obeidallah, a former attorney, is the hos...
5376      WashingtonsBlog \nCNN’s Jake Tapper hit the ...
6028    Some of the biggest issues facing America this...
Name: text, dtype: object

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

In [10]:
# Build the TF-IDF vectorizer: drop English stop words and ignore any term
# that appears in more than 70% of the documents.
vector = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

In [11]:
# fit and transform
# Learn the vocabulary and IDF weights from the training texts only, then reuse
# the fitted vectorizer on the test texts so both matrices share the same
# feature columns and no test-set statistics leak into the features.
tf_train = vector.fit_transform(x_train)
tf_test = vector.transform(x_test)

In [12]:
# Initialise a PassiveAggressiveClassifier
# The classifier shuffles the training data between epochs, so its seed is fixed
# (same value as the train/test split) to make the fitted model, and the metrics
# computed from it, repeatable on a fresh Restart & Run All.
pac = PassiveAggressiveClassifier(max_iter = 50, random_state = 20)
pac.fit(tf_train,y_train)

In [13]:
# prediction on the test dataset
# y_pred holds one predicted label per row of tf_test, in the same order as y_test.
from sklearn.metrics import accuracy_score, confusion_matrix
y_pred = pac.predict(tf_test)

In [14]:
# Fraction of test articles whose predicted label matches the true label,
# reported as a percentage rounded to two decimals.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy : {round(score*100,2)} %")

Accuracy : 94.71 %


In [15]:
# Confusion matrix: rows are the true labels and columns the predicted labels,
# both in the order given by `labels` (FAKE first, then REAL).
confusion_matrix(
    y_true=y_test,
    y_pred=y_pred,
    labels=['FAKE','REAL'],
)

array([[622,  26],
       [ 41, 578]])

In [16]:
# save model
import pickle
filename = '/content/drive/MyDrive/Internships/CodeClause/Fake News Detection/finalized_model.pkl'
# Use a context manager so the file is flushed and closed even if pickling fails;
# the previous bare open() left the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(pac, model_file)

In [18]:
# save vectorizer
# The fitted vectorizer must be stored alongside the model: new text has to be
# transformed with the same vocabulary and IDF weights the classifier was trained on.
filename = '/content/drive/MyDrive/Internships/CodeClause/Fake News Detection/vectorizer.pkl'
# Use a context manager so the file is flushed and closed even if pickling fails.
with open(filename, 'wb') as vectorizer_file:
    pickle.dump(vector, vectorizer_file)