# Setting up environment

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

In [2]:
# Raw strings keep the Windows backslashes literal: in a normal string "\M", "\F"
# and "\T" are invalid escape sequences, which newer Python versions warn about.
fake = pd.read_csv(r"C:\Manthan\Fake.csv")
true = pd.read_csv(r"C:\Manthan\True.csv")

In [3]:
# Tag every row with its source so fake and real news stay distinguishable

for frame, label in ((fake, 'fake'), (true, 'true')):
    frame['target'] = label

In [4]:
# Stack the fake and true frames into one frame with a fresh 0..n-1 index
data = pd.concat([fake, true], ignore_index=True)
data.shape

(44898, 5)

In [5]:
# The first half of the data contains fake news and the second half contains true news,
# so we shuffle the rows to mix the two classes before training.
# A fixed random_state makes the resulting row order the same on every run.

from sklearn.utils import shuffle
data = shuffle(data, random_state=42)
data = data.reset_index(drop=True)

In [6]:
# Remove the date and title columns in one call, then preview the first rows
data.drop(columns=["date", "title"], inplace=True)
data.head()

Unnamed: 0,text,subject,target
0,WASHINGTON (Reuters) - President Donald Trump’...,politicsNews,true
1,"Only one day ago, FOX News published a factual...",left-news,fake
2,(Reuters) - Banks and other financial companie...,politicsNews,true
3,The new head of Donald Trump s presidential ca...,News,fake
4,Donald Trump used a recent television intervie...,News,fake


In [10]:
# Model input is the text column; the label is the target column
x, y = data['text'], data['target']

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=43,
)

In [14]:
# TF-IDF features: English stop words removed, terms present in more than 70% of
# documents ignored, vocabulary capped at 100000 terms
tfvect = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
    max_features=100000,
)
# Fit on the training split only; the test split is transformed with that fitted vocabulary
tfid_x_train = tfvect.fit_transform(x_train)
tfid_x_test = tfvect.transform(x_test)

In [17]:
# Fit a passive-aggressive linear classifier on the TF-IDF training matrix.
# random_state fixes the shuffling of the training data between epochs, so
# repeated runs on the same input produce the same fitted model.
classifier = PassiveAggressiveClassifier(max_iter=50, random_state=43)
classifier.fit(tfid_x_train,y_train)

In [18]:
# Predict labels for the held-out split and report accuracy as a percentage
y_pred = classifier.predict(tfid_x_test)
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 99.4%


In [19]:
def fake_news_det(news):
    """Print the label the in-memory classifier predicts for one news text.

    The text is wrapped in a one-element list, vectorized with the fitted
    ``tfvect`` and passed to ``classifier.predict``. The resulting array is
    printed; nothing is returned.
    """
    features = tfvect.transform([news])
    print(classifier.predict(features))

In [20]:
# Try the detector on a short news excerpt
kerry_excerpt = 'U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officia'
fake_news_det(kerry_excerpt)

['true']


In [21]:
import pickle

# Persist the trained classifier. The with-block closes the file handle, so the
# pickle is fully flushed to disk before any later cell reads it back.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [22]:
# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
# The with-block closes the file handle once the model has been read.
with open('model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [23]:
def fake_news_det1(news):
    """Print the label the model reloaded from disk predicts for one news text.

    The text is wrapped in a one-element list, vectorized with the fitted
    ``tfvect`` and passed to ``loaded_model.predict``. The resulting array is
    printed; nothing is returned.
    """
    features = tfvect.transform([news])
    print(loaded_model.predict(features))

In [24]:
# Check the reloaded model on a short news excerpt
obama_excerpt = """Go to Article President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for H"""
fake_news_det1(obama_excerpt)

['fake']


In [26]:
# Persist the fitted TF-IDF vectorizer. The with-block closes the file handle,
# so the pickle is fully flushed to disk.
with open('tfvect.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfvect, vectorizer_file)

References

https://www.youtube.com/watch?v=rgr_aCg-338
https://www.youtube.com/watch?v=GS_ylghUtLQ
https://flask.palletsprojects.com/en/3.0.x/
https://docs.python.org/3/library/pickle.html
https://scikit-learn.org/stable/modules/linear_model.html#passive-aggressive-algorithms