## Imports

In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
import re
import joblib
import string

## Reading Data

In [None]:
true = pd.read_csv(r'C:\Users\nanda\OneDrive\Desktop\Learn\ML, DL\Fake News Prediction\Data\Take.csv')
fake = pd.read_csv(r'C:\Users\nanda\OneDrive\Desktop\Learn\ML, DL\Fake News Prediction\Data\Fake.csv')

In [74]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [75]:
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


## Data Preprocessing

In [76]:
# Creating Class Column [1=True & 0=False]
true['class']=1
fake['class']=0

In [77]:
# Concatenating both datasets
df = pd.concat([fake,true], axis=0)

In [78]:
df.sample(5)

Unnamed: 0,title,text,subject,date,class
11280,"Tillerson, Lavrov agree to continue North Kore...",WASHINGTON (Reuters) - The United States and R...,worldnews,"December 27, 2017",1
3956,Top Senate Republican rejects Trump filibuster...,WASHINGTON (Reuters) - The top Republican in t...,politicsNews,"May 2, 2017",1
20533,SELFIE OF MUSLIM WOMAN MAKING PEACE SIGN Goes ...,Another moderate Muslim A Muslim woman who w...,left-news,"May 23, 2016",0
20651,MAN BRUTALLY ASSAULTED At CA Trump Rally Tells...,This video would be on a 24/7 mainstream media...,left-news,"May 1, 2016",0
16884,Zuma spokesman dismisses reports South African...,JOHANNESBURG (Reuters) - Jacob Zuma s spokesma...,worldnews,"October 20, 2017",1


In [79]:
# Dropping unnecessary columns
df = df.drop(["title", "subject", "date"], axis=1)

In [80]:
df.sample(5)

Unnamed: 0,text,class
9810,John McCain really doesn t care what topic rep...,0
6364,"Whether you love him or hate him, most of us p...",0
2583,Donald Trump s Education Secretary is off to a...,0
19784,WASHINGTON (Reuters) - The U.S. Navy dismissed...,1
18452,"DERA ISMAIL KHAN, Pakistan (Reuters) - A Pakis...",1


In [81]:
df.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [82]:
# Function to clean the text
def clean_text(text):
    text = text.lower()
    text = re.sub(r"\[.*?\]", "", text)
    text = re.sub(r"//W", "", text)
    text = re.sub(r"https?://\S+|www\.\S+", "", text)
    text = re.sub(r"<.*?>+", "", text)
    text = re.sub(r"[%s]" % re.escape(string.punctuation), "", text)
    text = re.sub(r"\n", "", text)
    text = re.sub(r"\w*\d\w*", "", text)
    return text


In [83]:
# Applying the function to text column
df["text"] = df["text"].apply(clean_text)

## Data Modelling

In [84]:
x = df["text"]
Y = df["class"]

In [85]:
# Splitting Data into Train & Test data
x_train, x_test, Y_train, Y_test = train_test_split(x, Y, test_size=0.2, random_state=42)

In [86]:
#Initializing Vectorizer to convert text into numbers for the machine to understand
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(x_train)
xv_test = vectorizer.transform(x_test)


In [87]:
# Model Initializing
model = LogisticRegression()
model.fit(xv_train, Y_train)

In [88]:
# Prediction
prediction = model.predict(xv_test)

In [None]:
# Calculating Accuracy
model.score(xv_test, Y_test)

0.9857461024498887

In [90]:
print(classification_report(Y_test,prediction))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4733
           1       0.98      0.99      0.98      4247

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [91]:
joblib.dump(vectorizer,"vectorizer.jb")
joblib.dump(model,"model.jb")

['model.jb']