In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import re
import joblib
import string 

In [2]:
# Read both source files in one pass: Fake.csv first, then True.csv.
fake, true = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

In [3]:
# Show the first rows of the Fake.csv frame to check which columns were loaded.
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
# Show the first rows of the True.csv frame for comparison with the fake one.
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
# Tag every row with its origin before the frames are merged:
# 0 for rows read from Fake.csv, 1 for rows read from True.csv.
fake["class"], true["class"] = 0, 1

In [6]:
# Stack the two labelled frames row-wise; each frame keeps its own original
# index values, so index labels repeat in the combined frame.
data = pd.concat([fake, true], axis="index")

In [7]:
# Spot-check ten random rows of the combined frame. No random_state is passed,
# so the rows shown differ on every run.
data.sample(10)

Unnamed: 0,title,text,subject,date,class
3856,U.S. Senate Republican leader opposes new Russ...,WASHINGTON (Reuters) - The U.S. Senate majorit...,politicsNews,"May 10, 2017",1
15401,Kremlin says Putin meeting with Trump at APEC ...,MOSCOW (Reuters) - A meeting between Russian P...,worldnews,"November 8, 2017",1
4044,Megyn Kelly Called Trump A ‘Predator’; What N...,"It s Fox News, but that doesn t mean that ever...",News,"October 26, 2016",0
9501,Clinton aide criticizes Trump has having 'biza...,WASHINGTON (Reuters) - An aide to U.S. Democra...,politicsNews,"May 17, 2016",1
11292,China to bring paramilitary police force under...,BEIJING (Reuters) - China will bring its param...,worldnews,"December 27, 2017",1
12814,OUCH! POST DEBATE: HILLARY GIVES Tim Kaine A P...,Things didn t work out very well for Vince Fos...,politics,"Oct 6, 2016",0
3793,Trump reassures farmers immigration crackdown ...,WASHINGTON/SAN FRANCISCO (Reuters) - President...,politicsNews,"May 15, 2017",1
94,House plan would increase Trump's disaster aid...,WASHINGTON (Reuters) - Republicans in the U.S....,politicsNews,"December 18, 2017",1
20190,GA POLICE Sergeant FIRED For Flying Confederat...,The political correctness police are apparentl...,left-news,"Jul 31, 2016",0
5574,Court hearing on Trump travel ban draws more t...,(Reuters) - More than 2.6 million people tuned...,politicsNews,"February 9, 2017",1


In [8]:
# Keep only the article body and the label; title, subject and date are discarded.
data = data.drop(columns=["title", "subject", "date"])

In [9]:
# Renumber the rows 0..n-1. The previous index values are moved into a new
# column named "index" (drop=True is not requested).
data = data.reset_index()

In [10]:
# Remove the "index" column so only the remaining columns are carried forward.
data = data.drop(columns=["index"])

In [11]:
# Spot-check five random rows of the trimmed frame (unseeded, so the selection
# changes between runs).
data.sample(5)

Unnamed: 0,text,class
24123,WASHINGTON (Reuters) - U.S. Senate Republican ...,1
2527,Kellyanne Conway just embarrassed herself agai...,0
11702,TMZ caught up with Mavericks owner and Hillary...,0
31873,SAN FRANCISCO (Reuters) - California lawmakers...,1
22689,By Dady Chery and Gilbert MercierAll writers ...,0


In [12]:
def clean_text(text):
    """Normalise one raw article body into lowercase, link-free, punctuation-free text.

    Steps, in order: lowercase; drop ``[...]`` spans; drop ``http(s)://`` and
    ``www.`` links; drop ``<...>`` tags; drop ASCII punctuation; turn newlines
    into spaces; drop every token that contains a digit.

    Parameters
    ----------
    text : str
        Raw text of a single document. Any non-string value (for example the
        NaN pandas produces for an empty CSV cell, or ``None``) is treated as
        an empty document.

    Returns
    -------
    str
        The cleaned text. Runs of spaces left behind by removed spans are not
        collapsed.
    """
    # Missing cells arrive as float NaN / None; .lower() would raise on them.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    # Bracketed spans such as "[video]" (non-greedy, single line).
    text = re.sub(r"\[.*?\]", "", text)
    # NOTE(review): this pattern matches a literal backslash followed by "w".
    # It was probably meant to be \W, but substituting \W here would strip
    # "://" and "." before the URL step below can see them, so the original
    # behaviour is kept unchanged -- confirm intent before altering.
    text = re.sub(r"\\w", "", text)
    # Links must go before punctuation is stripped, otherwise "http://a.b/c"
    # degrades into the junk token "httpabc". (The scheme separator is "://";
    # a doubled colon here would never match a real URL.)
    text = re.sub(r"https?://\S+|www\.\S+", "", text)
    # HTML/XML-style tags.
    text = re.sub(r"<.*?>+", "", text)
    # All ASCII punctuation characters.
    text = re.sub("[%s]" % re.escape(string.punctuation), "", text)
    # Newlines become a space so the words on either side are not fused.
    text = re.sub(r"\n", " ", text)
    # Any word containing at least one digit (years, amounts, ids, ...).
    text = re.sub(r"\w*\d\w*", "", text)
    return text

In [13]:
# Run the cleaner over every article body, overwriting the raw text column.
data["text"] = data["text"].map(clean_text)

In [14]:
# Model input is the article text column; the target is the 0/1 class column.
x, y = data["text"], data["class"]

# Hold out a quarter of the rows for evaluation; the fixed seed makes the
# split repeatable across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [15]:
# Fit the TF-IDF vectorizer (library defaults) on the training texts only, then
# apply that same fitted vectorizer to the test texts, so the test matrix is
# built from the vocabulary learned on the training split.
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(xtrain)
xv_test = vectorizer.transform(xtest)

In [16]:
# Train a logistic-regression classifier with default settings on the TF-IDF
# training matrix and the matching training labels.
lr = LogisticRegression()
lr.fit(xv_train, ytrain)

In [17]:
# Store the predicted labels for the held-out texts in `prediction`, then
# display the value of lr.score on the same held-out matrix and labels as this
# cell's result.
prediction = lr.predict(xv_test)
lr.score(xv_test,ytest)

0.985924276169265

In [18]:
# Per-class precision / recall / F1 on the held-out split, printed as plain text.
print(classification_report(y_true=ytest, y_pred=prediction))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5895
           1       0.98      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



In [19]:
# Persist the fitted vectorizer and the trained classifier as separate files.
# Both are needed at inference time: new text must pass through the same
# vectorizer before it reaches the model.
joblib.dump(value=vectorizer, filename="vectorizer.jb")
joblib.dump(value=lr, filename="lr_model.jb")

['lr_model.jb']