In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import re
import joblib
import string


In [3]:
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

In [4]:
fake.head()


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
true.head()


Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
fake['class']=0
true['class']=1

In [7]:
data = pd.concat([fake,true],axis = 0)

In [8]:
data.sample(10)

Unnamed: 0,title,text,subject,date,class
9939,OUCH! BERNIE SANDERS Responds To Hillary’s Cri...,WFB- The former Democratic presidential candid...,politics,"Sep 10, 2017",0
10735,DELUSIONAL HILLARY Calls Her Email Scandal The...,Hillary Clinton continues on the path to compl...,politics,"May 31, 2017",0
4760,U.S. Women’s Soccer Star Megan Rapinoe Kneels...,The protest is growing.Most people don t seem ...,News,"September 5, 2016",0
22754,SUNDAY SCREENING: Operation Hollywood (2004),Our weekly documentary film curated by our edi...,Middle-east,"October 15, 2017",0
14728,Gunmen shoot dead police officer and family in...,ISLAMABAD (Reuters) - Gunmen on a motorcycle s...,worldnews,"November 15, 2017",1
10008,America’s ‘Hottest’ Conservative Joins Fox New...,Tomi Lahren recently told The Hollywood Report...,politics,"Aug 30, 2017",0
6079,Standing Rock Sioux tribe opposes Trump order ...,(Reuters) - The Standing Rock Sioux tribe in a...,politicsNews,"January 24, 2017",1
10943,Exclusive: Obama to propose $2.5 billion tax c...,WASHINGTON (Reuters) - President Barack Obama ...,politicsNews,"February 5, 2016",1
14707,Peru's Kuczynski denies allegations of Odebrec...,LIMA (Reuters) - Peruvian President Pedro Pabl...,worldnews,"November 15, 2017",1
10228,Republican lawmakers to join Obama's Cuba visit,WASHINGTON (Reuters) - A small group of Republ...,politicsNews,"March 18, 2016",1


In [9]:
data =data.drop(["title","subject","date"],axis = 1)

In [10]:
data.reset_index(inplace=True)

In [11]:
data.drop(["index"],axis = 1,inplace=True)

In [12]:
data.sample(5)

Unnamed: 0,text,class
8076,The enormous debt students incur for college i...,0
8827,While the enormous sense of loss the world exp...,0
954,"This week, Donald Trump joined a collection of...",0
41975,DUBAI (Reuters) - Five Bahraini policemen were...,1
35067,WASHINGTON (Reuters) - The United States on Th...,1


In [13]:
def clean_text(text):
    text = text.lower()
    text = re.sub('\[.*?\]',"",text)
    text = re.sub("\W"," ",text)
    text = re.sub("https?:://\S+|www\.\S+","",text)
    text = re.sub("<.*?>+","",text)
    text = re.sub("[%s]"% re.escape(string.punctuation),"",text)
    text = re.sub("\n","",text)
    text = re.sub("\w*\d\w*","",text)
    return text
    

In [14]:
data["text"] = data["text"].astype(str).apply(clean_text)

In [15]:
x=data["text"]
y=data["class"]

xtrain, xtest, ytrain, ytest = train_test_split( x,y,test_size=0.25,random_state=42)

In [16]:
vectorizer = TfidfVectorizer()
xv_train = vectorizer.fit_transform(xtrain)
xv_test = vectorizer.transform(xtest)


In [17]:
lr = LogisticRegression()
lr.fit(xv_train, ytrain)

In [18]:
prediction = lr.predict(xv_test)
lr.score(xv_test,ytest)

0.9880623608017818

In [19]:
print(classification_report(ytest,prediction))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5899
           1       0.99      0.99      0.99      5326

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



In [20]:
joblib.dump(vectorizer,"vectorizer.jb")
joblib.dump(lr,"lr_model.jb")

['lr_model.jb']