In [14]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords 
from nltk.stem import PorterStemmer 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [15]:
news_df = pd.read_csv('WELFake_Dataset.csv')


In [16]:
news_df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,1,,Did they post their votes for Hillary already?,1
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [17]:
news_df.shape

(72134, 4)

In [18]:
news_df.isna().sum()

Unnamed: 0      0
title         558
text           39
label           0
dtype: int64

In [19]:
news_df = news_df.fillna(' ')

In [20]:
news_df.isna().sum()

Unnamed: 0    0
title         0
text          0
label         0
dtype: int64

In [21]:
news_df['content'] = news_df['title']+" "+news_df['text']

In [22]:
news_df

Unnamed: 0.1,Unnamed: 0,title,text,label,content
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1,LAW ENFORCEMENT ON HIGH ALERT Following Threat...
1,1,,Did they post their votes for Hillary already?,1,Did they post their votes for Hillary already?
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0,"Bobby Jindal, raised Hindu, uses story of Chri..."
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1,SATAN 2: Russia unvelis an image of its terrif...
...,...,...,...,...,...
72129,72129,Russians steal research on Trump in hack of U....,WASHINGTON (Reuters) - Hackers believed to be ...,0,Russians steal research on Trump in hack of U....
72130,72130,WATCH: Giuliani Demands That Democrats Apolog...,"You know, because in fantasyland Republicans n...",1,WATCH: Giuliani Demands That Democrats Apolog...
72131,72131,Migrants Refuse To Leave Train At Refugee Camp...,Migrants Refuse To Leave Train At Refugee Camp...,0,Migrants Refuse To Leave Train At Refugee Camp...
72132,72132,Trump tussle gives unpopular Mexican leader mu...,MEXICO CITY (Reuters) - Donald Trump’s combati...,0,Trump tussle gives unpopular Mexican leader mu...


In [23]:
news_df['content']

0        LAW ENFORCEMENT ON HIGH ALERT Following Threat...
1           Did they post their votes for Hillary already?
2        UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...
3        Bobby Jindal, raised Hindu, uses story of Chri...
4        SATAN 2: Russia unvelis an image of its terrif...
                               ...                        
72129    Russians steal research on Trump in hack of U....
72130     WATCH: Giuliani Demands That Democrats Apolog...
72131    Migrants Refuse To Leave Train At Refugee Camp...
72132    Trump tussle gives unpopular Mexican leader mu...
72133    Goldman Sachs Endorses Hillary Clinton For Pre...
Name: content, Length: 72134, dtype: object

In [24]:
# stemming
ps = PorterStemmer()
def stemming(content):
    stemmed_content = re.sub('[^a-zA-Z]',' ',content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [ps.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)
    return stemmed_content

In [25]:
# news_df['content'] = news_df['content'].apply(stemming)


In [26]:
news_df['content']


0        LAW ENFORCEMENT ON HIGH ALERT Following Threat...
1           Did they post their votes for Hillary already?
2        UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...
3        Bobby Jindal, raised Hindu, uses story of Chri...
4        SATAN 2: Russia unvelis an image of its terrif...
                               ...                        
72129    Russians steal research on Trump in hack of U....
72130     WATCH: Giuliani Demands That Democrats Apolog...
72131    Migrants Refuse To Leave Train At Refugee Camp...
72132    Trump tussle gives unpopular Mexican leader mu...
72133    Goldman Sachs Endorses Hillary Clinton For Pre...
Name: content, Length: 72134, dtype: object

In [27]:
#split into dependent and independent features 
X = news_df['content'].values #->independent
y = news_df['label'].values #->Dependent

In [28]:
print(X)

['LAW ENFORCEMENT ON HIGH ALERT Following Threats Against Cops And Whites On 9-11By #BlackLivesMatter And #FYF911 Terrorists [VIDEO] No comment is expected from Barack Obama Members of the #FYF911 or #FukYoFlag and #BlackLivesMatter movements called for the lynching and hanging of white people and cops. They encouraged others on a radio show Tuesday night to  turn the tide  and kill white people and cops to send a message about the killing of black people in America.One of the F***YoFlag organizers is called  Sunshine.  She has a radio blog show hosted from Texas called,  Sunshine s F***ing Opinion Radio Show. A snapshot of her #FYF911 @LOLatWhiteFear Twitter page at 9:53 p.m. shows that she was urging supporters to  Call now!! #fyf911 tonight we continue to dismantle the illusion of white Below is a SNAPSHOT Twitter Radio Call Invite   #FYF911The radio show aired at 10:00 p.m. eastern standard time.During the show, callers clearly call for  lynching  and  killing  of white people.A 2:

In [29]:
vector = TfidfVectorizer()
vector.fit(X)


In [30]:
X = vector.transform(X)

In [31]:
print(X)

  (0, 216470)	0.02227758858547711
  (0, 216417)	0.028037877959932952
  (0, 216386)	0.01809908943118167
  (0, 216354)	0.01909656206027496
  (0, 216171)	0.1175285568789319
  (0, 215822)	0.03809222222185229
  (0, 213771)	0.016788411087768535
  (0, 212859)	0.012505168995122287
  (0, 212618)	0.03851270429773129
  (0, 212232)	0.07551513950358497
  (0, 212052)	0.019802242666920458
  (0, 211775)	0.014894190877790755
  (0, 211699)	0.02854247010164298
  (0, 211637)	0.11158312487552487
  (0, 211465)	0.011628679300827276
  (0, 211457)	0.05378928651771749
  (0, 211376)	0.02143535225400204
  (0, 211367)	0.017987135241977408
  (0, 210815)	0.012194497088869037
  (0, 210449)	0.02978309141316941
  (0, 210284)	0.10696663413239421
  (0, 210120)	0.01199757920109249
  (0, 209935)	0.05669114185181589
  (0, 209788)	0.03385415621304968
  (0, 209559)	0.029838086432494925
  :	:
  (72133, 27498)	0.03124028357676454
  (72133, 27374)	0.012310543339401129
  (72133, 27171)	0.07809055180792746
  (72133, 26957)	0.03048

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,stratify=y, random_state=1)

In [33]:
X_train.shape

(57707, 244445)

In [34]:
X_test.shape

(14427, 244445)

In [35]:
model = LogisticRegression()

model.fit(X_train,y_train)

In [36]:
train_y_pred = model.predict(X_train)

print("train accurracy :",accuracy_score(train_y_pred,y_train))

train accurracy : 0.9654981198121545


In [37]:
test_y_pred = model.predict(X_test)
print("train accurracy :",accuracy_score(test_y_pred,y_test))

train accurracy : 0.9480141401538782


In [38]:
# prediction system
input_data = X_test[20]
prediction = model.predict(input_data)
if prediction[0] == 1:
    print('Fake news')
else:
    print('Real news')


Fake news


In [39]:
news_df['content'][20]

'America gives Grand Piano to horse Wednesday 9 November 2016 by Lucas Wilde America gives Grand Piano to horse \nAmerica has given a grand piano to a horse and is expecting some quality tunes. \n“I’m particularly looking forward to Beethoven’s Ninth,” beamed horse supporter and piano enthusiast, Jay Cooper. \n“A horse has never been given a piano before because, frankly, the establishment wouldn’t allow it. \n“Now, at last, change has come, and America will change for the better. \n“There are a lot of doubters out there, and those doubters will soon be silenced by the graceful notes of Chopin, Mozart and maybe even Little Richard.” \nHorse, Dobbin Williams, said, “I’m not really sure what’s expected of me here. \n“I’m a horse. I am absolutely not qualified to play a piano. \n“I mean… Look at these hooves and the way I am in general. I can’t even sit on the chair properly. \n“Why on earth did anyone think this was a good idea?” \nCooper grinned, “We did it. We’ve made pianos great agai