In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report


In [None]:
# Load the genuine-article file and the fake-article file, in that order.
true, fake = (pd.read_csv(csv_name) for csv_name in ('True.csv', 'Fake.csv'))

In [None]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [None]:
# Label encoding used throughout the notebook: 1 = genuine article, 0 = fake article.
true['label'], fake['label'] = 1, 0

In [None]:
# Stack the fake rows on top of the true rows; each frame keeps its own row labels.
news = pd.concat([fake,true],axis=0)
# A cell renders only its last expression, so the head is shown explicitly with
# display() (provided by the IPython/Jupyter kernel) instead of being discarded.
display(news.head())
news.tail()

Unnamed: 0,title,text,subject,date,label
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017",1


In [None]:
# Keep only the article body and its label; the other metadata columns are not used.
news = news.drop(columns=['title','subject','date'])

In [None]:
# A cell renders only its last expression; display the head explicitly
# (display() is provided by the IPython/Jupyter kernel) so it is not discarded.
display(news.head())
news.tail()

Unnamed: 0,text,label
21412,BRUSSELS (Reuters) - NATO allies on Tuesday we...,1
21413,"LONDON (Reuters) - LexisNexis, a provider of l...",1
21414,MINSK (Reuters) - In the shadow of disused Sov...,1
21415,MOSCOW (Reuters) - Vatican Secretary of State ...,1
21416,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,1


In [None]:
# Shuffling the dataset: frac=1 returns every row in random order.
# A fixed random_state makes the shuffle identical on every run.
news = news.sample(frac=1, random_state=42)
news.head()

Unnamed: 0,text,label
12407,Portland rioters have been chasing a Trump sup...,0
9366,Has America reached a tipping point? Will more...,0
16166,,0
20720,The most ominous sign is that Hillary s losin...,0
15575,Does anyone care? If Hillary personally gassed...,0


In [None]:
# Renumber the rows from 0; the previous row labels are moved into an 'index' column.
news = news.reset_index()
news.head()

Unnamed: 0,index,text,label
0,12407,Portland rioters have been chasing a Trump sup...,0
1,9366,Has America reached a tipping point? Will more...,0
2,16166,,0
3,20720,The most ominous sign is that Hillary s losin...,0
4,15575,Does anyone care? If Hillary personally gassed...,0


In [None]:
# Drop the old row labels that reset_index() stored in the 'index' column.
# errors='ignore' keeps this cell re-runnable once that column is already gone.
news = news.drop(columns=['index'], errors='ignore')

In [None]:
news.head()

Unnamed: 0,text,label
0,Portland rioters have been chasing a Trump sup...,0
1,Has America reached a tipping point? Will more...,0
2,,0
3,The most ominous sign is that Hillary s losin...,0
4,Does anyone care? If Hillary personally gassed...,0


In [None]:
import re     # regular expressions, used by wordpot to clean the article text

In [None]:
def wordpot(text):
  """Normalize raw article text for vectorization.

  The steps run in a fixed order: lowercase, strip HTML tags, strip URLs,
  drop punctuation, drop digits, then turn newlines into spaces.

  Args:
    text: Raw article text. ``None`` and NaN (how pandas represents a missing
      cell) are treated as empty text; any other non-string value is
      converted with ``str()`` first.

  Returns:
    The cleaned string. Runs of spaces left behind by removed characters are
    not collapsed.
  """
  # Missing values would otherwise crash on .lower(); NaN is the only float
  # that is not equal to itself.
  if text is None or (isinstance(text, float) and text != text):
    return ''

  #Convert into lowercase
  text = str(text).lower()

  #Remove HTML tags
  text = re.sub(r'<.*?>','',text)

  #Remove URLs (done before punctuation is stripped, while '://' and '.' still exist)
  text = re.sub(r'https?://\S+|www\.\S+','',text)

  #Remove punctuation
  text = re.sub(r'[^\w\s]','',text)

  #Remove digits
  text = re.sub(r'\d','',text)

  #Replace newline characters with a space so words on adjacent lines are not glued together
  text = re.sub(r'\n',' ',text)

  return text

In [None]:
# Clean every article body with wordpot, replacing the raw text column.
news['text'] = news['text'].map(wordpot)

In [None]:
news['text']

Unnamed: 0,text
0,portland rioters have been chasing a trump sup...
1,has america reached a tipping point will more ...
2,
3,the most ominous sign is that hillary s losin...
4,does anyone care if hillary personally gassed ...
...,...
44893,london reuters british police are chasing dow...
44894,los angeleswashington reuters americans growi...
44895,washington reuters the us house panel investi...
44896,london reuters british prime minister theresa...


In [None]:
# Features are the cleaned article texts; targets are the 0/1 labels.
x, y = news['text'], news['label']

In [None]:
x

Unnamed: 0,text
0,portland rioters have been chasing a trump sup...
1,has america reached a tipping point will more ...
2,
3,the most ominous sign is that hillary s losin...
4,does anyone care if hillary personally gassed ...
...,...
44893,london reuters british police are chasing dow...
44894,los angeleswashington reuters americans growi...
44895,washington reuters the us house panel investi...
44896,london reuters british prime minister theresa...


In [None]:
y

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
...,...
44893,1
44894,1
44895,1
44896,1


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=42)

In [None]:
x_train.shape   # same length as y_train: every training text has exactly one label

(31428,)

In [None]:
y_train.shape

(31428,)

In [None]:
x_test.shape    # the held-out share of the rows, as set by test_size=0.3

(13470,)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# TF-IDF vectorizer with library-default settings; it is fitted on the training
# split below and reused by manual_testing for new articles.
vectorization = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the training texts only, then encode them.
xv_train = vectorization.fit_transform(x_train)

In [None]:
# Encode the test texts with the already-fitted vectorizer (transform only, no refit).
xv_test = vectorization.transform(x_test)

In [None]:
xv_train

<31428x174862 sparse matrix of type '<class 'numpy.float64'>'
	with 6475312 stored elements in Compressed Sparse Row format>

In [None]:
xv_test

<13470x174862 sparse matrix of type '<class 'numpy.float64'>'
	with 2705508 stored elements in Compressed Sparse Row format>

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default hyper-parameters.
LR = LogisticRegression()

In [None]:
# Train on the vectorized training texts and their labels.
LR.fit(xv_train,y_train)

In [None]:
pred_lr = LR.predict(xv_test)  # predicted labels for the held-out test split

In [None]:
LR.score(xv_test,y_test)

0.9894580549368968

In [None]:
# Per-class precision / recall / F1 of the logistic regression predictions on the test split.
print(classification_report(y_test,pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7023
           1       0.99      0.99      0.99      6447

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with default hyper-parameters; the fixed seed makes the fitted tree repeatable across runs.
DTC = DecisionTreeClassifier(random_state=42)

In [None]:
# Train on the vectorized training texts and their labels.
DTC.fit(xv_train,y_train)

In [None]:
# Predicted labels for the held-out test split.
pred_dtc = DTC.predict(xv_test)

In [None]:
DTC.score(xv_test,y_test)

0.9960653303637713

In [None]:
# Per-class precision / recall / F1 of the decision tree predictions on the test split.
print(classification_report(y_test,pred_dtc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7023
           1       1.00      1.00      1.00      6447

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with default hyper-parameters; the fixed seed makes the fitted forest repeatable across runs.
rfc = RandomForestClassifier(random_state=42)

In [None]:
# Train on the vectorized training texts and their labels.
rfc.fit(xv_train,y_train)

In [None]:
# Predicted labels for the held-out test split.
pred_rfc = rfc.predict(xv_test)

In [None]:
rfc.score(xv_test,y_test)

0.9899777282850779

In [None]:
# Per-class precision / recall / F1 of the random forest predictions on the test split.
print(classification_report(y_test,pred_rfc))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7023
           1       0.99      0.99      0.99      6447

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [None]:
def output_lable(n):
    """Translate a numeric class label into its display caption.

    Args:
        n: Predicted label; 0 stands for fake, 1 for genuine.

    Returns:
        "Fake News" for 0, "Not A Fake News" for 1, and None for any other value.
    """
    captions = ((0, "Fake News"), (1, "Not A Fake News"))
    for code, caption in captions:
        if n == code:
            return caption
    return None

In [None]:
def manual_testing(news):
  """Classify a single article with all three trained models.

  The article is wrapped in a one-row DataFrame, cleaned with ``wordpot``,
  encoded with the already-fitted ``vectorization`` object and passed to
  ``LR``, ``rfc`` and ``DTC``.

  Args:
    news: Raw article text.

  Returns:
    One string holding the LR, RFC and DTC verdicts, each rendered by
    ``output_lable``.
  """
  cleaned_text = pd.DataFrame({"text": [news]})["text"].apply(wordpot)
  features = vectorization.transform(cleaned_text)

  # Query the models in the same order in which their verdicts are reported.
  lr_guess = LR.predict(features)
  rfc_guess = rfc.predict(features)
  dtc_guess = DTC.predict(features)

  template = "LR Prediction: {}  RFC Prediction: {}   DTC Prediction: {}"
  return template.format(
      output_lable(lr_guess[0]),
      output_lable(rfc_guess[0]),
      output_lable(dtc_guess[0]),
  )

In [None]:
# Read one article from the prompt; input() already returns a str.
news_article = input()

Gas prices are expected to rise sharply this year, but Donald Trump thinks a ten-year low for a single day is something he should get credit for.On July 4th, gas prices dropped to an average $2.23 a gallon. It s the lowest it has been on Independence Day in ten years.So, of course, Trump bragged about it.Gas prices are the lowest in the U.S. in over ten years! I would like to see them go even lower.  Donald J. Trump (@realDonaldTrump) July 4, 2017Keep in mind, we are only halfway through 2017, and one day does not equal one year.Also, presidents do NOT control gas prices, something conservatives are always hypocritically pointing out when gas prices come down during a Democratic presidency or when gas prices are high during a Republican presidency. But when gas prices are low during a Republican presidency, conservatives throw that knowledge out the window and claim that the president did it.Such is the case now.But Trump had nothing to do with current gas prices. You see, the market h

In [None]:
manual_testing(news_article)

'LR Prediction: Fake News  RFC Prediction: Fake News   DTC Prediction: Fake News'