## True and Fake News Classifier Using Decision Tree Algorithm

In [1]:
# Importing Libraries

import pandas as pd
import numpy as np

Importing datasets

In [2]:
fake = pd.read_csv('fake_and_real_news_dataset_Fake.csv')
true = pd.read_csv('fake_and_real_news_dataset_True.csv')

#### We will sort news based on true as label 1 and false as label 0

In [3]:
true['Label']=1
fake['Label']=0

#### We will merge these two dataset into one

In [4]:
data_raw = pd.concat([true, fake])
data_raw.head()

Unnamed: 0,title,text,subject,date,Label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


#### here we will drop subject and date coloumn as we don't required them

In [5]:
data_raw=data_raw.drop(['subject','date'], axis=1)  # axis=1 to drop full column
data_raw=data_raw.dropna()  # dropping missing values

In [6]:
data_raw.head()

Unnamed: 0,title,text,Label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


#### here we will take all records so that we will get maximun accuracy

In [7]:
data_raw=data_raw[:]

In [8]:
# extracting all independant variables except last column i.e. label
x=data_raw.iloc[:,:-1].values 

In [9]:
# extracting all dependant variables last column i.e. label
y=data_raw.iloc[:,-1].values 

In [10]:
# Explore first independent variable entry
x[0]

array(['As U.S. budget fight looms, Republicans flip their fiscal script',
       'WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional 

In [11]:
# Explore first dependent variable entry
y[0]

1

### Building ML Model

to use ML algo we need to convert text data into numerical data so we use count vectoriser from sklearn

In [12]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=5000)
mat_body=cv.fit_transform(x[:,-1]).todense() # converting body of news to dense matrix

In [13]:
mat_body

matrix([[0, 0, 2, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 2, 0, ..., 0, 0, 0],
        [0, 1, 0, ..., 0, 0, 0],
        [0, 0, 1, ..., 0, 0, 0]], dtype=int64)

In [None]:
cv_head=CountVectorizer(max_features=5000)
mat_head=cv_head.fit_transform(x[:,0]).todense() # converting head of news to dense matrix

In [None]:
mat_head

In [None]:
# stacking all head and body matrices
X_mat=np.hstack((mat_head,mat_body))

### Train Test split

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_mat,y, test_size = 0.20, random_state = 0)

### Building Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
dtc=DecisionTreeClassifier(criterion='entropy')
dtc.fit(X_train,y_train)
y_pred=dtc.predict(X_test)

#### Building confusion matrics to find accuracy

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred)

In [None]:
# left diagonal value shows true prediction and right diagonal value shows False prediction
Accuracy=((4665+4287)/(4665+4287+5+23))*100

In [None]:
Accuracy