### Fake News Detection

In [1]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from statistics import mode


In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
df_true = pd.read_csv('True.csv')
df_fake = pd.read_csv('Fake.csv')

In [4]:
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [5]:
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [6]:
df_true.shape

(21417, 4)

In [7]:
df_fake.shape

(23481, 4)

In [8]:
df_true["class"] = 1
df_fake["class"] = 0

In [9]:
df_true.head()

Unnamed: 0,title,text,subject,date,class
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [10]:
df_fake.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [11]:
df_news = pd.concat([df_fake, df_true], axis=0)


In [12]:
df_news.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [13]:
df_news.shape

(44898, 5)

In [14]:
df_news = df_news[["text","class"]]

In [15]:
df_news.head()

Unnamed: 0,text,class
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [16]:
df_news.shape

(44898, 2)

In [17]:
df_news = df_news.sample(frac=1)
df_news = df_news.reset_index(drop=True)

In [18]:
df_news

Unnamed: 0,text,class
0,I heard Mr. McLellan on the radio yesterday an...,0
1,NEW DELHI (Reuters) - India s foreign minister...,1
2,"As the GOP presidential nominee, Republicans h...",0
3,Why would the Vatican invite an aggressive lef...,0
4,"Yesterday, Beverly Young Nelson sat with the s...",0
...,...,...
44893,The leftist media was quick to blame Donald Tr...,0
44894,We love Intellectual Froglegs and Joe Dan Gorm...,0
44895,"In a brand-new expos on Mother Jones, investi...",0
44896,GENEVA (Reuters) - Representatives of Syria s ...,1


In [19]:
df_news.isnull().sum()

text     0
class    0
dtype: int64

In [20]:
ps = PorterStemmer()

def stemming(text):
    stemming_text = re.sub('[^a-zA-Z]', ' ', text)
    stemming_text = stemming_text.lower()
    stemming_text = stemming_text.split()
    temp_stemming_text_list = []
    for word in stemming_text:
        if word not in stopwords.words('english'):
            temp_stemming_text_list.append(ps.stem(word))
    stemming_text = ' '.join(temp_stemming_text_list)
    return stemming_text


In [21]:
df_news["text"] = df_news["text"].apply(stemming)

In [22]:
x = df_news["text"].values
y = df_news["class"].values

In [23]:
vector = TfidfVectorizer()
vector.fit(x)
x = vector.transform(x)

In [24]:
x_train, x_test, y_train, y_test = train_test_split(x ,y , test_size=0.3, random_state=42)

In [25]:
print("x_train_shape:", x_train.shape)
print("y_train_shape:", y_train.shape)
print("x_test_shape:", x_test.shape)
print("y_test_shape:", y_test.shape)

x_train_shape: (31428, 89633)
y_train_shape: (31428,)
x_test_shape: (13470, 89633)
y_test_shape: (13470,)


### Create Logistic Regression

In [26]:
model_lr = LogisticRegression()
model_lr.fit(x_train, y_train)

In [27]:
lr_train_pred = model_lr.predict(x_train)
print('Logistic Regression Train Accuracy: ', accuracy_score(lr_train_pred,y_train))

Logistic Regression Train Accuracy:  0.9900089092528955


In [28]:
lr_test_pred = model_lr.predict(x_test)
print("Logistic Regression Test Accuracy: ", accuracy_score(lr_test_pred, y_test))

Logistic Regression Test Accuracy:  0.9865627319970305


### Create Decision Tree

In [29]:
model_dt = DecisionTreeClassifier()
model_dt.fit(x_train, y_train)

In [30]:
dt_train_pred = model_dt.predict(x_train)
print('Decision Tree Train Accuracy: ', accuracy_score(dt_train_pred,y_train))

Decision Tree Train Accuracy:  0.9999681812396589


In [31]:
dt_test_pred = model_dt.predict(x_test)
print("Decision Tree Test Accuracy: ", accuracy_score(dt_test_pred, y_test))

Decision Tree Test Accuracy:  0.9948032665181886


### Create Random Forest

In [32]:
model_rf = RandomForestClassifier()
model_rf.fit(x_train, y_train)

In [33]:
model_rf_train_pred = model_rf.predict(x_train)
print('Random Forest Train Accuracy: ', accuracy_score(model_rf_train_pred,y_train))

Random Forest Train Accuracy:  0.9999681812396589


In [34]:
model_rf_test_pred = model_rf.predict(x_test)
print("Random Forest Test Accuracy: ", accuracy_score(model_rf_test_pred, y_test))

Random Forest Test Accuracy:  0.9910913140311804


### Prediction

In [35]:
def lr_prediction(text):
    return model_lr.predict(text)

def dt_prediction(text):
    return model_dt.predict(text)

def rf_prediction(text):
    return model_rf.predict(text)

In [46]:
text = input("Enter the text: ")
text = stemming(text)
text = vector.transform([text])

lr_pred_result = lr_prediction(text)
dt_pred_result = dt_prediction(text)
rf_pred_result = rf_prediction(text)

if lr_pred_result == 1:
    print("Logistic Regression: Real News")
else:
    print("Logistic Regression: Fake News")

if dt_pred_result == 1:
    print("Decision Tree: Real News")
else:
    print("Decision Tree: Fake News")

if rf_pred_result == 1:
    print("Random Forest: Real News")
else:
    print("Random Forest: Fake News")


if mode([lr_pred_result[0], dt_pred_result[0], rf_pred_result[0]]) == 1:
    print("Final Decision: Real News")
else:
    print("Final Decision: Fake News")

Enter the text: BRUSSELS (Reuters) - NATO allies on Tuesday welcomed President Donald Trump s decision to commit more forces to Afghanistan, as part of a new U.S. strategy he said would require more troops and funding from America s partners. Having run for the White House last year on a pledge to withdraw swiftly from Afghanistan, Trump reversed course on Monday and promised a stepped-up military campaign against  Taliban insurgents, saying:  Our troops will fight to win .  U.S. officials said he had signed off on plans to send about 4,000 more U.S. troops to add to the roughly 8,400 now deployed in Afghanistan. But his speech did not define benchmarks for successfully ending the war that began with the U.S.-led invasion of Afghanistan in 2001, and which he acknowledged had required an   extraordinary sacrifice of blood and treasure .  We will ask our NATO allies and global partners to support our new strategy, with additional troops and funding increases in line with our own. We are 

### Saving Models

In [43]:
import pickle
from google.colab import files

# Save the Logistic Regression model
pickle_file = "model_lr.pkl"
with open(pickle_file, 'wb') as file:
    pickle.dump(model_lr, file)
files.download(pickle_file)

# Save the Decision Tree model
pickle_file = "model_dt.pkl"
with open(pickle_file, 'wb') as file:
    pickle.dump(model_dt, file)
files.download(pickle_file)

# Save the Random Forest model
pickle_file = "model_rf.pkl"
with open(pickle_file, 'wb') as file:
    pickle.dump(model_rf, file)
files.download(pickle_file)

# Save the Vectorizer
pickle_file = "vectorizer.pkl"
with open(pickle_file, 'wb') as file:
    pickle.dump(vector, file)
files.download(pickle_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [45]:
import sklearn
print(sklearn.__version__)


1.2.2


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>