In [1]:
import joblib
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, load_npz
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
# Load the sparse feature matrices for the test split and the manual-testing rows.
xv_test, manual_v = (
    load_npz(npz_name)
    for npz_name in ('testing_data.npz', 'manual_testing_data.npz')
)

In [3]:
# Report the (rows, features) shape of each loaded matrix.
for caption, matrix in (
    ("Testing data dimensions:", xv_test),
    ("Manual testing data dimensions:", manual_v),
):
    print(caption, matrix.shape)

Testing data dimensions: (7723, 88373)
Manual testing data dimensions: (20, 88373)


In [4]:
# Read the test labels, the manual-testing articles and the training data.
y_test, manual, training = (
    pd.read_csv(csv_name)
    for csv_name in ('y_test.csv', 'manual_testing.csv', 'training_data.csv')
)

In [5]:
# Remove the 'Unnamed: 0' column (presumably an index that was written into the CSVs).
# errors='ignore' keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
y_test = y_test.drop(columns='Unnamed: 0', errors='ignore')
manual = manual.drop(columns='Unnamed: 0', errors='ignore')
training = training.drop(columns='Unnamed: 0', errors='ignore')

In [6]:
# Show the (rows, columns) shape of each frame after the column drop.
tuple(frame.shape for frame in (y_test, manual, training))

((7723, 1), (20, 6), (30892, 1))

# FINAL CHECK FOR DUPLICATED TEXTS BETWEEN THE MANUAL AND TRAINING DATA

In [7]:
print(type(manual['text'][0]))
print(type(training['text'][0]))
# Collect the training rows whose text also appears in the manual set.
# One vectorised comparison per manual article replaces the original
# element-by-element double loop (len(manual) * len(training) Series lookups).
# Printed pairs and the contents of `same` are unchanged: manual row first,
# then matching training rows in ascending order.
training_text = training['text']
same = []
for i, text in enumerate(manual['text']):
    for j in training_text.index[training_text == text].tolist():
        print(i, '   ', j)
        same.append(j)

<class 'str'>
<class 'str'>


# SUPPORT VECTOR MACHINE

In [8]:
# Load the persisted model from disk (a support vector classifier, judging by the file name).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load trusted artifacts.
support = joblib.load('Support_Vector.joblib')

In [9]:
# Score the loaded model on the test matrix against the test labels.
# Presumably mean accuracy, as for scikit-learn classifiers - confirm the estimator type.
support.score(xv_test, y_test)

0.9917130648711641

In [10]:
# Keep the test-set predictions; the report and confusion-matrix cells reuse them.
pred_Support = support.predict(xv_test)

In [11]:
# Per-class precision, recall and F1 of the SVM predictions against the test labels.
print(classification_report(y_true=y_test, y_pred=pred_Support))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3483
           1       0.99      0.99      0.99      4240

    accuracy                           0.99      7723
   macro avg       0.99      0.99      0.99      7723
weighted avg       0.99      0.99      0.99      7723



In [12]:
# Confusion matrix of the SVM predictions: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, pred_Support))

[[3444   39]
 [  25 4215]]


# LOGISTIC REGRESSION

In [13]:
# Load the persisted model from disk (a logistic regression classifier, judging by the file name).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load trusted artifacts.
logistics = joblib.load('Logistics_Regression.joblib')

In [14]:
# Second dimension of the fitted coefficient array, i.e. one column per input feature.
# It is expected to equal the number of columns of xv_test.
num_features = logistics.coef_.shape[1]
print("Number of features in the model:", num_features)

Number of features in the model: 88373


In [15]:
# Score the loaded model on the test matrix against the test labels.
# Presumably mean accuracy, as for scikit-learn classifiers - confirm the estimator type.
logistics.score(xv_test, y_test)

0.9839440631878803

In [16]:
# Keep the test-set predictions; the report and confusion-matrix cells reuse them.
pred_Logistics = logistics.predict(xv_test)

In [17]:
# Per-class precision, recall and F1 of the logistic regression predictions against the test labels.
print(classification_report(y_true=y_test, y_pred=pred_Logistics))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98      3483
           1       0.98      0.99      0.99      4240

    accuracy                           0.98      7723
   macro avg       0.98      0.98      0.98      7723
weighted avg       0.98      0.98      0.98      7723



In [18]:
# Confusion matrix of the logistic regression predictions: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=pred_Logistics))

[[3406   77]
 [  47 4193]]


# DECISION TREE CLASSIFIER

In [19]:
# Load the persisted model from disk (a decision tree classifier, judging by the file name).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load trusted artifacts.
decision = joblib.load('DecisionTree_Classifier.joblib')

In [20]:
# Feature count recorded on the fitted tree structure; expected to equal the number of columns of xv_test.
# NOTE(review): `tree_.n_features` is a low-level attribute; scikit-learn's public
# equivalent is presumably `n_features_in_` - consider switching after confirming the version.
num_features = decision.tree_.n_features
print("Number of features in the model:", num_features)

Number of features in the model: 88373


In [21]:
# Score the loaded model on the test matrix against the test labels.
# Presumably mean accuracy, as for scikit-learn classifiers - confirm the estimator type.
decision.score(xv_test, y_test)

0.9948206655444776

In [22]:
# Keep the test-set predictions; the report and confusion-matrix cells reuse them.
pred_Decision = decision.predict(xv_test)

In [23]:
# Per-class precision, recall and F1 of the decision tree predictions against the test labels.
print(classification_report(y_true=y_test, y_pred=pred_Decision))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3483
           1       1.00      1.00      1.00      4240

    accuracy                           0.99      7723
   macro avg       0.99      0.99      0.99      7723
weighted avg       0.99      0.99      0.99      7723



In [24]:
# Confusion matrix of the decision tree predictions: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=pred_Decision))

[[3464   19]
 [  21 4219]]


# GRADIENT BOOSTING CLASSIFIER

In [25]:
# Load the persisted model from disk (a gradient boosting classifier, judging by the file name).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load trusted artifacts.
gradient = joblib.load('GradientBoosting_Classifier.joblib')

In [26]:
# Score the loaded model on the test matrix against the test labels.
# Presumably mean accuracy, as for scikit-learn classifiers - confirm the estimator type.
gradient.score(xv_test, y_test)

0.995727049074194

In [27]:
# Keep the test-set predictions; the report and confusion-matrix cells reuse them.
pred_Gradient = gradient.predict(xv_test)

In [28]:
# Per-class precision, recall and F1 of the gradient boosting predictions against the test labels.
print(classification_report(y_true=y_test, y_pred=pred_Gradient))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      3483
           1       0.99      1.00      1.00      4240

    accuracy                           1.00      7723
   macro avg       1.00      1.00      1.00      7723
weighted avg       1.00      1.00      1.00      7723



In [29]:
# Confusion matrix of the gradient boosting predictions: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=pred_Gradient))

[[3461   22]
 [  11 4229]]


# RANDOM FOREST CLASSIFIER

In [30]:
# Load the persisted model from disk (a random forest classifier, judging by the file name).
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load trusted artifacts.
# NOTE(review): the name `random` would shadow the standard-library module of the
# same name if it were ever imported in this notebook.
random = joblib.load('RandomForest_Classifier.joblib')

In [31]:
# Score the loaded model on the test matrix against the test labels.
# Presumably mean accuracy, as for scikit-learn classifiers - confirm the estimator type.
random.score(xv_test, y_test)

0.981224912598731

In [32]:
# Keep the test-set predictions; the report and confusion-matrix cells reuse them.
pred_Random = random.predict(xv_test)

In [33]:
# Per-class precision, recall and F1 of the random forest predictions against the test labels.
print(classification_report(y_true=y_test, y_pred=pred_Random))

              precision    recall  f1-score   support

           0       0.98      0.97      0.98      3483
           1       0.98      0.99      0.98      4240

    accuracy                           0.98      7723
   macro avg       0.98      0.98      0.98      7723
weighted avg       0.98      0.98      0.98      7723



In [34]:
# Confusion matrix of the random forest predictions: rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=pred_Random))

[[3391   92]
 [  53 4187]]


# MANUAL TESTING

In [35]:
# Densify the sparse manual-testing matrix so that manual_testing() can pick a
# single row by index and reshape it to (1, n_features).
v_array = manual_v.toarray()

In [36]:
def result(num):
    """Translate a predicted class label into a human-readable verdict.

    ``num`` is compared with ``==``, so a plain number or a one-element
    array (as returned by ``predict`` on a single row) both work.

    Returns 'Fake news' for 0, 'Not Fake news' for 1 and ``None`` for
    any other value.
    """
    if num == 0:
        return 'Fake news'
    if num == 1:
        return 'Not Fake news'
    return None
    
def manual_testing(news):
    """Print every loaded model's verdict for one article of the manual set.

    Parameters
    ----------
    news : str
        Article text. It must equal one of the entries of ``manual['text']``;
        the first matching row is used.

    Raises
    ------
    ValueError
        If ``news`` does not match any row of ``manual``. Previously this
        case failed with an ``UnboundLocalError`` on ``req_index``.

    Notes
    -----
    Relies on the notebook globals ``manual``, ``v_array``, ``result`` and the
    five loaded models. Row ``i`` of ``manual`` is assumed to correspond to
    row ``i`` of ``v_array`` - TODO confirm against the vectorisation step.
    """
    # Position of the first row whose text equals `news`, or None if absent.
    req_index = next(
        (i for i, text in enumerate(manual['text']) if text == news),
        None,
    )
    if req_index is None:
        raise ValueError('news text was not found in the manual testing set')

    # The models are fed a single sample as a 2-D (1, n_features) array; build it once.
    sample = v_array[req_index].reshape(1, -1)
    models = (
        ('SVC', support),
        ('LR', logistics),
        ('DT', decision),
        ('GBC', gradient),
        ('RFC', random),
    )
    # Run every prediction before printing anything, as the original did,
    # so a failing model does not leave partial output behind.
    predictions = [(label, model.predict(sample)) for label, model in models]
    for label, prediction in predictions:
        print('\n' + label + ' Prediction: ' + result(prediction))

In [37]:
# Display the manual-testing frame so an article can be picked for the spot check.
manual

Unnamed: 0,title,text,month,year,day,class
0,mcpain john mccain furious that iran treated u...,century wire says as reported earlier this wee...,1.0,2016,16,0
1,justice yahoo settles e mail privacy class act...,century wire says it s a familiar theme whenev...,1.0,2016,16,0
2,sunnistan us and allied safe zone plan to take...,patrick henningsen century wireremember when t...,1.0,2016,15,0
3,how to blow million al jazeera america finally...,century wire says al jazeera america will go d...,1.0,2016,14,0
4,usa navy sailors held by iranian military sign...,century wire says as predicted in its new year...,1.0,2016,12,0
5,the white house and the theatrics of gun control,century wire says all the world s a stage and ...,1.0,2016,7,0
6,activists or terrorists how media controls and...,randy johnson century wirethe majority of main...,1.0,2016,7,0
7,boiler room no surrender no retreat heads will...,tune in to the alternate current radio network...,1.0,2016,6,0
8,federal showdown looms in oregon after blm abu...,century wire says a new front has just opened ...,1.0,2016,4,0
9,a troubled king chicago s rahm emanuel despera...,century wire says it s not that far away guess...,1.0,2016,2,0


In [52]:
# Spot-check the article at row label 6 of the manual set against all five models.
news = manual.loc[6, 'text']
manual_testing(news)


SVC Prediction: Fake news

LR Prediction: Fake news

DT Prediction: Fake news

GBC Prediction: Fake news

RFC Prediction: Fake news
