In [2]:
#import scikitplot as skplt
import matplotlib.pyplot as plt
from matplotlib import rcParams
#plt.gcf().subplots_adjust(bottom=0.15)
import pandas as pd
import numpy as np
import sys
from sklearn.externals import joblib
from sklearn.metrics import roc_curve, auc, classification_report
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

In [None]:
!pip install scikit-plot

In [3]:
# Load the pipe-delimited review data and discard rows with missing values.
# Each step builds a new frame instead of mutating one in place.
df = pd.read_csv('reviews.csv', sep='|').dropna()

# Binarise the target: 'positive' -> 1, every other label -> 0.
df = df.assign(label=np.where(df['label'] == 'positive', 1, 0))

print("Total size: "+str(len(df)))

# Deterministic split on the row label: rows whose label is a multiple of 5
# are held out for testing, everything else is used for training. dropna()
# does not renumber rows, so the labels may contain gaps.
is_test_row = df.index % 5 == 0
train = df[~is_test_row]
# NOTE(review): the first held-out row is skipped, exactly as before; the
# reason is not evident from this notebook -- confirm it is intentional.
test = df[is_test_row].iloc[1:]

print("sliced train size: "+str(len(train)))
print("sliced test size: "+str(len(test)))

# Features are the raw review text; targets are the 0/1 labels built above.
X_train = train["text"]
y_train = train["label"]

X_test = test["text"]
y_test = test["label"]

FileNotFoundError: File b'reviews.csv' does not exist

In [None]:
# SECURITY: joblib files are pickle-based and unpickling can run arbitrary
# code -- only load model files that come from a trusted source.
# Passing the path lets joblib open and close the file itself; the previous
# open(...) handles were never closed.
model = joblib.load('NeuralNet.m')
modelTree = joblib.load('DecisionTree.m')
modelLR = joblib.load('LogisticRegression.m')

In [None]:
# Hard class labels; reused by the report and metric cells further down.
y_prediction = model.predict(X_test)

# A ROC curve needs a continuous score. With hard 0/1 labels roc_curve sees a
# single threshold, so the "curve" is one point joined to the two corners and
# the AUC understates the ranking quality of the model.
if hasattr(model, "predict_proba"):
    # Column 1 is taken as the positive class -- assumes classes_ is [0, 1];
    # TODO confirm against the trained model.
    y_score = model.predict_proba(X_test)[:, 1]
elif hasattr(model, "decision_function"):
    y_score = model.decision_function(X_test)
else:
    # No score available: fall back to the hard labels (previous behaviour).
    y_score = y_prediction

fpr, tpr, t = roc_curve(y_test, y_score, pos_label=1)
roc_auc = auc(fpr, tpr)

In [None]:
# Hard class labels; reused by the report and metric cells further down.
y_predictionTree = modelTree.predict(X_test)

# A ROC curve needs a continuous score. With hard 0/1 labels roc_curve sees a
# single threshold, so the "curve" is one point joined to the two corners and
# the AUC understates the ranking quality of the model.
if hasattr(modelTree, "predict_proba"):
    # Column 1 is taken as the positive class -- assumes classes_ is [0, 1];
    # TODO confirm against the trained model.
    y_scoreTree = modelTree.predict_proba(X_test)[:, 1]
elif hasattr(modelTree, "decision_function"):
    y_scoreTree = modelTree.decision_function(X_test)
else:
    # No score available: fall back to the hard labels (previous behaviour).
    y_scoreTree = y_predictionTree

fprTree, tprTree, t = roc_curve(y_test, y_scoreTree, pos_label=1)
roc_aucTree = auc(fprTree, tprTree)

In [None]:
# Hard class labels; reused by the report and metric cells further down.
y_predictionLR = modelLR.predict(X_test)

# A ROC curve needs a continuous score. With hard 0/1 labels roc_curve sees a
# single threshold, so the "curve" is one point joined to the two corners and
# the AUC understates the ranking quality of the model.
if hasattr(modelLR, "predict_proba"):
    # Column 1 is taken as the positive class -- assumes classes_ is [0, 1];
    # TODO confirm against the trained model.
    y_scoreLR = modelLR.predict_proba(X_test)[:, 1]
elif hasattr(modelLR, "decision_function"):
    y_scoreLR = modelLR.decision_function(X_test)
else:
    # No score available: fall back to the hard labels (previous behaviour).
    y_scoreLR = y_predictionLR

fprLR, tprLR, t = roc_curve(y_test, y_scoreLR, pos_label=1)
roc_aucLR = auc(fprLR, tprLR)

In [None]:
# Overlay the ROC curves of the three classifiers on a single set of axes.
fig, ax = plt.subplots()
lw = 2

# One entry per classifier, in drawing order:
# (false-positive rates, true-positive rates, colour, legend template, AUC, line style)
roc_series = [
    (fpr, tpr, 'red', 'Neural network (AUC = %0.4f)', roc_auc, '-.'),
    (fprLR, tprLR, (1, 191/255, 0), 'Logistic Regression (AUC = %0.4f)', roc_aucLR, ':'),
    (fprTree, tprTree, 'black', 'Decision Tree (AUC = %0.4f)', roc_aucTree, '-.'),
]
for x_vals, y_vals, colour, template, area, dash in roc_series:
    ax.plot(x_vals, y_vals, color=colour,
            lw=lw, label=template % area, linestyle=dash)

# Unlabelled diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate', fontsize=15)
ax.set_ylabel('True Positive Rate', fontsize=15)
ax.set_title('Receiver operating curves for 3 classifiers', fontsize=15)
ax.legend(loc="lower right")

fig.savefig('roc.png', bbox_inches='tight')

plt.show()

In [None]:
# Print the per-class precision/recall/F1 report of every classifier, each
# one followed by a "###" separator line.
report_inputs = (
    ("Neural network precision recall", y_prediction),
    ("Decision Tree precision recall", y_predictionTree),
    ("Logistic Regression precision recall", y_predictionLR),
)
for heading, predicted in report_inputs:
    print(heading)
    print(classification_report(y_test, predicted))
    print("###")

In [None]:
def _macro_summary(y_true, y_pred):
    """Return (macro precision, macro recall, accuracy) for one set of predictions."""
    return (
        precision_score(y_true, y_pred, average="macro"),
        recall_score(y_true, y_pred, average="macro"),
        accuracy_score(y_true, y_pred),
    )


# Summary scores consumed by the bar-chart cells below.
nn_prec, nn_rec, nn_acc = _macro_summary(y_test, y_prediction)
dt_prec, dt_rec, dt_acc = _macro_summary(y_test, y_predictionTree)
lr_prec, lr_rec, lr_acc = _macro_summary(y_test, y_predictionLR)

In [None]:
# Bar chart comparing the accuracy of the three classifiers.
objects = ('Decision Tree', 'Logistic Regression', 'Neural Net')
performance = [dt_acc, lr_acc, nn_acc]
y_pos = np.arange(len(objects))

fig, axes = plt.subplots(figsize=(8,6))
# Pin the y-axis to the 0-1 range.
axes.set_ylim([0,1])

bar_colours = ['black', (1, 191/255, 0), 'red']
axes.bar(y_pos, performance, align='center', alpha=0.9, color=bar_colours)

# Write each score as a percentage in white, 0.1 below the top of its bar.
for x_centre, score in zip(y_pos, performance):
    caption = "{:.2f}%".format(round(score*100, 2))
    axes.text(x_centre, score-.1, caption, fontsize=33,
              horizontalalignment='center', color='white')

axes.set_xticks(y_pos)
axes.set_xticklabels(objects, fontsize=15)
axes.set_ylabel('Accuracy', fontsize=15)
axes.set_title('Accuracy results', fontsize=15)

fig.savefig('accuracy.png', bbox_inches='tight')

plt.show()

In [None]:
# Bar chart comparing the macro-averaged precision of the three classifiers.
objects = ('Decision Tree', 'Logistic Regression', 'Neural Net')
performance = [dt_prec, lr_prec, nn_prec]
y_pos = np.arange(len(objects))

fig, axes = plt.subplots(figsize=(8,6))
# Pin the y-axis to the 0-1 range.
axes.set_ylim([0,1])

bar_colours = ['black', (1, 191/255, 0), 'red']
axes.bar(y_pos, performance, align='center', alpha=0.9, color=bar_colours)

# Write each score as a percentage in white, 0.1 below the top of its bar.
for x_centre, score in zip(y_pos, performance):
    caption = "{:.2f}%".format(round(score*100, 2))
    axes.text(x_centre, score-.1, caption, fontsize=33,
              horizontalalignment='center', color='white')

axes.set_xticks(y_pos)
axes.set_xticklabels(objects, fontsize=15)
axes.set_ylabel('Average Precision', fontsize=15)
axes.set_title('Precision results', fontsize=15)

fig.savefig('precision.png', bbox_inches='tight')

plt.show()

In [None]:
# Bar chart comparing the macro-averaged recall of the three classifiers.
objects = ('Decision Tree', 'Logistic Regression', 'Neural Net')
performance = [dt_rec, lr_rec, nn_rec]
y_pos = np.arange(len(objects))

fig, axes = plt.subplots(figsize=(8,6))
# Pin the y-axis to the 0-1 range.
axes.set_ylim([0,1])

bar_colours = ['black', (1, 191/255, 0), 'red']
axes.bar(y_pos, performance, align='center', alpha=0.9, color=bar_colours)

# Write each score as a percentage in white, 0.1 below the top of its bar.
for x_centre, score in zip(y_pos, performance):
    caption = "{:.2f}%".format(round(score*100, 2))
    axes.text(x_centre, score-.1, caption, fontsize=33,
              horizontalalignment='center', color='white')

axes.set_xticks(y_pos)
axes.set_xticklabels(objects, fontsize=15)
axes.set_ylabel('Average Recall', fontsize=15)
axes.set_title('Recall results', fontsize=15)

fig.savefig('recall.png', bbox_inches='tight')

plt.show()