In [31]:
import pandas as pd
import spacy 
from sklearn.metrics import classification_report, confusion_matrix

In [32]:
# Load the scraped reviews. Forward slashes keep this relative path valid on
# Windows, Linux and macOS; a backslash is a path separator on Windows only.
alexa_rev = pd.read_json('../scrape_reviews/alexa_reviews.json')

# Remove 3-star reviews (presumably treated as neutral — confirm against the
# training setup), then renumber the remaining rows from 0. reset_index()
# keeps the previous row labels in an extra column.
drop_index = alexa_rev[alexa_rev['stars']==3].index
alexa_rev = alexa_rev.drop(drop_index).reset_index()

# (review text, star rating) pairs, in the same order as the rows of alexa_rev
data = list(zip(alexa_rev['review'],alexa_rev['stars']))

In [33]:
# Load the best checkpoint produced by training. The path is written with a
# forward slash so it resolves on any operating system, not only Windows.
nlp = spacy.load('output/model-best')

In [34]:
# Helper that prints the model's polarity scores for one review
def predict(text):
    """Print a review and the category scores the model assigns to it.

    Parameters
    ----------
    text : tuple or str
        Either a ``(review, stars)`` pair as stored in ``data``, or a bare
        review string.

    Returns
    -------
    None
        The input (exactly as passed) and ``doc.cats`` are printed.
    """
    # A bare string must not be indexed: text[0] would be only its first
    # character, and the model would silently score that single character.
    review = text if isinstance(text, str) else text[0]
    doc = nlp(review)
    print('Actual text:\n', text)
    print('\nPredicted class:\n', doc.cats)

In [35]:
# Spot-check the (review, stars) pair stored at position 4331 of `data`
predict(text=data[4331])

Actual text:
 ('Sound quality was awesome, ya it works good', 5)

Predicted class:
 {'POSITIVE': 0.909123420715332, 'NEGATIVE': 0.09008251875638962}


In [36]:
# Spot-check the (review, stars) pair stored at position 4100 of `data`
predict(text=data[4100])

Actual text:
 ('I have purchased this order but not fit on my expectation as it is only better for music on voice and set alarm nothing extra feature. Alexa is still not aware with any local indian details like market,Distance or navigation.it was not useful for me.', 2)

Predicted class:
 {'POSITIVE': 0.33145037293434143, 'NEGATIVE': 0.6618508100509644}


In [37]:
# Score one (review, stars) pair and report only the highest-scoring category
text = data[4500]
scores = nlp(text[0]).cats
# Pick the (category, score) pair with the largest score and keep its name
label = max(scores.items(), key=lambda pair: pair[1])[0]
print('Actual text:\n', text)
print('\nPredicted class:', label)

Actual text:
 ("I disliked this product as it does not recognize what I'm saying and it's not responding properly.", 1)

Predicted class: NEGATIVE


In [38]:
# Rows labelled 4051-4501 are the test data. .loc slices by label, so this
# takes everything from label 4051 to the end of the frame.
df = alexa_rev.loc[4051:,['stars', 'review']]

# Ground truth: more than 3 stars -> 1, anything else -> 0
y_true = df.stars.map(lambda x: 1 if x>3 else 0).values

# Bind each review's scores once so the pipeline runs a single time per
# review; calling nlp(rev) separately for the keys and for .get doubles
# the inference cost without changing the result.
y_pred = [
    int(max(cats, key=cats.get) == 'POSITIVE')
    for cats in (nlp(rev).cats for rev in df['review'])
]

# confusion_matrix: rows are true classes, columns are predicted classes
print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))

[[ 45  28]
 [ 10 368]]
              precision    recall  f1-score   support

           0       0.82      0.62      0.70        73
           1       0.93      0.97      0.95       378

    accuracy                           0.92       451
   macro avg       0.87      0.79      0.83       451
weighted avg       0.91      0.92      0.91       451



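The model reaches an accuracy of 92% on the 451 test reviews. Note that the test set is imbalanced (378 positive vs. 73 negative reviews), so accuracy is dominated by the positive class: recall on negative reviews is only 0.62, compared with 0.97 on positive reviews.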