In [0]:
!pip install fasttext

In [0]:
import boto3
import fasttext
import nltk

# Download the "punkt" tokenizer data; nltk.word_tokenize (used by
# tokenize_text below) needs it to be present locally.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /home/ec2-user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [0]:
# Fetch the two trained model archives (star rating and positive/negative)
# from S3 into the current working directory.
s3 = boto3.client('s3')
s3.download_file(
    Bucket='yelp-dataset-pt-9',
    Key='spencer/models/blazingtext/nltk/6m/stars/BlazingText-6m-stars/output/model.tar.gz',
    Filename='stars.tar.gz',
)
s3.download_file(
    Bucket='yelp-dataset-pt-9',
    Key='spencer/models/blazingtext/nltk/6m/pos_neg/BlazingText-6m-pos-neg/output/model.tar.gz',
    Filename='pos_neg.tar.gz',
)

In [0]:
# Unpack the stars archive; per the output below it contains a single model.bin.
!tar -xvf stars.tar.gz

model.bin


In [0]:
# Rename immediately: the pos/neg archive also unpacks to model.bin and would
# otherwise overwrite this file.
!mv model.bin stars.bin

In [0]:
# Unpack the positive/negative archive; it also extracts to model.bin.
!tar -xvf pos_neg.tar.gz

model.bin


In [0]:
# Give the second model its own name; after this no model.bin is left in the
# working directory.
!mv model.bin pos_neg.bin

In [0]:
# Load both extracted model binaries with fastText.
stars_model = fasttext.load_model("stars.bin")
pos_neg_model = fasttext.load_model("pos_neg.bin")





In [0]:
def tokenize_text(text):
    """Tokenize ``text`` with NLTK and return the tokens joined by single spaces."""
    tokens = nltk.word_tokenize(text)
    return " ".join(tokens)

In [0]:
# Smoke test: predict the star label for one tokenized example sentence.
stars_model.predict(tokenize_text("this is a good restaurant."))

(('__label__4',), array([0.75838745]))

In [0]:
# Smoke test: predict the positive/negative label for the same example sentence.
pos_neg_model.predict(tokenize_text("this is a good restaurant."))

(('__label__POSITIVE',), array([0.7498517]))

In [0]:
import fasttext
import re
import lime.lime_text
import numpy as np
import webbrowser
from pathlib import Path

def strip_formatting(string):
    """Normalize a piece of text before it is handed to the classifier.

    The text is lower-cased and every occurrence of one of the characters
    ``. ! ? , ' / ( )`` is surrounded by a space on each side, so that a
    plain whitespace split yields punctuation as separate tokens.
    """
    lowered = string.lower()
    return re.sub(r"([.!?,'/()])", r" \1 ", lowered)

def tokenize_string(string):
    """Split ``string`` on runs of whitespace and return the list of tokens."""
    tokens = string.split()
    return tokens

# Load the trained fastText star-rating model that LIME will explain.
# The module is imported as `fasttext` (lower-case); `fastText` is undefined here.
# The extracted model.bin files were renamed above, so load stars.bin, which
# matches the star class names given to the explainer.
classifier = fasttext.load_model('stars.bin')

# Create a LimeTextExplainer. This object knows how to explain a text-based
# prediction by dropping words randomly.
# split_expression=tokenize_string makes LIME split text the same way as a
# plain whitespace split; bow=False is passed so word positions are kept
# distinct rather than treated as a bag of words.
# NOTE(review): class_names are matched by position to the probability columns
# produced by fasttext_prediction_in_sklearn_format, which orders columns by
# sorted label string. Six names are listed here; confirm the model really has
# six labels in this order, otherwise the displayed names are shifted.
explainer = lime.lime_text.LimeTextExplainer(
    split_expression=tokenize_string,
    bow=False,
    class_names=["No Stars", "1 Star", "2 Stars", "3 Stars", "4 Stars", "5 Stars"]
)


def fasttext_prediction_in_sklearn_format(classifier, texts):
    """Return fastText predictions as a scikit-learn style probability matrix.

    Args:
        classifier: Object exposing ``predict(texts, k)`` that returns a pair
            ``(labels, probabilities)`` with one entry per input text.
        texts: List of strings to classify.

    Returns:
        ``np.ndarray`` with one row per text. Within each row the
        probabilities are reordered so that columns follow the labels sorted
        as strings, giving every row the same column order regardless of the
        order in which the classifier ranked the labels.
    """
    # Ask for up to 10 labels per text so every class gets a probability.
    labels, probabilities = classifier.predict(texts, 10)

    res = []
    for label, probs in zip(labels, probabilities):
        order = np.argsort(np.array(label))
        # The classifier may hand back plain lists instead of arrays; coerce
        # so that indexing with an index array always works.
        res.append(np.asarray(probs)[order])

    return np.array(res)

# Review to explain
review = "this is a good restaurant."

# NOTE(review): this uses strip_formatting + whitespace splitting, whereas the
# prediction cells above feed the models text from tokenize_text (NLTK).
# Confirm which preprocessing the model was trained with.
preprocessed_review = strip_formatting(review)

# Make a prediction and explain it!
# classifier_fn receives a list of perturbed texts from LIME and must return
# one row of class probabilities per text; only the top predicted label is
# explained, using at most 20 features.
exp = explainer.explain_instance(
    preprocessed_review,
    classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
    top_labels=1,
    num_features=20,
)


# Write the explanation next to this code and open it in a browser.
# `__file__` only exists when this runs as a script; a notebook kernel does not
# define it, so fall back to the current working directory in that case.
try:
    output_dir = Path(__file__).resolve().parent
except NameError:
    output_dir = Path.cwd()

# as_uri() requires an absolute path, which both branches above guarantee.
output_filename = output_dir / "explanation.html"
exp.save_to_file(output_filename)
webbrowser.open(output_filename.as_uri())