In [66]:
import hydro_serving_grpc as hs
import grpc

In [67]:
import bz2
import re
import numpy as np

In [68]:
# Open an unencrypted (insecure) gRPC channel to the serving endpoint on localhost:9090
# and create the PredictionService client stub that the request cells below call.
channel = grpc.insecure_channel("localhost:9090") 
stub = hs.PredictionServiceStub(channel)

# Amazon reviews dataset for sentiment analysis

## 1. Load Data

- You can skip this step if you do not want to test on real Amazon data.

In [4]:
# Read every line of the bz2-compressed train and test files into memory as raw bytes.
# The context managers close each handle as soon as its lines are read, even if
# readlines() raises, instead of leaving the files open for the life of the kernel.
with bz2.BZ2File('../data/train.ft.txt.bz2') as train_file:
    train_file_lines = train_file.readlines()
with bz2.BZ2File('../data/test.ft.txt.bz2') as test_file:
    test_file_lines = test_file.readlines()

In [5]:
# Decode the raw byte lines to text (UTF-8).
# This cell rebinds its own inputs, so on a second execution the lists already hold
# str objects, which have no .decode(); the isinstance guard makes the cell safe to re-run.
train_file_lines = [x.decode('utf-8') if isinstance(x, bytes) else x for x in train_file_lines]
test_file_lines = [x.decode('utf-8') if isinstance(x, bytes) else x for x in test_file_lines]

In [6]:
# Binary label per training line: 0 when the first space-separated token is
# '__label__1', 1 for anything else.
train_labels = [0 if x.split(' ', 1)[0] == '__label__1' else 1 for x in train_file_lines]

# Sentence per training line: the text after the first space, without its trailing
# newline, lower-cased, with every digit replaced by '0'.
# - rstrip('\n') instead of [:-1]: a final line with no newline keeps its last character.
# - r'\d' instead of '\d': '\d' is an invalid escape sequence in a normal string literal
#   and triggers a warning on current Python versions.
train_sentences = [
    re.sub(r'\d', '0', x.split(' ', 1)[1].rstrip('\n').lower())
    for x in train_file_lines
]

In [7]:
# Binary label per test line: 0 when the first space-separated token is
# '__label__1', 1 for anything else.
test_labels = [0 if x.split(' ', 1)[0] == '__label__1' else 1 for x in test_file_lines]

# Sentence per test line: the text after the first space, without its trailing
# newline, lower-cased, with every digit replaced by '0'.
# - rstrip('\n') instead of [:-1]: a final line with no newline keeps its last character.
# - r'\d' instead of '\d': '\d' is an invalid escape sequence in a normal string literal
#   and triggers a warning on current Python versions.
test_sentences = [
    re.sub(r'\d', '0', x.split(' ', 1)[1].rstrip('\n').lower())
    for x in test_file_lines
]

In [8]:
# Replace URL-like tokens with the "<url>" placeholder, in place, in both sentence lists.
# Only sentences containing one of the marker substrings are rewritten; within those,
# the pattern matches a run of non-space characters that ends in a dot followed by
# three lowercase letters.
for sentences in (train_sentences, test_sentences):
    for idx, text in enumerate(sentences):
        looks_like_url = (
            'www.' in text
            or 'http:' in text
            or 'https:' in text
            or '.com' in text
        )
        if looks_like_url:
            sentences[idx] = re.sub(r"([^ ]+(?<=\.[a-z]{3}))", "<url>", text)

In [9]:
# Take the first preprocessed test sentence as the sample review sent to the
# services in the cells below; the bare name displays it as the cell output.
review = test_sentences[0]
review

'great cd: my lovely pat has one of the great voices of her generation. i have listened to this cd for years and i still love it. when i\'m in a good mood it makes me feel better. a bad mood just evaporates like sugar in the rain. this cd just oozes life. vocals are jusat stuunning and lyrics just kill. one of life\'s hidden gems. this is a desert isle cd in my book. why she never made it big is just beyond me. everytime i play this, no matter black, white, young, old, male, female everybody says one thing "who was that singing ?"'

## 2. Test tokenizer

For tokenization we use our deployed `amazon_tokenizer` model, for which we created an application.

In [10]:
# Model spec naming the tokenizer application that the tokenization request is addressed to.
tokenize_model = hs.ModelSpec(name="amazon_tokenizer") # change name to your application name

In [11]:
# Wrap the sample review in a string tensor: the text is encoded to bytes and
# carried as the single string_val entry; the shape declares no dimensions.
tensor_shape = hs.TensorShapeProto()
text_tensor = hs.TensorProto(
    dtype=hs.DT_STRING,
    tensor_shape=tensor_shape,
    string_val=[review.encode()],
)

In [12]:
# Send the review to the tokenizer application under the "text" input name
# and keep the response for the next cell.
request = hs.PredictRequest(
    model_spec=tokenize_model,
    inputs={"text": text_tensor},
)
result = stub.Predict(request)

In [13]:
# Read the token ids from the int64 values of the response's 'tokenized' output.
tokenized_sentence = result.outputs.get('tokenized').int64_val

In [14]:
# Display the token ids returned by the tokenizer.
tokenized_sentence

[95, 21, 1531, 4475, 44, 24, 7, 1, 30, 1917, 7, 79, 1957, 3, 20, 1030, 5, 8, 95, 11, 138, 2, 3, 127, 81, 6, 51, 110, 10, 4, 32, 2062, 6, 209, 43, 229, 91, 4, 128, 2062, 36, 33, 2304, 10, 1, 2274, 8, 95, 36, 154, 1031, 23, 2, 646, 36, 1528, 24, 7, 6335, 2461, 4235, 8, 9, 4, 3888, 12033, 95, 10, 21, 19, 182, 97, 122, 129, 6, 220, 9, 36, 861, 43, 2693, 3, 235, 8, 54, 615, 429, 576, 459, 134, 1776, 1351, 2198, 498, 24, 162, 72, 13, 12, 910]

# 2. Test prediction model

For sentiment prediction we use the estimator model, for which we created an application.

In [18]:
# Model spec naming the sentiment-estimator application that the prediction request is addressed to.
prediction_model = hs.ModelSpec(name="amazon_est") # change name to your own created application

In [19]:
# Package the token ids as a one-dimensional int64 tensor.
# The declared dimension comes from the actual number of ids rather than a hard-coded
# 100, so the shape always agrees with the payload (the tokenizer output shown above
# has exactly 100 ids, so the request is unchanged for that case).
# NOTE(review): if the estimator only accepts length-100 input, a different tokenizer
# output length will still be rejected server-side - confirm against the model contract.
tensor_shape = hs.TensorShapeProto(dim=[hs.TensorShapeProto.Dim(size=len(tokenized_sentence))])
tokenized_tensor = hs.TensorProto(dtype=hs.DT_INT64,
                                    int64_val = np.array(tokenized_sentence),
                                    tensor_shape=tensor_shape)

In [20]:
# Send the token ids to the estimator application under the "tokenized" input name
# and keep the response for inspection below.
request = hs.PredictRequest(
    model_spec=prediction_model,
    inputs={"tokenized": tokenized_tensor},
)
result = stub.Predict(request)

In [21]:
# Display the raw response from the estimator application.
result

outputs {
  key: "confidence"
  value {
    dtype: DT_DOUBLE
    tensor_shape {
    }
    double_val: 0.9660149216651917
  }
}
outputs {
  key: "label"
  value {
    dtype: DT_INT32
    tensor_shape {
    }
    int_val: 1
  }
}

In [22]:
# Pull the 'label' and 'confidence' outputs out of the response, take the first
# value of each, and print them next to the true label of the sample review.
label_tensor = result.outputs.get('label')
conf_tensor = result.outputs.get('confidence')
predicted_label = label_tensor.int_val[0]
predicted_conf = conf_tensor.double_val[0]
print(
    f'Predicted score: {predicted_conf} \nPredicted label: {predicted_label} \nReal label: {test_labels[0]}'
)

Predicted score: 0.9660149216651917 
Predicted label: 1 
Real label: 1


# 3. Testing whole pipeline

For this test we created a pipeline application that consists of two stages:

- tokenization
- sentiment estimation

In this app we pass the whole review text and receive its sentiment prediction.

In [37]:
# Model spec naming the pipeline application that the end-to-end request is addressed to.
pipeline_model = hs.ModelSpec(name="amazon_reviews") # change name to your own created application


In [38]:
# Wrap the sample review in a string tensor for the pipeline: the text is encoded
# to bytes and carried as the single string_val entry; the shape declares no dimensions.
tensor_shape = hs.TensorShapeProto()
text_tensor = hs.TensorProto(
    dtype=hs.DT_STRING,
    tensor_shape=tensor_shape,
    string_val=[review.encode()],
)

In [39]:
# Send the raw review text to the pipeline application under the "text" input name
# and display the response.
request = hs.PredictRequest(
    model_spec=pipeline_model,
    inputs={"text": text_tensor},
)
result = stub.Predict(request)
result

outputs {
  key: "confidence"
  value {
    dtype: DT_DOUBLE
    tensor_shape {
    }
    double_val: 0.9660149216651917
  }
}
outputs {
  key: "label"
  value {
    dtype: DT_INT32
    tensor_shape {
    }
    int_val: 1
  }
}

In [60]:
# Pull the 'label' and 'confidence' outputs out of the pipeline response, take the
# first value of each, and print them next to the true label of the sample review.
label_tensor = result.outputs.get('label')
conf_tensor = result.outputs.get('confidence')
predicted_label = label_tensor.int_val[0]
predicted_conf = conf_tensor.double_val[0]
print(
    f'Predicted score: {predicted_conf} \nPredicted label: {predicted_label} \nReal label: {test_labels[0]}'
)

Predicted score: 0.9660149216651917 
Predicted label: 1 
Real label: 1
