In [69]:
import hydro_serving_grpc as hs
import grpc

In [24]:
import bz2
import re
import numpy as np

In [68]:
# Open a plaintext (non-TLS) gRPC channel to the serving endpoint on localhost:9090
# and build the prediction-service stub that every Predict call below goes through.
channel = grpc.insecure_channel("localhost:9090") 
stub = hs.PredictionServiceStub(channel)

# Amazon reviews dataset for sentiment analysis

## 1. Load Data

- You can skip this step if you do not want to test on real Amazon data; in that case, assign your own review text to `review` before running the sections below.

In [5]:
# Read the bz2-compressed train/test files into lists of raw byte lines.
# The context managers close each file handle as soon as it has been read;
# previously both handles stayed open for the life of the kernel.
# `train_file` / `test_file` remain bound (to the now-closed file objects).
with bz2.BZ2File('../data/train.ft.txt.bz2') as train_file:
    train_file_lines = train_file.readlines()
with bz2.BZ2File('../data/test.ft.txt.bz2') as test_file:
    test_file_lines = test_file.readlines()

In [6]:
# Decode the raw byte lines to text (UTF-8).
# The isinstance guard makes this cell safe to re-run: on a second execution the
# lists already hold `str` objects, which have no `.decode()` and used to raise
# AttributeError.
train_file_lines = [x.decode('utf-8') if isinstance(x, bytes) else x for x in train_file_lines]
test_file_lines = [x.decode('utf-8') if isinstance(x, bytes) else x for x in test_file_lines]

In [9]:
# Each line is expected to look like "<label token> <review text>\n".
# Label: "__label__1" -> 0, any other label token -> 1
# (presumably negative / positive sentiment -- confirm against the dataset description).
train_labels = [0 if x.split(' ', 1)[0] == '__label__1' else 1 for x in train_file_lines]
# Sentence: drop the label token and the final character (assumed to be the trailing
# newline), lowercase, then replace every digit with "0".
# The pattern is a raw string: '\d' in a plain string literal is an invalid escape
# sequence and triggers a warning on modern Python versions.
train_sentences = [
    re.sub(r'\d', '0', x.split(' ', 1)[1][:-1].lower())
    for x in train_file_lines
]

In [10]:
# Each line is expected to look like "<label token> <review text>\n".
# Label: "__label__1" -> 0, any other label token -> 1
# (presumably negative / positive sentiment -- confirm against the dataset description).
test_labels = [0 if x.split(' ', 1)[0] == '__label__1' else 1 for x in test_file_lines]
# Sentence: drop the label token and the final character (assumed to be the trailing
# newline), lowercase, then replace every digit with "0".
# The pattern is a raw string: '\d' in a plain string literal is an invalid escape
# sequence and triggers a warning on modern Python versions.
test_sentences = [
    re.sub(r'\d', '0', x.split(' ', 1)[1][:-1].lower())
    for x in test_file_lines
]

In [11]:
# Substrings whose presence marks a sentence as possibly containing a URL.
_URL_HINTS = ('www.', 'http:', 'https:', '.com')
# A run of non-space characters that ends in a dot followed by three lowercase
# ASCII letters (e.g. "example.com"); compiled once and shared by both datasets.
_URL_PATTERN = re.compile(r"([^ ]+(?<=\.[a-z]{3}))")


def _mask_urls(sentences):
    """Replace URL-like tokens with the literal "<url>", modifying the list in place.

    Only sentences that contain at least one of ``_URL_HINTS`` are rewritten;
    within those, every match of ``_URL_PATTERN`` is substituted.

    Args:
        sentences: mutable list of strings; entries are overwritten by index.
    """
    for idx, sentence in enumerate(sentences):
        if any(hint in sentence for hint in _URL_HINTS):
            sentences[idx] = _URL_PATTERN.sub("<url>", sentence)


# Same masking for both splits (previously two copy-pasted loops).
_mask_urls(train_sentences)
_mask_urls(test_sentences)

In [14]:
# Use the first preprocessed test sentence as the sample input for the requests below.
review = test_sentences[0]
# Bare expression: displays the selected review as the cell output.
review

'great cd: my lovely pat has one of the great voices of her generation. i have listened to this cd for years and i still love it. when i\'m in a good mood it makes me feel better. a bad mood just evaporates like sugar in the rain. this cd just oozes life. vocals are jusat stuunning and lyrics just kill. one of life\'s hidden gems. this is a desert isle cd in my book. why she never made it big is just beyond me. everytime i play this, no matter black, white, young, old, male, female everybody says one thing "who was that singing ?"'

## 2. Test tokenizer

In [15]:
# Model spec identifying the application that the tokenizer request is addressed to.
tokenize_model = hs.ModelSpec(name="tokenizer") # change name to your application name

In [16]:
# Shape proto created with no dimensions set.
tensor_shape = hs.TensorShapeProto()
# String tensor holding the sample review as a single encoded bytes value.
transaction_tensor = hs.TensorProto(
    dtype=hs.DT_STRING,
    tensor_shape=tensor_shape,
    string_val=[review.encode()],
)

In [18]:
# Send the review text to the tokenizer application under the "text" input key.
request = hs.PredictRequest(
    model_spec=tokenize_model,
    inputs={"text": transaction_tensor},
)
result = stub.Predict(request)
# Show the raw response.
result

outputs {
  key: "tokenized"
  value {
    dtype: DT_INT64
    tensor_shape {
    }
    int64_val: 95
    int64_val: 21
    int64_val: 1531
    int64_val: 4475
    int64_val: 44
    int64_val: 24
    int64_val: 7
    int64_val: 1
    int64_val: 30
    int64_val: 1917
    int64_val: 7
    int64_val: 79
    int64_val: 1957
    int64_val: 3
    int64_val: 20
    int64_val: 1030
    int64_val: 5
    int64_val: 8
    int64_val: 95
    int64_val: 11
    int64_val: 138
    int64_val: 2
    int64_val: 3
    int64_val: 127
    int64_val: 81
    int64_val: 6
    int64_val: 51
    int64_val: 110
    int64_val: 10
    int64_val: 4
    int64_val: 32
    int64_val: 2062
    int64_val: 6
    int64_val: 209
    int64_val: 43
    int64_val: 229
    int64_val: 91
    int64_val: 4
    int64_val: 128
    int64_val: 2062
    int64_val: 36
    int64_val: 33
    int64_val: 2304
    int64_val: 10
    int64_val: 1
    int64_val: 2274
    int64_val: 8
    int64_val: 95
    int64_val: 36
    int64_val: 154
    i

In [19]:
# Pull the integer token ids out of the "tokenized" output of the tokenizer response.
tokenized_sentence = result.outputs.get('tokenized').int64_val
# `answer` is a second name bound to the same object.
answer = tokenized_sentence

# 2. Test prediction model

In [22]:
# Model spec identifying the application that the prediction request is addressed to.
prediction_model = hs.ModelSpec(name="amazon_est") # change name to your own created application

In [26]:
# Shape proto created with no dimensions set.
tensor_shape = hs.TensorShapeProto()
# Int64 tensor carrying the token ids returned by the tokenizer.
tokenized_tensor = hs.TensorProto(
    dtype=hs.DT_INT64,
    tensor_shape=tensor_shape,
    int64_val=np.array(tokenized_sentence),
)

In [27]:
# Send the token ids to the prediction application under the "tokenized" input key.
request = hs.PredictRequest(
    model_spec=prediction_model,
    inputs={"tokenized": tokenized_tensor},
)
result = stub.Predict(request)

In [28]:
# Display the raw prediction response.
result

outputs {
  key: "confidence"
  value {
    dtype: DT_DOUBLE
    tensor_shape {
    }
    double_val: 0.9660149216651917
  }
}
outputs {
  key: "label"
  value {
    dtype: DT_INT32
    tensor_shape {
    }
    int_val: 1
  }
}

In [41]:
# Pick the two output tensors out of the prediction response by key.
label_tensor = result.outputs.get('label')
conf_tensor = result.outputs.get('confidence')

In [42]:
# Take the first entry of the label tensor's int_val field as the predicted class.
predicted_label = label_tensor.int_val[0]

In [43]:
# Compare the prediction with the ground-truth label of the same review
# (`review` was taken from test_sentences[0], so index 0 is its label).
predicted_label == test_labels[0]


True

# 3. Testing whole pipeline

In [56]:
# Model spec identifying the application that the whole-pipeline request is addressed to.
pipeline_model = hs.ModelSpec(name="amazon_reviews") # change name to your own created application


In [59]:
# Shape proto created with no dimensions set.
tensor_shape = hs.TensorShapeProto()
# String tensor holding the sample review as a single encoded bytes value.
transaction_tensor = hs.TensorProto(
    dtype=hs.DT_STRING,
    tensor_shape=tensor_shape,
    string_val=[review.encode()],
)

In [60]:
# Send the raw review text to the pipeline application under the "text" input key.
request = hs.PredictRequest(
    model_spec=pipeline_model,
    inputs={"text": transaction_tensor},
)
result = stub.Predict(request)
# Show the raw response.
result

outputs {
  key: "confidence"
  value {
    dtype: DT_DOUBLE
    tensor_shape {
    }
    double_val: 0.9660149216651917
  }
}
outputs {
  key: "label"
  value {
    dtype: DT_INT32
    tensor_shape {
    }
    int_val: 1
  }
}