In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
import torch
from pathlib import Path
from transformers import AutoTokenizer
import numpy as np
import torch
import config

In [3]:
# Directory the model is loaded from (load_model reads path/'model').
# Fail fast here if the directory is missing rather than later inside torch.load.
path = Path('/data/yelp/model_save')
assert path.exists()

In [4]:
!ls {path}

model  special_tokens_map.json	tokenizer_config.json  vocab.txt


In [5]:
def load_model():
    """Load the serialized model stored at ``path/'model'`` for CPU inference.

    Returns:
        The object deserialized by ``torch.load``, placed on the CPU and
        switched to evaluation mode.
    """
    # map_location='cpu' remaps storages while unpickling, so a checkpoint
    # that was saved from a GPU also loads on a machine without CUDA.
    # NOTE: torch.load unpickles the file, which can execute arbitrary code;
    # only load checkpoints that come from a trusted source.
    model = torch.load(path/'model', map_location='cpu')
    model.cpu()
    model.eval()
    return model

In [6]:
# The tokenizer is built from the pretrained model name declared in config.
# NOTE(review): the save directory listed above also contains tokenizer files
# (vocab.txt, tokenizer_config.json); confirm both describe the same vocabulary.
tokenizer = AutoTokenizer.from_pretrained(config.PRE_TRAINED_MODEL_NAME)
# Module-level model used by predict(); load_model() returns it on CPU in eval mode.
model = load_model()

In [7]:
def encode(sequence, max_length=512):
    """Tokenize one text sequence into padded PyTorch tensors.

    Args:
        sequence: Text to encode.
        max_length: Value forwarded to the tokenizer as ``max_length``.
            Defaults to 512, the value that was previously hard-coded, so
            existing ``encode(sequence)`` calls behave exactly as before.

    Returns:
        The result of ``tokenizer.encode_plus``, requested with PyTorch
        tensors (``return_tensors='pt'``), an attention mask, and without
        token type ids.
    """
    # NOTE(review): newer transformers releases deprecate `pad_to_max_length`
    # in favour of `padding='max_length'` (plus `truncation=True`); the
    # original keyword is kept because the installed version is not known here.
    return tokenizer.encode_plus(
        sequence,
        add_special_tokens=True,
        max_length=max_length,
        return_token_type_ids=False,
        pad_to_max_length=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

In [18]:
def predict(sequence='I love you a lot. You are really great. You are wonderful and awesome.'):
    """Score a single piece of text with the module-level ``model``.

    Args:
        sequence: Text to classify.

    Returns:
        A dict of plain Python values:
          * ``'raw'``: first element of the model output, as nested lists
            (presumably the per-class logits, shape (1, n_classes) — confirm
            against the model definition).
          * ``'sigmoid'``: element-wise sigmoid of ``'raw'``.
          * ``'proportional'``: the sigmoid values divided by their sum.
          * ``'stars'``: index of the largest raw score along dim 1, plus one.
    """
    encoded = encode(sequence)
    with torch.no_grad():
        output = model(
            encoded['input_ids'].cpu(),
            token_type_ids=None,
            attention_mask=encoded['attention_mask'].cpu(),
        )[0]

    # torch.argmax keeps the computation in torch; calling np.argmax on a
    # tensor only works through numpy's duck-typed dispatch to Tensor.argmax.
    best_index = torch.argmax(output, dim=1).flatten()

    # Compute the sigmoid once and reuse it for both derived views.
    sigmoid = torch.sigmoid(output)
    proportional = sigmoid / sigmoid.sum()

    return {
        'proportional': proportional.numpy().tolist(),
        'sigmoid': sigmoid.numpy().tolist(),
        # .item() requires exactly one prediction, i.e. a single input sequence.
        'stars': best_index.item() + 1,
        'raw': output.numpy().tolist(),
    }

In [19]:
# Smoke test using predict()'s built-in default sentence.
predict()

{'proportional': [[0.01062739547342062,
   0.01555375661700964,
   0.0762726366519928,
   0.4238334000110626,
   0.47371283173561096]],
 'sigmoid': [[0.022402459755539894,
   0.03278718516230583,
   0.1607820689678192,
   0.8934372067451477,
   0.9985826015472412]],
 'stars': 5,
 'raw': [[-3.7759273052215576,
   -3.38438081741333,
   -1.6524205207824707,
   2.1263418197631836,
   6.557493209838867]]}

In [10]:
# Probe with a clearly negative sentence.
# NOTE(review): the recorded output below prints tensors and carries an earlier
# execution count (In [10]) than the predict() definition (In [18]), which
# returns plain lists — the output looks stale; re-run the cell to refresh it.
predict('This is really terrible. Just avoid it')

{'proportional': tensor([[0.4823, 0.4148, 0.0824, 0.0080, 0.0125]]),
 'sigmoid': tensor([[0.9988, 0.8588, 0.1707, 0.0166, 0.0258]]),
 'stars': 1,
 'raw': tensor([[ 6.6865,  1.8058, -1.5807, -4.0844, -3.6306]])}

In [11]:
# Probe with a mixed-sentiment sentence ("good things" mentioned before "bad things").
predict('There are some good things and bad things about this business')

{'proportional': tensor([[0.2590, 0.3784, 0.3179, 0.0350, 0.0096]]),
 'sigmoid': tensor([[0.6652, 0.9718, 0.8163, 0.0899, 0.0248]]),
 'stars': 2,
 'raw': tensor([[ 0.6864,  3.5408,  1.4915, -2.3147, -3.6728]])}

In [12]:
# Same mixed-sentiment sentence with "bad" and "good" swapped — presumably to
# see how sensitive the prediction is to word order.
predict('There are some bad things and good things about this business')

{'proportional': tensor([[0.1187, 0.2201, 0.2986, 0.1887, 0.1739]]),
 'sigmoid': tensor([[0.2934, 0.5441, 0.7381, 0.4666, 0.4299]]),
 'stars': 3,
 'raw': tensor([[-0.8791,  0.1769,  1.0361, -0.1338, -0.2821]])}

In [13]:
# Probe with a positive-but-qualified sentence.
predict('This is quite good. There are better products, but this is worth my recommendation too.')

{'proportional': tensor([[0.0057, 0.0314, 0.3342, 0.3823, 0.2464]]),
 'sigmoid': tensor([[0.0149, 0.0816, 0.8694, 0.9944, 0.6410]]),
 'stars': 4,
 'raw': tensor([[-4.1894, -2.4214,  1.8958,  5.1725,  0.5797]])}