In [1]:
from deployment_model.seq_model import SeqModel
from preprocessing_helper import *

import torch.nn as nn
import torch
import torchtext
from torchtext.data import Field, Pipeline
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud, STOPWORDS
import pickle
import re

In [47]:
pre_pipeline = Pipeline(lemmatize)
pre_pipeline.add()
pre_pipeline.add_before(preprocessing)

<torchtext.data.pipeline.Pipeline at 0x7f8fdd262210>

In [96]:
TEXT = Field(sequential=True, tokenize=word_tokenize, lower=True, preprocessing=pre_pipeline)

In [97]:
LABELS = ['neu', 'neg', 'pos']

In [98]:
vocab = {}
with open("vocab.pkl", "rb") as f:
    vocab = pickle.load(f)

In [99]:
TEXT.preprocess("I'm very exciting about this amazing event")

['i', 'm', 'very', 'excite', 'about', 'this', 'amaze', 'event']

In [100]:
best_config = {
    "hidden_size": 302,
    "lr": 0.00010769630091763721,
    "l2": 2.5888680371842294e-05,
    "nonlin": "tanh",
    "dropout": 0.1,
    "num_layers": 2,
    "mode": 0,
    "optimizer": "Adam",
    "momentum": 0.1,
}

best_model = SeqModel(
    embedding_size=100,
    vocab_size=len(vocab),
    output_size=3,
    hidden_size=best_config["hidden_size"],
    num_layers=best_config["num_layers"],
    nonlin=best_config["nonlin"],
    dropout_rate=best_config["dropout"],
    mode=best_config["mode"],
    unit="gru",
    more_features=False,
)

In [101]:
best_model.load_state_dict(torch.load("model_deploy.pt"))

<All keys matched successfully>

In [103]:
new_input = "I am not happy at all"
print(TEXT.preprocess(new_input))
trans_input = [vocab.get(token, 0) for token in TEXT.preprocess(new_input)]
trans_input

['i', 'be', 'not', 'happy', 'at', 'all']


[0, 0, 0, 498, 0, 0]

In [104]:
model_outputs = best_model(torch.LongTensor(trans_input).reshape((-1,1)))
model_outputs

tensor([[-1.1397, -2.0872, -0.5869]], grad_fn=<LogSoftmaxBackward>)

In [105]:
probabilities, predicted = torch.max(model_outputs.cpu().data, 1)

In [106]:
pred_label = LABELS[predicted]
pred_label

'pos'