# In [6]:
## C:\Users\ASUS\AppData\Local\Programs\Python\Python36\Lib\site-packages
import torch
import torch.nn as nn
from torchtext import data
from torchtext import datasets
import pickle
import spacy

class Model(nn.Module):
    """LSTM sentence classifier: embedding -> LSTM -> dropout -> linear layer.

    The network is sequence-first: ``text`` is laid out as
    (sent_len, batch_size), which is what ``nn.LSTM`` and
    ``pack_padded_sequence`` expect when ``batch_first`` is left at its
    default.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden state (per direction).
        output_dim: number of values produced for each sentence.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM also reads the sentence backwards.
        dropout: dropout probability, used between LSTM layers, on the
            embeddings and on the final hidden state.
        pad_idx: vocabulary index of the padding token.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim,
                 n_layers, bidirectional, dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            dropout=dropout)

        ## the classifier sees one final hidden state per direction
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        """Return the raw (pre-sigmoid) scores, shape (batch_size, output_dim).

        Args:
            text: token indices, shape (sent_len, batch_size).
            text_lengths: true length of every sentence in the batch (tensor
                or list); ``pack_padded_sequence`` is called with its default
                ``enforce_sorted``, so lengths must be in decreasing order.
        """
        ## text shape = (sent_len, batch_size)
        embedded = self.dropout(self.embedding(text))  ## shape = (sent_len, batch_size, emb_dim)

        ## recent PyTorch releases require the lengths to live on the CPU
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()

        ## pack the batch so the LSTM skips the padding positions
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)

        ## only the final hidden state is used, so the packed per-step output
        ## is never unpacked
        _, (hidden, _) = self.lstm(packed_embedded)

        ## hidden shape = (num_layers * num_directions, batch_size, hid_dim)

        if self.lstm.bidirectional:
            ## last layer: forward state is hidden[-2], backward state is hidden[-1]
            final_state = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            ## single direction: the last layer's state is hidden[-1]
            final_state = hidden[-1, :, :]

        ## final_state shape = (batch_size, hid_dim * num_directions)
        return self.fc(self.dropout(final_state))

# pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-1.2.0/en_core_web_sm-1.2.0.tar.gz --no-deps
# python -m spacy download en
# TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
# LABEL = data.LabelField(dtype = torch.float)
# train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
#
# MAX_VOCAB_SIZE = 20000
#
# TEXT.build_vocab(train_data,
#                  max_size = MAX_VOCAB_SIZE,
#                  vectors = "glove.6B.100d", ## Global Vectors for Word Representation with 6B tokens and 100d
#                  unk_init = torch.Tensor.normal_) ## normal distribution for out-of-vocab words
#
# LABEL.build_vocab(train_data)


# from torchtext import vocab
# try:
#     vocab._default_unk_index
# except AttributeError:
#     def _default_unk_index():
#         return torch.Tensor.normal_
#     vocab._default_unk_index = _default_unk_index

# pip install torchtext==0.3.1
## NOTE: unpickling can run arbitrary code -- only load a vocab.txt you trust.
with open('vocab.txt', 'rb') as file:
    vocab = pickle.load(file)

## Restore the initialiser for out-of-vocabulary vectors on the loaded object
## (presumably a torchtext Vocab, see the commented-out build code -- confirm).
vocab.unk_init = torch.Tensor.normal_

## The 'en' shortcut only resolves on older spaCy installs (shortcut links were
## removed in spaCy v3), so fall back to the full package name.
try:
    nlp = spacy.load('en')
except OSError:
    nlp = spacy.load('en_core_web_sm')

def predict_sentiment(model, sentence):
    """Score one sentence with ``model`` and return the sigmoid of its output.

    The sentence is tokenised with the module-level spaCy pipeline ``nlp``,
    mapped to indices through the module-level ``vocab`` and fed to the model
    as a batch of one on the module-level ``device``.

    Args:
        model: callable taking ``(text, text_lengths)`` and returning a
            single-element tensor; it is switched to evaluation mode.
        sentence: the raw text to score.

    Returns:
        A float in [0, 1].  NOTE(review): which end means "positive" depends
        on the label vocabulary used in training -- confirm.
    """
    model.eval()

    token_ids = [vocab.stoi[tok.text] for tok in nlp.tokenizer(sentence)]

    ## shape (sent_len, 1): a single column is a batch holding one sentence
    tensor = torch.LongTensor(token_ids).to(device).unsqueeze(1)
    ## the lengths deliberately stay on the CPU
    length_tensor = torch.LongTensor([len(token_ids)])

    ## inference only: do not record an autograd graph
    with torch.no_grad():
        prediction = torch.sigmoid(model(tensor, length_tensor))

    return prediction.item()

## Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## map_location puts the stored tensors on the device chosen above.
loaded_model = torch.load('model.pt', map_location=device)

# sentence = "Shut the fuck up those who say that it is the bad film. It's amazing"
sentence = "I would'n recomend this movie. Don't watch it. It is bad"

## Print a blank line, then the score for the sample sentence.
score = predict_sentiment(loaded_model, sentence)
print()
print(score)


# Output: 0.23083165287971497
