# Review Classification

## Loading and Inference

### Import Libraries

In [1]:
import spacy
import pickle
import torch
import torch.nn as nn
import warnings

# Suppress every Python warning for the rest of the session so the notebook
# output stays clean. NOTE(review): this also hides library warnings that may
# be informative (e.g. PyTorch configuration warnings) — re-enable when debugging.
warnings.filterwarnings('ignore')

### Creating necessary functions to load tokenizer and vocabulary

In [2]:
# spaCy English pipeline; only its `.tokenizer` component is used below.
tok = spacy.load('en_core_web_sm')
# Load the vocabulary. The pickle holds an indexable object whose first element
# is the token -> index mapping (it is queried with `.get(token, ...)` below).
# SECURITY: `pickle.load` can execute arbitrary code — only open files you trust.
with open("./vocabdict.pkl", "rb") as f:
    vocab_dict = pickle.load(f)[0]
# creating tokenizer function
def tokenize_en(sent):
    """Lowercase ``sent`` and split it into a list of token strings.

    Only the tokenizer of the module-level spaCy pipeline ``tok`` is run.
    """
    doc = tok.tokenizer(sent.lower())
    return list(map(lambda token: token.text, doc))
# function to prepare sentence for inference
def prepare_sentence(sent):
    """Convert a raw sentence into a column tensor of vocabulary indices.

    Args:
        sent: The text to encode.

    Returns:
        A ``torch.LongTensor`` of shape ``(num_tokens, 1)``: one row per
        token and a single column, holding each token's index in
        ``vocab_dict``. Tokens missing from the vocabulary are mapped to the
        index stored under ``'<unk>'``.
    """
    # Tokenize the argument itself. The previous code tokenized the global
    # `pos_sent`, so the function ignored its input entirely.
    tokens = tokenize_en(sent)

    # Look the fallback index up once instead of once per token.
    unk_idx = vocab_dict.get('<unk>')
    sent_idx = [vocab_dict.get(token, unk_idx) for token in tokens]

    # unsqueeze(1) turns (num_tokens,) into (num_tokens, 1) and keeps that
    # two-dimensional shape even when there are no tokens.
    return torch.tensor(sent_idx, dtype=torch.long).unsqueeze(1)



### Model class

In [3]:
class BiDirectionalLstm(nn.Module):
    """Bidirectional LSTM classifier over sequences of token indices.

    Tokens are embedded and fed through a bidirectional LSTM. The final
    output of the forward direction and the final output of the backward
    direction are concatenated and projected to ``num_classes`` raw scores.
    ``forward`` returns these scores without applying softmax.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_size: LSTM hidden size per direction.
        num_classes: Number of scores produced per sequence.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, num_classes):
        super(BiDirectionalLstm, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_size = hidden_size
        # NOTE: nn.LSTM only applies dropout between stacked layers, so with
        # the default single layer this value has no effect (PyTorch warns
        # about it). The argument is kept so construction is unchanged.
        self.cell = nn.LSTM(embedding_dim, hidden_size, bidirectional=True, dropout=0.2)
        self.linear = nn.Linear(hidden_size * 2, num_classes)
        # Not used by forward(); kept so the module's attributes are unchanged.
        self.soft = nn.Softmax(dim=1)

    def forward(self, x, hstate=None):
        """Compute class scores for a batch of token-index sequences.

        Args:
            x: Integer tensor of token indices laid out as
                ``(seq_len, batch)`` — the LSTM is not ``batch_first`` and
                the last dimension of ``x`` is used as the batch size.
            hstate: Optional ``(h_0, c_0)`` initial LSTM state. A zero state
                is created when omitted.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized scores.

        Raises:
            ValueError: If ``x`` contains no time steps.
        """
        if x.shape[0] == 0:
            raise ValueError("input sequence is empty; at least one token is required")

        if hstate is None:
            hstate = self.init_hidden(self.hidden_size, x.shape[-1])

        cell_out, _ = self.cell(self.embedding(x), hstate)

        # The forward direction has seen the whole sequence at the last time
        # step; the backward direction has seen it at the first time step.
        forward_last = cell_out[-1, :, :self.hidden_size]
        backward_last = cell_out[0, :, self.hidden_size:]
        temp = torch.cat([forward_last, backward_last], dim=-1)

        return self.linear(temp)

    def init_hidden(self, hidden_size, bs):
        """Return a zero ``(h_0, c_0)`` pair for a batch of size ``bs``.

        Both tensors have shape ``(2, bs, hidden_size)`` (one slice per
        direction). They are created on the same device and with the same
        dtype as the LSTM weights, so no module-level device variable is
        needed and the state follows the model when it is moved.
        """
        ref = self.cell.weight_ih_l0
        shape = (2, bs, hidden_size)
        return (
            torch.zeros(shape, device=ref.device, dtype=ref.dtype),
            torch.zeros(shape, device=ref.device, dtype=ref.dtype),
        )

    def load_embeddings(self, embeddings):
        """Copy ``embeddings`` into the embedding table in place.

        ``embeddings`` must be copy-compatible with the existing weight of
        shape ``(vocab_size, embedding_dim)``.
        """
        self.embedding.weight.data.copy_(embeddings)

### Necessary Variables and Model Initialization

In [4]:
# Model hyper-parameters. The checkpoint loaded below must contain tensors of
# matching shapes, otherwise `load_state_dict` rejects it.
VOCAB_SIZE = len(vocab_dict)
EMBEDDING_DIM = 300
HIDDEN_SIZE = 128
NUM_CLASSES = 5  # one output score per rating, displayed as ratings 1-5 below
dev = 'cpu'

net = BiDirectionalLstm(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_SIZE, NUM_CLASSES)
# map_location remaps the stored tensors onto `dev`, so a checkpoint that was
# saved from a GPU still loads on a CPU-only machine.
net.load_state_dict(torch.load("./models/network-val-acc-71.31.pt", map_location=dev))

# Place the model on the chosen device and switch to evaluation mode.
net = net.to(dev).eval()

### Example

#### Prepare sentence

In [5]:
# Example review text to classify.
pos_sent = "The place is good, nice vibe. Food was also great. Overall nice experience."

# Encode the review as a tensor of vocabulary indices for the model.
psent = prepare_sentence(pos_sent)

#### Running Inference

In [6]:
# Inference only: skip autograd bookkeeping, then turn the raw class scores
# into probabilities that sum to 1 along the class dimension.
with torch.no_grad():
    x = torch.softmax(net(psent), dim=-1)

#### Displaying Result

In [7]:
# Print the probability of each rating (1-based) for the first and only
# sequence in the batch, as a percentage with two decimals.
probabilities = x[0].tolist()
for rating, probability in enumerate(probabilities, start=1):
    print("Rating {} Probability Percentage : {:.2f}%".format(rating, probability * 100))

Rating 1 Probability Percentage : 0.04%
Rating 2 Probability Percentage : 0.51%
Rating 3 Probability Percentage : 15.37%
Rating 4 Probability Percentage : 63.12%
Rating 5 Probability Percentage : 20.97%
