## URLs:
GitHub: [https://github.com/gabrielloye/LSTM_Sentiment-Analysis/blob/master/main.ipynb](https://github.com/gabrielloye/LSTM_Sentiment-Analysis/blob/master/main.ipynb)

Article: [https://blog.floydhub.com/long-short-term-memory-from-zero-to-hero-with-pytorch/](https://blog.floydhub.com/long-short-term-memory-from-zero-to-hero-with-pytorch/)

Data: [https://www.kaggle.com/bittlingmayer/amazonreviews](https://www.kaggle.com/bittlingmayer/amazonreviews)

In [13]:
import json

import numpy as np

import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn

In [14]:
# Script settings.
# When True, the notebook reads the "*_small" dataset and dictionary files and the
# training loop reports validation loss every 10 steps instead of every 1000.
SMALLER_SAMPLE = False

In [15]:
def trace(txt, var):
    """Print a labelled dump of ``var`` for debugging.

    Args:
        txt: Label printed in front of the value.
        var: Value to display. A ``torch.Tensor`` or ``np.ndarray`` is printed
            together with its shape. A non-empty tuple made up entirely of
            tensors/arrays is printed as a whole and then element by element
            with each element's shape. Anything else (including an empty or
            mixed tuple) is printed as-is.
    """
    array_types = (torch.Tensor, np.ndarray)

    print()
    if isinstance(var, array_types):
        print("{}   <<{}>>:\n{}".format(txt, var.shape, var))
    elif isinstance(var, tuple) and var and all(isinstance(item, array_types) for item in var):
        # Every element must expose ``.shape``; checking only the first one would
        # crash on empty tuples (IndexError) and on mixed tuples (AttributeError).
        print("{}:\n{}".format(txt, var))
        for idx, item in enumerate(var):
            print("[{}th]  {}   <<{}>>:\n{}".format(idx, txt, item.shape, item))
    else:
        print("{}:\n{}".format(txt, var))

def _print_(txt):
    print("\n{}\n".format(txt))

## Load Data 

In [16]:
# Load data.

if SMALLER_SAMPLE:
    filename = "./data/processed_small.npz"
else:
    filename = "./data/processed.npz"

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the context manager closes it once the arrays have been read into memory.
with np.load(filename) as npzfile:
    train_sentences, train_labels = npzfile["train_sentences"], npzfile["train_labels"]
    test_sentences, test_labels = npzfile["test_sentences"], npzfile["test_labels"]
    val_sentences, val_labels = npzfile["val_sentences"], npzfile["val_labels"]

# Show the shape and a preview of every loaded array.
loaded_arrays = (
    ("train_sentences", train_sentences),
    ("train_labels", train_labels),
    ("test_sentences", test_sentences),
    ("test_labels", test_labels),
    ("val_sentences", val_sentences),
    ("val_labels", val_labels),
)
for txt, var in loaded_arrays:
    trace(txt, var)


# Load dictionaries.

if SMALLER_SAMPLE:
    word2idx_fname = "./data/word2idx_small.json"
    idx2word_fname = "./data/idx2word_small.json"
else:
    word2idx_fname = "./data/word2idx.json"
    idx2word_fname = "./data/idx2word.json"

with open(word2idx_fname, "r") as f:
    word2idx = json.load(f)
# NOTE: JSON object keys are always strings, so idx2word is keyed by e.g. "42",
# not 42 -- convert when looking up integer token ids.
with open(idx2word_fname, "r") as f:
    idx2word = json.load(f)


train_sentences   <<(800000, 200)>>:
[[    0     0     0 ...   313    15 16999]
 [    0     0     0 ...   168  2612     2]
 [    0     0     0 ...    52   264     2]
 ...
 [    0     0     0 ...  3764  2894     2]
 [    0     0     0 ...   610   488     2]
 [    0     0     0 ...   714   997     2]]

train_labels   <<(800000,)>>:
[1 1 1 ... 1 1 0]

test_sentences   <<(100000, 200)>>:
[[    0     0     0 ...  6274  6121     2]
 [    0     0     0 ...    11    24     2]
 [    0     0     0 ... 11723   442     2]
 ...
 [    0     0     0 ...    81   152     2]
 [   20   283    29 ... 11765    32     2]
 [    0     0     0 ...    50  4747     2]]

test_labels   <<(100000,)>>:
[1 0 1 ... 1 0 1]

val_sentences   <<(100000, 200)>>:
[[    0     0     0 ...   825    62    32]
 [    0     0     0 ...   177     9     2]
 [    0     0     0 ...  3705   517     2]
 ...
 [    0     0     0 ...   589    99    62]
 [    0     0     0 ...  2884  8045     2]
 [    0     0     0 ...  2721 25644     2]]


## PyTorch Prep

In [17]:
# Reproducibility:
# Seed every random number generator this notebook relies on (NumPy and PyTorch).
seed = 12345
np.random.seed(seed)
torch.manual_seed(seed)
# Ask cuDNN for deterministic kernels and switch off its auto-tuner, which may
# otherwise select different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [18]:
# Wrap the NumPy arrays as (sentence, label) tensor datasets.
train_data = TensorDataset(torch.from_numpy(train_sentences), torch.from_numpy(train_labels))
val_data = TensorDataset(torch.from_numpy(val_sentences), torch.from_numpy(val_labels))
test_data = TensorDataset(torch.from_numpy(test_sentences), torch.from_numpy(test_labels))

batch_size = 400

# Only the training data is shuffled. Validation and test batches are served in
# a fixed order so that evaluation does not depend on the state of the RNG.
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [19]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    # Prefer device 1, not 0, but fall back to device 0 on single-GPU machines,
    # where "cuda:1" would be an invalid device ordinal.
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device("cuda:{}".format(gpu_index))
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU is available


In [20]:
# Pull a single batch to sanity-check the tensor shapes the model will receive.
dataiter = iter(train_loader)
# Use the built-in next(): DataLoader iterators no longer provide a .next() method.
sample_x, sample_y = next(dataiter)

print("x (sentence): ", sample_x.shape, "\ny (label): ", sample_y.shape)

x (sentence):  torch.Size([400, 200]) 
y (label):  torch.Size([400])


## Define Model

Diagram:
![img](./content/ModelArchitecture.JPG)

In [21]:
class SentimentNet(nn.Module):
    """Embedding -> stacked LSTM -> dropout -> linear -> sigmoid classifier.

    ``forward`` scores every time step and returns, for each sequence, the last
    value of the flattened per-step scores (the last time step's score when
    ``output_size`` is 1).
    """

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            output_size: Number of output units of the final linear layer.
            embedding_dim: Size of each token embedding (LSTM input size).
            hidden_dim: Size of the LSTM hidden state.
            n_layers: Number of stacked LSTM layers.
            drop_prob: Dropout probability used between LSTM layers. The dropout
                applied before the linear layer is fixed at 0.2.
        """
        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_dim, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, hidden, trace_on=False):
        """Run a batch of token-id sequences through the network.

        Args:
            x: Integer token ids; the first dimension is the batch
                (the LSTM is built with ``batch_first=True``).
            hidden: ``(h, c)`` tuple of LSTM states, e.g. from ``init_hidden``.
            trace_on: When True, print every intermediate tensor.

        Returns:
            ``(out, hidden)``: one sigmoid score per sequence and the updated
            LSTM state tuple.
        """
        if trace_on:
            _print_("--- forward() ---")

        batch_size = x.size(0)
        if trace_on:
            trace("batch_size = x.size(0)", batch_size)
        x = x.long()  # nn.Embedding needs integer (long) indices
        if trace_on:
            trace("x = x.long()", x)
        embeds = self.embedding(x)
        if trace_on:
            trace("embeds = self.embedding(x)", embeds)
        lstm_out, hidden = self.lstm(embeds, hidden)
        if trace_on:
            trace("lstm_out <- self.lstm(embeds, hidden)", lstm_out)
            trace("hidden <- self.lstm(embeds, hidden)", hidden)
        # Flatten (batch, step) into one axis so the linear layer scores every step.
        # contiguous() is related to memory contiguity: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.contiguous
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
        if trace_on:
            trace("lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)", lstm_out)

        out = self.dropout(lstm_out)
        if trace_on:
            trace("out = self.dropout(lstm_out)", out)
        out = self.fc(out)
        if trace_on:
            trace("out = self.fc(out)", out)
        out = self.sigmoid(out)
        if trace_on:
            trace("out = self.sigmoid(out)", out)

        # Back to one row per sequence, then keep only the final score of each row.
        out = out.view(batch_size, -1)
        if trace_on:
            trace("out = out.view(batch_size, -1)", out)
        out = out[:,-1]
        if trace_on:
            trace("out = out[:,-1]", out)
            _print_("--- <end> forward() ---")

        return out, hidden

    def init_hidden(self, batch_size, trace_on=False):
        """Create zero-filled LSTM states for a batch.

        The states are allocated with the dtype and on the device of the model's
        own parameters, so the method does not rely on any global ``device``
        variable and stays correct wherever the model has been moved.

        Args:
            batch_size: Number of sequences in the batch.
            trace_on: When True, print the reference weight and the new states.

        Returns:
            Tuple ``(h0, c0)``, each of shape ``(n_layers, batch_size, hidden_dim)``.
        """
        if trace_on:
            _print_("--- init_hidden() ---")

        # Any parameter works as a reference for dtype and device.
        weight = next(self.parameters()).data
        if trace_on:
            trace("weight = next(self.parameters()).data", weight)

        state_shape = (self.n_layers, batch_size, self.hidden_dim)
        hidden = (weight.new_zeros(state_shape), weight.new_zeros(state_shape))
        if trace_on:
            trace("hidden = ...", hidden)
            _print_("--- <end> init_hidden() ---")

        return hidden

In [22]:
# Model hyper-parameters.
vocab_size = 1 + len(word2idx)  # one embedding row more than there are dictionary entries
output_size = 1
embedding_dim = 400
hidden_dim = 512
n_layers = 2

# Build the network and move its parameters to the selected device.
model = SentimentNet(
    vocab_size=vocab_size,
    output_size=output_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
).to(device)
print(model)

SentimentNet(
  (embedding): Embedding(225536, 400)
  (lstm): LSTM(400, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


## Train Model 

In [23]:
# Training variables.

lr = 0.005  # step size handed to Adam
criterion = nn.BCELoss()  # binary cross-entropy on the model's sigmoid outputs
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [24]:
epochs = 2
counter = 0  # number of optimisation steps taken so far, across epochs
clip = 5  # max gradient norm
valid_loss_min = np.inf  # np.Inf was removed in NumPy 2.0; np.inf works everywhere

# How often (in steps) to run validation and report progress.
if SMALLER_SAMPLE:
    print_every = 10
else:
    print_every = 1000


model.train()

for i in range(epochs):

    # Trace the hidden-state initialisation during the first epoch only.
    h = model.init_hidden(batch_size, trace_on=(i == 0))

    for idx, (inputs, labels) in enumerate(train_loader):

        # Trace the forward pass for the very first batch only.
        trace_on = (idx == 0 and i == 0)

        counter += 1

        inputs, labels = inputs.to(device), labels.to(device)

        # The last batch of an epoch may be smaller than batch_size; the LSTM
        # state must match the batch it is fed, so start from fresh zeros then.
        if h[0].size(1) != inputs.size(0):
            h = model.init_hidden(inputs.size(0))

        model.zero_grad()

        # Detach the carried-over state so gradients do not flow into earlier batches.
        h = tuple([e.data for e in h])
        output, h = model(inputs, h, trace_on=trace_on)

        # reshape(-1) keeps a 1-D tensor even for a batch of one (squeeze() would
        # collapse it to 0-d and no longer match the labels).
        loss = criterion(output.reshape(-1), labels.float())
        loss.backward()

        # Clip the gradient norm before the optimiser step.
        nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        if counter % print_every == 0:

            val_h = model.init_hidden(batch_size, trace_on=False)
            val_losses = []
            model.eval()

            # No gradients are needed for validation; skipping autograd saves memory.
            with torch.no_grad():
                for inp, lab in val_loader:
                    inp, lab = inp.to(device), lab.to(device)
                    if val_h[0].size(1) != inp.size(0):
                        val_h = model.init_hidden(inp.size(0))
                    out, val_h = model(inp, val_h, trace_on=False)
                    val_loss = criterion(out.reshape(-1), lab.float())
                    val_losses.append(val_loss.item())

            model.train()

            mean_val_loss = np.mean(val_losses)

            print("Epoch: {}/{}...".format(i+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(mean_val_loss))

            # Checkpoint whenever the validation loss is at least as good as the best so far.
            if mean_val_loss <= valid_loss_min:
                torch.save(model.state_dict(), './state_dict.pt')
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, mean_val_loss))
                valid_loss_min = mean_val_loss


--- init_hidden() ---


weight = next(self.parameters()).data   <<torch.Size([225536, 400])>>:
tensor([[ 2.1229, -0.3070,  1.1609,  ...,  1.1793,  0.1334, -1.1225],
        [-0.0633,  3.3590,  0.0892,  ..., -0.3579,  0.7567, -0.0280],
        [ 0.0372, -1.5348, -1.0914,  ...,  0.2764, -1.5753,  0.3745],
        ...,
        [-0.5144,  1.7953,  1.6041,  ..., -0.0379, -0.4028, -0.4660],
        [ 0.4156,  2.0037,  1.1897,  ..., -1.3525,  0.6580,  0.3276],
        [ 1.6801, -1.6561,  0.8391,  ...,  0.7824, -0.4503, -0.5420]],
       device='cuda:1')

hidden = ...:
(tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0.,

In [25]:
#Loading the best model
# map_location remaps the saved tensors onto the current device, so the
# checkpoint also loads when the GPU it was saved from is not present.
model.load_state_dict(torch.load('./state_dict.pt', map_location=device))

<All keys matched successfully>

In [26]:
# Check test set performance.

test_losses = []
num_correct = 0
h = model.init_hidden(batch_size, trace_on=False)

model.eval()
# Evaluation needs no gradients; no_grad avoids building autograd graphs.
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # A smaller final batch needs a hidden state of matching size.
        if h[0].size(1) != inputs.size(0):
            h = model.init_hidden(inputs.size(0))
        output, h = model(inputs, h)
        scores = output.reshape(-1)  # stays 1-D even for a batch of one
        test_loss = criterion(scores, labels.float())
        test_losses.append(test_loss.item())
        pred = torch.round(scores)  # rounds the output to 0/1
        num_correct += pred.eq(labels.float()).sum().item()

print("Test loss: {:.3f}".format(np.mean(test_losses)))
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))

Test loss: 0.179
Test accuracy: 93.075%
