# Imports

In [1]:
from collections import Counter
import numpy as np
from pathlib import Path
import re
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as opt
import torch.nn.functional as F
from string import punctuation

In [2]:
%reload_ext autoreload
%autoreload 2

In [3]:
torch.__version__

'1.0.0'

# Load Data

In [8]:
# PATH = Path('/Users/imad/Documents/deep_learning_udacity/'\
#             'deep-learning-v2-pytorch/sentiment-rnn/data')
PATH = Path('../data/imdb/')
!ls {PATH}

labels.txt  reviews.txt


In [9]:
with open(PATH / 'reviews.txt') as f:
    reviews = f.read()
with open(PATH / 'labels.txt') as f:
    labels = f.read()

In [10]:
print(reviews[:400])

bromwell high is a cartoon comedy . it ran at the same time as some other programs about school life  such as  teachers  . my   years in the teaching profession lead me to believe that bromwell high  s satire is much closer to reality than is  teachers  . the scramble to survive financially  the insightful students who can see right through their pathetic teachers  pomp  the pettiness of the whole


In [11]:
print(labels[:20])

positive
negative
po


# Preprocessing

In [12]:
reviews = reviews.lower()
all_text = ''.join([char for char in reviews if char not in punctuation])

In [13]:
reviews_split = all_text.split('\n')
all_text = ' '.join(reviews_split)

In [14]:
words = all_text.split()
counts = Counter(words)
sorted_counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
sorted_counts[:10]

[('the', 336713),
 ('and', 164107),
 ('a', 163009),
 ('of', 145864),
 ('to', 135720),
 ('is', 107328),
 ('br', 101872),
 ('it', 96352),
 ('in', 93968),
 ('i', 87623)]

In [15]:
vocab = [x[0] for x in sorted_counts] 
words_to_idx = {word:idx for idx, word in enumerate(vocab, 1)}
idx_to_words = {idx:word for word, idx in words_to_idx.items()}
list(words_to_idx.items())[:10], list(idx_to_words.items())[:10]

([('the', 1),
  ('and', 2),
  ('a', 3),
  ('of', 4),
  ('to', 5),
  ('is', 6),
  ('br', 7),
  ('it', 8),
  ('in', 9),
  ('i', 10)],
 [(1, 'the'),
  (2, 'and'),
  (3, 'a'),
  (4, 'of'),
  (5, 'to'),
  (6, 'is'),
  (7, 'br'),
  (8, 'it'),
  (9, 'in'),
  (10, 'i')])

In [16]:
len(vocab)

74072

In [17]:
# Tokenize reviews
reviews_ints = []
for review in reviews_split:
    reviews_ints.append([words_to_idx[word] for word in review.split()])

print(reviews_ints[:1])

[[21025, 308, 6, 3, 1050, 207, 8, 2138, 32, 1, 171, 57, 15, 49, 81, 5785, 44, 382, 110, 140, 15, 5194, 60, 154, 9, 1, 4975, 5852, 475, 71, 5, 260, 12, 21025, 308, 13, 1978, 6, 74, 2395, 5, 613, 73, 6, 5194, 1, 24103, 5, 1983, 10166, 1, 5786, 1499, 36, 51, 66, 204, 145, 67, 1199, 5194, 19869, 1, 37442, 4, 1, 221, 883, 31, 2988, 71, 4, 1, 5787, 10, 686, 2, 67, 1499, 54, 10, 216, 1, 383, 9, 62, 3, 1406, 3686, 783, 5, 3483, 180, 1, 382, 10, 1212, 13583, 32, 308, 3, 349, 341, 2913, 10, 143, 127, 5, 7690, 30, 4, 129, 5194, 1406, 2326, 5, 21025, 308, 10, 528, 12, 109, 1448, 4, 60, 543, 102, 12, 21025, 308, 6, 227, 4146, 48, 3, 2211, 12, 8, 215, 23]]


In [18]:
# Convert labels to ints
labels = labels.split('\n')
labels = np.array([0 if label == 'negative' else 1 for label in labels])
labels[:10]

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])

In [19]:
len(reviews_ints)

25001

In [20]:
# Get the min and max of all reviews
reviews_lens = [len(review) for review in reviews_ints]
np.min(reviews_lens), np.max(reviews_lens)

(0, 2514)

In [21]:
# Lets delete the review that is empty
non_zero_idx = [idx for idx, review in enumerate(reviews_ints) if len(review) > 0]
reviews_ints = np.array(reviews_ints)[non_zero_idx] #[review for review in reviews_ints if len(review) > 0]
labels = labels[non_zero_idx]
len(reviews_ints), len(labels)

(25000, 25000)

In [22]:
# Pad each reviews to seq_length. If a review is longer than seq_length --> truncate
# anything to the right of seq_length. If it is shorter -- pad with zeros to the left
def pad_sequences(reviews, seq_length):
    padded_sequences = np.zeros((len(reviews), seq_length), dtype=int)

    for i, review in enumerate(reviews):
        padded_sequences[i, -len(review):] = np.array(review)[:seq_length]
    
    assert len(reviews) == len(padded_sequences)
    assert padded_sequences.shape[1] == seq_length

    return padded_sequences

In [23]:
seq_length = 200
padded_reviews = pad_sequences(reviews_ints, seq_length)
padded_reviews[0]

array([    0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0, 21025,   308,     6,
           3,  1050,   207,     8,  2138,    32,     1,   171,    57,
          15,    49,    81,  5785,    44,   382,   110,   140,    15,
        5194,    60,   154,     9,     1,  4975,  5852,   475,    71,
           5,   260,    12, 21025,   308,    13,  1978,     6,    74,
        2395,     5,   613,    73,     6,  5194,     1, 24103,     5,
        1983, 10166,     1,  5786,  1499,    36,    51,    66,   204,
         145,    67,  1199,  5194, 19869,     1, 37442,     4,     1,
         221,   883,

In [24]:
len(reviews_ints[0])

140

# Split Data

In [25]:
padded_reviews.shape, labels.shape

((25000, 200), (25000,))

In [26]:
train_reviews, test_reviews, train_labels, test_labels = train_test_split(padded_reviews, labels, test_size=0.2)
valid_reviews, test_reviews, valid_labels, test_labels = train_test_split(test_reviews, test_labels, test_size=0.5)
train_reviews.shape, valid_reviews.shape, test_reviews.shape

((20000, 200), (2500, 200), (2500, 200))

# DataLoaders

In [27]:
# Create Datasets
train_ds = TensorDataset(torch.from_numpy(train_reviews), torch.from_numpy(train_labels))
valid_ds = TensorDataset(torch.from_numpy(valid_reviews), torch.from_numpy(valid_labels))
test_ds = TensorDataset(torch.from_numpy(test_reviews), torch.from_numpy(test_labels))

# Create DataLoaders
bs = 50
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=True)

In [28]:
sample_review, sample_labels = next(iter(train_dl))
sample_review, sample_labels

(tensor([[ 1747,  1104,   885,  ...,     1,  1421,     7],
         [    0,     0,     0,  ...,     4,    45,     4],
         [    0,     0,     0,  ...,   958,     6,  1553],
         ...,
         [ 1237,    97,    39,  ...,     4,   323, 46755],
         [   40,    26,    58,  ...,     3,  1098,  6754],
         [   11,    18,   920,  ...,    12,   157, 11201]]),
 tensor([1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
         1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
         1, 0]))

In [29]:
sample_review.shape, sample_labels.shape

(torch.Size([50, 200]), torch.Size([50]))

# Model

In [31]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [37]:
class SentimentRNN(nn.Module):
    
    def __init__(self, input_size, output_size, embed_dim,
                 n_layers, hidden_size, drop_prob):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_size, n_layers,
                            dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        batch_size = x.shape[0]
        embeddings = self.embedding(x)
        out, hidden = self.lstm(embeddings, hidden)
        out = self.dropout(out)
        out = out.contiguous().view(-1, hidden_size)
        out = torch.sigmoid(self.fc(out))
        # Get the prediction of the last time step of each sequence
        out = out.view(batch_size, -1)[:, -1] 
        
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_size).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_size).zero_().to(device))
        
        return hidden

In [43]:
vocab_size = len(vocab) + 1
embedding_dim = 300
hidden_size = 256
n_layers = 2
output_size = 1

net = SentimentRNN(vocab_size, output_size, embedding_dim, n_layers, hidden_size, drop_prob=0.5)
net.to(device)

SentimentRNN(
  (embedding): Embedding(74073, 300)
  (lstm): LSTM(300, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

# Train

In [44]:
lr = 1e-3
epochs = 4
criterion = nn.BCELoss()
optimizer = opt.Adam(net.parameters(), lr=lr)

In [45]:
counter = 0
net.train()

for epoch in range(epochs):
    hidden = net.init_hidden(bs)
    
    for inputs, labels in train_dl:
        counter += 1
        inputs, labels = inputs.to(device), labels.to(device)
        hidden = tuple([each.data for each in hidden])
        
        optimizer.zero_grad()
        
        output, hidden = net(inputs, hidden)
        
        loss = criterion(output, labels.float())
        
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), 5)
        optimizer.step()
        
        if counter % 100 == 0:
            net.eval()
            val_hidden = net.init_hidden(bs)
            val_losses = []
            
            for inputs, labels in valid_dl:
                inputs, labels = inputs.to(device), labels.to(device)
                val_hidden = tuple([each.data for each in val_hidden])
                
                output, val_hidden = net(inputs, val_hidden)
                
                val_loss = criterion(output, labels.float())
                val_losses.append(val_loss.item())
            
                            
            print(f'Epoch : {epoch + 1:02d}/{epochs} ... '
                  f'Step : {counter} ... ',
                  f'Loss : {loss.item():.4f} , Val Loss : {np.mean(val_losses):.4f}')
        net.train()

Epoch : 01/4 ... Step : 100 ...  Loss : 0.6679 , Val Loss : 0.6415
Epoch : 01/4 ... Step : 200 ...  Loss : 0.6932 , Val Loss : 0.6888
Epoch : 01/4 ... Step : 300 ...  Loss : 0.6650 , Val Loss : 0.6430
Epoch : 01/4 ... Step : 400 ...  Loss : 0.6823 , Val Loss : 0.7057
Epoch : 02/4 ... Step : 500 ...  Loss : 0.7153 , Val Loss : 0.7108
Epoch : 02/4 ... Step : 600 ...  Loss : 0.5543 , Val Loss : 0.5414
Epoch : 02/4 ... Step : 700 ...  Loss : 0.4652 , Val Loss : 0.5005
Epoch : 02/4 ... Step : 800 ...  Loss : 0.3954 , Val Loss : 0.5317
Epoch : 03/4 ... Step : 900 ...  Loss : 0.5296 , Val Loss : 0.4525
Epoch : 03/4 ... Step : 1000 ...  Loss : 0.3496 , Val Loss : 0.4030
Epoch : 03/4 ... Step : 1100 ...  Loss : 0.4184 , Val Loss : 0.3915
Epoch : 03/4 ... Step : 1200 ...  Loss : 0.3152 , Val Loss : 0.3983
Epoch : 04/4 ... Step : 1300 ...  Loss : 0.2261 , Val Loss : 0.4041
Epoch : 04/4 ... Step : 1400 ...  Loss : 0.3427 , Val Loss : 0.4389
Epoch : 04/4 ... Step : 1500 ...  Loss : 0.2650 , Val Los

In [50]:
net.eval()
test_hidden = net.init_hidden(bs)
test_losses = []
total_correct = 0

for inputs, labels in test_dl:
    inputs, labels = inputs.to(device), labels.to(device)
    test_hidden = tuple([each.data for each in test_hidden])

    output, test_hidden = net(inputs, test_hidden)

    test_loss = criterion(output, labels.float())
    test_losses.append(test_loss.item())
    
    preds = output.round().long()
    total_correct += (labels == preds).sum().item()

test_loss = np.mean(test_losses)
test_acc = total_correct / len(test_ds)

print(f'Test loss : {test_loss:.4f}')
print(f'Test accuracy : {test_acc:.2%}')

Test loss : 0.4074
Test accuracy : 83.88%


# Use Pretrained Embeddings (GloVe)

In [54]:
!wget 'http://nlp.stanford.edu/data/glove.6B.zip'

--2018-12-25 20:13:31--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2018-12-25 20:13:31--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2018-12-25 20:13:43 (69.3 MB/s) - ‘glove.6B.zip’ saved [862182613/862182613]



In [57]:
!ls ../data/glove/

glove.6B.100d.txt  glove.6B.200d.txt  glove.6B.300d.txt  glove.6B.50d.txt


In [67]:
# Load Embedding with 300 embedding dimensions
embeddings_index = {}
with open('../data/glove/glove.6B.300d.txt', 'r') as f:
    for line in f.readlines():
        data = line.split()
        word = data[0]
        values = np.array(data[1:]).astype(float)
        embeddings_index[word] = values

In [68]:
len(values)

300

In [72]:
embeddings_index['the']

array([ 4.6560e-02,  2.1318e-01, -7.4364e-03, -4.5854e-01, -3.5639e-02,
        2.3643e-01, -2.8836e-01,  2.1521e-01, -1.3486e-01, -1.6413e+00,
       -2.6091e-01,  3.2434e-02,  5.6621e-02, -4.3296e-02, -2.1672e-02,
        2.2476e-01, -7.5129e-02, -6.7018e-02, -1.4247e-01,  3.8825e-02,
       -1.8951e-01,  2.9977e-01,  3.9305e-01,  1.7887e-01, -1.7343e-01,
       -2.1178e-01,  2.3617e-01, -6.3681e-02, -4.2318e-01, -1.1661e-01,
        9.3754e-02,  1.7296e-01, -3.3073e-01,  4.9112e-01, -6.8995e-01,
       -9.2462e-02,  2.4742e-01, -1.7991e-01,  9.7908e-02,  8.3118e-02,
        1.5299e-01, -2.7276e-01, -3.8934e-02,  5.4453e-01,  5.3737e-01,
        2.9105e-01, -7.3514e-03,  4.7880e-02, -4.0760e-01, -2.6759e-02,
        1.7919e-01,  1.0977e-02, -1.0963e-01, -2.6395e-01,  7.3990e-02,
        2.6236e-01, -1.5080e-01,  3.4623e-01,  2.5758e-01,  1.1971e-01,
       -3.7135e-02, -7.1593e-02,  4.3898e-01, -4.0764e-02,  1.6425e-02,
       -4.4640e-01,  1.7197e-01,  4.6246e-02,  5.8639e-02,  4.14

In [89]:
embeddings_matrix = np.zeros((vocab_size, 300))
for word, i in words_to_idx.items():
    embeddings_vector = embeddings_index.get(word)
    if embeddings_vector is not None:
        embeddings_matrix[i - 1] = embeddings_vector


In [90]:
embeddings_matrix.shape

(74073, 300)

In [91]:
emb_layer = nn.Embedding(vocab_size, 300)
emb_layer

Embedding(74073, 300)

In [93]:
emb_layer.load_state_dict({'weight': torch.from_numpy(embeddings_matrix)})

In [94]:
emb_layer.weight.data

tensor([[ 0.0466,  0.2132, -0.0074,  ...,  0.0091, -0.2099,  0.0539],
        [ 0.0385, -0.0398,  0.0827,  ..., -0.3343,  0.0118,  0.0597],
        [-0.2971,  0.0940, -0.0967,  ...,  0.0597, -0.2285,  0.2960],
        ...,
        [ 0.0585,  0.9133, -0.0220,  ...,  0.2419,  0.1127, -0.2170],
        [-0.1070, -0.2626,  0.6227,  ...,  0.0524, -0.1153,  0.1335],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [95]:
embeddings_matrix

array([[ 0.04656  ,  0.21318  , -0.0074364, ...,  0.0090611, -0.20989  ,
         0.053913 ],
       [ 0.038466 , -0.039792 ,  0.082747 , ..., -0.33427  ,  0.011807 ,
         0.059703 ],
       [-0.29712  ,  0.094049 , -0.096662 , ...,  0.059717 , -0.22853  ,
         0.29602  ],
       ...,
       [ 0.058459 ,  0.91331  , -0.022032 , ...,  0.24188  ,  0.11273  ,
        -0.21702  ],
       [-0.10701  , -0.26264  ,  0.6227   , ...,  0.052448 , -0.11528  ,
         0.13352  ],
       [ 0.       ,  0.       ,  0.       , ...,  0.       ,  0.       ,
         0.       ]])

In [96]:
emb_layer.weight.data.shape

torch.Size([74073, 300])

In [97]:
def create_embed_layer(num_embeddings, embedding_dim, embeddings_matrix, trainable=False):
    emb_layer = nn.Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': torch.from_numpy(embeddings_matrix)})
    
    if not trainable:
        emb_layer.weight.requires_grad = False
    
    return emb_layer

In [111]:
class SentimentRNN(nn.Module):
    
    def __init__(self, input_size, output_size, embed_dim,
                 n_layers, hidden_size, drop_prob, embeddings_matrix):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.embedding = create_embed_layer(input_size, embed_dim, embeddings_matrix)
        self.lstm = nn.LSTM(embed_dim, self.hidden_size, self.n_layers,
                            dropout=drop_prob, batch_first=True)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(self.hidden_size, output_size)
    
    def forward(self, x, hidden):
        batch_size = x.shape[0]
        embeddings = self.embedding(x)
        out, hidden = self.lstm(embeddings, hidden)
        out = self.dropout(out)
        out = out.contiguous().view(-1, hidden_size)
        out = torch.sigmoid(self.fc(out))
        # Get the prediction of the last time step of each sequence
        out = out.view(batch_size, -1)[:, -1] 
        
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_size).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_size).zero_().to(device))
        
        return hidden

In [118]:
vocab_size = len(vocab) + 1
embedding_dim = 300
hidden_size = 256
n_layers = 2
output_size = 1

net = SentimentRNN(vocab_size, output_size, embedding_dim,
                   n_layers, hidden_size, drop_prob=0.5,
                   embeddings_matrix=embeddings_matrix)
net.to(device)

SentimentRNN(
  (embedding): Embedding(74073, 300)
  (lstm): LSTM(300, 256, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [119]:
net.embedding.weight.requires_grad

False

In [120]:
lr = 1e-3
epochs = 8
criterion = nn.BCELoss()
optimizer = opt.Adam(net.parameters(), lr=lr)

In [121]:
counter = 0
net.train()

for epoch in range(epochs):
    hidden = net.init_hidden(bs)
    
    for inputs, labels in train_dl:
        counter += 1
        inputs, labels = inputs.to(device), labels.to(device)
        hidden = tuple([each.data for each in hidden])
        
        optimizer.zero_grad()
        
        output, hidden = net(inputs, hidden)
        
        loss = criterion(output, labels.float())
        
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), 5)
        optimizer.step()
        
        if counter % 100 == 0:
            net.eval()
            val_hidden = net.init_hidden(bs)
            val_losses = []
            
            for inputs, labels in valid_dl:
                inputs, labels = inputs.to(device), labels.to(device)
                val_hidden = tuple([each.data for each in val_hidden])
                
                output, val_hidden = net(inputs, val_hidden)
                
                val_loss = criterion(output, labels.float())
                val_losses.append(val_loss.item())
            
                            
            print(f'Epoch : {epoch + 1:02d}/{epochs} ... '
                  f'Step : {counter} ... ',
                  f'Loss : {loss.item():.4f} , Val Loss : {np.mean(val_losses):.4f}')
        net.train()

Epoch : 01/8 ... Step : 100 ...  Loss : 0.6765 , Val Loss : 0.6925
Epoch : 01/8 ... Step : 200 ...  Loss : 0.6739 , Val Loss : 0.6936
Epoch : 01/8 ... Step : 300 ...  Loss : 0.7045 , Val Loss : 0.6833
Epoch : 01/8 ... Step : 400 ...  Loss : 0.6743 , Val Loss : 0.6713
Epoch : 02/8 ... Step : 500 ...  Loss : 0.6470 , Val Loss : 0.6743
Epoch : 02/8 ... Step : 600 ...  Loss : 0.5625 , Val Loss : 0.7115
Epoch : 02/8 ... Step : 700 ...  Loss : 0.6412 , Val Loss : 0.6475
Epoch : 02/8 ... Step : 800 ...  Loss : 0.6501 , Val Loss : 0.6712
Epoch : 03/8 ... Step : 900 ...  Loss : 0.6229 , Val Loss : 0.6321
Epoch : 03/8 ... Step : 1000 ...  Loss : 0.5135 , Val Loss : 0.6168
Epoch : 03/8 ... Step : 1100 ...  Loss : 0.5657 , Val Loss : 0.6201
Epoch : 03/8 ... Step : 1200 ...  Loss : 0.7493 , Val Loss : 0.5753
Epoch : 04/8 ... Step : 1300 ...  Loss : 0.5506 , Val Loss : 0.5724
Epoch : 04/8 ... Step : 1400 ...  Loss : 0.4530 , Val Loss : 0.5448
Epoch : 04/8 ... Step : 1500 ...  Loss : 0.4838 , Val Los

In [122]:
net.eval()
test_hidden = net.init_hidden(bs)
test_losses = []
total_correct = 0

for inputs, labels in test_dl:
    inputs, labels = inputs.to(device), labels.to(device)
    test_hidden = tuple([each.data for each in test_hidden])

    output, test_hidden = net(inputs, test_hidden)

    test_loss = criterion(output, labels.float())
    test_losses.append(test_loss.item())
    
    preds = output.round().long()
    total_correct += (labels == preds).sum().item()

test_loss = np.mean(test_losses)
test_acc = total_correct / len(test_ds)

print(f'Test loss : {test_loss:.4f}')
print(f'Test accuracy : {test_acc:.2%}')

Test loss : 0.5580
Test accuracy : 80.40%
