# Test LSTM on GPU


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import string
from random import shuffle, choice
from timeit import default_timer
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.utils.data as datautils
from torchtext import data, datasets

## Choose device

In [2]:
# Small demo tensor, created on the CPU; it is moved to the GPU below when
# one is available.
t = torch.rand(3, 4)

gpu_found = torch.cuda.is_available()

if not gpu_found:
    print('No GPU available.')
    device = torch.device('cpu')
else:
    t = t.to('cuda')
    mib = 1024**2  # bytes per MB, used to report memory figures
    print('Device used to store tensor:', t.device)
    print('Device name:', torch.cuda.get_device_name(0))
    print('Allocated:', round(torch.cuda.memory_allocated(0)/mib, 3), 'MB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/mib, 3), 'MB')
    device = torch.device('cuda:0')

# `device` is the device the rest of the notebook moves the model to
print(device)

Device used to store tensor: cuda:0
Device name: Quadro RTX 3000
Allocated: 0.0 MB
Cached:    2.0 MB
cuda:0


## Dataset Utils

In [3]:
# Languages a name can belong to; the list position is the class index.
LABELS = ['English', 'French', 'Portuguese', 'Spanish']

# Character vocabulary: ASCII letters, a few punctuation characters and a
# trailing '<eos>' end-of-sequence marker; the list position is the
# character index.
LETTERS = [*string.ascii_letters, *" .,;'-", '<eos>']

def input_encoding(input_str):
    """ Build the one-hot encoding of a string over the LETTERS vocabulary.

        Returns a long tensor of shape (len(input_str), 1, len(LETTERS)) in
        which row i holds a single 1 at the LETTERS index of character i.
        A character that is not in LETTERS makes list.index raise ValueError. """

    encoded = torch.zeros(len(input_str), 1, len(LETTERS), dtype=torch.long)

    for position, char in enumerate(input_str):
        encoded[position, 0, LETTERS.index(char)] = 1

    return encoded

def label_encoding(output_str):
    """ Build the one-hot encoding of a label over the LABELS list.

        Returns a long tensor of shape (1, len(LABELS)) with a single 1 at
        the LABELS index of `output_str`. A label that is not in LABELS makes
        list.index raise ValueError. """

    encoded = torch.zeros(1, len(LABELS), dtype=torch.long)
    encoded[0, LABELS.index(output_str)] = 1

    return encoded

In [4]:
# We now create our custom class
class NamesDataset(datautils.Dataset):
    """ Dataset of (name, label) pairs read from a csv file.

        The csv file must have a 'Name' and a 'Label' column. The pairs are
        shuffled once, when the dataset is created.

        Args:
            names_file: path (or anything pd.read_csv accepts) of the csv file.
            device: device on which the tensors returned by __getitem__ are
                placed. When None, CUDA is used if it is available and the
                CPU otherwise. """

    def __init__(self, names_file, device=None):
        # Resolve the target device once, instead of hard-coding 'cuda' in
        # __getitem__ (which fails on machines without a GPU)
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)

        # We load the data from the csv file
        name_data = pd.read_csv(names_file)

        # We store the input and output pairs as a list of (name, label)
        self.samples = list(zip(name_data['Name'], name_data['Label']))

        shuffle(self.samples)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """ Return a dict with the raw 'name' and 'label' plus three tensors:

            'input_tensor':  (len(name), 1) long tensor of letter indices
            'label_tensor':  one-hot label, as built by label_encoding
            'target_tensor': (len(name),) long tensor with the input letters
                             shifted left by one and '<eos>' appended """
        name, label = self.samples[idx]

        # Since we're using an embedding layer, we do not use one-hot
        # encoding for the letters, but store only their indices
        letter_ids = [LETTERS.index(x) for x in name]

        input_tensor  = torch.tensor([[i] for i in letter_ids],
                                     dtype=torch.long)
        label_tensor  = label_encoding(label)
        # The target at each position is the next letter of the name; the
        # last letter is followed by the end-of-sequence marker
        target_tensor = torch.tensor(letter_ids[1:] + [LETTERS.index('<eos>')],
                                     dtype=torch.long)

        item_dict = {"label": label,
                     "name": name,
                     "label_tensor": label_tensor.to(self.device),
                     "input_tensor": input_tensor.to(self.device),
                     "target_tensor": target_tensor.to(self.device)}

        return item_dict
    
# Build the training set from 'train_data.csv'; NamesDataset reads the file
# with pd.read_csv and uses its 'Name' and 'Label' columns.
train_set = NamesDataset('train_data.csv')

## LSTM Network

In [5]:
class LSTMNetwork(nn.Module):
    """ Character-level LSTM that predicts the next letter of a name,
        conditioned on a label.

        At every step the embedding of the current letter is concatenated
        with the label tensor, fed to a single-layer LSTM, and the LSTM
        output is mapped to one score per output class.

        Args:
            input_size: number of distinct input indices (embedding rows).
            n_labels: width of the label tensor concatenated to the embedding.
            embedding_size: size of the letter embedding.
            hidden_size: size of the LSTM hidden and cell states.
            output_size: number of output scores per step.
            dropout: dropout probability applied to the embedding. """

    def __init__(self, input_size, n_labels, embedding_size, hidden_size, output_size, dropout=0.):
        super().__init__()

        # First, an embedding layer is used to convert each letter index
        # into a feature vector
        self.i2f_layer = nn.Embedding(input_size, embedding_size)

        # We then create an LSTM layer
        self.f2h_layer = nn.LSTM(embedding_size + n_labels, hidden_size, 1)

        # Then, a linear layer that turns the LSTM hidden state into an
        # output prediction
        self.h2o_layer = nn.Linear(hidden_size, output_size)

        # We include a dropout layer for the embedding
        self.dropout = nn.Dropout(dropout)

        # We add learnable initial values for the hidden state and cell
        self.hidden_init = nn.Parameter(torch.zeros(1, hidden_size))
        self.cell_init   = nn.Parameter(torch.zeros(1, hidden_size))

    def single_pass(self, letter_tensor, label_tensor, hidden, cell):
        """ Run one LSTM step.

            Args:
                letter_tensor: long tensor of shape (1,) with a letter index.
                label_tensor: tensor of shape (1, n_labels).
                hidden, cell: LSTM state, each of shape (1, hidden_size).

            Returns:
                (o, h, c): output scores of shape (1, output_size) and the
                updated hidden and cell states. """
        # Compute embedding
        f = self.dropout(self.i2f_layer(letter_tensor))

        # Perform lstm pass. The label is cast to the embedding dtype
        # explicitly instead of relying on implicit promotion in torch.cat
        lstm_input = torch.cat((f, label_tensor.to(f.dtype)), 1)
        o, (h, c) = self.f2h_layer(lstm_input, (hidden, cell))

        # Compute output
        o = self.h2o_layer(o)

        return o, h, c

    def forward(self, input):
        """ Compute the output scores for every letter of a name.

            Args:
                input: dict with an 'input_tensor' of shape (L, 1) holding
                    letter indices and a 'label_tensor' of shape (1, n_labels).

            Returns:
                Tensor of shape (L, output_size), one row per input letter. """
        name = input['input_tensor']
        label = input['label_tensor']

        # Embed the whole name at once: (L, 1) -> (L, 1, E) -> (L, E)
        f = self.dropout(self.i2f_layer(name)).squeeze(1)

        # The same label is appended to the embedding of every letter
        label_rows = label.to(f.dtype).expand(f.size(0), -1)

        # A single LSTM call over the full sequence replaces the per-letter
        # Python loop; the recurrence starts from the learned initial state
        o, _ = self.f2h_layer(torch.cat((f, label_rows), 1),
                              (self.hidden_init, self.cell_init))

        # We return all outputs
        return self.h2o_layer(o)

    def sample(self, label, start_letter, max_length=20):
        """ Generate a name for `label` that starts with `start_letter`.

            Decoding is greedy: at each step the highest-scoring letter is
            chosen. Generation stops at '<eos>' or after `max_length` steps.
            The module's training/eval mode is restored on exit.

            Returns:
                The generated name as a string (including `start_letter`). """

        # Use the device the model lives on rather than a global
        model_device = self.hidden_init.device

        # During sampling, dropout is disabled and no gradient is stored
        was_training = self.training
        self.eval()

        try:
            with torch.no_grad():
                label_tensor = label_encoding(label).to(model_device)
                letter_idx = LETTERS.index(start_letter)

                h = self.hidden_init
                c = self.cell_init

                output = [start_letter]

                for _ in range(max_length):
                    letter_tensor = torch.tensor([letter_idx], dtype=torch.long,
                                                 device=model_device)
                    o, h, c = self.single_pass(letter_tensor, label_tensor, h, c)

                    letter_idx = o.argmax(dim=1).item()
                    next_letter = LETTERS[letter_idx]

                    if next_letter == "<eos>":
                        break

                    output.append(next_letter)
        finally:
            # Restore whichever mode the caller had, even on error
            self.train(was_training)

        return ''.join(output)

### Train network

In [6]:
# We create an instance of our LSTM network.
lstm_net = LSTMNetwork(len(LETTERS), len(LABELS), 128, 256, len(LETTERS), dropout=0.3).to(device)
# reduction='sum' adds up the cross-entropy of every letter of a name in a
# single call, which is what summing one loss call per letter computes
loss = nn.CrossEntropyLoss(reduction='sum')
optim = torch.optim.Adam(lstm_net.parameters(), lr=0.001)

# Average per-name training loss, one entry per epoch
train_losses = []

num_epochs = 20

for ep in range(num_epochs):
    epoch_start_time = default_timer()
    print('\n- Training epoch: %i -' % ep)

    # We use an auxiliary variable to keep track of the loss within an epoch
    running_loss = 0.

    # Names are processed one at a time (no batching)
    for sample in train_set:
        target = sample['target_tensor']

        # We zero-out the gradient
        optim.zero_grad()

        # Compute output: one row of scores per letter of the name
        outputs = lstm_net(sample)

        # Loss summed over all letters in the input name, given the target
        l = loss(outputs, target)

        # Compute gradient
        l.backward()
        # Perform optimization step
        optim.step()
        # Update total running loss
        running_loss += l.item()

    # Average over the number of names seen in the epoch
    train_losses.append(running_loss / len(train_set))

    print(f'Training loss: {train_losses[-1]:.4f}')
    epoch_end_time = default_timer()
    print(f'Epoch time: {epoch_end_time - epoch_start_time} (s)')


- Training epoch: 0 -
Training loss: 14.1207
Epoch time: 23.89944734300002 (s)

- Training epoch: 1 -
Training loss: 12.3142
Epoch time: 31.151397010000096 (s)

- Training epoch: 2 -
Training loss: 11.5496
Epoch time: 25.409626731000003 (s)

- Training epoch: 3 -
Training loss: 10.9259
Epoch time: 25.688770825000006 (s)

- Training epoch: 4 -
Training loss: 10.4256
Epoch time: 24.138412084000038 (s)

- Training epoch: 5 -
Training loss: 10.0019
Epoch time: 24.089317722000033 (s)

- Training epoch: 6 -
Training loss: 9.6226
Epoch time: 23.88697811599991 (s)

- Training epoch: 7 -
Training loss: 9.2569
Epoch time: 24.034181203999992 (s)

- Training epoch: 8 -
Training loss: 8.9615
Epoch time: 24.386107373000073 (s)

- Training epoch: 9 -
Training loss: 8.6488
Epoch time: 23.97241528400002 (s)

- Training epoch: 10 -
Training loss: 8.3880
Epoch time: 23.955219046000025 (s)

- Training epoch: 11 -
Training loss: 8.1302
Epoch time: 23.53554447099998 (s)

- Training epoch: 12 -
Training los

#### Network Parameters

In [7]:
# List every named parameter of the network together with its shape
for name, param in lstm_net.named_parameters():
    print(name, param.shape)

# Total number of scalar parameters across all parameter tensors
n_params = sum(map(torch.numel, lstm_net.parameters()))
print('Total parameters:', n_params)

hidden_init torch.Size([1, 256])
cell_init torch.Size([1, 256])
i2f_layer.weight torch.Size([59, 128])
f2h_layer.weight_ih_l0 torch.Size([1024, 132])
f2h_layer.weight_hh_l0 torch.Size([1024, 256])
f2h_layer.bias_ih_l0 torch.Size([1024])
f2h_layer.bias_hh_l0 torch.Size([1024])
h2o_layer.weight torch.Size([59, 256])
h2o_layer.bias torch.Size([59])
Total parameters: 422587


### Generate Outputs

In [8]:
# Candidate first letters: the 26 uppercase ASCII letters
initials = list(string.ascii_uppercase)

# Generate ten names per label, each starting from a random uppercase letter
for label in LABELS:
    for _ in range(10):
        letter = np.random.choice(initials)
        print(f'Generated name in {label}:', lstm_net.sample(label, letter))

Generated name in English: Kenward
Generated name in English: Harris
Generated name in English: Thornhill
Generated name in English: Thornhill
Generated name in English: Ellerby
Generated name in English: Thornhill
Generated name in English: Mcneil
Generated name in English: Alden
Generated name in English: Yeardsley
Generated name in English: Alden
Generated name in French: Abascal
Generated name in French: Whitelaw
Generated name in French: Fabian
Generated name in French: Zamontaine
Generated name in French: Beller
Generated name in French: De la fuente
Generated name in French: Quiros
Generated name in French: Abascal
Generated name in French: Villeneuve
Generated name in French: Xarter
Generated name in Portuguese: Pares
Generated name in Portuguese: De la cruz
Generated name in Portuguese: Castro
Generated name in Portuguese: Wood
Generated name in Portuguese: Tomas
Generated name in Portuguese: Belo
Generated name in Portuguese: Albuquerque
Generated name in Portuguese: Fabra
Ge