In [26]:
import torch
import torch.nn as nn
from torch.functional import F

#### get data

In [2]:
# # uncomment if not downloaded previously
# !wget https://download.pytorch.org/tutorial/data.zip
# !unzip data.zip

#### format data

In [3]:
import string
all_letters = string.ascii_letters + " .,;'"

In [4]:
# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
import unicodedata

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

Slusarski


In [5]:
import os
category_lines = {}
categories = []
path = '02-data/names/'
for fname in os.listdir(path):
    lang = fname.split('.')[0]
    categories.append(lang)
    category_lines[lang] = []
    with open(path+fname, 'r') as f:
      for line in f:
        category_lines[lang].append(unicodeToAscii(line.strip()))
', '.join(categories)

'Korean, Irish, Portuguese, Vietnamese, Czech, Russian, Scottish, German, Polish, Spanish, English, French, Japanese, Dutch, Greek, Chinese, Italian, Arabic'

In [6]:
categories

['Korean',
 'Irish',
 'Portuguese',
 'Vietnamese',
 'Czech',
 'Russian',
 'Scottish',
 'German',
 'Polish',
 'Spanish',
 'English',
 'French',
 'Japanese',
 'Dutch',
 'Greek',
 'Chinese',
 'Italian',
 'Arabic']

In [7]:
print(category_lines['Italian'][:5])

['Abandonato', 'Abatangelo', 'Abatantuono', 'Abate', 'Abategiovanni']


#### helpers

In [8]:
num_letters = len(all_letters);num_letters

57

In [9]:
def letterToIndex(letter):
    return all_letters.find(letter)
letterToIndex('n')

13

In [10]:
def char_to_one_hot(char):
    zeros = torch.zeros(1, num_letters)
    zeros[0][letterToIndex(char)] = 1
    return zeros

In [11]:
char_to_one_hot('n')

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])

In [12]:
def seq_onehot(word):
    zeros = torch.zeros(len(word), 1, num_letters)
    for i, ch in enumerate(word):
        zeros[i][0][letterToIndex(ch)] = 1
    return zeros

In [13]:
seq_onehot('nan'), seq_onehot('nan').shape

(tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]]]),
 torch.Size([3, 1, 57]))

In [31]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_and_hidden_to_next_hidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.new_hidden_to_output = nn.Linear(hidden_size, output_size)
        
    def forward(self, input_tensor, hidden):
        next_hidden = self.input_and_hidden_to_next_hidden(torch.cat((input_tensor, hidden), 1))
        output = self.new_hidden_to_output(next_hidden)

        return F.log_softmax(output, dim=1), next_hidden

In [32]:
rnn = RNN(num_letters, 50, len(categories))

In [33]:
hidden = torch.rand((1, 50))

In [34]:
for char in seq_onehot('nan'):
    output, hidden = rnn(char, hidden)
    print(output, hidden)

tensor([[-2.8482, -2.9735, -3.0370, -2.8171, -3.2030, -3.0298, -2.8035, -2.9455,
         -3.1980, -2.8140, -3.1805, -2.6500, -3.0575, -2.9277, -2.4932, -2.4603,
         -2.8385, -3.1965]], grad_fn=<LogSoftmaxBackward0>) tensor([[-0.5016,  0.1422, -0.0884,  0.0171, -0.1455, -0.0130, -0.0572,  0.2246,
          0.5929,  0.1226,  0.2759, -0.1772,  0.3595,  0.0986,  0.1152,  0.1045,
         -0.2089, -0.2540, -0.2240, -0.4257, -0.0678,  0.3477,  0.1459, -0.1279,
          0.3287,  0.1390,  0.2207,  0.3165,  0.0074, -0.0308, -0.0623, -0.0495,
          0.1834,  0.0832, -0.3110,  0.2726,  0.4092,  0.0866, -0.1679, -0.4909,
          0.2964, -0.1197, -0.0023, -0.5089,  0.3814,  0.2589, -0.0415,  0.1600,
          0.3638,  0.1026]], grad_fn=<AddmmBackward0>)
tensor([[-2.8722, -3.0085, -2.9137, -2.7321, -3.0278, -2.9501, -2.8081, -2.8734,
         -3.1450, -2.8087, -3.0212, -2.8265, -3.1134, -2.8665, -2.6831, -2.6646,
         -2.8621, -3.0091]], grad_fn=<LogSoftmaxBackward0>) tensor([[-0.297

In [35]:
torch.cat((seq_onehot('nan')[0], hidden), 1).shape

torch.Size([1, 107])

In [37]:
loss_fn = nn.MSELoss()

In [62]:
import random

def get_random_sample():
    def rand(collection):
        rand_int = random.randint(0, len(collection)-1)
        return collection[rand_int]
    cat = rand(categories)
    item = rand(category_lines[cat])
    cat_tensor = torch.tensor([categories.index(cat)])
    item_tensor = seq_onehot(item)
    return cat, item, cat_tensor, item_tensor

In [63]:
get_random_sample()

('French',
 'Bonfils',
 tensor([11]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [64]:
cat, item, cten, itens = get_random_sample()

In [67]:
for ch in itens:
    out, hidden = rnn(ch, hidden)

In [76]:
out[0][torch.argmax(out).item()]

tensor(-2.7200, grad_fn=<SelectBackward0>)

In [83]:
loss_fn(out[0][torch.argmax(out).item()], cten.float()).backward()

In [79]:
hidden = torch.rand((1, 50))

In [107]:
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.001)


In [111]:
for i in range(100):
    cat, item, cten, itens = get_random_sample()

    for ch in itens:
        out, hidden = rnn(ch, hidden)
    
    loss = loss_fn(out[0][torch.argmax(out).item()], cten.float())
    loss.backward()
    hidden.detach_()
    out.detach_()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i%0 == 0:
        print(f'Loss: {loss}')
        print(out[0][torch.argmax(out).item()] == cten.item())

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [102]:
for gg in rnn.parameters():
    print(gg.grad.zero_())
    break

<built-in method zero_ of Tensor object at 0x7fb5a021a5e0>
