In [1]:
import torch
import torch.nn as nn
from torch.functional import F

#### get data

In [2]:
# # uncomment if not downloaded previously
# !wget https://download.pytorch.org/tutorial/data.zip
# !unzip data.zip

#### format data

In [3]:
import string
all_letters = string.ascii_letters + " .,;'"

In [4]:
# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
import unicodedata

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

print(unicodeToAscii('Ślusàrski'))

Slusarski


In [5]:
import os
category_lines = {}
categories = []
path = '02-data/names/'
for fname in os.listdir(path):
    lang = fname.split('.')[0]
    categories.append(lang)
    category_lines[lang] = []
    with open(path+fname, 'r') as f:
      for line in f:
        category_lines[lang].append(unicodeToAscii(line.strip()))
', '.join(categories)

'Korean, Irish, Portuguese, Vietnamese, Czech, Russian, Scottish, German, Polish, Spanish, English, French, Japanese, Dutch, Greek, Chinese, Italian, Arabic'

In [6]:
categories

['Korean',
 'Irish',
 'Portuguese',
 'Vietnamese',
 'Czech',
 'Russian',
 'Scottish',
 'German',
 'Polish',
 'Spanish',
 'English',
 'French',
 'Japanese',
 'Dutch',
 'Greek',
 'Chinese',
 'Italian',
 'Arabic']

In [7]:
print(category_lines['Italian'][:5])

['Abandonato', 'Abatangelo', 'Abatantuono', 'Abate', 'Abategiovanni']


#### helpers

In [8]:
num_letters = len(all_letters);num_letters

57

In [9]:
def letterToIndex(letter):
    return all_letters.find(letter)
letterToIndex('n')

13

In [10]:
def char_to_one_hot(char):
    zeros = torch.zeros(1, num_letters)
    zeros[0][letterToIndex(char)] = 1
    return zeros

In [11]:
char_to_one_hot('n')

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])

In [12]:
def seq_onehot(word):
    zeros = torch.zeros(len(word), 1, num_letters)
    for i, ch in enumerate(word):
        zeros[i][0][letterToIndex(ch)] = 1
    return zeros

In [13]:
seq_onehot('nan'), seq_onehot('nan').shape

(tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]]]),
 torch.Size([3, 1, 57]))

In [14]:
print(seq_onehot('Jones').size())

torch.Size([5, 1, 57])


In [15]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_and_hidden_to_next_hidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.new_hidden_to_output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input_tensor, hidden):
        next_hidden = self.input_and_hidden_to_next_hidden(torch.cat((input_tensor, hidden), 1))
        output = self.new_hidden_to_output(next_hidden)
        output = self.softmax(output)
        

        return output, next_hidden

In [16]:
rnn = RNN(num_letters, 50, len(categories))

In [17]:
hidden = torch.zeros((1, 50))

In [18]:
for char in seq_onehot('nan'):
    output, hidden = rnn(char, hidden)
    print(output.shape)

torch.Size([1, 18])
torch.Size([1, 18])
torch.Size([1, 18])


In [19]:
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return categories[category_i], category_i

In [20]:
categoryFromOutput(output)

('Spanish', 9)

In [21]:
torch.cat((seq_onehot('nan')[0], hidden), 1).shape

torch.Size([1, 107])

In [22]:
loss_fn = nn.NLLLoss()

In [23]:
import random

def get_random_sample():
    def rand(collection):
        rand_int = random.randint(0, len(collection)-1)
        return collection[rand_int]
    cat = rand(categories)
    item = rand(category_lines[cat])
    cat_tensor = torch.tensor([categories.index(cat)])
    item_tensor = seq_onehot(item)
    return cat, item, cat_tensor, item_tensor

In [24]:
get_random_sample()

('Greek',
 'Alexandropoulos',
 tensor([14]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 1., 0., 0., 0

In [25]:
cat, item, cten, itens = get_random_sample()

In [26]:
for ch in itens:
    out, hidden = rnn(ch, hidden)

In [27]:
out[0][torch.argmax(out).item()]

tensor(-2.7322, grad_fn=<SelectBackward0>)

In [28]:
# loss_fn(out[0][torch.argmax(out).item()], cten.float()).backward()

In [None]:
hidden

In [30]:
n_hidden = 50
rnn = RNN(num_letters, n_hidden, len(categories))
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.001)


def train(input_tensor, cat_tensor):
    hidden = torch.zeros((1, n_hidden))

    for ch in input_tensor:
        out, hidden = rnn(ch, hidden)
    
    loss = loss_fn(out, cat_tensor)
    # # hidden.detach_()
    # # out.detach_()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return output, loss.item()


In [31]:
for i in range(1000):
    cat, item, cten, itens = get_random_sample()
    output, loss = train(itens, cten)
    
    if i % 1000 == 0:
        print(f'Loss: {loss}')
        print([torch.argmax(output).item()] == cten.item())


Loss: 2.934276580810547
False


In [32]:
cten.float().item()

8.0

In [33]:
output.dtype

torch.float32

In [34]:
for gg in rnn.parameters():
    print(gg.grad.zero_())
    break

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [35]:
randomTrainingExample = get_random_sample

In [36]:
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

def train(category_tensor, line_tensor):
    hidden = torch.zeros((1, n_hidden))

    rnn.zero_grad()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = loss_fn(output, category_tensor.float())
    loss.backward()

    # Add parameters' gradients to their values, multiplied by learning rate
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, loss.item()

In [38]:
import time
import math

n_iters = 10000
print_every = 5000
plot_every = 1000



# Keep track of losses for plotting
current_loss = 0
all_losses = []

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

start = time.time()

for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print ``iter`` number, loss, name and guess
    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

RuntimeError: expected scalar type Long but found Float

In [39]:
categories[:5], "_________",{k: category_lines[k][:2] for k in category_lines}

(['Korean', 'Irish', 'Portuguese', 'Vietnamese', 'Czech'],
 '_________',
 {'Korean': ['Ahn', 'Baik'],
  'Irish': ['Adam', 'Ahearn'],
  'Portuguese': ['Abreu', 'Albuquerque'],
  'Vietnamese': ['Nguyen', 'Tron'],
  'Czech': ['Abl', 'Adsit'],
  'Russian': ['Ababko', 'Abaev'],
  'Scottish': ['Smith', 'Brown'],
  'German': ['Abbing', 'Abel'],
  'Polish': ['Adamczak', 'Adamczyk'],
  'Spanish': ['Abana', 'Abano'],
  'English': ['Abbas', 'Abbey'],
  'French': ['Abel', 'Abraham'],
  'Japanese': ['Abe', 'Abukara'],
  'Dutch': ['Aalsburg', 'Aalst'],
  'Greek': ['Adamidis', 'Adamou'],
  'Chinese': ['Ang', 'AuYong'],
  'Italian': ['Abandonato', 'Abatangelo'],
  'Arabic': ['Khoury', 'Nahas']})

In [40]:
allLetters = all_letters

In [41]:
numLetters = len(all_letters)

In [42]:
numLetters

57

In [43]:
class RNN2(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN2, self).__init__()
        self.hidden_size = hidden_size
        self.input_and_hidden_to_new_hidden = nn.Linear(input_size+hidden_size, hidden_size)
        self.new_hidden_to_output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input_tensor, hidden_tensor):
        concatted = torch.cat((input_tensor, hidden_tensor), dim=1)
        next_hidden = self.input_and_hidden_to_new_hidden(concatted)
        output = self.new_hidden_to_output(next_hidden)
        # print(output)
        # print(output.argmax())
        # print("--------------")
        # print(nn.Softmax(dim=1)(output))
        output = self.softmax(output)
        # print(output)
        # print(output.argmax())
        return output, next_hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)
        

In [44]:
def init_hidden(hidden_size):
    return torch.zeros(1, hidden_size)

In [45]:
init_hidden(128)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.]])

In [46]:
get_random_sample()

('Polish',
 'Jaskulski',
 tensor([8]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [47]:
label, name, labelTensor, nameTensor = get_random_sample()

In [48]:
numLetters = num_letters

In [49]:
numCats = len(categories)

In [50]:
numHidden = 128

In [51]:
rnn2 = RNN2(numLetters, numHidden, numCats)

In [52]:
hidden = init_hidden(numHidden)

In [53]:
for i in range(nameTensor.size()[0]):
    output, hidden = rnn2(nameTensor[i], hidden)


tensor([[-0.0107,  0.0112,  0.0289,  0.0029, -0.0863, -0.0592,  0.0808, -0.0493,
          0.0191,  0.1192, -0.0926,  0.0505,  0.0171,  0.0919, -0.0454,  0.0426,
         -0.0711,  0.0335]], grad_fn=<AddmmBackward0>)
tensor(9)
--------------
tensor([[0.0546, 0.0558, 0.0568, 0.0554, 0.0506, 0.0520, 0.0598, 0.0525, 0.0563,
         0.0622, 0.0503, 0.0581, 0.0562, 0.0605, 0.0527, 0.0576, 0.0514, 0.0571]],
       grad_fn=<SoftmaxBackward0>)
tensor([[-2.9075, -2.8856, -2.8679, -2.8939, -2.9831, -2.9560, -2.8160, -2.9461,
         -2.8777, -2.7776, -2.9894, -2.8463, -2.8797, -2.8049, -2.9422, -2.8542,
         -2.9679, -2.8633]], grad_fn=<LogSoftmaxBackward0>)
tensor(9)
tensor([[-0.0461, -0.0249,  0.0626, -0.0049, -0.0408, -0.0331,  0.0709, -0.0035,
         -0.0154,  0.1084, -0.1216, -0.0200, -0.0833,  0.0923,  0.0044,  0.0204,
         -0.0852,  0.0118]], grad_fn=<AddmmBackward0>)
tensor(9)
--------------
tensor([[0.0533, 0.0544, 0.0594, 0.0555, 0.0536, 0.0540, 0.0599, 0.0556, 0.0549,
    

In [54]:
tt = torch.tensor([[0.0503, 0.0554, 0.0539, 0.0609, 0.0575, 0.0518, 0.0531, 0.0571, 0.0536,
         0.0602, 0.0603, 0.0549, 0.0533, 0.0536, 0.0547, 0.0514, 0.0572, 0.0607]])

In [55]:
learning_rate = 0.005

In [56]:
a = torch.log(tt)

In [67]:
nn.Softmax(dim=1)(a)

tensor([[0.0503, 0.0554, 0.0539, 0.0609, 0.0575, 0.0518, 0.0531, 0.0571, 0.0536,
         0.0602, 0.0603, 0.0549, 0.0533, 0.0536, 0.0547, 0.0514, 0.0572, 0.0607]])

In [68]:
a.topk(3, 1, False)

torch.return_types.topk(
values=tensor([[-2.9898, -2.9681, -2.9604]]),
indices=tensor([[ 0, 15,  5]]))

In [69]:
a.topk??

[0;31mDocstring:[0m
topk(k, dim=None, largest=True, sorted=True) -> (Tensor, LongTensor)

See :func:`torch.topk`
[0;31mType:[0m      builtin_function_or_method


In [70]:
def train(model, optimizer, loss_fn, sampleTensor, labelTensor):
    model.train()
    
    model.zero_grad()
    hidden = model.init_hidden()
    for i in range(nameTensor.size()[0]):
        output, hidden = rnn2(nameTensor[i], hidden)
    
    loss = loss_fn(output, labelTensor)
    loss.backward()
    
    # Add parameters' gradients to their values, multiplied by learning rate
    for p in model.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    
    return loss.item()


In [71]:
rnn2 = RNN2(numLetters, numHidden, numCats)
optimizer = torch.optim.SGD(rnn2.parameters(), lr=0.0001)
loss_fn = torch.nn.NLLLoss()

In [72]:

%time
for i in range(1):
    label, name, labelTensor, nameTensor = get_random_sample()
    loss = train(rnn2,optimizer,loss_fn,nameTensor, labelTensor)
    
    if i % 1000 == 0:
        print(f"{i} Loss: {loss}")

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 11.2 µs
tensor([[ 0.0118,  0.0134,  0.0240,  0.0485,  0.0200,  0.0051, -0.0322,  0.0269,
          0.0327, -0.0620, -0.0198, -0.0334, -0.0238,  0.0822, -0.0160,  0.0009,
         -0.0203, -0.0327]], grad_fn=<AddmmBackward0>)
tensor(13)
--------------
tensor([[0.0561, 0.0562, 0.0568, 0.0582, 0.0566, 0.0557, 0.0537, 0.0570, 0.0573,
         0.0521, 0.0544, 0.0536, 0.0541, 0.0602, 0.0546, 0.0555, 0.0543, 0.0537]],
       grad_fn=<SoftmaxBackward0>)
tensor([[-2.8806, -2.8790, -2.8683, -2.8438, -2.8724, -2.8873, -2.9245, -2.8655,
         -2.8596, -2.9544, -2.9122, -2.9258, -2.9162, -2.8102, -2.9084, -2.8915,
         -2.9126, -2.9251]], grad_fn=<LogSoftmaxBackward0>)
tensor(13)
tensor([[-0.0571, -0.0227, -0.0101,  0.0483,  0.0561,  0.0041, -0.0838,  0.0416,
          0.0312, -0.0639,  0.0265, -0.0069, -0.1050,  0.0832,  0.0011,  0.0207,
          0.0398, -0.0293]], grad_fn=<AddmmBackward0>)
tensor(13)
--------------
tensor([[0.052

In [73]:
def evaluate():
    hidden = rnn2.init_hidden()
    label, name, labelTensor, nameTensor = get_random_sample()
    for i in range(nameTensor.shape[0]):
        output, hidden = rnn2(nameTensor[i], hidden)
    print(f"{output.argmax() == labelTensor} name: {name}; predicted: {categories[output.argmax().item()]}; Actual: {label}")
    return (output.argmax() == labelTensor).float().item()
    

In [74]:
    label, name, labelTensor, nameTensor = get_random_sample()
    for i in range(nameTensor.shape[0]):
        output, hidden = rnn2(nameTensor[i], hidden)

tensor([[ 0.0332,  0.0235,  0.0336,  0.0723,  0.0317,  0.0239, -0.0473,  0.0583,
          0.0471, -0.0653,  0.0032, -0.0488, -0.0161,  0.0747,  0.0027,  0.0008,
         -0.0235, -0.0435]], grad_fn=<AddmmBackward0>)
tensor(13)
--------------
tensor([[0.0569, 0.0563, 0.0569, 0.0591, 0.0568, 0.0563, 0.0525, 0.0583, 0.0577,
         0.0515, 0.0552, 0.0524, 0.0541, 0.0593, 0.0552, 0.0551, 0.0537, 0.0527]],
       grad_fn=<SoftmaxBackward0>)
tensor([[-2.8670, -2.8766, -2.8665, -2.8278, -2.8685, -2.8762, -2.9475, -2.8418,
         -2.8531, -2.9655, -2.8970, -2.9489, -2.9162, -2.8255, -2.8974, -2.8993,
         -2.9237, -2.9436]], grad_fn=<LogSoftmaxBackward0>)
tensor(13)
tensor([[-0.0471, -0.0367, -0.0073,  0.0595,  0.0418, -0.0072, -0.0861,  0.0371,
          0.0389, -0.0786,  0.0285, -0.0009, -0.0887,  0.0916, -0.0261,  0.0017,
          0.0279, -0.0221]], grad_fn=<AddmmBackward0>)
tensor(13)
--------------
tensor([[0.0531, 0.0537, 0.0553, 0.0591, 0.0581, 0.0553, 0.0511, 0.0578, 0.0579,
 

In [75]:
(output.argmax() == labelTensor).float().item()

0.0

In [77]:
categories[output.argmax().item()]

'Dutch'

In [81]:
correct = 0
for i in range(1):
    correct += evaluate()

correct / 100
    

tensor([[-0.0145, -0.0511, -0.0079,  0.0249,  0.0056,  0.0550, -0.0252,  0.0747,
          0.0079, -0.0966,  0.0509, -0.0525, -0.0673,  0.0552,  0.0139,  0.0120,
          0.0033, -0.0500]], grad_fn=<AddmmBackward0>)
tensor(7)
--------------
tensor([[0.0549, 0.0529, 0.0552, 0.0571, 0.0560, 0.0588, 0.0543, 0.0600, 0.0561,
         0.0506, 0.0586, 0.0528, 0.0521, 0.0588, 0.0565, 0.0564, 0.0559, 0.0530]],
       grad_fn=<SoftmaxBackward0>)
tensor([[-2.9025, -2.9391, -2.8959, -2.8631, -2.8824, -2.8330, -2.9132, -2.8133,
         -2.8801, -2.9846, -2.8371, -2.9405, -2.9553, -2.8328, -2.8741, -2.8760,
         -2.8847, -2.9380]], grad_fn=<LogSoftmaxBackward0>)
tensor(7)
tensor([[-0.0149, -0.0478, -0.0023, -0.0065,  0.0710,  0.0258, -0.0887,  0.0532,
          0.0057, -0.1096,  0.0071, -0.0553, -0.0620,  0.1054, -0.0016,  0.0245,
          0.0212, -0.0755]], grad_fn=<AddmmBackward0>)
tensor(13)
--------------
tensor([[0.0551, 0.0533, 0.0558, 0.0556, 0.0601, 0.0574, 0.0512, 0.0590, 0.0563,
   

0.0