# Rekurencyjne Sieci Neuronowe (RNN)

### Importy i Utilsy  (odpalić i schować )

In [2]:
# imports 
import torch
import os
import unicodedata
import string
import numpy as np
from typing import Tuple, Optional, List

from torch.nn.functional import cross_entropy

import matplotlib.pyplot as plt 
from sklearn.metrics import f1_score

from torch.utils.data import Dataset, DataLoader

all_letters = string.ascii_letters
n_letters = len(all_letters)


class ListDataset(Dataset):
    
    def __init__(self, data, targets):
        
        self.data = data
        self.targets = targets
        
    def __getitem__(self, ind):
        
        return self.data[ind], self.targets[ind]
    
    def __len__(self):
        return len(self.targets)

    
def unicode_to__ascii(s: str) -> str:
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'
                                                                 and c in all_letters)
                   

def read_lines(filename: str) -> List[str]:
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicode_to__ascii(line) for line in lines]


def letter_to_index(letter: str) -> int:
    return all_letters.find(letter)


def line_to_tensor(line: str) -> torch.Tensor:
    tensor = torch.zeros(len(line), n_letters)
    for i, letter in enumerate(line):
        tensor[i][letter_to_index(letter)] = 1
    return tensor

## Dane sekwencyjne

Modele, którymi zajmowaliśmy się wcześniej zakładały konkretny kształt danych. Dla przykładu klasyczna sieć neuronowa fully-connected dla MNISTa zakładała, że na wejściu dostanie wektory rozmiaru 784 - dla wektorów o innej wymiarowości i innych obiektów model zwyczajnie nie będzie działać.

Takie założenie bywa szczególnie niewygodne przy pracy z niektórymi typami danych, takimi jak:
* językiem naturalny (słowa czy zdania mają zadanej z góry liczby znaków)
* szeregi czasowe (dane giełdowe ciągną się właściwie w nieskończoność) 
* dźwięk (nagrania mogą być krótsze lub dłuższe).

Do rozwiązania tego problemu służą rekuencyjne sieci neuronowe (*recurrent neural networks, RNNs*), które zapamiętują swój stan z poprzedniej iteracji.

### Ładowanie danych
Poniższe dwie komórki ściągają dataset nazwisk z 18 różnych narodowości. Każda litera w danym nazwisku jest zamieniana na jej indeks z alfabetu w postaci kodowania "one-hot". Inaczej mówiąc, każde nazwisko jest binarną macierzą rozmiaru `n_letters` $\times$ `len(name)`. 

Dodatkowo, ponieważ ten dataset jest mocno niezbalansowany, użyjemy specjalnego samplera do losowania przykładów treningowych, tak aby do uczenia sieć widziała tyle samo przykładów z każdej klasy.

Ponieważ nazwiska mogą mieć różne długości będziemy rozważać `batch_size = 1` w tym notebooku (choć implementacje modeli powinny działać dla dowolnych wartości `batch_size`!)

In [3]:
!wget https://download.pytorch.org/tutorial/data.zip
!unzip data.zip

--2022-01-19 20:46:02--  https://download.pytorch.org/tutorial/data.zip
Resolving download.pytorch.org (download.pytorch.org)... 13.226.52.128, 13.226.52.51, 13.226.52.36, ...
Connecting to download.pytorch.org (download.pytorch.org)|13.226.52.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2882130 (2.7M) [application/zip]
Saving to: ‘data.zip’


2022-01-19 20:46:03 (21.9 MB/s) - ‘data.zip’ saved [2882130/2882130]

Archive:  data.zip
   creating: data/
  inflating: data/eng-fra.txt        
   creating: data/names/
  inflating: data/names/Arabic.txt   
  inflating: data/names/Chinese.txt  
  inflating: data/names/Czech.txt    
  inflating: data/names/Dutch.txt    
  inflating: data/names/English.txt  
  inflating: data/names/French.txt   
  inflating: data/names/German.txt   
  inflating: data/names/Greek.txt    
  inflating: data/names/Irish.txt    
  inflating: data/names/Italian.txt  
  inflating: data/names/Japanese.txt  
  inflating: data/names/Korean.

In [5]:
# NOTE: you can change the seed or remove it completely if you like
torch.manual_seed(1337)

data_dir = 'data/names'

# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []

data = []
targets = [] 
label_to_idx = {}

# read each natonality file and process data 
for label, file_name in enumerate(os.listdir(data_dir)):
    
    label_to_idx[label] = file_name.split('.')[0].lower()
    
    names = read_lines(os.path.join(data_dir, file_name))
    data += [line_to_tensor(name) for name in names]
    targets += len(names) * [label]

# split into train and test indices
test_frac = 0.1
n_test = int(test_frac * len(targets))
test_ind = np.random.choice(len(targets), size=n_test, replace=False)
train_ind = np.setdiff1d(np.arange(len(targets)), test_ind)

targets = torch.tensor(targets)
train_targets = targets[train_ind]

# calculate weights for BalancedSampler
uni, counts = np.unique(train_targets, return_counts=True)
weight_per_class = len(targets) / counts
weight = [weight_per_class[c] for c in train_targets]
# preapre the sampler
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=weight, num_samples=len(weight)) 

train_dataset = ListDataset(data=[x for i, x in enumerate(data) if i in train_ind], targets=train_targets)
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=1, sampler=sampler)

test_dataset = ListDataset(data=[x for i, x in enumerate(data) if i in test_ind], targets=targets[test_ind])
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)

In [6]:
# check out the content of the dataset
for i, (x, y) in enumerate(train_loader):
    break

print("x.shape:", x.shape)
print("name: ", end="")
for letter_onehot in x[0]:
    print(all_letters[torch.argmax(letter_onehot)], end="")

print("\ny:", label_to_idx[y.item()])

x.shape: torch.Size([1, 4, 52])
name: Paul
y: french


In [7]:
for i, (x, y) in enumerate(train_loader):
    break
print(x.shape[1])
for i in range(len(x[0])):
  print(x[0,i])

7
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

## Zadanie 1. (2 pkt.)

Zaimplementuj "zwykłą" sieć rekurencyjną. 
![rnn](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/RNN-unrolled.png)

* W klasie `RNN` należy zainicjalizować potrzebne wagi oraz zaimplementować główną logikę dla pojedynczej chwili czasowej $x_t$
* Wyjście z sieci może mieć dowolny rozmiar, potrzebna jest również warstwa przekształacjąca stan ukryty na wyjście.
* W pętli uczenia należy dodać odpowiednie wywołanie sieci. HINT: pamiętać o iterowaniu po wymiarze "czasowym".


In [7]:
class RNN(torch.nn.Module):
    
    def __init__(self, 
                 input_size: int,
                 hidden_size: int, 
                 output_size: int):
        """
        :param input_size: int
            Dimensionality of the input vector
        :param hidden_size: int
            Dimensionality of the hidden space
        :param output_size: int
            Desired dimensionality of the output vector
        """
        super(RNN, self).__init__()

        self.hidden_size = hidden_size

        self.input_to_hidden = torch.nn.Linear(input_size + hidden_size, hidden_size)
        
        self.hidden_to_output = torch.nn.Linear(input_size + hidden_size,output_size)
    
    # for the sake of simplicity a single forward will process only a single timestamp 
    def forward(self, 
                input: torch.tensor, 
                hidden: torch.tensor) -> Tuple[torch.tensor, torch.tensor]:
        """
        :param input: torch.tensor 
            Input tesnor for a single observation at timestep t
            shape [batch_size, input_size]
        :param hidden: torch.tensor
            Representation of the memory of the RNN from previous timestep
            shape [batch_size, hidden_size]
        """
        # print(input.shape)
        # print(hidden.shape)
        combined = torch.cat([input, hidden], 1 ) 
        hidden = torch.sigmoid(self.input_to_hidden(combined))
        output =  self.hidden_to_output(combined)
        return output, hidden
    
    def init_hidden(self, batch_size: int) -> torch.Tensor:
        """
        Returns initial value for the hidden state
        """
        return torch.zeros(batch_size, self.hidden_size, requires_grad=True).cuda()

### Pętla uczenia

In [8]:
n_class = len(label_to_idx)

# initialize network and optimizer
rnn = RNN(n_letters, 512, n_class).cuda()
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.001)   

# we will train for only a single epoch 
epochs = 1


# main loop
for epoch in range(epochs):
    
    loss_buffer = []
    
    for i, (x, y) in enumerate(train_loader):  
        
        x = x.cuda()
        y = y.cuda()
        
        optimizer.zero_grad()
        # get initial hidden state
        hidden = rnn.init_hidden(x.shape[0])
        
        # get output for the sample, remember that we treat it as a sequence
        # so you need to iterate over the 2nd, time dimensiotn

        seq_len = x.shape[1]
        
        for c in range(seq_len):
          output, hidden= rnn(x[:,c,:],hidden)
            
        loss = cross_entropy(output, y)
        loss.backward()
        optimizer.step()  
        
        loss_buffer.append(loss.item())
        
        if i % 1000 == 1:
            print(f"Epoch: {epoch} Progress: {100 * i/len(train_loader):2.0f}% Loss: {np.mean(loss_buffer):.3f}")
            loss_buffer = []
    

# evaluate on the test set
with torch.no_grad():
    ps = []
    ys = []
    correct = 0
    for i, (x, y) in enumerate(test_loader):
        x = x.cuda()
        ys.append(y.numpy())

        hidden = rnn.init_hidden(x.shape[0])
        seq_len = x.shape[1]
        
        for c in range(seq_len):
            output, hidden_state= rnn(x[:,c,:],hidden)

        pred = output.argmax(dim=1)
        ps.append(pred.cpu().numpy())
    
    ps = np.concatenate(ps, axis=0)
    ys = np.concatenate(ys, axis=0)
    f1 = f1_score(ys, ps, average='weighted')
    
    print(f"Final F1 score: {f1:.5f}")
    assert f1 > 0.15, "You should get over 0.15 f1 score, try changing some hyperparams!"

Epoch: 0 Progress:  0% Loss: 3.162
Epoch: 0 Progress:  6% Loss: 2.919
Epoch: 0 Progress: 11% Loss: 2.900
Epoch: 0 Progress: 17% Loss: 2.896
Epoch: 0 Progress: 22% Loss: 2.890
Epoch: 0 Progress: 28% Loss: 2.881
Epoch: 0 Progress: 33% Loss: 2.869
Epoch: 0 Progress: 39% Loss: 2.872
Epoch: 0 Progress: 44% Loss: 2.856
Epoch: 0 Progress: 50% Loss: 2.845
Epoch: 0 Progress: 55% Loss: 2.834
Epoch: 0 Progress: 61% Loss: 2.837
Epoch: 0 Progress: 66% Loss: 2.821
Epoch: 0 Progress: 72% Loss: 2.811
Epoch: 0 Progress: 77% Loss: 2.808
Epoch: 0 Progress: 83% Loss: 2.802
Epoch: 0 Progress: 89% Loss: 2.793
Epoch: 0 Progress: 94% Loss: 2.785
Epoch: 0 Progress: 100% Loss: 2.784
Final F1 score: 0.18438


## Zadanie 2. (0.5 pkt.)
Zaimplementuj funkcje `predict`, która przyjmuje nazwisko w postaci stringa oraz model RNN i wypisuje 3 najlepsze predykcje narodowości dla tego nazwiska razem z ich logitami.

**Hint**: Przyda się tutaj jedna z funkcji z pierwszej komórki notebooka.

In [51]:
def predict(name: str, rnn: RNN):
    """Prints the name and model's top 3 predictions with scores"""
    rnn.eval()
    tensor_name=line_to_tensor(name).cuda()
    # print(tensor_name)
    # print(tensor_name.shape)
    with torch.no_grad():
      hidden=rnn.init_hidden(1)

      for c in range(len(tensor_name)):
        output, hidden= rnn(torch.tensor(tensor_name[c,:].reshape(1,52)),hidden)

      tmp, predicted=torch.topk(output,3,dim=1)
      print( "Predicted:\n1. " + label_to_idx[predicted[0][0].item()] + " ("+str(tmp[0][0].item()) + ") " )
      print( "2. " + label_to_idx[predicted[0][1].item()] +" (" + str(tmp[0][1].item()) + ") " )
      print("3. " + label_to_idx[predicted[0][2].item()] + " ("+ str(tmp[0][2].item()) + ") "  )
      return None
    

In [16]:
ten=torch.tensor([3.,4.,1.,2.,6.])
tmp,predicted= torch.topk(ten,3)

print("Predicted: " + str(ten[predicted[0][0].item()]) + " ("+str( tmp[0].item() ) + ") "  )

Predicted: tensor(6.) (6.0) 


In [52]:
some_names = ["Satoshi", "Jackson", "Schmidhuber", "Hinton", "Kowalski"]

for name in some_names:
    print(name)
    predict(name, rnn)
    print()

Satoshi
Predicted:
1. polish (0.42064955830574036) 
2. english (0.3277337849140167) 
3. arabic (0.31126537919044495) 

Jackson
Predicted:
1. irish (0.6088565587997437) 
2. english (0.4980972409248352) 
3. arabic (0.27436670660972595) 

Schmidhuber
Predicted:
1. english (0.4500594437122345) 
2. arabic (0.3877067565917969) 
3. irish (0.2683996856212616) 

Hinton
Predicted:
1. irish (0.6081615090370178) 
2. english (0.49816036224365234) 
3. arabic (0.2739345133304596) 

Kowalski
Predicted:
1. polish (0.4369840621948242) 
2. english (0.32872897386550903) 
3. arabic (0.3119771480560303) 



  # This is added back by InteractiveShellApp.init_path()


In [None]:
CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1


# otrzymywałem taki error przy różnych próbach, niestety po zmianie kodu nawet na taki który działał wcześniej
# ten error nadal się pokazywał. Resetowanie Colaba nie pomagało, dopiero połącznie się z lokalnym hostem (gdzie również cuda jest dostępny) 
# pomogło wyeliminować ten error


## Zadanie 3 (4 pkt.)
Ostatnim zadaniem jest implementacji komórki i sieci LSTM. 

![lstm](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-chain.png)

* W klasie `LSTMCell` ma znaleźć się główna logika LSTMa, czyli wszystkie wagi do stanów `hidden` i `cell` jak i bramek kontrolujących te stany. 
* W klasie `LSTM` powinno znaleźć się wywołanie komórki LSTM, HINT: poprzednio było w pętli uczenia, teraz przenisiemy to do klasy modelu.
* W pętli uczenia należy uzupełnić brakujące wywołania do uczenia i ewaluacji modelu.

Zdecydowanie polecam [materiały Chrisa Olaha](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) do zarówno zrozumienia jak i ściągi do wzorów.

Zadaniem jest osiągnięcie wartości `f1_score` lepszej niż na sieci RNN, przy prawidłowej implementacji nie powinno być z tym problemów używając podanych hiperparametrów. Dozwolona jest oczywiście zmiana `random seed`.

#### Komórka LSTM

In [53]:
class LSTMCell(torch.nn.Module):

    def __init__(self, 
                 input_size: int, 
                 hidden_size: int):
        """
        :param input_size: int
            Dimensionality of the input vector
        :param hidden_size: int
            Dimensionality of the hidden space
        """
        
        super(LSTMCell, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size

        # initialize LSTM weights 
        # NOTE: there are different approaches that are all correct 
        # (e.g. single matrix for all input opperations), you can pick
        # whichever you like for this task
    
        self.weight_f=torch.nn.Parameter(torch.rand(hidden_size, hidden_size + input_size))
        self.weight_i=torch.nn.Parameter(torch.rand(hidden_size, hidden_size + input_size))
        self.weight_c=torch.nn.Parameter(torch.rand(hidden_size, hidden_size + input_size))
        self.weight_o=torch.nn.Parameter(torch.rand(hidden_size, hidden_size + input_size))

        self.bias_f=torch.nn.Parameter(torch.rand(hidden_size))
        self.bias_i=torch.nn.Parameter(torch.rand(hidden_size))
        self.bias_c=torch.nn.Parameter(torch.rand(hidden_size))
        self.bias_o=torch.nn.Parameter(torch.rand(hidden_size))

    def forward(self, 
                input: torch.tensor, 
                states: Tuple[torch.tensor, torch.tensor]) -> Tuple[torch.tensor, torch.tensor]:
        
        hidden, cell = states
        
        # Compute input, forget, and output gates
        # then compute new cell state and hidden state
        # see http://colah.github.io/posts/2015-08-Understanding-LSTMs/ 
        conc= torch.cat([input,hidden],1)

        f= torch.sigmoid( self.weight_f @ conc[0] + self.bias_f )
        # print(f.shape)
        i = torch.sigmoid( self.weight_i @ conc[0] + self.bias_i )
        c = torch.tanh( self.weight_c @ conc[0] + self.bias_c )
        o = torch.sigmoid( self.weight_o @ conc[0] + self.bias_o )
        cell = f*cell + i*c
        
        hidden = o*torch.tanh(cell)
        # print(hidden.shape)
        # print(cell.shape)

        return hidden, cell

### Klasa modelu LSTM

In [54]:
class LSTM(torch.nn.Module):

    def __init__(self, 
                 input_size: int, 
                 hidden_size: int):
        """
        :param input_size: int
            Dimensionality of the input vector
        :param hidden_size: int
            Dimensionality of the hidden space
        """
        
        super(LSTM, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.cell = LSTMCell(input_size=input_size, hidden_size=hidden_size)
        
    def forward(self, 
                input: torch.tensor) -> Tuple[torch.tensor, torch.tensor]:
        """
        :param input: torch.tensor 
            Input tesnor for a single observation at timestep t
            shape [batch_size, input_size]
        Returns Tuple of two torch.tensors, both of shape [seq_len, batch_size, hidden_size]
        """
        
        batch_size = input.shape[0]
        
        hidden, cell = self.init_hidden_cell(batch_size)
        
        hiddens = []
        cells = []
        
        # this time we will process the whole sequence in the forward method
        # as oppose to the previous exercise, remember to loop over the timesteps
        
        time_steps = input.shape[1]


        for i in range(time_steps):         
          hidden, cell=self.cell(input[:,i,:],[hidden,cell])
          
          hiddens.append(hidden)        
          cells.append(cell) 

        return hiddens, cells
    
    def init_hidden_cell(self, batch_size):
        """
        Returns initial value for the hidden and cell states
        """
        return (torch.zeros(batch_size, self.hidden_size, requires_grad=True).cuda(), 
                torch.zeros(batch_size, self.hidden_size, requires_grad=True).cuda())

### Pętla uczenia

In [91]:
from itertools import chain

# torch.manual_seed(1337)

# build data loaders
train_loader = DataLoader(train_dataset, batch_size=1, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=1)

# initialize the lstm with an additional cliassifier layer at the top
lstm = LSTM(input_size=len(all_letters), hidden_size=128).cuda()
clf = torch.nn.Linear(in_features=128, out_features=len(label_to_idx)).cuda()

# initialize a optimizer
params = chain(lstm.parameters(), clf.parameters())
optimizer = torch.optim.Adam(params, lr=0.008) 

# we will train for only a single epoch 
epoch = 1

# main loop
for epoch in range(epoch):
    
    loss_buffer = []
    
    for i, (x, y) in enumerate(train_loader):   
        
        x = x.cuda()
        y = y.cuda()
        
        optimizer.zero_grad()
        
        # get output for the sample, remember that we treat it as a sequence
        # so you need to iterate over the sequence length here
        # don't forget about the classifier!
        hidden,cell = lstm(x)
        # print(hidden[1].shape)
        output = clf(torch.tensor(hidden[0]))

        # calucate the loss
        loss = cross_entropy(output, y)
        loss.backward()
        optimizer.step()                                
        
        loss_buffer.append(loss.item())
        
        if i % 1000 == 1:
            print(f"Epoch: {epoch} Progress: {100 * i/len(train_loader):2.0f}% Loss: {np.mean(loss_buffer):.3f}")
            loss_buffer = []

# evaluate on the test set
with torch.no_grad():
    
    ps = []
    ys = []
    for i, (x, y) in enumerate(test_loader): 
        
        x = x.cuda()
        ys.append(y.numpy())
        
        hidden,cell = lstm(x)
        output = clf(hidden[0])

        pred = output.argmax(dim=1)
        ps.append(pred.cpu().numpy())
    
    ps = np.concatenate(ps, axis=0)
    ys = np.concatenate(ys, axis=0)
    f1 = f1_score(ys, ps, average='weighted')
    
    print(f"Final F1 score: {f1:.2f}")
    assert f1 > 0.18, "You should get over 0.18 f1 score, try changing some hiperparams!"



Epoch: 0 Progress:  0% Loss: 3.106
Epoch: 0 Progress:  6% Loss: 3.242
Epoch: 0 Progress: 11% Loss: 3.130
Epoch: 0 Progress: 17% Loss: 3.127
Epoch: 0 Progress: 22% Loss: 3.024
Epoch: 0 Progress: 28% Loss: 3.027
Epoch: 0 Progress: 33% Loss: 3.002
Epoch: 0 Progress: 39% Loss: 2.967
Epoch: 0 Progress: 44% Loss: 3.007
Epoch: 0 Progress: 50% Loss: 3.025
Epoch: 0 Progress: 55% Loss: 3.038
Epoch: 0 Progress: 61% Loss: 3.001
Epoch: 0 Progress: 66% Loss: 2.968
Epoch: 0 Progress: 72% Loss: 2.955
Epoch: 0 Progress: 77% Loss: 2.920
Epoch: 0 Progress: 83% Loss: 2.950
Epoch: 0 Progress: 89% Loss: 2.972
Epoch: 0 Progress: 94% Loss: 2.917
Epoch: 0 Progress: 100% Loss: 2.938
Final F1 score: 0.25


## Zadanie 4. (0.5 pkt.)
Zaimplementuj analogiczną do funkcji `predict` z zadania 2 dla modelu `lstm+clf`.


In [111]:
def predict_lstm(name: str, lstm: LSTM, clf: torch.nn.Module):
    """Prints the name and model's top 3 predictions with scores"""
    lstm.eval()
    clf.eval()
    tensor_name=line_to_tensor(name).cuda()
    tensor_name.unsqueeze_(0)
    # print(tensor_name)
    # print(tensor_name.shape)
    with torch.no_grad():

      hidden,call=lstm(tensor_name)
      output = clf(torch.tensor(hidden[0]))
      tmp, predicted=torch.topk(output,3,dim=1)
      print( "Predicted:\n1. " + label_to_idx[predicted[0][0].item()] + " ("+str(tmp[0][0].item()) + ") " )
      print( "2. " + label_to_idx[predicted[0][1].item()] +" (" + str(tmp[0][1].item()) + ") " )
      print("3. " + label_to_idx[predicted[0][2].item()] + " ("+ str(tmp[0][2].item()) + ") "  )
      return None

    pass

In [112]:
# test your lstm predictor
some_names = ["Satoshi", "Jackson", "Schmidhuber", "Hinton", "Kowalski"]
    
for name in some_names:
    print(name)
    predict_lstm(name, lstm, clf)

Satoshi
Predicted:
1. korean (-2.275707721710205) 
2. arabic (-3.0948100090026855) 
3. japanese (-3.291802167892456) 
Jackson
Predicted:
1. korean (-1.4437400102615356) 
2. russian (-1.5827007293701172) 
3. scottish (-2.1397290229797363) 
Schmidhuber
Predicted:
1. korean (-2.275707721710205) 
2. arabic (-3.0948100090026855) 
3. japanese (-3.291802167892456) 
Hinton
Predicted:
1. russian (-2.3798959255218506) 
2. english (-2.571331739425659) 
3. arabic (-2.7084598541259766) 
Kowalski
Predicted:
1. japanese (-1.8412301540374756) 
2. english (-2.639096736907959) 
3. greek (-2.646069049835205) 


  if sys.path[0] == '':
