### **Vanilla RNN (Recurrent Neural Network)**
- PyTorch Implementation
- Name Classification Task
- Many-to-One RNN

##### **Dataset:** https://download.pytorch.org/tutorial/data.zip

In [32]:
import os
import glob
import unicodedata
import string
import torch
import torch.nn as nn

In [33]:
#-- We define the set of all possible letters we'll use
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

print(f"All letters: {all_letters} \t(Total: {n_letters})")

All letters: abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,;' 	(Total: 57)


In [34]:
#-- Function to convert Unicode to plain ASCII
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn' and c in all_letters
    )

unicode_to_ascii("Ślusàrski$%")

'Slusarski'

In [35]:
#-- Load and Process the Data
category_lines = {}
all_categories = []

def read_lines(filename):
    with open(filename, encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    return [unicode_to_ascii(line) for line in lines]

for filename in glob.glob('data/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = read_lines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

In [36]:
print(f"Categories: {all_categories[:3]}... \t(Total Categories: {n_categories})")

Categories: ['Arabic', 'Chinese', 'Czech']... 	(Total Categories: 18)


In [37]:
for k,v in category_lines.items():
	print(f"{k}: {v[:3]}... (Total Values: {len(v)})")
	break

Arabic: ['Khoury', 'Nahas', 'Daher']... (Total Values: 2000)


In [38]:
#-- Turn Letters into Tensors using One-Hot Encoding
def letter_to_tensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][all_letters.find(letter)] = 1
    return tensor

def line_to_tensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for i, letter in enumerate(line):
        tensor[i][0][all_letters.find(letter)] = 1
    return tensor

In [39]:
letter_to_tensor('a')

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0.]])

In [40]:
line_to_tensor('abc')

tensor([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]],

        [[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]]])

In [41]:
#-- Define the Simple RNN
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)  # Concatenate input and hidden state
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

In [42]:
#-- Training Utilities (Get Random Training Example)
import random

def random_training_example():
    category = random.choice(all_categories)
    line = random.choice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = line_to_tensor(line)
    return category, line, category_tensor, line_tensor

In [43]:
random_training_example()

('German',
 'Habicht',
 tensor([6]),
 tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0.]],
 
         [[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [44]:
#-- Training the Network
criterion = nn.NLLLoss()
learning_rate = 0.005
rnn = RNN(n_letters, 128, n_categories)  # 128 hidden size

def train(category_tensor, line_tensor):
    hidden = rnn.init_hidden()
    rnn.zero_grad()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    for p in rnn.parameters():
        p.data -= learning_rate * p.grad.data

    return output, loss.item()

In [45]:
# Train over many iterations
n_iters = 10_000
print_every = 1000
for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = random_training_example()
    output, loss = train(category_tensor, line_tensor)

    if iter % print_every == 0:
        guess = all_categories[output.topk(1)[1].item()]
        correct = '✓' if guess == category else f'✗ ({category})'
        print(f'{iter} {loss:.4f} {line} → {guess} {correct}')

1000 2.8842 Colon → Russian ✗ (Spanish)
2000 3.0750 Rog → Scottish ✗ (Polish)
3000 2.6404 Okamura → Japanese ✓
4000 2.5952 Tong → Chinese ✗ (Vietnamese)
5000 2.2663 Nervetti → Italian ✓
6000 2.2229 Coelho → Czech ✗ (Portuguese)
7000 1.6784 Naoimhin → Irish ✓
8000 2.0870 Eoghan → Arabic ✗ (Irish)
9000 1.2479 Zha → Chinese ✓
10000 2.9220 Koenig → Spanish ✗ (German)


In [46]:
import math

#-- Prediction Function
def predict(input_line, n_predictions=3):
	with torch.inference_mode():
		line_tensor = line_to_tensor(input_line)
		hidden = rnn.init_hidden()

		for i in range(line_tensor.size()[0]):
			output, hidden = rnn(line_tensor[i], hidden)

		topv, topi = output.topk(n_predictions, 1, True)
		predictions = []

		for i in range(n_predictions):
			value = topv[0][i].item()
			category_index = topi[0][i].item()
			predictions.append((value, all_categories[category_index]))

		results = []
		for value, category in predictions:
			results.append(
				{
					'category': category,
					'probability': math.exp(value),
					'log_probability': value
				}
			)

		return results

# Example usage
print(predict("Satoshi"))

[{'category': 'Polish', 'probability': 0.22427910753730615, 'log_probability': -1.4948639869689941}, {'category': 'Italian', 'probability': 0.11635690942881172, 'log_probability': -2.151093006134033}, {'category': 'Japanese', 'probability': 0.11105543667289425, 'log_probability': -2.197725772857666}]
