In [39]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import tqdm

In [34]:
class RNNCell(nn.Module):
  def __init__(self, input_size, hidden_size, output_size, nonlinearity='tanh') -> None:
    super().__init__()
    self.i2o = nn.Linear(input_size+hidden_size, output_size)
    self.i2h = nn.Linear(input_size+hidden_size, hidden_size)
    self.tanh = nn.Tanh()
  
  def forward(self, input_tensor, hidden_tensor):
    combined = torch.cat((input_tensor, hidden_tensor), dim=1)
    output = self.tanh(self.i2o(combined))
    hidden = self.tanh(self.i2h(combined))

    return output, hidden

class RNN(nn.Module):
  def __init__(self, input_size, output_size, hidden_size, num_layers) -> None:
    super(RNN, self).__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.layers = nn.ModuleList()

    for i in range(self.num_layers):
      self.layers.append(
        RNNCell(self.input_size, self.hidden_size, self.output_size)
      )
    self.fc = nn.Linear(self.input_size, self.output_size)

  def forward(self, input_tensor, hidden_tensor):
    for i in range(self.num_layers):
      input_tensor, hidden_tensor[i] = self.layers[i](input_tensor, hidden_tensor[i])
    output = self.fc(input_tensor)
    return torch.argmax(output), hidden_tensor

In [32]:
# Loading Dataset
import string
import os
import unicodedata
import random

# parameters
alphabets = string.ascii_letters + " .,;'"
data_path = './data/names/'
total_chars = len(alphabets)

# some functions
def unicode_to_ascii(s):
  return ''.join(
      c for c in unicodedata.normalize('NFD', s)
      if unicodedata.category(c) != 'Mn'
      and c in alphabets
  )

def word_to_tensor(word):
  tensor = torch.zeros(len(word), 1, total_chars)
  for i, letter in enumerate(word):
    tensor[i][0][alphabets.find(letter)] = 1
  return tensor

def random_example(languages, data):
  language = random.choice(languages)
  language_tensor = torch.tensor([languages.index(language)])
  word = random.choice(data[language])
  return language_tensor, word_to_tensor(word)

# some functions
languages = []
data = {}
for file_name in os.listdir(data_path):
  category = file_name.split('.')[0]
  file = open(data_path+file_name, 'r', encoding='utf-8')
  languages.append(category)
  data[category] = []
  for line in file.readlines():
    data[category].append(unicode_to_ascii(line))

In [38]:
# parameters
input_size = len(alphabets)
output_size = len(languages)
hidden_size = 128
num_layers = 3
learning_rate = 0.005
iterations = 10000
rnn = RNN(input_size, output_size, hidden_size, num_layers)
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

In [44]:
losses = []
curr_loss = 0
for i in range(iterations):
  language, word = random_example(languages, data)
  hidden = torch.randn(num_layers, 1, hidden_size)
  for letter in word:
    output, hidden = rnn(letter, hidden)
  loss = criterion(output, language)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  losses.append(loss.item())

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x146 and 185x18)