# Name Generation using LSTM

In [21]:
import torch
import torch.nn as nn
import numpy as np
import string
import random
import sys
import unidecode
from torch.utils.tensorboard import SummaryWriter

Set the device to `cuda` if it is available

In [22]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Let's get all characters from `string.printable`

In [23]:
# The vocabulary is every character in `string.printable`; a character's
# position in this string is used as its integer id by the generator below.
all_characters = string.printable
n_characters = len(string.printable)
print(f" The total number of characters : {n_characters}")

 The total number of characters : 100


Now it's time to load the data and read the large text file.

In [24]:
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the text is in memory, then pass it through unidecode so the
# corpus is plain ASCII before it is indexed against `all_characters`.
with open("data/names.txt") as names_file:
    file = unidecode.unidecode(names_file.read())

### Build the model

We will use a Recurrent Neural Network (RNN) in this notebook, specifically an LSTM (`nn.LSTM`). RNNs are popular and powerful tools for modelling sequences of varying length.

In [25]:
class RNN(nn.Module):
    """Character-level model: embedding -> stacked LSTM -> linear output layer.

    The network is advanced one time step per call: `forward` takes one
    character index per batch element together with the current LSTM state
    and returns scores over the output vocabulary plus the updated state.

    Args:
        input_size: Number of distinct input indices (embedding rows).
        hidden_size: Width of the embedding and of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of scores produced per batch element.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden, cell):
        """Run a single time step.

        Args:
            x: Integer tensor of shape (batch,) holding one index per element.
            hidden: LSTM hidden state, shape (num_layers, batch, hidden_size).
            cell: LSTM cell state, same shape as `hidden`.

        Returns:
            A pair `(out, (hidden, cell))` where `out` has shape
            (batch, output_size) and the inner tuple is the updated state.
        """
        embedded = self.embed(x)
        # The LSTM is batch_first, so insert a sequence axis of length 1:
        # (batch, hidden_size) -> (batch, 1, hidden_size).
        out, (hidden, cell) = self.lstm(embedded.unsqueeze(1), (hidden, cell))
        # Drop the length-1 sequence axis again before the output layer.
        out = self.fc(out.reshape(out.shape[0], -1))
        return out, (hidden, cell)

    def init_hidden(self, batch_size):
        """Return zero-filled `(hidden, cell)` states for `batch_size` sequences.

        The states are allocated directly on the device (and with the dtype)
        of the model's own parameters, so they always match the module no
        matter where it has been moved, and no notebook-level global is needed.
        """
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        reference = self.fc.weight
        hidden = reference.new_zeros(state_shape)
        cell = reference.new_zeros(state_shape)
        return hidden, cell

Now let's create our generator as follows:

In [34]:

class Generator():
    """Trains a character-level LSTM on the corpus and samples text from it.

    The class relies on notebook-level names defined in earlier cells:
    `file` (the training text), `all_characters` / `n_characters` (the
    vocabulary), `device`, and the `RNN` model class.

    Args:
        chunk_len: Number of characters in each training sequence.
        num_epchos: Number of training iterations (one random batch each).
            The misspelled parameter name is kept so that existing keyword
            callers keep working; the value is stored as `self.num_epochs`.
        batch_size: Number of sequences per training batch.
        hidden_size: Width of the embedding and LSTM layers.
        num_layers: Number of stacked LSTM layers.
        lr: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, chunk_len, num_epchos, batch_size, hidden_size,
                 num_layers, lr):
        self.chunk_len = chunk_len
        # Use the constructor argument, not a notebook-global `num_epochs`.
        self.num_epochs = num_epchos
        self.batch_size = batch_size
        self.print_every = 50  # report loss and a sample every 50 iterations
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lr = lr

    def char_tensor(self, string):
        """Encode `string` as a 1-D long tensor of indices into `all_characters`.

        Raises:
            ValueError: If a character is not present in `all_characters`.
        """
        tensor = torch.zeros(len(string)).long()
        for position, char in enumerate(string):
            tensor[position] = all_characters.index(char)
        return tensor

    def get_random_batch(self):
        """Sample a batch of random training chunks from `file`.

        Every row is drawn independently. A row needs `chunk_len + 1`
        consecutive characters: the first `chunk_len` form the input and the
        same span shifted by one character forms the target.

        Returns:
            `(text_input, text_target)`, two long tensors of shape
            (batch_size, chunk_len).

        Raises:
            ValueError: If `file` holds fewer than `chunk_len + 1` characters
                (raised by `random.randint` on an empty range) or contains a
                character missing from `all_characters`.
        """
        # `random.randint` includes both endpoints, so the last valid start
        # must still leave room for chunk_len + 1 characters.
        last_start = len(file) - self.chunk_len - 1
        text_input = torch.zeros(self.batch_size, self.chunk_len)
        text_target = torch.zeros(self.batch_size, self.chunk_len)

        for i in range(self.batch_size):
            start_idx = random.randint(0, last_start)
            end_idx = start_idx + self.chunk_len + 1
            text_str = file[start_idx:end_idx]
            text_input[i, :] = self.char_tensor(text_str[:-1])
            text_target[i, :] = self.char_tensor(text_str[1:])

        return text_input.long(), text_target.long()

    def generate(self, initial_str="A", predict_len=100, temperature=0.85):
        """Sample `predict_len` characters from the trained model.

        Args:
            initial_str: Non-empty seed text; it is fed through the model
                first and is included at the start of the returned string.
            predict_len: Number of characters to sample after the seed.
            temperature: Divisor applied to the model scores before they are
                exponentiated; smaller values concentrate the sampling
                distribution on the highest-scoring characters.

        Returns:
            `initial_str` followed by the sampled characters.
        """
        # Sampling needs no gradients, so skip building the autograd graph.
        with torch.no_grad():
            # Characters are fed one at a time (batch of 1), so the state must
            # also have batch size 1 regardless of the training batch size.
            hidden, cell = self.rnn.init_hidden(batch_size=1)
            initial_input = self.char_tensor(initial_str)
            predicted = initial_str

            # Warm up the state on every seed character except the last one.
            for p in range(len(initial_str) - 1):
                _, (hidden, cell) = self.rnn(
                    initial_input[p].view(1).to(device), hidden, cell
                )

            last_char = initial_input[-1]

            for _ in range(predict_len):
                output, (hidden, cell) = self.rnn(
                    last_char.view(1).to(device), hidden, cell
                )
                # Unnormalised sampling weights; multinomial normalises them.
                output_dist = output.view(-1).div(temperature).exp()
                top_char = torch.multinomial(output_dist, 1)[0].item()
                predicted_char = all_characters[top_char]
                predicted += predicted_char
                last_char = self.char_tensor(predicted_char)

        return predicted

    def train(self):
        """Build a fresh model, train it, and log the loss to TensorBoard.

        Each iteration draws one random batch, steps the model through it one
        character at a time, sums the cross-entropy losses and takes a single
        optimizer step. Every `print_every` iterations the average
        per-character loss and a generated sample are printed. The trained
        model is left in `self.rnn`.
        """
        # RNN arguments: input_size, hidden_size, num_layers, output_size
        self.rnn = RNN(
            n_characters, self.hidden_size, self.num_layers, n_characters
        ).to(device)

        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter("runs/names0")  # for tensorboard

        print(" Start Training ...")

        try:
            for epoch in range(1, self.num_epochs + 1):
                inp, target = self.get_random_batch()
                hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)

                self.rnn.zero_grad()
                loss = 0
                inp = inp.to(device)
                target = target.to(device)

                for c in range(self.chunk_len):
                    output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
                    loss += criterion(output, target[:, c])

                loss.backward()
                optimizer.step()
                # Average per-character loss, as a plain Python float.
                loss = loss.item() / self.chunk_len

                if epoch % self.print_every == 0:
                    print(f"Epoch {epoch}/{self.num_epochs}, Loss: {loss}")
                    print(self.generate())

                writer.add_scalar("Training loss", loss, global_step=epoch)
        finally:
            # Flush pending events and release the log file even if training
            # is interrupted.
            writer.close()

In [35]:
# Hyperparameters for the run.
chunk_len = 250      # characters per training sequence
num_epochs = 1000    # training iterations (one random batch each)
batch_size = 1       # sequences per batch
hidden_size = 256    # embedding / LSTM width
num_layers = 2       # stacked LSTM layers
lr = 0.005           # Adam learning rate

# Sampling settings; they are not passed to Generator in this cell.
initial_str = "A"
predict_len = 100

gennames = Generator(
    chunk_len, num_epochs, batch_size, hidden_size, num_layers, lr
)
gennames.train()


 Start Training ...
Epoch 50/1000, Loss: 2.45411865234375
Aley
Taihnsa
Larcie
Kerreeli
Jorannan
Tarsere
Latvina
Mowisto
Larli
Mania
Jeliccon
Ketzyn
Aaria
Hisen
Epoch 100/1000, Loss: 2.1676904296875
Avie
Sharia
Rotha
Jocharia
Frian
Damile
Darida
Oleto
Mairbie
Malla
Nistte
Alleisepha
Pathie
Carroni
M
Epoch 150/1000, Loss: 2.331116455078125
Aley
Blarina
Josly
Casler
Jade
Erine
Lelary
Nathar
Tro
Turia
Kardelina
Rorbine
Jurit
Dobrin
Marenna
R
Epoch 200/1000, Loss: 1.981636474609375
Alen
Lolett
Patty
Keonell
Ell
Melony
Bornin
Mirgiage
Glarce
Dorron
Josnin
Stanton
Tover
Kerie
Bennell
Epoch 250/1000, Loss: 2.064339111328125
Ara
Gendra
Erta
Jeul
Jamen
Henavie
Gorile
Chard
Claree
Brece
Latalda
Jossa
Athel
Ferel
Teris
Jashele

Epoch 300/1000, Loss: 1.9355018310546874
Alie
Jeya
Gila
Sharry
Colly
Leomine
Airna
Ryenton
Noga
Teahel
Bian
Artha
Eesamue
Gevi
Tomin
Belly
Eri
Epoch 350/1000, Loss: 1.951608642578125
Amen
Stamen
Pekelisond
Gilliela
Joy
Coosa
Chacy
Flonny
Logey
Bex
Trise
Deffferip
Kelliso
Lo