In [1]:
import torch
import torch.nn as nn
import string
import random
import unidecode
import sys
from torch.utils.tensorboard import SummaryWriter

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
all_characters = string.printable
n_characters = len(all_characters)

In [2]:
file = unidecode.unidecode(open('./names.txt').read())

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, X, hidden, cell):
        print(X)
        out = self.embed(X)
        print(out.shape)
        out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
        print(out.shape)
        out = self.fc(out.reshape(out.shape[0], -1))
        print(out.shape)
        input()
        return out, (hidden, cell)
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
        return hidden, cell
    
class Generator():
    def __init__(self):
        self.chunk_len = 250
        self.num_epochs = 5000
        self.batch_size = 1
        self.print_every = 50
        self.hidden_size = 256
        self.num_layers = 2
        self.lr = 3e-3
        
    def char_tensor(self, string):
        tensor = torch.zeros(len(string)).long()
        for c in range(len(string)):
            tensor[c] = all_characters.index(string[c])
            
        return tensor
    
    def get_random_batch(self):
        start_index = random.randint(0, len(file) - self.chunk_len)
        end_index = start_index + self.chunk_len + 1
        text_str = file[start_index:end_index]
        text_input = torch.zeros(self.batch_size, self.chunk_len)
        text_target = torch.zeros(self.batch_size, self.chunk_len)
        
        for i in range(self.batch_size):
            text_input[i,:] = self.char_tensor(text_str[:-1])
            text_target[i,:] = self.char_tensor(text_str[1:])
            
        return text_input.long(), text_target.long()
    
    def generate(self, init_str="Ty", prediction_len=100, temperature=0.85):
        hidden, cell = self.rnn.init_hidden(self.batch_size)
        init_inp = self.char_tensor(init_str)
        pred = init_str
        
        for p in range(len(init_str) - 1):
            , (hidden, cell) = self.rnn(init_inp[p].view(1).to(device), hidden, cell)
            
        last_char = init_inp[-1]
        
        for p in range(prediction_len):
            output, (hidden, cell) = self.rnn(last_char.view(1).to(device), hidden, cell)
            output_distance = output.data.view(-1).div(temperature).exp()
            top_char = torch.multinomial(output_distance, 1)[0]
            predicted_char = all_characters[top_char]
            pred += predicted_char
            last_char = self.char_tensor(predicted_char)
            
        return pred
    
    def train(self):
        self.rnn = RNN(n_characters, 
                       self.hidden_size, 
                       self.num_layers, 
                       n_characters).to(device)
        
        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter(f'runs/names0')
        
        for epoch in range(1, self.num_epochs+1):
            inp, target = self.get_random_batch()
            hidden, cell = self.rnn.init_hidden(self.batch_size)
            self.rnn.zero_grad()
            loss = 0
            inp = inp.to(device)
            target = target.to(device)
            
            for c in range(self.chunk_len):
                output, (hidden, cell) = self.rnn(inp[:,c], hidden, cell)
                loss += criterion(output, target[:,c])
            
            loss.backward()
            optimizer.step()
            loss = loss.item() / self.chunk_len
            if epoch % self.print_every == 0:
                print(f'Loss: {loss}')
                print(self.generate())
                
            writer.add_scalar('Training loss', loss, global_step=epoch)
            

In [None]:
gennames = Generator()
gennames.train()

tensor([38], device='cuda:0')
torch.Size([1, 256])
torch.Size([1, 1, 256])
torch.Size([1, 100])
