In [1]:
# Simple Character RNN 
# Code by GunhoChoi

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

In [48]:
import string
import random
import re

chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ?!.,:;01"
char_list = [i for i in chars]
char_len = len(char_list)
print("char len =",char_len)

file = open('./shakespeare.txt',encoding="utf-8")
file_list = [i for i in file]

file_to_char = []
for i in file_list:
    for j in range(len(i)):
        file_to_char.append(i[j])
   
file_len = len(file_to_char)
print('file_len =', file_len)

char len = 61
file_len = 1115394


In [3]:
# String to onehot vector
# a -> [1 0 0 ... 0 0]

def string_to_onehot(string):
    start = np.zeros(shape=len(char_list) ,dtype=int)
    end = np.zeros(shape=len(char_list) ,dtype=int)
    start[-2] = 1
    end[-1] = 1
    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape=char_len ,dtype=int)
        zero[idx]=1
        start = np.vstack([start,zero])
    output = np.vstack([start,end])
    return output

In [4]:
# Onehot vector to word
# [1 0 0 ... 0 0] -> a 

def onehot_to_word(onehot_1):
    onehot = torch.Tensor.numpy(onehot_1)
    return char_list[onehot.argmax()]

In [5]:
# Hyperparameters & Initialization of rnn
# hidden vector size do matter

batch_size = 5
seq_len = 1
num_layers = 1
input_size = char_len
hidden_size = 35 
lr = 0.01
num_epochs = 1000

one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

print(one_hot.size())

torch.Size([70, 35])


In [6]:
# RNN with 1 hidden layer

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size,num_layers):
        super(RNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size,hidden_size,num_layers,batch_first=True)
        
    def forward(self,input,hidden,cell):
        output,(hidden,cell) = self.lstm(input,(hidden,cell))
        
        return output,hidden,cell
    
    def init_hidden_cell(self):
        hidden = Variable(torch.zeros(num_layers,seq_len*batch_size,hidden_size)).cuda()
        cell = Variable(torch.zeros(num_layers,seq_len*batch_size,hidden_size)).cuda()
        
        return hidden,cell
    
rnn = RNN(input_size,hidden_size, num_layers).cuda()

In [7]:
# Loss function & Optimizer

loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=lr)

In [8]:
j=0
input_data = Variable(one_hot[j:j+batch_size].view(batch_size,seq_len,-1)).cuda()
print(input_data.size())

hidden,cell = rnn.init_hidden_cell()
print(hidden.size(),cell.size())

output,hidden,cell = rnn(input_data,hidden,cell)
print(output.size(),hidden.size(),cell.size())

unroll_len = one_hot.size()[0]//seq_len -1

for i in range(num_epochs):
    hidden,cell = rnn.init_hidden_cell()
    
    loss = 0
    for j in range(unroll_len-batch_size+1):
        
        input_data = torch.stack([one_hot[j+k:j+k+seq_len] for k in range(batch_size)],dim=0)
        label = torch.stack([one_hot[j+k+1:j+k+seq_len+1] for k in range(batch_size)],dim=0)
        
        input_data = Variable(input_data).cuda()
        label = Variable(label).cuda()
        
        optimizer.zero_grad()

        output, hidden, cell = rnn(input_data,hidden,cell)
        loss += loss_func(output.view(1,-1),label.view(1,-1))
        
    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss)

torch.Size([5, 1, 35])
torch.Size([1, 5, 35]) torch.Size([1, 5, 35])
torch.Size([5, 1, 35]) torch.Size([1, 5, 35]) torch.Size([1, 5, 35])
Variable containing:
 2.1071
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 1.5918
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 1.4087
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 1.1934
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.9893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.8168
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.6770
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.5725
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.4854
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.4315
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.3759
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 0.3537
[torch.cuda.FloatTensor of siz

In [9]:
hidden,cell = rnn.init_hidden_cell()

for j in range(unroll_len-batch_size+1):
    input_data = torch.stack([one_hot[j+k:j+k+seq_len] for k in range(batch_size)],dim=0)
    label = torch.stack([one_hot[j+k+1:j+k+seq_len+1] for k in range(batch_size)],dim=0)

    input_data = Variable(input_data).cuda()
    label = Variable(label).cuda()
    
    output, hidden, cell = rnn(input_data,hidden,cell)
    for k in range(batch_size):
        print(onehot_to_word(output[k].cpu().data))
        if j < unroll_len-batch_size:
            break


h
e
l
l
o
 
p
y
t
o
r
c
h
.
 
h
o
w
 
l
o
n
g
 
c
a
n
 
a
 
r
n
n
 
c
e
l
l
 
r
e
m
e
m
b
e
r
?
 
s
h
o
w
 
m
e
 
y
o
u
r
 
l
i
m
i
t
!
1
