In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Settings for the hand-written RNN cell trained in the next cells.
n_hidden = 35
lr = 0.01
epochs = 1000

# Sentence the network is trained to reproduce one character at a time.
string = "hello pytorch. how long can a rnn cell remember?"
# Vocabulary. The second-to-last symbol ("0") is the slot string_to_onehot
# sets for its start and end marker rows.
chars = "abcdefghijklmnopqrstuvwxyz ?!.,:;01"
char_list = list(chars)
n_letters = len(char_list)

In [3]:
def string_to_onehot(string):
    """Encode ``string`` as one-hot rows framed by a start and an end marker row.

    Row 0 and the last row are markers: both have a 1 in the second-to-last
    vocabulary slot. The rows in between are the one-hot encodings of the
    characters of ``string``, in order, using each character's position in the
    module-level ``char_list``.

    Args:
        string: Text to encode; every character must occur in ``char_list``.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(len(string) + 2, len(char_list))``.

    Raises:
        ValueError: If a character of ``string`` is not in ``char_list``.
    """
    # Allocate the full matrix once. Growing it with np.vstack per character
    # re-copied every previous row on each step.
    onehot = np.zeros(shape=(len(string) + 2, len(char_list)), dtype=int)
    onehot[0, -2] = 1   # start marker
    onehot[-1, -2] = 1  # end marker
    for row, ch in enumerate(string, start=1):
        onehot[row, char_list.index(ch)] = 1
    return onehot

In [4]:
def onehot_to_word(onehot_1):
    """Return the ``char_list`` entry at the arg-max position of a tensor.

    Args:
        onehot_1: Tensor of scores (or a one-hot vector) whose flattened
            arg-max index selects the character.

    Returns:
        The character from the module-level ``char_list`` at that index.
    """
    # detach() drops autograd tracking and cpu() moves the data to host memory;
    # Tensor.numpy() raises without them for grad-requiring or non-CPU tensors.
    scores = onehot_1.detach().cpu().numpy()
    return char_list[scores.argmax()]

In [5]:
class RNN(nn.Module):
    """Hand-written recurrent cell followed by a linear read-out.

    Each step computes ``hidden = tanh(i2h(input) + h2h(hidden))`` and
    ``output = i2o(hidden)``.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden state.
        output_size: Number of features in each output row.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        # Despite the name, i2o maps the hidden state (not the input) to the
        # output. The attribute name is kept so state_dict keys do not change.
        self.i2o = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        """Advance one time step.

        Args:
            input: Tensor whose last dimension is ``input_size``.
            hidden: Previous hidden state, last dimension ``hidden_size``.

        Returns:
            Tuple ``(output, hidden)`` with the read-out and the new state.
        """
        hidden = self.act_fn(self.i2h(input) + self.h2h(hidden))
        output = self.i2o(hidden)
        return output, hidden

    def init_hidden(self, batch_size=1):
        """Return an all-zero hidden state of shape ``(batch_size, hidden_size)``.

        The tensor is created with the dtype and device of the layer weights,
        so it stays compatible after ``.to(device)`` or ``.double()``.
        """
        return self.i2h.weight.new_zeros(batch_size, self.hidden_size)

# Build the cell. Input and output widths are both n_letters (one slot per
# vocabulary symbol); the hidden state has n_hidden units.
rnn = RNN(n_letters, n_hidden, n_letters)

In [6]:
# Adam updates every parameter of `rnn`; outputs are scored against the
# one-hot targets with mean squared error.
optimizer = optim.Adam(rnn.parameters(), lr=lr)
loss_func = nn.MSELoss()

In [7]:
# One-hot rows of the training sentence (plus marker rows) as a float tensor.
one_hot = torch.from_numpy(string_to_onehot(string)).float()

for i in range(epochs):
    rnn.zero_grad()
    total_loss = 0
    hidden = rnn.init_hidden()

    # Each step feeds the true row j and is scored against the true row j+1.
    # The prediction is never fed back in during training.
    for j in range(one_hot.size(0) - 1):
        input_ = one_hot[j:j+1, :]
        target = one_hot[j+1]

        # Call the module rather than .forward() so registered hooks run.
        output, hidden = rnn(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    # Backpropagate through the whole sequence once per epoch.
    total_loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(total_loss)

tensor(1.9652, grad_fn=<AddBackward0>)
tensor(0.8797, grad_fn=<AddBackward0>)
tensor(0.5571, grad_fn=<AddBackward0>)
tensor(0.3682, grad_fn=<AddBackward0>)
tensor(0.2537, grad_fn=<AddBackward0>)
tensor(0.1927, grad_fn=<AddBackward0>)
tensor(0.1510, grad_fn=<AddBackward0>)
tensor(0.1216, grad_fn=<AddBackward0>)
tensor(0.1001, grad_fn=<AddBackward0>)
tensor(0.0877, grad_fn=<AddBackward0>)
tensor(0.0750, grad_fn=<AddBackward0>)
tensor(0.0638, grad_fn=<AddBackward0>)
tensor(0.0591, grad_fn=<AddBackward0>)
tensor(0.0578, grad_fn=<AddBackward0>)
tensor(0.0473, grad_fn=<AddBackward0>)
tensor(0.0410, grad_fn=<AddBackward0>)
tensor(0.0365, grad_fn=<AddBackward0>)
tensor(0.0329, grad_fn=<AddBackward0>)
tensor(0.0400, grad_fn=<AddBackward0>)
tensor(0.0316, grad_fn=<AddBackward0>)
tensor(0.0277, grad_fn=<AddBackward0>)
tensor(0.0248, grad_fn=<AddBackward0>)
tensor(0.0338, grad_fn=<AddBackward0>)
tensor(0.0259, grad_fn=<AddBackward0>)
tensor(0.0216, grad_fn=<AddBackward0>)
tensor(0.0242, grad_fn=<A

In [10]:
# Seed input: a row with only the marker slot (second-to-last symbol) set,
# matching the first row produced by string_to_onehot.
start = torch.zeros(1, len(char_list))
start[:, -2] = 1

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start
    output_string = ""
    # Generate as many characters as the training sentence has. This relies on
    # `string` still holding that sentence.
    for i in range(len(string)):
        # Call the module rather than .forward() so registered hooks run.
        output, hidden = rnn(input_, hidden)
        # Under no_grad the output carries no autograd state, so no .data needed.
        output_string += onehot_to_word(output)
        # Free-running generation: the raw prediction is the next input.
        input_ = output

print(output_string)

hello p eorm eonc c ememlt ro poe emalnnw memrtr


In [24]:
# Training schedule.
num_epochs = 2000
print_every = 100
plot_every = 10  # not referenced by any cell visible in this notebook section

# Number of input characters in one training chunk.
chunk_len = 200

# Model dimensions.
embedding_size = 70
hidden_size = 100
num_layers = 1
batch_size = 1

# Learning rate; this rebinds the `lr` set for the earlier one-hot RNN cells.
lr = 0.002

In [12]:
# Import under an alias: a plain `import string` would rebind the global name
# `string`, which the earlier cells use for the training sentence, so
# re-running those cells afterwards would fail.
import string as string_constants

# Character vocabulary for the text model: every printable ASCII symbol.
all_characters = string_constants.printable
n_characters = len(all_characters)
print(all_characters)
print("num_chars =", n_characters)

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

num_chars =  100


In [16]:
# %pip installs into the running kernel's environment; the version is pinned
# to the one this notebook was last run with.
%pip install unidecode==1.3.6

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 15.5 MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.6


In [18]:
import unidecode

# Read the whole corpus and pass it through unidecode. The context manager
# closes the file handle as soon as the text has been read.
with open('/content/shakespeare.txt') as corpus_file:
    file = unidecode.unidecode(corpus_file.read())
file_len = len(file)
print('file_len =', file_len)

file_len = 1115394


In [19]:
import random

def random_chunk():
    """Return ``chunk_len + 1`` consecutive characters from a random position in ``file``.

    The extra character lets callers split the chunk into ``chunk_len`` inputs
    and ``chunk_len`` next-character targets.

    Returns:
        A string of exactly ``chunk_len + 1`` characters.

    Raises:
        ValueError: If ``file`` holds fewer than ``chunk_len + 1`` characters.
    """
    # random.randint includes both ends, so the last valid start is the one
    # whose chunk ends on the final character. Using file_len - chunk_len
    # could return a chunk one character short.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError("corpus is shorter than chunk_len + 1 characters")
    start_index = random.randint(0, last_start)
    return file[start_index:start_index + chunk_len + 1]

In [20]:
def char_tensor(string):
    """Convert text to a 1-D int64 tensor of vocabulary indices.

    Each element is the position of the corresponding character of ``string``
    in the module-level ``all_characters``. A character that is not in the
    vocabulary raises ``ValueError``.
    """
    indices = [all_characters.index(symbol) for symbol in string]
    return torch.tensor(indices, dtype=torch.long)

# Quick check of the index mapping; the recorded output is
# tensor([36, 37, 38, 13, 14, 15]).
print(char_tensor('ABCdef'))

tensor([36, 37, 38, 13, 14, 15])


In [21]:
def random_training_set():
    """Draw one random chunk and return it as an ``(input, target)`` tensor pair.

    The input covers every character but the last; the target covers every
    character but the first, so ``target[k]`` is the character after ``input[k]``.
    """
    text = random_chunk()
    return char_tensor(text[:-1]), char_tensor(text[1:])

In [28]:
class RNN(nn.Module):
    """Character-level model: embedding -> ``nn.RNN`` -> linear decoder.

    NOTE: this rebinds the name ``RNN`` that the earlier hand-written cell
    class used; the ``rnn`` instance built from that class is unaffected.

    Args:
        input_size: Vocabulary size accepted by the embedding.
        embedding_size: Width of each character embedding.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of scores produced per character.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.RNN(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run one time step.

        Args:
            input: Integer tensor of character indices; it is reshaped to
                ``(1, N)``, i.e. a length-1 sequence of N batch entries.
            hidden: Hidden state as returned by ``init_hidden`` or a
                previous call.

        Returns:
            Tuple ``(out, hidden)``: scores of shape ``(N, output_size)`` and
            the updated hidden state.
        """
        out = self.encoder(input.view(1, -1))
        out, hidden = self.rnn(out, hidden)
        # Flatten using the model's own width, not the notebook-level
        # batch_size, so the reshape always matches the recurrent output.
        out = self.decoder(out.view(-1, self.hidden_size))
        return out, hidden

    def init_hidden(self):
        """Return a zero hidden state of shape ``(num_layers, batch_size, hidden_size)``.

        ``batch_size`` is the notebook-level setting. The other two sizes come
        from this instance, so the state matches the model even when it was
        built with sizes that differ from the notebook-level variables.
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)

In [29]:
# Build the character model; input and output both span the full vocabulary.
# NOTE(review): num_layers is hard-coded to 2 here, while the hyperparameter
# cell sets num_layers = 1 — confirm which value is intended.
model = RNN(input_size=n_characters,
            embedding_size=embedding_size,
            hidden_size=hidden_size,
            output_size=n_characters,
            num_layers=2)

In [30]:
# Smoke test: push a single character through the untrained model to check
# that the forward pass runs with a freshly initialised hidden state.
inp = char_tensor("A")
hidden = model.init_hidden()
out, hidden = model(inp, hidden)

In [35]:
# Cross-entropy over the character classes, optimised with Adam. Both names
# were bound earlier for the one-hot RNN and are rebound here for `model`.
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [36]:
def test():
    start_str = "b"
    inp = char_tensor(start_str)
    hidden = model.init_hidden()
    x = inp

    print(start_str,end="")
    for i in range(200):
        output,hidden = model(x,hidden)

        output_dist = output.data.view(-1).div(0.8).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        predicted_char = all_characters[top_i]

        print(predicted_char,end="")

        x = char_tensor(predicted_char)

In [39]:
for i in range(num_epochs):
    # One random chunk per step: inputs and their next-character labels.
    inp, label = random_training_set()
    hidden = model.init_hidden()

    loss = torch.zeros(1)
    optimizer.zero_grad()
    # Walk over every (input, label) pair that is actually present.
    # range(chunk_len - 1) skipped the last pair.
    for x, y_ in zip(inp, label):
        y, hidden = model(x, hidden)
        # CrossEntropyLoss expects a 1-element class-index tensor here.
        loss = loss + loss_func(y, y_.unsqueeze(0))

    loss.backward()
    optimizer.step()

    if i % print_every == 0:
        # Average over the number of steps that were summed.
        print("\n", loss / len(inp), "\n")
        test()
        print("\n", "=" * 100)


 tensor([4.6213], grad_fn=<DivBackward0>) 

(0_@xJUsLf

 tensor([2.3143], grad_fn=<DivBackward0>) 

bnd tour for lalve shite the tou thef s bacare theth y mnbnbe do ties kagotos. lithe  oind win iy shin rere Knir .ay int th tuperes lili th theraty hall Jide our the'd thr Io has at thanitC.

nor b't.


 tensor([2.2534], grad_fn=<DivBackward0>) 

bod this wisteabe hat the, ilg and belerr hage agonk, younn, beriorce, and then stanly t'oed chall thou deith warce the orghall me mest and four.

CICEDIBAENG:
Thy wil
EYEN.:
Wour on thand thea wore th

 tensor([2.1245], grad_fn=<DivBackward0>) 

buth in my bet will thun, :upecrs not mith rof male than woved
The brust the be wond wreruse whe in inny
Se the hien lour in in me ciase thy me of moree, and to wist to bean the ins oul sour wear sest,

 tensor([1.9960], grad_fn=<DivBackward0>) 

brow the thert.

PEN.
TUI Hot Bome seoss copit the diy wore ky pake to naFt, is of eost Poth sie thean thee,
Ant I shon yout the is his my so praee vulldo,
EV

In [38]:
# To switch to a GRU, replace the line
# self.rnn = nn.RNN(embedding_size, hidden_size, num_layers) inside the class with self.rnn = nn.GRU(embedding_size, hidden_size, num_layers).

In [None]:
# For an LSTM, change the class as follows.
class RNN(nn.Module):
    """Character-level model: embedding -> ``nn.LSTM`` -> linear decoder.

    Unlike the plain-RNN variant, the recurrent state is a pair
    ``(hidden, cell)``. ``forward`` takes and returns both, so callers must
    pass the cell state along with the hidden state.

    Args:
        input_size: Vocabulary size accepted by the embedding.
        embedding_size: Width of each character embedding.
        hidden_size: Width of the hidden and cell states.
        output_size: Number of scores produced per character.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.embedding_size = embedding_size

        self.encoder = nn.Embedding(input_size, embedding_size)
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, cell):
        """Run one time step.

        Args:
            input: Integer tensor of character indices; it is reshaped to
                ``(1, N)``, i.e. a length-1 sequence of N batch entries.
            hidden: Hidden state from ``init_hidden`` or a previous call.
            cell: Cell state from ``init_hidden`` or a previous call.

        Returns:
            Tuple ``(out, hidden, cell)``: scores of shape
            ``(N, output_size)`` and the updated states.
        """
        # The sequence dimension comes first (batch_first is left at its
        # default), so the indices form one time step, as in the RNN variant.
        out = self.encoder(input.view(1, -1))
        out, (hidden, cell) = self.rnn(out, (hidden, cell))
        out = self.decoder(out.view(-1, self.hidden_size))
        return out, hidden, cell

    def init_hidden(self):
        """Return zero ``(hidden, cell)`` states.

        Each has shape ``(num_layers, batch_size, hidden_size)``.
        ``batch_size`` is the notebook-level setting; the layer count and
        width come from this instance so they always match the LSTM.
        """
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        cell = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        return hidden, cell