In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

## RNN with a simple example

In [2]:
HIDDEN_DIM = 35 # hidden state의 size
LEARNING_RATE = 0.01
EPOCHS = 100

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
string = "hello pytorch and data analytics."

In [5]:
chars = "abcdefghijklmnopqrstuvwxyz .01" # 알파벳, 공백, 콤마, 01(start와 end를 구분하기 위해서)
char_list = [i for i in chars]
n_letters = len(char_list)
n_letters
# 원핫인코딩으로 임베딩하면 임베딩이 30차원이 됨

30

In [6]:
def string_to_onehot(string):
    start = np.zeros(shape = n_letters, dtype = int)
    end = np.zeros(shape = n_letters, dtype = int)

    start[-2] = 1
    end[-1] = 1

    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape = n_letters, dtype = int)
        zero[idx] = 1
        start = np.vstack([start, zero])
    output = np.vstack([start, end])
    return output

In [7]:
string_to_onehot("data")
# 처음과 끝 벡터에는 설정한 자라에 1이 들어간 것을 확인 가능

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1]])

In [8]:
# 디코딩해주는 함수 정의
def onehot_to_string(onehot):
    onehot_value = torch.Tensor.numpy(onehot)
    return char_list[onehot_value.argmax()]

In [9]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.input2hidden = nn.Linear(input_size, hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size)
        self.hidden2output = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        hidden = self.act_fn(self.input2hidden(input) + self.hidden2hidden(hidden))
        output = self.hidden2output(hidden)
        return output, hidden

    def init_hidden(self): # hidden vector를 초기화 시켜주는 함수
        return torch.zeros(1, self.hidden_size)

In [10]:
rnn = RNN(n_letters, HIDDEN_DIM, n_letters).to(device)

In [11]:
print(rnn)

RNN(
  (input2hidden): Linear(in_features=30, out_features=35, bias=True)
  (hidden2hidden): Linear(in_features=35, out_features=35, bias=True)
  (hidden2output): Linear(in_features=35, out_features=30, bias=True)
  (act_fn): Tanh()
)


In [12]:
loss_func = nn.MSELoss().to(device)
optimizer_rnn = torch.optim.Adam(rnn.parameters(), lr = LEARNING_RATE)

In [13]:
rnn.parameters

<bound method Module.parameters of RNN(
  (input2hidden): Linear(in_features=30, out_features=35, bias=True)
  (hidden2hidden): Linear(in_features=35, out_features=35, bias=True)
  (hidden2output): Linear(in_features=35, out_features=30, bias=True)
  (act_fn): Tanh()
)>

In [17]:
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

for i in range(EPOCHS):
    optimizer_rnn.zero_grad()
    hidden = rnn.init_hidden()
    total_loss = 0

    for j in range(one_hot.size()[0]-1):
        input_ = one_hot[j:j+1, :].to(device)
        target = one_hot[j+1].to(device)
        output, hidden = rnn.forward(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    total_loss.backward()
    optimizer_rnn.step()

    if i % 50 == 0:
        print(total_loss)

tensor(0.0107, grad_fn=<AddBackward0>)
tensor(0.0047, grad_fn=<AddBackward0>)


In [18]:
start_tkn = torch.zeros(1, n_letters)
start_tkn[:, -2] = 1 # start_token

with torch.no_grad():
    hidden = rnn.init_hidden() # 초기화
    input_ = start_tkn.to(device)
    output_string = ""

    for i in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        output_string += onehot_to_string(output.data)
        input_ = output

print(output_string)

hello pytorch and data anddich ad


## RNN and LSTM

In [19]:
!wget https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt -P ./data

--2022-11-30 11:52:03--  https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘./data/input.txt’


2022-11-30 11:52:03 (20.6 MB/s) - ‘./data/input.txt’ saved [1115394/1115394]



In [20]:
!pip install unidecode

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 5.4 MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.6


In [21]:
import re
import unidecode
import random
import string
import time, math

import torch
import torch.nn as nn
import torch.nn.functional as F

In [22]:
EPOCHS = 1000
HIDDEN_DIM = 100
BATCH_SIZE = 1
CHUNK_LEN = 250 # 데이터 일부만 추출하여 학습하기 위해서
NUM_LAYERS = 1 # 1층 RNN
EMBEDDING = 70 # 임베딩 벡터 차원 수
LEARNING_RATE = 0.004

In [25]:
characters = string.printable
n_characters = len(characters)
characters

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [26]:
text_file = unidecode.unidecode(open('./data/input.txt').read())
len_text_file = len(text_file)
len_text_file

1115394

In [27]:
def random_chunk():
    start_index = random.randint(0, len_text_file - CHUNK_LEN)
    end_index = start_index + CHUNK_LEN + 1
    return text_file[start_index : end_index]

print(random_chunk())

ce of England and our persons' safety,
Enforced us to this execution?

Lord Mayor:
Now, fair befall you! he deserved his death;
And you my good lords, both have well proceeded,
To warn false traitors from the like attempts.
I never look'd for better a


In [28]:
def character_to_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for char in range(len(string)):
        tensor[char] = characters.index(string[char])
    return tensor

print(character_to_tensor('ABCde'))

tensor([36, 37, 38, 13, 14])


In [29]:
def random_training_set():
    chunk = random_chunk()
    input = character_to_tensor(chunk[:-1])
    target = character_to_tensor(chunk[1:])
    return input, target

In [30]:
random_training_set()

(tensor([13, 14, 10, 27, 94, 28, 14, 27, 31, 18, 12, 14, 28, 96, 51, 10, 28, 29,
         94, 10, 23, 13, 94, 29, 24, 94, 12, 24, 22, 14, 73, 94, 29, 17, 10, 29,
         94, 34, 24, 30, 94, 13, 24, 94, 12, 17, 10, 23, 16, 14, 94, 29, 17, 18,
         28, 94, 25, 30, 27, 25, 24, 28, 14, 73, 96, 58, 17, 18, 12, 17, 94, 11,
         14, 18, 23, 16, 94, 28, 24, 94, 17, 24, 27, 27, 18, 11, 21, 14, 73, 94,
         28, 24, 94, 11, 21, 24, 24, 13, 34, 73, 94, 22, 30, 28, 29, 96, 47, 14,
         10, 13, 94, 24, 23, 94, 29, 24, 94, 28, 24, 22, 14, 94, 15, 24, 30, 21,
         94, 18, 28, 28, 30, 14, 77, 94, 32, 14, 94, 10, 21, 21, 94, 20, 23, 14,
         14, 21, 75, 96, 96, 47, 40, 50, 49, 55, 40, 54, 77, 96, 44, 94, 10, 22,
         94, 10, 94, 15, 14, 10, 29, 17, 14, 27, 94, 15, 24, 27, 94, 14, 10, 12,
         17, 94, 32, 18, 23, 13, 94, 29, 17, 10, 29, 94, 11, 21, 24, 32, 28, 77,
         96, 54, 17, 10, 21, 21, 94, 44, 94, 21, 18, 31, 14, 94, 24, 23, 94, 29,
         24, 94, 28, 14, 14,

### Make RNN model

In [31]:
class EN_RNN_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_RNN_DE, self).__init__()

        self.input_size = input_size # 100
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.rnn = nn.RNN(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden):
        en_output = self.encoder(input.view(1, -1)) # flatten된 형태로 통과
        output, hidden = self.rnn(en_output, hidden)
        de_output = self.decoder(output.view(1, -1)) # flatten된 형태로 통과
        return de_output, hidden

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden

In [32]:
model = EN_RNN_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [33]:
inp = character_to_tensor("A")
print(inp.size())
hidden = model.init_hidden()
print(hidden.size())
out,hidden = model(inp,hidden)
print(hidden.size())
print(out.size())

torch.Size([1])
torch.Size([1, 1, 100])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [34]:
print(model)

EN_RNN_DE(
  (encoder): Embedding(100, 70)
  (rnn): RNN(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)


In [35]:
optimizer_model = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [36]:
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input.to(device)
    target = target.to(device)
    hidden = model.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_model.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j]
        y_ = target[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model(x, hidden)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer_model.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.6278], grad_fn=<DivBackward0>)
tensor([2.3878], grad_fn=<DivBackward0>)
tensor([2.2159], grad_fn=<DivBackward0>)
tensor([2.0908], grad_fn=<DivBackward0>)
tensor([1.9464], grad_fn=<DivBackward0>)
tensor([1.9986], grad_fn=<DivBackward0>)
tensor([2.1181], grad_fn=<DivBackward0>)
tensor([2.0662], grad_fn=<DivBackward0>)
tensor([1.7870], grad_fn=<DivBackward0>)
tensor([1.8726], grad_fn=<DivBackward0>)


In [None]:
start_string = "b"

input = character_to_tensor(start_string)
hidden = model.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden = model(input, hidden)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

buth! Lost shouns not iake in this and losten.

VOKINCE:
But thensarings
to have spongeson I waist the have sain cance And not aiter you sperser Or?

DUKE OF OFTERO LINCENCES:
Thine the werth her me const be in you? coury shall a prorray that hast him: I himsent with whanst chall your good you, me an

### Make LSTM model

In [37]:
class EN_LSTM_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_LSTM_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.num_layers) 
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, cell):
        en_output = self.encoder(input.view(1, -1))
        output, (hidden, cell) = self.lstm(en_output, (hidden, cell)) # hidden와 cell이 동시에 들어감
        de_output = self.decoder(output.view(1, -1))
        return de_output, hidden, cell

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        cell = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden, cell

In [38]:
model_LSTM = EN_LSTM_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [39]:
print(model)

EN_RNN_DE(
  (encoder): Embedding(100, 70)
  (rnn): RNN(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)


In [None]:
input = character_to_tensor("A")
print(input)

hidden, cell = model_LSTM.init_hidden()
print(hidden.size())

output, hidden, cell = model_LSTM(input, hidden, cell)
print(output.size())

tensor([36])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [40]:
optimizer_lstm = torch.optim.Adam(model_LSTM.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [41]:
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input.to(device)
    target = target.to(device)
    hidden, cell = model_LSTM.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_lstm.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j]
        y_ = target[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden, cell = model_LSTM(x, hidden, cell)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer_lstm.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.5857], grad_fn=<DivBackward0>)
tensor([2.5657], grad_fn=<DivBackward0>)
tensor([2.1868], grad_fn=<DivBackward0>)
tensor([2.0831], grad_fn=<DivBackward0>)
tensor([1.9339], grad_fn=<DivBackward0>)
tensor([2.0539], grad_fn=<DivBackward0>)
tensor([1.9564], grad_fn=<DivBackward0>)
tensor([1.8441], grad_fn=<DivBackward0>)
tensor([2.0637], grad_fn=<DivBackward0>)
tensor([1.8053], grad_fn=<DivBackward0>)


In [42]:
start_string = "b"

input = character_to_tensor(start_string)
hidden, cell = model_LSTM.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden, cell = model_LSTM(input, hidden, cell)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

bars; and if and as and these preadess the peate tances as mipted me your shall for shall brow,-famabuines yef waspands warp but to my held with Mardafe,
And the hea, sare to she lord weat is qubanding.

LURENCIO:
He as the worm faces and leaves proobed, if menders! I grays be and at wath he fears pe