In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

## RNN with a simple example

In [2]:
HIDDEN_DIM = 35
LEARNING_RATE = 0.01
EPOCHS = 100

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
string = "hello pytorch and data analytics."

overview:  
1) input: h -> output: e 가 되도록 학습  
2) input: e -> output: l 이 되도록 학습  
3) input: l -> output: l 이 되도록 학습  
...  

이렇게 문장 전체에 대해서 학습을 진행.

# string -> embedding 진행
- 여기에선 one-hot encode

In [5]:
chars = "abcdefghijklmnopqrstuvwxyz .01"
char_list = [i for i in chars]
n_letters = len(char_list)
n_letters

30

In [6]:
def string_to_onehot(string):
    start = np.zeros(shape = n_letters, dtype = int)
    end = np.zeros(shape = n_letters, dtype = int)

    start[-2] = 1
    end[-1] = 1

    for i in string:
        idx = char_list.index(i)
        zero = np.zeros(shape = n_letters, dtype = int)
        zero[idx] = 1
        start = np.vstack([start, zero])
    output = np.vstack([start, end])
    return output

In [7]:
string_to_onehot("data")

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1]])

ㄴ (예시)start, d, a, t, a, end 순서로 one-hot encode된 결과를 볼 수 있다.

In [8]:
def onehot_to_string(onehot):
    onehot_value = torch.Tensor.numpy(onehot)
    return char_list[onehot_value.argmax()]

ㄴ 출력을 위해 encode 된 벡터를 다시 decode 하는 함수

#모델 정의

In [9]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.input2hidden = nn.Linear(input_size, hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size)
        self.hidden2output = nn.Linear(hidden_size, output_size)
        self.act_fn = nn.Tanh()

    def forward(self, input, hidden):
        hidden = self.act_fn(self.input2hidden(input) + self.hidden2hidden(hidden))
        output = self.hidden2output(hidden)
        return output, hidden

    def init_hidden(self):  #hidden vector 초기화
        return torch.zeros(1, self.hidden_size)

In [10]:
#input : 30차원(one-hot), hyperparameter, output:30차원(one-hot)
rnn = RNN(n_letters, HIDDEN_DIM, n_letters).to(device) 

In [11]:
#loss_function : MSE사용
loss_func = nn.MSELoss().to(device)
optimizer_rnn = torch.optim.Adam(rnn.parameters(), lr = LEARNING_RATE)

In [12]:
rnn.parameters

<bound method Module.parameters of RNN(
  (input2hidden): Linear(in_features=30, out_features=35, bias=True)
  (hidden2hidden): Linear(in_features=35, out_features=35, bias=True)
  (hidden2output): Linear(in_features=35, out_features=30, bias=True)
  (act_fn): Tanh()
)>

In [13]:
#embedding 후에 pytorch 연산을 위해 tensor로 형변환 진행
one_hot = torch.from_numpy(string_to_onehot(string)).type_as(torch.FloatTensor())

for i in range(EPOCHS):
    hidden = rnn.init_hidden()
    total_loss = 0

    for j in range(one_hot.size()[0]-1):
        #one_hot은 2차원행렬. 각 알파벳이 encode 되어 있음.
        input_ = one_hot[j:j+1, :].to(device)
        target = one_hot[j+1].to(device)
        output, hidden = rnn.forward(input_, hidden)
        loss = loss_func(output.view(-1), target.view(-1))
        total_loss += loss

    optimizer_rnn.zero_grad()
    total_loss.backward()
    optimizer_rnn.step()

    if i % 50 == 0:
        print(total_loss)

tensor(1.8876, grad_fn=<AddBackward0>)
tensor(0.0666, grad_fn=<AddBackward0>)


# 결과 출력(decode)

In [14]:
start_tkn = torch.zeros(1, n_letters)
start_tkn[:, -2] = 1

with torch.no_grad():
    hidden = rnn.init_hidden()
    input_ = start_tkn.to(device)
    output_string = ""

    for i in range(len(string)):
        output, hidden = rnn.forward(input_, hidden)
        output_string += onehot_to_string(output.data)
        input_ = output

print(output_string)

hello pytorch and do t co aydo.a 


## RNN and LSTM

In [15]:
!wget https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt -P ./data

--2022-12-12 16:16:06--  https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘./data/input.txt’


2022-12-12 16:16:07 (32.7 MB/s) - ‘./data/input.txt’ saved [1115394/1115394]



In [16]:
!pip install unidecode

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting unidecode
  Downloading Unidecode-1.3.6-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 9.3 MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.6


In [17]:
import re
import unidecode
import random
import string
import time, math

import torch
import torch.nn as nn
import torch.nn.functional as F

In [19]:
EPOCHS = 1000
HIDDEN_DIM = 100
BATCH_SIZE = 1
CHUNK_LEN = 250 #일부 dataset 만 사용
NUM_LAYERS = 1 #보통 rnn하나만 있는 구조로 강의에서 설명했었음
EMBEDDING = 70
LEARNING_RATE = 0.004

In [20]:
characters = string.printable
n_characters = len(characters)
characters

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [21]:
#unidecode 라이브러리에서 제공하는 dataset 
text_file = unidecode.unidecode(open('./data/input.txt').read())
len_text_file = len(text_file)
len_text_file

1115394

In [22]:
#백만개 모두 사용 못함.. 일부만 사용 = chunk
def random_chunk():
    start_index = random.randint(0, len_text_file - CHUNK_LEN)
    end_index = start_index + CHUNK_LEN + 1
    return text_file[start_index : end_index]

print(random_chunk())

by which means I saw whose purse was best in
picture; and what I saw, to my good use I
remembered. My clown, who wants but something to
be a reasonable man, grew so in love with the
wenches' song, that he would not stir his pettitoes
till he had both 


In [23]:
#문자 -> tensor로 형변환(=embedding)
def character_to_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for char in range(len(string)):
        tensor[char] = characters.index(string[char])
    return tensor

print(character_to_tensor('ABCde'))

tensor([36, 37, 38, 13, 14])


#train_data 준비

In [24]:
def random_training_set():
    chunk = random_chunk()
    input = character_to_tensor(chunk[:-1])
    target = character_to_tensor(chunk[1:])
    return input, target

In [25]:
#첫번째 tensor : input
#두번째 tensor : target
random_training_set()

(tensor([94, 21, 24, 31, 14, 94, 24, 27, 94, 23, 24, 73, 96, 17, 14, 94, 32, 10,
         31, 14, 13, 94, 18, 23, 13, 18, 15, 15, 14, 27, 14, 23, 29, 21, 34, 94,
         68, 29, 32, 18, 33, 29, 94, 13, 24, 18, 23, 16, 94, 29, 17, 14, 22, 94,
         23, 14, 18, 29, 17, 14, 27, 96, 16, 24, 24, 13, 94, 23, 24, 27, 94, 17,
         10, 27, 22, 77, 94, 11, 30, 29, 94, 17, 14, 94, 28, 14, 14, 20, 28, 94,
         29, 17, 14, 18, 27, 94, 17, 10, 29, 14, 94, 32, 18, 29, 17, 94, 16, 27,
         14, 10, 29, 14, 27, 96, 13, 14, 31, 24, 29, 18, 24, 23, 94, 29, 17, 10,
         23, 94, 12, 10, 23, 94, 27, 14, 23, 13, 14, 27, 94, 18, 29, 94, 17, 18,
         22, 78, 94, 10, 23, 13, 94, 21, 14, 10, 31, 14, 28, 96, 23, 24, 29, 17,
         18, 23, 16, 94, 30, 23, 13, 24, 23, 14, 94, 29, 17, 10, 29, 94, 22, 10,
         34, 94, 15, 30, 21, 21, 34, 94, 13, 18, 28, 12, 24, 31, 14, 27, 94, 17,
         18, 22, 94, 29, 17, 14, 18, 27, 96, 24, 25, 25, 24, 28, 18, 29, 14, 75,
         94, 49, 24, 32, 73,

### Make RNN model

In [26]:
class EN_RNN_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_RNN_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        #one-hot이 아닌 embedding 사용
        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.rnn = nn.RNN(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden):
        en_output = self.encoder(input.view(1, -1)) #먼저 flatten해서 encoding 진행
        output, hidden = self.rnn(en_output, hidden)
        de_output = self.decoder(output.view(1, -1)) #faltten형태로 input해서 output 얻음(decoding)
        return de_output, hidden

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden

In [27]:
model = EN_RNN_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [28]:
inp = character_to_tensor("A")
print(inp.size())
hidden = model.init_hidden()
print(hidden.size())
out,hidden = model(inp,hidden)
print(hidden.size())
print(out.size())

torch.Size([1])
torch.Size([1, 1, 100])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [29]:
model.parameters

<bound method Module.parameters of EN_RNN_DE(
  (encoder): Embedding(100, 70)
  (rnn): RNN(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)>

In [30]:
optimizer_model = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [31]:
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input.to(device)
    target = target.to(device)
    hidden = model.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_model.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j]
        y_ = target[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden = model(x, hidden)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer_model.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.5852], grad_fn=<DivBackward0>)
tensor([2.2687], grad_fn=<DivBackward0>)
tensor([2.2293], grad_fn=<DivBackward0>)
tensor([1.9883], grad_fn=<DivBackward0>)
tensor([2.1531], grad_fn=<DivBackward0>)
tensor([1.9639], grad_fn=<DivBackward0>)
tensor([2.0123], grad_fn=<DivBackward0>)
tensor([1.8963], grad_fn=<DivBackward0>)
tensor([1.8055], grad_fn=<DivBackward0>)
tensor([1.8809], grad_fn=<DivBackward0>)


In [32]:
start_string = "b"

input = character_to_tensor(start_string)
hidden = model.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden = model(input, hidden)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

by.

KING conking you me, no san them in rees oree wall their it I do Concrow ther hanfest in would cance: and steet beserver?

LOLIZAND:
Which hay will dikn so be subled!

BETHANI EDIZA:
Aust washing free and the me, you and;
Shought,
And he have in of how't I wead, the stand hath wonernled, it on t

### Make LSTM model

In [33]:
class EN_LSTM_DE(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers):
        super(EN_LSTM_DE, self).__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.num_layers)
        self.decoder = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, cell):
        en_output = self.encoder(input.view(1, -1))
        output, (hidden, cell) = self.lstm(en_output, (hidden, cell))
        de_output = self.decoder(output.view(1, -1))
        return de_output, hidden, cell

    def init_hidden(self):
        hidden = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        cell = torch.zeros(self.num_layers, BATCH_SIZE, self.hidden_size)
        return hidden, cell

In [34]:
model_LSTM = EN_LSTM_DE(n_characters, EMBEDDING, HIDDEN_DIM, n_characters, NUM_LAYERS).to(device)

In [35]:
model_LSTM.parameters

<bound method Module.parameters of EN_LSTM_DE(
  (encoder): Embedding(100, 70)
  (lstm): LSTM(70, 100)
  (decoder): Linear(in_features=100, out_features=100, bias=True)
)>

In [36]:
input = character_to_tensor("A")
print(input)

hidden, cell = model_LSTM.init_hidden()
print(hidden.size())

output, hidden, cell = model_LSTM(input, hidden, cell)
print(output.size())

tensor([36])
torch.Size([1, 1, 100])
torch.Size([1, 100])


In [37]:
optimizer_lstm = torch.optim.Adam(model_LSTM.parameters(), lr = LEARNING_RATE)
loss_func = nn.CrossEntropyLoss()

In [38]:
for i in range(EPOCHS):
    input, target = random_training_set()
    input = input.to(device)
    target = target.to(device)
    hidden, cell = model_LSTM.init_hidden()

    loss = torch.tensor([0]).type(torch.FloatTensor)
    optimizer_lstm.zero_grad()

    for j in range(CHUNK_LEN-1):
        x = input[j]
        y_ = target[j].unsqueeze(0).type(torch.LongTensor)
        y, hidden, cell = model_LSTM(x, hidden, cell)
        loss += loss_func(y, y_)

    loss.backward()
    optimizer_lstm.step()

    if i % 100 == 0:
        print(loss/CHUNK_LEN)

tensor([4.5833], grad_fn=<DivBackward0>)
tensor([2.4017], grad_fn=<DivBackward0>)
tensor([2.2085], grad_fn=<DivBackward0>)
tensor([2.0843], grad_fn=<DivBackward0>)
tensor([2.3034], grad_fn=<DivBackward0>)
tensor([2.1050], grad_fn=<DivBackward0>)
tensor([1.9766], grad_fn=<DivBackward0>)
tensor([1.9451], grad_fn=<DivBackward0>)
tensor([1.8856], grad_fn=<DivBackward0>)
tensor([1.9183], grad_fn=<DivBackward0>)


In [39]:
start_string = "b"

input = character_to_tensor(start_string)
hidden, cell = model_LSTM.init_hidden()

print(start_string, end="")

for i in range(300):
    output, hidden, cell = model_LSTM(input, hidden, cell)

    output_dist = output.data.view(-1).div(0.8).exp()
    top_i = torch.multinomial(output_dist, 1)[0]
    predicted_char = characters[top_i]

    print(predicted_char, end="")

    input = character_to_tensor(predicted_char)

bre in nither and is the seempred be Creather dead;
I pant, as reacuery reforien
Frampored sheat, her and give?
'Ter the ountacted tingredere, that and fall, yet end much in maty have his is the shall exone in and it ture and bet my flokn cashempoured and and anmer julie, a connery moyess my now a ma