In [1]:
import keras
import numpy as np

# Download the Nietzsche corpus (keras caches it locally) and get its path.
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# Read through a context manager so the file handle is closed right away,
# and pin the encoding so the result does not depend on the platform default.
with open(path, encoding='utf-8') as corpus_file:
    # Lower-case everything to keep the character vocabulary small.
    text = corpus_file.read().lower()
print('말뭉치 크기:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
말뭉치 크기: 600893


In [2]:
# Length (in characters) of each extracted sequence.
maxlen = 60

# Stride between the start positions of consecutive sequences.
step = 3

# Extracted input sequences.
sentences = []

# Targets: the character that follows each extracted sequence.
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('시퀀스 개수:', len(sentences))

# Sorted list of the unique characters in the corpus.
chars = sorted(set(text))
print('고유한 글자:', len(chars))
# Map each character to its position in `chars`. enumerate() builds this in
# one pass instead of calling chars.index() (a linear scan) per character.
char_indices = {char: index for index, char in enumerate(chars)}

# One-hot encode the characters into binary arrays.
print('벡터화...')
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

시퀀스 개수: 200278
고유한 글자: 58
벡터화...


In [4]:
from keras import layers

# A single LSTM layer that reads (maxlen, len(chars)) one-hot windows,
# followed by a softmax over every character in the vocabulary.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation='softmax'),
])

# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm against the installed Keras version before renaming.
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

def sample(preds, temperature=1.0):
    """Draw one index from a temperature-rescaled probability distribution.

    Each probability ``p`` is reweighted to ``p ** (1 / temperature)`` and the
    result is renormalized before a single multinomial draw. Temperatures
    below 1 sharpen the distribution; temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Strictly positive scaling factor.

    Returns:
        The sampled index (a NumPy integer, as returned by ``np.argmax``).

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be > 0, got %r' % (temperature,))

    # float64 keeps the renormalized probabilities summing close enough to 1
    # for np.random.multinomial to accept them.
    preds = np.asarray(preds).astype('float64')

    # Zero probabilities map to -inf logits (and back to exactly 0 below);
    # that is expected, so silence the divide-by-zero warning from log(0).
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the largest logit so the biggest term is exp(0) == 1. Without
    # the shift, very low temperatures underflow every term to 0 and the
    # normalization becomes 0 / 0 (NaN).
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)

    # One draw of one trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

import random
import sys

# Seed Python's RNG so the seed-text position is reproducible. The position is
# drawn once here, so every epoch below generates from the same seed text.
random.seed(42)
start_index = random.randint(0, len(text) - maxlen - 1)

# Train for 9 epochs (range(1, 10)), generating sample text after each one.
for epoch in range(1, 10):
    print('에포크', epoch)
    # Fit for exactly one pass over the data per outer iteration.
    model.fit(x, y, batch_size=128, epochs=1)

    # Slice the maxlen-character seed text at the pre-selected position.
    seed_text = text[start_index: start_index + maxlen]
    print('--- 시드 텍스트: "' + seed_text + '"')

    # Try a range of sampling temperatures.
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ 온도:', temperature)
        generated_text = seed_text
        print(generated_text, end='')

        # Generate 400 characters, starting from the seed text.
        for _ in range(400):
            # One-hot encode the current window of characters in one
            # indexed assignment.
            sampled = np.zeros((1, maxlen, len(chars)))
            char_ids = [char_indices[char] for char in generated_text]
            sampled[0, np.arange(len(char_ids)), char_ids] = 1.

            # Sample the next character from the model's predictions.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: drop the oldest character, append the new one.
            generated_text = generated_text[1:] + next_char

            # Emit the character immediately so the text streams as it is
            # generated.
            print(next_char, end='', flush=True)
        print()

에포크 1
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도: 0.2
the slowly ascending ranks and classes, in which,
through formed and may complession of the self be and something to the some the same of the superful and soul the are all the soul have and self one of the self in the intervated and the who are the self the stall of the same the same of the self any the sense of the spirit of the same the presented to the superficulty and soul the superfical and the something to the same and the sense of the may the self a
------ 온도: 0.5
the slowly ascending ranks and classes, in which,
through for one of moral of an an
the precises different and resired of the who superies the presente of the being or the something may it as been them of the in the as the before the superficuently and self and soul. in
the worther which same and to complession of the had be typen to may not to the presention of can be dangerous and all betion of the some to the self to an a

willing instipent manceds, whatever stroog what was knowlejosments to same oves-foobxish,
but

에포크 5
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도: 0.2
the slowly ascending ranks and classes, in which,
through for the suppor the sension and the sensition of the soul, and the sensition of the sensition of the the sensition of the soul as the present of the reason of the sensition of the same the man of the world of the one of the more and the soul of the more and the desire that the thing as the contradict and the morality of the sensition of the same the artists that is the sensition of the world that the
------ 온도: 0.5
the slowly ascending ranks and classes, in which,
through for the self-conforally an their thing and sensition of the man is his sour end of the precisely to be the sense in the name
than the something and soul ever are contrary reading as the most doing and which the tragess of conditions of man which has also allow what are some 

leods. unjusture hes wichwalld" unvengle resprect as "vinging gets furth human, who least os the old other have
lifeff fare it
feeling. we we great
custome,
think.


2alv, sulferiarible a 
에포크 9
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도: 0.2
the slowly ascending ranks and classes, in which,
through for the powerful and instance of the self-conduct, and the self-religion of such a socially and and means of the same the spirit of the same the self-conducting and the self-responsibitions of the conduct the same and and the such a self-religion and all the self-religion of the spirit of the sense of the spirit of the self-religion of the destruction of the most self-conduct and self-every spirit o
------ 온도: 0.5
the slowly ascending ranks and classes, in which,
through for hard this conduct. but and it is also not one than itself and that with the religion, and fearful itself has a master itself in the self sense of the bad and all some its also t