# LSTM으로 텍스트 생성하기

- 탐욕적 샘플링(greedy sampling): 확률 분포에서 가장 높은 확률을 가지는 값을 선택하는 방법.
- 확률적 샘플링(stochastic sampling): 확률 분포에서 샘플링하는 과정에 무작위성을 주입하는 방법. 모델의 소프트맥스 출력을 확률적 샘플링에 사용.

In [2]:
# 다른 온도 값을 사용하여 확률 분포의 가중치 바꾸기(temperature=1.0일 때가 원본 확률 분포.)
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
    """Rescale a probability distribution by a temperature and renormalize.

    Each probability ``p`` is replaced by ``p ** (1 / temperature)`` and the
    result is divided by its sum, so ``temperature=1.0`` returns the input
    distribution unchanged (up to normalization).

    Args:
        original_distribution: Array-like of probabilities.
        temperature: Divisor applied to the log-probabilities; must be
            non-zero.

    Returns:
        NumPy array of the same shape whose entries sum to 1.
    """
    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # suppress the divide-by-zero warning NumPy would otherwise emit.
    with np.errstate(divide='ignore'):
        log_probs = np.log(original_distribution) / temperature
    # Shift so the largest exponent is 0. The constant factor cancels in the
    # normalization below, but without it a small temperature makes every
    # exp() underflow to 0 and the division yields NaN.
    distribution = np.exp(log_probs - np.max(log_probs))
    return distribution / np.sum(distribution)

## 글자 수준의 LSTM 텍스트 생성 모델 구현

In [3]:
# 원본 텍스트 파일을 내려받아 파싱하기
import keras
import numpy as np

# Download the corpus (get_file returns the local path of the file).
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)
# Read inside a context manager so the file handle is closed, and decode
# explicitly as UTF-8 instead of relying on the platform default encoding.
with open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()  # whole corpus, lower-cased
print('말뭉치 크기:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
말뭉치 크기: 600893


In [5]:
# Vectorize the character sequences
maxlen = 60  # length, in characters, of each extracted sequence
step = 3  # stride between the starts of consecutive sequences

# Start offsets of every window that still has a character after it.
starts = range(0, len(text) - maxlen, step)
sentences = [text[i: i + maxlen] for i in starts]  # extracted sequences
next_chars = [text[i + maxlen] for i in starts]  # target: the character following each sequence

print('시퀀스 개수: ', len(sentences))

chars = sorted(set(text))  # unique characters in the corpus, sorted
print('고유한 글자:', len(chars))
# Map each character to its position in `chars`. enumerate() gives the index
# directly instead of a linear chars.index() scan per character.
char_indices = {char: index for index, char in enumerate(chars)}

print('벡터화...')
# One-hot encode inputs and targets. The builtin `bool` is used as dtype
# because the `np.bool` alias was removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

시퀀스 개수:  200278
고유한 글자: 57
벡터화...


In [6]:
# 다음 글자를 예측하기 위한 단일 LSTM 모델
from keras import layers

# Single-LSTM network: a (maxlen, len(chars)) input window goes through a
# 128-unit LSTM, then a softmax layer with one output per entry of `chars`.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation='softmax'),
])

In [8]:
# Configure the model for training.
# `learning_rate` is the supported argument name; the older `lr` spelling is
# deprecated and rejected by current Keras releases.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [9]:
# Sample the index of the next character from the model's predictions.
def sample(preds, temperature=1.0):
    """Draw one index at random from a temperature-rescaled distribution.

    Args:
        preds: Array-like of probabilities. They are renormalized after
            rescaling.
        temperature: Divisor applied to the log-probabilities; must be
            non-zero. ``1.0`` samples from ``preds`` as given.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # suppress the divide-by-zero warning NumPy would otherwise emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest exponent is 0. The constant factor cancels in the
    # normalization, but without it a small temperature makes every exp()
    # underflow to 0 and multinomial() receives NaN probabilities.
    exp_preds = np.exp(scaled - np.max(scaled))
    preds = exp_preds / np.sum(exp_preds)
    # A single multinomial trial yields a one-hot row; argmax recovers the
    # position of the 1.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
# 텍스트 생성 루프
import random
import sys

# Seed the RNG so the same seed-text offset is drawn on every run.
random.seed(42)
start_index = random.randint(0, len(text) - maxlen - 1)

for epoch in range(1, 60):
    print('에포크', epoch)
    # Train for a single pass over the data, then sample text below.
    model.fit(x, y, batch_size=128, epochs=1)

    # The prompt: maxlen characters of the corpus starting at start_index.
    seed_text = text[start_index: start_index + maxlen]
    print('--- 시드 텍스트: "' + seed_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ 온도: ', temperature)
        generated_text = seed_text
        sys.stdout.write(generated_text)

        # Generate 400 characters, starting from the seed text.
        for i in range(400):
            # One-hot encode the current window in a single indexed
            # assignment: row = position in the window, column = char index.
            sampled = np.zeros((1, maxlen, len(chars)))
            positions = np.arange(len(generated_text))
            columns = [char_indices[char] for char in generated_text]
            sampled[0, positions, columns] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: append the new character, drop the oldest.
            generated_text = (generated_text + next_char)[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

에포크 1
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through for the strong to the strength and the sense of the same and the sense and the same and all the strength and the strength and all the strength and the same the same and the strength and the strength and the man in the sense and mankind and the discoversed and the same the same the strength and all the strength and the sense of the strength and all the same the supposing and the same and the more and
------ 온도:  0.5
the slowly ascending ranks and classes, in which,
through for a discovirition of the dispasing it is and exist itself of this man in order of the oble, the chould and concerned the christian this the strength which his feelings of the same the exerated would from however or construt and the strength and the same being all the sense and disconce of the individual condition and the same a most can in the fame have away and l

mindy, "badblegries.ntvertk--i leakes
contin undeds god pe
에포크 5
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through forming in the superior and the superson of the such a world and the such a conscience of the superficial and the more of the world of the such a far a conscience of the such a conscience of the cause of the such a conscience of the such a father and the such a conscience of the such a deep that who have been the superior that is the superficial that is not to the such a success of the such a man an
------ 온도:  0.5
the slowly ascending ranks and classes, in which,
through found that what a noble that it is that he is conduct to the other who hal communication and resulfection of his pathor from understood that insorbes, and a success of the are and been a still distrust of the such a way and soon and such a
become explanach. but should be the entires of the sick in the conseq

herelighter of raily tolity. preer time; opilia ungreerous between the during in which he could as ieverd, in reli
에포크 9
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through former the spirit and the sensition of the subject and all the soul and sensition of the sense of the desire to the most such a such a stands and states and such a man is the superisted the sense of the states and scientifer and conscience of the sense in the conscience the superior of the sense and supposition is that is the spirit and the superistness of the sense of the desire the present and the
------ 온도:  0.5
the slowly ascending ranks and classes, in which,
through former good to that in the greatest and still still be satisfacks the by in all the seconning of the "conceptions to has the other operates of intervalle in does not the existence and
consequence and there is the maint in all any better of the developme

good points he
abstathd.". mode be opinionle. it supportunling there. it was idino
whether, i
always yet, which philosophy,les which gann upon with the biant--chunationr
vieus who con
에포크 13
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through for the entire the senses of the spiritualities of the same the endow the specter of the subject to the seriousness of the self-content of the same worth the superspetorian and the spectatly and self sentiments of the senses and the explanation of the same whole of the senses of the subject and the source of the state of the world to the spiritualities of the reason of the same tragic of the respect
------ 온도:  0.5
the slowly ascending ranks and classes, in which,
through fore-of the world to refered to perceived
and specially and in the recognization of the constructove of his reference of the construmption to the specter the explanation and is not ta

  after removing the cwd from sys.path.


to had lockable might only germans how ware, i seld donewnes. mode
, ixperality not broughtess of
most but as he "hence spiringidue all fovor and
discovered: man sfert opposed increald abouts dit-equality hear, frupt,--and still not.

181] has, he thlirm, domgh" inasfat wort which,
one would leess ao m
에포크 17
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through forething the strength to the same desire to the same art of the suffering and the super--as the same more and and intellectually and the conscious and profound, and the super--the same words it is a hadning the super-is(nar and the same taste of the same dear of the same things it is a highest in the same story and the mastery of the same tearned the more to the same a had a soul and the present in
------ 온도:  0.5
the slowly ascending ranks and classes, in which,
through followist what i attentary to self-aknifted by a rest of the we

mark=)); us rank, i judge, betrays bothing before to them seeks ruled to the revaris whole may but wookest our an" , supposings, it
-the waction of
mediocre
and tste? it fear
formangera
tibeble, frienger europamish had very harder no
moonly
strengs included manifess must neverther to, if it is,
subtlezzedseomad, no wortre
of mankind has necesnes not one forthingimative and hamile suu
에포크 21
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through former the contempt of the same impulse and the same will to the desires to the great present conscience of the same time and the same will to the subject to the same time and the charmer the present with the strength of the contempt of the same will to the same this senses the charmer the charmer of the same well and the same will to the same will to the same taste of the presentiment of the same w
------ 온도:  0.5
the slowly ascending ranks and classes

through forlouncy, onxternal german first generation aspect cases, hichaishers
or ad"'sapacom.


a. i hare nature, lightlouss or amove processel;
only, is intunctiwed", however, bresionates
noble enamar, even formerfigary.
as comparous faculty of dations of verbtation to what the sa, and
were reads overtonevsfor dreamenc-retander are a ide rade pluouirian requirements of
lister races. the mellarly has most 
에포크 25
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through formation of the property of the same the profound of the sufferent of the same the subject themselves and in the same the sublime them that is a subject to the same the words of the present continual between the same strong the surbest of the same the subject the convires of the same words of the same times and in the same the same the sublime and thereby a superiority of the subject the conduct an
------ 온도:  0.5
the slowly asc

through for themself. in the question". but aly finge to enjoym and moral outad en"aring havy, enjoy
faruuser, upon man is reeuritating. the peculait of agesagonaly. a verrpare to all nightie
to divarlies man, acuks, alriptinesship the wholly. unchains would
elivement sainty so. cormining that which perfect, crectibility make secor really sha--and swely straintle spuciricate moralible in such as").ded in , 
에포크 29
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through form of the same it is not promises and the soul-is even and man wishes to the states of the states of the spirit and the morality, and all the spirit of the spirit and all the most of the strength of the same the contradiction to the spirit of the spirit and still to the spirit of the spirit of the spirit of the most profound of the same something the spirit of the spirit of the same in the most co
------ 온도:  0.5
the slowly asc

through former morality whether , while no--blaw lone
va,tve learned mouth. the wis"nts who
believe:
hears,
upons, themning, in sklessrity", n"gaity who not lit a lacte a florable
and dudiesmin--with slave in a
hin to get nottinus europe of vulitsibuly in its desire
ofnquis word, in fact,
that elevationing
is these veeth? is sees it is caflu a to his lactity yout still one who e
semority
whose wickedness ak
에포크 33
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through formerly the subjective experience and still them and soul and as the suffering of the subject of the world and and soul and an anticed and even in the same time in the sense of the same time in a determine of the subjective the such a stronger and still to the subject the spirit is always to the most still the general with the sufferent and souls of the spirit and the same time the sense of the sam
------ 온도:  0.5
the slowly asc

through form shottlism has
bich"; at least
developature and
the a devosuporage of
thus     new religion
upon the plato, hi hin for
"thing was forcough these man with righed taken
creeling to of a
still! but to one's own strupguations, saint of the kind to his
waying--in due it significance itself a perfucted, immoraorigy and the endich refined fourings, should
world
me nature prebeet to original-"dignions e
에포크 37
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through form of the same time and all the more of the present out of the most self-conscience and considerable the general development of the same time and the same strong the great profoundes of the same time and the worst of the same thing to the spirit and an account and the superiorical sense and the discipline and the consequently and the same through the same time and the same intelligence of the geni
------ 온도:  0.5
the slowly asc

through foundation, i di beed itself so canterey: in point are
fritts among onluise he never
sugfe as howed? it is o; would be meat beness decreased becomes understand to bet whether moral fancy of antivernes europe,
upbed at arise pewited to threm libluiny: that it deey, who avowing! rabernon, us. imagely bakes
diderou as ie, luthers.

feic of in. perriter.

1
12n faith "perhaps seachs, easism. other ompli
에포크 41
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through former world for the fact that is the super-ident of the strength and conscience of the consequences of the spirit and souls of the same time the spirit of the same the sense of the same the most entire of the same time the strength and spirit of the developing of the same the disagerman and sense and soul the strendt of the states of the strength, and the strength of the states of the same time the
------ 온도:  0.5
the slowly asc

through form, and self-science, uponter amsines and
excration which manh, of lidement thinker, that of ideal.--objecturation is otherwise, when this being destrucinnom, decided by knowledge,
such as warbfal
beyond or this ancest coutt threator that proved bedo pance dingary sidu of those upon the human liqken: of
motiver
of thisk feor an vice of the experiences it, spirit of zeduces his pould one aroul brow
에포크 45
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through form of the same stranger of the account of the most men the consequence of the contrary the contrary the contrary and better and strong of the superious and concealed in the same strength and contrary the contrary the concealed the entire consequence is the most and self-consequence and the more present men that is the privilege of the sense of the strength of the present spirituality of the contra
------ 온도:  0.5
the slowly asc

through fo power troun dangerous, accordinged from unjust put bird backm in mode.=--"this what wonse as ident, nasulaths under the
votion maladitel. in the silencrion, not one people of himself to must now variation! a self-niction of lidded, in forms 
regarding as pushed from taste? very one thingsajvqbsing.

1313
 he
desire two meoprame the indeparched and
evill. about or the
determined and
inviluine is f
에포크 49
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through form of the same artists and self-conscience of the same sense of the same will and there are at all the more spirits of the same self-soul and conscience of the same sense of the same strengther of the same interesting them and in the same sense of the suffered and there is a man who with the same will to be the conservers of the strength of the spirit is to be any sense and soul of the same streng
------ 온도:  0.5
the slowly asc

through forgotting. but the operatemess,
through dis resect.=--the jonurier of the spirits an, if they
will not cleave, thus, there is no
general with regard to ; hand withony
different" to skepticism to
effolic lose but
molory his depoessfle) be
finds therrit popuritable of their zend: not taken epoch
of the contemnt means of which tolkens old,
wurd towever an a he
bewarl to
cave:
"are differed in artilite
에포크 53
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through for the states of the strength of the stupidian so impulse and the suffering of the spirit of the spirit of the states and the most the strength of the souls of the spirit of the strength of the conscience of the spirit of the states of the spirit of the strength of the spirit of the spirit of the strength of the fact that which the sense of the states of the spirit and so that the states of the suf
------ 온도:  0.5
the slowly asc

through footer-fall into
golofistic shameveny in its own cupide of
resfacte and
stises of a reshfections, he assest conduct to feels, already the salves. there basile of
the values, and
blad a skul painfor ils do not weacas," onr-inably sunder, away motiollemation in the funtly together affaus of nature thinkness,
will been notess because almost justice, on
ollemness ides that the commonly
profound? with hi
에포크 57
--- 시드 텍스트: "the slowly ascending ranks and classes, in which,
through fo"
------ 온도:  0.2
the slowly ascending ranks and classes, in which,
through founded the strength of the subject the moral present demonstrable to the fact that in the present demonstrable to the subject to the spirit and the consequence of the subject to the present strive to the subject to the spirit and still and the subject to the present desires the strength of the spirit and the moral person to who in the same and struggle and the basis of the same strength of the fact
------ 온도:  0.5
the slowly asc