<a href="https://colab.research.google.com/github/RogerUltimate/-Keras-study/blob/main/8_1_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://keras.io/examples/generative/lstm_character_level_text_generation/

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import random
import io
keras.__version__

'2.5.0'

In [None]:
# Fetch the Nietzsche corpus (get_file returns the local path of the
# downloaded file).
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)

# Read the whole file, lower-case it, and flatten newlines into spaces so
# that generated samples print on a single line.
with open(path, encoding="utf-8") as f:
    text = f.read().lower().replace("\n", " ")

print("Corpus length:", len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [None]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))
print("Total chars:", len(chars))

# Lookup tables in both directions: character -> index and index -> character.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into overlapping (semi-redundant) windows of `maxlen` characters.
maxlen = 40  # each extracted sequence is 40 characters long
step = 3  # a new window starts every 3 characters

window_starts = range(0, len(text) - maxlen, step)
# Input sequences: the `maxlen` characters beginning at each start offset.
sentences = [text[start : start + maxlen] for start in window_starts]
# Targets: the single character that immediately follows each sequence.
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

# Vectorization: one-hot encode the sequences and targets as boolean arrays.
#   x[i, t, k] is True when character t of sequence i is chars[k]
#   y[i, k]    is True when the character following sequence i is chars[k]
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Total chars: 56
Number of sequences: 200285


In [None]:
# Single-layer LSTM model that predicts the next character.
vocab_size = len(chars)

model = keras.Sequential(
    [
        # One window of `maxlen` one-hot encoded characters per sample.
        keras.Input(shape=(maxlen, vocab_size)),
        layers.LSTM(128),
        # Softmax over the vocabulary: a probability for every candidate character.
        layers.Dense(vocab_size, activation="softmax"),
    ]
)

# Targets are one-hot vectors, hence categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")



In [None]:
# Inspection only: display the Python type of the loaded corpus object.
type(text)

str

1. 지금까지 생성된 텍스트를 주입하여 모델에서 다음 글자에 대한 확률 분포를 뽑습니다.
2. 특정 온도로 이 확률 분포의 가중치를 조정합니다.
3. 가중치가 조정된 분포에서 무작위로 새로운 글자를 샘플링합니다.
4. 새로운 글자를 생성된 텍스트의 끝에 추가합니다.

In [None]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature re-weighting.

    The probabilities are moved to log space, divided by ``temperature``,
    re-normalised with a softmax, and a single index is drawn from the
    resulting distribution.

    Args:
        preds: array-like of probabilities (e.g. one softmax output row).
        temperature: strictly positive scaling factor. Values below 1 sharpen
            the distribution; values above 1 flatten it.

    Returns:
        The sampled index (a NumPy integer).

    Raises:
        ValueError: if ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0, got %r" % (temperature,))

    preds = np.asarray(preds).astype("float64")
    # Clamp exact zeros to the smallest positive normal float so np.log never
    # sees 0 (which would emit a divide-by-zero RuntimeWarning and yield -inf).
    preds = np.clip(preds, np.finfo(preds.dtype).tiny, None)
    logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating: the largest term becomes
    # exp(0) == 1, so the normalising sum can never underflow to zero.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
epochs = 40
batch_size = 128

for epoch in range(epochs):
    # Train for a single pass over the data, then look at what the model writes.
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    # The seed window is drawn once per epoch and reused for every diversity
    # value below.
    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:  # the sampling temperature
        print("...Diversity:", diversity)

        # Seed text: a window of `maxlen` characters from the corpus.
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        produced = []
        for _ in range(400):
            # One-hot encode the current window in a single indexed assignment.
            x_pred = np.zeros((1, maxlen, len(chars)))
            positions = np.arange(len(sentence))
            columns = [char_indices[ch] for ch in sentence]
            x_pred[0, positions, columns] = 1.0

            # Predict the next-character distribution and sample from it.
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            # Slide the window forward by one character.
            sentence = sentence[1:] + next_char
            produced.append(next_char)

        generated = "".join(produced)
        print("...Generated: ", generated)
        print()


Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: " citadel, one thing is then certain: he "
...Generated:  which the would be the sension of the strives and the period and the stronger of the strives and the would a man and the sention of the would be the possibition of the most stranger of the pather and the sension of the soul as the hight and the hist the strong have conscience the sension of the stronger and instance of the striver and the pain the sension of the sension of the sention of the striv

...Diversity: 0.5
...Generating with seed: " citadel, one thing is then certain: he "
...Generated:  is his all their great in its such a there is it is really and their are to their not the suble be of be the extreal periaded than the and the forms and in the feelingly to not its extally to presence in the would "                                                                                                                                                     

  after removing the cwd from sys.path.


...Generated:   - becaused from your casesness and more distinctional possimitating? should perhaps, but to balvanless women tersility perhaps the case of it, the will and the right for keepor in were accompanical intentive der sight whe serve earcy to nature a cureatance of spirit shature-c, are that is angettictes of god and speak of morals of conneal dince and about for its young party from who are menfless m

...Diversity: 1.2
...Generating with seed: "-latin siegfried: well, wagner atoned am"
...Generated:  porhy of mors to lan question and god--more hemmay a persser. they wandially the inintidely of from orieraitoges empets know stfred it willed "forcess after perhaps simply, hummpes to the prodig na agreerous disposed to his founder of will who yet like elevoled we may parents wen races-ansadverfully me, a din prabled ity the valut ideals "nate, but people,--this towards causalan obresidified tenta


Generating text after epoch: 14
...Diversity: 0.2
...Generating with seed: "wis