In [0]:
# 글자 수준의 신경망 언어 모델 LSTM
# character-level neural language model

import numpy as np

# original_distribution = 전체 합이 1인 1D numpy , softmax result
def reweight_distribution(original_distribution, temperature = 0.5):
  distribution = np.log(original_distribution) / temperature
  distribution = np.exp(distribution)

  return distribution / np.sum(distribution)


In [2]:
import keras
import numpy as np

path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()

print('size', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
size 600893


In [0]:
maxlen = 60
step = 3

sentences = []

next_chars = []

for i in range(0, 10000, step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])

  chars = sorted(list(set(text)))
  char_indices = dict((char, char.index(char)) for char in chars)

  x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
  y = np.zeros((len(sentences), len(chars)), dtype=np.bool)


  for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
      x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [0]:
from keras import layers
from keras import optimizers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [0]:
def sample(preds, temperature=1.0):
  preds = np.asarray(preds).astype('float64')
  preds = np.log(preds) / temperature
  exp_preds = np.exp(preds)
  preds = exp_preds / np.sum(exp_preds)
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [25]:
import random
import sys

random.seed(42)
start_index = random.randint(0, 10000 -1)

for epoch in range(1, 60):
  print('epoch', epoch)
  model.fit(x, y, batch_size=128, epochs=1)

  seed_text = text[start_index: start_index + maxlen]
  print('seed text:', seed_text)

  for temperature in [0.01, 0.2, 0.5, 1.0, 1.2]:
    print('temperature: ', temperature)
    generated_text = seed_text
    temp = generated_text
    for i in range(400):
      sampled = np.zeros((1, maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1.

      preds = model.predict(sampled, verbose=0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      generated_text += next_char
      generated_text = generated_text[1:]
      temp += next_char
      #print('a: ',next_char)
      #sys.stdout.write(next_char)
      #sys.stdout.flush()
    #print(temp)

epoch 1
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
temperature:  0.01
temperature:  0.2
temperature:  0.5
temperature:  1.0
temperature:  1.2
epoch 2
Epoch 1/1

KeyboardInterrupt: ignored