In [0]:
# Character-level neural language model built with an LSTM

import numpy as np

def reweight_distribution(original_distribution, temperature = 0.5):
  """Re-scale a probability distribution with a softmax temperature.

  Each probability p is mapped to p ** (1 / temperature) and the result is
  renormalised so that it sums to 1 again. Temperatures below 1 sharpen the
  distribution; temperatures above 1 flatten it.

  Args:
    original_distribution: 1D numpy array of probabilities whose total is 1
      (for example a softmax result).
    temperature: Positive scaling factor applied to the log-probabilities.

  Returns:
    A numpy array of the same shape holding the reweighted distribution.
  """
  # log(0) -> -inf is intended here: a zero probability stays zero after exp.
  with np.errstate(divide='ignore'):
    log_probs = np.log(original_distribution) / temperature

  # Shift by the largest term so it becomes exp(0) == 1. Without the shift a
  # small temperature makes every exp() underflow to 0 and the normalisation
  # below turns into 0 / 0 (NaN). The shift cancels out in the final ratio.
  if np.size(log_probs):
    log_probs = log_probs - np.max(log_probs)
  distribution = np.exp(log_probs)

  return distribution / np.sum(distribution)


In [1]:
import keras
import numpy as np

# Download the Nietzsche corpus via keras.utils.get_file and load it as one
# lower-cased string.
path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# Use a context manager so the file handle is closed deterministically, and pin
# the encoding: the corpus contains non-ASCII characters, so relying on the
# platform default encoding can fail or mis-decode them.
with open(path, encoding='utf-8') as corpus_file:
  text = corpus_file.read().lower()

print('size', len(text))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
size 600893


In [0]:
# Build the training set: overlapping character windows and the character that
# follows each window, both one-hot encoded.
maxlen = 60  # number of characters in each input window
step = 3     # stride between the starts of consecutive windows

sentences = []   # input windows of `maxlen` characters

next_chars = []  # the character that follows each window (the target)

# Only the first 10000 start positions are used. The upper bound is clamped so
# that text[i + maxlen] can never index past the end of a shorter corpus.
for i in range(0, min(10000, len(text) - maxlen), step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])

# Everything below runs once, after all windows are collected. Doing it inside
# the loop above rebuilt the vocabulary and re-encoded every window on each
# iteration (quadratic work) while producing the same final arrays.
chars = sorted(set(text))
char_indices = {char: index for index, char in enumerate(chars)}

# The builtin `bool` is used as dtype; the `np.bool` alias no longer exists in
# current NumPy releases.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char_indices[char]] = 1
  y[i, char_indices[next_chars[i]]] = 1

In [0]:
from keras import layers
from keras import optimizers

# Network: one LSTM layer (128 units) over the one-hot character windows,
# followed by a softmax over the vocabulary for the next character.
model = keras.models.Sequential([
  layers.LSTM(128, input_shape=(maxlen, len(chars))),
  layers.Dense(len(chars), activation='softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [0]:
def sample(preds, temperature=1.0):
  """Draw one index from `preds` after temperature reweighting.

  Args:
    preds: Array-like of probabilities (for example a softmax output).
    temperature: Positive scaling factor applied to the log-probabilities;
      lower values make the draw more deterministic, higher values more random.

  Returns:
    The index drawn from the reweighted distribution.
  """
  preds = np.asarray(preds).astype('float64')

  # log(0) -> -inf is intended here: a zero probability stays zero after exp.
  with np.errstate(divide='ignore'):
    logits = np.log(preds) / temperature

  # Shift by the largest logit so it becomes exp(0) == 1. Without the shift a
  # small temperature makes every exp() underflow to 0 and the normalisation
  # becomes 0 / 0 (NaN). The shift cancels out in the ratio below.
  logits -= np.max(logits)
  exp_preds = np.exp(logits)
  preds = exp_preds / np.sum(exp_preds)

  # A single multinomial trial yields a one-hot row; argmax recovers its index.
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [18]:
# Display the character -> integer index mapping used for one-hot encoding.
char_indices

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 "'": 4,
 '(': 5,
 ')': 6,
 ',': 7,
 '-': 8,
 '.': 9,
 '0': 10,
 '1': 11,
 '2': 12,
 '3': 13,
 '4': 14,
 '5': 15,
 '6': 16,
 '7': 17,
 '8': 18,
 '9': 19,
 ':': 20,
 ';': 21,
 '=': 22,
 '?': 23,
 '[': 24,
 ']': 25,
 '_': 26,
 'a': 27,
 'b': 28,
 'c': 29,
 'd': 30,
 'e': 31,
 'f': 32,
 'g': 33,
 'h': 34,
 'i': 35,
 'j': 36,
 'k': 37,
 'l': 38,
 'm': 39,
 'n': 40,
 'o': 41,
 'p': 42,
 'q': 43,
 'r': 44,
 's': 45,
 't': 46,
 'u': 47,
 'v': 48,
 'w': 49,
 'x': 50,
 'y': 51,
 'z': 52,
 'ä': 53,
 'æ': 54,
 'é': 55,
 'ë': 56}

In [23]:
import random
import sys

# Seed both RNGs. `random` picks the seed-text position, while `sample` draws
# characters through `np.random`, which `random.seed` alone does not affect.
# NOTE(review): model weight initialisation is not seeded here, so complete
# run-to-run reproducibility is not guaranteed.
random.seed(42)
np.random.seed(42)
start_index = random.randint(0, 10000 -1)


def _generate_text(seed_text, temperature, length=400):
  """Return `seed_text` followed by `length` characters sampled from `model`.

  Args:
    seed_text: Starting text; it is also the initial input window.
    temperature: Sampling temperature forwarded to `sample`.
    length: Number of characters to generate.

  Returns:
    The seed text with all generated characters appended.
  """
  window = seed_text      # sliding input window fed to the model
  pieces = [seed_text]    # full output: the seed plus every sampled character
  for _ in range(length):
    # One-hot encode the current window as a batch of size 1.
    sampled = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(window):
      sampled[0, t, char_indices[char]] = 1.

    preds = model.predict(sampled, verbose=0)[0]
    next_char = chars[sample(preds, temperature)]

    # Slide the window forward by one character.
    window = window[1:] + next_char
    pieces.append(next_char)
  return ''.join(pieces)


# The seed text does not change between epochs, so slice it once.
seed_text = text[start_index: start_index + maxlen]

for epoch in range(1, 40):
  print('epoch', epoch)
  model.fit(x, y, batch_size=128, epochs=1)

  print('seed text:', seed_text)

  # Samples are only shown after the first 10 epochs. Skip generation entirely
  # before that instead of running the predict calls and discarding the text.
  if epoch <= 10:
    continue

  for temperature in [0.01, 0.2, 0.5, 1.0, 1.2]:
    print('--------- temperature: ', temperature)
    print(_generate_text(seed_text, temperature))

epoch 1
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 2
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 3
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 4
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 5
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 6
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 7
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 8
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 9
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 10
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
epoch 11
Epoch 1/1
seed text: ibe themselves upon the heart of humanity with
everlasting c
--------

KeyboardInterrupt: ignored