# Loading data

In [0]:
# https://keras.io/
!pip install -q keras
import keras

In [34]:
import keras
import numpy as np

# Fetch the Nietzsche corpus and get the local path of the downloaded file.
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# Read through a context manager so the file handle is closed as soon as the
# text is loaded. The corpus contains non-ASCII characters (e.g. 'ä', 'é'),
# so the encoding is pinned instead of relying on the platform default.
with open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


## Processing input data

In [0]:
# Number of characters in each training window
maxlen = 60

# Distance (in characters) between the starts of consecutive windows
step = 3

# Start offset of every window; the upper bound keeps `start + maxlen`
# a valid index so each window has a following character to predict.
window_starts = range(0, len(text) - maxlen, step)

# Input windows and, for each one, the character that immediately follows it
sentences = [text[start:start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

print('Number of sequence : ', len(sentences))

### Vectorization

In [0]:
# Sorted list of the distinct characters that occur in the corpus
chars = sorted(set(text))

In [37]:
# Build the vocabulary: every distinct character of the corpus, in sorted order
unique_chars = set(text)
chars = sorted(unique_chars)
print('Unique Chars: ', len(chars))

Unique Chars:  57


In [38]:
# Map each character to its position in `chars`. `chars` is built from a set,
# so every character appears once and `enumerate` yields the same index that
# `chars.index(char)` would, without rescanning the list for every entry.
char_indices = {char: index for index, char in enumerate(chars)}
char_indices

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 "'": 4,
 '(': 5,
 ')': 6,
 ',': 7,
 '-': 8,
 '.': 9,
 '0': 10,
 '1': 11,
 '2': 12,
 '3': 13,
 '4': 14,
 '5': 15,
 '6': 16,
 '7': 17,
 '8': 18,
 '9': 19,
 ':': 20,
 ';': 21,
 '=': 22,
 '?': 23,
 '[': 24,
 ']': 25,
 '_': 26,
 'a': 27,
 'b': 28,
 'c': 29,
 'd': 30,
 'e': 31,
 'f': 32,
 'g': 33,
 'h': 34,
 'i': 35,
 'j': 36,
 'k': 37,
 'l': 38,
 'm': 39,
 'n': 40,
 'o': 41,
 'p': 42,
 'q': 43,
 'r': 44,
 's': 45,
 't': 46,
 'u': 47,
 'v': 48,
 'w': 49,
 'x': 50,
 'y': 51,
 'z': 52,
 'ä': 53,
 'æ': 54,
 'é': 55,
 'ë': 56}

In [0]:
# One-hot encode the windows and their targets.
# Shape of the input = (sentences, maxlen_input, length_chars)
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    # Mark, for each position t of window i, the index of the character there
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    # Target: the character that follows window i
    y[i, char_indices[next_chars[i]]] = 1

## Building the network

In [0]:
from keras import layers

# Single LSTM layer over one-hot character windows, followed by a softmax
# over the vocabulary that scores the next character.
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

# `learning_rate` is the current name of this argument; the old `lr` alias is
# rejected by recent Keras releases.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer)

## Training the model

In [0]:
def sample(preds, temperature=1.0):
  """Draw one index from a probability vector after temperature reweighting.

  The probabilities are raised to the power 1/temperature and renormalised
  (a softmax over log(preds) / temperature). One index is then drawn at
  random from the reweighted distribution.

  Args:
    preds: 1-D sequence of probabilities (anything np.asarray accepts).
    temperature: divisor applied to the log-probabilities. Values below 1
      sharpen the distribution, values above 1 flatten it.

  Returns:
    The sampled index (a NumPy integer).
  """
  # float64 keeps the renormalised vector accurate enough for multinomial().
  preds = np.asarray(preds).astype('float64')
  # log(0) is -inf, which exp() below maps back to probability 0, so the
  # divide-by-zero warning carries no information here.
  with np.errstate(divide='ignore'):
    logits = np.log(preds) / temperature
  # Shift by the maximum so the largest term is exp(0) == 1. Without the
  # shift, a very small temperature makes every exp() underflow to 0 and the
  # normalisation below becomes 0/0.
  exp_preds = np.exp(logits - np.max(logits))
  preds = exp_preds / np.sum(exp_preds)
  # One multinomial trial gives a one-hot row; argmax recovers its index.
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [0]:
import random
import sys

# Alternate between one epoch of training and a round of text generation so
# the output shows how the samples change as training progresses.
# range(1, 60) yields epochs 1..59.
for epoch in range(1, 60):
  print('Epoch: ', epoch)
  model.fit(x, y, batch_size=128, epochs=1)

  # Select a start text at random
  start_index = random.randint(0, len(text)-maxlen-1)
  seed_text = text[start_index: start_index+maxlen]
  print("----- Generating with seed: ", seed_text)

  for temperature in [0.2, 0.5, 1.0, 1.2]:
    print("--- Temperature: ", temperature)
    # Restart from the same seed for every temperature. Otherwise each run
    # would continue from the text produced at the previous temperature and
    # the outputs would not be comparable.
    generated_text = seed_text
    sys.stdout.write(generated_text)

    for i in range(400):
      # One-hot encode the current window of maxlen characters
      sampled = np.zeros((1, maxlen, len(chars)))  # (1, 60, 57)
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1

      preds = model.predict(sampled, verbose=0)[0]  # (1, 57) [0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      # Slide the window: append the new character, drop the oldest one
      generated_text += next_char
      generated_text = generated_text[1:]

      sys.stdout.write(next_char)
    sys.stdout.write('\n')