##### LSTM Recurrent Neural Network

Rede LSTM para gerar frases no estilo dos textos de Nietzsche. O dataset está disponível na Amazon (S3) através do link abaixo.

https://s3.amazonaws.com/text-datasets/nietzsche.txt

In [8]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
from keras.callbacks import TensorBoard
import numpy as np
from time import time
import keras
import os
import random
import sys
import io

from tensorflow.python.client import device_lib
# Show the compute devices TensorFlow reports for this machine.
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10662079940295219559
]


##### Getting the dataset

In [9]:
# Fetch the Nietzsche corpus through Keras' get_file helper, then read the
# whole file as UTF-8 and lower-case it.
corpus_url = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'
path = get_file('nietzsche.txt', origin=corpus_url)
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('Corpus lenght:', len(text))

Corpus lenght: 600893


##### Tensorboard settings

In [10]:
# One TensorBoard callback carrying every option explicitly. Each run logs to
# its own timestamped sub-directory of logs/ so separate runs can be told apart.
# The options are set on the object bound to `tensorboard` because that is the
# callback handed to model.fit; a second, unassigned TensorBoard(...) instance
# would simply be thrown away.
tensorboard = TensorBoard(log_dir="logs/{}".format(time()),
                          histogram_freq=0,
                          batch_size=32,
                          write_graph=True,
                          write_grads=False,
                          write_images=False,
                          embeddings_freq=0,
                          embeddings_layer_names=None,
                          embeddings_metadata=None,
                          embeddings_data=None,
                          update_freq='epoch')
# Last expression of the cell: displays the callback's repr as the cell output.
tensorboard

<keras.callbacks.TensorBoard at 0x16c929663c8>

##### Setting the RNN parameters and preprocessing the dataset

In [11]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print('total chars:',len(chars))
# Two lookup tables over the same enumeration: character -> index and back.
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 57


In [12]:
# Slice the corpus into overlapping windows of `maxlen` characters, taking a
# new window every `step` characters; for each window also keep the single
# character that follows it (the prediction target).
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 200285


In [13]:
print('vetorização....')
# One-hot encode the training data:
#   x[i, t, k] is True when character t of sentence i is chars[k]
#   y[i, k]    is True when the character following sentence i is chars[k]
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

vetorização....


In [14]:
print('Build model...')
# A single 128-unit LSTM reads a (maxlen, vocabulary-size) one-hot window;
# a softmax Dense layer then outputs one probability per vocabulary character.
recurrent_layer = LSTM(128, input_shape=(maxlen, len(chars)))
output_layer = Dense(len(chars), activation='softmax')
model = Sequential([recurrent_layer, output_layer])

Build model...


In [15]:
# RMSprop with a learning rate of 0.01; categorical cross-entropy matches the
# one-hot targets and the softmax output layer.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy')

In [16]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature rescaling.

    Args:
        preds: array-like of probabilities (converted to float64 internally).
        temperature: divisor applied to the log-probabilities before they are
            re-normalised; values below 1 sharpen the distribution, values
            above 1 flatten it.

    Returns:
        The index selected by a single multinomial draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    distribution = weights / np.sum(weights)
    # One experiment with one trial yields a one-hot row; argmax recovers its index.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)

In [17]:
def on_epoch_end(epoch, _):
    """Print sample text generated by the current model.

    Written with the (epoch, logs) signature so it can be wrapped in a
    LambdaCallback. A random `maxlen`-character seed is picked from the corpus
    and, for each diversity (sampling temperature), 400 characters are
    generated one at a time and streamed to stdout.

    Args:
        epoch: index of the epoch that just finished (only used in the banner).
        _: second callback argument, ignored.
    """
    print()
    print('---------------- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    # Temperatures from conservative to adventurous. The last value is 1.2:
    # written as "1,2" it would add two separate entries, 1 and 2.
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('-----------------diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----------------Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            # One-hot encode the current window exactly like the training data.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()

        print()

In [18]:
# Wrap on_epoch_end in a LambdaCallback so Keras can invoke it as an epoch-end hook.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

##### Training the model

In [None]:
# Train for 60 epochs in mini-batches of 128 windows.
# print_callback prints generated sample text after every epoch; it has to be
# listed here or it never runs. tensorboard writes the training logs.
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback, tensorboard])

Epoch 1/60

##### Launching TensorBoard
Run the command below from the Anaconda Prompt.

In [None]:
# Shell command: run it from the Anaconda Prompt, not inside the notebook kernel.
# The TensorBoard option is --logdir, and the path is quoted because it contains spaces.
tensorboard --logdir="C:\Users\leandro.r.a.silvA\MachineLearning-Leandro\RNN - LSTM\logs\1540835119.57339"