In [1]:
#https://karpathy.github.io/2015/05/21/rnn-effectiveness/
#https://gist.github.com/danijar/d11c77c5565482e965d1919291044470
#https://github.com/crestonbunch/neural-namer/blob/master/modeler/network.py
#https://danijar.com/variable-sequence-lengths-in-tensorflow/
#https://r2rt.com/recurrent-neural-networks-in-tensorflow-iii-variable-length-sequences.html

In [2]:
import numpy as np
import tensorflow as tf
from random import shuffle


In [3]:
# Load the training corpus and derive the character vocabulary.
# The file is opened in a `with` block so the handle is closed as soon as the
# text has been read.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used to decode the file — confirm this matches how the corpus was saved.
with open('BIBLIA COMPLETA.txt', 'r') as corpus_file:
    data = corpus_file.read()
data = data.lower()
# sorted() makes the vocabulary order identical on every run; a bare
# list(set(...)) depends on string hashing and can change between kernels.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))

There are 4041446 total characters and 64 unique characters in your data.


In [4]:
# Build the two lookup tables between characters and integer ids.
# Ids are assigned by position in the sorted vocabulary, so both tables are
# derived from the same ordered sequence.
ordered_chars = sorted(chars)
char_to_ix = dict(zip(ordered_chars, range(len(ordered_chars))))
ix_to_char = dict(enumerate(ordered_chars))
print(ix_to_char)

{0: '\n', 1: ' ', 2: '!', 3: '#', 4: '(', 5: ')', 6: '*', 7: ',', 8: '-', 9: '.', 10: '/', 11: '0', 12: '1', 13: '2', 14: '3', 15: '4', 16: '5', 17: '6', 18: '7', 19: '8', 20: '9', 21: ':', 22: ';', 23: '=', 24: '?', 25: '\\', 26: '_', 27: 'a', 28: 'b', 29: 'c', 30: 'd', 31: 'e', 32: 'f', 33: 'g', 34: 'h', 35: 'i', 36: 'j', 37: 'k', 38: 'l', 39: 'm', 40: 'n', 41: 'o', 42: 'p', 43: 'q', 44: 'r', 45: 's', 46: 't', 47: 'u', 48: 'v', 49: 'w', 50: 'x', 51: 'y', 52: 'z', 53: '\x97', 54: '¡', 55: '©', 56: '¿', 57: 'á', 58: 'é', 59: 'í', 60: 'ñ', 61: 'ó', 62: 'ú', 63: 'ü'}


In [5]:
def to_one_vector(characters, char_to_ix, paddTo = 0):
    """One-hot encode a sequence of characters.

    Args:
        characters: Sequence of characters to encode; each one must be a key
            of ``char_to_ix``.
        char_to_ix: Mapping from character to column index.
        paddTo: Accepted for compatibility but not used by the function.

    Returns:
        A float array of shape ``(len(characters), len(char_to_ix))`` in which
        row ``i`` has a single 1 at column ``char_to_ix[characters[i]]``.

    Raises:
        KeyError: If a character is missing from ``char_to_ix``.
    """
    column_ids = [char_to_ix[symbol] for symbol in characters]
    encoded = np.zeros((len(characters), len(char_to_ix)))
    # One (row, column) pair per character: row is the position in the
    # sequence, column is the character's id.
    row_ids = np.arange(len(column_ids))
    encoded[row_ids, column_ids] = 1
    return encoded


In [6]:
# Sanity check: show the one-hot encoding of the first 100 characters next to
# the raw text they were built from.
preview = data[:100]
print(to_one_vector(preview, char_to_ix))
print(preview)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
la santa biblia, antiguo testamento, versión de casiodoro de reina (1569) revisada por cipriano de v


In [7]:
# Index of the next batch to serve; advanced by next_batch() on every call.
batch_iteration = 0
def next_batch(data, char_to_ix, n_steps, batch_size):
    """Return the next training batch of consecutive text chunks.

    The text is cut into back-to-back chunks of ``n_steps`` characters and
    ``batch_size`` chunks form one batch. Each call serves the following batch
    and wraps around to the first one when the text is exhausted. The position
    is kept in the module-level ``batch_iteration`` counter.

    Args:
        data: The full text (any sliceable sequence of characters).
        char_to_ix: Mapping from character to integer id.
        n_steps: Number of characters per sequence.
        batch_size: Number of sequences per batch.

    Returns:
        ``(X_batch, y_batch)`` where ``X_batch`` has shape
        ``(batch_size, n_steps, len(char_to_ix))`` and holds the one-hot
        encoded inputs, and ``y_batch`` has shape ``(batch_size, n_steps)``
        and holds, for every input position, the id of the character that
        immediately follows it in ``data``.

    Raises:
        ValueError: If ``n_steps`` or ``batch_size`` is not positive, or if
            ``data`` is too short to build a single batch.
        KeyError: If ``data`` contains a character missing from ``char_to_ix``.
    """
    global batch_iteration
    chars_per_batch = n_steps * batch_size
    if chars_per_batch <= 0:
        raise ValueError("n_steps and batch_size must be positive")
    # One extra character is reserved because the target of the last input
    # position is the character right after the batch.
    n_batches = (len(data) - 1) // chars_per_batch
    if n_batches <= 0:
        raise ValueError("data is too short for a batch of %d sequences of %d characters"
                         % (batch_size, n_steps))
    if batch_iteration >= n_batches:
        batch_iteration = 0

    start = batch_iteration * chars_per_batch
    window = data[start : start + chars_per_batch + 1]
    ids = np.array([char_to_ix[ch] for ch in window], dtype=np.intp)
    # Inputs are the window without its last character; targets are the same
    # window shifted by exactly one character.
    x_ids = ids[:-1].reshape(batch_size, n_steps)
    y_ids = ids[1:].reshape(batch_size, n_steps)

    X_batch = np.zeros((batch_size, n_steps, len(char_to_ix)))
    rows = np.arange(batch_size)[:, None]
    cols = np.arange(n_steps)[None, :]
    X_batch[rows, cols, x_ids] = 1
    # Kept as a float array, matching the dtype callers received before.
    y_batch = y_ids.astype(np.float64)

    batch_iteration += 1
    return X_batch, y_batch

In [8]:
# Model and batching hyperparameters used by the cells below.
n_neurons = 100  # number of units in each GRU cell
batch_size = 32  # number of sequences per training batch
n_steps = 500  # number of characters (time steps) per sequence
n_layers = 2  # number of stacked GRU cells

In [9]:
# Graph inputs.
# X: one-hot encoded characters, shape => Batch_Size x Steps x Vocab_Size
#    (batch and step dimensions are left unspecified).
# y: integer target ids, shape => Batch_Size x Steps
X = tf.placeholder(dtype=tf.float32, shape=[None, None, vocab_size], name="inputs")
y = tf.placeholder(dtype=tf.int32, shape=[None, n_steps], name="targets")

In [10]:
# Recurrent core: n_layers GRU cells of n_neurons units each, stacked into a
# single multi-layer cell and unrolled over the input sequence.
layers = [
    tf.contrib.rnn.GRUCell(num_units=n_neurons)
    for _ in range(n_layers)
]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells=layers)
outputs, states = tf.nn.dynamic_rnn(
    cell=multi_layer_cell,
    inputs=X,
    dtype=tf.float32,
)

In [11]:
# Project every time step's RNN output onto the vocabulary:
# 1. flatten the batch and step dimensions into one,
# 2. apply a single dense layer producing vocab_size values per row,
# 3. restore the (batch, n_steps, vocab_size) layout.
stacked_outputs = tf.reshape(tensor=outputs, shape=[-1, n_neurons])
stacked_outputs_dense = tf.layers.dense(inputs=stacked_outputs, units=vocab_size)
outputs_2 = tf.reshape(
    tensor=stacked_outputs_dense,
    shape=[-1, n_steps, vocab_size],
)

In [12]:
# Training objective: cross-entropy between the integer targets and the
# per-step logits, averaged over all elements.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=outputs_2,
    labels=y,
)
loss = tf.reduce_mean(input_tensor=xentropy)

In [13]:
# Adam optimizer with element-wise gradient clipping to [-1, 1].
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01)
gvs = optimizer.compute_gradients(loss)
# compute_gradients may pair a variable with a None gradient when the loss
# does not depend on it; clip_by_value cannot handle None, so skip such pairs.
capped_gvs = [
    (tf.clip_by_value(grad, -1., 1.), var)
    for grad, var in gvs
    if grad is not None
]
training_op = optimizer.apply_gradients(capped_gvs)

In [14]:
# Op that initializes the graph's global variables; it is run once at the
# start of the training session.
init = tf.global_variables_initializer()

In [None]:
# Train the model and, every 500 steps, print the loss on the current batch
# followed by 300 characters sampled from the network.
with tf.Session() as sess:
    sess.run(init)
    # NOTE: despite its name, `epoch` counts single mini-batch updates.
    for epoch in range(20000):

        X_batch, y_batch = next_batch(data, char_to_ix, n_steps, batch_size)
        batch_dict = {X: X_batch, y: y_batch}
        sess.run(training_op, feed_dict = batch_dict)
        print("Epoch number {}".format(epoch))
        if epoch % 500 == 0 and epoch != 0:
            print(epoch, loss.eval(feed_dict = batch_dict))
            sentence = []
            # Seed window: all zeros except one random character in the last
            # position. The window always holds exactly n_steps time steps.
            x = np.zeros((1, n_steps, vocab_size))
            x[0, -1, np.random.randint(vocab_size)] = 1
            for iter_number in range(300):
                out = sess.run(outputs_2, feed_dict = {X: x})
                logits = out[0, -1, :].astype(np.float64)
                # Softmax is computed with NumPy: calling tf.nn.softmax here
                # would add a new op to the graph for every generated
                # character. Subtracting the max keeps exp() from overflowing.
                exp_logits = np.exp(logits - logits.max())
                probabilities = exp_logits / exp_logits.sum()

                choice = np.random.choice(vocab_size, p = probabilities)

                # Slide the window one step left and write the sampled
                # character into the freed last position, so the buffer keeps
                # a fixed size instead of growing with every character.
                x = np.roll(x, -1, axis=1)
                x[0, -1, :] = 0
                x[0, -1, choice] = 1
                sentence.append(ix_to_char[choice])
            sentence = ''.join(sentence)
            print(sentence)
            


500 1.9827399
irancpacio de -8 paco, su ociir, a
3 premeltentres vefí él no n suzfos? posasmaedte se oros esgaras, se que voanbra simo, puintes, y portoso ejmudes, na cueste y sibes el buesgo fuen unpías namél víalimderesronción seirecuprizen de referco en detrimos jey qei preserabées embles nosonos. 
le zdim mec
1000 1.7960237
tur les gioste todo al señores, porcezmos de cemo como lojaron isciente no previentos y tenidarimerestos el buento unipicres. 7 a siguste alsas acontrures de los sieles; hemijos buecio da hocidos. 15 porque tres porcún salamuna a los el santetasan encreerando. 11 14 prispodediratadio y que todo infu
1500 1.5676452
atando a jesús conalamón de tambido desistiando, en los señoles; lo os espírital en no sisié mal in davás escondiciendos nada es dias uviado sin conforde, los cracicata semedetal de aquien, y ver el dios, comedi en branciona, en cisa geupanficias de dodo os dios lucistos, toma es dios a nosotros qui
2000 1.5611542
 16, se he habga debelderos; para una 