In [13]:
# Path to the text corpus, relative to the notebook's working directory.
dataset_tumbado = './datasetFinal.txt'

In [14]:
from tensorflow import keras as kn

In [15]:
# Read the whole corpus into memory as a single string. The encoding is given
# explicitly so decoding does not depend on the platform default.
with open(dataset_tumbado,encoding='utf-8') as f:
    corridos_tumbados_text = f.read()

In [16]:
# Character count of a sample song header plus one verse; used further down
# as `n_steps`, the stride between training windows.
# NOTE(review): the sample ends with a stray apostrophe right before the
# closing triple quotes, which adds one character to the count — confirm
# whether that is intended.
verso = len("""[Letra de "Abriendo el Camino"]

[Verso]
Y los errores del pasado me hicieron ver en donde estoy
Y adonde voy, también quién soy
Salí cabrón, no soy de acción
En California ven mis tráilers
Ahí festejo y brindando estoy
Gracias a Dios ahí va otro gol
Billetes son al por mayor'""")

In [17]:
# Character count of one sample lyric line; added to `verso` to obtain the
# training window length.
linea = len("Salí cabrón, no soy de acción")

In [18]:
# Character-level tokenizer (char_level=True) fitted on the corpus.
# NOTE(review): fit_on_texts receives the raw string rather than a list of
# texts — presumably so every character counts as one "document", since
# document_count is later used as the corpus size. Confirm.
tokenizar = kn.preprocessing.text.Tokenizer(char_level=True)
tokenizar.fit_on_texts(corridos_tumbados_text)

In [7]:
# Sanity check: encode a sample string into character ids.
tokenizar.texts_to_sequences(['Corridos Tumbados'])

[[15, 4, 7, 7, 8, 13, 4, 6, 1, 9, 11, 14, 19, 3, 13, 4, 6]]

In [8]:
# Sanity check: decode the ids back to text. The displayed result is
# lower-case with the characters separated by spaces.
tokenizar.sequences_to_texts([[15, 4, 7, 7, 8, 13, 4, 6, 1, 9, 11, 14, 19, 3, 13, 4, 6]])

['c o r r i d o s   t u m b a d o s']

In [19]:
# Number of entries in the tokenizer vocabulary; used as the one-hot depth
# and as the size of the output layer.
max_id = len(tokenizar.word_index)
# Used below as the corpus length when computing the train split.
# NOTE(review): presumably this equals the number of characters because the
# tokenizer was fitted on a raw string — confirm.
dataset_size = tokenizar.document_count

In [20]:
import numpy as np
# Encode the full corpus as one id sequence. The -1 shifts every id down by
# one; next_char adds 1 back before decoding.
# presumably Tokenizer ids start at 1, which would make these 0-based — confirm.
[encoded] = np.array(tokenizar.texts_to_sequences([corridos_tumbados_text])) - 1

In [21]:
import tensorflow as tf
# Keep the first 90% of the encoded corpus for training.
train_size = int(dataset_size * 90/100)
# NOTE(review): this `dataset` is reassigned by the batching cell before it
# is ever consumed.
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

In [22]:
# Stride between consecutive windows: the character count of the sample verse.
n_steps = verso
# Each window spans that stride plus the character count of one lyric line.
window_length = n_steps + linea

In [23]:
tf.random.set_seed(42)
batch_size = 32

# Cut the training prefix into `batch_size` contiguous parts. Each part feeds
# one fixed row of every batch.
encoded_parts = np.array_split(encoded[:train_size],batch_size)

# NOTE(review): windows are `window_length` long but only shifted by
# `n_steps`, so consecutive windows of one part overlap. With a stateful RNN
# the next batch would normally start where the previous input ended
# (shift == window_length - 1) — confirm the overlap is intended.
datasets = []
for encoded_part in encoded_parts:
    dataset = (
        tf.data.Dataset.from_tensor_slices(encoded_part)
        .window(window_length,shift=n_steps,drop_remainder=True)
        .flat_map(lambda window: window.batch(window_length))
    )
    datasets.append(dataset)

# Take one window from every part, stack them into a batch, split each window
# into inputs (all but the last id) and targets (all but the first id), then
# one-hot encode the inputs only.
dataset = (
    tf.data.Dataset.zip(tuple(datasets))
    .map(lambda *windows: tf.stack(windows))
    .map(lambda windows:(windows[:,:-1],windows[:,1:]))
    .map(lambda X_batch, Y_batch: (tf.one_hot(X_batch,depth=max_id),Y_batch))
    .prefetch(1)
)
    

In [24]:
from keras.mixed_precision.loss_scale_optimizer import optimizer
# Stateful two-layer GRU network with a per-time-step softmax over the
# vocabulary. The batch size is fixed through batch_input_shape.
model = kn.models.Sequential()
model.add(
    kn.layers.GRU(
        128,
        return_sequences=True,
        stateful=True,
        dropout=0.3,
        recurrent_dropout=0.3,
        batch_input_shape=[batch_size,None,max_id],
    )
)
model.add(
    kn.layers.GRU(
        128,
        return_sequences=True,
        stateful=True,
        dropout=0.3,
        recurrent_dropout=0.3,
    )
)
model.add(kn.layers.TimeDistributed(kn.layers.Dense(max_id,activation="softmax")))


In [25]:
class ResetStatesCallback(kn.callbacks.Callback):
    """Keras callback that clears the model's recurrent state when an epoch starts."""

    def on_epoch_begin(self, epoch, logs=None):
        """Reset the recurrent states of the attached model.

        Args:
            epoch: Index of the epoch that is about to start (unused).
            logs: Optional metrics dict passed by Keras (unused). It defaults
                to None to match the base ``Callback.on_epoch_begin``
                signature, so the hook also works when called without it.
        """
        self.model.reset_states()

In [26]:
# The targets produced by the input pipeline are integer ids (only the inputs
# are one-hot encoded), hence the sparse variant of the cross-entropy loss.
model.compile(loss="sparse_categorical_crossentropy",optimizer="adam")

In [17]:
# Train for 50 epochs; ResetStatesCallback clears the recurrent state at the
# start of each epoch.
history = model.fit(dataset,epochs=50,callbacks=[ResetStatesCallback()])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
import joblib
# Persist the trained network itself. The History object returned by fit()
# only records training metrics and has no predict()/get_weights(), so it
# cannot stand in for the model when the file is loaded back.
neuronas_tumbadas = model
joblib.dump(neuronas_tumbadas,"neuronas_tumbadas.pkl")

In [20]:
# Stateless network with the same layer sizes as the training model, but
# without a fixed batch size and without dropout.
stateless_model = kn.models.Sequential()
stateless_model.add(
    kn.layers.GRU(128,return_sequences=True,input_shape=[None,max_id])
)
stateless_model.add(kn.layers.GRU(128,return_sequences=True))
stateless_model.add(
    kn.layers.TimeDistributed(kn.layers.Dense(max_id,activation='softmax'))
)

In [27]:
# NOTE(security): joblib.load unpickles arbitrary objects — only load files
# produced by this notebook.
loaded_artifact = joblib.load("neuronas_tumbadas.pkl")

# The pickle may hold the History returned by fit() instead of the network.
# A History keeps the trained network in its `model` attribute, so unwrap it.
if isinstance(loaded_artifact, kn.callbacks.History):
    trained_model = loaded_artifact.model
else:
    trained_model = loaded_artifact

# The trained network is stateful with a fixed batch size, so it cannot take a
# single prompt. Copy its weights into the stateless twin and use that for
# generation.
stateless_model.build(tf.TensorShape([None,None,max_id]))
stateless_model.set_weights(trained_model.get_weights())
modelo_neuronas_tumbadas = stateless_model
# Optional: persist the inference-ready model as well.
#joblib.dump(modelo_neuronas_tumbadas,"modelo-neuronas-tumbadas.pkl")

In [28]:
import numpy as np
import tensorflow as tf
def preprocess(texts):
    """Encode raw strings as one-hot tensors using the fitted tokenizer.

    Args:
        texts: List of strings to encode.
            assumes all strings encode to the same length so that np.array
            yields a rectangular array — TODO confirm.

    Returns:
        The result of ``tf.one_hot`` over the ids (shifted down by one) with
        depth ``max_id``.
    """
    char_ids = tokenizar.texts_to_sequences(texts)
    shifted_ids = np.array(char_ids) - 1
    return tf.one_hot(shifted_ids, max_id)

In [29]:
def next_char(text,temperature=1):
    """Sample the character that follows ``text`` from the model's prediction.

    Args:
        text: Prompt string to continue.
        temperature: Divisor applied to the log-probabilities before sampling.

    Returns:
        The first decoded string for the sampled id.
    """
    prompt_one_hot = preprocess([text])
    predicted = modelo_neuronas_tumbadas.predict(prompt_one_hot)
    # Keep only the last time step of the first (only) sequence; the slice
    # -1: preserves a leading axis of size one for tf.random.categorical.
    last_step_proba = predicted[0,-1:,:]
    scaled_log_proba = tf.math.log(last_step_proba) / temperature
    # +1 undoes the -1 shift applied when encoding.
    sampled_id = tf.random.categorical(scaled_log_proba,num_samples=1) + 1
    decoded = tokenizar.sequences_to_texts(sampled_id.numpy())
    return decoded[0]

In [30]:
def generating_corrido_tumbado(text,n_chars = 920,temperature=1):
    """Extend ``text`` one sampled character at a time.

    Args:
        text: Seed string to continue.
        n_chars: Number of characters to append.
        temperature: Sampling temperature forwarded to ``next_char``.

    Returns:
        A 3-tuple ``(text, '\\n', text)`` where ``text`` is the seed followed
        by the generated characters. The tuple shape is kept as it was so
        existing callers keep working.
    """
    for _ in range(n_chars):
        text += next_char(text,temperature)
    # Build and return the result only after the loop has finished. Returning
    # inside the loop body stopped generation after a single character.
    corrido = text, '\n', text
    return corrido

In [None]:
from tensorflow import keras as kn
# Reuse the tokenizer that was fitted on the corpus. Creating a fresh, unfitted
# Tokenizer here would discard the vocabulary that preprocess() and the
# decoding step below rely on.
X_new = preprocess(["Soy belic"])
# predict_classes() is not available in recent Keras releases; taking the
# argmax over the last axis of predict() yields the most likely class ids.
Y_pred = np.argmax(modelo_neuronas_tumbadas.predict(X_new), axis=-1)
tokenizar.sequences_to_texts(Y_pred+1)[0][-1]
#expected result 'o'