#Creation of Calaveritas
##Rodrigo Espíritu Berra 173863

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Cargar el archivo de calaveritas
with open("calaveritas.csv", "r", encoding="utf-8") as file:
    calaveritas_text = file.read()


In [5]:
# Dividir las calaveritas en una lista de poemas, cada uno separado por un salto de línea doble (\n\n)
calaveritas = calaveritas_text.strip().split("\n\n")

# Crear el DataFrame
calaveritas_df = pd.DataFrame({"calaverita": calaveritas})

# Ver el DataFrame
calaveritas_df.head()


Unnamed: 0,calaverita
0,"calaverita\n""El estudiante tan valiente,\nen l..."


In [6]:
# Cargar y Preprocesar los Datos
calaveritas_text = " ".join(calaveritas_df["calaverita"])

# Tokenización del texto
tokenizer = Tokenizer(char_level=True)  # Tokenización a nivel de carácter
tokenizer.fit_on_texts([calaveritas_text])
total_chars = len(tokenizer.word_index) + 1

# Convertir el texto en secuencias de enteros
input_sequences = []
for i in range(1, len(calaveritas_text)):
    sequence = calaveritas_text[:i + 1]
    token_list = tokenizer.texts_to_sequences([sequence])[0]
    input_sequences.append(token_list)

# Asegurar que todas las secuencias tengan la misma longitud
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

# Dividir en entradas (X) y etiquetas (y)
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_chars)

# Construir el Modelo RNN
model = Sequential([
    Embedding(total_chars, 64, input_length=max_sequence_len - 1),
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(total_chars, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Entrenar el Modelo
history = model.fit(X, y, epochs=100, verbose=1)

# Generar nuevas Calaveritas
def generate_calaverita(seed_text, next_chars=100):
    for _ in range(next_chars):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)
        output_char = tokenizer.index_word[predicted[0]]
        seed_text += output_char
    return seed_text

# Ejemplo de generación de calaverita
seed_text = "La muerte llegó"
print(generate_calaverita(seed_text, next_chars=100))




Epoch 1/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 87ms/step - accuracy: 0.0783 - loss: 3.4544
Epoch 2/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 86ms/step - accuracy: 0.1289 - loss: 3.0616
Epoch 3/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 87ms/step - accuracy: 0.1404 - loss: 3.0812
Epoch 4/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 92ms/step - accuracy: 0.1339 - loss: 3.0312
Epoch 5/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.1324 - loss: 3.0423
Epoch 6/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.1399 - loss: 3.0398
Epoch 7/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.2228 - loss: 2.9421
Epoch 8/100
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - accuracy: 0.2259 - loss: 2.8668
Epoch 9/100
[1m38/38[0m [32m━━━━━━━━