In [1]:
# Load data
with open("calaveritas.txt", "r", encoding="utf-8") as file:
    text = file.read().lower()  # Convert to lowercase to reduce the vocabulary size

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import pad_sequences
import numpy as np

# Create a mapping from characters to indices and vice versa
chars = sorted(list(set(text)))
char_to_index = {c: i for i, c in enumerate(chars)}
index_to_char = {i: c for i, c in enumerate(chars)}

# Convert the text to indices
encoded_text = [char_to_index[c] for c in text]

# Prepare input-output sequences for training
sequence_length = 40  # Length of each input sequence
X = []
y = []
for i in range(0, len(encoded_text) - sequence_length):
    X.append(encoded_text[i:i + sequence_length])
    y.append(encoded_text[i + sequence_length])

# Reshape and normalize the input
X = np.array(X)
y = np.array(y)
y = np.array([to_categorical(c, num_classes=len(chars)) for c in y])  # One-hot encoding

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

# Define the RNN model
model = Sequential([
    Embedding(len(chars), 50, input_length=sequence_length),
    LSTM(256, return_sequences=True),
    LSTM(256),
    Dense(len(chars), activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam')



In [5]:
# Train the model
epochs = 30  # Adjust as necessary for your dataset
model.fit(X, y, epochs=epochs, batch_size=64)

Epoch 1/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 77ms/step - loss: 3.4802
Epoch 2/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 81ms/step - loss: 3.1156
Epoch 3/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step - loss: 3.0803
Epoch 4/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 104ms/step - loss: 3.0438
Epoch 5/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 114ms/step - loss: 3.0804
Epoch 6/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 202ms/step - loss: 3.0402
Epoch 7/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 196ms/step - loss: 3.0570
Epoch 8/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 207ms/step - loss: 3.0213
Epoch 9/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 180ms/step - loss: 3.0206
Epoch 10/30
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 195ms/step - loss:

<keras.src.callbacks.history.History at 0x1a739d233e0>

In [8]:
def generate_text(model, seed_text, length=100):
    seed_text = seed_text.lower()  # Convert seed text to lowercase
    result = seed_text
    for _ in range(length):
        # Preprocess the seed text
        encoded_seed = [char_to_index[char] for char in seed_text[-sequence_length:]]
        encoded_seed = pad_sequences([encoded_seed], maxlen=sequence_length, padding='pre')
        
        # Predict the next character
        predicted_index = np.argmax(model.predict(encoded_seed), axis=-1)[0]
        predicted_char = index_to_char[predicted_index]
        
        # Append the predicted character to the result
        result += predicted_char
        seed_text += predicted_char

    return result

In [12]:
# Example usage
seed_text = "En el cementerio a solas "
print(generate_text(model, seed_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24

In [11]:
# Increase epochs for more training
epochs = 50
model.fit(X, y, epochs=epochs, batch_size=64)

Epoch 1/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step - loss: 1.3328
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 157ms/step - loss: 1.2178
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 183ms/step - loss: 1.1098
Epoch 4/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 194ms/step - loss: 0.9873
Epoch 5/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 181ms/step - loss: 0.8658
Epoch 6/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 180ms/step - loss: 0.7787
Epoch 7/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 190ms/step - loss: 0.6948
Epoch 8/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 107ms/step - loss: 0.5870
Epoch 9/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 188ms/step - loss: 0.4877
Epoch 10/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 190ms/step - los

<keras.src.callbacks.history.History at 0x1a745893650>

In [None]:
# Example usage
seed_text = "En el cementerio a solas "
print(generate_text(model, seed_text))