In [4]:
import tensorflow as tf
import numpy as np

# ============================
# 1. Texto base (corpus largo)
# ============================
text = (
    "En un lugar de la Mancha, de cuyo nombre no quiero acordarme, "
    "no ha mucho tiempo que vivía un hidalgo de los de lanza en astillero, "
    "adarga antigua, rocín flaco y galgo corredor. "
    "Este texto es suficientemente largo para entrenar un modelo de prueba."
)

# Diccionario de caracteres
chars = sorted(list(set(text)))
char2idx = {c: i for i, c in enumerate(chars)}
idx2char = {i: c for i, c in enumerate(chars)}

# Convertir texto a índices
seq = [char2idx[c] for c in text]

# ============================
# 2. Preparar dataset
# ============================
seq_length = 40  # longitud de secuencia
char_dataset = tf.data.Dataset.from_tensor_slices(seq)

# Crear secuencias de longitud seq_length+1
sequences = char_dataset.batch(seq_length+1, drop_remainder=False)

def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)
dataset = dataset.shuffle(100).batch(64, drop_remainder=True)

# ============================
# 3. Definir modelo
# ============================
vocab_size = len(chars)
embedding_dim = 64
rnn_units = 128

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.LSTM(rnn_units, return_sequences=True),
    tf.keras.layers.Dense(vocab_size)
])

loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=loss)

# ============================
# 4. Entrenamiento
# ============================
history = model.fit(dataset, epochs=10)

# ============================
# 5. Función de generación
# ============================
def generate_text(model, start_string, num_generate=200):
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []
    temperature = 1.0  # controla aleatoriedad

    for i in range(num_generate):
        predictions = model(input_eval)
        predictions = predictions[:, -1, :] / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        input_eval = tf.concat([input_eval, tf.expand_dims([predicted_id], 0)], axis=1)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# ============================
# 6. Probar generación
# ============================
print(generate_text(model, start_string="En un lugar "))


ValueError: Expected input data to be non-empty.