# 🧠 Mini Transformer Mejorado para Generación de Texto
Este notebook entrena un mini modelo tipo GPT desde cero con corpus ampliado, control de temperatura y arquitectura mejorada.

In [None]:
!pip install tensorflow numpy



In [None]:

import tensorflow as tf
import numpy as np

# Extended training corpus: one short paragraph repeated ten times.
_paragraph = """
    Bienvenido al curso de inteligencia artificial. Aquí aprenderás sobre algoritmos,
    redes neuronales, transformers y modelos generativos. La inteligencia artificial
    está transformando el mundo. Este modelo fue entrenado con texto de prueba.
    """
text = _paragraph * 10

# Character-level vocabulary: every distinct character of the corpus, sorted.
vocab = sorted(set(text))
vocab_size = len(vocab)

# Lookup tables in both directions: character -> id and id -> character.
char2idx = dict(zip(vocab, range(vocab_size)))
idx2char = np.array(vocab)

# The whole corpus encoded as an array of integer character ids.
text_as_int = np.array([char2idx[ch] for ch in text])


In [None]:

# Number of input characters in each training example.
seq_length = 50

# Stream the encoded corpus element by element, then cut it into consecutive
# chunks of seq_length + 1 ids: the extra id lets each chunk be split into an
# input and a target shifted by one position. A short final chunk is dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
chunk_size = seq_length + 1
sequences = char_dataset.batch(chunk_size, drop_remainder=True)

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element of ``chunk`` except the last and the target is
    every element except the first, so ``target[i]`` is the element that
    follows ``input[i]`` in the chunk.
    """
    inputs = chunk[:-1]
    targets = chunk[1:]
    return inputs, targets

# Turn every chunk into an (input, target) pair, shuffle the pairs, group them
# into batches of 4 (dropping an incomplete last batch) and repeat indefinitely.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(10000)
    .batch(4, drop_remainder=True)
    .repeat()
)


In [None]:

def get_angles(pos, i, d_model):
    """Compute ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Args:
        pos: Position index, or an array of position indices.
        i: Dimension index, or an array of dimension indices. Because of the
            ``i // 2``, each even index shares its rate with the next odd one.
        d_model: Total number of dimensions; converted to float32 before the
            division.

    Returns:
        ``pos`` multiplied by the per-dimension rate (broadcast when arrays
        are passed).
    """
    exponent = (2 * (i // 2)) / np.float32(d_model)
    rate = 1 / np.power(10000, exponent)
    return pos * rate

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Args:
        position: Number of positions (rows) to encode.
        d_model: Width of each encoding vector (columns).

    Returns:
        A float32 tensor of shape ``(1, position, d_model)`` holding the sine
        of the angle in even columns and the cosine in odd columns.
    """
    positions = np.arange(position)[:, np.newaxis]
    dims = np.arange(d_model)[np.newaxis, :]
    encoding = get_angles(positions, dims, d_model)

    # Overwrite the raw angles in place: sin on even columns, cos on odd ones.
    even_cols = slice(0, None, 2)
    odd_cols = slice(1, None, 2)
    encoding[:, even_cols] = np.sin(encoding[:, even_cols])
    encoding[:, odd_cols] = np.cos(encoding[:, odd_cols])

    # Prepend an axis of length 1 before converting to a float32 tensor.
    return tf.cast(encoding[np.newaxis, ...], dtype=tf.float32)


In [None]:

class TransformerBlock(tf.keras.layers.Layer):
    """Transformer block: causal self-attention followed by a feed-forward net.

    Each of the two sub-layers is wrapped in a residual connection followed by
    layer normalization.

    Args:
        d_model: Width of the vectors the block receives and returns.
        num_heads: Number of attention heads.
        dff: Hidden width of the feed-forward sub-layer.
    """

    def __init__(self, d_model, num_heads, dff):
        super().__init__()
        # NOTE: key_dim is the per-head projection size; it is kept at d_model
        # (rather than d_model // num_heads) to preserve the original sizing.
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model)
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization()
        self.layernorm2 = tf.keras.layers.LayerNormalization()

    def call(self, x, training=None):
        """Apply the block to ``x``.

        Args:
            x: Input tensor; it is used as query, value and key of the
                self-attention, so it is expected to be
                (batch, sequence, d_model).
            training: Optional training flag forwarded to the sub-layers. It
                defaults to None so the layer can be called without it; a
                required ``training`` argument makes such calls fail with
                "missing a required argument: 'training'".

        Returns:
            A tensor with the same shape as ``x``.
        """
        # The causal mask stops each position from attending to later ones.
        # Without it, a model trained to predict the next element could simply
        # read that element from its own input.
        attn_output = self.mha(x, x, x, use_causal_mask=True, training=training)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1, training=training)
        return self.layernorm2(out1 + ffn_output)


In [None]:

class MiniTransformer(tf.keras.Model):
    """Small sequence model: embedding, positional encoding, one transformer
    block and a final dense projection onto the vocabulary.

    Args:
        vocab_size: Number of distinct token ids; also the output width.
        seq_len: Number of positions in the precomputed positional encoding.
        d_model: Embedding width used throughout the model.
        num_heads: Number of attention heads of the transformer block.
        dff: Hidden width of the block's feed-forward sub-layer.
    """

    def __init__(self, vocab_size, seq_len, d_model, num_heads, dff):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, d_model)
        self.pos_encoding = positional_encoding(seq_len, d_model)
        self.transformer = TransformerBlock(d_model, num_heads, dff)
        self.final = tf.keras.layers.Dense(vocab_size)

    def call(self, x, training=False):
        """Map a batch of token ids to per-position scores over the vocabulary.

        Args:
            x: Tensor of token ids; its second axis is the sequence axis.
            training: Training flag; it defaults to False and is forwarded to
                the transformer block.

        Returns:
            The output of the final dense layer (no activation is applied).
        """
        n_steps = tf.shape(x)[1]
        embedded = self.embedding(x)
        # Use only as many encoding rows as the input has positions.
        positioned = embedded + self.pos_encoding[:, :n_steps, :]
        hidden = self.transformer(positioned, training=training)
        return self.final(hidden)



In [None]:

# Build the model before compiling it: `model` was never defined, so on a
# fresh kernel this cell failed with a NameError.
# NOTE(review): d_model / num_heads / dff are illustrative values — tune them.
model = MiniTransformer(
    vocab_size=vocab_size,
    seq_len=seq_length,
    d_model=64,
    num_heads=4,
    dff=128,
)

# from_logits=True: the loss applies the softmax itself to the raw scores.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn)

# steps_per_epoch fixes how many batches make up one epoch.
model.fit(dataset, epochs=10, steps_per_epoch=50)


Epoch 1/10


TypeError: Exception encountered when calling MiniTransformer.call().

[1mmissing a required argument: 'training'[0m

Arguments received by MiniTransformer.call():
  • x=tf.Tensor(shape=(4, 50), dtype=int64)
  • training=True

In [None]:

def generate_text(model, start_string, gen_length=200, max_seq_len=50, temperature=0.8):
    """Sample text from the model one character at a time, starting from a prompt.

    Args:
        model: Callable that maps a (1, n) tensor of character ids to scores
            of shape (1, n, vocabulary size).
        start_string: Non-empty prompt; every character must be a key of the
            module-level ``char2idx``.
        gen_length: Number of characters to generate.
        max_seq_len: Largest number of trailing ids fed to the model per step.
        temperature: Positive divisor applied to the scores before sampling.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty, ``temperature`` is not
            positive or ``max_seq_len`` is smaller than 1.
        KeyError: If ``start_string`` contains a character missing from
            ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be greater than 0")
    if max_seq_len < 1:
        raise ValueError("max_seq_len must be at least 1")

    prompt_ids = [char2idx[ch] for ch in start_string]
    # Truncate the prompt up front: previously the window was only trimmed
    # after the first prediction, so a prompt longer than max_seq_len reached
    # the model untruncated.
    # int64 matches the dtype returned by tf.random.categorical, so the
    # concatenation below never mixes integer types.
    input_eval = tf.constant([prompt_ids[-max_seq_len:]], dtype=tf.int64)
    text_generated = []

    for _ in range(gen_length):
        predictions = model(input_eval)
        # Only the scores of the last position decide the next character.
        last_scores = predictions[:, -1, :] / temperature
        sampled = tf.random.categorical(last_scores, num_samples=1)  # shape (1, 1)
        predicted_id = int(sampled[-1, 0].numpy())

        input_eval = tf.concat([input_eval, sampled], axis=-1)
        if input_eval.shape[1] > max_seq_len:
            input_eval = input_eval[:, -max_seq_len:]
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Generate a continuation of a short prompt and display it.
generated_sample = generate_text(model, "La inteligencia")
print(generated_sample)
