In [1]:
import numpy as np 

import tensorflow as tf 
from tensorflow.keras.layers import Layer, Embedding, Dense, MultiHeadAttention, LayerNormalization, Dropout 
from tensorflow.keras.models import Model 
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# ======= 1. BLOQUE TRANSFORMER ======= 
class TransformerBlock(Layer): 
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): 
        super(TransformerBlock, self).__init__() 
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim) 
        self.ffn = tf.keras.Sequential([ 
        Dense(ff_dim, activation="relu"), 
        Dense(embed_dim) 
        ]) 
        self.layernorm1 = LayerNormalization(epsilon=1e-6) 
        self.layernorm2 = LayerNormalization(epsilon=1e-6) 
        self.dropout1 = Dropout(rate) 
        self.dropout2 = Dropout(rate) 
        
    def call(self, inputs, training): 
        attn_output = self.att(inputs, inputs)  # Autoatención 
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output) 
        ffn_output = self.ffn(out1) 
        ffn_output = self.dropout2(ffn_output, training=training) 
        return self.layernorm2(out1 + ffn_output) 

In [6]:
# ======= 2. MODELO TRANSFORMER ======= 
class MiniGPT(Model): 
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, num_blocks, max_len): 
        super(MiniGPT, self).__init__() 
        self.embed_dim = embed_dim 
        self.embedding = Embedding(vocab_size, embed_dim) 
        self.pos_embedding = Embedding(max_len, embed_dim) 
        self.transformer_blocks = [TransformerBlock(embed_dim, num_heads, 
        ff_dim) for _ in range(num_blocks)] 
        self.norm = LayerNormalization(epsilon=1e-6) 
        self.out_layer = Dense(vocab_size, activation="softmax") 
    def call(self, inputs, training): 
        positions = tf.range(start=0, limit=tf.shape(inputs)[-1], delta=1) 
        embedded_inputs = self.embedding(inputs) + self.pos_embedding(positions) 
        x = embedded_inputs 
        for transformer_block in self.transformer_blocks: 
            x = transformer_block(x, training=training) 
        x = self.norm(x) 
        return self.out_layer(x)

In [7]:
# ======= 3. PREPARAR EL MODELO ======= 
vocab_size = 10000  # Número de palabras en el vocabulario 
embed_dim = 128  # Dimensión de los embeddings 
num_heads = 8  # Número de cabezas de atención 
ff_dim = 512  # Dimensión de la red feedforward 
num_blocks = 4  # Número de bloques Transformer 
max_len = 50  # Longitud máxima de la secuencia 
# Crear el modelo 
transformer = MiniGPT(vocab_size, embed_dim, num_heads, ff_dim, num_blocks, max_len) 
transformer.compile(optimizer="adam", loss="sparse_categorical_crossentropy") 
transformer.summary() 
# Simulación de datos aleatorios (¡Usar datos reales en la práctica!) 
X_train = np.random.randint(0, vocab_size, (1000, max_len))  # 1000 ejemplos 
y_train = np.random.randint(0, vocab_size, (1000, max_len)) 
# Entrenar el modelo 
transformer.fit(X_train, y_train, batch_size=32, epochs=10)

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 358ms/step - loss: 9.2246
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.1643
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.1494
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.1425
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.1356
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.1351
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 9.1236
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 9.1202
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 9.1182
Epoch 10/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 9.11

<keras.src.callbacks.history.History at 0x7e4ce5c520e0>

In [8]:
## CÓDIGO DEL PROGRAMA DE GENERACIÓN DE TEXTO BASADO EN EL TRANSFORMER 
# ======= 1. TOKENIZADOR ======= 
# Simulación de un vocabulario pequeño (en práctica usar más datos) 
vocab_size = 10000 
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>") 
tokenizer.fit_on_texts(["Hola, ¿cómo estás?", "El modelo Transformer genera texto.", "Aprender inteligencia artificial es emocionante."]) 
 
# ======= 2. FUNCIÓN PARA GENERAR TEXTO ======= 
def generar_texto(model, seed_text, max_len=50, num_words=10): 
    for _ in range(num_words): 
        # Convertir el texto en tokens 
        token_list = tokenizer.texts_to_sequences([seed_text])[0] 
        token_list = pad_sequences([token_list], maxlen=max_len, padding='pre') 
 
        # Predecir siguiente palabra 
        predicted_probs = model.predict(token_list, verbose=0)[0][-1]  # Última palabra 
        predicted_index = np.argmax(predicted_probs)  # Elegir la palabra con mayor probabilidad 
         
        # Convertir índice en palabra 
        for word, index in tokenizer.word_index.items(): 
            if index == predicted_index: 
                seed_text += " " + word 
                break 
 
    return seed_text 
 
# ======= 3. PRUEBA DE GENERACIÓN ======= 
texto_generado = generar_texto(transformer, "Hola, ¿cómo", num_words=10) 
print("Texto generado:", texto_generado)

Texto generado: Hola, ¿cómo
