In [1]:
import tensorflow as tf

# Configurar para que TensorFlow utilice la GPU por defecto
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Configurar para que TensorFlow asigne memoria dinámicamente
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Especificar la GPU por defecto
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Manejar error
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [2]:
import tensorflow_datasets as tfds
import numpy as np
import os

# Cargar el dataset de noticias
dataset, info = tfds.load('ag_news_subset', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

# Convertir a texto plano
train_texts = []
for text, label in tfds.as_numpy(train_dataset):
    train_texts.append(text.decode('utf-8'))

# Concatenar todos los textos en uno solo
text = ' '.join(train_texts)
print(f'Texto total: {len(text)} caracteres')

Texto total: 23328241 caracteres


In [3]:
# Crear un mapeo de caracteres a índices
vocab = sorted(set(text))
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# Convertir los caracteres a índices
text_as_int = np.array([char2idx[c] for c in text])

# Crear secuencias de entrada y salida
seq_length = 100
examples_per_epoch = len(text) // seq_length

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

# Crear lotes de entrenamiento
BATCH_SIZE = 64
BUFFER_SIZE = 10000
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [None]:
EPOCHS = 50

In [4]:
# Construir el modelo
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[BATCH_SIZE, None]),
    tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
])

# Función de pérdida
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

model.compile(optimizer='adam', loss=loss)

history = model.fit(dataset, epochs=EPOCHS)

model.save('model_char.h5')

Epoch 1/50


KeyboardInterrupt: 

In [11]:
import re

# Tokenizar el texto en palabras
words = re.findall(r'\b\w+\b', text.lower())
word2idx = {u: i for i, u in enumerate(set(words))}
idx2word = np.array(list(set(words)))

# Convertir las palabras a índices
text_as_int = np.array([word2idx[w] for w in words])

# Crear secuencias de entrada y salida
seq_length = 20
examples_per_epoch = len(words) // seq_length

word_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = word_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

# Crear lotes de entrenamiento
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [12]:
# Construir el modelo
vocab_size = len(word2idx)
embedding_dim = 256
rnn_units = 1024

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[BATCH_SIZE, None]),
    tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
])

# Función de pérdida
model.compile(optimizer='adam', loss=loss)

# Entrenar el modelo
history = model.fit(dataset, epochs=EPOCHS)

model.save('model_words.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
import numpy as np

# Función para generar texto
def generate_text(model, start_string, char2idx, idx2char, num_generate=1000, temperature=1.0):
    # Vectorizar el texto de entrada
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Crear una lista para almacenar el texto generado
    text_generated = []

    # Restablecer el estado del modelo
    model.reset_states()

    for i in range(num_generate):
        predictions = model(input_eval)
        
        # Quitar la dimensión del batch
        predictions = tf.squeeze(predictions, 0)

        # Usar una distribución categórica para predecir el próximo carácter
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # Pasar el carácter predicho como la siguiente entrada al modelo
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

start_string = "The"
print(generate_text(model, start_string, char2idx, idx2char))


ValueError: Exception encountered when calling layer "sequential_3" "                 f"(type Sequential).

Input 0 of layer "gru_3" is incompatible with the layer: expected shape=(64, None, 256), found shape=(1, 3, 256)

Call arguments received by layer "sequential_3" "                 f"(type Sequential):
  • inputs=tf.Tensor(shape=(1, 3), dtype=int32)
  • training=None
  • mask=None

In [None]:
model_chars = tf.keras.models.load_model('model_char.h5')
model_words = tf.keras.models.load_model('model_words.h5')

In [None]:
# Generar texto con ambos modelos
generated_text_char = generate_text(model_chars, start_string, char2idx, idx2char)
generated_text_word = generate_text(model_words, start_string, word2idx, idx2word)

print("Texto generado a nivel de caracteres:")
print(generated_text_char)
print("\nTexto generado a nivel de palabras:")
print(generated_text_word)

In [None]:
import tensorflow as tf

# Define the custom loss function
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

# Load the model with the custom loss function
with tf.keras.utils.custom_object_scope({'loss': loss}):
    original_model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/Aprendizaje automatico II/model_words.h5')

# Recreate the model architecture with batch size 1 for inference
vocab_size = original_model.layers[0].input_dim
embedding_dim = original_model.layers[0].output_dim
rnn_units = original_model.layers[1].units

inference_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[1, None]),
    tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
])

# Load the weights from the original model
inference_model.set_weights(original_model.get_weights())

# Text generation function
def generate_text(model, start_string, words2idx, idx2words, num_generate=1000, temperature=1.0):
    # Vectorize the input text
    input_eval = [words2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Create a list to store the generated text
    text_generated = []

    # Reset the model state
    model.reset_states()

    for i in range(num_generate):
        predictions = model(input_eval)

        # Remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Use a categorical distribution to predict the next wordsacter
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Pass the predicted wordsacter as the next input to the model
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2words[predicted_id])

    return start_string + ''.join(text_generated)

# Generate text
generated_text_words = generate_text(inference_model, "The", words2idx, idx2words)
print("Generated text (wordsacter level):")
print(generated_text_words)