<a href="https://colab.research.google.com/github/CristianS1988/GooSL/blob/main/Laboratorio3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Generador de texto


In [1]:
import tensorflow as tf
import requests

# Download the book text.
# Replace this URL with the one of any other plain-text book you want to train on.
url = "https://www.gutenberg.org/files/47631/47631-0.txt"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx so an HTTP error page is never tokenized as the corpus.
response.raise_for_status()
# Normalize line endings to '\n'. The text is later split on '\n' only, so a
# '\r' from CRLF-terminated files would otherwise survive and can end up
# attached to the last word of every line (e.g. "word\r" vs "word").
book_text = response.text.replace('\r\n', '\n').replace('\r', '\n')

# Tokenization: fit a word-level tokenizer on the whole book, passed as a
# single document.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(texts=[book_text])

# Vocabulary size plus one; the extra slot accounts for index 0, which the
# padding step uses as filler (presumably never assigned to a real word).
total_words = 1 + len(tokenizer.word_index)

# Build n-gram training sequences: for every line of the book, emit each
# prefix of its token list that has at least two tokens (context + next word).
lines = book_text.split('\n')
sequences = [
    encoded_line[:prefix_end]
    for encoded_line in tokenizer.texts_to_sequences(lines)
    for prefix_end in range(2, len(encoded_line) + 1)
]

# Pad every n-gram on the left up to the length of the longest one so they
# can be stacked into a single 2-D array.
max_sequence_length = max(map(len, sequences))
sequences = tf.keras.preprocessing.sequence.pad_sequences(
    sequences,
    maxlen=max_sequence_length,
    padding="pre",
)

# Split inputs from targets: the last column is the word to predict, every
# column before it is the context fed to the network.
input_sequences = sequences[:, :-1]
# One-hot encode the target word ids over the whole vocabulary.
labels = tf.keras.utils.to_categorical(sequences[:, -1], num_classes=total_words)

# Baseline model: embedding -> single LSTM -> softmax over the vocabulary.
modelo = tf.keras.Sequential()
modelo.add(
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_length - 1)
)
modelo.add(tf.keras.layers.LSTM(150))
modelo.add(tf.keras.layers.Dense(total_words, activation="softmax"))

# Compile: categorical cross-entropy matches the one-hot encoded labels.
modelo.compile(
    optimizer="adam",
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=["accuracy"],
)

# Train on the full set of n-gram sequences.
modelo.fit(input_sequences, labels, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7c50a24b5480>

### Modelo mejorado

In [2]:
# Improved model: two stacked LSTM layers with dropout in between, followed by
# an intermediate dense layer before the softmax over the vocabulary.
modelo_mejorado = tf.keras.Sequential([
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_length - 1),
    # return_sequences=True so the second LSTM receives the full sequence.
    tf.keras.layers.LSTM(150, return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(100),
    # `units` must be an integer: use floor division, since `total_words / 2`
    # is a float in Python 3.
    tf.keras.layers.Dense(total_words // 2, activation="relu"),
    tf.keras.layers.Dense(total_words, activation="softmax")
])

# Compile: categorical cross-entropy matches the one-hot encoded labels.
modelo_mejorado.compile(loss=tf.keras.losses.categorical_crossentropy, optimizer="adam", metrics=["accuracy"])

# Train on the full set of n-gram sequences.
modelo_mejorado.fit(x=input_sequences, y=labels, epochs=30)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7c50a2e40a90>

### Ajuste de Hiperparámetros

In [5]:
!pip install keras-tuner
from kerastuner.tuners import RandomSearch

def build_model(hp):
    """Build and compile one candidate model for the hyperparameter search.

    Two hyperparameters are sampled from ``hp``:
      * ``embedding_dim``: 50 to 150 in steps of 10.
      * ``lstm_units``: 50 to 200 in steps of 10.

    Relies on the notebook-level ``total_words`` and ``max_sequence_length``.

    Returns:
        A compiled ``tf.keras.Sequential`` model (embedding -> LSTM -> softmax).
    """
    embedding_dim = hp.Int('embedding_dim', min_value=50, max_value=150, step=10)
    lstm_units = hp.Int('lstm_units', min_value=50, max_value=200, step=10)

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(
            total_words, embedding_dim, input_length=max_sequence_length - 1
        ),
        tf.keras.layers.LSTM(lstm_units),
        tf.keras.layers.Dense(total_words, activation='softmax'),
    ])
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

# Random search over the space defined in `build_model`, ranked by validation
# accuracy. Each of the 5 trials is trained twice to average out the noise.
# NOTE(review): no `directory`/`project_name` is given, so results are stored
# under ./untitled_project and silently reloaded on later runs (see the
# "Reloading Tuner" message in the captured output). Pass `overwrite=True` if
# a fresh search is wanted after the data or the search space changes.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=2,
    seed=42,  # fixed seed so the sampled hyperparameter combinations are reproducible
)

# Hold out the last 20% of the samples as validation data for the objective.
tuner.search(x=input_sequences, y=labels, epochs=5, validation_split=0.2)


Reloading Tuner from ./untitled_project/tuner0.json


### Validación y Pruebas

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    input_sequences, labels, test_size=0.2, random_state=42
)
# NOTE(review): `modelo` was already fitted on the full `input_sequences` in
# an earlier cell, so the rows in `x_val` are not unseen data and the
# validation metrics reported here are optimistic. Rebuild the model before
# this cell if an unbiased estimate is needed.
modelo.fit(x=x_train, y=y_train, validation_data=(x_val, y_val), epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7c50a2487e50>

### Visualizaciones

In [7]:
# Log training to ./logs so the run can be inspected in TensorBoard.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs')
modelo.fit(
    input_sequences,
    labels,
    epochs=20,
    callbacks=[tensorboard_callback],
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7c50a0c46d70>

### Regularización

In [8]:
from tensorflow.keras.regularizers import l2

# Regularized variant of the baseline: the LSTM gets recurrent dropout and an
# L2 penalty on its input kernel. This rebinds `modelo`; the new model is only
# constructed here, it is not compiled or trained in this cell.
modelo = tf.keras.Sequential()
modelo.add(
    tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_length - 1)
)
modelo.add(
    tf.keras.layers.LSTM(150, recurrent_dropout=0.2, kernel_regularizer=l2(0.01))
)
modelo.add(tf.keras.layers.Dense(total_words, activation="softmax"))


In [9]:
# Fetch the single best hyperparameter set found by the search and show it.
best_hps = tuner.get_best_hyperparameters(1)[0]
print(best_hps.values)


{'embedding_dim': 80, 'lstm_units': 180}


In [10]:
# Retrieve the top-ranked model from the search.
best_model = tuner.get_best_models(1)[0]


In [11]:
# Print the ranked trials (up to the 10 best) with their hyperparameters and scores.
tuner.results_summary(num_trials=10)


Results summary
Results in ./untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 1 summary
Hyperparameters:
embedding_dim: 80
lstm_units: 180
Score: 0.10389610379934311

Trial 0 summary
Hyperparameters:
embedding_dim: 90
lstm_units: 160
Score: 0.10064934939146042

Trial 3 summary
Hyperparameters:
embedding_dim: 120
lstm_units: 170
Score: 0.08116883039474487

Trial 4 summary
Hyperparameters:
embedding_dim: 130
lstm_units: 60
Score: 0.07467532530426979

Trial 2 summary
Hyperparameters:
embedding_dim: 130
lstm_units: 50
Score: 0.07467532530426979


In [12]:
# Continue training the best model from the search on the full dataset.
best_model.fit(
    input_sequences,
    labels,
    epochs=50,
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7c50a0f4dbd0>

NameError: ignored

<Figure size 1200x600 with 0 Axes>