<a href="https://colab.research.google.com/github/Mrsnellek/MSDS_686_22F8W2/blob/23S8W1/MSDS%20686/Week_3/Optional_Reuters_Regularization_and_Dropout_TensorBoard_Extra_Work.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reuters Regularization and Dropout Using TensorBoard
## Adapted from Deep Learning with Python by Francois Chollet
#### Use the Keras Reuters dataset to classify news wires into 46 different categories.  Use regularization and stopearly call backs to improve your neural network.

In [60]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

  from IPython.core.display import display, HTML


In [40]:
# Import all the necessary libraries and set the np and tf seed
import time

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [41]:
# Import the Reuters data set from the Keras library. https://keras.io/datasets/
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.reuters.load_data(num_words=20000, skip_top=20)
train_data.shape, test_data.shape

((8982,), (2246,))

In [42]:
# Define a function to vectorize the data.
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

In [43]:
best_trial = tuner.oracle.get_best_trials()[0]
best_trial.summary()

Trial 01 summary
Hyperparameters:
n_hidden: 1
n_units: 256
learning_rate: 0.005957170974089395
dropout: 0.2606644755079035
Score: 0.8450000286102295


In [44]:
best_trial.best_step

1

In [45]:
best_model = tuner.get_best_models()[0]
loss, accuracy = best_model.evaluate(test_ds)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.8112 - loss: 0.9041
Test Loss: 0.9328
Test Accuracy: 0.8081


In [46]:
# Vectorize the train_data and test_data
train_data = vectorize_sequences(train_data, 20000)
test_data = vectorize_sequences(test_data, 20000)
train_data.shape, test_data.shape

((8982, 20000), (2246, 20000))

In [47]:
# Encode the labes to categorical. 

In [48]:
# Split the data into training and validation
BATCH_SIZE = 256


test_ds = tf.data.Dataset.from_tensor_slices((test_data, test_labels)).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_ds = tf.data.Dataset.from_tensor_slices((train_data[:1000], train_labels[:1000])).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
train_ds = tf.data.Dataset.from_tensor_slices((train_data[1000:], train_labels[1000:])).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Let us try to improve on our model using TensorBoard and optimize the hyper-parameters.
## This tutorial was adapted from these sources: https://www.tensorflow.org/tensorboard/get_started and https://towardsdatascience.com/tensorboard-hyperparameter-optimization-a51ef7af71f5

In [49]:
# Import libaries needed for TensorBoard


In [50]:
# Clear old log files


In [51]:
# Define the hyper-parameters to grid search

# Write all the hyperpareters to file


In [52]:
# Define the nerual net model.  Notice the hyperparameters added and the callbacks
def model_builder(hp):
    n_hidden = hp.Int('n_hidden', min_value=1, max_value=2)
    n_units = hp.Int('n_units', min_value=256, max_value=1024, step=2, sampling='log')
    learning_rate = hp.Float('learning_rate', min_value=1e-3, max_value=1e-2, sampling='log')
    dropout = hp.Float('dropout', min_value=0.2, max_value=0.9)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(20000,)))
    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(n_units, activation='relu', kernel_initializer='he_normal'))
        model.add(tf.keras.layers.Dropout(dropout))
    model.add(tf.keras.layers.Dense(46, activation='softmax'))
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [53]:
# Create a function to save hyperparamters and accuray output to file
log_dir = "logs/fit/" + time.strftime("%Y%m%d-%H%M%S")

tuner = kt.BayesianOptimization(model_builder, objective='val_accuracy', max_trials=20, project_name='reuters_TB')
tuner.search(train_ds, validation_data=val_ds, epochs=100, callbacks=[tf.keras.callbacks.EarlyStopping('val_accuracy', patience=3, restore_best_weights=True), tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])


Reloading Tuner from ./reuters_TB/tuner0.json


In [54]:
best_trial = tuner.oracle.get_best_trials()[0]
best_trial.summary()

Trial 01 summary
Hyperparameters:
n_hidden: 1
n_units: 256
learning_rate: 0.005957170974089395
dropout: 0.2606644755079035
Score: 0.8450000286102295


In [55]:
best_trial.best_step

1

In [56]:
best_model = tuner.get_best_models()[0]
loss, accuracy = best_model.evaluate(test_ds)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.8112 - loss: 0.9041
Test Loss: 0.9328
Test Accuracy: 0.8081


In [57]:
# Loop through hyperparameters and save outputs to file


In [61]:
# Load tensorboard and view output.  
%load_ext tensorboard
%tensorboard --logdir logs/fit

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 70271), started 0:00:48 ago. (Use '!kill 70271' to kill it.)