# Basics of Hyperparameter Optimization
In this exercise, we tune two hyperparameters: *layer_size* and *learning_rate*. The *layer_size* parameter sets the number of units in the hidden layer, and the *learning_rate* parameter sets the step size used by the optimizer.

#### We try the following (*layer_size*, *learning_rate*) pairs, one pair per trial:

| *layer_size* | *learning_rate* |
| --- | --- |
| 5 | .01 |
| 10 | .1 |
| 20 | .001 |
| 50 | .1 |

In [84]:
import random
import tensorflow as tf
import numpy as np

from tensorflow.keras import layers, losses, optimizers

In [87]:
# Synthetic dataset: 20 samples with 5 uniformly random features each.
X = tf.random.uniform((20, 5))

# Targets: one random class id in [0, 5) per sample, one-hot encoded over 5
# classes; the final squeeze drops the size-1 axis left by the (20, 1) ids.
Y = tf.random.uniform((20, 1), minval=0, maxval=5, dtype=tf.int64)
Y = tf.one_hot(Y, depth=5)
Y = tf.squeeze(Y)

# Hyperparameter candidates for the search, as (layer_size, learning_rate)
# pairs; each pair is evaluated as one trial.
S = list(zip(
    (5, 10, 20, 50),
    (0.01, 0.1, 0.001, 0.1),
))

def create_model(size):
    """Build a classifier with one hidden layer.

    Args:
        size: Number of units in the hidden ReLU layer.

    Returns:
        An uncompiled `tf.keras.Sequential` mapping 5 input features to a
        softmax distribution over 5 classes.
    """
    return tf.keras.Sequential([
        # Each sample is a flat vector of 5 features (X is (20, 5)), so the
        # per-sample input shape is (5,), not (5, 5).
        tf.keras.Input(shape=(5,)),
        layers.Dense(size, activation='relu'),
        layers.Dense(5, activation='softmax'),
    ])

# Best (minimum) training loss of each trial, indexed by trial id.
SEARCH_RESULTS = []

for trial_id, (layer_size, learning_rate) in enumerate(S):
    # Every trial trains a fresh model with its own optimizer.
    model = create_model(size=layer_size)
    opt = optimizers.SGD(learning_rate=learning_rate)
    # Named `trial_losses` so the `losses` module imported from
    # tensorflow.keras is not shadowed.
    trial_losses = []

    for iteration in range(2000):
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            output = model(X)
            # Mean squared error between one-hot targets and softmax output.
            loss = tf.reduce_mean(tf.math.square(Y - output))

        # Differentiate and update outside the tape context so the gradient
        # computation itself is not recorded.
        grads = tape.gradient(loss, model.trainable_variables)
        opt.apply_gradients(zip(grads, model.trainable_variables))
        trial_losses.append(loss.numpy())

    # A trial is scored by the lowest training loss it reached.
    min_loss = np.min(trial_losses)
    SEARCH_RESULTS.append(min_loss)
    fmt = 'Trial: {} learning_rate: {} layer_size: {} loss: {}'
    print(fmt.format(trial_id, learning_rate, layer_size, min_loss))

# The winning trial is the one with the smallest recorded loss.
best_trial_id = np.argmin(SEARCH_RESULTS)
best_loss = SEARCH_RESULTS[best_trial_id]

print('\n=============== Search Summary ===============')
print('Best Trial: {} Loss: {}'.format(best_trial_id, best_loss))

Trial: 0 learning_rate: 0.01 layer_size: 5 loss: 0.1583523005247116
Trial: 1 learning_rate: 0.1 layer_size: 10 loss: 0.12014190107584
Trial: 2 learning_rate: 0.01 layer_size: 20 loss: 0.1500110924243927
Trial: 3 learning_rate: 0.01 layer_size: 50 loss: 0.15035775303840637

Best Trial: 1 Loss: 0.12014190107584
