# Problem 3 - Ray Tune for Hyperparameter Optimization

Sources: https://docs.ray.io/en/latest/tune/index.html

## 3.1

In [None]:
#!pip install -U "ray[tune]"

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential

import ray
from ray import tune
from ray.tune.integration.keras import TuneReportCallback
from ray.tune.schedulers import ASHAScheduler
from ray.air import session
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.schedulers import HyperBandScheduler
from ray.tune.search.hyperopt import HyperOptSearch

import time
import numpy as np


In [None]:
# Initialize Ray. With no address given this starts a local Ray instance (see
# the log line below); ignore_reinit_error=True lets this notebook cell be
# re-run without raising because Ray is already initialized.
ray.init(ignore_reinit_error=True)

2023-12-05 01:02:30,639	INFO worker.py:1673 -- Started a local Ray instance.


0,1
Python version:,3.8.6
Ray version:,2.8.1


### Grid Search

In [None]:
def train_mnist(config):
    """Train a small CNN on MNIST and report test accuracy to Tune each epoch.

    Args:
        config: Hyperparameter dict supplied by Ray Tune. Reads the keys
            "conv_filters", "dropout", "lr" and "batch_size".

    Reports:
        {"mean_accuracy": <test accuracy>} once per epoch via
        ``session.report``, so each epoch counts as one training iteration
        and a ``stop`` condition on the metric can end the trial early.
    """
    num_classes = 10
    epochs = 12

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Add the trailing channel axis expected by Conv2D and rescale the pixels.
    x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
    x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(
            filters=config["conv_filters"],
            kernel_size=(3, 3),
            activation="relu",
            input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(config["dropout"]),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=config["lr"]),
        metrics=["accuracy"])

    for _ in range(epochs):
        # One epoch per fit() call so a result can be reported after each.
        # validation_data is intentionally not passed: with verbose=0 its
        # result was discarded and evaluate() below scores the same test set,
        # so it only doubled the per-epoch evaluation cost.
        model.fit(
            x_train,
            y_train,
            batch_size=config["batch_size"],
            epochs=1,
            verbose=0)

        # Evaluate the model on the held-out test split and hand it to Tune.
        _, accuracy = model.evaluate(x_test, y_test, verbose=0)
        session.report({"mean_accuracy": accuracy})


In [None]:
# Time the whole grid search, including trial scheduling overhead.
start_time = time.time()

# tune.grid_search entries are expanded into every combination of the listed
# values (3 * 3 * 3 * 5 configurations here), each run as its own trial.
grid_space = dict(
    conv_filters=tune.grid_search([64, 128, 256]),
    lr=tune.grid_search([0.001, 0.01, 0.1]),
    batch_size=tune.grid_search([64, 128, 256]),
    dropout=tune.grid_search([0.0, 0.25, 0.5, 0.75, 0.9]),
)

grid_analysis = tune.run(
    train_mnist,
    name="exp",
    config=grid_space,
    # Maximise reported test accuracy; a trial ends as soon as it hits 0.99.
    metric="mean_accuracy",
    mode="max",
    stop={"mean_accuracy": 0.99},
    # Each trial reserves one GPU.
    resources_per_trial={"gpu": 1},
)

end_time = time.time()
grid_time = end_time - start_time

2023-11-24 16:00:22,466	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-11-24 17:27:57
Running for:,01:27:27.60
Memory:,40.1/377.3 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,lr,acc,iter,total time (s)
train_mnist_7b9c5_00000,TERMINATED,10.32.35.60:3069754,64,64,0.0,0.001,0.9864,12,37.562
train_mnist_7b9c5_00001,TERMINATED,10.32.35.60:3070976,128,64,0.0,0.001,0.9864,12,23.3354
train_mnist_7b9c5_00002,TERMINATED,10.32.35.60:3071872,256,64,0.0,0.001,0.9873,12,19.6695
train_mnist_7b9c5_00003,TERMINATED,10.32.35.60:3072733,64,128,0.0,0.001,0.9876,12,33.132
train_mnist_7b9c5_00004,TERMINATED,10.32.35.60:3073722,128,128,0.0,0.001,0.9846,12,26.1848
train_mnist_7b9c5_00005,TERMINATED,10.32.35.60:3074964,256,128,0.0,0.001,0.9872,12,22.5316
train_mnist_7b9c5_00006,TERMINATED,10.32.35.60:3076055,64,256,0.0,0.001,0.9855,12,42.8917
train_mnist_7b9c5_00007,TERMINATED,10.32.35.60:3077305,128,256,0.0,0.001,0.9859,12,39.9443
train_mnist_7b9c5_00008,TERMINATED,10.32.35.60:3078805,256,256,0.0,0.001,0.9844,12,79.7661
train_mnist_7b9c5_00009,TERMINATED,10.32.35.60:3080465,64,64,0.25,0.001,0.9894,12,113.503


[36m(pid=3069754)[0m 2023-11-24 16:00:31.576563: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3069754)[0m 2023-11-24 16:00:31.623251: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3069754)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(train_mnist pid=3069754)[0m 2023-11-24 16:00:36.468520: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31141 MB memory:  -> device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:2f:00.0, compute capability: 7.0
[36m(trai

Trial name,mean_accuracy
train_mnist_7b9c5_00000,0.9864
train_mnist_7b9c5_00001,0.9864
train_mnist_7b9c5_00002,0.9873
train_mnist_7b9c5_00003,0.9876
train_mnist_7b9c5_00004,0.9846
train_mnist_7b9c5_00005,0.9872
train_mnist_7b9c5_00006,0.9855
train_mnist_7b9c5_00007,0.9859
train_mnist_7b9c5_00008,0.9844
train_mnist_7b9c5_00009,0.9894


[36m(pid=3070976)[0m 2023-11-24 16:01:12.884145: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3070976)[0m 2023-11-24 16:01:12.930691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3070976)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(train_mnist pid=3070976)[0m 2023-11-24 16:01:14.925942: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31141 MB memory:  -> device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:2f:00.0, compute capability: 7.0
[36m(trai

### Bayesian Search

In [None]:
#!pip install bayesian-optimization #--yes

In [None]:
def train_mnist(config):
    """Train the MNIST CNN for one Bayesian-search trial.

    The search space for this experiment is purely continuous, so the
    integer-valued hyperparameters arrive as floats and are decoded here.

    Args:
        config: Hyperparameter dict supplied by Ray Tune. Reads
            "conv_filters" (float, rounded to the nearest int),
            "batch_size" (float index, rounded and mapped onto
            64 / 128 / 256), "dropout" and "lr".

    Reports:
        {"mean_accuracy": <test accuracy>} once per epoch via
        ``session.report`` (matching the grid-search and Hyperband
        trainables), so a ``stop`` condition on the metric can end the
        trial before all 12 epochs have run.
    """
    batch_sizes = (64, 128, 256)  # index 0 -> 64, 1 -> 128, 2 -> 256
    num_classes = 10
    epochs = 12

    # Decode into locals instead of writing back into ``config`` so the dict
    # handed over by Tune is not modified.
    conv_filters = int(round(config["conv_filters"]))
    batch_index = int(round(config["batch_size"]))
    # Clamp so a sample sitting marginally outside [0, 2] still maps to a
    # valid batch size instead of failing the trial with a lookup error.
    batch_index = min(max(batch_index, 0), len(batch_sizes) - 1)
    batch_size = batch_sizes[batch_index]

    # Load MNIST data, add the channel axis and rescale the pixels.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
    x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

    # Define the model
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(
            filters=conv_filters,
            kernel_size=(3, 3),
            activation="relu",
            input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(config["dropout"]),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    # Compile the model
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=config["lr"]),
        metrics=["accuracy"]
    )

    for _ in range(epochs):
        # Train one epoch at a time so accuracy can be reported after each.
        # verbose=0 keeps Keras progress bars out of the trial logs, and no
        # validation_data is passed because evaluate() below already scores
        # the test set.
        model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            epochs=1,
            verbose=0
        )

        # Evaluate the model
        _, accuracy = model.evaluate(x_test, y_test, verbose=0)
        session.report({"mean_accuracy": accuracy})

In [None]:
# Define the modified search space. BayesOptSearch only handles continuous
# float ranges, so the integer/categorical hyperparameters are searched as
# floats and decoded (rounded) inside train_mnist.
search_space = {
    "conv_filters": tune.uniform(64, 256),
    # NOTE(review): BayesOptSearch appears to drop the log-uniform sampler and
    # treat this as a plain uniform range over [0.001, 0.1] - confirm against
    # the warnings emitted at start-up.
    "lr": tune.loguniform(0.001, 0.1),
    # Rounded to an index: 0 for 64, 1 for 128, 2 for 256. The range is padded
    # by 0.5 on each side so all three indices own an equally wide interval;
    # with uniform(0, 2) index 1 was twice as likely as 0 or 2.
    "batch_size": tune.uniform(-0.5, 2.5),
    "dropout": tune.uniform(0, 1)
}

# Initialize Bayesian optimization search algorithm.
# random_search_steps defaults to 10, which with num_samples=10 below would
# make every trial a random warm-up draw and never use the surrogate model.
# Keep a few random trials and let the optimizer guide the rest.
bayesopt_search = BayesOptSearch(random_search_steps=4)

# Run the optimization using Bayesian search
start_time = time.time()

bayes_analysis = tune.run(
    train_mnist,
    name="exp_bayes",
    metric="mean_accuracy",
    mode="max",
    stop={"mean_accuracy": 0.99},
    resources_per_trial={"gpu": 1},
    config=search_space,
    search_alg=bayesopt_search,
    num_samples=10
)

end_time = time.time()
bayes_time = end_time - start_time

2023-12-05 01:39:33,602	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-12-05 02:00:18
Running for:,00:20:44.58
Memory:,65.4/377.3 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,lr,acc,iter,total time (s)
train_mnist_fb005315,TERMINATED,10.32.35.160:3295966,0.74908,246.537,0.731994,0.0602672,0.1135,1,179.266
train_mnist_f2b95443,TERMINATED,10.32.35.160:3296841,0.312037,93.9509,0.0580836,0.0867514,0.9085,1,90.4304
train_mnist_b9914140,TERMINATED,10.32.35.160:3297460,1.20223,199.95,0.0205845,0.0970211,0.862,1,159.762
train_mnist_ffca9cac,TERMINATED,10.32.35.160:3298267,1.66489,104.769,0.181825,0.019157,0.9812,1,70.9554
train_mnist_a60fcaaf,TERMINATED,10.32.35.160:3298881,0.608484,164.753,0.431945,0.0298317,0.9616,1,116.677
train_mnist_65d41f89,TERMINATED,10.32.35.160:3299519,1.22371,90.7828,0.292145,0.0372698,0.9508,1,66.2914
train_mnist_cd0082ba,TERMINATED,10.32.35.160:3300066,0.91214,214.754,0.199674,0.0519092,0.9597,1,179.012
train_mnist_1a912c10,TERMINATED,10.32.35.160:3300944,1.18483,72.9185,0.607545,0.0178819,0.9793,1,53.3985
train_mnist_eecf567c,TERMINATED,10.32.35.160:3301447,0.130103,246.186,0.965632,0.0810313,0.1135,1,226.149
train_mnist_40c5a41c,TERMINATED,10.32.35.160:3302450,0.609228,82.753,0.684233,0.0445751,0.8942,1,61.0914


[36m(pid=3295966)[0m 2023-12-05 01:39:35.572982: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3295966)[0m 2023-12-05 01:39:35.575219: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3295966)[0m 2023-12-05 01:39:35.620141: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3295966)[0m 2023-12-05 01:39:35.620637: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3295966)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate com

[36m(train_mnist pid=3295966)[0m Epoch 1/12
  1/469 [..............................] - ETA: 6:11 - loss: 2.3099 - accuracy: 0.0781
  5/469 [..............................] - ETA: 13s - loss: 46.7496 - accuracy: 0.1031
  9/469 [..............................] - ETA: 13s - loss: 27.0000 - accuracy: 0.1050
 13/469 [..............................] - ETA: 13s - loss: 19.4022 - accuracy: 0.1064
 17/469 [>.............................] - ETA: 13s - loss: 15.3785 - accuracy: 0.1085
 19/469 [>.............................] - ETA: 13s - loss: 14.0030 - accuracy: 0.1090
 23/469 [>.............................] - ETA: 13s - loss: 11.9684 - accuracy: 0.1073
 27/469 [>.............................] - ETA: 13s - loss: 10.5366 - accuracy: 0.1045
 31/469 [>.............................] - ETA: 12s - loss: 9.4743 - accuracy: 0.1058
 35/469 [=>............................] - ETA: 12s - loss: 8.6547 - accuracy: 0.1051
 37/469 [=>............................] - ETA: 12s - loss: 8.3116 - accuracy: 0.1047


Trial name,mean_accuracy
train_mnist_1a912c10,0.9793
train_mnist_40c5a41c,0.8942
train_mnist_65d41f89,0.9508
train_mnist_a60fcaaf,0.9616
train_mnist_b9914140,0.862
train_mnist_cd0082ba,0.9597
train_mnist_eecf567c,0.1135
train_mnist_f2b95443,0.9085
train_mnist_fb005315,0.1135
train_mnist_ffca9cac,0.9812


[36m(pid=3296841)[0m 2023-12-05 01:42:38.962325: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3296841)[0m 2023-12-05 01:42:38.964613: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3296841)[0m 2023-12-05 01:42:39.013146: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3296841)[0m 2023-12-05 01:42:39.013681: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3296841)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate com

[36m(train_mnist pid=3296841)[0m Epoch 1/12
  1/938 [..............................] - ETA: 9:55 - loss: 2.3031 - accuracy: 0.0625
 15/938 [..............................] - ETA: 7s - loss: 28.0819 - accuracy: 0.2708
 29/938 [..............................] - ETA: 6s - loss: 15.0646 - accuracy: 0.4634
 43/938 [>.............................] - ETA: 6s - loss: 10.4302 - accuracy: 0.5632
 57/938 [>.............................] - ETA: 6s - loss: 8.0299 - accuracy: 0.6234
 71/938 [=>............................] - ETA: 6s - loss: 6.5844 - accuracy: 0.6613
 85/938 [=>............................] - ETA: 6s - loss: 5.5761 - accuracy: 0.6963
 99/938 [==>...........................] - ETA: 6s - loss: 4.8650 - accuracy: 0.7181
113/938 [==>...........................] - ETA: 6s - loss: 4.3104 - accuracy: 0.7389
127/938 [===>..........................] - ETA: 6s - loss: 3.8837 - accuracy: 0.7546
141/938 [===>..........................] - ETA: 5s - loss: 3.5404 - accuracy: 0.7673
155/938 [===>.

[36m(pid=3297460)[0m 2023-12-05 01:44:13.943903: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3297460)[0m 2023-12-05 01:44:13.991935: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3297460)[0m 2023-12-05 01:44:13.992455: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3297460)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3297460)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3297460)[0m   return 

[36m(train_mnist pid=3297460)[0m Epoch 1/12
  3/469 [..............................] - ETA: 12s - loss: 194.5676 - accuracy: 0.1224
  7/469 [..............................] - ETA: 12s - loss: 85.2098 - accuracy: 0.2009 
 11/469 [..............................] - ETA: 12s - loss: 54.5836 - accuracy: 0.3764
 15/469 [..............................] - ETA: 12s - loss: 40.2388 - accuracy: 0.4901
 17/469 [>.............................] - ETA: 12s - loss: 35.5873 - accuracy: 0.5267
 22/469 [>.............................] - ETA: 11s - loss: 27.6425 - accuracy: 0.5952
 26/469 [>.............................] - ETA: 11s - loss: 23.4841 - accuracy: 0.6337
 30/469 [>.............................] - ETA: 11s - loss: 20.4398 - accuracy: 0.6625
 34/469 [=>............................] - ETA: 11s - loss: 18.0810 - accuracy: 0.6900
 38/469 [=>............................] - ETA: 11s - loss: 16.2210 - accuracy: 0.7085
 41/469 [=>............................] - ETA: 11s - loss: 15.0814 - accuracy: 0.

[36m(pid=3298267)[0m 2023-12-05 01:46:57.960253: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3298267)[0m 2023-12-05 01:46:57.962491: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3298267)[0m 2023-12-05 01:46:58.007638: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3298267)[0m 2023-12-05 01:46:58.008125: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3298267)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate com

[36m(train_mnist pid=3298267)[0m Epoch 1/12
  4/235 [..............................] - ETA: 5s - loss: 4.4735 - accuracy: 0.1787  
  9/235 [>.............................] - ETA: 5s - loss: 3.0384 - accuracy: 0.3351
 11/235 [>.............................] - ETA: 5s - loss: 2.6836 - accuracy: 0.3995
 17/235 [=>............................] - ETA: 5s - loss: 1.9486 - accuracy: 0.5441
 20/235 [=>............................] - ETA: 5s - loss: 1.7158 - accuracy: 0.5928
 26/235 [==>...........................] - ETA: 5s - loss: 1.3952 - accuracy: 0.6663
 30/235 [==>...........................] - ETA: 5s - loss: 1.2480 - accuracy: 0.6986
 35/235 [===>..........................] - ETA: 4s - loss: 1.1066 - accuracy: 0.7316
 37/235 [===>..........................] - ETA: 4s - loss: 1.0593 - accuracy: 0.7427
 43/235 [====>.........................] - ETA: 4s - loss: 0.9418 - accuracy: 0.7703
 46/235 [====>.........................] - ETA: 4s - loss: 0.8947 - accuracy: 0.7808
 52/235 [=====>..

[36m(pid=3298881)[0m 2023-12-05 01:48:12.996169: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3298881)[0m 2023-12-05 01:48:12.998432: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3298881)[0m 2023-12-05 01:48:13.044090: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3298881)[0m 2023-12-05 01:48:13.044591: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3298881)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate com

[36m(train_mnist pid=3298881)[0m Epoch 1/12
  4/469 [..............................] - ETA: 8s - loss: 11.6311 - accuracy: 0.1543 
 10/469 [..............................] - ETA: 8s - loss: 6.0415 - accuracy: 0.1156
 16/469 [>.............................] - ETA: 8s - loss: 4.6400 - accuracy: 0.1172
 19/469 [>.............................] - ETA: 8s - loss: 4.2650 - accuracy: 0.1324
 25/469 [>.............................] - ETA: 8s - loss: 3.6833 - accuracy: 0.1991
 31/469 [>.............................] - ETA: 8s - loss: 3.1754 - accuracy: 0.2865
 37/469 [=>............................] - ETA: 8s - loss: 2.7903 - accuracy: 0.3640
 43/469 [=>............................] - ETA: 8s - loss: 2.4995 - accuracy: 0.4246
 49/469 [==>...........................] - ETA: 8s - loss: 2.2761 - accuracy: 0.4700
 55/469 [==>...........................] - ETA: 8s - loss: 2.0917 - accuracy: 0.5075
 58/469 [==>...........................] - ETA: 7s - loss: 2.0155 - accuracy: 0.5237
 64/469 [===>....

[36m(pid=3299519)[0m 2023-12-05 01:50:14.053198: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3299519)[0m 2023-12-05 01:50:14.100512: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3299519)[0m 2023-12-05 01:50:14.101004: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3299519)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3299519)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3299519)[0m   return 

[36m(train_mnist pid=3299519)[0m Epoch 1/12
  6/469 [..............................] - ETA: 5s - loss: 10.1741 - accuracy: 0.2240 
 16/469 [>.............................] - ETA: 5s - loss: 4.6836 - accuracy: 0.4214
 26/469 [>.............................] - ETA: 4s - loss: 3.2032 - accuracy: 0.5385
 36/469 [=>............................] - ETA: 4s - loss: 2.4880 - accuracy: 0.6118
 46/469 [=>............................] - ETA: 4s - loss: 2.0875 - accuracy: 0.6532
 56/469 [==>...........................] - ETA: 4s - loss: 1.8194 - accuracy: 0.6790
 66/469 [===>..........................] - ETA: 4s - loss: 1.6299 - accuracy: 0.6998
 76/469 [===>..........................] - ETA: 4s - loss: 1.4888 - accuracy: 0.7161
 86/469 [====>.........................] - ETA: 4s - loss: 1.3690 - accuracy: 0.7298
 96/469 [=====>........................] - ETA: 4s - loss: 1.2734 - accuracy: 0.7424
106/469 [=====>........................] - ETA: 3s - loss: 1.1968 - accuracy: 0.7521
[36m(train_mnist

[36m(pid=3300066)[0m 2023-12-05 01:51:24.057194: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3300066)[0m 2023-12-05 01:51:24.105627: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3300066)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3300066)[0m 2023-12-05 01:51:24.105130: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3300066)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3300066)[0m   return 

[36m(train_mnist pid=3300066)[0m Epoch 1/12
  3/469 [..............................] - ETA: 14s - loss: 79.7654 - accuracy: 0.1094
  7/469 [..............................] - ETA: 14s - loss: 35.5304 - accuracy: 0.1060
 11/469 [..............................] - ETA: 14s - loss: 23.4561 - accuracy: 0.1101
 15/469 [..............................] - ETA: 13s - loss: 17.8238 - accuracy: 0.1036
 17/469 [>.............................] - ETA: 13s - loss: 15.9969 - accuracy: 0.1071
 21/469 [>.............................] - ETA: 13s - loss: 13.3905 - accuracy: 0.1012
 25/469 [>.............................] - ETA: 13s - loss: 11.6191 - accuracy: 0.0975
 27/469 [>.............................] - ETA: 13s - loss: 10.9272 - accuracy: 0.0998
 29/469 [>.............................] - ETA: 13s - loss: 10.3325 - accuracy: 0.0989
 31/469 [>.............................] - ETA: 13s - loss: 9.8144 - accuracy: 0.0988 
 35/469 [=>............................] - ETA: 13s - loss: 8.9600 - accuracy: 0.098

[36m(pid=3300944)[0m 2023-12-05 01:54:27.057413: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3300944)[0m 2023-12-05 01:54:27.106056: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3300944)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3300944)[0m 2023-12-05 01:54:27.105539: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3300944)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3300944)[0m   return 

[36m(train_mnist pid=3300944)[0m Epoch 1/12
  7/469 [..............................] - ETA: 4s - loss: 3.0643 - accuracy: 0.2690  
 19/469 [>.............................] - ETA: 3s - loss: 1.8464 - accuracy: 0.5041
 25/469 [>.............................] - ETA: 3s - loss: 1.5434 - accuracy: 0.5806
 37/469 [=>............................] - ETA: 3s - loss: 1.2191 - accuracy: 0.6603
 49/469 [==>...........................] - ETA: 3s - loss: 1.0301 - accuracy: 0.7124
 61/469 [==>...........................] - ETA: 3s - loss: 0.9072 - accuracy: 0.7459
 73/469 [===>..........................] - ETA: 3s - loss: 0.8172 - accuracy: 0.7708
 85/469 [====>.........................] - ETA: 3s - loss: 0.7476 - accuracy: 0.7888
 97/469 [=====>........................] - ETA: 3s - loss: 0.6976 - accuracy: 0.8024
109/469 [=====>........................] - ETA: 3s - loss: 0.6553 - accuracy: 0.8126
[36m(train_mnist pid=3300944)[0m Epoch 2/12
  1/469 [..............................] - ETA: 5s - los

[36m(pid=3301447)[0m 2023-12-05 01:55:25.084528: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3301447)[0m 2023-12-05 01:55:25.133737: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3301447)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3301447)[0m 2023-12-05 01:55:25.133233: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3301447)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3301447)[0m   return 

[36m(train_mnist pid=3301447)[0m Epoch 1/12
  1/938 [..............................] - ETA: 9:54 - loss: 2.3186 - accuracy: 0.1250
  7/938 [..............................] - ETA: 17s - loss: 164.5546 - accuracy: 0.1049
 13/938 [..............................] - ETA: 17s - loss: 89.6986 - accuracy: 0.1154 
 19/938 [..............................] - ETA: 17s - loss: 62.1062 - accuracy: 0.1127
 25/938 [..............................] - ETA: 17s - loss: 47.7578 - accuracy: 0.1075
 31/938 [..............................] - ETA: 16s - loss: 38.9607 - accuracy: 0.1033
 37/938 [>.............................] - ETA: 16s - loss: 33.0175 - accuracy: 0.1001
 40/938 [>.............................] - ETA: 16s - loss: 30.7146 - accuracy: 0.0980
 43/938 [>.............................] - ETA: 16s - loss: 28.7328 - accuracy: 0.0988
 46/938 [>.............................] - ETA: 16s - loss: 27.0085 - accuracy: 0.1036
 52/938 [>.............................] - ETA: 16s - loss: 24.1586 - accuracy: 0.

[36m(pid=3302450)[0m 2023-12-05 01:59:15.119165: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3302450)[0m 2023-12-05 01:59:15.167325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3302450)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3302450)[0m 2023-12-05 01:59:15.166816: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3302450)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3302450)[0m   return 

[36m(train_mnist pid=3302450)[0m Epoch 1/12
  6/469 [..............................] - ETA: 4s - loss: 9.5227 - accuracy: 0.1589  
 17/469 [>.............................] - ETA: 4s - loss: 4.8520 - accuracy: 0.1324
 28/469 [>.............................] - ETA: 4s - loss: 3.7983 - accuracy: 0.1660
 39/469 [=>............................] - ETA: 4s - loss: 3.1849 - accuracy: 0.2514
 45/469 [=>............................] - ETA: 4s - loss: 2.9294 - accuracy: 0.2983
 57/469 [==>...........................] - ETA: 4s - loss: 2.5455 - accuracy: 0.3746
 68/469 [===>..........................] - ETA: 3s - loss: 2.3034 - accuracy: 0.4243
 80/469 [====>.........................] - ETA: 3s - loss: 2.1026 - accuracy: 0.4640
 92/469 [====>.........................] - ETA: 3s - loss: 1.9499 - accuracy: 0.4956
104/469 [=====>........................] - ETA: 3s - loss: 1.8296 - accuracy: 0.5230
[36m(train_mnist pid=3302450)[0m Epoch 2/12
  1/469 [..............................] - ETA: 5s - los

2023-12-05 02:00:18,216	INFO tune.py:1047 -- Total run time: 1244.61 seconds (1244.58 seconds for the tuning loop).


### Hyperband

In [None]:
def train_mnist(config):
    """Train a small CNN on MNIST and report test accuracy to Ray Tune each epoch.

    Args:
        config: Hyperparameter dict supplied by Ray Tune. Keys read here:
            "conv_filters" (number of Conv2D filters), "dropout" (Dropout rate),
            "lr" (Adam learning rate) and "batch_size" (batch size for fit()).

    Reports:
        {"mean_accuracy": <test-set accuracy>} via session.report, once per epoch.
    """
    num_classes = 10
    epochs = 12

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Add a trailing channel axis and divide by 255 to rescale the pixel values.
    x_train = x_train.reshape(-1, 28, 28, 1) / 255.0
    x_test = x_test.reshape(-1, 28, 28, 1) / 255.0

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=config["conv_filters"], kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(config["dropout"]),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=config["lr"]),
        metrics=["accuracy"])

    for _ in range(epochs):
        # Train one epoch per fit() call so a metric can be reported after each epoch.
        # validation_data is intentionally not passed: evaluate() below already
        # scores the test set, and a validation pass inside fit() would only
        # repeat that work and throw the result away.
        model.fit(
            x_train,
            y_train,
            batch_size=config["batch_size"],
            epochs=1,
            verbose=0)

        # evaluate() returns [loss, accuracy] for the compiled metrics; only the
        # accuracy is reported to Tune.
        _, accuracy = model.evaluate(x_test, y_test, verbose=0)
        session.report({"mean_accuracy": accuracy})

In [None]:
# Number of epochs train_mnist runs; it reports one result per epoch, so this is
# also the largest training_iteration a trial can reach.
max_epochs = 12

# Define the search space
search_space = {
    "conv_filters": tune.choice([64, 128, 256]),
    "lr": tune.loguniform(0.001, 0.1),
    "batch_size": tune.choice([64, 128, 256]),
    "dropout": tune.uniform(0, 1)
}

# Define the Hyperband scheduler
hyperband = HyperBandScheduler(
    time_attr="training_iteration",
    # Keep the per-trial budget equal to what a trial can actually report.
    # A larger max_t sizes the brackets for iterations that never happen,
    # so the scheduler gets no chance to stop weak trials early.
    max_t=max_epochs,
    reduction_factor=3
)

# Run the optimization using Hyperband
start_time = time.time()

hyperband_analysis = tune.run(
    train_mnist,
    name="exp_hyperband",
    metric="mean_accuracy",
    mode="max",
    stop={"training_iteration": max_epochs},  # Same cap as the scheduler budget
    resources_per_trial={"gpu": 1},
    config=search_space,
    num_samples=10,  # Number of different hyperparameter configurations to try
    scheduler=hyperband
)

end_time = time.time()
# Wall-clock duration of the whole search, in seconds.
hyperband_time = end_time - start_time

2023-12-05 02:01:18,660	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-12-05 02:22:43
Running for:,00:21:24.77
Memory:,68.9/377.3 GiB

Trial name,status,loc,batch_size,conv_filters,dropout,lr,acc,iter,total time (s)
train_mnist_16ec0_00000,TERMINATED,10.32.35.160:3303162,128,256,0.540798,0.031441,0.9626,12,193.888
train_mnist_16ec0_00001,TERMINATED,10.32.35.160:3304567,64,64,0.0435369,0.0269051,0.956,12,71.8648
train_mnist_16ec0_00002,TERMINATED,10.32.35.160:3305567,128,128,0.0750938,0.00175483,0.9867,12,101.749
train_mnist_16ec0_00003,TERMINATED,10.32.35.160:3306609,256,128,0.77175,0.0833653,0.1009,12,85.0505
train_mnist_16ec0_00004,TERMINATED,10.32.35.160:3307587,128,256,0.116587,0.0162096,0.9836,12,192.796
train_mnist_16ec0_00005,TERMINATED,10.32.35.160:3308931,128,64,0.77417,0.00119084,0.9869,12,58.6939
train_mnist_16ec0_00006,TERMINATED,10.32.35.160:3309817,256,64,0.660095,0.0092037,0.9851,12,50.0868
train_mnist_16ec0_00007,TERMINATED,10.32.35.160:3310699,256,128,0.519591,0.0622316,0.9577,12,85.1461
train_mnist_16ec0_00008,TERMINATED,10.32.35.160:3311719,256,256,0.670494,0.0898003,0.9169,12,161.788
train_mnist_16ec0_00009,TERMINATED,10.32.35.160:3312943,64,256,0.0643842,0.00233183,0.9845,12,240.936


[36m(pid=3303162)[0m 2023-12-05 02:01:20.480179: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3303162)[0m 2023-12-05 02:01:20.482465: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3303162)[0m 2023-12-05 02:01:20.529078: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
[36m(pid=3303162)[0m 2023-12-05 02:01:20.529611: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3303162)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate com

Trial name,mean_accuracy
train_mnist_16ec0_00000,0.9626
train_mnist_16ec0_00001,0.956
train_mnist_16ec0_00002,0.9867
train_mnist_16ec0_00003,0.1009
train_mnist_16ec0_00004,0.9836
train_mnist_16ec0_00005,0.9869
train_mnist_16ec0_00006,0.9851
train_mnist_16ec0_00007,0.9577
train_mnist_16ec0_00008,0.9169
train_mnist_16ec0_00009,0.9845


[36m(pid=3304567)[0m 2023-12-05 02:04:39.216653: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
[36m(pid=3304567)[0m 2023-12-05 02:04:39.264370: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.[32m [repeated 2x across cluster][0m
[36m(pid=3304567)[0m 2023-12-05 02:04:39.264876: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
[36m(pid=3304567)[0m To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[36m(pid=3304567)[0m   setattr(self, word, getattr(machar, word).flat[0])
[36m(pid=3304567)[0m   return 

**Answer:**

Above we can see the execution of Grid Search, Bayesian Search, and Hyperband for the given hyperparameter configurations. For each method we defined a different `train_mnist` function, since each method takes a slightly different approach.

## 3.2

In [None]:
# Summarize the Grid Search run: wall-clock time, best accuracy, best config.
best_result = grid_analysis.best_result

_grid_summary = (
    ("Time taken for Grid Search: ", grid_time),
    ("Best mean accuracy found is: ", best_result["mean_accuracy"]),
    ("Best hyperparameters found were: ", grid_analysis.best_config),
)
for _label, _value in _grid_summary:
    print(_label, _value)

Time taken for Grid Search:  5255.451504707336
Best mean accuracy found is:  0.9904000163078308
Best hyperparameters found were:  {'conv_filters': 256, 'lr': 0.001, 'batch_size': 64, 'dropout': 0.25}


In [None]:
# Define the mapping for the categorical values
batch_size_map = {0: 64, 1: 128, 2: 256}

# Retrieve the best configuration. Work on a copy: editing the dict returned by
# the analysis object in place would leave already-mapped values behind, and
# re-running this cell would then fail with a KeyError in batch_size_map.
best_config = dict(bayes_analysis.best_config)

# Convert the continuous values found by the search back to usable settings:
# round the filter count to an integer and map the batch-size index to its value.
best_config["conv_filters"] = round(best_config["conv_filters"])
best_config["batch_size"] = batch_size_map[int(round(best_config["batch_size"]))]

# Print the results
print("Time taken for Bayesian Search: ", bayes_time)
print("Best mean accuracy found is: ", bayes_analysis.best_result["mean_accuracy"])
print("Best hyperparameters found were: ", best_config)

Time taken for Bayesian Search:  617.1990833282471
Best mean accuracy found is:  0.9837999939918518
Best hyperparameters found were:  {'conv_filters': 105, 'lr': 0.01915704647548995, 'batch_size': 256, 'dropout': 0.18182496720710062}


In [None]:
# Summarize the Hyperband run: wall-clock time, best accuracy, best config.
_hyperband_summary = (
    ("Time taken for Hyperband Search: ", hyperband_time),
    ("Best mean accuracy found is: ", hyperband_analysis.best_result["mean_accuracy"]),
    ("Best hyperparameters found were: ", hyperband_analysis.best_config),
)
for _label, _value in _hyperband_summary:
    print(_label, _value)

Time taken for Hyperband Search:  1284.8512477874756
Best mean accuracy found is:  0.9868999719619751
Best hyperparameters found were:  {'conv_filters': 64, 'lr': 0.0011908442812931575, 'batch_size': 128, 'dropout': 0.7741701209349096}


## 3.3

**Answer:**

The outputs above show the results of our 3 hyperparameter optimization methods - Grid Search, Bayesian Search, and Hyperband - on our LeNet model using the MNIST data. Looking at the outputs, we can see that all the models performed very well (maybe even too well - note the overfitting), with accuracies between 0.984 and 0.990. However, they differed considerably in the time taken to run and in the hyperparameters they selected as best.

**Grid Search**
- The first method we used was Grid Search. This method tries out **all** possible combinations of the specified hyperparameter values and therefore runs for the longest time. Grid Search took 5255.45 seconds to run, but it did yield the highest accuracy of 0.9904. While this is the best accuracy of the 3 methods, we need to consider whether the long run time (and with it the high computational cost) is worth it, especially if we have a large number of potential hyperparameters.

**Bayesian Search**
- The next method we used was Bayesian Search. This method uses probabilistic modeling to select parameters, which makes it more efficient than Grid Search because it focuses on the regions with more promising hyperparameters based on prior evaluations. Bayesian Search was the fastest of the 3 methods, taking only 617.20 seconds to run, but it also achieved the lowest accuracy of 0.9838. In general, Bayesian Search can end up with less optimal hyperparameters than Grid Search because it does not try all combinations, but it is a good option as it is very fast and usually provides good results. While it did the worst of our 3 methods, it still achieved 0.98 accuracy, which is very high.

**Hyperband Search**
- The final method we used was Hyperband Search. This method is a resource-efficient optimization, as it can very quickly identify promising hyperparameters by adaptively allocating resources. Looking at the results above, this method is a good choice if we want a middle ground between Grid Search and Bayesian Search: its accuracy of 0.9869 is higher than Bayesian Search's (but lower than Grid Search's), and its run time of 1284.85 seconds is much lower than Grid Search's (but about 2x longer than Bayesian Search's).


Looking at all of these, we can see that while Grid Search provided the best accuracy, it was the slowest method of all. Bayesian Search ran fastest but had the lowest accuracy, and Hyperband Search was somewhere in the middle on both accuracy and run time. In our case I would argue that we should use Bayesian Search because it ran fastest (and as such required the least computational resources) and still achieved a very high accuracy of 0.98. However, in general we need to consider which is more important: running quickly and using few computational resources, or achieving the maximum accuracy (tradeoff between exploration and exploitation). If we are unsure, Hyperband Search is a good choice as it sits between the other two methods.