In [4]:
import os

In [5]:
# Expose only GPU index 0 to CUDA for this kernel. This is set before
# TensorFlow is imported further down in the notebook.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [6]:
# Shell escape: print the GPU inventory and current utilisation for reference.
!nvidia-smi

Mon Mar 11 16:40:56 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000000:16:00.0 Off |                    0 |
| N/A   37C    P0              40W / 300W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2-32GB           On  | 00000000:3A:00.0 Off |  

In [7]:
# Shell escape: copy every file in training_data/ into /dev/shm/, which is
# where the loading cell below reads the .npy arrays from.
# NOTE(review): /dev/shm is presumably a RAM-backed tmpfs on this host — confirm
# there is enough free memory for the full data set.
!cp training_data/* /dev/shm/

In [8]:
import numpy as np
# Toggle between memory-mapping the staged arrays and reading them fully.
memory_map = True

# np.load's mmap_mode defaults to None (read the whole file); 'r' opens the
# file as a read-only memory map instead.
load_mode = 'r' if memory_map else None

train_input = np.load('/dev/shm/train_input.npy', mmap_mode=load_mode)
train_target = np.load('/dev/shm/train_target.npy', mmap_mode=load_mode)
val_input = np.load('/dev/shm/val_input.npy', mmap_mode=load_mode)
val_target = np.load('/dev/shm/val_target.npy', mmap_mode=load_mode)

In [9]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import LeakyReLU
from keras.layers import Dropout
import tensorflow_addons as tfa
from qhoptim.tf import QHAdamOptimizer
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import LearningRateScheduler
import keras_tuner as kt
import os
import logging

def build_model(hp):
    """Build and compile a fully connected regression network for KerasTuner.

    The network takes 175 input features and emits 55 linear outputs. It is a
    stack of identical hidden stages: Dense -> LeakyReLU -> (optional)
    BatchNormalization -> Dropout. One stage is always present as the input
    stage, followed by ``num_layers`` further stages.

    Hyperparameters requested from ``hp`` (in this order):
        leak (float, 0 to 0.4): LeakyReLU negative slope.
        dropout (float, 0 to 0.25): dropout rate used after every stage.
        batch_normalization (bool): whether each stage includes BatchNormalization.
        hidden_units (int, 200 to 480): width shared by all hidden Dense layers.
        num_layers (int, 4 to 11): number of stages after the input stage.
        lr (float, 1e-6 to 1e-3, log-sampled): initial learning rate.
        optimizer (choice): one of "adam", "RAdam", "QHAdam".

    Args:
        hp: KerasTuner hyperparameter container used to declare/sample values.

    Returns:
        The compiled Sequential model, using MSE as both loss and metric.
    """
    leak_slope = hp.Float("leak", min_value=0, max_value=.4)
    drop_fraction = hp.Float("dropout", min_value=0, max_value=.25)
    use_batch_norm = hp.Boolean("batch_normalization")
    model = Sequential()
    width = hp.Int("hidden_units", min_value=200, max_value=480)

    def add_hidden_stage(**dense_kwargs):
        # One hidden stage; layers are created in the same order every time.
        model.add(Dense(units=width, kernel_initializer='normal', **dense_kwargs))
        model.add(LeakyReLU(alpha=leak_slope))
        if use_batch_norm:
            model.add(BatchNormalization())
        model.add(Dropout(drop_fraction))

    # Input stage: the only Dense layer that declares the input width.
    add_hidden_stage(input_dim=175)

    depth = hp.Int("num_layers", min_value=4, max_value=11)
    for _ in range(depth):
        add_hidden_stage()

    # Linear output head.
    model.add(Dense(55, kernel_initializer='normal', activation='linear'))

    step_size = hp.Float("lr", min_value=1e-6, max_value=1e-3, sampling="log")
    optimizer_name = hp.Choice("optimizer", ["adam", "RAdam", "QHAdam"])

    # Factories are lazy so that only the selected optimizer is instantiated.
    optimizer_factories = {
        "adam": lambda: keras.optimizers.Adam(learning_rate=step_size),
        "RAdam": lambda: tfa.optimizers.RectifiedAdam(learning_rate=step_size),
        "QHAdam": lambda: QHAdamOptimizer(
            learning_rate=step_size, nu2=1.0, beta1=0.995, beta2=0.999
        ),
    }
    make_optimizer = optimizer_factories.get(optimizer_name)
    # An unrecognised name is handed to compile() unchanged.
    optimizer = make_optimizer() if make_optimizer is not None else optimizer_name

    model.compile(optimizer=optimizer, loss='mse', metrics=["mse"])
    return model

2024-03-11 16:49:11.250632: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-11 16:49:20.882129: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-11 16:49:24.405901: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-11 16:49:55.331199: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [10]:
batch_size = 5000
num_epochs = 8
shuffle_buffer = 50000

# Build both input pipelines with the CPU as the active device.
# NOTE(review): from_tensor_slices receives the np.load results directly; if
# those are memory-mapped this may materialise the full arrays in memory —
# confirm this is acceptable for the data size.
with tf.device('/CPU:0'):
    # Training: shuffle -> fixed-size batches (partial final batch dropped) -> prefetch.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((train_input, train_target))
        .shuffle(buffer_size=shuffle_buffer)
        .batch(batch_size, drop_remainder=True)
        .prefetch(tf.data.AUTOTUNE)
    )

    # Validation: original order, final partial batch kept, then prefetch.
    val_ds = (
        tf.data.Dataset.from_tensor_slices((val_input, val_target))
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

def lr_schedule(epoch, lr):
    """Hold the learning rate for the first five epochs, then decay it every epoch.

    Intended for use with ``LearningRateScheduler``, which calls the schedule
    with the epoch index and the current learning rate.

    Args:
        epoch: Epoch index passed by the callback.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged while ``epoch < 5``; otherwise ``lr * exp(-0.1)``
        (roughly a 9.5% reduction per epoch). The result is a plain number,
        not a TensorFlow tensor.
    """
    # Imported locally so the function is self-contained; the scalar constant
    # needs no TensorFlow op.
    import math

    if epoch < 5:
        return lr
    return lr * math.exp(-0.1)

# Callbacks shared by every trial.
lr_scheduler = LearningRateScheduler(lr_schedule)
early_stopping = callbacks.EarlyStopping('val_loss', patience=5, restore_best_weights=True)

# Random search over the space declared in build_model, ranked by validation MSE.
# NOTE(review): with overwrite=False an existing tuning_directory/eagleeight
# project is presumably resumed rather than restarted — confirm that is intended.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_mse",
    max_trials=1,
    executions_per_trial=1,
    overwrite=False,
    directory="tuning_directory/",
    project_name="eagleeight",
)

# Extra keyword arguments passed through tuner.search alongside the data.
kwargs = {'epochs': num_epochs, 'verbose': 2, 'shuffle': True}

tuner.search(
    train_ds,
    validation_data=val_ds,
    callbacks=[lr_scheduler, early_stopping],
    **kwargs,
)

2024-03-11 16:50:54.039281: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-11 16:51:01.716705: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31095 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:16:00.0, compute capability: 7.0



Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
0.34467           |?                 |leak
0.060967          |?                 |dropout
True              |?                 |batch_normalization
471               |?                 |hidden_units
9                 |?                 |num_layers
0.00026052        |?                 |lr
RAdam             |?                 |optimizer

Epoch 1/8
14341/14341 - 597s - loss: 0.4938 - mse: 0.4938 - val_loss: 0.7181 - val_mse: 0.7181 - lr: 2.6052e-04 - 597s/epoch - 42ms/step
Epoch 2/8
14341/14341 - 576s - loss: 0.4394 - mse: 0.4394 - val_loss: 0.5549 - val_mse: 0.5549 - lr: 2.6052e-04 - 576s/epoch - 40ms/step
Epoch 3/8
14341/14341 - 590s - loss: 0.4128 - mse: 0.4128 - val_loss: 0.5221 - val_mse: 0.5221 - lr: 2.6052e-04 - 590s/epoch - 41ms/step
Epoch 4/8
14341/14341 - 586s - loss: 0.4050 - mse: 0.4050 - val_loss: 0.5257 - val_mse: 0.5257 - lr: 2.6052e-04 - 586s/epoch - 41ms/step
Epoch 5/8
14341/14341 - 588s - loss