In [8]:
import glob
import numpy as np
import tensorflow as tf

def npz_batch_generator(pattern, batch_size=512):
    files = sorted(glob.glob(pattern))
    X_buf, y_buf = [], []

    for fname in files:
        data = np.load(fname)
        X = data["X"]
        y = data["y"]

        for i in range(len(X)):
            X_buf.append(X[i])
            y_buf.append(y[i])

            if len(X_buf) == batch_size:
                yield np.array(X_buf, dtype=np.float32), np.array(y_buf, dtype=np.float32)
                X_buf, y_buf = [], []

def make_tf_dataset(pattern, batch_size=512):
    output_signature = (
        tf.TensorSpec(shape=(None, 768), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32)
    )

    ds = tf.data.Dataset.from_generator(
        lambda: npz_batch_generator(pattern, batch_size),
        output_signature=output_signature
    )

    ds = ds.repeat()  
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds


In [9]:
from tensorflow import keras
from tensorflow.keras import layers

def build_dense_model(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=(768,)))

    num_layers = hp.Int('num_layers', 1, 5)

    for i in range(num_layers):
        num_exp = hp.Int(f'num_exp_{i}', 4, 10)
        units = 2 ** num_exp

        activation = hp.Choice(f'activation_{i}', ['relu', 'tanh', 'elu'])

        model.add(layers.Dense(units=units, activation=activation))

        dropout_rate = hp.Float(f'dropout_{i}', 0.0, 0.5, step=0.1)
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1))

    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=['mae']
    )
    return model


In [10]:
import math


batch=4096
samples=48000000
steps_per_epoch = math.ceil((samples - (samples * 0.5)) / batch)  
validation_steps = math.ceil((samples * 0.5) / batch)


In [None]:
import keras_tuner as kt

train_ds = make_tf_dataset("training/train_*.npz", batch_size=512)
test_ds  = make_tf_dataset("training/test_*.npz",  batch_size=512)

tuner = kt.BayesianOptimization(
    build_dense_model,
    objective="val_loss",
    max_trials=50,
    directory="bayesian_tuning",
    project_name="simple_dense_model"
)

tuner.search(
    train_ds,
    validation_data=test_ds,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    epochs=100,
    callbacks=[
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=8,
            restore_best_weights=True
        )
    ]
)




Reloading Tuner from bayesian_tuning/dense_model/tuner0.json

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
5                 |4                 |num_layers
7                 |4                 |num_exp_0
elu               |tanh              |activation_0
0.1               |0.2               |dropout_0
9                 |4                 |num_exp_1
tanh              |relu              |activation_1
0.3               |0                 |dropout_1
6                 |4                 |num_exp_2
tanh              |relu              |activation_2
0.1               |0                 |dropout_2
4                 |4                 |num_exp_3
tanh              |relu              |activation_3
0.4               |0                 |dropout_3

Epoch 1/100
[1m5854/5860[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - loss: 79611.7109 - mae: 167.6416

KeyboardInterrupt: 

In [None]:
best_model = tuner.get_best_models(1)[0]
best_hp = tuner.get_best_hyperparameters(1)[0]

print("Najlepsze hiperparametry:")
for param in best_hp.values:
    print(param, best_hp.get(param))
