In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import kerastuner as kt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np

  import kerastuner as kt


In [2]:
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [4]:
encoder = LabelEncoder()
y = encoder.fit_transform(y)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
class ExpandDimsLayer(layers.Layer):
    def __init__(self, axis, **kwargs):
        super(ExpandDimsLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        return tf.expand_dims(inputs, axis=self.axis)

In [None]:
def build_model(hp):
    inputs = keras.Input(shape=(X_train.shape[1],), name="input_features")
    
    inputs_expanded = ExpandDimsLayer(axis=1)(inputs)

    num_heads = hp.Choice("num_heads", [2, 4, 6])
    ff_dim = hp.Int("ff_dim", min_value=32, max_value=128, step=32)
    key_dim = min(ff_dim, inputs.shape[-1])

    attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(inputs_expanded, inputs_expanded)
    attention_output = layers.Flatten()(attention_output)
    attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output + inputs)

    ffn_output = layers.Dense(ff_dim, activation="relu")(attention_output)
    ffn_output = layers.Dense(inputs.shape[-1])(ffn_output)
    transformer_output = layers.LayerNormalization(epsilon=1e-6)(ffn_output + attention_output)

    x = layers.Dense(hp.Int("dense_units", min_value=32, max_value=128, step=32), activation="relu")(transformer_output)
    x = layers.Dropout(hp.Float("dropout", min_value=0.1, max_value=0.5, step=0.1))(x)
    outputs = layers.Dense(3, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp.Choice("learning_rate", [1e-2, 1e-3, 1e-4])),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    return model

In [27]:
tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=2,
    executions_per_trial=1,
    directory="tabtransformers",
    project_name="tabtransformers"
)

In [28]:
tuner.search(X_train, y_train, epochs=2, validation_split=0.2, verbose=1)

Trial 2 Complete [00h 00m 09s]
val_accuracy: 0.8333333134651184

Best val_accuracy So Far: 0.8333333134651184
Total elapsed time: 00h 00m 19s


In [29]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [30]:
print(f"""
Best Hyperparameters:
- num_heads: {best_hps.get('num_heads')}
- ff_dim: {best_hps.get('ff_dim')}
- dense_units: {best_hps.get('dense_units')}
- dropout: {best_hps.get('dropout')}
- learning_rate: {best_hps.get('learning_rate')}
""")


Best Hyperparameters:
- num_heads: 4
- ff_dim: 64
- dense_units: 128
- dropout: 0.5
- learning_rate: 0.01



In [31]:
best_model = tuner.hypermodel.build(best_hps)
best_model.fit(X_train, y_train, epochs=1, validation_split=0.2, verbose=1)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 293ms/step - accuracy: 0.5013 - loss: 1.0164 - val_accuracy: 0.9167 - val_loss: 0.6845


<keras.src.callbacks.history.History at 0x179c01178d0>

In [32]:
test_loss, test_acc = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.8667 - loss: 0.6664
Test Accuracy: 0.8667
