In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
import pandas as pd

In [9]:
df = pd.read_csv(r"C:\Users\Robyi\Documents\Data Science Dataset\delivery.csv")
df = df.drop(columns=df[['Order_ID','Weather', 'Traffic_Level','Time_of_Day', 'Vehicle_Type']])
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)
df.head()

Unnamed: 0,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,7.93,12,1.0,43
1,16.42,20,2.0,84
2,9.52,28,1.0,59
3,7.44,5,1.0,37
4,19.03,16,5.0,68


In [14]:
X = df.drop(columns=['Delivery_Time_min']).values
y = df['Delivery_Time_min'].values

In [15]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
y = scaler.fit_transform(y.reshape(-1,1))

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
class ExpandDimsLayer(layers.Layer):
    def __init__(self, axis, **kwargs):
        super(ExpandDimsLayer, self).__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        return tf.expand_dims(inputs, axis=self.axis)

In [None]:
def build_model(hp):
    inputs = keras.Input(shape=(X_train.shape[1],), name="input_features")
    
    inputs_expanded = ExpandDimsLayer(axis=1)(inputs)

    num_heads = hp.Choice("num_heads", [2, 4, 6])
    ff_dim = hp.Int("ff_dim", min_value=32, max_value=128, step=32)
    key_dim = min(ff_dim, inputs.shape[-1])

    attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(inputs_expanded, inputs_expanded)
    attention_output = layers.Flatten()(attention_output)
    attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output + inputs)

    ffn_output = layers.Dense(ff_dim, activation="relu")(attention_output)
    ffn_output = layers.Dense(inputs.shape[-1])(ffn_output)
    transformer_output = layers.LayerNormalization(epsilon=1e-6)(ffn_output + attention_output)

    x = layers.Dense(hp.Int("dense_units", min_value=32, max_value=128, step=32), activation="relu")(transformer_output)
    x = layers.Dropout(hp.Float("dropout", min_value=0.1, max_value=0.5, step=0.1))(x)
    outputs = layers.Dense(1, activation="linear")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp.Choice("learning_rate", [1e-2, 1e-3, 1e-4])),
        loss="mse",
        metrics=["mae"]
    )
    
    return model

In [20]:
tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=2,
    executions_per_trial=1,
    directory="tabtransformer",
    project_name="tabtransformer"
)




In [21]:
tuner.search(X_train, y_train, epochs=2, validation_split=0.2, verbose=1)

Trial 2 Complete [00h 00m 09s]

Best val_accuracy So Far: None
Total elapsed time: 00h 00m 21s


In [22]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [23]:
print(f"""
Best Hyperparameters:
- num_heads: {best_hps.get('num_heads')}
- ff_dim: {best_hps.get('ff_dim')}
- dense_units: {best_hps.get('dense_units')}
- dropout: {best_hps.get('dropout')}
- learning_rate: {best_hps.get('learning_rate')}
""")


Best Hyperparameters:
- num_heads: 2
- ff_dim: 128
- dense_units: 64
- dropout: 0.4
- learning_rate: 0.01



In [24]:
best_model = tuner.hypermodel.build(best_hps)
best_model.fit(X_train, y_train, epochs=1, validation_split=0.2, verbose=1)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 82ms/step - loss: 0.8830 - mae: 0.7678 - val_loss: 0.7195 - val_mae: 0.6207


<keras.src.callbacks.history.History at 0x22c9f205cd0>

In [25]:
test_loss, test_acc = best_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.4831 - mae: 0.5211
Test Accuracy: 0.5350
