In [1]:
import tensorflow as tf
# Reset Keras' global state before any model is built (see the Keras
# `clear_session` documentation) — presumably so that re-running the notebook
# in the same kernel starts from a clean slate.
tf.keras.backend.clear_session()

In [2]:
import pandas as pd

In [3]:
from sklearn.model_selection import train_test_split

# Load the training CSV.
df = pd.read_csv('/content/sample_data/02_train.csv')

# Two-stage split with a fixed seed: first hold out 20% of the rows as an
# internal test set, then hold out 20% of the remaining rows as a dev set.
train_set, test_set_from_train = train_test_split(
    df, test_size=0.2, random_state=42
)
train_set, dev_set = train_test_split(
    train_set, test_size=0.2, random_state=42
)

# Separate the regression target ('Premium Amount') from the feature columns.
y_train = train_set['Premium Amount'].copy()
X_train = train_set.drop(columns='Premium Amount')
y_valid = dev_set['Premium Amount'].copy()
X_valid = dev_set.drop(columns='Premium Amount')

In [4]:
import tensorflow.keras.backend as K

# def root_mean_squared_log_error(y_true, y_pred):
#     msle = tf.keras.losses.MeanSquaredLogarithmicError()
#     return K.sqrt(msle(y_true, y_pred))

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of Keras' mean squared error.

    Args:
        y_true: Ground-truth values, forwarded unchanged to
            ``tf.keras.losses.MeanSquaredError``.
        y_pred: Predicted values, forwarded unchanged to
            ``tf.keras.losses.MeanSquaredError``.

    Returns:
        ``K.sqrt`` applied to the value produced by the MeanSquaredError loss
        object for ``(y_true, y_pred)``.
    """
    mse_loss = tf.keras.losses.MeanSquaredError()
    mean_squared = mse_loss(y_true, y_pred)
    return K.sqrt(mean_squared)

In [5]:
# Install/upgrade Keras Tuner in the kernel's environment (-q: quiet, -U: upgrade).
# NOTE(review): the version is not pinned, so a re-run may pick up a different release.
%pip install -q -U keras-tuner

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
# Convert the train/validation features and targets to TensorFlow tensors.
# The tuple of current values is captured before any name is rebound, and the
# conversions run in the same order as the names on the left-hand side.
X_train, y_train, X_valid, y_valid = (
    tf.convert_to_tensor(split)
    for split in (X_train, y_train, X_valid, y_valid)
)

In [7]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile a feed-forward regression model for one tuner trial.

    Four hyperparameters are requested from ``hp``: ``n_hidden`` (0 to 8,
    default 2), ``n_neurons`` (32 to 128), ``learning_rate`` (1e-4 to 1e-2,
    log-sampled) and ``optimizer`` ("adam" or "RMSprop").

    The network is ``n_hidden`` repetitions of a BatchNormalization layer
    followed by a ReLU Dense layer of ``n_neurons`` units, then a final
    BatchNormalization layer and a single-unit Dense output with no activation.

    Args:
        hp: Hyperparameter container providing ``Int``, ``Float`` and
            ``Choice`` (presumably a ``keras_tuner.HyperParameters``).

    Returns:
        A ``tf.keras.Sequential`` model compiled with
        ``root_mean_squared_error`` as its loss and the selected optimizer.
    """
    # Search space.
    depth = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    width = hp.Int("n_neurons", min_value=32, max_value=128)
    step_size = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2,
                         sampling="log")
    optimizer_name = hp.Choice("optimizer", values=["adam", 'RMSprop'])

    # Any choice other than "RMSprop" resolves to Adam.
    if optimizer_name != "RMSprop":
        optimizer = tf.keras.optimizers.Adam(learning_rate=step_size)
    else:
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=step_size, rho=0.9)

    # Hidden stack: each hidden Dense layer is preceded by batch normalization.
    layers = []
    for _ in range(depth):
        layers += [
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dense(width, activation="relu"),
        ]

    # Output head: one more normalization, then a single linear unit.
    layers += [
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(1),
    ]

    model = tf.keras.Sequential(layers)
    model.compile(optimizer=optimizer, loss=root_mean_squared_error)
    return model

In [8]:
# Random search over build_model's hyperparameters, ranked by validation loss.
# overwrite=True tells the tuner not to reuse earlier results stored under
# kaggle_competition/my_rnd_search.
random_search_tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=6,
    seed=42,
    overwrite=True,
    directory='kaggle_competition',
    project_name='my_rnd_search',
)

# Train every trial for 15 epochs with batches of 150 rows, scoring on the dev split.
random_search_tuner.search(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=15,
    batch_size=150,
)

Trial 6 Complete [00h 05m 15s]
val_loss: 1.0731289386749268

Best val_loss So Far: 1.0731289386749268
Total elapsed time: 00h 29m 10s


In [9]:
# Ask the tuner for its three best models and keep the first one for
# evaluation and prediction (per the Keras Tuner docs the list is ordered
# best-first — confirm if the ordering matters).
top3_models = random_search_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))
  saveable.load_own_variables(weights_store.get(inner_path))


In [10]:
# Fetch the hyperparameter sets of the three best trials and display the
# values of the first one as the cell output.
top3_params = random_search_tuner.get_best_hyperparameters(num_trials=3)
top3_params[0].values

{'n_hidden': 8,
 'n_neurons': 75,
 'learning_rate': 0.0008960175671873151,
 'optimizer': 'adam'}

In [11]:
# Pull the single best trial from the tuner's oracle and print its summary
# (hyperparameters and score).
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

Trial 5 summary
Hyperparameters:
n_hidden: 8
n_neurons: 75
learning_rate: 0.0008960175671873151
optimizer: adam
Score: 1.0731289386749268


In [12]:
# Split the internal hold-out set into features and target, converting each
# straight to a TensorFlow tensor.
X_test = tf.convert_to_tensor(
    test_set_from_train.drop(columns='Premium Amount')
)
y_test = tf.convert_to_tensor(
    test_set_from_train['Premium Amount'].copy()
)

In [13]:
# Reshape the hold-out targets into a single column and hand them to Keras as
# a NumPy array.
y_test = tf.reshape(y_test, (-1, 1)).numpy()

# Score the best model on the hold-out split.
# NOTE(review): despite the variable name, the value returned here is whatever
# loss the model was compiled with — confirm it matches the intended metric.
rmsle_test = best_model.evaluate(X_test, y_test)

[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - loss: 1.0594


In [14]:
# save final prediction
test_set = pd.read_csv('/content/sample_data/02_test.csv')
del test_set['Premium Amount']
test_set = tf.convert_to_tensor(test_set)
y_pred = best_model.predict(test_set)

[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2ms/step


In [15]:
import numpy as np

# Build the submission file: one row per id from the original test CSV, paired
# with the model's prediction for that row.
original_test_set = pd.read_csv('/content/sample_data/test.csv')

# Named `submission_ids` rather than `id` so the built-in `id()` is not
# shadowed for the rest of the kernel session.
submission_ids = original_test_set['id']

# Derive the final values under a new name instead of overwriting `y_pred`;
# otherwise re-running this cell would apply np.exp a second time to
# already-exponentiated predictions.
# NOTE(review): np.exp assumes the model was trained on log(target). If the
# target was transformed with log1p, np.expm1 is the matching inverse — confirm
# against the preprocessing that produced 02_train.csv.
premium_pred = np.exp(y_pred.flatten())

final_prediction = pd.DataFrame(
    {'id': submission_ids, 'Premium Amount': premium_pred}
)
final_prediction.to_csv('keras_sequential_08.csv', index=False)