In [8]:
import pandas as pd

In [9]:
df = pd.read_csv('/content/sample_data/01_train.csv')

from sklearn.model_selection import train_test_split

train_set, test_set_from_train = train_test_split(df, test_size=0.2, random_state=42)
train_set, dev_set = train_test_split(train_set, test_size=0.2, random_state=42)

# split target and features
X_train = train_set.drop('Premium Amount', axis=1)
y_train = train_set['Premium Amount'].copy()
X_valid = dev_set.drop('Premium Amount', axis=1)
y_valid = dev_set['Premium Amount'].copy()

In [10]:
import tensorflow as tf
import tensorflow.keras.backend as K

def root_mean_squared_log_error(y_true, y_pred):
    msle = tf.keras.losses.MeanSquaredLogarithmicError()
    return K.sqrt(msle(y_true, y_pred))

In [11]:
X_train = tf.convert_to_tensor(X_train)
y_train = tf.convert_to_tensor(y_train)

tf.random.set_seed(42)
norm_layer = tf.keras.layers.Normalization(input_shape=X_train.shape[1:])
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(100, activation='relu'),

    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(alpha=0.2),

    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(alpha=0.2),

    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(alpha=0.2),

    tf.keras.layers.Dense(1)
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss=root_mean_squared_log_error, optimizer=optimizer)

norm_layer.adapt(X_train)
history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)])

Epoch 1/100


  super().__init__(**kwargs)


[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - loss: 1.1797 - val_loss: 1.0853
Epoch 2/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - loss: 1.0799 - val_loss: 1.0840
Epoch 3/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 1.0771 - val_loss: 1.0806
Epoch 4/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2ms/step - loss: 1.0730 - val_loss: 1.0773
Epoch 5/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - loss: 1.0694 - val_loss: 1.0757
Epoch 6/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2ms/step - loss: 1.0676 - val_loss: 1.0746
Epoch 7/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2ms/step - loss: 1.0667 - val_loss: 1.0740
Epoch 8/100
[1m7680/7680[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - loss: 1.0659 - val_loss: 1.0730
Epoch 9/100
[1m7680

In [15]:
X_test = test_set_from_train.drop('Premium Amount', axis=1)
y_test = test_set_from_train['Premium Amount'].copy()
X_test = tf.convert_to_tensor(X_test)
y_test = tf.convert_to_tensor(y_test)

In [16]:
# Reshape y_test if necessary
y_test = tf.reshape(y_test, (-1, 1))

# Convert y_test to numpy array
y_test = y_test.numpy()

rmsle_test = model.evaluate(X_test, y_test)

(240000, 44)
(240000,)
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - loss: 1.0613


In [17]:
# save final prediction
test_set = pd.read_csv('/content/sample_data/01_test.csv')
del test_set['Premium Amount']
test_set = tf.convert_to_tensor(test_set)
y_pred = model.predict(test_set)

[1m25000/25000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 1ms/step


In [18]:
original_test_set = pd.read_csv('/content/sample_data/test.csv')
id = original_test_set['id']
y_pred = y_pred.flatten()
final_prediction = pd.DataFrame({'id': id, 'Premium Amount': y_pred})
final_prediction.to_csv('keras_sequential_04.csv', index=False)