# Predict Calorie Expenditure

## Reported metrics changed from `log1p`-scale RMSE to raw-scale `RMSE` for both Keras and Keras Tuner (the training loss is still MSE on `log1p` targets)

## Keras - 1 Attempt

## Attempt 1

In [None]:
import random, json, time
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout

# Seed every RNG used in this run (Python, NumPy, TensorFlow) with the same value
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(42)

# Load data: the sample submission tells us which column is the id (first)
# and which columns are prediction targets (all remaining ones)
sample_sub = pd.read_csv('playground-series-s5e5/sample_submission.csv')
id_col = sample_sub.columns[0]
target_columns = sample_sub.columns[1:].tolist()

df_train = pd.read_csv('playground-series-s5e5/train.csv')
df_test = pd.read_csv('playground-series-s5e5/test.csv')

# Work on a copy so df_train stays as loaded
df = df_train.copy()

# Regression targets are trained in log1p space
y_values = df[target_columns].astype(float).to_numpy()
y_enc = np.log1p(y_values)

# Features: everything except the targets and the id
non_feature_cols = target_columns + [id_col]
X = df.drop(columns=non_feature_cols, errors='ignore')

# "Split": all labeled rows are used for training. X_val holds the features of
# the provided test file, which has no labels, hence y_val is None.
X_train, y_train = X.copy(), y_enc
train_ids, test_ids = df[id_col], df_test[id_col]
X_val = df_test.drop(columns=non_feature_cols, errors='ignore')
y_val = None

# Feature engineering: remove columns that carry no observed value in training
all_missing = [col for col, is_empty in X_train.isna().all().items() if is_empty]
X_train.drop(columns=all_missing, inplace=True)
X_val.drop(columns=all_missing, inplace=True, errors='ignore')

# Categorical handling: columns with more than 50 distinct values are dropped
# instead of being one-hot encoded
categorical = list(X_train.select_dtypes(include=['object', 'category']).columns)
distinct_counts = X_train[categorical].nunique()
high_card = [col for col in categorical if distinct_counts[col] > 50]
X_train.drop(columns=high_card, inplace=True)
X_val.drop(columns=high_card, inplace=True, errors='ignore')

# Preprocessing pipeline: column lists are taken from the pruned training frame
numeric_features = list(X_train.select_dtypes(include=[np.number]).columns)
cat_features = list(X_train.select_dtypes(include=['object', 'category']).columns)

# Numeric columns: median imputation, then standardisation
num_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_pipeline = Pipeline(steps=num_steps)

# Categorical columns: mode imputation, then dense one-hot encoding that
# ignores categories not seen during fit
cat_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
]
cat_pipeline = Pipeline(steps=cat_steps)

preprocessor = ColumnTransformer(transformers=[
    ('num', num_pipeline, numeric_features),
    ('cat', cat_pipeline, cat_features),
])

# Fit on the training frame only; the test features are transformed with the
# statistics learned from training
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# Model architecture: hidden widths scale with the processed feature count,
# capped at 128 and 64 units
n_samples, n_features = X_train_proc.shape
units1 = min(n_features * 2, 128)
units2 = min(n_features, 64)

# Two Dense(relu) + Dropout(0.3) stages followed by a linear regression head
inputs = Input(shape=(n_features,))
x = inputs
for width in (units1, units2):
    x = Dense(width, activation='relu')(x)
    x = Dropout(0.3)(x)
outputs = Dense(len(target_columns), activation='linear')(x)
model = Model(inputs=inputs, outputs=outputs)

def mse_real(y_true_log, y_pred_log):
    """Mean squared error measured on the original (non-log) target scale.

    Both arguments are log1p-space tensors; each is mapped back with expm1
    before the squared difference is averaged.
    """
    error = tf.math.expm1(y_true_log) - tf.math.expm1(y_pred_log)
    return tf.reduce_mean(tf.square(error))
mse_real.__name__ = 'mse_real'

def rmse_real(y_true_log, y_pred_log):
    """Root mean squared error measured on the original (non-log) target scale.

    Both arguments are log1p-space tensors; each is mapped back with expm1
    before the error is computed.

    NOTE: when passed as a plain function in `metrics`, Keras averages the
    returned value over batches, so the logged epoch figure is a mean of
    per-batch RMSEs and will not equal sqrt(mse_real).
    """
    error = tf.math.expm1(y_true_log) - tf.math.expm1(y_pred_log)
    return tf.math.sqrt(tf.reduce_mean(tf.square(error)))
rmse_real.__name__ = 'rmse_real'

# Compile: the optimised loss is MSE in log1p space; the two extra metrics
# report the error after mapping back to the original scale
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[mse_real, rmse_real],
)

# Callbacks: stop after 10 epochs without val_loss improvement (restoring the
# best weights) and keep the best model on disk
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ModelCheckpoint('model_best.h5', monitor='val_loss', save_best_only=True),
]

# Training: validate on explicit labels when they exist, otherwise let Keras
# hold out 20% of the training data
fit_options = dict(epochs=100, batch_size=64, callbacks=callbacks, verbose=2)
if y_val is None:
    fit_options['validation_split'] = 0.2
else:
    fit_options['validation_data'] = (X_val_proc, y_val)

start_time = time.time()
history = model.fit(X_train_proc, y_train, **fit_options)
duration = time.time() - start_time

# Evaluation & Logging
import os

hist = history.history

# EarlyStopping(restore_best_weights=True) leaves the model with the weights of
# the epoch that had the lowest val_loss, so log that epoch's metrics. The last
# epoch (what [-1] would give) can be up to `patience` epochs past the weights
# that are actually used for prediction.
best_epoch = int(np.argmin(hist['val_loss']))

# Keys are kept as-is: the "loss" entries hold the raw-scale MSE metric, not
# the log-space training loss.
results = {
    'training_loss': float(hist['mse_real'][best_epoch]),
    'validation_loss': float(hist['val_mse_real'][best_epoch]),
    'training_RMSE': float(hist['rmse_real'][best_epoch]),
    'validation_RMSE': float(hist['val_rmse_real'][best_epoch])
}

# open(..., 'w') does not create missing parent directories
os.makedirs('Keras', exist_ok=True)
with open('Keras/results.json', 'w') as f:
    json.dump(results, f)

# Prediction & Submission
import os

# X_val_proc holds the preprocessed features of the provided test file
raw_preds = model.predict(X_val_proc)

# Inverse of the log1p applied to the targets. The linear output layer can emit
# negative log-space values, which expm1 maps to negative targets; the targets
# were treated as non-negative when log1p was applied, so floor at zero.
final = np.clip(np.expm1(raw_preds), 0, None)
if final.ndim == 1:
    final = final.reshape(-1, 1)

submission = pd.DataFrame(final, columns=target_columns)
# reset_index so the ids align positionally with the prediction rows
submission.insert(0, id_col, test_ids.reset_index(drop=True))

# to_csv does not create missing parent directories
os.makedirs('Keras', exist_ok=True)
submission.to_csv('Keras/submission_result.csv', index=False)

Epoch 1/100




9375/9375 - 23s - 2ms/step - loss: 1.1184 - mse_real: 156258.1875 - rmse_real: 155.1820 - val_loss: 0.1161 - val_mse_real: 320.6773 - val_rmse_real: 17.8240
Epoch 2/100




9375/9375 - 21s - 2ms/step - loss: 0.1690 - mse_real: 1536.9480 - rmse_real: 38.3128 - val_loss: 0.0895 - val_mse_real: 348.9626 - val_rmse_real: 18.5622
Epoch 3/100
9375/9375 - 21s - 2ms/step - loss: 0.1613 - mse_real: 1476.0460 - rmse_real: 37.5259 - val_loss: 0.0996 - val_mse_real: 246.3358 - val_rmse_real: 15.6242
Epoch 4/100
9375/9375 - 17s - 2ms/step - loss: 0.1597 - mse_real: 1442.6733 - rmse_real: 37.1666 - val_loss: 0.1386 - val_mse_real: 347.7831 - val_rmse_real: 18.5669
Epoch 5/100
9375/9375 - 17s - 2ms/step - loss: 0.1590 - mse_real: 1432.0992 - rmse_real: 37.0164 - val_loss: 0.2138 - val_mse_real: 353.1501 - val_rmse_real: 18.7299
Epoch 6/100
9375/9375 - 15s - 2ms/step - loss: 0.1509 - mse_real: 1356.5935 - rmse_real: 36.0223 - val_loss: 0.1937 - val_mse_real: 306.4412 - val_rmse_real: 17.4503
Epoch 7/100
9375/9375 - 15s - 2ms/step - loss: 0.1516 - mse_real: 1351.7606 - rmse_real: 35.9694 - val_loss: 0.1977 - val_mse_real: 424.3353 - val_rmse_real: 20.5223
Epoch 8/100
9375

## Keras Tuner - 1 Attempt

## Attempt 1

In [None]:
import random, json, time
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
import keras_tuner as kt

# Seed every RNG used in this run (Python, NumPy, TensorFlow) with the same value
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(42)

# Load data: the sample submission tells us which column is the id (first)
# and which columns are prediction targets (all remaining ones)
sample_sub = pd.read_csv('playground-series-s5e5/sample_submission.csv')
id_col = sample_sub.columns[0]
target_columns = sample_sub.columns[1:].tolist()

df_train = pd.read_csv('playground-series-s5e5/train.csv')
df_test = pd.read_csv('playground-series-s5e5/test.csv')

# Work on a copy so df_train stays as loaded
df = df_train.copy()

# Regression targets are trained in log1p space
y_values = df[target_columns].astype(float).to_numpy()
y_enc = np.log1p(y_values)

# Features: everything except the targets and the id
non_feature_cols = target_columns + [id_col]
X = df.drop(columns=non_feature_cols, errors='ignore')

# "Split": all labeled rows are used for training. X_val holds the features of
# the provided test file, which has no labels, hence y_val is None.
X_train, y_train = X.copy(), y_enc
train_ids, test_ids = df[id_col], df_test[id_col]
X_val = df_test.drop(columns=non_feature_cols, errors='ignore')
y_val = None

# Feature engineering: remove columns that carry no observed value in training
all_missing = [col for col, is_empty in X_train.isna().all().items() if is_empty]
X_train.drop(columns=all_missing, inplace=True)
X_val.drop(columns=all_missing, inplace=True, errors='ignore')

# Categorical handling: columns with more than 50 distinct values are dropped
# instead of being one-hot encoded
categorical = list(X_train.select_dtypes(include=['object', 'category']).columns)
distinct_counts = X_train[categorical].nunique()
high_card = [col for col in categorical if distinct_counts[col] > 50]
X_train.drop(columns=high_card, inplace=True)
X_val.drop(columns=high_card, inplace=True, errors='ignore')

# Preprocessing pipeline: column lists are taken from the pruned training frame
numeric_features = list(X_train.select_dtypes(include=[np.number]).columns)
cat_features = list(X_train.select_dtypes(include=['object', 'category']).columns)

# Numeric columns: median imputation, then standardisation
num_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_pipeline = Pipeline(steps=num_steps)

# Categorical columns: mode imputation, then dense one-hot encoding that
# ignores categories not seen during fit
cat_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
]
cat_pipeline = Pipeline(steps=cat_steps)

preprocessor = ColumnTransformer(transformers=[
    ('num', num_pipeline, numeric_features),
    ('cat', cat_pipeline, cat_features),
])

# Fit on the training frame only; the test features are transformed with the
# statistics learned from training
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# Shape of the processed training matrix; n_features sizes the network input.
# units1/units2 are the fixed-width heuristics (capped at 128 and 64); the
# hypermodel defined in this cell samples its own layer width instead.
n_samples = X_train_proc.shape[0]
n_features = X_train_proc.shape[1]
units1 = min(n_features * 2, 128)
units2 = min(n_features, 64)

# Callbacks shared by the tuner search and the final retraining: stop after 10
# epochs without val_loss improvement (restoring the best weights) and keep the
# best model on disk
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
)

def mse_real(y_true_log, y_pred_log):
    """Mean squared error measured on the original (non-log) target scale.

    Both arguments are log1p-space tensors; each is mapped back with expm1
    before the squared difference is averaged.
    """
    error = tf.math.expm1(y_true_log) - tf.math.expm1(y_pred_log)
    return tf.reduce_mean(tf.square(error))
mse_real.__name__ = 'mse_real'

def rmse_real(y_true_log, y_pred_log):
    """Root mean squared error measured on the original (non-log) target scale.

    Both arguments are log1p-space tensors; each is mapped back with expm1
    before the error is computed.

    NOTE: when passed as a plain function in `metrics`, Keras averages the
    returned value over batches, so the logged epoch figure is a mean of
    per-batch RMSEs and will not equal sqrt(mse_real).
    """
    error = tf.math.expm1(y_true_log) - tf.math.expm1(y_pred_log)
    return tf.math.sqrt(tf.reduce_mean(tf.square(error)))
rmse_real.__name__ = 'rmse_real'


class MyHyperModel(kt.HyperModel):
    """Search space for a fully connected regression network.

    Tuned hyperparameters: number of hidden layers (2-8), units per layer
    (64-1024, step 64), dropout rate (0.0-0.5, step 0.1) and learning rate
    (1e-5 to 0.01, log-sampled). Activation and optimizer are single-choice
    parameters kept for logging and future extension.

    Reads the module-level `n_features` and `target_columns` to size the input
    and output layers.
    """

    # Maps the 'optimizer' choice to its class so the sampled learning rate can
    # be passed to the constructor. Extend together with the hp.Choice below.
    _OPTIMIZERS = {'adam': tf.keras.optimizers.Adam}

    def build(self, hp):
        """Build and compile one candidate model from the sampled `hp` values.

        Returns a compiled Keras Model whose loss is MSE in log1p space and
        whose metrics report raw-scale MSE and RMSE.
        """
        layers = hp.Int('layers', 2, 8)
        units = hp.Int('units', 64, 1024, step=64)
        act = hp.Choice('activation', ['relu'])
        drop = hp.Float('dropout', 0.0, 0.5, step=0.1)
        opt = hp.Choice('optimizer', ['adam'])
        lr = hp.Float('learning_rate', 1e-5, 0.01, sampling='log')

        inputs = Input(shape=(n_features,))
        x = inputs
        for _ in range(layers):
            x = Dense(units, activation=act)(x)
            x = Dropout(drop)(x)
        outputs = Dense(len(target_columns), activation='linear')(x)
        model = Model(inputs, outputs)

        # Instantiate the optimizer with the sampled learning rate. Passing the
        # bare string (e.g. 'adam') would silently use the default rate and
        # make the 'learning_rate' hyperparameter a no-op.
        optimizer = self._OPTIMIZERS[opt](learning_rate=lr)
        model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=[mse_real, rmse_real])
        return model

# Training budget shared by the search and the final retraining
bs = 64  # batch size
ep = 100  # epochs

# Bayesian optimisation over MyHyperModel: 10 trials, one run each, ranked by
# val_loss. overwrite=False reuses any existing 'bayesian_tuner' project state.
tuner = kt.BayesianOptimization(
    hypermodel=MyHyperModel(),
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    overwrite=False,
    project_name='bayesian_tuner',
)

# Search for the best hyperparameters: validate on explicit labels when they
# exist, otherwise let Keras hold out 20% of the training data
search_options = dict(
    batch_size=bs,
    epochs=ep,
    callbacks=[early_stopping, checkpoint],
)
if y_val is None:
    search_options['validation_split'] = 0.2
else:
    search_options['validation_data'] = (X_val_proc, y_val)

tuner.search(X_train_proc, y_train, **search_options)

# Build a fresh, untrained model from the top-ranked hyperparameters
best_hp = tuner.get_best_hyperparameters(1)[0]
model = tuner.hypermodel.build(best_hp)

# Retrain from scratch with the same callbacks and data; validate on explicit
# labels when they exist, otherwise hold out 20% of the training data
retrain_options = dict(
    epochs=100,
    batch_size=bs,
    callbacks=[early_stopping, checkpoint],
    verbose=2,
)
if y_val is None:
    retrain_options['validation_split'] = 0.2
else:
    retrain_options['validation_data'] = (X_val_proc, y_val)

# duration covers only this retraining fit, not the tuner search
start_time = time.time()
history = model.fit(X_train_proc, y_train, **retrain_options)
duration = time.time() - start_time

# Evaluation & Logging
hist = history.history

# EarlyStopping(restore_best_weights=True) leaves the model with the weights of
# the epoch that had the lowest val_loss, so log that epoch's metrics. The last
# epoch (what [-1] would give) can be up to `patience` epochs past the weights
# that are actually used for prediction.
best_epoch = int(np.argmin(hist['val_loss']))

# Keys are kept as-is: the "loss" entries hold the raw-scale MSE metric, not
# the log-space training loss.
results = {
    'training_loss': float(hist['mse_real'][best_epoch]),
    'validation_loss': float(hist['val_mse_real'][best_epoch]),
    'training_RMSE': float(hist['rmse_real'][best_epoch]),
    'validation_RMSE': float(hist['val_rmse_real'][best_epoch])
}
with open('results.json', 'w') as f:
    json.dump(results, f)

# Prediction & Submission
# X_val_proc holds the preprocessed features of the provided test file
raw_preds = model.predict(X_val_proc)

# Inverse of the log1p applied to the targets. The linear output layer can emit
# negative log-space values, which expm1 maps to negative targets; the targets
# were treated as non-negative when log1p was applied, so floor at zero.
final = np.clip(np.expm1(raw_preds), 0, None)
if final.ndim == 1:
    final = final.reshape(-1, 1)

submission = pd.DataFrame(final, columns=target_columns)
# reset_index so the ids align positionally with the prediction rows
submission.insert(0, id_col, test_ids.reset_index(drop=True))
submission.to_csv('submission_result.csv', index=False)

Trial 10 Complete [00h 15m 18s]
val_loss: 0.010824072174727917

Best val_loss So Far: 0.003554456401616335
Total elapsed time: 03h 07m 13s
Epoch 1/100




9375/9375 - 38s - 4ms/step - loss: 0.0252 - mse_real: 3751.2417 - rmse_real: 8.2843 - val_loss: 0.0053 - val_mse_real: 24.9505 - val_rmse_real: 4.8635
Epoch 2/100




9375/9375 - 34s - 4ms/step - loss: 0.0053 - mse_real: 32.2052 - rmse_real: 5.2252 - val_loss: 0.0049 - val_mse_real: 22.6163 - val_rmse_real: 4.6260
Epoch 3/100
9375/9375 - 34s - 4ms/step - loss: 0.0048 - mse_real: 26.4636 - rmse_real: 4.7925 - val_loss: 0.0052 - val_mse_real: 40.3812 - val_rmse_real: 6.2617
Epoch 4/100




9375/9375 - 35s - 4ms/step - loss: 0.0046 - mse_real: 24.1529 - rmse_real: 4.5920 - val_loss: 0.0046 - val_mse_real: 22.9312 - val_rmse_real: 4.6823
Epoch 5/100




9375/9375 - 32s - 3ms/step - loss: 0.0045 - mse_real: 22.9233 - rmse_real: 4.4812 - val_loss: 0.0038 - val_mse_real: 16.3908 - val_rmse_real: 3.8658
Epoch 6/100




9375/9375 - 28s - 3ms/step - loss: 0.0044 - mse_real: 22.0416 - rmse_real: 4.3970 - val_loss: 0.0037 - val_mse_real: 15.3377 - val_rmse_real: 3.7387
Epoch 7/100
9375/9375 - 27s - 3ms/step - loss: 0.0043 - mse_real: 21.3566 - rmse_real: 4.3382 - val_loss: 0.0043 - val_mse_real: 17.2215 - val_rmse_real: 3.9935
Epoch 8/100
9375/9375 - 33s - 4ms/step - loss: 0.0043 - mse_real: 20.6032 - rmse_real: 4.2697 - val_loss: 0.0043 - val_mse_real: 17.0217 - val_rmse_real: 3.9793
Epoch 9/100
9375/9375 - 30s - 3ms/step - loss: 0.0042 - mse_real: 20.1837 - rmse_real: 4.2207 - val_loss: 0.0043 - val_mse_real: 17.9855 - val_rmse_real: 4.1026
Epoch 10/100
9375/9375 - 35s - 4ms/step - loss: 0.0042 - mse_real: 19.7215 - rmse_real: 4.1796 - val_loss: 0.0044 - val_mse_real: 16.7303 - val_rmse_real: 3.9402
Epoch 11/100
9375/9375 - 38s - 4ms/step - loss: 0.0042 - mse_real: 19.2873 - rmse_real: 4.1379 - val_loss: 0.0042 - val_mse_real: 15.3736 - val_rmse_real: 3.7504
Epoch 12/100
9375/9375 - 37s - 4ms/step - lo



9375/9375 - 38s - 4ms/step - loss: 0.0041 - mse_real: 18.7374 - rmse_real: 4.0805 - val_loss: 0.0036 - val_mse_real: 14.1114 - val_rmse_real: 3.5641
Epoch 14/100




9375/9375 - 37s - 4ms/step - loss: 0.0040 - mse_real: 18.4523 - rmse_real: 4.0558 - val_loss: 0.0036 - val_mse_real: 14.4003 - val_rmse_real: 3.6006
Epoch 15/100




9375/9375 - 33s - 4ms/step - loss: 0.0040 - mse_real: 18.3934 - rmse_real: 4.0448 - val_loss: 0.0036 - val_mse_real: 13.6440 - val_rmse_real: 3.5009
Epoch 16/100
9375/9375 - 38s - 4ms/step - loss: 0.0040 - mse_real: 18.1204 - rmse_real: 4.0148 - val_loss: 0.0037 - val_mse_real: 13.8860 - val_rmse_real: 3.5342
Epoch 17/100
9375/9375 - 38s - 4ms/step - loss: 0.0040 - mse_real: 18.2428 - rmse_real: 4.0091 - val_loss: 0.0036 - val_mse_real: 13.7688 - val_rmse_real: 3.5262
Epoch 18/100




9375/9375 - 38s - 4ms/step - loss: 0.0040 - mse_real: 17.6523 - rmse_real: 3.9735 - val_loss: 0.0036 - val_mse_real: 13.2383 - val_rmse_real: 3.4517
Epoch 19/100




9375/9375 - 38s - 4ms/step - loss: 0.0040 - mse_real: 17.6502 - rmse_real: 3.9673 - val_loss: 0.0036 - val_mse_real: 13.0555 - val_rmse_real: 3.4282
Epoch 20/100
9375/9375 - 37s - 4ms/step - loss: 0.0039 - mse_real: 17.4467 - rmse_real: 3.9496 - val_loss: 0.0036 - val_mse_real: 13.3831 - val_rmse_real: 3.4679
Epoch 21/100
9375/9375 - 38s - 4ms/step - loss: 0.0039 - mse_real: 17.4624 - rmse_real: 3.9430 - val_loss: 0.0036 - val_mse_real: 13.1120 - val_rmse_real: 3.4317
Epoch 22/100
9375/9375 - 36s - 4ms/step - loss: 0.0039 - mse_real: 17.3227 - rmse_real: 3.9276 - val_loss: 0.0036 - val_mse_real: 13.1681 - val_rmse_real: 3.4414
Epoch 23/100
9375/9375 - 37s - 4ms/step - loss: 0.0039 - mse_real: 17.2549 - rmse_real: 3.9117 - val_loss: 0.0036 - val_mse_real: 13.3299 - val_rmse_real: 3.4702
Epoch 24/100
9375/9375 - 37s - 4ms/step - loss: 0.0039 - mse_real: 17.1473 - rmse_real: 3.9090 - val_loss: 0.0036 - val_mse_real: 13.3074 - val_rmse_real: 3.4590
Epoch 25/100
9375/9375 - 36s - 4ms/step -

In [None]:
# Wall-clock seconds of the final retraining fit (the tuner search is not included)
print(duration)


1342.345624
