# Backpack Prediction Challenge


## Keras - 1 Attempt 

## Attempt 1


In [2]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras import Model

# Set seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# 1. Data Loading & Split
# Training files
train_files = ['playground-series-s5e2/train.csv', 'playground-series-s5e2/training_extra.csv']
train_dfs = [pd.read_csv(f) for f in train_files]
# Test file
df_test = pd.read_csv('playground-series-s5e2/test.csv')
# Infer id and target columns from sample submission
sub_sample = pd.read_csv('playground-series-s5e2/sample_submission.csv', nrows=0)
id_col = sub_sample.columns[0]
target_columns = sub_sample.columns.tolist()[1:]

# Combine training data
df = pd.concat(train_dfs, ignore_index=True)

# 2. Target Encoding (Regression)
y_values = df[target_columns].astype(float).values
y_enc = np.log1p(y_values) if np.all(y_values >= 0) else y_values

# 3. Features & IDs
X = df.drop(columns=target_columns + [id_col], errors='ignore')
X_train = X.copy()
y_train = y_enc
test_ids = df_test[id_col]
X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
y_val = None

# 4. Feature Engineering
# Drop columns with all missing values
X_train.dropna(axis=1, how='all', inplace=True)
X_val = X_val[X_train.columns]
# Identify categorical vs numeric
categorical_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
low_card_cats = [c for c in categorical_cols if X_train[c].nunique() <= 50]
numeric_cols = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 5. Preprocessing Pipeline
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, low_card_cats)
])
# Fit & transform
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# 6. Model Architecture Selection
n_samples, n_features = X_train_proc.shape
n_targets = len(target_columns)
if n_samples < 10000 or n_features < 100:
    units1 = min(n_features * 2, 128)
    units2 = min(n_features, 64)
    hidden_layers = [int(units1), int(units2)]
    use_bn = False
    dropout_rate = 0.3
else:
    sizes = [n_features * i for i in (2, 1, 0.5, 0.25)]
    hidden_layers = [int(s) for s in sizes if s >= 16]
    use_bn = True
    dropout_rate = 0.4

# Build the model
inputs = Input(shape=(n_features,))
x = inputs
for units in hidden_layers:
    x = Dense(units, activation='relu')(x)
    if use_bn:
        x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
outputs = Dense(n_targets, activation='linear')(x)
model = Model(inputs, outputs)

# 7. Compile
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[RootMeanSquaredError(name='rmse'), MeanAbsoluteError(name='mae')]
)

# 8. Callbacks & Training
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
    ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
]
start_time = time.time()
history = model.fit(
    X_train_proc, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=64,
    callbacks=callbacks,
    verbose=2
)
duration = time.time() - start_time

# 9. Evaluation & Logging
training_loss = history.history['loss'][-1]
training_rmse = history.history['rmse'][-1]
validation_loss = history.history['val_loss'][-1]
validation_rmse = history.history['val_rmse'][-1]
results = {
    'training_loss': training_loss,
    'training_rmse': training_rmse,
    'validation_loss': validation_loss,
    'validation_rmse': validation_rmse,
    'training_duration': duration
}
with open('results.json', 'w') as f:
    json.dump(results, f)

# 10. Prediction & Submission
raw_preds = model.predict(X_val_proc)
final = raw_preds
if np.all(final >= 0):
    final = np.expm1(final)
if final.ndim == 1:
    final = final.reshape(-1, 1)
submission = pd.DataFrame(final, columns=target_columns)
submission.insert(0, id_col, test_ids.reset_index(drop=True))
submission.to_csv('submission_result.csv', index=False)


2025-07-09 21:25:04.887946: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/100

Epoch 1: val_loss improved from inf to 0.33582, saving model to best_model.h5




49929/49929 - 80s - 2ms/step - loss: 0.4076 - mae: 0.5157 - rmse: 0.6384 - val_loss: 0.3358 - val_mae: 0.4837 - val_rmse: 0.5795
Epoch 2/100

Epoch 2: val_loss improved from 0.33582 to 0.33579, saving model to best_model.h5




49929/49929 - 85s - 2ms/step - loss: 0.3356 - mae: 0.4828 - rmse: 0.5793 - val_loss: 0.3358 - val_mae: 0.4835 - val_rmse: 0.5795
Epoch 3/100

Epoch 3: val_loss improved from 0.33579 to 0.33577, saving model to best_model.h5




49929/49929 - 87s - 2ms/step - loss: 0.3355 - mae: 0.4827 - rmse: 0.5793 - val_loss: 0.3358 - val_mae: 0.4835 - val_rmse: 0.5795
Epoch 4/100

Epoch 4: val_loss did not improve from 0.33577
49929/49929 - 84s - 2ms/step - loss: 0.3355 - mae: 0.4827 - rmse: 0.5792 - val_loss: 0.3358 - val_mae: 0.4839 - val_rmse: 0.5795
Epoch 5/100

Epoch 5: val_loss did not improve from 0.33577
49929/49929 - 82s - 2ms/step - loss: 0.3355 - mae: 0.4827 - rmse: 0.5792 - val_loss: 0.3358 - val_mae: 0.4837 - val_rmse: 0.5795
Epoch 6/100

Epoch 6: val_loss did not improve from 0.33577
49929/49929 - 74s - 1ms/step - loss: 0.3355 - mae: 0.4827 - rmse: 0.5792 - val_loss: 0.3358 - val_mae: 0.4837 - val_rmse: 0.5795
Epoch 7/100

Epoch 7: val_loss did not improve from 0.33577
49929/49929 - 71s - 1ms/step - loss: 0.3355 - mae: 0.4827 - rmse: 0.5792 - val_loss: 0.3358 - val_mae: 0.4838 - val_rmse: 0.5795
Epoch 8/100

Epoch 8: val_loss did not improve from 0.33577
49929/49929 - 70s - 1ms/step - loss: 0.3355 - mae: 0.48

# Keras Tuner - 2 Attempts

## Attempt 1 - Failed

In [None]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras import Model

# Set seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# 1. Data Loading & Split
# Training files
train_files = ['playground-series-s5e2/train.csv', 'playground-series-s5e2/training_extra.csv']
train_dfs = [pd.read_csv(f) for f in train_files]
# Test file
df_test = pd.read_csv('playground-series-s5e2/test.csv')
# Infer id and target columns from sample submission
sub_sample = pd.read_csv('playground-series-s5e2/sample_submission.csv', nrows=0)
id_col = sub_sample.columns[0]
target_columns = sub_sample.columns.tolist()[1:]

# Combine training data
df = pd.concat(train_dfs, ignore_index=True)

# 2. Target Encoding (Regression)
y_values = df[target_columns].astype(float).values
y_enc = np.log1p(y_values) if np.all(y_values >= 0) else y_values

# 3. Features & IDs
X = df.drop(columns=target_columns + [id_col], errors='ignore')
X_train = X.copy()
y_train = y_enc
test_ids = df_test[id_col]
X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
y_val = None

# 4. Feature Engineering
# Drop columns with all missing values
X_train.dropna(axis=1, how='all', inplace=True)
X_val = X_val[X_train.columns]
# Identify categorical vs numeric
categorical_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
low_card_cats = [c for c in categorical_cols if X_train[c].nunique() <= 50]
numeric_cols = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 5. Preprocessing Pipeline
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, low_card_cats)
])
# Fit & transform
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# 6. Model Architecture Selection
n_samples, n_features = X_train_proc.shape
n_targets = len(target_columns)
if n_samples < 10000 or n_features < 100:
    units1 = min(n_features * 2, 128)
    units2 = min(n_features, 64)
    hidden_layers = [int(units1), int(units2)]
    use_bn = False
    dropout_rate = 0.3
else:
    sizes = [n_features * i for i in (2, 1, 0.5, 0.25)]
    hidden_layers = [int(s) for s in sizes if s >= 16]
    use_bn = True
    dropout_rate = 0.4

# Build the model using Keras Tuner
import keras_tuner as kt
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define early stopping and model checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

n_features = X_train_proc.shape[1]

class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        layers = hp.Int('layers', 2, 8)
        units = hp.Int('units', 64, 1024, 64)
        drop = hp.Float('dropout', 0.0, 0.5, 0.1)
        opt = hp.Choice('optimizer', ['adam'])
        lr = hp.Float('learning_rate', 1e-5, 0.01, sampling='log')

        inputs = Input(shape=(n_features,))
        x = inputs
        for _ in range(layers):
            x = Dense(units, activation='relu')(x)
            x = Dropout(drop)(x)
        outputs = Dense(1, activation='linear')(x)
        model = Model(inputs, outputs)
        model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mae'])
        return model

# Initialize the Bayesian tuner
bs = 32  # batch size
ep = 20   # epochs

tuner = kt.BayesianOptimization(
    MyHyperModel(),
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    overwrite=True,
    project_name='bayesian_tuner'
)

if y_val is not None:
    tuner.search(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )
else:
    tuner.search(
        X_train_proc, y_train,
        validation_split=0.2,
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )

model = tuner.hypermodel.build(tuner.get_best_hyperparameters(1)[0])

# Retrain the model with the original callbacks and data
if y_val is not None:
    history = model.fit(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )
else:
    history = model.fit(
        X_train_proc, y_train,
        validation_split=0.2,
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )

# 9. Evaluation & Logging
training_loss = history.history['loss'][-1]
training_rmse = history.history['rmse'][-1]
validation_loss = history.history['val_loss'][-1]
validation_rmse = history.history['val_rmse'][-1]
results = {
    'training_loss': training_loss,
    'training_rmse': training_rmse,
    'validation_loss': validation_loss,
    'validation_rmse': validation_rmse,
    'training_duration': duration
}
with open('results.json', 'w') as f:
    json.dump(results, f)

# 10. Prediction & Submission
raw_preds = model.predict(X_val_proc)
final = raw_preds
if np.all(final >= 0):
    final = np.expm1(final)
if final.ndim == 1:
    final = final.reshape(-1, 1)
submission = pd.DataFrame(final, columns=target_columns)
submission.insert(0, id_col, test_ids.reset_index(drop=True))
submission.to_csv('submission_result.csv', index=False)


Trial 1 Complete [00h 03m 10s]
val_loss: 0.3359396457672119

Best val_loss So Far: 0.3359396457672119
Total elapsed time: 00h 03m 10s




99858/99858 - 186s - 2ms/step - loss: 0.3556 - mae: 0.4931 - val_loss: 0.3360 - val_mae: 0.4836


KeyError: 'rmse'

## Attempt 2

In [None]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras import Model

# Set seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# 1. Data Loading & Split
# Training files
train_files = ['train.csv', 'training_extra.csv']
train_dfs = [pd.read_csv(f) for f in train_files]
# Test file
df_test = pd.read_csv('test.csv')
# Infer id and target columns from sample submission
sub_sample = pd.read_csv('sample_submission.csv', nrows=0)
id_col = sub_sample.columns[0]
target_columns = sub_sample.columns.tolist()[1:]

# Combine training data
df = pd.concat(train_dfs, ignore_index=True)

# 2. Target Encoding (Regression)
y_values = df[target_columns].astype(float).values
y_enc = np.log1p(y_values) if np.all(y_values >= 0) else y_values

# 3. Features & IDs
X = df.drop(columns=target_columns + [id_col], errors='ignore')
X_train = X.copy()
y_train = y_enc
test_ids = df_test[id_col]
X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
y_val = None

# 4. Feature Engineering
# Drop columns with all missing values
X_train.dropna(axis=1, how='all', inplace=True)
X_val = X_val[X_train.columns]
# Identify categorical vs numeric
categorical_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
low_card_cats = [c for c in categorical_cols if X_train[c].nunique() <= 50]
numeric_cols = X_train.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 5. Preprocessing Pipeline
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, low_card_cats)
])
# Fit & transform
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# 6. Model Architecture Selection
n_samples, n_features = X_train_proc.shape
n_targets = len(target_columns)
if n_samples < 10000 or n_features < 100:
    units1 = min(n_features * 2, 128)
    units2 = min(n_features, 64)
    hidden_layers = [int(units1), int(units2)]
    use_bn = False
    dropout_rate = 0.3
else:
    sizes = [n_features * i for i in (2, 1, 0.5, 0.25)]
    hidden_layers = [int(s) for s in sizes if s >= 16]
    use_bn = True
    dropout_rate = 0.4

# Build the model using Keras Tuner
import keras_tuner as kt
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define early stopping and model checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

n_features = X_train_proc.shape[1]

class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        layers = hp.Int('layers', 2, 8)
        units = hp.Int('units', 64, 1024, 64)
        drop = hp.Float('dropout', 0.0, 0.5, 0.1)
        opt = hp.Choice('optimizer', ['adam'])
        lr = hp.Float('learning_rate', 1e-5, 0.01, sampling='log')

        inputs = Input(shape=(n_features,))
        x = inputs
        for _ in range(layers):
            x = Dense(units, activation='relu')(x)
            x = Dropout(drop)(x)
        outputs = Dense(1, activation='linear')(x)
        model = Model(inputs, outputs)
        model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mae'])
        return model

# Initialize the Bayesian tuner
bs = 32  # batch size
ep = 20   # epochs

tuner = kt.BayesianOptimization(
    MyHyperModel(),
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    overwrite=True,
    project_name='bayesian_tuner'
)

if y_val is not None:
    tuner.search(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )
else:
    tuner.search(
        X_train_proc, y_train,
        validation_split=0.2,
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )

model = tuner.hypermodel.build(tuner.get_best_hyperparameters(1)[0])

# Retrain the model with the original callbacks and data
start_time = time.time()  # Start timing
if y_val is not None:
    history = model.fit(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )
else:
    history = model.fit(
        X_train_proc, y_train,
        validation_split=0.2,
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )
end_time = time.time()  # End timing
duration = end_time - start_time  # Calculate duration

# 9. Evaluation & Logging
training_loss = history.history['loss'][-1]
training_mae = history.history['mae'][-1]  # Changed from 'rmse' to 'mae'
validation_loss = history.history['val_loss'][-1]
validation_mae = history.history['val_mae'][-1]  # Changed from 'val_rmse' to 'val_mae'
results = {
    'training_loss': training_loss,
    'training_mae': training_mae,
    'validation_loss': validation_loss,
    'validation_mae': validation_mae,
    'training_duration': duration
}
with open('results.json', 'w') as f:
    json.dump(results, f)

# 10. Prediction & Submission
raw_preds = model.predict(X_val_proc)
final = raw_preds
if np.all(final >= 0):
    final = np.expm1(final)
if final.ndim == 1:
    final = final.reshape(-1, 1)
submission = pd.DataFrame(final, columns=target_columns)
submission.insert(0, id_col, test_ids.reset_index(drop=True))
submission.to_csv('submission_result.csv', index=False)

Trial 8 Complete [00h 07m 58s]

Best val_loss So Far: 0.335781455039978
Total elapsed time: 11h 08m 19s

Search: Running Trial #9

Value             |Best Value So Far |Hyperparameter
3                 |6                 |layers
512               |64                |units
0.3               |0.2               |dropout
adam              |adam              |optimizer
0.0023691         |0.0046217         |learning_rate

Epoch 1/20
[1m99858/99858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.3771 - mae: 0.5044



[1m99858/99858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 4ms/step - loss: 0.3771 - mae: 0.5044 - val_loss: 0.3360 - val_mae: 0.4840
Epoch 2/20
[1m99854/99858[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - loss: 0.3368 - mae: 0.4837



[1m99858/99858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 4ms/step - loss: 0.3368 - mae: 0.4837 - val_loss: 0.3360 - val_mae: 0.4837
Epoch 3/20
[1m99858/99858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 4ms/step - loss: 0.3357 - mae: 0.4829 - val_loss: 0.3360 - val_mae: 0.4837
Epoch 4/20
