In [1]:
# =====================================
# Improved Neural Network - Focal Loss
# =====================================
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import f1_score

TARGET = "spend_category"
IDCOL = "trip_id"

# Load processed data
train = pd.read_csv("/kaggle/input/preprocess-travel/train_processed.csv")
test  = pd.read_csv("/kaggle/input/preprocess-travel/test_processed.csv")

2025-11-22 13:53:20.211760: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763819600.471047      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763819600.545953      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [2]:
X = train.drop(columns=[TARGET, IDCOL])
y = train[[TARGET]]
X_test = test.drop(columns=[IDCOL])

# One-hot encode target
ohe = OneHotEncoder(sparse_output=False)
y_ohe = ohe.fit_transform(y)

# Split
X_train, X_val, y_train, y_val = train_test_split(
    X, y_ohe, test_size=0.2, stratify=y, random_state=42
)

In [3]:
# Standardize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
X_test  = scaler.transform(X_test)

In [4]:
# Focal loss
def focal_loss(gamma=2., alpha=.25):
    def loss(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -tf.reduce_sum(
            alpha * tf.math.pow(1. - pt_1, gamma) * tf.math.log(tf.clip_by_value(pt_1, 1e-7, 1.)) +
            (1-alpha) * tf.math.pow(pt_0, gamma) * tf.math.log(tf.clip_by_value(1. - pt_0, 1e-7, 1.))
        )
    return loss

# Build Neural Network
nn = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_dim=X_train.shape[1]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(3, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-11-22 13:53:42.956886: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [5]:
nn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=focal_loss(gamma=2, alpha=0.25)
)

# Callbacks
early = tf.keras.callbacks.EarlyStopping(patience=7, restore_best_weights=True)
lr_reduce = tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5)

In [6]:
# Train
history = nn.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=60,
    batch_size=128,
    callbacks=[early, lr_reduce],
    verbose=1
)

Epoch 1/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - loss: 56.0346 - val_loss: 17.2981 - learning_rate: 0.0010
Epoch 2/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 29.0907 - val_loss: 17.1173 - learning_rate: 0.0010
Epoch 3/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 23.6400 - val_loss: 16.5315 - learning_rate: 0.0010
Epoch 4/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 21.0438 - val_loss: 16.3121 - learning_rate: 0.0010
Epoch 5/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 18.5563 - val_loss: 16.0280 - learning_rate: 0.0010
Epoch 6/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 18.1061 - val_loss: 15.9143 - learning_rate: 0.0010
Epoch 7/60
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 17.3734 - val_loss: 15.8009 - learning_rate

In [7]:
# Predictions
val_probs = nn.predict(X_val)
val_preds = np.argmax(val_probs, axis=1)
y_true = np.argmax(y_val, axis=1)

f1 = f1_score(y_true, val_preds, average="macro")
print("\n Improved NN F1 Score:", round(f1, 4))

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step

 Improved NN F1 Score: 0.6366


In [8]:
# Final test predictions
test_probs = nn.predict(X_test)
test_preds = np.argmax(test_probs, axis=1)

sub = pd.DataFrame({
    IDCOL: test[IDCOL],
    TARGET: test_preds
})

sub.to_csv("/kaggle/working/submission.csv", index=False)
print("\n Saved: nn_focal_submission.csv")

[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

 Saved: nn_focal_submission.csv
