# FINAL EXPORT (BEST MODEL)

In [6]:
from pathlib import Path
import subprocess

# Ask git for the top-level directory of the working tree so that every path
# below can be built relative to the repository root.
# Raises subprocess.CalledProcessError if the git command exits non-zero.
_git_toplevel = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"],
    encoding="utf-8",
)
ROOT = Path(_git_toplevel.strip())


In [7]:

# FINAL EXPORT (BEST MODEL) — NN baseline without the umsatz_rolling7 feature (Kaggle score ~0.19488)

# %%
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_percentage_error

import tensorflow as tf
from tensorflow import keras

# 1) Load data
# The three cleaned splits live side by side under <repo>/data/processed.
processed_dir = ROOT / "data" / "processed"

train_path = processed_dir / "df_train_data_cleaned.csv"
val_path   = processed_dir / "df_validation_data_cleaned.csv"
test_path  = processed_dir / "df_test_data_cleaned.csv"

df_train, df_val, df_test = (
    pd.read_csv(csv_path) for csv_path in (train_path, val_path, test_path)
)

# Parse the date column in place; values that cannot be parsed become NaT
# instead of raising (errors="coerce").
for df in (df_train, df_val, df_test):
    df["Datum"] = pd.to_datetime(df["Datum"], errors="coerce")

# Name of the column the model is trained to predict.
target = "Umsatz_umsatz"

# 3) Drop rolling7 (best model)
# Declared first so the feature frames below can be built in a single chain.
drop_cols = ["umsatz_rolling7"]

# 2) Split X/y
# Train/val: remove the target and the date column (this drop is strict and
# raises if either is missing), then remove drop_cols if present.
X_train = (
    df_train
    .drop(columns=[target, "Datum"])
    .drop(columns=drop_cols, errors="ignore")
)
y_train = df_train[target].astype(float)

X_val = (
    df_val
    .drop(columns=[target, "Datum"])
    .drop(columns=drop_cols, errors="ignore")
)
y_val = df_val[target].astype(float)

# test has no target, so only the date column (and drop_cols) is removed
X_test = (
    df_test
    .drop(columns=["Datum"])
    .drop(columns=drop_cols, errors="ignore")
)

# 4) Ensure categorical as string
# Going through int before str makes the label text independent of whether the
# column was parsed as int or float. Note: astype(int) raises if the column
# contains missing values.
cat_cols = ["Warengruppe_umsatz"]
for df in (X_train, X_val, X_test):
    df["Warengruppe_umsatz"] = df["Warengruppe_umsatz"].astype(int).astype(str)

# 5) Align columns (very important)
# make sure val/test have exactly same feature columns as train
#
# reindex() below inserts a constant 0 for every train column a split lacks and
# discards every column train does not have. Both happen without any message,
# so report them first: a zero-filled feature carries no signal and usually
# points to a problem in the upstream data preparation.
train_columns = list(X_train.columns)
for split_name, split_frame in (("val", X_val), ("test", X_test)):
    missing = [c for c in train_columns if c not in split_frame.columns]
    dropped = [c for c in split_frame.columns if c not in train_columns]
    if missing:
        print(f"WARNING: {split_name} lacks train columns, filled with 0: {missing}")
    if dropped:
        print(f"Note: {split_name} columns not in train are dropped: {dropped}")

X_val  = X_val.reindex(columns=X_train.columns, fill_value=0)
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# 6) Preprocess (scaling + one-hot)
# Every feature that is not listed as categorical is treated as numeric.
num_cols = [col for col in X_train.columns if col not in cat_cols]

# Numeric features are standardised; categorical features are one-hot encoded,
# with categories unseen during fit ignored at transform time.
numeric_step = ("num", StandardScaler(), num_cols)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)

preprocess = ColumnTransformer(
    transformers=[numeric_step, categorical_step],
    remainder="drop",
)

# Fit on train only, then apply the fitted transformer to val and test.
X_train_p = preprocess.fit_transform(X_train)
X_val_p, X_test_p = (preprocess.transform(split) for split in (X_val, X_test))

print("Shapes:", X_train_p.shape, X_val_p.shape, X_test_p.shape)

# 7) Build + train NN (same architecture)
# Seed Python's `random`, NumPy and the TensorFlow backend in one call.
# tf.random.set_seed(42) alone only seeds TensorFlow's global generator, which
# leaves the other sources of randomness unseeded and makes a
# Restart-and-Run-All produce a different model each time.
keras.utils.set_random_seed(42)

# Small fully connected regressor: two ReLU hidden layers and a single linear
# output unit. The input width is taken from the preprocessed training matrix.
model = keras.Sequential([
    keras.layers.Input(shape=(X_train_p.shape[1],)),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(1),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mape",                 # to match Kaggle metric
    metrics=["mae"]
)

# Stop once val_loss has not improved for 10 consecutive epochs and roll the
# model back to the weights of the best epoch seen.
early = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

# epochs=200 is an upper bound; early stopping normally ends training sooner.
history = model.fit(
    X_train_p, y_train,
    validation_data=(X_val_p, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early],
    verbose=1
)

# quick sanity check on val (MAPE)
# Flatten the model output to 1-D and floor it at 0 before scoring.
pred_val = np.clip(model.predict(X_val_p).reshape(-1), 0, None)

# sklearn returns MAPE as a fraction; scale it to percent for display.
val_mape = 100 * mean_absolute_percentage_error(y_val, pred_val)
print(f"Validation MAPE %: {val_mape:.2f}")

# 8) Predict test + create submission
# Same post-processing as for validation: flatten to 1-D and floor at 0.
pred_test = np.clip(model.predict(X_test_p).reshape(-1), 0, None)

# Pair every test id with its prediction (assign works on a copy of the slice).
submission = df_test[["id"]].assign(umsatz=pred_test)

out_path = ROOT / "data" / "processed" / "submission_nn_best.csv"
submission.to_csv(out_path, index=False)

# Short report to eyeball the written file and the prediction range.
print("Saved:", out_path)
print("rows:", len(submission))
predicted = submission["umsatz"]
print("pred mean/min/max:",
      *(float(stat) for stat in (predicted.mean(), predicted.min(), predicted.max())))
print(submission.head())


Shapes: (7487, 18) (1841, 18) (1830, 18)
Epoch 1/200


2026-01-02 11:49:25.719478: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 75.7710 - mae: 172.6764 - val_loss: 46.7604 - val_mae: 107.7386
Epoch 2/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 42.5711 - mae: 107.6000 - val_loss: 34.2592 - val_mae: 79.3066
Epoch 3/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30.7037 - mae: 73.9372 - val_loss: 24.9345 - val_mae: 51.9840
Epoch 4/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 24.5212 - mae: 55.0842 - val_loss: 23.7149 - val_mae: 47.7035
Epoch 5/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.9914 - mae: 50.5846 - val_loss: 22.7706 - val_mae: 45.3726
Epoch 6/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.0775 - mae: 48.3991 - val_loss: 22.1402 - val_mae: 43.9175
Epoch 7/200
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [8]:
# Compare the range of the training target with the range of the validation
# predictions to see how much of the target range the model covers.
for label, values in (("y_train", y_train), ("pred_val", pred_val)):
    print(f"{label} min/max:", values.min(), values.max())

y_train min/max: 12.9373828412174 1879.46183076734
pred_val min/max: 22.423714 659.35956
