In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error


data = pd.read_csv("modelling data/daily_sales.csv")

# Contextual predictors that are not derived from past sales columns.
base_features = ['time_bucket', 'oil_price', 'is_holiday']

# Swap the time_bucket labels for their integer category codes.
data['time_bucket'] = data['time_bucket'].astype('category').cat.codes

# Feature groups are picked by substring match on the column name
# (anywhere in the name, not only as a suffix).
yesterday_features = [name for name in data.columns if "_yesterday" in name]
avg3_features = [name for name in data.columns if "_avg3" in name]

feature_cols = base_features + yesterday_features + avg3_features

# Every remaining column is treated as a prediction target, except 'date'.
# NOTE(review): this assumes all leftover columns are numeric sales series -- confirm
# against the CSV schema.
non_target_cols = {'date', *base_features, *yesterday_features, *avg3_features}
targets = [name for name in data.columns if name not in non_target_cols]

X = data.loc[:, feature_cols]
y = data.loc[:, targets]

# shuffle=False keeps the file's row order: the leading 80% of rows train the
# model and the trailing 20% are held out.
# NOTE(review): presumably rows are already in chronological order -- TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# MultiOutputRegressor fits a separate copy of the base estimator for each
# target column; every copy uses the hyperparameters below.
model = MultiOutputRegressor(
    XGBRegressor(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=6,
        # Row and column subsampling make tree construction stochastic ...
        subsample=0.9,
        colsample_bytree=0.9,
        objective="reg:squarederror",
        # ... so the seed is stated explicitly to keep reruns reproducible.
        random_state=0,
    )
)

model.fit(X_train, y_train)

MSE: 0.3915211260318756


In [26]:
# Score the held-out rows. Predictions are used as-is (no rounding is applied);
# with several target columns, mean_squared_error's default averages the
# per-column errors into one number.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE:", mse)


MSE: 0.3915211260318756


In [27]:
from sklearn.metrics import r2_score

# Pooled R²: all (row, target) cells are flattened into one long series, so the
# baseline is the grand mean over every target. Differences in level between
# targets therefore count towards this score, and it is not directly comparable
# to the per-target values below.
r2_global = r2_score(y_test.to_numpy().ravel(), y_pred.ravel())
print("Global R²:", r2_global)

# R² of each target column scored on its own, keyed by column name.
# Column i of y_pred corresponds to the i-th column of y_test.
r2_per_target = {
    col: r2_score(y_test[col], y_pred[:, i])
    for i, col in enumerate(y_test.columns)
}

print("\nR² per pizza:")
for name, score in r2_per_target.items():
    print(f"{name}: {score:.4f}")

Global R²: 0.3731009364128113

R² per pizza:
bbq_ckn_l: 0.2450
bbq_ckn_m: 0.2536
bbq_ckn_s: 0.2302
big_meat_s: 0.2954
brie_carre_s: 0.2606
calabrese_l: 0.1842
calabrese_m: 0.2387
calabrese_s: 0.1148
cali_ckn_l: 0.2090
cali_ckn_m: 0.2847
cali_ckn_s: 0.2276
ckn_alfredo_l: 0.1703
ckn_alfredo_m: 0.2769
ckn_alfredo_s: 0.2103
ckn_pesto_l: 0.2495
ckn_pesto_m: 0.2702
ckn_pesto_s: 0.2388
classic_dlx_l: 0.2700
classic_dlx_m: 0.2992
classic_dlx_s: 0.2703
five_cheese_l: 0.3800
four_cheese_l: 0.2483
four_cheese_m: 0.2513
green_garden_l: 0.2827
green_garden_m: 0.3101
green_garden_s: 0.2377
hawaiian_l: 0.1794
hawaiian_m: 0.2492
hawaiian_s: 0.3034
ital_cpcllo_l: 0.2801
ital_cpcllo_m: 0.2937
ital_cpcllo_s: 0.2730
ital_supr_l: 0.2603
ital_supr_m: 0.2900
ital_supr_s: 0.2600
ital_veggie_l: 0.1313
ital_veggie_m: 0.2471
ital_veggie_s: 0.2244
mediterraneo_l: 0.2344
mediterraneo_m: 0.2185
mediterraneo_s: 0.1622
mexicana_l: 0.2632
mexicana_m: 0.1890
mexicana_s: 0.2245
napolitana_l: 0.1606
napolitana_m: 0.3885
