In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from statsmodels.tsa.arima.model import ARIMA

# Load dataset
file_path = "/Users/jaygamage/Downloads/dataf_c.csv"
df = pd.read_csv(file_path)

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Use the date as index and sort it, so the series are fed to ARIMA in time
# order and df.index[-1] below really is the most recent observation.
df = df.set_index('date').sort_index()

# Forecasting for the next 12 months.
# The range starts at the last observed date and the first generated stamp
# (the month-end of the last observed month) is dropped, leaving the
# month-ends of the following `forecast_steps` months.
# NOTE(review): recent pandas releases spell the month-end alias 'ME' and warn
# on 'M' — switch once the minimum pandas version is known.
forecast_steps = 12
forecast_index = pd.date_range(df.index[-1], periods=forecast_steps + 1, freq='M')[1:]

# Define a dictionary to store refined forecasts
refined_forecast_results = {}

# List of columns to forecast
columns_to_forecast = ["gdp", "ncpi", "crudeoil", "traffic_index", "petrol95", "petrol92", "auto_diesel", "super_diesel"]

# Fit one ARIMA(7, 1, 7) model per column and forecast `forecast_steps` ahead
for column in columns_to_forecast:
    model = ARIMA(df[column], order=(7, 1, 7))
    model_fit = model.fit()
    forecast_values = model_fit.forecast(steps=forecast_steps)
    # Keep only the raw values. The forecast comes back with its own index;
    # passing such Series to pd.DataFrame(..., index=forecast_index) would
    # re-align on labels and silently produce NaN wherever they do not match
    # the month-end stamps in forecast_index.
    refined_forecast_results[column] = np.asarray(forecast_values)

# Create a DataFrame with refined forecasts (values are assigned positionally)
refined_forecast_df = pd.DataFrame(refined_forecast_results, index=forecast_index)
refined_forecast_df.insert(0, 'Date', forecast_index)

# Define the number of holidays per month in 2025
holidays_per_month = {
    1: 2, 2: 3, 3: 3, 4: 5, 5: 3, 6: 2, 7: 1, 8: 1, 9: 2, 10: 2, 11: 1, 12: 2
}

# Fill in the number of holidays based on the calendar month of each forecast row
refined_forecast_df['num_of_holidays'] = refined_forecast_df['Date'].dt.month.map(holidays_per_month)

# Display the refined forecast
print(refined_forecast_df)
# Save the refined forecast DataFrame to a CSV file (the 'Date' column carries
# the dates, so the index is not written)
refined_forecast_df.to_csv('/Users/jaygamage/Downloads/ffdata.csv', index=False)

In [152]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

# --- CONFIG ---
TRAIN_PATH = "/Users/jaygamage/Downloads/feature_outputs/train_featured1.csv"
PREDICT_PATH = "/Users/jaygamage/Downloads/feature_outputs/predict_featured1.csv"
TRAIN_OUTPUT = "/Users/jaygamage/Downloads/feature_outputs_new/train_standardized.csv"
PREDICT_OUTPUT = "/Users/jaygamage/Downloads/feature_outputs_new/predict_standardized.csv"

# --- Column Lists ---
# Columns standardized in the training data.
train_standardize_cols = [
    'advertising_promotion', 'petrol95', 'petrol92', 'auto_diesel', 'super_diesel',
    'gdp', 'ncpi', 'crudeoil', 'traffic_index', 'sales_quantity_change_pct'
] + [f'sales_quantity_lag_{i}' for i in range(1, 13)] + [
    'sales_quantity_rollmean_3', 'sales_quantity_rollmean_6',
    'sales_quantity_rollmean_9', 'sales_quantity_rollmean_12',
    'sales_quantity_rolling_std_3', 'sales_quantity_rolling_std_6'
]

# Columns standardized in the prediction data (a subset of the list above).
predict_standardize_cols = [
    "advertising_promotion",
    "gdp", "ncpi", "crudeoil", "traffic_index",
    "petrol95", "petrol92", "auto_diesel", "super_diesel"
]

# --- Load datasets ---
train_df = pd.read_csv(TRAIN_PATH, parse_dates=["date"])
predict_df = pd.read_csv(PREDICT_PATH, parse_dates=["date"])

# Scaled values are floats. Cast the target columns up front so the group-wise
# .loc assignments below never write floats into integer-typed columns
# (newer pandas versions complain about that implicit upcast).
train_df[train_standardize_cols] = train_df[train_standardize_cols].astype(float)
predict_df[predict_standardize_cols] = predict_df[predict_standardize_cols].astype(float)

# --- Store scalers separately ---
train_scalers = {}    # group_code -> scaler fitted on train_standardize_cols
predict_scalers = {}  # group_code -> scaler fitted on predict_standardize_cols

# --- Group-wise Training Standardization ---
for group in train_df["group_code"].unique():
    group_mask = train_df["group_code"] == group

    # Fit the prediction-subset scaler FIRST, on the raw training values.
    # It must be fitted before train_df is overwritten below; otherwise it
    # would be fitted on already-standardized data (mean ~0, std ~1) and
    # leave the prediction data effectively unscaled.
    scaler_predict = StandardScaler()
    scaler_predict.fit(train_df.loc[group_mask, predict_standardize_cols])
    predict_scalers[group] = scaler_predict

    # Scaler for the full training feature list; overwrites this group's rows in place.
    scaler_train = StandardScaler()
    train_df.loc[group_mask, train_standardize_cols] = scaler_train.fit_transform(
        train_df.loc[group_mask, train_standardize_cols]
    )
    train_scalers[group] = scaler_train

# --- Ensure output directories exist ---
os.makedirs(os.path.dirname(TRAIN_OUTPUT), exist_ok=True)
os.makedirs(os.path.dirname(PREDICT_OUTPUT), exist_ok=True)

# --- Save standardized training data ---
train_df.to_csv(TRAIN_OUTPUT, index=False)
print(f"✅ Group-standardized training data saved to: {TRAIN_OUTPUT}")

# --- Apply matching scaler to prediction data ---
# Each group is transformed with the statistics of the same group's training
# rows; groups never seen in training are left unscaled and reported.
for group in predict_df["group_code"].unique():
    if group in predict_scalers:
        group_mask = predict_df["group_code"] == group
        scaler = predict_scalers[group]
        predict_df.loc[group_mask, predict_standardize_cols] = scaler.transform(
            predict_df.loc[group_mask, predict_standardize_cols]
        )
    else:
        print(f"⚠️ Warning: No scaler found for group {group}. Skipping.")

# --- Save standardized prediction data ---
predict_df.to_csv(PREDICT_OUTPUT, index=False)
print(f"✅ Prediction data standardized using matching scalers saved to: {PREDICT_OUTPUT}")

✅ Group-standardized training data saved to: /Users/jaygamage/Downloads/feature_outputs_new/train_standardized.csv
✅ Prediction data standardized using matching scalers saved to: /Users/jaygamage/Downloads/feature_outputs_new/predict_standardized.csv


In [155]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

# --- CONFIG ---
TRAIN_PATH = "/Users/jaygamage/Downloads/feature_outputs/train_featured.csv"
PREDICT_PATH = "/Users/jaygamage/Downloads/feature_outputs/predict_featured.csv"
TRAIN_OUTPUT = "/Users/jaygamage/Downloads/feature_outputs_new/2t.csv"
PREDICT_OUTPUT = "/Users/jaygamage/Downloads/feature_outputs_new/2p.csv"

# --- Column Lists ---
# Columns standardized in the training data.
train_standardize_cols = [
    'advertising_promotion', 'petrol95', 'petrol92', 'auto_diesel', 'super_diesel',
    'gdp', 'ncpi', 'crudeoil', 'traffic_index'
] + [f'sales_quantity_lag_{i}' for i in range(1, 13)] + [
    'sales_quantity_rollmean_3', 'sales_quantity_rollmean_6',
    'sales_quantity_rollmean_9', 'sales_quantity_rollmean_12'
]

# Columns standardized in the prediction data (a subset of the list above).
predict_standardize_cols = [
    "advertising_promotion",
    "gdp", "ncpi", "crudeoil", "traffic_index",
    "petrol95", "petrol92", "auto_diesel", "super_diesel"
]

# --- Load datasets ---
train_df = pd.read_csv(TRAIN_PATH, parse_dates=["date"])
predict_df = pd.read_csv(PREDICT_PATH, parse_dates=["date"])

# Scaled values are floats. Cast the target columns up front so the group-wise
# .loc assignments below never write floats into integer-typed columns
# (newer pandas versions complain about that implicit upcast).
train_df[train_standardize_cols] = train_df[train_standardize_cols].astype(float)
predict_df[predict_standardize_cols] = predict_df[predict_standardize_cols].astype(float)

# --- Store scalers separately ---
train_scalers = {}    # group_code -> scaler fitted on train_standardize_cols
predict_scalers = {}  # group_code -> scaler fitted on predict_standardize_cols

# --- Group-wise Training Standardization ---
for group in train_df["group_code"].unique():
    group_mask = train_df["group_code"] == group

    # Fit the prediction-subset scaler FIRST, on the raw training values.
    # It must be fitted before train_df is overwritten below; otherwise it
    # would be fitted on already-standardized data (mean ~0, std ~1) and
    # leave the prediction data effectively unscaled.
    scaler_predict = StandardScaler()
    scaler_predict.fit(train_df.loc[group_mask, predict_standardize_cols])
    predict_scalers[group] = scaler_predict

    # Scaler for the full training feature list; overwrites this group's rows in place.
    scaler_train = StandardScaler()
    train_df.loc[group_mask, train_standardize_cols] = scaler_train.fit_transform(
        train_df.loc[group_mask, train_standardize_cols]
    )
    train_scalers[group] = scaler_train

# --- Ensure output directories exist ---
os.makedirs(os.path.dirname(TRAIN_OUTPUT), exist_ok=True)
os.makedirs(os.path.dirname(PREDICT_OUTPUT), exist_ok=True)

# --- Save standardized training data ---
train_df.to_csv(TRAIN_OUTPUT, index=False)
print(f"✅ Group-standardized training data saved to: {TRAIN_OUTPUT}")

# --- Apply matching scaler to prediction data ---
# Each group is transformed with the statistics of the same group's training
# rows; groups never seen in training are left unscaled and reported.
for group in predict_df["group_code"].unique():
    if group in predict_scalers:
        group_mask = predict_df["group_code"] == group
        scaler = predict_scalers[group]
        predict_df.loc[group_mask, predict_standardize_cols] = scaler.transform(
            predict_df.loc[group_mask, predict_standardize_cols]
        )
    else:
        print(f"⚠️ Warning: No scaler found for group {group}. Skipping.")

# --- Save standardized prediction data ---
predict_df.to_csv(PREDICT_OUTPUT, index=False)
print(f"✅ Prediction data standardized using matching scalers saved to: {PREDICT_OUTPUT}")

✅ Group-standardized training data saved to: /Users/jaygamage/Downloads/feature_outputs_new/2t.csv
✅ Prediction data standardized using matching scalers saved to: /Users/jaygamage/Downloads/feature_outputs_new/2p.csv


In [None]:
#r1.4#
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import joblib
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -------- CONFIG --------
# Modelling settings
TARGET = "sales_quantity"          # column the models are trained to predict
CAT_FEATURES = ["group_code"]      # passed to CatBoost as categorical features
TEST_YEAR = 2024                   # year used for validation; also the last year of training data in main()
N_TRIALS = 100                     # Optuna trials run per group

# Names of the columns produced by update_recursive_features().
# NOTE(review): neither list is referenced in the code visible in this cell.
LAG_COLS = ["lag_1", "lag_2", "lag_3"]
ROLLING_COLS = ["rolling_mean_3", "rolling_mean_6"]

# Input files and output directory
TRAIN_PATH = "/Users/jaygamage/Downloads/feature_outputs/train_featured.csv"
PREDICT_PATH = "/Users/jaygamage/Downloads/feature_outputs/predict_featured.csv"
OUTPUT_DIR = "/Users/jaygamage/Downloads/r1.4"

# -------- METRICS --------
def get_metrics(y_true, y_pred):
    """Compute regression error metrics for one set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        dict with keys ``"MAE"``, ``"RMSE"``, ``"MAPE"`` (in percent) and ``"R2"``.
    """
    return {
        "MAE": mean_absolute_error(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        # 1e-6 is added to the denominator so a zero actual does not divide by zero.
        "MAPE": np.mean(np.abs((y_true - y_pred) / (y_true + 1e-6))) * 100,
        "R2": r2_score(y_true, y_pred),
    }

def plot_forecast(dates, y_true, y_pred, group_code, save_path):
    """Draw actual vs. predicted values over time and write the figure to disk.

    Args:
        dates: X-axis values shared by both lines.
        y_true: Actual values (plotted with "o" markers).
        y_pred: Predicted values (plotted with "x" markers).
        group_code: Group identifier shown in the title.
        save_path: Destination file for the saved figure.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(dates, y_true, label="Actual", marker="o")
    ax.plot(dates, y_pred, label="Predicted", marker="x")
    ax.set_title(f"CatBoost Forecast for Group {group_code}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Sales Quantity")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(save_path)
    # Close the figure so repeated calls (one per group) do not accumulate open figures.
    plt.close(fig)

# -------- FEATURE UPDATES --------
def update_recursive_features(df):
    """Return ``df`` sorted by date with lag and rolling-mean columns of TARGET recomputed.

    Adds or overwrites ``lag_1``..``lag_3`` (TARGET shifted by 1-3 rows) and
    ``rolling_mean_3`` / ``rolling_mean_6`` (mean of TARGET over the last 3 / 6
    rows, current row included). The input frame is not modified.

    NOTE(review): the rolling windows include the current row, so a row whose
    TARGET is still NaN gets NaN rolling means. Also confirm that the column
    names written here match the lag/rolling column names the model was
    trained on.
    """
    ordered = df.sort_values("date")
    target = ordered[TARGET]

    refreshed = {f"lag_{steps_back}": target.shift(steps_back) for steps_back in (1, 2, 3)}
    refreshed.update(
        {f"rolling_mean_{span}": target.rolling(span).mean() for span in (3, 6)}
    )
    return ordered.assign(**refreshed)

# -------- OPTUNA OBJECTIVE --------
def build_objective_expanding(train_df, features, cat_features):
    """Build an Optuna objective that scores CatBoost params with expanding-window validation.

    Args:
        train_df: Training rows; must contain a datetime ``date`` column,
            the ``features`` columns and the TARGET column.
        features: Column names used as model inputs.
        cat_features: Column names CatBoost should treat as categorical.

    Returns:
        A function ``objective(trial) -> float`` to be minimized. For each
        fold it fits a model on all years up to the training cut-off, scores
        the following year, and returns the mean penalized loss over folds.
    """
    # (last training year, validation year) for each expanding-window fold
    folds = ((2022, 2023), (2023, 2024))
    # Weights of the two penalty terms added to RMSE
    mape_weight, r2_weight = 1000, 1000

    def objective(trial):
        params = {
            "iterations": trial.suggest_int("iterations", 300, 1500),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "depth": trial.suggest_int("depth", 4, 10),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
            "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.0),
            "random_strength": trial.suggest_float("random_strength", 1e-9, 10.0, log=True),
            "loss_function": "RMSE",
            "random_seed": 42,
            "verbose": 0
        }

        row_years = train_df["date"].dt.year
        fold_losses = []

        for last_train_year, holdout_year in folds:
            fit_rows = train_df[row_years <= last_train_year]
            holdout_rows = train_df[row_years == holdout_year]
            holdout_X = holdout_rows[features]
            holdout_y = holdout_rows[TARGET]

            # The hold-out year doubles as the early-stopping eval set.
            regressor = CatBoostRegressor(**params)
            regressor.fit(
                fit_rows[features], fit_rows[TARGET],
                eval_set=(holdout_X, holdout_y),
                cat_features=cat_features,
                early_stopping_rounds=50,
                verbose=0
            )

            holdout_pred = regressor.predict(holdout_X)
            rmse = np.sqrt(mean_squared_error(holdout_y, holdout_pred))
            mape = np.mean(np.abs((holdout_y - holdout_pred) / (holdout_y + 1e-6))) * 100
            r2 = r2_score(holdout_y, holdout_pred)

            # RMSE plus penalties for MAPE above 15 (percent) and R2 below 0.5.
            mape_excess = max(0, mape - 15)
            r2_shortfall = max(0.5 - r2, 0)
            fold_losses.append(rmse + mape_weight * mape_excess + r2_weight * r2_shortfall)

        return np.mean(fold_losses)

    return objective

# -------- MAIN --------
def main():
    """Tune, validate, train and recursively forecast one CatBoost model per group.

    For every ``group_code`` in the training file that also has rows in the
    prediction file:

    1. tune hyper-parameters with Optuna (expanding-window objective),
    2. measure hold-out accuracy on TEST_YEAR with a model trained only on
       earlier years, and save an actual-vs-predicted plot,
    3. fit the final model on all rows up to and including TEST_YEAR and
       save it,
    4. forecast the prediction rows one date at a time, feeding each
       prediction back into the history, and save the result as CSV.

    Writes models, plots, per-group forecasts, ``metrics.csv`` and
    ``optuna_params.csv`` under OUTPUT_DIR.
    """
    for subdir in ("models", "forecasts", "plots"):
        os.makedirs(f"{OUTPUT_DIR}/{subdir}", exist_ok=True)

    train_df = pd.read_csv(TRAIN_PATH, parse_dates=["date"])
    predict_df = pd.read_csv(PREDICT_PATH, parse_dates=["date"])

    group_codes = train_df["group_code"].unique()
    metric_records = []
    optuna_param_records = []

    for group in group_codes:
        print(f"🔍 Tuning group: {group}")
        train_g = train_df[train_df["group_code"] == group].copy()
        pred_g = predict_df[predict_df["group_code"] == group].copy()

        if pred_g.empty:
            print(f"⚠️ Skipping group {group} — no 2025 data.")
            continue

        # Model inputs: every training column that also exists in the
        # prediction data, minus the date, the target and material_group.
        drop_cols = ["date", "sales_quantity", "material_group"]
        features = [col for col in train_g.columns if col not in drop_cols and col in pred_g.columns]

        # -------- OPTUNA TUNING --------
        study = optuna.create_study(direction="minimize")
        study.optimize(build_objective_expanding(train_g, features, CAT_FEATURES), n_trials=N_TRIALS)
        best_params = study.best_params
        best_params.update({"loss_function": "RMSE", "random_seed": 42, "verbose": 0})
        optuna_param_records.append({**best_params, "group_code": group})

        # -------- VALIDATION (hold-out) --------
        # Score TEST_YEAR with a model that has NOT seen TEST_YEAR. Evaluating
        # the final model (trained on <= TEST_YEAR) on those same rows would
        # report in-sample fit rather than forecast accuracy.
        # The hyper-parameters were tuned against these years, so the numbers
        # are still somewhat optimistic.
        row_years = train_g["date"].dt.year
        holdout_train = train_g[row_years < TEST_YEAR]
        val_cut = train_g[row_years == TEST_YEAR]
        if holdout_train.empty or val_cut.empty:
            print(f"⚠️ Group {group}: no data before or in {TEST_YEAR}; validation metrics skipped.")
        else:
            holdout_model = CatBoostRegressor(**best_params)
            holdout_model.fit(holdout_train[features], holdout_train[TARGET], cat_features=CAT_FEATURES)
            val_preds = holdout_model.predict(val_cut[features])
            metrics = get_metrics(val_cut[TARGET], val_preds)
            metrics["group_code"] = group
            metric_records.append(metrics)
            plot_forecast(val_cut["date"], val_cut[TARGET], val_preds, group, f"{OUTPUT_DIR}/plots/{group}_forecast.png")

        # -------- FINAL MODEL TRAINING --------
        final_train = train_g[row_years <= TEST_YEAR]
        model = CatBoostRegressor(**best_params)
        model.fit(final_train[features], final_train[TARGET], cat_features=CAT_FEATURES)
        joblib.dump(model, f"{OUTPUT_DIR}/models/{group}_catboost_optuna.pkl")

        # -------- RECURSIVE FORECASTING --------
        # NOTE(review): update_recursive_features() rewrites lag_1..3 and
        # rolling_mean_3/6. If `features` uses differently named lag/rolling
        # columns, this loop does not refresh them — confirm against the CSVs.
        history = pd.concat([train_g, pred_g], ignore_index=True).sort_values("date")
        # Walk the forecast dates chronologically (not in file order) so each
        # step can build on the predictions written for earlier dates.
        forecast_dates = pred_g["date"].drop_duplicates().sort_values()
        for current_date in forecast_dates:
            temp_history = update_recursive_features(history[history["date"] <= current_date].copy())
            updated_row = temp_history[temp_history["date"] == current_date]
            pred_value = model.predict(updated_row[features])[0]
            history.loc[history["date"] == current_date, TARGET] = pred_value

        # Save Recursive Forecast: the rows for the dates that were forecast
        # above, selected by date rather than by a hard-coded year.
        forecast_out = history.loc[history["date"].isin(forecast_dates), ["date", "group_code", TARGET]]
        forecast_out = forecast_out.rename(columns={TARGET: "predicted_sales_quantity"})
        forecast_out.to_csv(f"{OUTPUT_DIR}/forecasts/{group}_forecast.csv", index=False)
        print(f"📈 Group {group} complete.")

    pd.DataFrame(metric_records).to_csv(f"{OUTPUT_DIR}/metrics.csv", index=False)
    pd.DataFrame(optuna_param_records).to_csv(f"{OUTPUT_DIR}/optuna_params.csv", index=False)
    print("✅ All groups trained, validated, and recursively forecasted.")

if __name__ == "__main__":
    main()

[I 2025-04-26 01:34:49,835] A new study created in memory with name: no-name-ded43287-ec25-45bf-9ef2-15ad3fa0bfee
[I 2025-04-26 01:34:49,970] Trial 0 finished with value: 7862.8278905478855 and parameters: {'iterations': 1442, 'learning_rate': 0.059377922994710765, 'depth': 4, 'l2_leaf_reg': 5.94584466141386, 'bagging_temperature': 0.5601012068916709, 'random_strength': 0.0009868037116197925}. Best is trial 0 with value: 7862.8278905478855.


🔍 Tuning group: 0


[I 2025-04-26 01:34:51,390] Trial 1 finished with value: 13324.817074049908 and parameters: {'iterations': 929, 'learning_rate': 0.07468696243592728, 'depth': 10, 'l2_leaf_reg': 5.6575516192653055, 'bagging_temperature': 0.04816369012186028, 'random_strength': 3.979801658079165e-05}. Best is trial 0 with value: 7862.8278905478855.
[I 2025-04-26 01:34:51,770] Trial 2 finished with value: 12163.14090939039 and parameters: {'iterations': 1145, 'learning_rate': 0.17505703867986538, 'depth': 9, 'l2_leaf_reg': 3.5217401211785617, 'bagging_temperature': 0.045071577450199074, 'random_strength': 1.693722566177437e-08}. Best is trial 0 with value: 7862.8278905478855.
[I 2025-04-26 01:34:52,186] Trial 3 finished with value: 11206.47809005453 and parameters: {'iterations': 359, 'learning_rate': 0.16500946102720374, 'depth': 9, 'l2_leaf_reg': 4.862791911475443, 'bagging_temperature': 0.8482046762848681, 'random_strength': 0.02706531472333222}. Best is trial 0 with value: 7862.8278905478855.
[I 2025

📈 Group 0 complete.
🔍 Tuning group: 1


[I 2025-04-26 01:35:11,953] Trial 0 finished with value: 16522.76897660445 and parameters: {'iterations': 375, 'learning_rate': 0.24043129702836444, 'depth': 9, 'l2_leaf_reg': 7.864814763915595, 'bagging_temperature': 0.06265824741161707, 'random_strength': 0.07868469694618}. Best is trial 0 with value: 16522.76897660445.
[I 2025-04-26 01:35:12,346] Trial 1 finished with value: 17080.264207962267 and parameters: {'iterations': 497, 'learning_rate': 0.015272028513940482, 'depth': 8, 'l2_leaf_reg': 5.545087406519544, 'bagging_temperature': 0.24651714767225708, 'random_strength': 0.0002244802310683211}. Best is trial 0 with value: 16522.76897660445.
[I 2025-04-26 01:35:12,635] Trial 2 finished with value: 17798.94546214555 and parameters: {'iterations': 585, 'learning_rate': 0.18486443571803496, 'depth': 9, 'l2_leaf_reg': 4.091245121521282, 'bagging_temperature': 0.3419030223758903, 'random_strength': 0.0009576866433728101}. Best is trial 0 with value: 16522.76897660445.
[I 2025-04-26 01:

📈 Group 1 complete.
🔍 Tuning group: 2


[I 2025-04-26 01:35:23,628] Trial 1 finished with value: 15981.818673762213 and parameters: {'iterations': 716, 'learning_rate': 0.023414949907882614, 'depth': 7, 'l2_leaf_reg': 7.265481193102462, 'bagging_temperature': 0.2212484866485691, 'random_strength': 0.19894939795923094}. Best is trial 0 with value: 14951.244492693388.
[I 2025-04-26 01:35:24,069] Trial 2 finished with value: 15610.316797974203 and parameters: {'iterations': 380, 'learning_rate': 0.11440529895969535, 'depth': 9, 'l2_leaf_reg': 8.242567911904914, 'bagging_temperature': 0.752137373562917, 'random_strength': 0.9690432548027563}. Best is trial 0 with value: 14951.244492693388.
[I 2025-04-26 01:35:24,783] Trial 3 finished with value: 14931.741225292124 and parameters: {'iterations': 836, 'learning_rate': 0.15672573123962594, 'depth': 9, 'l2_leaf_reg': 9.961018851614083, 'bagging_temperature': 0.5161187609425167, 'random_strength': 2.327091971613575e-05}. Best is trial 3 with value: 14931.741225292124.
[I 2025-04-26 0

📈 Group 2 complete.
🔍 Tuning group: 3


[I 2025-04-26 01:35:55,770] Trial 0 finished with value: 19400.21602241384 and parameters: {'iterations': 1499, 'learning_rate': 0.02639430233544647, 'depth': 4, 'l2_leaf_reg': 5.052417672747642, 'bagging_temperature': 0.6237964783679983, 'random_strength': 4.538755905223514e-06}. Best is trial 0 with value: 19400.21602241384.
[I 2025-04-26 01:35:55,880] Trial 1 finished with value: 18130.68972892443 and parameters: {'iterations': 408, 'learning_rate': 0.18536561195200538, 'depth': 8, 'l2_leaf_reg': 7.938772456923585, 'bagging_temperature': 0.2601710542347373, 'random_strength': 2.0119970724753214e-06}. Best is trial 1 with value: 18130.68972892443.
[I 2025-04-26 01:35:56,052] Trial 2 finished with value: 19228.189806442275 and parameters: {'iterations': 736, 'learning_rate': 0.013185396779602907, 'depth': 5, 'l2_leaf_reg': 4.504978395711271, 'bagging_temperature': 0.00022930224933181975, 'random_strength': 8.212821249906205e-09}. Best is trial 1 with value: 18130.68972892443.
[I 2025-

📈 Group 3 complete.
🔍 Tuning group: 4


[I 2025-04-26 01:36:33,298] Trial 0 finished with value: 15542.305264893124 and parameters: {'iterations': 513, 'learning_rate': 0.13457371716886438, 'depth': 10, 'l2_leaf_reg': 9.491589695778739, 'bagging_temperature': 0.4010952772051096, 'random_strength': 0.0002308545250405956}. Best is trial 0 with value: 15542.305264893124.
[I 2025-04-26 01:36:34,056] Trial 1 finished with value: 11650.346526275775 and parameters: {'iterations': 1499, 'learning_rate': 0.044360344720786336, 'depth': 10, 'l2_leaf_reg': 1.2984819016322757, 'bagging_temperature': 0.020411654581573457, 'random_strength': 0.00028115297844896444}. Best is trial 1 with value: 11650.346526275775.
[I 2025-04-26 01:36:34,260] Trial 2 finished with value: 11938.295980633198 and parameters: {'iterations': 837, 'learning_rate': 0.016240524021083575, 'depth': 4, 'l2_leaf_reg': 3.307071861493184, 'bagging_temperature': 0.4206760320976818, 'random_strength': 6.4712425929536786e-09}. Best is trial 1 with value: 11650.346526275775.


📈 Group 4 complete.
🔍 Tuning group: 5


[I 2025-04-26 01:37:01,784] Trial 1 finished with value: 10651.345921391016 and parameters: {'iterations': 960, 'learning_rate': 0.017702247910412767, 'depth': 6, 'l2_leaf_reg': 1.1122828300487775, 'bagging_temperature': 0.33429369587196334, 'random_strength': 4.588738219607755e-05}. Best is trial 0 with value: 10149.9233981189.
[I 2025-04-26 01:37:02,900] Trial 2 finished with value: 13686.605691493685 and parameters: {'iterations': 713, 'learning_rate': 0.03163120669811649, 'depth': 10, 'l2_leaf_reg': 4.72302720532299, 'bagging_temperature': 0.14654684310417598, 'random_strength': 2.299363861554847e-09}. Best is trial 0 with value: 10149.9233981189.
[I 2025-04-26 01:37:02,963] Trial 3 finished with value: 11848.590281635657 and parameters: {'iterations': 966, 'learning_rate': 0.1432089368503695, 'depth': 4, 'l2_leaf_reg': 9.06270824770192, 'bagging_temperature': 0.6193678314260634, 'random_strength': 5.211638825209602e-06}. Best is trial 0 with value: 10149.9233981189.
[I 2025-04-26 

📈 Group 5 complete.
🔍 Tuning group: 6


[I 2025-04-26 01:37:26,079] Trial 1 finished with value: 16442.18582926184 and parameters: {'iterations': 1364, 'learning_rate': 0.041829026523843245, 'depth': 6, 'l2_leaf_reg': 7.496637005912849, 'bagging_temperature': 0.0964097847913794, 'random_strength': 0.06817692137863775}. Best is trial 1 with value: 16442.18582926184.
[I 2025-04-26 01:37:26,168] Trial 2 finished with value: 18092.42858102005 and parameters: {'iterations': 692, 'learning_rate': 0.10115741295059061, 'depth': 6, 'l2_leaf_reg': 5.24204638196045, 'bagging_temperature': 0.592748941574008, 'random_strength': 0.0017977591188632117}. Best is trial 1 with value: 16442.18582926184.
[I 2025-04-26 01:37:26,301] Trial 3 finished with value: 17631.508098148122 and parameters: {'iterations': 824, 'learning_rate': 0.012339942748292716, 'depth': 4, 'l2_leaf_reg': 6.941287308638123, 'bagging_temperature': 0.07365375804318197, 'random_strength': 1.4053577970483031e-05}. Best is trial 1 with value: 16442.18582926184.
[I 2025-04-26 

📈 Group 6 complete.
🔍 Tuning group: 7


[I 2025-04-26 01:37:38,615] Trial 0 finished with value: 16723.608464056488 and parameters: {'iterations': 678, 'learning_rate': 0.222288473750527, 'depth': 10, 'l2_leaf_reg': 6.404499467266864, 'bagging_temperature': 0.16189240068733635, 'random_strength': 0.006397747378963207}. Best is trial 0 with value: 16723.608464056488.
[I 2025-04-26 01:37:38,930] Trial 1 finished with value: 12343.993589264071 and parameters: {'iterations': 593, 'learning_rate': 0.18099683009385573, 'depth': 10, 'l2_leaf_reg': 5.47110476172011, 'bagging_temperature': 0.13043972476131493, 'random_strength': 1.02739819382729}. Best is trial 1 with value: 12343.993589264071.
[I 2025-04-26 01:37:41,270] Trial 2 finished with value: 17663.199110085905 and parameters: {'iterations': 1071, 'learning_rate': 0.01756952139003983, 'depth': 10, 'l2_leaf_reg': 9.927569875996657, 'bagging_temperature': 0.346885813009683, 'random_strength': 1.2182314855754962e-07}. Best is trial 1 with value: 12343.993589264071.
[I 2025-04-26

📈 Group 7 complete.
🔍 Tuning group: 8


[I 2025-04-26 01:38:08,091] Trial 1 finished with value: 20762.603703350796 and parameters: {'iterations': 1135, 'learning_rate': 0.04650914276027819, 'depth': 9, 'l2_leaf_reg': 2.9927913877878165, 'bagging_temperature': 0.24233050369982045, 'random_strength': 1.7782172594816575e-09}. Best is trial 0 with value: 17892.7090183094.
[I 2025-04-26 01:38:08,338] Trial 2 finished with value: 17918.96167625479 and parameters: {'iterations': 510, 'learning_rate': 0.03897381888961422, 'depth': 6, 'l2_leaf_reg': 6.899321172328002, 'bagging_temperature': 0.7013657067135362, 'random_strength': 5.608144916276227e-09}. Best is trial 0 with value: 17892.7090183094.
[I 2025-04-26 01:38:09,404] Trial 3 finished with value: 20200.960256068276 and parameters: {'iterations': 1473, 'learning_rate': 0.02936572410794781, 'depth': 8, 'l2_leaf_reg': 7.035790889589451, 'bagging_temperature': 0.25564096821935767, 'random_strength': 3.720856108313612e-06}. Best is trial 0 with value: 17892.7090183094.
[I 2025-04-

📈 Group 8 complete.
🔍 Tuning group: 9


[I 2025-04-26 01:38:32,170] Trial 1 finished with value: 14307.384456496773 and parameters: {'iterations': 687, 'learning_rate': 0.042839984206930075, 'depth': 9, 'l2_leaf_reg': 7.005898339779747, 'bagging_temperature': 0.07751539767987392, 'random_strength': 9.645975548282805e-08}. Best is trial 0 with value: 9595.564181474807.
[I 2025-04-26 01:38:33,078] Trial 2 finished with value: 15200.693401827828 and parameters: {'iterations': 1479, 'learning_rate': 0.06345805113964298, 'depth': 10, 'l2_leaf_reg': 6.387369631557199, 'bagging_temperature': 0.7607414064836553, 'random_strength': 5.392188987446223e-08}. Best is trial 0 with value: 9595.564181474807.
[I 2025-04-26 01:38:33,249] Trial 3 finished with value: 9360.282331596332 and parameters: {'iterations': 614, 'learning_rate': 0.01653736647497615, 'depth': 5, 'l2_leaf_reg': 3.3637771939091294, 'bagging_temperature': 0.28809641412095477, 'random_strength': 0.013832690910554581}. Best is trial 3 with value: 9360.282331596332.
[I 2025-0

📈 Group 9 complete.
🔍 Tuning group: 10


[I 2025-04-26 01:38:46,133] Trial 0 finished with value: 22463.26303714604 and parameters: {'iterations': 773, 'learning_rate': 0.21494346100277772, 'depth': 8, 'l2_leaf_reg': 4.3505897692985265, 'bagging_temperature': 0.6144431703649831, 'random_strength': 8.085760636891013e-08}. Best is trial 0 with value: 22463.26303714604.
[I 2025-04-26 01:38:46,237] Trial 1 finished with value: 17537.009144810567 and parameters: {'iterations': 747, 'learning_rate': 0.09185378802268164, 'depth': 4, 'l2_leaf_reg': 5.828612762229004, 'bagging_temperature': 0.7804582723109561, 'random_strength': 0.020914485870810715}. Best is trial 1 with value: 17537.009144810567.
[I 2025-04-26 01:38:46,565] Trial 2 finished with value: 23404.885914034305 and parameters: {'iterations': 493, 'learning_rate': 0.04155531857391231, 'depth': 7, 'l2_leaf_reg': 5.153164039165732, 'bagging_temperature': 0.6341206293725449, 'random_strength': 0.0827286317045694}. Best is trial 1 with value: 17537.009144810567.
[I 2025-04-26 0

📈 Group 10 complete.
🔍 Tuning group: 11


[I 2025-04-26 01:39:08,633] Trial 0 finished with value: 10272.191838962432 and parameters: {'iterations': 1272, 'learning_rate': 0.07211012160409501, 'depth': 9, 'l2_leaf_reg': 7.928545487360657, 'bagging_temperature': 0.101532711692782, 'random_strength': 4.6542438889832495e-09}. Best is trial 0 with value: 10272.191838962432.
[I 2025-04-26 01:39:08,979] Trial 1 finished with value: 8607.063888773704 and parameters: {'iterations': 1300, 'learning_rate': 0.030933825230313364, 'depth': 6, 'l2_leaf_reg': 4.973264617160592, 'bagging_temperature': 0.49264293774764834, 'random_strength': 5.918203697601356e-05}. Best is trial 1 with value: 8607.063888773704.
[I 2025-04-26 01:39:09,277] Trial 2 finished with value: 9642.054253945895 and parameters: {'iterations': 1062, 'learning_rate': 0.05003496654939481, 'depth': 7, 'l2_leaf_reg': 4.659902068750462, 'bagging_temperature': 0.9317947971351247, 'random_strength': 5.3126875168213305e-09}. Best is trial 1 with value: 8607.063888773704.
[I 2025-

📈 Group 11 complete.
🔍 Tuning group: 12


[I 2025-04-26 01:39:34,417] Trial 0 finished with value: 12713.775618367094 and parameters: {'iterations': 571, 'learning_rate': 0.016703043404836585, 'depth': 6, 'l2_leaf_reg': 3.0224393490012442, 'bagging_temperature': 0.9236064818393205, 'random_strength': 0.00021506958969446098}. Best is trial 0 with value: 12713.775618367094.
[I 2025-04-26 01:39:34,585] Trial 1 finished with value: 18196.49024908631 and parameters: {'iterations': 1011, 'learning_rate': 0.13656594842050712, 'depth': 9, 'l2_leaf_reg': 8.778306720074848, 'bagging_temperature': 0.07847996012367198, 'random_strength': 9.755491159340791e-08}. Best is trial 0 with value: 12713.775618367094.
[I 2025-04-26 01:39:35,193] Trial 2 finished with value: 18463.022336257192 and parameters: {'iterations': 461, 'learning_rate': 0.011507684045007356, 'depth': 9, 'l2_leaf_reg': 5.763069452284639, 'bagging_temperature': 0.5302443875919733, 'random_strength': 1.117910002600791e-05}. Best is trial 0 with value: 12713.775618367094.
[I 20

📈 Group 12 complete.
🔍 Tuning group: 13


[I 2025-04-26 01:39:52,423] Trial 0 finished with value: 17358.98635148257 and parameters: {'iterations': 313, 'learning_rate': 0.19183265771706845, 'depth': 9, 'l2_leaf_reg': 5.033762521468369, 'bagging_temperature': 0.9159245875856568, 'random_strength': 4.346647193756391e-09}. Best is trial 0 with value: 17358.98635148257.
[I 2025-04-26 01:39:52,497] Trial 1 finished with value: 15844.811723186887 and parameters: {'iterations': 923, 'learning_rate': 0.20072152748542077, 'depth': 5, 'l2_leaf_reg': 8.557699230245943, 'bagging_temperature': 0.4050633195492269, 'random_strength': 0.0022643594100851135}. Best is trial 1 with value: 15844.811723186887.
[I 2025-04-26 01:39:52,726] Trial 2 finished with value: 16843.43644341881 and parameters: {'iterations': 348, 'learning_rate': 0.061300842748342556, 'depth': 7, 'l2_leaf_reg': 8.325090134441009, 'bagging_temperature': 0.21781642218523978, 'random_strength': 5.038433682217926e-05}. Best is trial 1 with value: 15844.811723186887.
[I 2025-04-

📈 Group 13 complete.
🔍 Tuning group: 14


[I 2025-04-26 01:40:08,538] Trial 2 finished with value: 11018.723657172435 and parameters: {'iterations': 1323, 'learning_rate': 0.011029932234793857, 'depth': 5, 'l2_leaf_reg': 5.7835859497330935, 'bagging_temperature': 0.33909105705764064, 'random_strength': 2.2084759174629237e-08}. Best is trial 1 with value: 8561.470343557745.
[I 2025-04-26 01:40:08,821] Trial 3 finished with value: 10931.746285434361 and parameters: {'iterations': 1089, 'learning_rate': 0.106336645493274, 'depth': 9, 'l2_leaf_reg': 4.947421469209826, 'bagging_temperature': 0.10990989447411692, 'random_strength': 0.002963954734750946}. Best is trial 1 with value: 8561.470343557745.
[I 2025-04-26 01:40:09,048] Trial 4 finished with value: 8548.669112312777 and parameters: {'iterations': 1340, 'learning_rate': 0.018125333524817152, 'depth': 7, 'l2_leaf_reg': 1.5087884635973927, 'bagging_temperature': 0.3831814223077864, 'random_strength': 0.7275141999981275}. Best is trial 4 with value: 8548.669112312777.
[I 2025-04

📈 Group 14 complete.
🔍 Tuning group: 15


[I 2025-04-26 01:43:10,414] Trial 1 finished with value: 13022.312788994688 and parameters: {'iterations': 766, 'learning_rate': 0.01646071582738597, 'depth': 9, 'l2_leaf_reg': 9.500622700851865, 'bagging_temperature': 0.9473924753745042, 'random_strength': 0.003392756249728236}. Best is trial 0 with value: 12908.402291481902.
[I 2025-04-26 01:43:10,923] Trial 2 finished with value: 14175.71674933055 and parameters: {'iterations': 442, 'learning_rate': 0.027477059074717824, 'depth': 9, 'l2_leaf_reg': 1.7565044610933915, 'bagging_temperature': 0.1748996908852798, 'random_strength': 2.1607114706852274e-06}. Best is trial 0 with value: 12908.402291481902.
[I 2025-04-26 01:43:11,003] Trial 3 finished with value: 12327.761227442823 and parameters: {'iterations': 403, 'learning_rate': 0.08548754662602683, 'depth': 6, 'l2_leaf_reg': 4.1450207973697, 'bagging_temperature': 0.865735270500192, 'random_strength': 2.8846611772906374e-07}. Best is trial 3 with value: 12327.761227442823.
[I 2025-04-

📈 Group 15 complete.
🔍 Tuning group: 16


[I 2025-04-26 01:43:23,806] Trial 0 finished with value: 5816.836889205209 and parameters: {'iterations': 796, 'learning_rate': 0.056977444326316, 'depth': 8, 'l2_leaf_reg': 7.753323375961486, 'bagging_temperature': 0.5791955321302406, 'random_strength': 0.0059540486007460215}. Best is trial 0 with value: 5816.836889205209.
[I 2025-04-26 01:43:24,586] Trial 1 finished with value: 5542.090561499001 and parameters: {'iterations': 941, 'learning_rate': 0.019166789487468337, 'depth': 9, 'l2_leaf_reg': 4.904261053198953, 'bagging_temperature': 0.5231190906038733, 'random_strength': 0.00027539210114555234}. Best is trial 1 with value: 5542.090561499001.
[I 2025-04-26 01:43:24,654] Trial 2 finished with value: 6279.751377196362 and parameters: {'iterations': 962, 'learning_rate': 0.057611856293245646, 'depth': 5, 'l2_leaf_reg': 9.298603314149466, 'bagging_temperature': 0.8576385656735583, 'random_strength': 0.2824470566611072}. Best is trial 1 with value: 5542.090561499001.
[I 2025-04-26 01:4

📈 Group 16 complete.
🔍 Tuning group: 17


[I 2025-04-26 01:43:35,563] Trial 0 finished with value: 11716.434017266454 and parameters: {'iterations': 1149, 'learning_rate': 0.020800964714147288, 'depth': 10, 'l2_leaf_reg': 3.0402087427484257, 'bagging_temperature': 0.9894772628501062, 'random_strength': 0.0017559232759784855}. Best is trial 0 with value: 11716.434017266454.
[I 2025-04-26 01:43:35,591] Trial 1 finished with value: 9073.831737543325 and parameters: {'iterations': 1214, 'learning_rate': 0.24346260904973685, 'depth': 4, 'l2_leaf_reg': 5.865650646502094, 'bagging_temperature': 0.36879376862178836, 'random_strength': 0.0716214899210473}. Best is trial 1 with value: 9073.831737543325.
[I 2025-04-26 01:43:36,448] Trial 2 finished with value: 9017.479706695318 and parameters: {'iterations': 859, 'learning_rate': 0.03453873191502474, 'depth': 9, 'l2_leaf_reg': 8.097481132156755, 'bagging_temperature': 0.5972163233813278, 'random_strength': 0.009373450793503872}. Best is trial 2 with value: 9017.479706695318.
[I 2025-04-2

📈 Group 17 complete.
🔍 Tuning group: 18


[I 2025-04-26 01:43:51,725] Trial 0 finished with value: 14168.44837896443 and parameters: {'iterations': 358, 'learning_rate': 0.17497242077251157, 'depth': 8, 'l2_leaf_reg': 2.7802878027973317, 'bagging_temperature': 0.28902458072932846, 'random_strength': 1.3085944747241202e-05}. Best is trial 0 with value: 14168.44837896443.
[I 2025-04-26 01:43:51,915] Trial 1 finished with value: 12753.88055644769 and parameters: {'iterations': 981, 'learning_rate': 0.06606263903740854, 'depth': 5, 'l2_leaf_reg': 3.6284633366117243, 'bagging_temperature': 0.07480834039889117, 'random_strength': 3.361509541948595e-09}. Best is trial 1 with value: 12753.88055644769.
[I 2025-04-26 01:43:52,316] Trial 2 finished with value: 12744.843506578058 and parameters: {'iterations': 598, 'learning_rate': 0.01737940924193606, 'depth': 7, 'l2_leaf_reg': 1.772802974777702, 'bagging_temperature': 0.46510362135958805, 'random_strength': 0.0007102279407677979}. Best is trial 2 with value: 12744.843506578058.
[I 2025-

📈 Group 18 complete.
🔍 Tuning group: 19


[I 2025-04-26 01:44:14,949] Trial 0 finished with value: 20890.974472010712 and parameters: {'iterations': 1486, 'learning_rate': 0.23290267350314323, 'depth': 10, 'l2_leaf_reg': 6.665516621351617, 'bagging_temperature': 0.319632245941747, 'random_strength': 0.7360448025819991}. Best is trial 0 with value: 20890.974472010712.
[I 2025-04-26 01:44:15,930] Trial 1 finished with value: 27493.565099723186 and parameters: {'iterations': 1462, 'learning_rate': 0.0569736131978711, 'depth': 10, 'l2_leaf_reg': 1.4347586118634927, 'bagging_temperature': 0.19571721659519103, 'random_strength': 4.68862972078572e-06}. Best is trial 0 with value: 20890.974472010712.
[I 2025-04-26 01:44:16,242] Trial 2 finished with value: 24648.29775195391 and parameters: {'iterations': 758, 'learning_rate': 0.010168317422372288, 'depth': 7, 'l2_leaf_reg': 9.866321088644181, 'bagging_temperature': 0.5720102227083214, 'random_strength': 3.7781220034666137e-07}. Best is trial 0 with value: 20890.974472010712.
[I 2025-0

📈 Group 19 complete.
🔍 Tuning group: 20


[I 2025-04-26 01:44:29,235] Trial 0 finished with value: 13600.633701342635 and parameters: {'iterations': 1333, 'learning_rate': 0.044182561752782144, 'depth': 7, 'l2_leaf_reg': 4.8650214885818395, 'bagging_temperature': 0.609921599615257, 'random_strength': 0.02118477019845715}. Best is trial 0 with value: 13600.633701342635.
[I 2025-04-26 01:44:29,434] Trial 1 finished with value: 9587.291978197656 and parameters: {'iterations': 1114, 'learning_rate': 0.018264157012964324, 'depth': 4, 'l2_leaf_reg': 9.754013510105707, 'bagging_temperature': 0.31838477517126906, 'random_strength': 0.0772152423168494}. Best is trial 1 with value: 9587.291978197656.
[I 2025-04-26 01:44:29,618] Trial 2 finished with value: 12800.183355244677 and parameters: {'iterations': 1036, 'learning_rate': 0.12275437500399193, 'depth': 6, 'l2_leaf_reg': 4.787587041460972, 'bagging_temperature': 0.09566292695783352, 'random_strength': 1.9801763415509326}. Best is trial 1 with value: 9587.291978197656.
[I 2025-04-26 

📈 Group 20 complete.
🔍 Tuning group: 21


[I 2025-04-26 01:44:55,999] Trial 0 finished with value: 8923.843117420502 and parameters: {'iterations': 674, 'learning_rate': 0.10377507181851833, 'depth': 10, 'l2_leaf_reg': 7.380739782754019, 'bagging_temperature': 0.1198990654155353, 'random_strength': 0.0011325289454377453}. Best is trial 0 with value: 8923.843117420502.
[I 2025-04-26 01:44:56,287] Trial 1 finished with value: 11072.155682739889 and parameters: {'iterations': 1255, 'learning_rate': 0.031085240370832462, 'depth': 8, 'l2_leaf_reg': 7.011734877373566, 'bagging_temperature': 0.6865736905260222, 'random_strength': 0.008796385749954087}. Best is trial 0 with value: 8923.843117420502.
[I 2025-04-26 01:44:56,477] Trial 2 finished with value: 12677.455486948835 and parameters: {'iterations': 922, 'learning_rate': 0.04813817099230789, 'depth': 9, 'l2_leaf_reg': 1.9323242057740078, 'bagging_temperature': 0.8046432276872053, 'random_strength': 2.48929302502421e-08}. Best is trial 0 with value: 8923.843117420502.
[I 2025-04-2

📈 Group 21 complete.
🔍 Tuning group: 22


[I 2025-04-26 01:45:09,817] Trial 1 finished with value: 13272.530668130488 and parameters: {'iterations': 762, 'learning_rate': 0.011128484710845859, 'depth': 7, 'l2_leaf_reg': 9.43248517239923, 'bagging_temperature': 0.43223991384172056, 'random_strength': 3.6674967872617937e-07}. Best is trial 0 with value: 10383.113636909187.
[I 2025-04-26 01:45:09,953] Trial 2 finished with value: 10596.32913519975 and parameters: {'iterations': 636, 'learning_rate': 0.016070392993374392, 'depth': 4, 'l2_leaf_reg': 9.316728652297195, 'bagging_temperature': 0.4983602268429833, 'random_strength': 1.3645697987686438e-07}. Best is trial 0 with value: 10383.113636909187.
[I 2025-04-26 01:45:09,986] Trial 3 finished with value: 10398.416387906396 and parameters: {'iterations': 1130, 'learning_rate': 0.1983586688958182, 'depth': 4, 'l2_leaf_reg': 8.201486294756217, 'bagging_temperature': 0.45467654471623387, 'random_strength': 2.1219332490602096}. Best is trial 0 with value: 10383.113636909187.
[I 2025-0

📈 Group 22 complete.
✅ All groups trained, validated, and recursively forecasted.


In [16]:
# r1.4 — CatBoost Forecasting with log1p/expm1
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import joblib
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -------- CONFIG --------
# Input CSVs; main() reads both with the "date" column parsed as datetimes.
# NOTE(review): absolute, machine-specific paths — consider deriving them from
# a single configurable base directory so the notebook runs elsewhere.
TRAIN_PATH = "/Users/jaygamage/Downloads/feature_outputs/train_featured.csv"
PREDICT_PATH = "/Users/jaygamage/Downloads/feature_outputs/predict_featured.csv"
# Used as a directory (models/, forecasts/ and plots/ are created beneath it),
# despite the ".log" suffix.
OUTPUT_DIR = "/Users/jaygamage/Downloads/r1.4.log"
# Column being forecast; it is modelled on a log1p scale and mapped back with expm1.
TARGET = "sales_quantity"
# Last year included in the final training set, and the year scored as validation.
TEST_YEAR = 2024
# Number of Optuna trials run for each group.
N_TRIALS = 100
# Columns handed to CatBoost as categorical features.
CAT_FEATURES = ["group_code"]
# Names of the lag / rolling-mean feature columns (the same names that
# update_recursive_features writes); not referenced elsewhere in this cell.
LAG_COLS = ["lag_1", "lag_2", "lag_3"]
ROLLING_COLS = ["rolling_mean_3", "rolling_mean_6"]

# -------- METRICS --------
def get_metrics(y_true, y_pred):
    """Score log1p-scale values after mapping them back to the original scale.

    Args:
        y_true: Actual values on the log1p scale.
        y_pred: Predicted values on the log1p scale.

    Returns:
        dict with keys "MAE", "RMSE", "MAPE" (in percent) and "R2", all
        computed on ``np.expm1`` of the inputs.
    """
    actual = np.expm1(y_true)
    forecast = np.expm1(y_pred)
    # Small epsilon in the denominator avoids division by zero for zero actuals.
    abs_pct_error = np.abs((actual - forecast) / (actual + 1e-6))
    return {
        "MAE": mean_absolute_error(actual, forecast),
        "RMSE": np.sqrt(mean_squared_error(actual, forecast)),
        "MAPE": np.mean(abs_pct_error) * 100,
        "R2": r2_score(actual, forecast),
    }

def plot_forecast(dates, y_true, y_pred, group_code, save_path):
    """Save an actual-vs-predicted line chart for one group.

    Args:
        dates: X-axis values, one per observation.
        y_true: Actual values on the log1p scale.
        y_pred: Predicted values on the log1p scale.
        group_code: Group identifier shown in the chart title.
        save_path: File path the figure is written to.
    """
    # Inputs are on the log1p scale; plot them on the original scale.
    actual = np.expm1(y_true)
    forecast = np.expm1(y_pred)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(dates, actual, label="Actual", marker="o")
    ax.plot(dates, forecast, label="Predicted", marker="x")
    ax.set_title(f"CatBoost Forecast for Group {group_code}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Sales Quantity")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(save_path)
    # Close the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

# -------- FEATURE UPDATES --------
def update_recursive_features(df, target=None, lags=(1, 2, 3), windows=(3, 6)):
    """Recompute lag and rolling-mean features from the target column.

    The frame is sorted by ``"date"`` and the feature columns ``lag_<k>`` and
    ``rolling_mean_<w>`` are (re)written. Every feature for a row is built only
    from *earlier* rows: the rolling means are taken over the target shifted by
    one step, so a row whose own target is still unknown (NaN or a
    placeholder) gets a usable rolling mean, and the row's own target never
    leaks into its features.

    Args:
        df: Frame with a ``"date"`` column and the target column.
        target: Name of the target column; defaults to the module-level
            ``TARGET``.
        lags: Lag offsets (in rows) to generate.
        windows: Rolling-window lengths (in rows) to generate.

    Returns:
        A new, date-sorted DataFrame with the feature columns added; the input
        frame is not modified.
    """
    target = TARGET if target is None else target
    df = df.sort_values("date")
    for lag in lags:
        df[f"lag_{lag}"] = df[target].shift(lag)
    # Shift once so each window ends at the previous row rather than the current one.
    previous_values = df[target].shift(1)
    for window in windows:
        df[f"rolling_mean_{window}"] = previous_values.rolling(window).mean()
    return df

# -------- OPTUNA OBJECTIVE --------
def build_objective_expanding(train_df, features, cat_features):
    """Build an Optuna objective that scores CatBoost on expanding-window folds.

    Two folds are evaluated: train on years <= 2022 and validate on 2023, then
    train on years <= 2023 and validate on 2024. The target is modelled on a
    log1p scale. Each fold's loss is the log-scale RMSE plus penalties for a
    MAPE above 15% and an R2 below 0.5 (both measured on the original scale).

    Args:
        train_df: Training frame with a datetime ``"date"`` column, the
            feature columns and the ``TARGET`` column.
        features: Feature column names passed to the model.
        cat_features: Categorical feature names passed to CatBoost.

    Returns:
        A callable ``objective(trial)`` returning the mean loss over the folds.
    """
    # (last training year, validation year) for each expanding-window fold.
    folds = ((2022, 2023), (2023, 2024))
    mape_weight = 1000
    r2_weight = 1000

    def _fold_loss(params, train_end, val_year):
        # Fit on everything up to train_end and score on val_year.
        fit_part = train_df[train_df["date"].dt.year <= train_end]
        val_part = train_df[train_df["date"].dt.year == val_year]

        X_fit = fit_part[features]
        y_fit = np.log1p(fit_part[TARGET])
        X_val = val_part[features]
        y_val = np.log1p(val_part[TARGET])

        regressor = CatBoostRegressor(**params)
        regressor.fit(
            X_fit, y_fit,
            eval_set=(X_val, y_val),
            cat_features=cat_features,
            early_stopping_rounds=50,
            verbose=0
        )

        preds = regressor.predict(X_val)
        # RMSE stays on the log scale; MAPE and R2 use the original scale.
        rmse = np.sqrt(mean_squared_error(y_val, preds))
        actual = np.expm1(y_val)
        predicted = np.expm1(preds)
        mape = np.mean(np.abs((actual - predicted) / (actual + 1e-6))) * 100
        r2 = r2_score(actual, predicted)

        return rmse + mape_weight * max(0, mape - 15) + r2_weight * max(0.5 - r2, 0)

    def objective(trial):
        params = dict(
            iterations=trial.suggest_int("iterations", 300, 1500),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            depth=trial.suggest_int("depth", 4, 10),
            l2_leaf_reg=trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
            bagging_temperature=trial.suggest_float("bagging_temperature", 0.0, 1.0),
            random_strength=trial.suggest_float("random_strength", 1e-9, 10.0, log=True),
            loss_function="RMSE",
            random_seed=42,
            verbose=0,
        )
        fold_losses = [_fold_loss(params, train_end, val_year) for train_end, val_year in folds]
        return np.mean(fold_losses)

    return objective

# -------- MAIN --------
def main():
    """Tune, validate, and recursively forecast one CatBoost model per group.

    For every ``group_code`` in the training file this:

    1. tunes hyperparameters with Optuna (``N_TRIALS`` trials, seeded sampler);
    2. fits a hold-out model on years before ``TEST_YEAR`` and scores it on
       ``TEST_YEAR`` (metrics row + plot), so the reported metrics are
       out-of-sample;
    3. refits on all data up to and including ``TEST_YEAR`` and saves that
       model with joblib;
    4. forecasts the prediction-file dates in chronological order, writing
       each prediction back into the history so later lag/rolling features
       can use it;
    5. writes the per-group forecast CSV, and finally ``metrics.csv`` and
       ``optuna_params.csv`` under ``OUTPUT_DIR``.

    Groups with no rows in the prediction file are skipped.
    """
    for subdir in ("models", "forecasts", "plots"):
        os.makedirs(f"{OUTPUT_DIR}/{subdir}", exist_ok=True)

    train_df = pd.read_csv(TRAIN_PATH, parse_dates=["date"])
    predict_df = pd.read_csv(PREDICT_PATH, parse_dates=["date"])

    group_codes = train_df["group_code"].unique()
    metric_records = []
    optuna_param_records = []

    for group in group_codes:
        print(f"🔍 Tuning group: {group}")
        train_g = train_df[train_df["group_code"] == group].copy()
        pred_g = predict_df[predict_df["group_code"] == group].copy()

        if pred_g.empty:
            print(f"⚠️ Skipping group {group} — no 2025 data.")
            continue

        # Only use features available both at training and at prediction time.
        drop_cols = ["date", "sales_quantity", "material_group"]
        features = [col for col in train_g.columns if col not in drop_cols and col in pred_g.columns]

        # -------- OPTUNA TUNING --------
        # Seed the sampler so the search is reproducible across runs.
        study = optuna.create_study(
            direction="minimize",
            sampler=optuna.samplers.TPESampler(seed=42),
        )
        study.optimize(build_objective_expanding(train_g, features, CAT_FEATURES), n_trials=N_TRIALS)
        best_params = study.best_params
        best_params.update({"loss_function": "RMSE", "random_seed": 42, "verbose": 0})
        optuna_param_records.append({**best_params, "group_code": group})

        # -------- VALIDATION (hold-out) --------
        # Score TEST_YEAR with a model that has not been fitted on it.
        # The tuning objective also validates on 2024, so with TEST_YEAR = 2024
        # these metrics are still mildly optimistic.
        years = train_g["date"].dt.year
        holdout_train = train_g[years < TEST_YEAR]
        val_cut = train_g[years == TEST_YEAR].copy()
        if holdout_train.empty or val_cut.empty:
            print(f"⚠️ Group {group}: not enough data for a {TEST_YEAR} hold-out, metrics skipped.")
        else:
            holdout_model = CatBoostRegressor(**best_params)
            holdout_model.fit(
                holdout_train[features],
                np.log1p(holdout_train[TARGET]),
                cat_features=CAT_FEATURES,
            )
            y_val_log = np.log1p(val_cut[TARGET])
            val_preds = holdout_model.predict(val_cut[features])
            metrics = get_metrics(y_val_log, val_preds)
            metrics["group_code"] = group
            metric_records.append(metrics)
            plot_forecast(val_cut["date"], y_val_log, val_preds, group, f"{OUTPUT_DIR}/plots/{group}_forecast.png")

        # -------- FINAL MODEL TRAINING --------
        final_train = train_g[years <= TEST_YEAR]
        X_train, y_train = final_train[features], np.log1p(final_train[TARGET])

        model = CatBoostRegressor(**best_params)
        model.fit(X_train, y_train, cat_features=CAT_FEATURES)
        joblib.dump(model, f"{OUTPUT_DIR}/models/{group}_catboost_optuna.pkl")

        # -------- RECURSIVE FORECASTING --------
        history = pd.concat([train_g, pred_g], ignore_index=True).sort_values("date")
        # Walk the forecast dates chronologically: each step depends on the
        # predictions written into `history` by the previous steps.
        for current_date in pred_g["date"].sort_values():
            temp_history = history[history["date"] <= current_date].copy()
            temp_history = update_recursive_features(temp_history)
            updated_row = temp_history[temp_history["date"] == current_date]
            pred_value = model.predict(updated_row[features])[0]
            # The model predicts log1p(target); store it on the original scale.
            history.loc[history["date"] == current_date, TARGET] = np.expm1(pred_value)

        # Save Recursive Forecast (rows for the prediction file's dates only).
        forecast_g = history.loc[
            history["date"].isin(pred_g["date"]), ["date", "group_code", TARGET]
        ].rename(columns={TARGET: "predicted_sales_quantity"})
        forecast_g.to_csv(f"{OUTPUT_DIR}/forecasts/{group}_forecast.csv", index=False)
        print(f"📈 Group {group} complete.")

    pd.DataFrame(metric_records).to_csv(f"{OUTPUT_DIR}/metrics.csv", index=False)
    pd.DataFrame(optuna_param_records).to_csv(f"{OUTPUT_DIR}/optuna_params.csv", index=False)
    print("✅ All groups trained, validated, and recursively forecasted.")

# Run the whole pipeline when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    main()

[I 2025-05-03 00:40:01,740] A new study created in memory with name: no-name-ecb70d49-8f83-41c0-b84c-aa9438352871


🔍 Tuning group: 0


[I 2025-05-03 00:40:02,015] Trial 0 finished with value: 3165.9760354382156 and parameters: {'iterations': 1393, 'learning_rate': 0.20196179893844332, 'depth': 7, 'l2_leaf_reg': 3.214333787198278, 'bagging_temperature': 0.590304467571771, 'random_strength': 0.0003109714169900954}. Best is trial 0 with value: 3165.9760354382156.
[I 2025-05-03 00:40:02,875] Trial 1 finished with value: 9729.199524857988 and parameters: {'iterations': 342, 'learning_rate': 0.05549063656973899, 'depth': 10, 'l2_leaf_reg': 2.098848572041677, 'bagging_temperature': 0.6450052761523128, 'random_strength': 0.0011529426085971708}. Best is trial 0 with value: 3165.9760354382156.
[I 2025-05-03 00:40:02,961] Trial 2 finished with value: 2123.4184023857606 and parameters: {'iterations': 1148, 'learning_rate': 0.06624827491674914, 'depth': 4, 'l2_leaf_reg': 5.0997608373468974, 'bagging_temperature': 0.8154930069369084, 'random_strength': 3.653963796542995e-06}. Best is trial 2 with value: 2123.4184023857606.
[I 2025-

📈 Group 0 complete.
🔍 Tuning group: 1


[I 2025-05-03 00:40:26,217] Trial 0 finished with value: 8343.069456997522 and parameters: {'iterations': 936, 'learning_rate': 0.03346584400853696, 'depth': 7, 'l2_leaf_reg': 7.3009825140010856, 'bagging_temperature': 0.08111120045742115, 'random_strength': 7.370233846432109e-07}. Best is trial 0 with value: 8343.069456997522.
[I 2025-05-03 00:40:26,454] Trial 1 finished with value: 7408.245485128766 and parameters: {'iterations': 1357, 'learning_rate': 0.05730612663276169, 'depth': 7, 'l2_leaf_reg': 7.077684544732586, 'bagging_temperature': 0.82988973023215, 'random_strength': 3.276800698970223e-05}. Best is trial 1 with value: 7408.245485128766.
[I 2025-05-03 00:40:26,801] Trial 2 finished with value: 6480.468096438385 and parameters: {'iterations': 782, 'learning_rate': 0.010475552508060015, 'depth': 6, 'l2_leaf_reg': 9.80855460198569, 'bagging_temperature': 0.41351010505101926, 'random_strength': 2.385443917848389}. Best is trial 2 with value: 6480.468096438385.
[I 2025-05-03 00:4

📈 Group 1 complete.
🔍 Tuning group: 2


[I 2025-05-03 00:40:47,575] Trial 0 finished with value: 10523.092167158396 and parameters: {'iterations': 332, 'learning_rate': 0.2910602607949578, 'depth': 10, 'l2_leaf_reg': 3.5079159578125014, 'bagging_temperature': 0.7799052334531642, 'random_strength': 0.011164703091869649}. Best is trial 0 with value: 10523.092167158396.
[I 2025-05-03 00:40:47,979] Trial 1 finished with value: 6345.430591357854 and parameters: {'iterations': 363, 'learning_rate': 0.09640627216311833, 'depth': 9, 'l2_leaf_reg': 7.574732833700312, 'bagging_temperature': 0.6598458854112671, 'random_strength': 0.010055047985939078}. Best is trial 1 with value: 6345.430591357854.
[I 2025-05-03 00:40:48,988] Trial 2 finished with value: 6002.989929879533 and parameters: {'iterations': 1213, 'learning_rate': 0.011226679866899611, 'depth': 8, 'l2_leaf_reg': 8.8686839021413, 'bagging_temperature': 0.25831836377770234, 'random_strength': 5.547146928563329e-06}. Best is trial 2 with value: 6002.989929879533.
[I 2025-05-03 

📈 Group 2 complete.
🔍 Tuning group: 3


[I 2025-05-03 00:41:22,766] Trial 1 finished with value: 7885.082079589145 and parameters: {'iterations': 801, 'learning_rate': 0.04319094484353776, 'depth': 4, 'l2_leaf_reg': 5.463695874115119, 'bagging_temperature': 0.1216534750844479, 'random_strength': 0.00038194237617667545}. Best is trial 1 with value: 7885.082079589145.
[I 2025-05-03 00:41:23,091] Trial 2 finished with value: 7129.505956371192 and parameters: {'iterations': 1203, 'learning_rate': 0.23181932989170426, 'depth': 10, 'l2_leaf_reg': 4.334019629850164, 'bagging_temperature': 0.7249792246271564, 'random_strength': 0.0008306319945701051}. Best is trial 2 with value: 7129.505956371192.
[I 2025-05-03 00:41:23,124] Trial 3 finished with value: 8321.058824038711 and parameters: {'iterations': 1193, 'learning_rate': 0.20393757950420457, 'depth': 4, 'l2_leaf_reg': 2.611940643792387, 'bagging_temperature': 0.9825421411136291, 'random_strength': 3.702472615663309e-06}. Best is trial 2 with value: 7129.505956371192.
[I 2025-05-0

📈 Group 3 complete.
🔍 Tuning group: 4


[I 2025-05-03 00:41:47,956] Trial 0 finished with value: 5175.818116998136 and parameters: {'iterations': 998, 'learning_rate': 0.06322159854811858, 'depth': 9, 'l2_leaf_reg': 4.707313781719128, 'bagging_temperature': 0.8401352940744833, 'random_strength': 0.0011696306309181826}. Best is trial 0 with value: 5175.818116998136.
[I 2025-05-03 00:41:49,641] Trial 1 finished with value: 4948.176171927118 and parameters: {'iterations': 1143, 'learning_rate': 0.057306302907711985, 'depth': 9, 'l2_leaf_reg': 8.22160960221934, 'bagging_temperature': 0.7899348097528054, 'random_strength': 0.05820989604239276}. Best is trial 1 with value: 4948.176171927118.
[I 2025-05-03 00:41:50,550] Trial 2 finished with value: 8163.996586971661 and parameters: {'iterations': 434, 'learning_rate': 0.013173023823580758, 'depth': 10, 'l2_leaf_reg': 9.842741027360413, 'bagging_temperature': 0.918634514683683, 'random_strength': 1.1385835282264132}. Best is trial 1 with value: 4948.176171927118.
[I 2025-05-03 00:41

📈 Group 4 complete.
🔍 Tuning group: 5


[I 2025-05-03 00:42:06,273] Trial 0 finished with value: 8977.90482567083 and parameters: {'iterations': 1278, 'learning_rate': 0.02338353560467095, 'depth': 10, 'l2_leaf_reg': 7.113797263775871, 'bagging_temperature': 0.3012600301046485, 'random_strength': 1.7880088956037912e-08}. Best is trial 0 with value: 8977.90482567083.
[I 2025-05-03 00:42:06,360] Trial 1 finished with value: 5935.134332557986 and parameters: {'iterations': 1204, 'learning_rate': 0.09566941724435638, 'depth': 7, 'l2_leaf_reg': 1.563357728664705, 'bagging_temperature': 0.3406774929368187, 'random_strength': 2.364592813121089e-07}. Best is trial 1 with value: 5935.134332557986.
[I 2025-05-03 00:42:07,341] Trial 2 finished with value: 5702.87938445083 and parameters: {'iterations': 1184, 'learning_rate': 0.01962403113970644, 'depth': 9, 'l2_leaf_reg': 3.8326386493069577, 'bagging_temperature': 0.44243668692539606, 'random_strength': 0.3737219634637486}. Best is trial 2 with value: 5702.87938445083.
[I 2025-05-03 00

📈 Group 5 complete.
🔍 Tuning group: 6


[I 2025-05-03 00:42:31,135] Trial 0 finished with value: 15176.622719572584 and parameters: {'iterations': 808, 'learning_rate': 0.017747959843156347, 'depth': 10, 'l2_leaf_reg': 7.810861020008396, 'bagging_temperature': 0.4340961021560805, 'random_strength': 0.00029940939117227013}. Best is trial 0 with value: 15176.622719572584.
[I 2025-05-03 00:42:31,209] Trial 1 finished with value: 11328.058225199884 and parameters: {'iterations': 705, 'learning_rate': 0.02740765765850615, 'depth': 4, 'l2_leaf_reg': 6.7459350646360985, 'bagging_temperature': 0.7104006350365808, 'random_strength': 0.0012073823673323177}. Best is trial 1 with value: 11328.058225199884.
[I 2025-05-03 00:42:31,382] Trial 2 finished with value: 10527.073416966168 and parameters: {'iterations': 651, 'learning_rate': 0.01607862496930393, 'depth': 5, 'l2_leaf_reg': 8.740964458546088, 'bagging_temperature': 0.21841925528226513, 'random_strength': 0.4004930826520793}. Best is trial 2 with value: 10527.073416966168.
[I 2025-

📈 Group 6 complete.
🔍 Tuning group: 7


[I 2025-05-03 00:42:44,801] Trial 1 finished with value: 2636.7267242103535 and parameters: {'iterations': 732, 'learning_rate': 0.012742427094345506, 'depth': 4, 'l2_leaf_reg': 9.528796694117245, 'bagging_temperature': 0.2384324983617787, 'random_strength': 0.9732707923640685}. Best is trial 1 with value: 2636.7267242103535.
[I 2025-05-03 00:42:45,054] Trial 2 finished with value: 3749.315847860147 and parameters: {'iterations': 643, 'learning_rate': 0.04234868481709915, 'depth': 6, 'l2_leaf_reg': 5.985491489340677, 'bagging_temperature': 0.8469846809862702, 'random_strength': 1.8072016113342114e-09}. Best is trial 1 with value: 2636.7267242103535.
[I 2025-05-03 00:42:45,191] Trial 3 finished with value: 5006.089918476373 and parameters: {'iterations': 374, 'learning_rate': 0.010924938930798218, 'depth': 5, 'l2_leaf_reg': 8.732367503262076, 'bagging_temperature': 0.19723350083480273, 'random_strength': 0.0015069842249939784}. Best is trial 1 with value: 2636.7267242103535.
[I 2025-05-

📈 Group 7 complete.
🔍 Tuning group: 8


[I 2025-05-03 00:43:01,527] Trial 0 finished with value: 7440.842131247213 and parameters: {'iterations': 1313, 'learning_rate': 0.02197801947363275, 'depth': 6, 'l2_leaf_reg': 9.424360714347069, 'bagging_temperature': 0.4519850383474806, 'random_strength': 7.201396504800937e-08}. Best is trial 0 with value: 7440.842131247213.
[I 2025-05-03 00:43:02,264] Trial 1 finished with value: 8092.597438653333 and parameters: {'iterations': 1131, 'learning_rate': 0.01977587638155903, 'depth': 7, 'l2_leaf_reg': 9.331387450856216, 'bagging_temperature': 0.13447511496186915, 'random_strength': 3.0262061400326515e-07}. Best is trial 0 with value: 7440.842131247213.
[I 2025-05-03 00:43:04,354] Trial 2 finished with value: 11107.882090653893 and parameters: {'iterations': 937, 'learning_rate': 0.027888471328054592, 'depth': 10, 'l2_leaf_reg': 4.443865883174935, 'bagging_temperature': 0.2982246321545713, 'random_strength': 0.007947566778781064}. Best is trial 0 with value: 7440.842131247213.
[I 2025-05

📈 Group 8 complete.
🔍 Tuning group: 9


[I 2025-05-03 00:43:22,925] Trial 1 finished with value: 3408.155826969343 and parameters: {'iterations': 722, 'learning_rate': 0.021595912692144958, 'depth': 9, 'l2_leaf_reg': 4.341072372262877, 'bagging_temperature': 0.9066092604707507, 'random_strength': 1.7430917886007598}. Best is trial 1 with value: 3408.155826969343.
[I 2025-05-03 00:43:23,837] Trial 2 finished with value: 4757.092166712103 and parameters: {'iterations': 1053, 'learning_rate': 0.04418795478786528, 'depth': 10, 'l2_leaf_reg': 8.79615821328492, 'bagging_temperature': 0.8448358516634646, 'random_strength': 7.846732399869768}. Best is trial 1 with value: 3408.155826969343.
[I 2025-05-03 00:43:24,382] Trial 3 finished with value: 5710.049475041471 and parameters: {'iterations': 380, 'learning_rate': 0.07082778140821551, 'depth': 9, 'l2_leaf_reg': 6.255674622941687, 'bagging_temperature': 0.09625461178156092, 'random_strength': 7.167320830996505e-06}. Best is trial 1 with value: 3408.155826969343.
[I 2025-05-03 00:43:

📈 Group 9 complete.
🔍 Tuning group: 10


[I 2025-05-03 00:43:44,124] Trial 0 finished with value: 8586.172781308105 and parameters: {'iterations': 918, 'learning_rate': 0.014994742041115392, 'depth': 7, 'l2_leaf_reg': 7.219782682889405, 'bagging_temperature': 0.018917609982982198, 'random_strength': 1.980000052505391e-07}. Best is trial 0 with value: 8586.172781308105.
[I 2025-05-03 00:43:44,621] Trial 1 finished with value: 8775.335422310829 and parameters: {'iterations': 823, 'learning_rate': 0.016170118373045904, 'depth': 7, 'l2_leaf_reg': 1.5502179107939003, 'bagging_temperature': 0.02164128024948886, 'random_strength': 1.1401997433560393e-08}. Best is trial 0 with value: 8586.172781308105.
[I 2025-05-03 00:43:45,355] Trial 2 finished with value: 13576.854874731278 and parameters: {'iterations': 836, 'learning_rate': 0.011378941343854646, 'depth': 8, 'l2_leaf_reg': 9.47139671328989, 'bagging_temperature': 0.46642502376126427, 'random_strength': 5.332569469554871}. Best is trial 0 with value: 8586.172781308105.
[I 2025-05-

📈 Group 10 complete.
🔍 Tuning group: 11


[I 2025-05-03 00:44:02,394] Trial 0 finished with value: 2461.6573590308417 and parameters: {'iterations': 554, 'learning_rate': 0.07188429588279187, 'depth': 9, 'l2_leaf_reg': 8.843939289767096, 'bagging_temperature': 0.1677915540718995, 'random_strength': 0.38700909821813645}. Best is trial 0 with value: 2461.6573590308417.
[I 2025-05-03 00:44:03,781] Trial 1 finished with value: 6225.490105234545 and parameters: {'iterations': 603, 'learning_rate': 0.1100473167093909, 'depth': 10, 'l2_leaf_reg': 9.503989938404525, 'bagging_temperature': 0.047069293284433766, 'random_strength': 2.1357441816133628e-05}. Best is trial 0 with value: 2461.6573590308417.
[I 2025-05-03 00:44:04,405] Trial 2 finished with value: 2428.845780255804 and parameters: {'iterations': 687, 'learning_rate': 0.015725775044349327, 'depth': 8, 'l2_leaf_reg': 6.672050060591802, 'bagging_temperature': 0.524842909812459, 'random_strength': 0.9893210105931929}. Best is trial 2 with value: 2428.845780255804.
[I 2025-05-03 0

📈 Group 11 complete.
🔍 Tuning group: 12


[I 2025-05-03 00:44:33,657] Trial 1 finished with value: 14799.778392780328 and parameters: {'iterations': 809, 'learning_rate': 0.03736822245974723, 'depth': 10, 'l2_leaf_reg': 4.147513332900163, 'bagging_temperature': 0.056505490017267346, 'random_strength': 2.7279110391833035e-08}. Best is trial 0 with value: 12174.145887534029.
[I 2025-05-03 00:44:33,795] Trial 2 finished with value: 10433.840467901047 and parameters: {'iterations': 344, 'learning_rate': 0.07148545871902588, 'depth': 8, 'l2_leaf_reg': 4.027768029748, 'bagging_temperature': 0.7021344058186825, 'random_strength': 1.9832561613591294e-05}. Best is trial 2 with value: 10433.840467901047.
[I 2025-05-03 00:44:33,931] Trial 3 finished with value: 6537.983362585178 and parameters: {'iterations': 1288, 'learning_rate': 0.012848055051125686, 'depth': 4, 'l2_leaf_reg': 3.445536332208199, 'bagging_temperature': 0.10190105820926199, 'random_strength': 3.625675162304966e-05}. Best is trial 3 with value: 6537.983362585178.
[I 2025

📈 Group 12 complete.
🔍 Tuning group: 13


[I 2025-05-03 00:44:56,625] Trial 1 finished with value: 15547.718091187773 and parameters: {'iterations': 888, 'learning_rate': 0.24499153802800175, 'depth': 10, 'l2_leaf_reg': 9.69326823033716, 'bagging_temperature': 0.3770678673743736, 'random_strength': 1.1062696909050457e-06}. Best is trial 0 with value: 8484.806326936066.
[I 2025-05-03 00:44:56,868] Trial 2 finished with value: 8825.027743703697 and parameters: {'iterations': 743, 'learning_rate': 0.01280861769562432, 'depth': 5, 'l2_leaf_reg': 3.532214872641989, 'bagging_temperature': 0.4858207295462015, 'random_strength': 0.00019070764746067316}. Best is trial 0 with value: 8484.806326936066.
[I 2025-05-03 00:44:58,253] Trial 3 finished with value: 8917.4435744457 and parameters: {'iterations': 593, 'learning_rate': 0.015925180708991984, 'depth': 10, 'l2_leaf_reg': 7.06090205582499, 'bagging_temperature': 0.17664858443846898, 'random_strength': 0.8646466469327555}. Best is trial 0 with value: 8484.806326936066.
[I 2025-05-03 00

📈 Group 13 complete.
🔍 Tuning group: 14


[I 2025-05-03 00:45:13,652] Trial 1 finished with value: 7293.602851812999 and parameters: {'iterations': 398, 'learning_rate': 0.11543924176176637, 'depth': 7, 'l2_leaf_reg': 9.580680540604172, 'bagging_temperature': 0.5391052917463982, 'random_strength': 9.268663110667183e-06}. Best is trial 1 with value: 7293.602851812999.
[I 2025-05-03 00:45:14,221] Trial 2 finished with value: 8335.35383790937 and parameters: {'iterations': 358, 'learning_rate': 0.06062999797822625, 'depth': 9, 'l2_leaf_reg': 8.160644299548057, 'bagging_temperature': 0.5893871913002754, 'random_strength': 0.058720309139918075}. Best is trial 1 with value: 7293.602851812999.
[I 2025-05-03 00:45:14,619] Trial 3 finished with value: 8761.094150382842 and parameters: {'iterations': 871, 'learning_rate': 0.1545018871843625, 'depth': 10, 'l2_leaf_reg': 6.7669221314050105, 'bagging_temperature': 0.7807317746437994, 'random_strength': 0.1564111968032534}. Best is trial 1 with value: 7293.602851812999.
[I 2025-05-03 00:45:

📈 Group 14 complete.
🔍 Tuning group: 15


[I 2025-05-03 00:45:27,557] Trial 0 finished with value: 11373.456320762922 and parameters: {'iterations': 850, 'learning_rate': 0.06461670477448819, 'depth': 10, 'l2_leaf_reg': 5.015045701023402, 'bagging_temperature': 0.02807597740324419, 'random_strength': 3.3465714877007883e-06}. Best is trial 0 with value: 11373.456320762922.
[I 2025-05-03 00:45:27,705] Trial 1 finished with value: 7672.03027389308 and parameters: {'iterations': 1152, 'learning_rate': 0.045173942900646136, 'depth': 6, 'l2_leaf_reg': 3.3217322125414546, 'bagging_temperature': 0.9251314229223931, 'random_strength': 1.9634380024645415e-07}. Best is trial 1 with value: 7672.03027389308.
[I 2025-05-03 00:45:27,979] Trial 2 finished with value: 7441.732016488112 and parameters: {'iterations': 1071, 'learning_rate': 0.22715263561048096, 'depth': 10, 'l2_leaf_reg': 3.135983719162885, 'bagging_temperature': 0.07999598681858022, 'random_strength': 2.308376353432386}. Best is trial 2 with value: 7441.732016488112.
[I 2025-05

📈 Group 15 complete.
🔍 Tuning group: 16


[I 2025-05-03 00:45:47,140] Trial 3 finished with value: 786.9434168139675 and parameters: {'iterations': 439, 'learning_rate': 0.037494100055766574, 'depth': 5, 'l2_leaf_reg': 4.733604779165758, 'bagging_temperature': 0.5857925757329495, 'random_strength': 9.433302538157169e-08}. Best is trial 3 with value: 786.9434168139675.
[I 2025-05-03 00:45:47,373] Trial 4 finished with value: 1974.341996780745 and parameters: {'iterations': 1346, 'learning_rate': 0.026626242593718173, 'depth': 8, 'l2_leaf_reg': 3.4158755198525586, 'bagging_temperature': 0.7759773065788724, 'random_strength': 0.4210028095242626}. Best is trial 3 with value: 786.9434168139675.
[I 2025-05-03 00:45:47,425] Trial 5 finished with value: 1088.7951725855073 and parameters: {'iterations': 633, 'learning_rate': 0.035395091401796794, 'depth': 4, 'l2_leaf_reg': 4.622805918079488, 'bagging_temperature': 0.9350428120740362, 'random_strength': 0.00018046377151761554}. Best is trial 3 with value: 786.9434168139675.
[I 2025-05-0

📈 Group 16 complete.
🔍 Tuning group: 17


[I 2025-05-03 00:45:59,210] Trial 1 finished with value: 4818.99791360486 and parameters: {'iterations': 888, 'learning_rate': 0.01214955647825067, 'depth': 5, 'l2_leaf_reg': 4.792648384774135, 'bagging_temperature': 0.8381595522354045, 'random_strength': 5.683495084732886e-05}. Best is trial 0 with value: 4672.901035761839.
[I 2025-05-03 00:45:59,502] Trial 2 finished with value: 3718.032361959848 and parameters: {'iterations': 387, 'learning_rate': 0.049089965793542197, 'depth': 7, 'l2_leaf_reg': 8.426388448624325, 'bagging_temperature': 0.2369942404633042, 'random_strength': 0.002195043239925549}. Best is trial 2 with value: 3718.032361959848.
[I 2025-05-03 00:45:59,912] Trial 3 finished with value: 4599.1964362214785 and parameters: {'iterations': 1262, 'learning_rate': 0.014835468341891214, 'depth': 5, 'l2_leaf_reg': 8.71367998694689, 'bagging_temperature': 0.7470884492294612, 'random_strength': 0.0027253889437163156}. Best is trial 2 with value: 3718.032361959848.
[I 2025-05-03 0

📈 Group 17 complete.
🔍 Tuning group: 18


[I 2025-05-03 00:46:16,040] Trial 0 finished with value: 10325.45519886387 and parameters: {'iterations': 427, 'learning_rate': 0.013947781724853933, 'depth': 10, 'l2_leaf_reg': 2.130769733278824, 'bagging_temperature': 0.6591828850448193, 'random_strength': 0.0036315031044894517}. Best is trial 0 with value: 10325.45519886387.
[I 2025-05-03 00:46:16,653] Trial 1 finished with value: 5751.867683460576 and parameters: {'iterations': 667, 'learning_rate': 0.05215083190786833, 'depth': 8, 'l2_leaf_reg': 5.014199350869774, 'bagging_temperature': 0.4232724218406476, 'random_strength': 0.4409643896302206}. Best is trial 1 with value: 5751.867683460576.
[I 2025-05-03 00:46:17,361] Trial 2 finished with value: 6768.7648861743 and parameters: {'iterations': 758, 'learning_rate': 0.1244106058358894, 'depth': 8, 'l2_leaf_reg': 8.191677177338951, 'bagging_temperature': 0.7753899191422546, 'random_strength': 2.1387656486037e-05}. Best is trial 1 with value: 5751.867683460576.
[I 2025-05-03 00:46:17

📈 Group 18 complete.
🔍 Tuning group: 19


[I 2025-05-03 00:46:47,040] Trial 1 finished with value: 12147.423360450555 and parameters: {'iterations': 957, 'learning_rate': 0.017342020079093256, 'depth': 7, 'l2_leaf_reg': 8.873506316382983, 'bagging_temperature': 0.4622394566528968, 'random_strength': 0.09141287786766134}. Best is trial 1 with value: 12147.423360450555.
[I 2025-05-03 00:46:47,315] Trial 2 finished with value: 13546.334951416167 and parameters: {'iterations': 832, 'learning_rate': 0.029704334222772185, 'depth': 8, 'l2_leaf_reg': 6.232881760643121, 'bagging_temperature': 0.5991732873015279, 'random_strength': 2.4509566692670883e-06}. Best is trial 1 with value: 12147.423360450555.
[I 2025-05-03 00:46:47,666] Trial 3 finished with value: 13051.403726548497 and parameters: {'iterations': 562, 'learning_rate': 0.04198133328599005, 'depth': 8, 'l2_leaf_reg': 8.429449898465759, 'bagging_temperature': 0.6696902987516925, 'random_strength': 0.0025079243845258785}. Best is trial 1 with value: 12147.423360450555.
[I 2025-0

📈 Group 19 complete.
🔍 Tuning group: 20


[I 2025-05-03 00:47:03,026] Trial 0 finished with value: 9299.969775404377 and parameters: {'iterations': 467, 'learning_rate': 0.023178708620653674, 'depth': 6, 'l2_leaf_reg': 5.547101624859613, 'bagging_temperature': 0.4128495674835151, 'random_strength': 0.006685742751224904}. Best is trial 0 with value: 9299.969775404377.
[I 2025-05-03 00:47:03,625] Trial 1 finished with value: 8062.663336029046 and parameters: {'iterations': 1384, 'learning_rate': 0.03331680131526877, 'depth': 9, 'l2_leaf_reg': 3.374217390397887, 'bagging_temperature': 0.11393194824656461, 'random_strength': 6.03177331491156}. Best is trial 1 with value: 8062.663336029046.
[I 2025-05-03 00:47:03,726] Trial 2 finished with value: 9213.756489678157 and parameters: {'iterations': 1378, 'learning_rate': 0.26212450999656006, 'depth': 7, 'l2_leaf_reg': 1.6244593704666948, 'bagging_temperature': 0.6231659436847493, 'random_strength': 0.0038466620785384455}. Best is trial 1 with value: 8062.663336029046.
[I 2025-05-03 00:

📈 Group 20 complete.
🔍 Tuning group: 21


[I 2025-05-03 00:47:22,064] Trial 0 finished with value: 4463.962660009485 and parameters: {'iterations': 665, 'learning_rate': 0.012585927194192569, 'depth': 8, 'l2_leaf_reg': 9.127472838637079, 'bagging_temperature': 0.4039999968454916, 'random_strength': 0.028678358904085495}. Best is trial 0 with value: 4463.962660009485.
[I 2025-05-03 00:47:23,542] Trial 1 finished with value: 5767.358534874508 and parameters: {'iterations': 426, 'learning_rate': 0.029949261873055848, 'depth': 10, 'l2_leaf_reg': 3.555489214131275, 'bagging_temperature': 0.5826916400275933, 'random_strength': 0.00010848398097764877}. Best is trial 0 with value: 4463.962660009485.
[I 2025-05-03 00:47:24,689] Trial 2 finished with value: 6497.671861556337 and parameters: {'iterations': 379, 'learning_rate': 0.13122369324346764, 'depth': 10, 'l2_leaf_reg': 9.086905908041151, 'bagging_temperature': 0.07655168717184346, 'random_strength': 5.215535910374168e-07}. Best is trial 0 with value: 4463.962660009485.
[I 2025-05-

📈 Group 21 complete.
🔍 Tuning group: 22


[I 2025-05-03 00:47:35,353] Trial 0 finished with value: 5933.9245625253825 and parameters: {'iterations': 1217, 'learning_rate': 0.010249019988928292, 'depth': 6, 'l2_leaf_reg': 8.220172277002073, 'bagging_temperature': 0.9640020784820162, 'random_strength': 0.0007386484219598649}. Best is trial 0 with value: 5933.9245625253825.
[I 2025-05-03 00:47:36,756] Trial 1 finished with value: 8151.685563833528 and parameters: {'iterations': 554, 'learning_rate': 0.034429194956899105, 'depth': 10, 'l2_leaf_reg': 6.00875755648834, 'bagging_temperature': 0.6158106297867941, 'random_strength': 1.1006042780010554e-07}. Best is trial 0 with value: 5933.9245625253825.
[I 2025-05-03 00:47:36,871] Trial 2 finished with value: 4020.0886576951025 and parameters: {'iterations': 412, 'learning_rate': 0.015371006835710513, 'depth': 4, 'l2_leaf_reg': 1.4298804574960244, 'bagging_temperature': 0.4998470372871674, 'random_strength': 5.229569068426381e-07}. Best is trial 2 with value: 4020.0886576951025.
[I 20

📈 Group 22 complete.
✅ All groups trained, validated, and recursively forecasted.


In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import joblib
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -------- CONFIG --------
# NOTE(review): the three paths below are absolute paths on one machine;
# anyone else running this cell has to edit them first.
# CSV read by main() as the training history (its "date" column is parsed as datetime).
TRAIN_PATH = "/Users/jaygamage/Downloads/feature_outputs/train_featured.csv"
# CSV read by main() as the rows to forecast (its "date" column is parsed as datetime).
PREDICT_PATH = "/Users/jaygamage/Downloads/feature_outputs/predict_featured.csv"
# Root folder; main() creates models/, forecasts/ and plots/ below it and
# writes the metric/parameter CSVs directly into it.
OUTPUT_DIR = "/Users/jaygamage/Downloads/r1.4lg_refined"
# Column being modelled; it is log1p-transformed before fitting and
# expm1-transformed back when predictions are scored or stored.
TARGET = "sales_quantity"
# main() fits the final model on rows with year <= TEST_YEAR and reports
# validation metrics for rows with year == TEST_YEAR.
TEST_YEAR = 2024
# Number of Optuna trials run for each group.
N_TRIALS = 100
# Column names handed to CatBoost as categorical features.
CAT_FEATURES = ["group_code"]

# -------- METRICS --------
def get_metrics(y_true, y_pred):
    """Score predictions on the original (non-log) scale.

    Both arguments are passed through ``np.expm1`` before any metric is
    computed, so callers are expected to supply log1p-scale values.

    Args:
        y_true: Actual values on the log1p scale.
        y_pred: Predicted values on the log1p scale.

    Returns:
        dict with keys ``"MAE"``, ``"RMSE"``, ``"MAPE"`` (in percent) and ``"R2"``.
    """
    actual = np.expm1(y_true)
    forecast = np.expm1(y_pred)

    # The small epsilon keeps the ratio finite when an actual value is exactly 0.
    relative_error = np.abs((actual - forecast) / (actual + 1e-6))

    return {
        "MAE": mean_absolute_error(actual, forecast),
        "RMSE": np.sqrt(mean_squared_error(actual, forecast)),
        "MAPE": np.mean(relative_error) * 100,
        "R2": r2_score(actual, forecast),
    }

def plot_forecast(dates, y_true, y_pred, group_code, save_path):
    """Save an actual-vs-predicted line chart for one group.

    The two value series are passed through ``np.expm1`` before plotting, so
    callers are expected to supply log1p-scale values.

    Args:
        dates: X-axis values for both lines.
        y_true: Actual values on the log1p scale.
        y_pred: Predicted values on the log1p scale.
        group_code: Identifier shown in the chart title.
        save_path: File path the figure is written to.
    """
    actual = np.expm1(y_true)
    predicted = np.expm1(y_pred)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(dates, actual, label="Actual", marker="o")
    ax.plot(dates, predicted, label="Predicted", marker="x")
    ax.set_title(f"Forecast for Group {group_code}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Sales Quantity")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(save_path)
    # Close explicitly: this is called once per group and open figures accumulate.
    plt.close(fig)

# -------- FEATURE UPDATER --------
def update_recursive_features(df):
    """Recompute the lag and rolling-mean columns of the target.

    The frame is sorted by ``"date"`` and a new frame is returned with
    ``lag_1``..``lag_3`` and ``rolling_mean_3`` / ``rolling_mean_6`` set
    (overwriting columns of the same name if they already exist).

    NOTE(review): the rolling window ends at, and includes, the current row.
    A row whose target is NaN therefore gets NaN rolling means - confirm this
    matches how the training features were originally built.

    Args:
        df: Frame with a ``"date"`` column and the target column.

    Returns:
        A date-sorted copy of ``df`` with the five feature columns refreshed.
    """
    ordered = df.sort_values("date")
    target = ordered[TARGET]

    lagged = {f"lag_{k}": target.shift(k) for k in (1, 2, 3)}
    rolled = {f"rolling_mean_{w}": target.rolling(w).mean() for w in (3, 6)}

    return ordered.assign(**lagged, **rolled)

# -------- OPTUNA OBJECTIVE --------
def build_objective(train_df, features, cat_features, folds=((2022, 2023), (2023, 2024))):
    """Build the Optuna objective that tunes CatBoost for one group.

    Each fold trains on every row up to and including a year and validates on
    the rows of a later year. The per-fold loss is the log-scale RMSE plus
    large penalties when the original-scale MAPE exceeds 15 % or R2 drops
    below 0.5; the objective returns the mean loss over the usable folds.

    NOTE(review): each fold's validation rows are also the ``eval_set`` that
    drives early stopping, so the reported loss is optimistic.

    Args:
        train_df: Frame with a datetime ``"date"`` column, the target column
            and every column named in ``features``.
        features: Column names used as model inputs.
        cat_features: Column names passed to CatBoost as categorical.
        folds: Iterable of ``(last_training_year, validation_year)`` pairs.

    Returns:
        A callable ``objective(trial) -> float`` suitable for ``study.optimize``.

    Raises:
        ValueError: If no fold has both training and validation rows.
    """
    mape_weight, r2_weight = 1000, 1000
    mape_limit, r2_floor = 15, 0.5

    # The splits do not depend on the trial, so slice and transform them once
    # instead of repeating the work for every trial.
    years = train_df["date"].dt.year
    fold_data = []
    for train_end, val_year in folds:
        train_cut = train_df[years <= train_end]
        val_cut = train_df[years == val_year]
        # A group with a short history may lack one of the years; fitting or
        # scoring on an empty frame would fail, so skip that fold.
        if train_cut.empty or val_cut.empty:
            continue
        fold_data.append((
            train_cut[features], np.log1p(train_cut[TARGET]),
            val_cut[features], np.log1p(val_cut[TARGET]),
        ))

    if not fold_data:
        raise ValueError(
            f"No usable validation fold in {tuple(folds)}: every fold is missing "
            "training or validation rows."
        )

    def objective(trial):
        params = {
            "iterations": trial.suggest_int("iterations", 300, 1500),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "depth": trial.suggest_int("depth", 4, 10),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1.0, 10.0),
            "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 1.0),
            "random_strength": trial.suggest_float("random_strength", 1e-9, 10.0, log=True),
            "loss_function": "RMSE", "random_seed": 42, "verbose": 0
        }
        losses = []

        for X_train, y_train, X_val, y_val in fold_data:
            model = CatBoostRegressor(**params)
            model.fit(X_train, y_train, eval_set=(X_val, y_val), cat_features=cat_features, early_stopping_rounds=50, verbose=0)

            preds = model.predict(X_val)
            # RMSE stays on the log scale; MAPE and R2 come from the shared
            # helper, which scores on the original scale.
            rmse = np.sqrt(mean_squared_error(y_val, preds))
            scores = get_metrics(y_val, preds)

            loss = (
                rmse
                + mape_weight * max(0, scores["MAPE"] - mape_limit)
                + r2_weight * max(r2_floor - scores["R2"], 0)
            )
            losses.append(loss)

        return np.mean(losses)
    return objective

# -------- MAIN --------
def _fit_catboost(params, frame, features):
    """Fit a CatBoostRegressor on ``frame`` using a log1p-transformed target.

    Returns the fitted model plus the feature matrix and log1p target it was
    trained on, so the caller can score the in-sample fit.
    """
    X, y = frame[features], np.log1p(frame[TARGET])
    model = CatBoostRegressor(**params)
    model.fit(X, y, cat_features=CAT_FEATURES)
    return model, X, y


def _recursive_forecast(model, train_g, pred_g, features):
    """Forecast ``pred_g``'s dates one at a time, feeding each prediction back
    into the history so later lag/rolling features can use it."""
    history = pd.concat([train_g, pred_g], ignore_index=True).sort_values("date")

    # Walk the forecast dates chronologically and only once each: a later
    # step's lags depend on the earlier steps having been filled in already.
    forecast_dates = pred_g["date"].drop_duplicates().sort_values()
    for current_date in forecast_dates:
        known = update_recursive_features(history[history["date"] <= current_date].copy())
        step_features = known.loc[known["date"] == current_date, features]
        pred_value = model.predict(step_features)[0]
        # The model works on the log1p scale; store the value on the original scale.
        history.loc[history["date"] == current_date, TARGET] = np.expm1(pred_value)

    # Select by the dates that were actually forecast instead of a fixed year.
    forecast = history.loc[history["date"].isin(forecast_dates), ["date", "group_code", TARGET]]
    return forecast.rename(columns={TARGET: "predicted_sales_quantity"})


def main():
    """Tune, train, validate and forecast one CatBoost model per group.

    For every ``group_code`` in the training file that also has rows in the
    prediction file this:

    1. tunes hyper-parameters with Optuna (seeded, ``N_TRIALS`` trials),
    2. fits the final model on all rows up to ``TEST_YEAR`` and saves it,
    3. reports hold-out metrics for ``TEST_YEAR`` from a second model fitted
       only on earlier years, so the validation rows are never seen in training,
    4. reports in-sample metrics of the final model,
    5. writes a recursive forecast for the prediction rows.

    NOTE(review): the hyper-parameters themselves are tuned against folds
    that may include ``TEST_YEAR``, so the hold-out metrics are still
    somewhat optimistic.

    Outputs are written below ``OUTPUT_DIR``: ``models/``, ``plots/``,
    ``forecasts/``, ``cv_metrics.csv``, ``train_metrics.csv`` and
    ``optuna_params.csv``.
    """
    for subdir in ("models", "forecasts", "plots"):
        os.makedirs(f"{OUTPUT_DIR}/{subdir}", exist_ok=True)

    train_df = pd.read_csv(TRAIN_PATH, parse_dates=["date"])
    predict_df = pd.read_csv(PREDICT_PATH, parse_dates=["date"])
    group_codes = train_df["group_code"].unique()

    metric_records = []
    train_metrics = []
    optuna_param_records = []

    # Seed for the Optuna sampler; without it every run explores a different
    # sequence of trials and the results cannot be reproduced.
    sampler_seed = 42

    for group in group_codes:
        print(f"🔍 Tuning group: {group}")
        train_g = train_df[train_df["group_code"] == group].copy()
        pred_g = predict_df[predict_df["group_code"] == group].copy()

        if pred_g.empty:
            print(f"⚠️ Skipping group {group} — no 2025 data.")
            continue

        # Only columns present in both files can be used at forecast time.
        drop_cols = {"date", TARGET, "material_group"}
        features = [col for col in train_g.columns if col not in drop_cols and col in pred_g.columns]

        # -------- TUNING --------
        study = optuna.create_study(
            direction="minimize",
            sampler=optuna.samplers.TPESampler(seed=sampler_seed),
        )
        study.optimize(build_objective(train_g, features, CAT_FEATURES), n_trials=N_TRIALS)
        best_params = study.best_params
        best_params.update({"loss_function": "RMSE", "random_seed": 42, "verbose": 0})
        optuna_param_records.append({**best_params, "group_code": group})

        # -------- FINAL TRAINING --------
        years = train_g["date"].dt.year
        model, X_train, y_train = _fit_catboost(best_params, train_g[years <= TEST_YEAR], features)
        joblib.dump(model, f"{OUTPUT_DIR}/models/{group}_catboost_optuna.pkl")

        # -------- VALIDATION --------
        # Score TEST_YEAR with a model that has not seen it; the final model
        # above was trained on those rows and would only report in-sample fit.
        holdout_train = train_g[years < TEST_YEAR]
        val_cut = train_g[years == TEST_YEAR]
        if holdout_train.empty or val_cut.empty:
            print(f"⚠️ Group {group}: not enough data for a {TEST_YEAR} hold-out; validation metrics skipped.")
        else:
            holdout_model, _, _ = _fit_catboost(best_params, holdout_train, features)
            val_true = np.log1p(val_cut[TARGET])
            val_preds = holdout_model.predict(val_cut[features])
            metrics = get_metrics(val_true, val_preds)
            metrics["group_code"] = group
            metric_records.append(metrics)
            plot_forecast(val_cut["date"], val_true, val_preds, group, f"{OUTPUT_DIR}/plots/{group}_forecast.png")

        # -------- TRAIN METRICS --------
        train_m = get_metrics(y_train, model.predict(X_train))
        train_m["group_code"] = group
        train_metrics.append(train_m)

        # -------- RECURSIVE FORECAST --------
        forecast = _recursive_forecast(model, train_g, pred_g, features)
        forecast.to_csv(f"{OUTPUT_DIR}/forecasts/{group}_forecast.csv", index=False)

        print(f"✅ Group {group} complete.")

    pd.DataFrame(metric_records).to_csv(f"{OUTPUT_DIR}/cv_metrics.csv", index=False)
    pd.DataFrame(train_metrics).to_csv(f"{OUTPUT_DIR}/train_metrics.csv", index=False)
    pd.DataFrame(optuna_param_records).to_csv(f"{OUTPUT_DIR}/optuna_params.csv", index=False)
    print("🎯 Forecasting complete for all groups.")

# Entry point: inside a notebook kernel __name__ is "__main__", so executing
# this cell runs the whole tune/train/forecast pipeline immediately.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-05-18 22:22:50,015] A new study created in memory with name: no-name-52939b6e-ff2f-4d8e-a1d3-bfdd9b565e2f


🔍 Tuning group: 0


[I 2025-05-18 22:22:53,234] Trial 0 finished with value: 7073.910151060184 and parameters: {'iterations': 1354, 'learning_rate': 0.025484256805413694, 'depth': 10, 'l2_leaf_reg': 9.467786197997103, 'bagging_temperature': 0.6300968763906004, 'random_strength': 1.4443065366189583e-07}. Best is trial 0 with value: 7073.910151060184.
[I 2025-05-18 22:22:53,700] Trial 1 finished with value: 8786.047324959485 and parameters: {'iterations': 1440, 'learning_rate': 0.2009301000902353, 'depth': 10, 'l2_leaf_reg': 5.893658480207052, 'bagging_temperature': 0.3783147594794437, 'random_strength': 9.052905700940828e-07}. Best is trial 0 with value: 7073.910151060184.
[I 2025-05-18 22:22:53,883] Trial 2 finished with value: 3383.7642798355714 and parameters: {'iterations': 517, 'learning_rate': 0.03170550622451818, 'depth': 6, 'l2_leaf_reg': 4.6884116650815235, 'bagging_temperature': 0.38444690960009253, 'random_strength': 0.0006034664776730023}. Best is trial 2 with value: 3383.7642798355714.
[I 2025

✅ Group 0 complete.
🔍 Tuning group: 1


[I 2025-05-18 22:23:09,872] Trial 0 finished with value: 11552.222287033379 and parameters: {'iterations': 556, 'learning_rate': 0.016267854746094608, 'depth': 10, 'l2_leaf_reg': 9.690565471599154, 'bagging_temperature': 0.7758897606091315, 'random_strength': 3.948066487913383e-05}. Best is trial 0 with value: 11552.222287033379.
[I 2025-05-18 22:23:10,138] Trial 1 finished with value: 6764.225551572219 and parameters: {'iterations': 444, 'learning_rate': 0.032156596899750436, 'depth': 7, 'l2_leaf_reg': 7.705268402826264, 'bagging_temperature': 0.22374663150102958, 'random_strength': 0.1541363433139112}. Best is trial 1 with value: 6764.225551572219.
[I 2025-05-18 22:23:10,967] Trial 2 finished with value: 8935.589926068822 and parameters: {'iterations': 1490, 'learning_rate': 0.04639943586624705, 'depth': 10, 'l2_leaf_reg': 2.904662120489702, 'bagging_temperature': 0.5048968415453979, 'random_strength': 0.07610116370129404}. Best is trial 1 with value: 6764.225551572219.
[I 2025-05-18

✅ Group 1 complete.
🔍 Tuning group: 2


[I 2025-05-18 22:23:30,260] Trial 0 finished with value: 8037.754843752104 and parameters: {'iterations': 1189, 'learning_rate': 0.015250332077458455, 'depth': 10, 'l2_leaf_reg': 2.38766841008182, 'bagging_temperature': 0.6899264150750415, 'random_strength': 0.004799087640169892}. Best is trial 0 with value: 8037.754843752104.
[I 2025-05-18 22:23:30,988] Trial 1 finished with value: 10766.127847531077 and parameters: {'iterations': 902, 'learning_rate': 0.07595026015969551, 'depth': 10, 'l2_leaf_reg': 1.1557734633469805, 'bagging_temperature': 0.8831543454761938, 'random_strength': 0.01302614182030348}. Best is trial 0 with value: 8037.754843752104.
[I 2025-05-18 22:23:31,054] Trial 2 finished with value: 6730.328603014329 and parameters: {'iterations': 1108, 'learning_rate': 0.03357917022792437, 'depth': 4, 'l2_leaf_reg': 2.2957589631579376, 'bagging_temperature': 0.9292885611336349, 'random_strength': 0.00021846278855271772}. Best is trial 2 with value: 6730.328603014329.
[I 2025-05-

✅ Group 2 complete.
🔍 Tuning group: 3


[I 2025-05-18 22:23:53,332] Trial 2 finished with value: 6139.43129480707 and parameters: {'iterations': 525, 'learning_rate': 0.0696709981872478, 'depth': 7, 'l2_leaf_reg': 3.2076825038412986, 'bagging_temperature': 0.11819247596383242, 'random_strength': 0.018973224263527294}. Best is trial 2 with value: 6139.43129480707.
[I 2025-05-18 22:23:53,422] Trial 3 finished with value: 8793.693390955159 and parameters: {'iterations': 931, 'learning_rate': 0.20763028248001086, 'depth': 7, 'l2_leaf_reg': 8.389787448456882, 'bagging_temperature': 0.12207648957302586, 'random_strength': 2.0156341225627226}. Best is trial 2 with value: 6139.43129480707.
[I 2025-05-18 22:23:53,456] Trial 4 finished with value: 7275.676607891976 and parameters: {'iterations': 540, 'learning_rate': 0.20887639801573704, 'depth': 5, 'l2_leaf_reg': 2.812450412016778, 'bagging_temperature': 0.2115951319740874, 'random_strength': 0.005100073713302991}. Best is trial 2 with value: 6139.43129480707.
[I 2025-05-18 22:23:53,

✅ Group 3 complete.
🔍 Tuning group: 4


[I 2025-05-18 22:24:28,660] Trial 0 finished with value: 5612.213859943766 and parameters: {'iterations': 1385, 'learning_rate': 0.04587853393867719, 'depth': 7, 'l2_leaf_reg': 4.465987111736787, 'bagging_temperature': 0.3180340980625308, 'random_strength': 0.022377049920527484}. Best is trial 0 with value: 5612.213859943766.
[I 2025-05-18 22:24:28,743] Trial 1 finished with value: 4356.41436923847 and parameters: {'iterations': 1201, 'learning_rate': 0.20607735024549187, 'depth': 5, 'l2_leaf_reg': 6.044680372723958, 'bagging_temperature': 0.355099710904317, 'random_strength': 0.0003722330682160596}. Best is trial 1 with value: 4356.41436923847.
[I 2025-05-18 22:24:29,021] Trial 2 finished with value: 5033.302347847417 and parameters: {'iterations': 1234, 'learning_rate': 0.0539540668831688, 'depth': 6, 'l2_leaf_reg': 7.004689830601436, 'bagging_temperature': 0.5556503488396185, 'random_strength': 3.4931488544218563e-06}. Best is trial 1 with value: 4356.41436923847.
[I 2025-05-18 22:2

✅ Group 4 complete.
🔍 Tuning group: 5


[I 2025-05-18 22:24:47,395] Trial 0 finished with value: 4816.718079243868 and parameters: {'iterations': 680, 'learning_rate': 0.11008581188050993, 'depth': 8, 'l2_leaf_reg': 8.424809196034552, 'bagging_temperature': 0.5445158703408761, 'random_strength': 1.4547574571169238e-06}. Best is trial 0 with value: 4816.718079243868.
[I 2025-05-18 22:24:47,517] Trial 1 finished with value: 4842.396032496156 and parameters: {'iterations': 1206, 'learning_rate': 0.041814803500177425, 'depth': 5, 'l2_leaf_reg': 6.127371887818729, 'bagging_temperature': 0.5282238259654654, 'random_strength': 0.008740796578958087}. Best is trial 0 with value: 4816.718079243868.
[I 2025-05-18 22:24:47,756] Trial 2 finished with value: 4853.9639560900805 and parameters: {'iterations': 809, 'learning_rate': 0.03969849550329638, 'depth': 6, 'l2_leaf_reg': 9.75793319080174, 'bagging_temperature': 0.0802479307338988, 'random_strength': 0.06527506616143584}. Best is trial 0 with value: 4816.718079243868.
[I 2025-05-18 22

✅ Group 5 complete.
🔍 Tuning group: 6


[I 2025-05-18 22:25:11,659] Trial 0 finished with value: 10237.603400800997 and parameters: {'iterations': 972, 'learning_rate': 0.011330759528563456, 'depth': 7, 'l2_leaf_reg': 4.2103312834486175, 'bagging_temperature': 0.023724337131035833, 'random_strength': 1.013596161040034}. Best is trial 0 with value: 10237.603400800997.
[I 2025-05-18 22:25:12,043] Trial 1 finished with value: 10407.141800042595 and parameters: {'iterations': 587, 'learning_rate': 0.023471944967931074, 'depth': 8, 'l2_leaf_reg': 4.139030768326617, 'bagging_temperature': 0.18186892929877452, 'random_strength': 7.651156510505141e-06}. Best is trial 0 with value: 10237.603400800997.
[I 2025-05-18 22:25:12,282] Trial 2 finished with value: 11844.79886618162 and parameters: {'iterations': 301, 'learning_rate': 0.061817099198063254, 'depth': 9, 'l2_leaf_reg': 1.450038165724813, 'bagging_temperature': 0.017468190985422627, 'random_strength': 1.7252766708224643e-08}. Best is trial 0 with value: 10237.603400800997.
[I 20

✅ Group 6 complete.
🔍 Tuning group: 7


[I 2025-05-18 22:25:34,795] Trial 1 finished with value: 4234.2457567322535 and parameters: {'iterations': 1014, 'learning_rate': 0.024526332688348296, 'depth': 7, 'l2_leaf_reg': 7.207097498016376, 'bagging_temperature': 0.08230748662477416, 'random_strength': 3.1052779800808916e-09}. Best is trial 0 with value: 3334.1267735310917.
[I 2025-05-18 22:25:34,904] Trial 2 finished with value: 4471.788395440022 and parameters: {'iterations': 352, 'learning_rate': 0.15570684253650013, 'depth': 7, 'l2_leaf_reg': 8.999188675034905, 'bagging_temperature': 0.6672015271716969, 'random_strength': 0.00013134337303358874}. Best is trial 0 with value: 3334.1267735310917.
[I 2025-05-18 22:25:35,636] Trial 3 finished with value: 3326.79422998483 and parameters: {'iterations': 924, 'learning_rate': 0.0164152443738889, 'depth': 8, 'l2_leaf_reg': 5.882471856845845, 'bagging_temperature': 0.06256435071964705, 'random_strength': 5.237726165173929}. Best is trial 3 with value: 3326.79422998483.
[I 2025-05-18 

✅ Group 7 complete.
🔍 Tuning group: 8


[I 2025-05-18 22:25:49,052] Trial 1 finished with value: 9276.490326350046 and parameters: {'iterations': 1119, 'learning_rate': 0.05435882040232523, 'depth': 9, 'l2_leaf_reg': 2.9326593964100414, 'bagging_temperature': 0.532554287054488, 'random_strength': 1.613631886785419e-07}. Best is trial 0 with value: 6846.439066066239.
[I 2025-05-18 22:25:50,070] Trial 2 finished with value: 9944.889476071294 and parameters: {'iterations': 1249, 'learning_rate': 0.07954272307124603, 'depth': 9, 'l2_leaf_reg': 4.905604923196108, 'bagging_temperature': 0.21771591440339333, 'random_strength': 1.811219186859207e-05}. Best is trial 0 with value: 6846.439066066239.
[I 2025-05-18 22:25:50,740] Trial 3 finished with value: 9002.676945040781 and parameters: {'iterations': 1144, 'learning_rate': 0.211063784803321, 'depth': 9, 'l2_leaf_reg': 9.758962108882253, 'bagging_temperature': 0.32871799143973457, 'random_strength': 0.0012000754601014677}. Best is trial 0 with value: 6846.439066066239.
[I 2025-05-18

✅ Group 8 complete.
🔍 Tuning group: 9


[I 2025-05-18 22:26:06,939] Trial 0 finished with value: 4330.37082711981 and parameters: {'iterations': 1302, 'learning_rate': 0.26643101968978067, 'depth': 9, 'l2_leaf_reg': 3.429622361422034, 'bagging_temperature': 0.7967861009141444, 'random_strength': 0.03381167311896047}. Best is trial 0 with value: 4330.37082711981.
[I 2025-05-18 22:26:07,319] Trial 1 finished with value: 3224.0447143951665 and parameters: {'iterations': 1453, 'learning_rate': 0.1402219507137882, 'depth': 8, 'l2_leaf_reg': 3.8322490792368478, 'bagging_temperature': 0.4512043797618599, 'random_strength': 2.3029810719152808e-06}. Best is trial 1 with value: 3224.0447143951665.
[I 2025-05-18 22:26:07,970] Trial 2 finished with value: 5021.419335930109 and parameters: {'iterations': 677, 'learning_rate': 0.01014254323361729, 'depth': 8, 'l2_leaf_reg': 3.6910941047493147, 'bagging_temperature': 0.7819230750958303, 'random_strength': 1.4247910346927233e-05}. Best is trial 1 with value: 3224.0447143951665.
[I 2025-05-1

✅ Group 9 complete.
🔍 Tuning group: 10


[I 2025-05-18 22:26:38,799] Trial 0 finished with value: 8596.342109161462 and parameters: {'iterations': 477, 'learning_rate': 0.031523481497533826, 'depth': 7, 'l2_leaf_reg': 4.102380805704929, 'bagging_temperature': 0.08076580815534562, 'random_strength': 9.66443059153027e-06}. Best is trial 0 with value: 8596.342109161462.
[I 2025-05-18 22:26:39,110] Trial 1 finished with value: 7709.021900384839 and parameters: {'iterations': 1422, 'learning_rate': 0.019682103724007187, 'depth': 5, 'l2_leaf_reg': 6.624927796090672, 'bagging_temperature': 0.5015005554203871, 'random_strength': 3.9783336307030444e-05}. Best is trial 1 with value: 7709.021900384839.
[I 2025-05-18 22:26:39,477] Trial 2 finished with value: 7001.07269003548 and parameters: {'iterations': 1414, 'learning_rate': 0.010826491192561091, 'depth': 4, 'l2_leaf_reg': 7.0491839109521734, 'bagging_temperature': 0.02519198531990241, 'random_strength': 0.0016114379722091913}. Best is trial 2 with value: 7001.07269003548.
[I 2025-05

✅ Group 10 complete.
🔍 Tuning group: 11


[I 2025-05-18 22:26:59,699] Trial 0 finished with value: 4629.757757348121 and parameters: {'iterations': 1169, 'learning_rate': 0.11430712595008038, 'depth': 7, 'l2_leaf_reg': 6.50704357893516, 'bagging_temperature': 0.8596015207141714, 'random_strength': 5.580990954683947e-07}. Best is trial 0 with value: 4629.757757348121.
[I 2025-05-18 22:27:00,068] Trial 1 finished with value: 3071.2162657275726 and parameters: {'iterations': 1430, 'learning_rate': 0.03085814200976173, 'depth': 6, 'l2_leaf_reg': 8.512522300821932, 'bagging_temperature': 0.07069152371830845, 'random_strength': 1.0711823401493354e-09}. Best is trial 1 with value: 3071.2162657275726.
[I 2025-05-18 22:27:00,120] Trial 2 finished with value: 2016.135897778472 and parameters: {'iterations': 984, 'learning_rate': 0.26839739096172366, 'depth': 4, 'l2_leaf_reg': 9.332918835814196, 'bagging_temperature': 0.026821151706718283, 'random_strength': 1.551525999316175e-09}. Best is trial 2 with value: 2016.135897778472.
[I 2025-0

✅ Group 11 complete.
🔍 Tuning group: 12


[I 2025-05-18 22:27:22,630] Trial 1 finished with value: 5779.52118354587 and parameters: {'iterations': 589, 'learning_rate': 0.01777118066266023, 'depth': 4, 'l2_leaf_reg': 4.977946660304806, 'bagging_temperature': 0.1633208919071526, 'random_strength': 1.3335287200368605e-09}. Best is trial 0 with value: 5576.233606440824.
[I 2025-05-18 22:27:22,771] Trial 2 finished with value: 9237.469549754336 and parameters: {'iterations': 414, 'learning_rate': 0.04493815061265429, 'depth': 7, 'l2_leaf_reg': 4.9797554163459905, 'bagging_temperature': 0.5944884106447689, 'random_strength': 1.4234754741141078e-06}. Best is trial 0 with value: 5576.233606440824.
[I 2025-05-18 22:27:22,908] Trial 3 finished with value: 12428.26233772577 and parameters: {'iterations': 1498, 'learning_rate': 0.1343534928803807, 'depth': 8, 'l2_leaf_reg': 9.496615059895161, 'bagging_temperature': 0.6624431611204588, 'random_strength': 1.9107112775183414e-09}. Best is trial 0 with value: 5576.233606440824.
[I 2025-05-18

✅ Group 12 complete.
🔍 Tuning group: 13


[I 2025-05-18 22:27:31,261] Trial 1 finished with value: 8540.060258343936 and parameters: {'iterations': 1008, 'learning_rate': 0.03216201475094803, 'depth': 4, 'l2_leaf_reg': 8.920281227144052, 'bagging_temperature': 0.6756074740553699, 'random_strength': 0.07207666192868789}. Best is trial 0 with value: 7507.122387006785.
[I 2025-05-18 22:27:31,411] Trial 2 finished with value: 10085.058535941022 and parameters: {'iterations': 1415, 'learning_rate': 0.1607015144033621, 'depth': 7, 'l2_leaf_reg': 8.452559359594463, 'bagging_temperature': 0.4528123384978634, 'random_strength': 1.3438298834872605e-08}. Best is trial 0 with value: 7507.122387006785.
[I 2025-05-18 22:27:31,856] Trial 3 finished with value: 10179.262589659518 and parameters: {'iterations': 466, 'learning_rate': 0.01004131046789923, 'depth': 8, 'l2_leaf_reg': 1.2827617317249453, 'bagging_temperature': 0.9007882512170984, 'random_strength': 4.521509243587499e-08}. Best is trial 0 with value: 7507.122387006785.
[I 2025-05-18

✅ Group 13 complete.
🔍 Tuning group: 14


[I 2025-05-18 22:27:44,892] Trial 0 finished with value: 7217.607111147635 and parameters: {'iterations': 930, 'learning_rate': 0.013229222088155945, 'depth': 7, 'l2_leaf_reg': 6.680073759651801, 'bagging_temperature': 0.2860571294195309, 'random_strength': 0.004058538585350432}. Best is trial 0 with value: 7217.607111147635.
[I 2025-05-18 22:27:46,444] Trial 1 finished with value: 13742.846781181894 and parameters: {'iterations': 1133, 'learning_rate': 0.013515796029735296, 'depth': 10, 'l2_leaf_reg': 1.4406475773266592, 'bagging_temperature': 0.32981661457525413, 'random_strength': 1.263126273728944e-07}. Best is trial 0 with value: 7217.607111147635.
[I 2025-05-18 22:27:49,021] Trial 2 finished with value: 11229.438312003113 and parameters: {'iterations': 1476, 'learning_rate': 0.012739056823300229, 'depth': 10, 'l2_leaf_reg': 2.8807381828477374, 'bagging_temperature': 0.11989354887405157, 'random_strength': 5.178502952401536e-05}. Best is trial 0 with value: 7217.607111147635.
[I 2

✅ Group 14 complete.
🔍 Tuning group: 15


[I 2025-05-18 22:28:05,951] Trial 1 finished with value: 8427.928673461509 and parameters: {'iterations': 603, 'learning_rate': 0.033547331895098904, 'depth': 7, 'l2_leaf_reg': 7.286778463044424, 'bagging_temperature': 0.03517948942881488, 'random_strength': 0.0011371189872773056}. Best is trial 0 with value: 4412.957459668235.
[I 2025-05-18 22:28:06,055] Trial 2 finished with value: 7510.803134902561 and parameters: {'iterations': 597, 'learning_rate': 0.02640581759912, 'depth': 5, 'l2_leaf_reg': 1.7603767283579421, 'bagging_temperature': 0.5081247754942554, 'random_strength': 7.461220447877343e-05}. Best is trial 0 with value: 4412.957459668235.
[I 2025-05-18 22:28:07,681] Trial 3 finished with value: 8937.935371847834 and parameters: {'iterations': 1453, 'learning_rate': 0.042829032084220786, 'depth': 10, 'l2_leaf_reg': 6.831578546505407, 'bagging_temperature': 0.3811633697938479, 'random_strength': 0.015272694587375152}. Best is trial 0 with value: 4412.957459668235.
[I 2025-05-18 

✅ Group 15 complete.
🔍 Tuning group: 16


[I 2025-05-18 22:28:18,609] Trial 2 finished with value: 621.8378092558717 and parameters: {'iterations': 569, 'learning_rate': 0.14699240844971484, 'depth': 10, 'l2_leaf_reg': 8.544956805274024, 'bagging_temperature': 0.47896062305453924, 'random_strength': 0.11221354432342853}. Best is trial 2 with value: 621.8378092558717.
[I 2025-05-18 22:28:18,909] Trial 3 finished with value: 2676.3694879999766 and parameters: {'iterations': 1439, 'learning_rate': 0.024543368305721087, 'depth': 8, 'l2_leaf_reg': 7.2036775798590265, 'bagging_temperature': 0.35502933131322134, 'random_strength': 0.06539413818773487}. Best is trial 2 with value: 621.8378092558717.
[I 2025-05-18 22:28:19,597] Trial 4 finished with value: 1378.2757991074295 and parameters: {'iterations': 547, 'learning_rate': 0.025588679153775995, 'depth': 9, 'l2_leaf_reg': 9.328800196420195, 'bagging_temperature': 0.7312223868944847, 'random_strength': 7.815593672049027e-07}. Best is trial 2 with value: 621.8378092558717.
[I 2025-05-

✅ Group 16 complete.
🔍 Tuning group: 17


[I 2025-05-18 22:28:31,184] Trial 0 finished with value: 4141.677282697283 and parameters: {'iterations': 1001, 'learning_rate': 0.025542452150181694, 'depth': 8, 'l2_leaf_reg': 9.072604443920547, 'bagging_temperature': 0.8722010568620598, 'random_strength': 2.1133293664069804e-06}. Best is trial 0 with value: 4141.677282697283.
[I 2025-05-18 22:28:31,499] Trial 1 finished with value: 4350.269760214448 and parameters: {'iterations': 658, 'learning_rate': 0.016024151158060794, 'depth': 6, 'l2_leaf_reg': 9.273539794956699, 'bagging_temperature': 0.16412688510377216, 'random_strength': 1.195013579174843e-05}. Best is trial 0 with value: 4141.677282697283.
[I 2025-05-18 22:28:31,553] Trial 2 finished with value: 4704.447273116277 and parameters: {'iterations': 1119, 'learning_rate': 0.06229376107689578, 'depth': 4, 'l2_leaf_reg': 1.2021174664812164, 'bagging_temperature': 0.29494005646540145, 'random_strength': 1.7713420858863084e-09}. Best is trial 0 with value: 4141.677282697283.
[I 2025

✅ Group 17 complete.
🔍 Tuning group: 18


[I 2025-05-18 22:28:48,816] Trial 1 finished with value: 5416.834299570481 and parameters: {'iterations': 438, 'learning_rate': 0.08908074749655774, 'depth': 5, 'l2_leaf_reg': 2.154659017978086, 'bagging_temperature': 0.819269581216987, 'random_strength': 0.3868127975194312}. Best is trial 0 with value: 4973.609723485935.
[I 2025-05-18 22:28:49,024] Trial 2 finished with value: 7323.140768460091 and parameters: {'iterations': 309, 'learning_rate': 0.042808638431588754, 'depth': 7, 'l2_leaf_reg': 6.2349215977411445, 'bagging_temperature': 0.6302576587673646, 'random_strength': 0.0008633615814179988}. Best is trial 0 with value: 4973.609723485935.
[I 2025-05-18 22:28:49,088] Trial 3 finished with value: 5208.927462452107 and parameters: {'iterations': 1060, 'learning_rate': 0.03278810188043217, 'depth': 4, 'l2_leaf_reg': 1.6634605793932842, 'bagging_temperature': 0.596741139078958, 'random_strength': 1.245982507285694e-07}. Best is trial 0 with value: 4973.609723485935.
[I 2025-05-18 22:

✅ Group 18 complete.
🔍 Tuning group: 19


[I 2025-05-18 22:29:07,279] Trial 0 finished with value: 11759.944394996264 and parameters: {'iterations': 1100, 'learning_rate': 0.021053645788902682, 'depth': 8, 'l2_leaf_reg': 6.0707937868800945, 'bagging_temperature': 0.9010982363068896, 'random_strength': 3.9692901413519373}. Best is trial 0 with value: 11759.944394996264.
[I 2025-05-18 22:29:07,347] Trial 1 finished with value: 11541.926540857707 and parameters: {'iterations': 1066, 'learning_rate': 0.10368682203834537, 'depth': 6, 'l2_leaf_reg': 2.488859871122439, 'bagging_temperature': 0.3831887674896902, 'random_strength': 3.605063243988532e-08}. Best is trial 1 with value: 11541.926540857707.
[I 2025-05-18 22:29:07,716] Trial 2 finished with value: 15191.425579881141 and parameters: {'iterations': 383, 'learning_rate': 0.018154284551204634, 'depth': 9, 'l2_leaf_reg': 3.03347114170359, 'bagging_temperature': 0.13140326012158843, 'random_strength': 1.9251165796060235e-07}. Best is trial 1 with value: 11541.926540857707.
[I 2025

✅ Group 19 complete.
🔍 Tuning group: 20


[I 2025-05-18 22:29:24,281] Trial 1 finished with value: 5882.179589259697 and parameters: {'iterations': 1359, 'learning_rate': 0.05911145960692565, 'depth': 4, 'l2_leaf_reg': 2.7137418437876906, 'bagging_temperature': 0.9573204150716436, 'random_strength': 5.80885356792745e-07}. Best is trial 1 with value: 5882.179589259697.
[I 2025-05-18 22:29:24,381] Trial 2 finished with value: 7351.545361389657 and parameters: {'iterations': 506, 'learning_rate': 0.18880034495718015, 'depth': 6, 'l2_leaf_reg': 4.922871549566121, 'bagging_temperature': 0.17301059986408462, 'random_strength': 6.355233215422383e-05}. Best is trial 1 with value: 5882.179589259697.
[I 2025-05-18 22:29:24,717] Trial 3 finished with value: 9027.558626026721 and parameters: {'iterations': 704, 'learning_rate': 0.011126917719202224, 'depth': 6, 'l2_leaf_reg': 5.260862436031941, 'bagging_temperature': 0.5370547284535541, 'random_strength': 1.5998751771086302e-07}. Best is trial 1 with value: 5882.179589259697.
[I 2025-05-1

✅ Group 20 complete.
🔍 Tuning group: 21


[I 2025-05-18 22:29:41,574] Trial 0 finished with value: 5141.536593851581 and parameters: {'iterations': 771, 'learning_rate': 0.0178093390638589, 'depth': 7, 'l2_leaf_reg': 3.3181697668640675, 'bagging_temperature': 0.9008064813897734, 'random_strength': 0.0086214409739151}. Best is trial 0 with value: 5141.536593851581.
[I 2025-05-18 22:29:41,769] Trial 1 finished with value: 5918.434466397536 and parameters: {'iterations': 848, 'learning_rate': 0.043134040213088795, 'depth': 8, 'l2_leaf_reg': 2.267061404505846, 'bagging_temperature': 0.414853860322119, 'random_strength': 1.259585717489272e-07}. Best is trial 0 with value: 5141.536593851581.
[I 2025-05-18 22:29:42,789] Trial 2 finished with value: 5218.47050797125 and parameters: {'iterations': 508, 'learning_rate': 0.05767708085084177, 'depth': 10, 'l2_leaf_reg': 8.44374088075903, 'bagging_temperature': 0.1427699355534987, 'random_strength': 6.750608396174588e-05}. Best is trial 0 with value: 5141.536593851581.
[I 2025-05-18 22:29:

✅ Group 21 complete.
🔍 Tuning group: 22


[I 2025-05-18 22:29:59,131] Trial 0 finished with value: 6780.017100189523 and parameters: {'iterations': 1137, 'learning_rate': 0.07635683712178795, 'depth': 9, 'l2_leaf_reg': 5.853482894737766, 'bagging_temperature': 0.003305025338567913, 'random_strength': 2.3543687082571434e-08}. Best is trial 0 with value: 6780.017100189523.
[I 2025-05-18 22:29:59,426] Trial 1 finished with value: 7897.997644479645 and parameters: {'iterations': 1269, 'learning_rate': 0.06371331437107454, 'depth': 8, 'l2_leaf_reg': 7.9834731172245705, 'bagging_temperature': 0.058788231217986264, 'random_strength': 0.019656624939451982}. Best is trial 0 with value: 6780.017100189523.
[I 2025-05-18 22:29:59,645] Trial 2 finished with value: 4972.117365724986 and parameters: {'iterations': 1074, 'learning_rate': 0.035508853166872086, 'depth': 5, 'l2_leaf_reg': 9.622341374757822, 'bagging_temperature': 0.32982496662628125, 'random_strength': 5.529745377480038}. Best is trial 2 with value: 4972.117365724986.
[I 2025-05

✅ Group 22 complete.
🎯 Forecasting complete for all groups.
