In [21]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
from sklearn.metrics import r2_score
import itertools
import os

In [22]:
# Read the training and validation splits from their CSV files.
train_df, val_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./train_test_splits/train.csv",
        "./train_test_splits/validate.csv",
    )
)

In [23]:
# Columns used as regression labels.
target_cols = ['temperature', 'rainfall', 'wind_speed', 'precipitation']

# Every remaining column except the targets and 'date' is used as a feature;
# the original column order of train_df is preserved.
_non_feature_cols = set(target_cols) | {'date'}
feature_cols = [name for name in train_df.columns if name not in _non_feature_cols]

# Feature columns handed to CatBoost as categorical.
cat_features = ['location_id', 'day_of_week']

In [24]:
def _to_pool(frame):
    """Wrap a split's feature columns and multi-target labels in a CatBoost Pool."""
    return Pool(
        frame[feature_cols],
        label=frame[target_cols],
        cat_features=cat_features,
    )


# Same features / labels / categorical columns for both splits.
train_pool = _to_pool(train_df)
val_pool = _to_pool(val_df)

In [25]:
# 2. Hyperparameter grid
# Candidate values for each tuned parameter; the full grid is the Cartesian
# product of these lists (3 x 3 x 3 x 1 = 27 combinations).

# Boosting step size.
learning_rates = [0.01, 0.05, 0.1]

# Tree depth.
depths = [4, 6, 8]

# L2 leaf regularization strength (new: regularization).
l2_regs = [1, 3, 5]

# Fixed high value, early stopping handles exit.
# NOTE(review): the captured log for lr=0.01 shows the validation loss still
# improving at iteration 900, so this cap may be reached before early stopping
# triggers for small learning rates -- confirm whether 1000 is high enough.
iterations = [1000]

In [26]:
# 3. Tuning Loop
# Bookkeeping for the grid search:
#   results     -- one dict per evaluated hyperparameter combination
#   best_params -- the combination with the highest score seen so far
#   best_score  -- that score; starts at -inf so any real score replaces it
results = list()
best_params = dict()
best_score = float("-inf")

In [27]:
# Exhaustive grid search: train one MultiRMSE CatBoost model per hyperparameter
# combination, score it on the validation split with the mean per-target R²,
# log every run in `results`, and remember the best combination.
#
# The tuning state is (re)initialised here so that re-running this cell on its
# own starts from a clean slate instead of appending duplicate rows to
# `results` or comparing new runs against a stale `best_score`.
best_score = -np.inf
best_params = {}
results = []

# Ground-truth targets do not change between runs, so extract them once.
val_true = val_df[target_cols].values

# itertools.product iterates in the same order as the equivalent nested loops
# (learning rate outermost, iterations innermost).
for lr, depth, l2, iter_ in itertools.product(learning_rates, depths, l2_regs, iterations):
    print(f"\nTraining with lr={lr}, depth={depth}, l2_leaf_reg={l2}, iterations={iter_}")

    model = CatBoostRegressor(
        iterations=iter_,
        learning_rate=lr,
        depth=depth,
        l2_leaf_reg=l2,
        loss_function='MultiRMSE',
        early_stopping_rounds=50,
        use_best_model=True,
        verbose=100,
        random_seed=42
    )

    model.fit(train_pool, eval_set=val_pool)

    preds = model.predict(val_df[feature_cols])

    # Compute average R² score (one R² per target column, then the mean).
    r2_scores = [r2_score(val_true[:, i], preds[:, i]) for i in range(len(target_cols))]
    avg_r2 = np.mean(r2_scores)

    print(f"Avg R² Score: {avg_r2:.4f}")
    results.append({
        "learning_rate": lr,
        "depth": depth,
        "l2_leaf_reg": l2,
        "iterations": iter_,
        # `iterations` is only the cap; with use_best_model=True the model is
        # truncated to its best validation iteration, so log that as well.
        "best_iteration": model.get_best_iteration(),
        "avg_r2": avg_r2
    })

    # Strict '>' keeps the first combination encountered on ties.
    if avg_r2 > best_score:
        best_score = avg_r2
        # Only constructor-compatible keys are stored here so the dict can
        # still be passed straight to CatBoostRegressor(**best_params).
        best_params = {
            "learning_rate": lr,
            "depth": depth,
            "l2_leaf_reg": l2,
            "iterations": iter_
        }



Training with lr=0.01, depth=4, l2_leaf_reg=1, iterations=1000
0:	learn: 16.4929172	test: 16.3759835	best: 16.3759835 (0)	total: 92.7ms	remaining: 1m 32s
100:	learn: 10.4858339	test: 10.6033855	best: 10.6033855 (100)	total: 8.34s	remaining: 1m 14s
200:	learn: 8.2801859	test: 8.8096123	best: 8.8096123 (200)	total: 18.2s	remaining: 1m 12s
300:	learn: 7.3883117	test: 8.2340490	best: 8.2340490 (300)	total: 29.8s	remaining: 1m 9s
400:	learn: 6.9642090	test: 8.0007105	best: 8.0007105 (400)	total: 41.6s	remaining: 1m 2s
500:	learn: 6.7306082	test: 7.8857029	best: 7.8857029 (500)	total: 53.2s	remaining: 52.9s
600:	learn: 6.5796876	test: 7.8056943	best: 7.8056943 (600)	total: 1m 4s	remaining: 42.8s
700:	learn: 6.4760429	test: 7.7370360	best: 7.7370360 (700)	total: 1m 15s	remaining: 32.2s
800:	learn: 6.3949198	test: 7.6845654	best: 7.6845654 (800)	total: 1m 27s	remaining: 21.7s
900:	learn: 6.3308681	test: 7.6486478	best: 7.6486478 (900)	total: 1m 38s	remaining: 10.8s
999:	learn: 6.2742313	test:

In [28]:
# 4. Save tuning results
# Make sure the log directory exists, then write one CSV row per evaluated
# hyperparameter combination (no index column).
log_dir = "tuning_logs"
log_path = "tuning_logs/hyperparameter_tuning_log.csv"
os.makedirs(log_dir, exist_ok=True)

results_df = pd.DataFrame(results)
results_df.to_csv(log_path, index=False)

# 5. Output best result
# A single print call with a newline separator emits the same three lines as
# three separate print calls would.
print(
    "\nBest Hyperparameters Found:",
    best_params,
    f"Best Avg R² Score: {best_score:.4f}",
    sep="\n",
)


Best Hyperparameters Found:
{'learning_rate': 0.1, 'depth': 8, 'l2_leaf_reg': 1, 'iterations': 1000}
Best Avg R² Score: 0.8611
