In [1]:
import pandas as pd
from catboost import Pool, CatBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

In [2]:
# 1. Read the validation split from disk into a DataFrame
validation_csv_path = "./train_test_splits/validate.csv"
val_df = pd.read_csv(validation_csv_path)

In [4]:
# 2. Column roles (intended to mirror the definitions used at training time)
cat_features = ['location_id', 'day_of_week']
target_cols = ['temperature', 'rainfall', 'wind_speed', 'precipitation']

# Every column that is neither a target nor the 'date' column is a feature;
# the original column order of val_df is kept.
non_feature_cols = target_cols + ['date']
feature_cols = [col for col in val_df.columns if col not in non_feature_cols]

In [5]:
# 3. Restore the previously saved CatBoost model from its .cbm file
model_path = "catboost_initial_model.cbm"
model = CatBoostRegressor()
# Kept as the cell's final expression so the returned model repr is displayed.
model.load_model(model_path)

<catboost.core.CatBoostRegressor at 0x2985045c1a0>

In [6]:
# 4. Wrap the validation features and target columns in a CatBoost Pool
val_features = val_df[feature_cols]
val_labels = val_df[target_cols]
val_pool = Pool(data=val_features, label=val_labels, cat_features=cat_features)

print("Starting validation predictions and metrics calculation...")

Starting validation predictions and metrics calculation...


In [7]:
# 5. Run the loaded model over the validation Pool.
# The result is indexed as val_preds[:, i] in the metrics cell, i.e. it is
# treated as 2-D with one column per target (num_samples, num_targets).
val_preds = model.predict(data=val_pool)

In [8]:
# 6. Score each target column independently and print R2 / MAE / RMSE.
# NOTE(review): the recorded output shows identical numbers for 'rainfall' and
# 'precipitation' -- worth confirming whether those two columns are duplicates
# in the underlying data.
val_true = val_df[target_cols].values

for i, target in enumerate(target_cols):
    # Column i of the truth and prediction arrays is taken to belong to `target`.
    y_true = val_true[:, i]
    y_pred = val_preds[:, i]

    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error

    print(f"Validation metrics for {target}:")
    print(f"  R2 Score: {r2:.4f}")
    print(f"  MAE: {mae:.4f}")
    print(f"  RMSE: {rmse:.4f}\n")

print("Validation complete.")

Validation metrics for temperature:
  R2 Score: 0.9891
  MAE: 0.2525
  RMSE: 0.3244

Validation metrics for rainfall:
  R2 Score: 0.7590
  MAE: 1.6662
  RMSE: 5.1310

Validation metrics for wind_speed:
  R2 Score: 0.9337
  MAE: 1.2784
  RMSE: 1.6858

Validation metrics for precipitation:
  R2 Score: 0.7590
  MAE: 1.6662
  RMSE: 5.1310

Validation complete.
