In [1]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
import os

In [3]:
# 1. Read the training split from disk, parsing 'date' into datetime values.
train_csv_path = "./train_test_splits/train.csv"
train_df = pd.read_csv(train_csv_path, parse_dates=['date'])

In [4]:
# Preview the first five rows to sanity-check the loaded columns.
train_df.head(n=5)

Unnamed: 0,location_id,weather_code (wmo code),temperature_2m_max (°C),temperature_2m_min (°C),temperature,apparent_temperature_max (°C),apparent_temperature_min (°C),apparent_temperature_mean (°C),daylight_duration (s),sunshine_duration (s),...,date,day_of_week,temperature_lag1,temperature_lag2,rainfall_lag1,rainfall_lag2,wind_speed_lag1,wind_speed_lag2,precipitation_lag1,precipitation_lag2
0,0,2,32.6,24.1,28.0,38.0,29.0,32.7,43161.09,40563.73,...,2010-01-03,6,26.4,26.0,15.7,0.0,11.6,12.2,15.7,0.0
1,0,61,31.0,25.0,27.5,37.7,30.5,33.4,43867.57,39600.0,...,2010-01-04,0,28.0,26.4,0.0,15.7,13.0,11.6,0.0,15.7
2,0,61,31.1,25.1,27.7,37.5,30.8,33.8,44517.18,40574.88,...,2010-01-05,1,27.5,28.0,7.4,0.0,13.6,13.0,7.4,0.0
3,0,51,30.2,26.1,28.1,37.0,32.1,34.0,44987.79,39781.38,...,2010-01-06,2,27.7,27.5,9.6,7.4,10.3,13.6,9.6,7.4
4,0,63,28.1,24.5,26.0,32.5,28.1,30.1,45064.31,27218.75,...,2010-01-07,3,28.1,27.7,4.1,9.6,15.0,10.3,4.1,9.6


In [5]:
# 2. Target columns: these are passed together as the multi-output label,
#    and everything else (minus exclusions) becomes a feature.
target_cols = [
    'temperature',
    'rainfall',
    'wind_speed',
    'precipitation',
]

In [6]:
# Build the feature list: every column except the targets, the raw 'date'
# column, and any 'Unnamed: N' column.
# 'Unnamed: 0' (visible in train_df.head() with values 0, 1, 2, ...) is the
# row index that pandas typically produces when a frame was written with its
# index and read back without index_col. It is a row counter, not a weather
# measurement, so it is kept out of the model's inputs.
excluded_cols = set(target_cols) | {'date'}
feature_cols = [
    col for col in train_df.columns
    if col not in excluded_cols and not str(col).startswith('Unnamed:')
]

In [7]:
# 3. Wrap the training features and labels in a CatBoost Pool.
#    'location_id' and 'day_of_week' are declared as categorical features.
cat_features = ['location_id', 'day_of_week']

train_features = train_df[feature_cols]
train_labels = train_df[target_cols]
train_pool = Pool(data=train_features, label=train_labels, cat_features=cat_features)

In [8]:
# 4. Configure the multi-output CatBoost regressor.
model_params = {
    'iterations': 1000,
    'learning_rate': 0.1,
    'depth': 6,
    'loss_function': 'MultiRMSE',  # single loss over all target columns
    'random_seed': 42,             # fixed seed for reproducibility
    'verbose': 100,                # print a progress line every 100 iterations
}
model = CatBoostRegressor(**model_params)

In [9]:
# 5. Train the model on train set only.
# No validation data is passed here, so the progress log reports only the
# training ("learn") loss.
# fit() is the last expression in the cell, so its return value (the fitted
# CatBoostRegressor) is displayed as the cell output.
model.fit(train_pool)

0:	learn: 15.4668413	total: 298ms	remaining: 4m 58s
100:	learn: 5.8966895	total: 15s	remaining: 2m 13s
200:	learn: 5.3934183	total: 29.3s	remaining: 1m 56s
300:	learn: 5.0795946	total: 43.4s	remaining: 1m 40s
400:	learn: 4.8562931	total: 59.4s	remaining: 1m 28s
500:	learn: 4.6678812	total: 1m 14s	remaining: 1m 14s
600:	learn: 4.5070036	total: 1m 30s	remaining: 60s
700:	learn: 4.3598319	total: 1m 46s	remaining: 45.4s
800:	learn: 4.2346279	total: 2m 2s	remaining: 30.3s
900:	learn: 4.1246171	total: 2m 18s	remaining: 15.2s
999:	learn: 4.0235997	total: 2m 35s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x2147807b0e0>

In [10]:
# 6. Persist the fitted model to disk in CatBoost's native binary format.
save_path = "catboost_initial_model.cbm"
model.save_model(fname=save_path, format="cbm")

In [11]:
# 7. Confirm the model file exists and report the outcome.
# The completion message is printed only in the success branch, so a failed
# save is never followed by a misleading "completed" line.
# Note: isfile() only confirms that a file exists at save_path; it does not
# tell whether the file was written by this run or left over from an earlier one.
if os.path.isfile(save_path):
    print(f"Model saved successfully at: {save_path}")
    print("Initial model training and saving completed.")
else:
    print("Model saving failed!")

Model saved successfully at: catboost_initial_model.cbm
Initial model training and saving completed.
