In [None]:
from sklearn.model_selection import train_test_split

import columns
import paths


def split_train_test(X, y, test_size=0.2, random_state=42, stratify=None):
    """Split features and targets into train and test partitions.

    Thin wrapper around ``sklearn.model_selection.train_test_split`` that
    pins the project defaults (20% test share, fixed seed) in one place.

    Args:
        X: Feature table to split.
        y: Target values aligned with ``X``.
        test_size: Forwarded unchanged as ``test_size``.
        random_state: Forwarded unchanged as ``random_state``.
        stratify: Forwarded unchanged as ``stratify``; ``None`` disables
            stratified sampling.

    Returns:
        Whatever ``train_test_split`` returns for ``(X, y)``; callers in this
        notebook unpack it as ``X_train, X_test, y_train, y_test``.
    """
    split_options = {
        "test_size": test_size,
        "random_state": random_state,
        "stratify": stratify,
    }
    return train_test_split(X, y, **split_options)

# Fire occurrence model (classification)


In [None]:
from catboost import CatBoostClassifier
import pandas as pd

# Train the fire-occurrence classifier and save it to disk.
df = pd.read_csv(paths.inmet_inpe_linked_file)

# Both target columns are removed from the features so neither leaks into X.
Xc = df.drop(columns=[columns.fire_intensity_column, columns.fire_occurrence_column])
yc = df[columns.fire_occurrence_column]

# Stratify on the label so train and test keep the same class ratio.
Xc_train, Xc_test, yc_train, yc_test = split_train_test(Xc, yc, stratify=yc)

# Class-imbalance weight (negatives per positive). It is derived from the
# training labels only, so held-out labels never influence a training
# hyperparameter.
# assumes the occurrence column is encoded as 0/1 — TODO confirm
pos = yc_train.sum()
neg = len(yc_train) - pos
if pos == 0:
    # Without positives the ratio is undefined and the classifier cannot learn.
    raise ValueError(
        "No positive fire-occurrence samples in the training split; "
        "cannot compute scale_pos_weight."
    )
scale = neg / pos

model_cls = CatBoostClassifier(
    loss_function='Logloss',
    eval_metric='AUC',
    custom_metric=['Precision', 'Recall', 'F1'],
    learning_rate=0.1,
    depth=8,
    iterations=300,
    scale_pos_weight=scale,
    od_type='Iter',  # early stopping type
    od_wait=10,  # patience for early stopping
    use_best_model=True,
    verbose=True,
    train_dir=paths.classification_info_dir
)

# NOTE(review): the test split doubles as the early-stopping eval set while
# use_best_model=True, so metrics later reported on (Xc_test, yc_test) are
# optimistically biased. Consider carving a validation split out of the
# training data for eval_set.
model_cls.fit(Xc_train, yc_train, eval_set=(Xc_test, yc_test))
model_cls.save_model(paths.classifier_cbm_file)

# Fire intensity model (regression)

In [None]:
from catboost import CatBoostRegressor
import pandas as pd

# Train the fire-intensity regressor and save it to disk.
# Rows without a recorded intensity have no regression target, so they are
# dropped right after loading.
df = pd.read_csv(paths.inmet_inpe_linked_file).dropna(
    subset=[columns.fire_intensity_column]
)

# Both target columns are excluded from the feature matrix.
reg_target_columns = [columns.fire_intensity_column, columns.fire_occurrence_column]
yr = df[columns.fire_intensity_column]
Xr = df.drop(columns=reg_target_columns)

Xr_train, Xr_test, yr_train, yr_test = split_train_test(Xr, yr)

# Hyperparameters are gathered in one mapping and unpacked into the constructor.
reg_params = {
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'custom_metric': ['MAE', 'R2', 'MAPE'],
    'learning_rate': 0.1,
    'depth': 8,
    'iterations': 300,
    'od_type': 'Iter',        # early stopping type
    'od_wait': 10,            # patience for early stopping
    'use_best_model': True,
    'verbose': True,
    'train_dir': paths.regression_info_dir,
}
model_reg = CatBoostRegressor(**reg_params)

# The held-out split is passed as eval_set for early stopping.
model_reg.fit(Xr_train, yr_train, eval_set=(Xr_test, yr_test))
model_reg.save_model(paths.regressor_cbm_file)