In [None]:
# %%time
from sklearn.metrics import *
from sklearn.model_selection import KFold
from catboost import CatBoostRegressor
import numpy as np

# Cross-validation setup: shuffled K-fold with a fixed seed so the splits
# are the same on every run.
FOLDS = 10
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

# oof_cat holds the out-of-fold prediction for every training row;
# pred_cat accumulates the per-fold test predictions and is averaged
# over FOLDS after the loop.
oof_cat = np.zeros(len(train))
pred_cat = np.zeros(len(test))

# The test feature matrix does not depend on the fold, so build it once
# instead of re-slicing it on every iteration.
X_test = test[features]

for i, (tr_idx, va_idx) in enumerate(kf.split(train)):

    # Positional split of the training frame into this fold's fit/validation parts.
    X_train = train.iloc[tr_idx][features]
    y_train = train.iloc[tr_idx][target]
    X_valid = train.iloc[va_idx][features]
    y_valid = train.iloc[va_idx][target]

    # A fresh model per fold so no state carries over between folds.
    model_cat = CatBoostRegressor(
        iterations=5000,            # upper bound; early stopping normally ends sooner
        learning_rate=0.02,
        depth=3,
        eval_metric='RMSE',
        cat_features=cat,           # categorical columns (`cat` is defined elsewhere in the notebook)
        early_stopping_rounds=20,   # stop once the eval metric has not improved for 20 rounds
        verbose=0,
        task_type="GPU"             # requires a GPU-enabled CatBoost environment
    )

    # The validation part of the fold doubles as the early-stopping set;
    # use_best_model keeps the iteration with the best eval metric.
    model_cat.fit(
        X_train, y_train,
        eval_set=(X_valid, y_valid),
        use_best_model=True
    )

    valid_preds = model_cat.predict(X_valid)
    oof_cat[va_idx] = valid_preds
    pred_cat += model_cat.predict(X_test)

    # RMSE as sqrt(MSE): the `squared=False` keyword of mean_squared_error
    # was deprecated in scikit-learn 1.4 and removed in 1.6, so taking the
    # root explicitly keeps this cell working on every version.
    fold_rmse = np.sqrt(mean_squared_error(y_valid, valid_preds))

    print("#" * 25)
    print(f"### Fold {i+1} \n")
    print(f"Fold {i+1} RMSE: {fold_rmse}")
    print("#" * 25)

# Turn the accumulated sum of fold predictions into their mean.
pred_cat /= FOLDS


In [None]:
# Horizontal bar chart of CatBoost feature importances, most important on top.
# NOTE(review): `model_cat` is whatever model was assigned last; if the
# cross-validation loop ran just before this cell, that is the model of the
# final fold only, not an average over folds.
feature_importance = model_cat.get_feature_importance()
feature_names = train[features].columns

# Feature positions ordered from the largest importance to the smallest.
indices = np.argsort(feature_importance)[::-1]

bar_positions = range(len(feature_importance))
bar_labels = [feature_names[pos] for pos in indices]

fig, ax = plt.subplots(figsize=(10, 15))
ax.barh(bar_positions, feature_importance[indices], align='center', color='skyblue')
ax.set_yticks(bar_positions)
ax.set_yticklabels(bar_labels, fontsize=10)
ax.set_xlabel('Feature Importance', fontsize=12)
ax.set_title('Feature Importance from CatBoost Model', fontsize=12)
# barh draws position 0 at the bottom; flip the axis so the top bar is the
# most important feature.
ax.invert_yaxis()

# Adjust layout for padding
fig.tight_layout(pad=1.0)

plt.show()
