In [None]:
%%time
from sklearn.metrics import *
from sklearn.model_selection import StratifiedKFold, train_test_split, KFold
import xgboost as xgb
from xgboost import XGBRegressor

# K-fold cross-validated XGBoost regression.
#
# Results left in the namespace:
#   oof_xgb   - out-of-fold predictions, one per row of `train` (positional order)
#   pred_xgb  - test-set predictions averaged over the FOLDS fitted models
#   model_xgb - the model fitted on the LAST fold only
FOLDS = 10
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)

oof_xgb = np.zeros(len(train))
pred_xgb = np.zeros(len(test))

# These selections do not depend on the fold, so build them once up front.
X_all = train[features]
y_all = train[target]
X_test = test[features].copy()

for i, (tr_idx, va_idx) in enumerate(kf.split(train)):

    # KFold.split yields *positional* row numbers, so rows must be taken with
    # .iloc. Label-based .loc only gives the same rows when `train` happens to
    # have a default 0..n-1 index; otherwise it selects wrong rows or raises.
    X_train = X_all.iloc[tr_idx]
    y_train = y_all.iloc[tr_idx]
    X_valid = X_all.iloc[va_idx]
    y_valid = y_all.iloc[va_idx]

    # Only genuine XGBoost parameters are passed here. The previous
    # `cat_features` (a CatBoost argument), `verbose` and `use_label_encoder`
    # were ignored by XGBoost with a "not used" warning.
    # NOTE(review): enable_categorical=True presumably relies on the categorical
    # feature columns already having pandas `category` dtype - confirm upstream.
    model_xgb = XGBRegressor(
        device="gpu",
        max_depth=3,
        colsample_bytree=0.4,
        n_estimators=5000,
        learning_rate=0.02,
        early_stopping_rounds=20,
        eval_metric='rmse',
        enable_categorical=True
    )

    # The validation fold doubles as the early-stopping monitor set.
    model_xgb.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        verbose=False
    )

    valid_preds = model_xgb.predict(X_valid)
    oof_xgb[va_idx] = valid_preds          # positional write, matches .iloc above
    pred_xgb += model_xgb.predict(X_test)  # accumulated here, averaged after the loop

    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, whereas this form works on every version.
    fold_rmse = np.sqrt(mean_squared_error(y_valid, valid_preds))

    print("#"*25)
    print(f"### Fold {i+1} \n")
    print(f"Fold {i+1} RMSE: {fold_rmse}")
    print("#"*25)

# Turn the accumulated sum of per-fold test predictions into their mean.
pred_xgb /= FOLDS

In [None]:
# Horizontal bar chart of the feature importances reported by `model_xgb`,
# sorted so the most important feature is drawn at the top.
feature_names = train[features].columns
feature_importance = model_xgb.feature_importances_

# Feature positions ordered from highest to lowest importance.
indices = np.flip(np.argsort(feature_importance))

bar_positions = range(len(feature_importance))
sorted_labels = [feature_names[i] for i in indices]

fig, ax = plt.subplots(figsize=(10, 15))
ax.barh(bar_positions, feature_importance[indices], align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(sorted_labels, fontsize=10)
ax.set_xlabel('Feature Importance', fontsize=12)
ax.set_title('Feature Importance from XGBoost Model', fontsize=10)
# barh draws position 0 at the bottom; flip the axis so rank 1 sits on top.
ax.invert_yaxis()

fig.tight_layout(pad=1.0)

plt.show()