# Setup

In [117]:
import warnings
warnings.filterwarnings('ignore')

In [118]:
# Data loading
import pandas as pd

DATASET_PATH = 'StudentPerformanceFactors.csv'
DATASET_TARGET = 'Exam_Score'

df = pd.read_csv(DATASET_PATH)

df

Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Sleep_Hours,Previous_Scores,Motivation_Level,Internet_Access,Tutoring_Sessions,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Physical_Activity,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender,Exam_Score
0,23,84,Low,High,No,7,73,Low,Yes,0,Low,Medium,Public,Positive,3,No,High School,Near,Male,67
1,19,64,Low,Medium,No,8,59,Low,Yes,2,Medium,Medium,Public,Negative,4,No,College,Moderate,Female,61
2,24,98,Medium,Medium,Yes,7,91,Medium,Yes,2,Medium,Medium,Public,Neutral,4,No,Postgraduate,Near,Male,74
3,29,89,Low,Medium,Yes,8,98,Medium,Yes,1,Medium,Medium,Public,Negative,4,No,High School,Moderate,Male,71
4,19,92,Medium,Medium,Yes,6,65,Medium,Yes,3,Medium,High,Public,Neutral,4,No,College,Near,Female,70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6602,25,69,High,Medium,No,7,76,Medium,Yes,1,High,Medium,Public,Positive,2,No,High School,Near,Female,68
6603,23,76,High,Medium,No,8,81,Medium,Yes,3,Low,High,Public,Positive,2,No,High School,Near,Female,69
6604,20,90,Medium,Low,Yes,6,65,Low,Yes,3,Low,Medium,Public,Negative,2,No,Postgraduate,Near,Female,68
6605,10,86,High,High,Yes,6,91,High,Yes,2,Low,Medium,Private,Positive,3,No,High School,Far,Female,68


In [119]:
categorical_vars = []
continuous_vars = []
binary_vars = []
ordinal_vars = []
dtype = []

for col in df.columns:
    unique_count = df[col].nunique()
    cols_type = None
    unique = None

    # Categorical variables
    if df[col].dtype == 'object':
        categorical_vars.append(col)
        unique = df[col].unique().tolist()

        # Binary variables
        if unique_count == 2:
            binary_vars.append(col)
            cols_type = 'Binary'

        # Ordinal and nominal variables
        elif unique_count <= 3 and df[col].dtype == 'object':
            ordinal_vars.append(col)
            cols_type = 'Ordinal'
    # Continous variables
    else:
        continuous_vars.append(col)
        cols_type = 'Continuous'

    dtype.append({
        "Name": col,
        "Type": cols_type,
        "Unique value": unique
    })

df_cols_dtype = pd.DataFrame(dtype)

df_cols_dtype

Unnamed: 0,Name,Type,Unique value
0,Hours_Studied,Continuous,
1,Attendance,Continuous,
2,Parental_Involvement,Ordinal,"[Low, Medium, High]"
3,Access_to_Resources,Ordinal,"[High, Medium, Low]"
4,Extracurricular_Activities,Binary,"[No, Yes]"
5,Sleep_Hours,Continuous,
6,Previous_Scores,Continuous,
7,Motivation_Level,Ordinal,"[Low, Medium, High]"
8,Internet_Access,Binary,"[Yes, No]"
9,Tutoring_Sessions,Continuous,


# Data Preprocessing

## Data cleaning

In [120]:
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

In [121]:
df.loc[(df[DATASET_TARGET] > 100, DATASET_TARGET)] = 100

## Extract input features and output feature

In [122]:
X = df.drop(columns=[DATASET_TARGET])
y = df[DATASET_TARGET]

display(X)
display(y)

Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Sleep_Hours,Previous_Scores,Motivation_Level,Internet_Access,Tutoring_Sessions,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Physical_Activity,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender
0,23,84,Low,High,No,7,73,Low,Yes,0,Low,Medium,Public,Positive,3,No,High School,Near,Male
1,19,64,Low,Medium,No,8,59,Low,Yes,2,Medium,Medium,Public,Negative,4,No,College,Moderate,Female
2,24,98,Medium,Medium,Yes,7,91,Medium,Yes,2,Medium,Medium,Public,Neutral,4,No,Postgraduate,Near,Male
3,29,89,Low,Medium,Yes,8,98,Medium,Yes,1,Medium,Medium,Public,Negative,4,No,High School,Moderate,Male
4,19,92,Medium,Medium,Yes,6,65,Medium,Yes,3,Medium,High,Public,Neutral,4,No,College,Near,Female
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6602,25,69,High,Medium,No,7,76,Medium,Yes,1,High,Medium,Public,Positive,2,No,High School,Near,Female
6603,23,76,High,Medium,No,8,81,Medium,Yes,3,Low,High,Public,Positive,2,No,High School,Near,Female
6604,20,90,Medium,Low,Yes,6,65,Low,Yes,3,Low,Medium,Public,Negative,2,No,Postgraduate,Near,Female
6605,10,86,High,High,Yes,6,91,High,Yes,2,Low,Medium,Private,Positive,3,No,High School,Far,Female


0       67
1       61
2       74
3       71
4       70
        ..
6602    68
6603    69
6604    68
6605    68
6606    64
Name: Exam_Score, Length: 6378, dtype: int64

## Pipeline for cross validation

Helper class for auto skewness transformation

In [123]:
import numpy as np
import pandas as pd
from scipy.stats import yeojohnson
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted

class SkewKurtTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, columns=None, skew_threshold=0.5, kurt_target=3.0,
                 weight_skew=1.0, weight_kurt=0.3):
        """
        columns: list of numeric columns to consider (None = all numeric)
        skew_threshold: below this abs(skew), keep original
        kurt_target: reference kurtosis (3 ~ normal)
        weight_skew, weight_kurt: importance in score
        """
        self.columns = columns
        self.skew_threshold = skew_threshold
        self.kurt_target = kurt_target
        self.weight_skew = weight_skew
        self.weight_kurt = weight_kurt

    def _score(self, series):
        s = series.dropna()
        if len(s) < 3:
            return np.inf
        skew = s.skew()
        kurt = s.kurtosis()
        return (self.weight_skew * abs(skew)
                + self.weight_kurt * abs(kurt - self.kurt_target)), skew, kurt

    def fit(self, X, y=None):
        X = pd.DataFrame(X).copy()

        # choose columns
        if self.columns is None:
            self.columns_ = X.select_dtypes(include="number").columns.tolist()
        else:
            self.columns_ = list(self.columns)

        self.methods_ = {}
        self.params_ = {}
        self.stats_ = {}

        for col in self.columns_:
            data = X[col].astype(float)

            # original
            best_score, best_skew, best_kurt = self._score(data)
            best_method = "none"
            best_params = {}

            # if already fine on skew, keep original
            if abs(best_skew) < self.skew_threshold:
                self.methods_[col] = best_method
                self.params_[col] = best_params
                self.stats_[col] = {"skew": best_skew, "kurt": best_kurt}
                continue

            # candidate 1: log (with shift if needed)
            d = data.copy()
            shift = 0.0
            if d.min() <= 0:
                shift = abs(d.min()) + 1.0
                d = d + shift
            d_log = np.log(d)
            score, s, k = self._score(d_log)
            if score < best_score:
                best_score, best_skew, best_kurt = score, s, k
                best_method = "log"
                best_params = {"shift": shift}

            # candidate 2: yeo-johnson
            d_nonnull = data.dropna()
            try:
                yj_vals, lam = yeojohnson(d_nonnull)
                d_yj = pd.Series(yj_vals, index=d_nonnull.index).reindex(data.index)
                score, s, k = self._score(d_yj)
                if score < best_score:
                    best_score, best_skew, best_kurt = score, s, k
                    best_method = "yeojohnson"
                    best_params = {"lambda": lam}
            except Exception:
                pass

            # store winner for this column
            self.methods_[col] = best_method
            self.params_[col] = best_params
            self.stats_[col] = {"skew": best_skew, "kurt": best_kurt}

        return self

    def transform(self, X):
        check_is_fitted(self, ["methods_", "params_", "columns_"])
        X = pd.DataFrame(X).copy()

        for col in self.columns_:
            method = self.methods_.get(col, "none")
            if method == "none":
                continue

            data = X[col].astype(float)

            if method == "log":
                shift = self.params_[col].get("shift", 0.0)
                X[col] = np.log(data + shift)

            elif method == "yeojohnson":
                lam = self.params_[col]["lambda"]
                # yeojohnson with fixed lambda
                X[col] = yeojohnson(data, lmbda=lam)

        return X

In [124]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder, LabelEncoder
from sklearn.decomposition import SparsePCA

cont_cols = [c for c in continuous_vars if c != DATASET_TARGET]

continuous_transformer_linear = Pipeline(steps=[
    ("skew_kurt", SkewKurtTransformer(columns=cont_cols,
                                      skew_threshold=0.5,
                                      weight_skew=1.0,
                                      weight_kurt=0.3)),
    ("scaler", StandardScaler())
])

continuous_transformer_non_linear = Pipeline(steps=[
    ("scaler", StandardScaler())
])

onehot_transformer = Pipeline(steps=[
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
])

ordinal_transformer = Pipeline(steps=[
    ("scaler", OrdinalEncoder())
])

preprocessor_linear = ColumnTransformer(
    transformers=[
        ("cont", continuous_transformer_linear, cont_cols),
        ("onehot", onehot_transformer, binary_vars),
        ("ordinal", ordinal_transformer, ordinal_vars) 
    ],
    remainder="passthrough"
)

preprocessor_non_linear = ColumnTransformer(
    transformers=[
        ("cont", continuous_transformer_non_linear, cont_cols),
        ("onehot", onehot_transformer, binary_vars),
        ("ordinal", ordinal_transformer, ordinal_vars) 
    ],
    remainder="passthrough",
)

preprocessor_no_encoding = ColumnTransformer(
    transformers=[
        ("cont", continuous_transformer_non_linear, cont_cols) 
    ],
    remainder="passthrough",
)

sparsePCA = SparsePCA(
        n_components=10,
        alpha=1.0,
        random_state=42,
        max_iter=1000,
        n_jobs=-1,
    )

# Model implementation

Models used for choosing best model via performance comparison

In [125]:
from sklearn.base import BaseEstimator, RegressorMixin

class CatBoostRegWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, cat_features=None, **params):
        self.cat_features = cat_features
        self.params = params
        self.model_ = CatBoostRegressor(**params)

    def fit(self, X, y):
        # X is a DataFrame in your pipeline
        self.model_.fit(
            X, y,
            cat_features=self.cat_features,
            verbose=False
        )
        return self

    def predict(self, X):
        return self.model_.predict(X)

In [126]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor, Pool
from xgboost import XGBRegressor

catboost_pool = Pool(data=X, label=y, cat_features=categorical_vars)
linear_model = ["Linear Regression", "Lasso", "Ridge"]
no_encoding_model = ["CatBoosting Regressor"]
cat_idx = [X.columns.get_loc(c) for c in categorical_vars]

models = {
    # "Linear Regression": LinearRegression(),
    # "Lasso": Lasso(),
    # "Ridge": Ridge(),
    # "Random Forest Regressor": RandomForestRegressor(),
    # "XGBRegressor": XGBRegressor(),
    "CatBoosting Regressor": CatBoostRegWrapper(cat_features=cat_idx)
}

Normal cross-validation (10-Fold)

In [127]:
from sklearn.model_selection import KFold, cross_validate
from sklearn.pipeline import Pipeline

kf = KFold(n_splits=10, shuffle=True, random_state=42)
scoring = {
    "r2": "r2",
    "mae": "neg_mean_absolute_error",
    "rmse": "neg_root_mean_squared_error",
}

avg_cross_score = []

for name, model in models.items():
    preprocessor = preprocessor_non_linear
    if name in linear_model:
        preprocessor = preprocessor_linear
    elif name in no_encoding_model:
        preprocessor = preprocessor_no_encoding
    full_pipeline = Pipeline(steps=[
        ("preprocess", preprocessor),
        # ("sparse_pca", sparsePCA),
        ("model", model)
    ])
    cv_results = cross_validate(
        full_pipeline,
        X,
        y,
        cv=kf,
        scoring=scoring,
        return_train_score=False,
    )
    avg_cross_score.append({
        "Name": name,
        "Average R2 score": cv_results["test_r2"].mean(),
        "Average MAE score": -cv_results["test_mae"].mean(),
        "Average RMSE score": -cv_results["test_rmse"].mean()
    })
    
cross_val = pd.DataFrame(avg_cross_score)

cross_val

ValueError: 
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.023994771088030088

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.023994771088030088 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.03119519816071433

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.03119519816071433 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.026184352776010444

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.026184352776010444 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.013192747382507858

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.013192747382507858 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: 0.6619308921263971

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=0.6619308921263971 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.023205139147307787

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.023205139147307787 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.026325198018964754

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.026325198018964754 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.02715916313375431

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.02715916313375431 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.022202041527498183

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.022202041527498183 : cat_features must be integer or string, real number values and NaN values should be converted to string.

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "_catboost.pyx", line 2600, in _catboost.get_cat_factor_bytes_representation
  File "_catboost.pyx", line 2115, in _catboost.get_id_object_bytes_string_representation
_catboost.CatBoostError: bad object for id: -0.025474883160128

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ph181\AppData\Local\Temp\ipykernel_32280\597880927.py", line 11, in fit
    self.model_.fit(
    ~~~~~~~~~~~~~~~^
        X, y,
        ^^^^^
        cat_features=self.cat_features,
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        verbose=False
        ^^^^^^^^^^^^^
    )
    ^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 5873, in fit
    return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
           ~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     verbose_eval, metric_period, silent, early_stopping_rounds,
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                     save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2395, in _fit
    train_params = self._prepare_train_params(
        X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
    ...<6 lines>...
        callbacks=callbacks
    )
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 2275, in _prepare_train_params
    train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, graph,
                                   sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
                                   baseline, column_description)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1513, in _build_train_pool
    train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, graph=graph, weight=sample_weight, group_id=group_id,
                      group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 855, in __init__
    self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
               group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ph181\AppData\Local\Programs\Python\Python313\Lib\site-packages\catboost\core.py", line 1491, in _init
    self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, graph, weight,
    ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "_catboost.pyx", line 4329, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4399, in _catboost._PoolBase._init_pool
  File "_catboost.pyx", line 4300, in _catboost._PoolBase._init_objects_order_layout_pool
  File "_catboost.pyx", line 3789, in _catboost._set_data
  File "_catboost.pyx", line 3742, in _catboost._set_data_from_generic_matrix
  File "_catboost.pyx", line 2607, in _catboost.get_cat_factor_bytes_representation
_catboost.CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=2]=-0.025474883160128 : cat_features must be integer or string, real number values and NaN values should be converted to string.


Cross validation with all models using skewness and transformation

In [None]:
avg_cross_score = []

for name, model in models.items():
    preprocessor = preprocessor_linear
    full_pipeline = Pipeline(steps=[
        ("preprocess", preprocessor),
        # ("sparse_pca", sparsePCA),
        ("model", model)
    ])
    cv_results = cross_validate(
        full_pipeline,
        X,
        y,
        cv=kf,
        scoring=scoring,
        return_train_score=False,
    )
    avg_cross_score.append({
        "Name": name,
        "Average R2 score": cv_results["test_r2"].mean(),
        "Average MAE score": -cv_results["test_mae"].mean(),
        "Average RMSE score": -cv_results["test_rmse"].mean()
    })
    
cross_val = pd.DataFrame(avg_cross_score)

cross_val

Learning rate set to 0.053962
0:	learn: 3.8052869	total: 5.13ms	remaining: 5.13s
1:	learn: 3.7109667	total: 8.7ms	remaining: 4.34s
2:	learn: 3.6282573	total: 12.5ms	remaining: 4.17s
3:	learn: 3.5483856	total: 15.8ms	remaining: 3.93s
4:	learn: 3.4756843	total: 19.8ms	remaining: 3.93s
5:	learn: 3.4012335	total: 23.4ms	remaining: 3.87s
6:	learn: 3.3358996	total: 27ms	remaining: 3.83s
7:	learn: 3.2704825	total: 31.7ms	remaining: 3.93s
8:	learn: 3.2122890	total: 35.4ms	remaining: 3.9s
9:	learn: 3.1583987	total: 39.4ms	remaining: 3.9s
10:	learn: 3.1081261	total: 43.2ms	remaining: 3.88s
11:	learn: 3.0613901	total: 46.8ms	remaining: 3.85s
12:	learn: 3.0159841	total: 50.7ms	remaining: 3.85s
13:	learn: 2.9779471	total: 54.4ms	remaining: 3.83s
14:	learn: 2.9368746	total: 58.2ms	remaining: 3.82s
15:	learn: 2.8986740	total: 61.5ms	remaining: 3.78s
16:	learn: 2.8652952	total: 65.6ms	remaining: 3.79s
17:	learn: 2.8361304	total: 69.5ms	remaining: 3.79s
18:	learn: 2.8072250	total: 73ms	remaining: 3.77s

Unnamed: 0,Name,Average R2 score,Average MAE score,Average RMSE score
0,Linear Regression,0.615107,1.239738,2.422308
1,Lasso,0.406252,1.956892,3.012136
2,Ridge,0.615107,1.239751,2.422307
3,Random Forest Regressor,0.610367,1.242228,2.439133
4,XGBRegressor,0.618085,1.122961,2.412043
5,CatBoosting Regressor,0.672571,0.893812,2.232518


Cross validation with all models not using skewness and transformation

In [None]:
avg_cross_score = []

for name, model in models.items():
    preprocessor = preprocessor_non_linear
    full_pipeline = Pipeline(steps=[
        ("preprocess", preprocessor),
        # ("sparse_pca", sparsePCA),
        ("model", model)
    ])
    cv_results = cross_validate(
        full_pipeline,
        X,
        y,
        cv=kf,
        scoring=scoring,
        return_train_score=False,
    )
    avg_cross_score.append({
        "Name": name,
        "Average R2 score": cv_results["test_r2"].mean(),
        "Average MAE score": -cv_results["test_mae"].mean(),
        "Average RMSE score": -cv_results["test_rmse"].mean()
    })
    
cross_val = pd.DataFrame(avg_cross_score)

cross_val

Learning rate set to 0.053962
0:	learn: 3.8087365	total: 3.81ms	remaining: 3.81s
1:	learn: 3.7140122	total: 7.46ms	remaining: 3.73s
2:	learn: 3.6243168	total: 10.8ms	remaining: 3.59s
3:	learn: 3.5507248	total: 14.2ms	remaining: 3.55s
4:	learn: 3.4727185	total: 17.5ms	remaining: 3.49s
5:	learn: 3.3989251	total: 22.4ms	remaining: 3.71s
6:	learn: 3.3368457	total: 25.6ms	remaining: 3.63s
7:	learn: 3.2741672	total: 28.6ms	remaining: 3.54s
8:	learn: 3.2165880	total: 31.8ms	remaining: 3.5s
9:	learn: 3.1624724	total: 35ms	remaining: 3.46s
10:	learn: 3.1087638	total: 37.8ms	remaining: 3.4s
11:	learn: 3.0624865	total: 41.2ms	remaining: 3.39s
12:	learn: 3.0169719	total: 44.4ms	remaining: 3.37s
13:	learn: 2.9765686	total: 47.1ms	remaining: 3.31s
14:	learn: 2.9373195	total: 50ms	remaining: 3.29s
15:	learn: 2.8990350	total: 53.3ms	remaining: 3.28s
16:	learn: 2.8658628	total: 56.4ms	remaining: 3.26s
17:	learn: 2.8359124	total: 59.1ms	remaining: 3.23s
18:	learn: 2.8064338	total: 61.9ms	remaining: 3.19

Unnamed: 0,Name,Average R2 score,Average MAE score,Average RMSE score
0,Linear Regression,0.615926,1.234561,2.419624
1,Lasso,0.406253,1.95689,3.012135
2,Ridge,0.615927,1.23457,2.419623
3,Random Forest Regressor,0.609,1.239149,2.442843
4,XGBRegressor,0.622354,1.107248,2.398421
5,CatBoosting Regressor,0.674528,0.886317,2.226269
