In [1]:
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import logging
from catboost import CatBoostRegressor, Pool

# Setup paths
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.getcwd()))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Setup directories
RESULTS_DIR = os.path.join(os.getcwd(), 'results')
os.makedirs(RESULTS_DIR, exist_ok=True)

# Imports
from preprocessing.data_container import DataContainer
from utils.evaluation import cindex_score
from models.modelling_process import ModellingProcess
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from utils.evaluation import EarlyStoppingMonitor
from sklearn.model_selection import train_test_split
from lifelines.utils import concordance_index
from sklearn.utils.validation import check_X_y, check_is_fitted
from sklearn.base import BaseEstimator, RegressorMixin


# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

In [20]:
class CatBoostModel(BaseEstimator, RegressorMixin): 
    def __init__(self, cat_features = ['TISSUE'], 
                 iterations = None, loss_function = "Cox", eval_metric = "Cox", early_stopping_rounds = 5, 
                 rsm = 0.1, depth = None, min_data_in_leaf = None, learning_rate = 0.1): 
        super(CatBoostModel, self).__init__()
        self.cat_features = cat_features
        self.is_fitted_ = False
        self.iterations=iterations
        self.loss_function=loss_function
        self.eval_metric=eval_metric
        self.early_stopping_rounds = early_stopping_rounds
        self.bootstrap_type='Bernoulli'
        self.boosting_type = 'Plain'
        self.rsm = rsm 
        self.depth = depth
        self.min_data_in_leaf = min_data_in_leaf
        self.learning_rate = learning_rate
        
        
    def _prepare_data(self, X, y):
        y = pd.DataFrame(y)
        if self.loss_function == 'Cox': 
            y['label'] = np.where(y['status'], y['time'], - y['time'])
        # TODO: Include other loss
        
        for col in self.cat_features:
            X.loc[:, col] = X.loc[:,col].astype('category')
        
        #data = pd.concat([X, y], dim = 1)
        #print(data.info())
        return X, y['label']
    
    def fit(self, X, y): 
        # early stopping mit 0.1 des training sets
        X, y = self._prepare_data(X, y)

        train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.1)

        #train_pool = Pool(train[features], label=train['label'], cat_features=cat_features)
        #val_pool = Pool(val[features], label=test['label'], cat_features=cat_features)

        self.model = CatBoostRegressor(iterations=self.iterations,
                        loss_function=self.loss_function,
                        depth = self.depth, 
                        eval_metric=self.eval_metric,
                        learning_rate=self.learning_rate, 
                        early_stopping_rounds = self.early_stopping_rounds,
                        bootstrap_type=self.bootstrap_type, 
                        boosting_type=self.boosting_type,
                        min_data_in_leaf = self.min_data_in_leaf,
                        rsm = self.rsm, 
                        cat_features=self.cat_features)
        
        self.model.fit(X = train_X, y = train_y, eval_set= (val_X, val_y))
        self.is_fitted_ = True
        return self

    def predict(self, X): 
        check_is_fitted(self, 'is_fitted_')
        train_y_pred = self.model.predict(X)
        return train_y_pred
    
    def score(self, X, y):
        check_is_fitted(self, 'is_fitted_')
        preds = self.predict(X)
        ci = concordance_index(y['time'], -preds, y['status'])
        return ci
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    
    def clone(self): 
        super(self).clone()
    
    def get_feature_importance(self, X, y): 
        X, y = self._prepare_data(X, y)
        check_is_fitted(self, 'is_fitted_')
        data = Pool(X, label=y, cat_features=self.cat_features)
        imp = self.model.get_feature_importance(data=data)
        return imp



In [21]:
cat_features = ['TISSUE']

In [22]:
DATA_CONFIG = {
    'use_pca': False,
    'pca_threshold': 0.85,
    'gene_type': 'intersection',
    'use_imputed': True,
    'select_random' : False, 
    'use_cohorts': False, 
    'requires_ohenc' : False, 
    'only_pData': True, 
    'clinical_covs' : ["AGE", "TISSUE", "GLEASON_SCORE", 'PRE_OPERATIVE_PSA']
}

# Model configuration
# MODEL_CONFIG = {
#     'params_cv': {
#         'model__n_estimators': [500],
#         'model__learning_rate': [0.1],
#         'model__max_depth': [3, 5],
#         'model__min_samples_split': [5, 10],
#         'model__min_samples_leaf': [3, 5],
#         'model__subsample': [0.9],
#         'model__max_features': ['sqrt'], 
#         'model__n_iter_no_change' : [10], 
#         'model__validation_fraction' : [0.1]
#     },
#     'refit': True, 
#     'do_nested_resampling': False, 
#     #'monitor' : monitor, 
#     'path' : RESULTS_DIR, 
#     'fname_cv' : 'test'}

# # Model configuration
MODEL_CONFIG = {
    'params_cv': {
        'model__iterations': [500],
        'model__learning_rate': [0.1],
        'model__depth': [3, 5, 10],
        'model__min_data_in_leaf': [3, 5, 10],
        'model__max_features': ['sqrt', 'log2'],  
        'model__nan_mode' : ["Forbidden", "Min"]
        },
    'refit': True, 
    'do_nested_resampling': True, 
    'path' : RESULTS_DIR, 
    'fname_cv' : 'gb_inter_genes_pData'}

# validation_fraction=0.1 as a mean to inclued early stopping
gb_pipeline_steps = [('model', CatBoostModel())]

In [23]:
mp = ModellingProcess()
mp.prepare_data(DATA_CONFIG, PROJECT_ROOT)

#mp.save_results(RESULTS_DIR, 'gb_intersect_imp_done', model = mp.cmplt_model, cv_results = mp.resampling_cmplt, pipe = mp.cmplt_pipeline)

2025-01-08 13:33:40,056 - INFO - Loading data...
2025-01-08 13:33:56,012 - INFO - Found clinical data specification
2025-01-08 13:33:56,019 - INFO - Only uses pData
2025-01-08 13:33:56,151 - INFO - Loaded data: 1091 samples, 4 features


<class 'pandas.core.frame.DataFrame'>
Index: 1091 entries, Atlanta_2014_Long.PT081 to Stockholm_2016_Ross_Adams.STKHLM9246
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   AGE                1091 non-null   float64
 1   TISSUE             1091 non-null   object 
 2   CLIN_T_STAGE       1091 non-null   object 
 3   PATH_T_STAGE       1091 non-null   object 
 4   GLEASON_SCORE      1091 non-null   float64
 5   PRE_OPERATIVE_PSA  1091 non-null   float64
 6   MONTH_TO_BCR       1091 non-null   float64
 7   BCR_STATUS         1091 non-null   int64  
dtypes: float64(4), int64(1), object(3)
memory usage: 76.7+ KB
None


In [24]:
nstd_res_result = mp.do_modelling(gb_pipeline_steps, MODEL_CONFIG)

2025-01-08 13:33:56,162 - INFO - No additional monitoring detected
2025-01-08 13:33:56,162 - INFO - Start model training...
2025-01-08 13:33:56,163 - INFO - Input data shape: X=(1091, 4)
2025-01-08 13:33:56,164 - INFO - Nested resampling...
2025-01-08 13:33:56,165 - INFO - Starting nested resampling...
2025-01-08 13:33:56,166 - INFO - Data shape: X=(1091, 4), groups=9 unique
2025-01-08 13:33:56,167 - INFO - 
Outer fold 1
2025-01-08 13:33:56,169 - INFO - Test cohort: Atlanta_2014_Long


Fitting 8 folds for each of 36 candidates, totalling 288 fits


2025-01-08 13:34:14,558 - INFO - Best parameters: {'model__depth': 10, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'sqrt', 'model__min_data_in_leaf': 10, 'model__nan_mode': 'Min'}
2025-01-08 13:34:14,560 - INFO - Test score: 0.646
2025-01-08 13:34:14,560 - INFO - 
Outer fold 2
2025-01-08 13:34:14,561 - INFO - Test cohort: Belfast_2018_Jain


0:	learn: -1468.7839692	test: -83.2561586	best: -83.2561586 (0)	total: 530us	remaining: 265ms
1:	learn: -1468.7839692	test: -83.2561586	best: -83.2561586 (1)	total: 1.01ms	remaining: 251ms
2:	learn: -1468.7839692	test: -83.2561586	best: -83.2561586 (1)	total: 1.45ms	remaining: 240ms
3:	learn: -1468.7839692	test: -83.2561586	best: -83.2561586 (1)	total: 1.85ms	remaining: 229ms
4:	learn: -1468.7839692	test: -83.2561586	best: -83.2561586 (1)	total: 2.23ms	remaining: 221ms
5:	learn: -1467.0326879	test: -83.2988814	best: -83.2561586 (1)	total: 2.75ms	remaining: 227ms
6:	learn: -1458.9191627	test: -83.0150799	best: -83.0150799 (6)	total: 3.48ms	remaining: 245ms
7:	learn: -1458.9191627	test: -83.0150799	best: -83.0150799 (6)	total: 3.89ms	remaining: 239ms
8:	learn: -1458.9191627	test: -83.0150799	best: -83.0150799 (8)	total: 4.36ms	remaining: 238ms
9:	learn: -1458.9191627	test: -83.0150799	best: -83.0150799 (8)	total: 4.83ms	remaining: 237ms
10:	learn: -1458.2194193	test: -83.0018871	best: -8

2025-01-08 13:34:25,451 - INFO - Best parameters: {'model__depth': 3, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'sqrt', 'model__min_data_in_leaf': 10, 'model__nan_mode': 'Min'}
2025-01-08 13:34:25,452 - INFO - Test score: 0.509
2025-01-08 13:34:25,453 - INFO - 
Outer fold 3
2025-01-08 13:34:25,455 - INFO - Test cohort: CPC_GENE_2017_Fraser


0:	learn: -1362.6687619	test: -103.1497923	best: -103.1497923 (0)	total: 817us	remaining: 408ms
1:	learn: -1362.6687619	test: -103.1497923	best: -103.1497923 (0)	total: 1.38ms	remaining: 343ms
2:	learn: -1362.6687619	test: -103.1497923	best: -103.1497923 (2)	total: 1.81ms	remaining: 300ms
3:	learn: -1362.6687619	test: -103.1497923	best: -103.1497923 (2)	total: 2.23ms	remaining: 277ms
4:	learn: -1362.6687619	test: -103.1497923	best: -103.1497923 (2)	total: 2.84ms	remaining: 282ms
5:	learn: -1360.3791004	test: -103.0786012	best: -103.0786012 (5)	total: 3.98ms	remaining: 328ms
6:	learn: -1358.7308511	test: -103.0571355	best: -103.0571355 (6)	total: 4.75ms	remaining: 334ms
7:	learn: -1358.7308511	test: -103.0571355	best: -103.0571355 (7)	total: 5.21ms	remaining: 321ms
8:	learn: -1358.7308511	test: -103.0571355	best: -103.0571355 (7)	total: 5.65ms	remaining: 308ms
9:	learn: -1357.4841028	test: -102.9807993	best: -102.9807993 (9)	total: 6.22ms	remaining: 305ms
10:	learn: -1357.4841028	test: 

2025-01-08 13:34:36,650 - INFO - Best parameters: {'model__depth': 5, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'log2', 'model__min_data_in_leaf': 5, 'model__nan_mode': 'Forbidden'}
2025-01-08 13:34:36,651 - INFO - Test score: 0.591
2025-01-08 13:34:36,652 - INFO - 
Outer fold 4
2025-01-08 13:34:36,653 - INFO - Test cohort: CPGEA_2020_Li


0:	learn: -1680.8268222	test: -85.8240784	best: -85.8240784 (0)	total: 687us	remaining: 343ms
1:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (1)	total: 1.58ms	remaining: 393ms
2:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (1)	total: 2.07ms	remaining: 342ms
3:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 2.54ms	remaining: 314ms
4:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 2.94ms	remaining: 291ms
5:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 3.37ms	remaining: 277ms
6:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 3.8ms	remaining: 267ms
7:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 4.43ms	remaining: 272ms
8:	learn: -1676.9281354	test: -85.3797876	best: -85.3797876 (3)	total: 5.05ms	remaining: 275ms
Stopped by overfitting detector  (5 iterations wait)

bestTest = -85.37978756
bestIteration = 3

Shrink model to first 4 iterations.
Fitting 8 fold

2025-01-08 13:34:44,328 - INFO - Best parameters: {'model__depth': 10, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'sqrt', 'model__min_data_in_leaf': 3, 'model__nan_mode': 'Forbidden'}
2025-01-08 13:34:44,330 - INFO - Test score: 0.483
2025-01-08 13:34:44,331 - INFO - 
Outer fold 5
2025-01-08 13:34:44,334 - INFO - Test cohort: CamCap_2016_Ross_Adams


0:	learn: -1503.1586173	test: -120.5730351	best: -120.5730351 (0)	total: 805us	remaining: 402ms
1:	learn: -1496.1492170	test: -120.2481990	best: -120.2481990 (1)	total: 1.47ms	remaining: 367ms
2:	learn: -1496.1492170	test: -120.2481990	best: -120.2481990 (1)	total: 1.97ms	remaining: 327ms
3:	learn: -1496.1492170	test: -120.2481990	best: -120.2481990 (1)	total: 2.45ms	remaining: 304ms
4:	learn: -1496.1492170	test: -120.2481990	best: -120.2481990 (1)	total: 2.89ms	remaining: 286ms
5:	learn: -1496.1650560	test: -120.2476283	best: -120.2476283 (5)	total: 3.45ms	remaining: 284ms
6:	learn: -1490.4192408	test: -120.1021973	best: -120.1021973 (6)	total: 4.19ms	remaining: 295ms
7:	learn: -1490.4192408	test: -120.1021973	best: -120.1021973 (6)	total: 4.58ms	remaining: 282ms
8:	learn: -1489.9614848	test: -120.1376867	best: -120.1021973 (6)	total: 5.15ms	remaining: 281ms
9:	learn: -1487.0922557	test: -120.0412822	best: -120.0412822 (9)	total: 5.64ms	remaining: 277ms
10:	learn: -1486.5474718	test: 

2025-01-08 13:34:56,330 - INFO - Best parameters: {'model__depth': 5, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'sqrt', 'model__min_data_in_leaf': 5, 'model__nan_mode': 'Min'}
2025-01-08 13:34:56,332 - INFO - Test score: 0.585
2025-01-08 13:34:56,335 - INFO - 
Outer fold 6
2025-01-08 13:34:56,337 - INFO - Test cohort: CancerMap_2017_Luca


0:	learn: -1627.0896638	test: -107.9273735	best: -107.9273735 (0)	total: 672us	remaining: 336ms
1:	learn: -1627.0896638	test: -107.9273735	best: -107.9273735 (0)	total: 1.28ms	remaining: 320ms
2:	learn: -1627.0896638	test: -107.9273735	best: -107.9273735 (0)	total: 1.88ms	remaining: 311ms
3:	learn: -1627.0054024	test: -107.9834733	best: -107.9273735 (0)	total: 2.89ms	remaining: 358ms
4:	learn: -1627.0054024	test: -107.9834733	best: -107.9273735 (0)	total: 3.93ms	remaining: 389ms
5:	learn: -1626.1029534	test: -107.8224931	best: -107.8224931 (5)	total: 5.72ms	remaining: 471ms
6:	learn: -1626.0821338	test: -107.8006400	best: -107.8006400 (6)	total: 6.91ms	remaining: 487ms
7:	learn: -1626.0821338	test: -107.8006400	best: -107.8006400 (7)	total: 7.73ms	remaining: 476ms
8:	learn: -1623.2031970	test: -107.5136879	best: -107.5136879 (8)	total: 8.62ms	remaining: 470ms
9:	learn: -1623.2031970	test: -107.5136879	best: -107.5136879 (8)	total: 9.62ms	remaining: 471ms
10:	learn: -1623.2031970	test: 

2025-01-08 13:35:09,147 - INFO - Best parameters: {'model__depth': 5, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'log2', 'model__min_data_in_leaf': 10, 'model__nan_mode': 'Min'}
2025-01-08 13:35:09,148 - INFO - Test score: 0.638
2025-01-08 13:35:09,148 - INFO - 
Outer fold 7
2025-01-08 13:35:09,151 - INFO - Test cohort: DKFZ_2018_Gerhauser


0:	learn: -1555.3121239	test: -73.3992091	best: -73.3992091 (0)	total: 921us	remaining: 460ms
1:	learn: -1553.3084769	test: -73.3687749	best: -73.3687749 (1)	total: 2.52ms	remaining: 628ms
2:	learn: -1553.3084769	test: -73.3687749	best: -73.3687749 (2)	total: 3.08ms	remaining: 511ms
3:	learn: -1549.9307070	test: -73.1916152	best: -73.1916152 (3)	total: 8.52ms	remaining: 1.06s
4:	learn: -1548.1814684	test: -73.0952160	best: -73.0952160 (4)	total: 9.09ms	remaining: 900ms
5:	learn: -1548.1814684	test: -73.0952160	best: -73.0952160 (4)	total: 9.61ms	remaining: 791ms
6:	learn: -1548.1814684	test: -73.0952160	best: -73.0952160 (4)	total: 10.1ms	remaining: 711ms
7:	learn: -1548.1814684	test: -73.0952160	best: -73.0952160 (4)	total: 10.5ms	remaining: 648ms
8:	learn: -1548.1839728	test: -73.0951962	best: -73.0951962 (8)	total: 15.7ms	remaining: 856ms
9:	learn: -1548.1839728	test: -73.0951962	best: -73.0951962 (8)	total: 16.2ms	remaining: 791ms
10:	learn: -1548.1839728	test: -73.0951962	best: -7

2025-01-08 13:35:20,183 - INFO - Best parameters: {'model__depth': 10, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'sqrt', 'model__min_data_in_leaf': 10, 'model__nan_mode': 'Min'}
2025-01-08 13:35:20,183 - INFO - Test score: 0.790
2025-01-08 13:35:20,184 - INFO - 
Outer fold 8
2025-01-08 13:35:20,186 - INFO - Test cohort: MSKCC_2010_Taylor


0:	learn: -1626.1802088	test: -114.2296641	best: -114.2296641 (0)	total: 580us	remaining: 290ms
1:	learn: -1625.3741364	test: -114.1779770	best: -114.1779770 (1)	total: 1.19ms	remaining: 296ms
2:	learn: -1625.3741364	test: -114.1779770	best: -114.1779770 (2)	total: 1.68ms	remaining: 278ms
3:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (3)	total: 2.3ms	remaining: 285ms
4:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (3)	total: 3.15ms	remaining: 312ms
5:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (3)	total: 3.71ms	remaining: 306ms
6:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (3)	total: 4.29ms	remaining: 302ms
7:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (3)	total: 4.8ms	remaining: 296ms
8:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (8)	total: 5.36ms	remaining: 292ms
9:	learn: -1620.2237845	test: -113.5724809	best: -113.5724809 (8)	total: 5.88ms	remaining: 288ms
10:	learn: -1620.2237845	test: -1

2025-01-08 13:35:32,697 - INFO - Best parameters: {'model__depth': 3, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'log2', 'model__min_data_in_leaf': 10, 'model__nan_mode': 'Forbidden'}
2025-01-08 13:35:32,698 - INFO - Test score: 0.712
2025-01-08 13:35:32,700 - INFO - 
Outer fold 9
2025-01-08 13:35:32,702 - INFO - Test cohort: Stockholm_2016_Ross_Adams


0:	learn: -1554.3852995	test: -114.6517042	best: -114.6517042 (0)	total: 751us	remaining: 375ms
1:	learn: -1554.3852995	test: -114.6517042	best: -114.6517042 (0)	total: 1.26ms	remaining: 314ms
2:	learn: -1551.6272773	test: -114.1219475	best: -114.1219475 (2)	total: 1.7ms	remaining: 282ms
3:	learn: -1551.6272773	test: -114.1219475	best: -114.1219475 (2)	total: 2.14ms	remaining: 266ms
4:	learn: -1551.6272773	test: -114.1219475	best: -114.1219475 (4)	total: 2.52ms	remaining: 250ms
5:	learn: -1551.6272773	test: -114.1219475	best: -114.1219475 (4)	total: 2.92ms	remaining: 240ms
6:	learn: -1551.6272773	test: -114.1219475	best: -114.1219475 (4)	total: 3.3ms	remaining: 232ms
7:	learn: -1551.5462951	test: -114.1219475	best: -114.1219475 (4)	total: 7.99ms	remaining: 491ms
8:	learn: -1551.5462951	test: -114.1219475	best: -114.1219475 (4)	total: 8.57ms	remaining: 468ms
9:	learn: -1551.5462951	test: -114.1219475	best: -114.1219475 (4)	total: 9.1ms	remaining: 446ms
Stopped by overfitting detector  (

2025-01-08 13:35:45,095 - INFO - Best parameters: {'model__depth': 10, 'model__iterations': 500, 'model__learning_rate': 0.1, 'model__max_features': 'log2', 'model__min_data_in_leaf': 5, 'model__nan_mode': 'Forbidden'}
2025-01-08 13:35:45,097 - INFO - Test score: 0.500
2025-01-08 13:35:45,098 - INFO - Aggregated results:
2025-01-08 13:35:45,099 - INFO - Mean score: 0.606 ± 0.097
2025-01-08 13:35:45,100 - INFO - Individual scores: [0.6458092485549133, 0.5085581957309706, 0.5914760914760915, 0.48347770291947384, 0.5852503382949933, 0.6376966142362037, 0.7899334442595674, 0.7123447204968945, 0.5]
2025-01-08 13:35:45,168 - INFO - Saved CV results to c:\Users\laeti\PCaPrognostics\models\cat_boost\results\results\gb_inter_genes_pData_cv.csv
2025-01-08 13:35:45,171 - INFO - Do HP Tuning for complete model; refit + set complete model
2025-01-08 13:35:45,174 - INFO - Do HP Tuning for complete model


0:	learn: -1440.5889713	test: -119.0067129	best: -119.0067129 (0)	total: 564us	remaining: 282ms
1:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 1.4ms	remaining: 349ms
2:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 1.82ms	remaining: 302ms
3:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 2.36ms	remaining: 293ms
4:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 2.88ms	remaining: 286ms
5:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 3.96ms	remaining: 326ms
6:	learn: -1440.1853940	test: -118.9886021	best: -118.9886021 (1)	total: 4.74ms	remaining: 334ms
Stopped by overfitting detector  (5 iterations wait)

bestTest = -118.9886021
bestIteration = 1

Shrink model to first 2 iterations.
Fitting 9 folds for each of 36 candidates, totalling 324 fits


2025-01-08 13:35:57,356 - INFO - Saved model to c:\Users\laeti\PCaPrognostics\models\cat_boost\results\model
2025-01-08 13:35:57,359 - INFO - Saved pipe to c:\Users\laeti\PCaPrognostics\models\cat_boost\results\pipe


0:	learn: -1743.1150468	test: -126.9843105	best: -126.9843105 (0)	total: 894us	remaining: 446ms
1:	learn: -1743.1150468	test: -126.9843105	best: -126.9843105 (1)	total: 1.64ms	remaining: 409ms
2:	learn: -1743.1150468	test: -126.9843105	best: -126.9843105 (1)	total: 2.27ms	remaining: 376ms
3:	learn: -1743.1150468	test: -126.9843105	best: -126.9843105 (1)	total: 2.9ms	remaining: 360ms
4:	learn: -1743.1150468	test: -126.9843105	best: -126.9843105 (4)	total: 3.59ms	remaining: 355ms
5:	learn: -1739.9357642	test: -126.8581624	best: -126.8581624 (5)	total: 4.65ms	remaining: 383ms
6:	learn: -1739.9357642	test: -126.8581624	best: -126.8581624 (5)	total: 5.28ms	remaining: 372ms
7:	learn: -1737.5946922	test: -126.5168780	best: -126.5168780 (7)	total: 6.4ms	remaining: 394ms
8:	learn: -1737.5946922	test: -126.5168780	best: -126.5168780 (8)	total: 7.15ms	remaining: 390ms
9:	learn: -1737.5946922	test: -126.5168780	best: -126.5168780 (8)	total: 7.74ms	remaining: 380ms
10:	learn: -1737.5946922	test: -1

In [30]:
import pickle

with open('./results/model/gb_inter_genes_pData.pkl', 'rb') as f:
    model = pickle.load(f)

In [31]:
#model.get_feature_importance(mp.X, mp.y)
model.model.feature_importances_

array([10.38278504,  4.83160724, 57.33889703, 27.44671069])