In [87]:
import numpy as np
import pandas as pd
import joblib

# Загрузка модели

In [88]:
pipeline = joblib.load('models/xgbregressor_insurance_.joblib')
display(pipeline)

# Обзор пайплайна

In [89]:
print(f'Шагов в pipeline: {len(pipeline.steps)}')

print('\nШаги:')

for pipeline_step in pipeline.steps:
    display(pipeline_step)

Шагов в pipeline: 2

Шаги:


('preprocessor',
 ColumnTransformer(transformers=[('num', StandardScaler(),
                                  ['age', 'bmi', 'children', 'sex', 'smoker']),
                                 ('cat',
                                  OneHotEncoder(drop='first',
                                                handle_unknown='ignore'),
                                  ['region'])]))

('model',
 XGBRegressor(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              feature_weights=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=3,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
              n_jobs=-1, num_parallel_tree=None, ...))

In [90]:
preprocessor = pipeline.named_steps['preprocessor']

display(preprocessor)
print('Признаки после преобразования:')
print(preprocessor.get_feature_names_out())

print('\nВходные признаки:')
for name, _, columns in preprocessor.transformers_:
    print(f"{name}: {columns}")

Признаки после преобразования:
['num__age' 'num__bmi' 'num__children' 'num__sex' 'num__smoker'
 'cat__region_northwest' 'cat__region_southeast' 'cat__region_southwest']

Входные признаки:
num: ['age', 'bmi', 'children', 'sex', 'smoker']
cat: ['region']


In [91]:
model = pipeline.named_steps['model']

display(model)

print('Гиперпараметры модели:')
display(model.get_xgb_params)

Гиперпараметры модели:


<bound method XGBModel.get_xgb_params of XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.05, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=3,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=100,
             n_jobs=-1, num_parallel_tree=None, ...)>

# Использование

In [92]:
# Входные признаки:
# num: ['age', 'bmi', 'children', 'sex', 'smoker']
# cat: ['region']

In [107]:
def predict_insurance_cost(clients: pd.DataFrame):
    logit_pred = pipeline.predict(clients)
    preds = np.expm1(logit_pred)  # ОБЯЗАТЕЛЬНО ПРЕОБРАЗОВАНИЕ ИЗ ЛОГИТ-ПРОСТРАНСТВА

    for i, pred in enumerate(preds):
        print('-' * 15)
        print(f'КЛИЕНТ | ID={i}')
        display(clients.iloc[[i]])
        print(f'Цена медицинской страховки составит приблизительно ${pred:.2f}.')
        print('-' * 15)

In [119]:
# Клиент:
# Мужчина 34-ёх лет с допустимым показателем ИМТ=24. Некурящий. 2 иждивенца. Регион: юго-запад.

client = pd.DataFrame({
    'age': [34],
    'bmi': [24],
    'children': [2],
    'sex': [1],
    'smoker': [0],
    'region': ['southwest']
})

predict_insurance_cost(clients=client)

---------------
КЛИЕНТ | ID=0


Unnamed: 0,age,bmi,children,sex,smoker,region
0,34,24,2,1,0,southwest


Цена медицинской страховки составит приблизительно $6389.99.
---------------


In [96]:
clients = pd.DataFrame({
    'age': [34, 50, 28, 45],
    'bmi': [24, 30, 22, 27],
    'children': [2, 1, 0, 3],
    'sex': [1, 0, 0, 1],
    'smoker': [0, 1, 0, 1],
    'region': ['southwest', 'northeast', 'southeast', 'northwest']
})

predict_insurance_cost(clients=clients)

---------------
КЛИЕНТ | ID=0


Unnamed: 0,age,bmi,children,sex,smoker,region
0,34,24,2,1,0,southwest


Цена медицинской страховки составит приблизительно $6389.99.
---------------
---------------
КЛИЕНТ | ID=1


Unnamed: 0,age,bmi,children,sex,smoker,region
1,50,30,1,0,1,northeast


Цена медицинской страховки составит приблизительно $27619.21.
---------------
---------------
КЛИЕНТ | ID=2


Unnamed: 0,age,bmi,children,sex,smoker,region
2,28,22,0,0,0,southeast


Цена медицинской страховки составит приблизительно $3562.95.
---------------
---------------
КЛИЕНТ | ID=3


Unnamed: 0,age,bmi,children,sex,smoker,region
3,45,27,3,1,1,northwest


Цена медицинской страховки составит приблизительно $24109.34.
---------------
