In [None]:
import nannyml as nml
from IPython.display import display

# Load the synthetic multiclass classification dataset that ships with NannyML.
# The loader returns three objects; the cells shown here only use the first two
# (reference and analysis frames), so the third is discarded via `_`.
reference_df, analysis_df, _ = nml.load_synthetic_multiclass_classification_dataset()

# Preview the first three reference rows to show which columns are available.
display(reference_df.head(3))

Unnamed: 0,id,acq_channel,app_behavioral_score,requested_credit_limit,app_channel,credit_bureau_score,stated_income,is_customer,timestamp,y_pred_proba_prepaid_card,y_pred_proba_highstreet_card,y_pred_proba_upmarket_card,y_pred,y_true
0,0,Partner3,1.808232,350,web,309,15000,True,2020-05-02 02:01:30,0.97,0.03,0.0,prepaid_card,prepaid_card
1,1,Partner2,4.382568,500,mobile,418,23000,True,2020-05-02 02:03:33,0.87,0.13,0.0,prepaid_card,prepaid_card
2,2,Partner2,-0.787575,400,web,507,24000,False,2020-05-02 02:04:49,0.47,0.35,0.18,prepaid_card,upmarket_card


In [None]:
# Print the same three-row preview as a grid-formatted plain-text table.
# NOTE(review): presumably generated for pasting into the written docs — confirm.
print(reference_df.head(3).to_markdown(tablefmt="grid"))

+----+------+---------------+------------------------+--------------------------+---------------+-----------------------+-----------------+---------------+---------------------+-----------------------------+--------------------------------+------------------------------+--------------+---------------+
|    |   id | acq_channel   |   app_behavioral_score |   requested_credit_limit | app_channel   |   credit_bureau_score |   stated_income | is_customer   | timestamp           |   y_pred_proba_prepaid_card |   y_pred_proba_highstreet_card |   y_pred_proba_upmarket_card | y_pred       | y_true        |
|  0 |    0 | Partner3      |               1.80823  |                      350 | web           |                   309 |           15000 | True          | 2020-05-02 02:01:30 |                        0.97 |                           0.03 |                         0    | prepaid_card | prepaid_card  |
+----+------+---------------+------------------------+--------------------------+----------

In [None]:
# The business value matrix is accepted either as nested lists (used here) or
# as a numpy array. This one is symmetric: +1 on the diagonal, -1 in the two
# opposite corners, 0 elsewhere.
# NOTE(review): presumably indexed by (true class, predicted class) — confirm the
# orientation and the class ordering against the CBPE documentation.
business_value_matrix = [[1, 0, -1],
                         [0, 1, 0],
                         [-1, 0, 1]]

# Configure the estimator for the multiclass problem with `business_value` as
# the only metric, working on chunks of 6000 rows.
estimator = nml.CBPE(
    problem_type="classification_multiclass",
    metrics=["business_value"],
    business_value_matrix=business_value_matrix,
    normalize_business_value="per_prediction",
    chunk_size=6000,
    timestamp_column_name="timestamp",
    y_true="y_true",
    y_pred="y_pred",
    # Class label -> column holding that class's predicted probability.
    y_pred_proba=dict(
        prepaid_card="y_pred_proba_prepaid_card",
        highstreet_card="y_pred_proba_highstreet_card",
        upmarket_card="y_pred_proba_upmarket_card",
    ),
)

In [None]:
# Fit the estimator on the reference data before estimating on the analysis data.
estimator.fit(reference_df)

In [None]:
# Estimate business value on the analysis data.
results = estimator.estimate(analysis_df)

# Keep only the analysis-period chunks and show them as a DataFrame.
analysis_estimates_df = results.filter(period="analysis").to_df()
display(analysis_estimates_df)

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,business_value,business_value,business_value,business_value,business_value,business_value,business_value,business_value
Unnamed: 0_level_1,key,chunk_index,start_index,end_index,start_date,end_date,period,value,sampling_error,realized,upper_confidence_boundary,lower_confidence_boundary,upper_threshold,lower_threshold,alert
0,[0:5999],0,0,5999,2020-09-01 03:10:01,2020-09-13 16:15:10,analysis,2.008617,0.008047,,2.03276,1.984475,2.050316,1.963201,False
1,[6000:11999],1,6000,11999,2020-09-13 16:15:32,2020-09-25 19:48:42,analysis,2.016709,0.008047,,2.040851,1.992566,2.050316,1.963201,False
2,[12000:17999],2,12000,17999,2020-09-25 19:50:04,2020-10-08 02:53:47,analysis,2.025152,0.008047,,2.049294,2.00101,2.050316,1.963201,False
3,[18000:23999],3,18000,23999,2020-10-08 02:57:34,2020-10-20 15:48:19,analysis,2.018928,0.008047,,2.04307,1.994786,2.050316,1.963201,False
4,[24000:29999],4,24000,29999,2020-10-20 15:49:06,2020-11-01 22:04:40,analysis,2.006521,0.008047,,2.030664,1.982379,2.050316,1.963201,False
5,[30000:35999],5,30000,35999,2020-11-01 22:04:59,2020-11-14 03:55:33,analysis,1.564443,0.008047,,1.588585,1.5403,2.050316,1.963201,True
6,[36000:41999],6,36000,41999,2020-11-14 03:55:49,2020-11-26 09:19:06,analysis,1.56846,0.008047,,1.592603,1.544318,2.050316,1.963201,True
7,[42000:47999],7,42000,47999,2020-11-26 09:19:22,2020-12-08 14:33:56,analysis,1.562041,0.008047,,1.586183,1.537898,2.050316,1.963201,True
8,[48000:53999],8,48000,53999,2020-12-08 14:34:25,2020-12-20 18:30:30,analysis,1.566866,0.008047,,1.591009,1.542724,2.050316,1.963201,True
9,[54000:59999],9,54000,59999,2020-12-20 18:31:09,2021-01-01 22:57:55,analysis,1.57425,0.008047,,1.598392,1.550107,2.050316,1.963201,True


In [None]:
# Helper imported from the project's `docs.utils` module; it prints the
# multi-indexed results frame as a grid-style plain-text table.
# NOTE(review): needs the `docs` package to be importable from the notebook's
# working directory — confirm before running outside the docs build.
from docs.utils import print_multi_index_markdown
print_multi_index_markdown(results.filter(period='analysis').to_df())

+----+---------------+-----------------+-----------------+---------------+---------------------+---------------------+------------+--------------------+--------------------+--------------+-------------------------------+-------------------------------+---------------------+---------------------+-----------+
|    | | chunk       |                 |                 |               |                     |                     |            | | business_value   |                    |              |                               |                               |                     |                     |           |
|    | | key         | | chunk_index   | | start_index   | | end_index   | | start_date        | | end_date          | | period   | | value            | | sampling_error   | | realized   | | upper_confidence_boundary   | | lower_confidence_boundary   | | upper_threshold   | | lower_threshold   | | alert   |
| 0  | [0:5999]      | 0               | 0               | 5999          

In [None]:
# Build the figure for the estimation results and display it.
metric_fig = results.plot()
metric_fig.show()

In [None]:
# Build the figure for the estimation results and export it as an SVG file.
# The target path is relative, so it resolves against the notebook's working
# directory.
# NOTE(review): presumably a Plotly figure, in which case static export needs an
# image engine such as kaleido installed — confirm.
metric_fig = results.plot()
metric_fig.write_image(file="../_static/tutorials/performance_estimation/multiclass/business_value.svg")