In [1]:
import nannyml as nml
from IPython.display import display

# Run the dataset loader once and index into its result, instead of invoking
# it separately for each partition.
car_loan_datasets = nml.load_synthetic_car_loan_dataset()
reference_df = car_loan_datasets[0]  # element 0 is used as the reference data
analysis_df = car_loan_datasets[1]  # element 1 is used as the analysis data

# Preview the first three reference rows.
display(reference_df.head(3))



Unnamed: 0,car_value,salary_range,debt_to_income_ratio,loan_length,repaid_loan_on_prev_car,size_of_downpayment,driver_tenure,repaid,timestamp,y_pred_proba,y_pred
0,39811.0,40K - 60K €,0.63295,19.0,False,40%,0.212653,1.0,2018-01-01 00:00:00.000,0.99,1
1,12679.0,40K - 60K €,0.718627,7.0,True,10%,4.927549,0.0,2018-01-01 00:08:43.152,0.07,0
2,19847.0,40K - 60K €,0.721724,17.0,False,0%,0.520817,1.0,2018-01-01 00:17:26.304,1.0,1


In [2]:
# Render the first three reference rows as a grid-formatted table and print it
# as plain text (handy for pasting into the written documentation).
reference_preview_md = reference_df.head(3).to_markdown(tablefmt="grid")
print(reference_preview_md)

+----+-------------+----------------+------------------------+---------------+---------------------------+-----------------------+-----------------+----------+-------------------------+----------------+----------+
|    |   car_value | salary_range   |   debt_to_income_ratio |   loan_length | repaid_loan_on_prev_car   | size_of_downpayment   |   driver_tenure |   repaid | timestamp               |   y_pred_proba |   y_pred |
|  0 |       39811 | 40K - 60K €    |               0.63295  |            19 | False                     | 40%                   |        0.212653 |        1 | 2018-01-01 00:00:00.000 |           0.99 |        1 |
+----+-------------+----------------+------------------------+---------------+---------------------------+-----------------------+-----------------+----------+-------------------------+----------------+----------+
|  1 |       12679 | 40K - 60K €    |               0.718627 |             7 | True                      | 10%                   |        4.9275

In [3]:
# Settings for the CBPE estimator, collected in one mapping so they can be
# read (and tweaked) at a glance before the estimator is built.
cbpe_settings = {
    # Column names in the car loan data.
    'y_pred_proba': 'y_pred_proba',
    'y_pred': 'y_pred',
    'y_true': 'repaid',
    'timestamp_column_name': 'timestamp',
    # Only the business value metric is estimated, in chunks of 5000 rows.
    'metrics': ['business_value'],
    'chunk_size': 5000,
    'problem_type': 'classification_binary',
    # NOTE(review): presumably laid out as [[TN, FP], [FN, TP]] values —
    # confirm against the NannyML business value documentation.
    'business_value_matrix': [[5, -10], [-50, 50]],
    'normalize_business_value': "per_prediction",
}

estimator = nml.CBPE(**cbpe_settings)

In [4]:
# Fit the estimator on the reference data. The call is the last expression in
# the cell, so its return value (the CBPE object) is shown as the cell output.
estimator.fit(reference_df)

<nannyml.performance_estimation.confidence_based.cbpe.CBPE at 0x10490c280>

In [5]:
# Estimate business value on the analysis data.
results = estimator.estimate(analysis_df)

# Keep only the analysis-period rows and show them as a DataFrame.
analysis_estimates_df = results.filter(period='analysis').to_df()
display(analysis_estimates_df)

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,business_value,business_value,business_value,business_value,business_value,business_value,business_value,business_value
Unnamed: 0_level_1,key,chunk_index,start_index,end_index,start_date,end_date,period,value,sampling_error,realized,upper_confidence_boundary,lower_confidence_boundary,upper_threshold,lower_threshold,alert
0,[0:4999],0,0,4999,2018-10-30 18:00:00,2018-11-30 00:27:16.848,analysis,24.327385,0.375491,,25.45386,23.200911,24.420349,22.826251,False
1,[5000:9999],1,5000,9999,2018-11-30 00:36:00,2018-12-30 07:03:16.848,analysis,23.139895,0.375491,,24.266369,22.01342,24.420349,22.826251,False
2,[10000:14999],2,10000,14999,2018-12-30 07:12:00,2019-01-29 13:39:16.848,analysis,23.304815,0.375491,,24.43129,22.178341,24.420349,22.826251,False
3,[15000:19999],3,15000,19999,2019-01-29 13:48:00,2019-02-28 20:15:16.848,analysis,23.390021,0.375491,,24.516495,22.263546,24.420349,22.826251,False
4,[20000:24999],4,20000,24999,2019-02-28 20:24:00,2019-03-31 02:51:16.848,analysis,23.849335,0.375491,,24.975809,22.722861,24.420349,22.826251,False
5,[25000:29999],5,25000,29999,2019-03-31 03:00:00,2019-04-30 09:27:16.848,analysis,21.995507,0.375491,,23.121981,20.869032,24.420349,22.826251,True
6,[30000:34999],6,30000,34999,2019-04-30 09:36:00,2019-05-30 16:03:16.848,analysis,21.904553,0.375491,,23.031028,20.778079,24.420349,22.826251,True
7,[35000:39999],7,35000,39999,2019-05-30 16:12:00,2019-06-29 22:39:16.848,analysis,21.918829,0.375491,,23.045303,20.792355,24.420349,22.826251,True
8,[40000:44999],8,40000,44999,2019-06-29 22:48:00,2019-07-30 05:15:16.848,analysis,21.791221,0.375491,,22.917695,20.664747,24.420349,22.826251,True
9,[45000:49999],9,45000,49999,2019-07-30 05:24:00,2019-08-29 11:51:16.848,analysis,22.641113,0.375491,,23.767587,21.514639,24.420349,22.826251,True


In [6]:
from docs.utils import print_multi_index_markdown

# Print the analysis-period estimates (a frame with two-level column headers)
# as a text table using the docs helper.
analysis_estimates_for_docs = results.filter(period='analysis').to_df()
print_multi_index_markdown(analysis_estimates_for_docs)

+----+---------------+-----------------+-----------------+---------------+---------------------+----------------------------+------------+--------------------+--------------------+--------------+-------------------------------+-------------------------------+---------------------+---------------------+-----------+
|    | | chunk       |                 |                 |               |                     |                            |            | | business_value   |                    |              |                               |                               |                     |                     |           |
|    | | key         | | chunk_index   | | start_index   | | end_index   | | start_date        | | end_date                 | | period   | | value            | | sampling_error   | | realized   | | upper_confidence_boundary   | | lower_confidence_boundary   | | upper_threshold   | | lower_threshold   | | alert   |
| 0  | [0:4999]      | 0               | 0          

In [7]:
# Build the figure for the estimation results and render it in the notebook.
metric_fig = results.plot()
metric_fig.show()

In [8]:
# Re-create the results figure and export it as an SVG for the written tutorial.
# The path is a plain string literal: it contains no placeholders, so the
# f-string prefix was unnecessary.
metric_fig = results.plot()
metric_fig.write_image(file="../_static/tutorials/performance_estimation/binary/tutorial-business-value-estimation-binary-car-loan-analysis-with-ref.svg")
