In [None]:
import numpy as np

import nannyml as nml
from IPython.display import display

In [None]:
# Example of a threshold with fixed bounds: lower bound 0.5, upper bound 0.9.
ct = nml.thresholds.ConstantThreshold(
    lower=0.5,
    upper=0.9,
)

In [None]:
# Example of a standard-deviation based threshold: both multipliers are 3 and
# the offset is taken from np.mean.
stdt = nml.thresholds.StandardDeviationThreshold(
    std_lower_multiplier=3,
    std_upper_multiplier=3,
    offset_from=np.mean,
)

In [None]:
# Load the synthetic car loan dataset. The loader returns three objects; only
# the first two (reference and analysis frames) are used, the third is discarded.
reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()
# Preview the first rows of the reference data.
display(reference_df.head())


Unnamed: 0,car_value,salary_range,debt_to_income_ratio,loan_length,repaid_loan_on_prev_car,size_of_downpayment,driver_tenure,repaid,timestamp,y_pred_proba,y_pred
0,39811.0,40K - 60K €,0.63295,19.0,False,40%,0.212653,1.0,2018-01-01 00:00:00.000,0.99,1
1,12679.0,40K - 60K €,0.718627,7.0,True,10%,4.927549,0.0,2018-01-01 00:08:43.152,0.07,0
2,19847.0,40K - 60K €,0.721724,17.0,False,0%,0.520817,1.0,2018-01-01 00:17:26.304,1.0,1
3,22652.0,20K - 20K €,0.705992,16.0,False,10%,0.453649,1.0,2018-01-01 00:26:09.456,0.98,1
4,21268.0,60K+ €,0.671888,21.0,True,30%,5.695263,1.0,2018-01-01 00:34:52.608,0.99,1


In [None]:
# Emit the same preview as a grid-formatted Markdown table (plain text output).
print(
    reference_df.head().to_markdown(tablefmt="grid")
)

+----+-------------+----------------+------------------------+---------------+---------------------------+-----------------------+-----------------+----------+-------------------------+----------------+----------+
|    |   car_value | salary_range   |   debt_to_income_ratio |   loan_length | repaid_loan_on_prev_car   | size_of_downpayment   |   driver_tenure |   repaid | timestamp               |   y_pred_proba |   y_pred |
|  0 |       39811 | 40K - 60K €    |               0.63295  |            19 | False                     | 40%                   |        0.212653 |        1 | 2018-01-01 00:00:00.000 |           0.99 |        1 |
+----+-------------+----------------+------------------------+---------------+---------------------------+-----------------------+-----------------+----------+-------------------------+----------------+----------+
|  1 |       12679 | 40K - 60K €    |               0.718627 |             7 | True                      | 10%                   |        4.9275

In [None]:
# Build a CBPE estimator without passing any `thresholds` argument.
estimator = nml.CBPE(
    problem_type='classification_binary',
    # Column names in the loaded data frames.
    y_true='repaid',
    y_pred='y_pred',
    y_pred_proba='y_pred_proba',
    timestamp_column_name='timestamp',
    # Estimate F1 only, on chunks of 5000 rows.
    metrics=['f1'],
    chunk_size=5000,
)
# Inspect which threshold the estimator holds for 'f1' when none was supplied.
estimator.thresholds['f1']

StandardDeviationThreshold{'std_lower_multiplier': 3, 'std_upper_multiplier': 3, 'offset_from': <function mean at 0x7fe6402f19d0>}

In [None]:
# Fit on the reference data, then estimate on the analysis data.
estimator.fit(reference_df)
results = estimator.estimate(analysis_df)

# Two-level column labels (group, field) selecting the chunk identifiers and
# the F1 value, thresholds and alert flag. Reused by later cells.
columns = [
    ('chunk', 'key'),
    ('chunk', 'period'),
    ('f1', 'value'),
    ('f1', 'upper_threshold'),
    ('f1', 'lower_threshold'),
    ('f1', 'alert'),
]
display(results.to_df()[columns])


Unnamed: 0_level_0,chunk,chunk,f1,f1,f1,f1
Unnamed: 0_level_1,key,period,value,upper_threshold,lower_threshold,alert
0,[0:4999],reference,0.94296,0.95085,0.93466,False
1,[5000:9999],reference,0.940827,0.95085,0.93466,False
2,[10000:14999],reference,0.943211,0.95085,0.93466,False
3,[15000:19999],reference,0.942901,0.95085,0.93466,False
4,[20000:24999],reference,0.943178,0.95085,0.93466,False
5,[25000:29999],reference,0.942702,0.95085,0.93466,False
6,[30000:34999],reference,0.940858,0.95085,0.93466,False
7,[35000:39999],reference,0.944588,0.95085,0.93466,False
8,[40000:44999],reference,0.944518,0.95085,0.93466,False
9,[45000:49999],reference,0.94443,0.95085,0.93466,False


In [None]:
# Emit the selected result columns as a grid-formatted Markdown table.
print(
    results.to_df()[columns].to_markdown(tablefmt="grid")
)

+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|    | ('chunk', 'key')   | ('chunk', 'period')   |   ('f1', 'value') |   ('f1', 'upper_threshold') |   ('f1', 'lower_threshold') | ('f1', 'alert')   |
|  0 | [0:4999]           | reference             |          0.94296  |                     0.95085 |                     0.93466 | False             |
+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|  1 | [5000:9999]        | reference             |          0.940827 |                     0.95085 |                     0.93466 | False             |
+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|  2 | [10000:14999]      | reference             |          0.943211 |                 

In [None]:
# Build the figure for the current results and render it in the notebook.
metric_fig = results.plot()
metric_fig.show()

In [None]:
# Save the current figure as an SVG file. The path is relative, so it resolves
# against the working directory the notebook is run from.
metric_fig.write_image(
    '../_static/tutorials/thresholds/est_f1_default_thresholds.svg'
)

In [None]:
# Constant threshold with only an upper bound (0.93); the lower bound is None.
constant_threshold = nml.thresholds.ConstantThreshold(
    lower=None,
    upper=0.93,
)
# Evaluate the threshold against the F1 values of the reference period; the
# result is shown as the cell output.
constant_threshold.thresholds(
    results.filter(period='reference').to_df()[('f1', 'value')]
)

(None, 0.93)

In [None]:
# Build a CBPE estimator whose 'f1' metric uses `constant_threshold`.
estimator = nml.CBPE(
    problem_type='classification_binary',
    # Column names in the loaded data frames.
    y_true='repaid',
    y_pred='y_pred',
    y_pred_proba='y_pred_proba',
    timestamp_column_name='timestamp',
    # Estimate F1 only, on chunks of 5000 rows.
    metrics=['f1'],
    chunk_size=5000,
    # Per-metric threshold override.
    thresholds={'f1': constant_threshold},
)

# Fit on the reference data, estimate on the analysis data, and show the
# columns selected by `columns` (defined in an earlier cell).
estimator.fit(reference_df)
results = estimator.estimate(analysis_df)
display(results.to_df()[columns])


Unnamed: 0_level_0,chunk,chunk,f1,f1,f1,f1
Unnamed: 0_level_1,key,period,value,upper_threshold,lower_threshold,alert
0,[0:4999],reference,0.94296,0.93,,True
1,[5000:9999],reference,0.940827,0.93,,True
2,[10000:14999],reference,0.943211,0.93,,True
3,[15000:19999],reference,0.942901,0.93,,True
4,[20000:24999],reference,0.943178,0.93,,True
5,[25000:29999],reference,0.942702,0.93,,True
6,[30000:34999],reference,0.940858,0.93,,True
7,[35000:39999],reference,0.944588,0.93,,True
8,[40000:44999],reference,0.944518,0.93,,True
9,[45000:49999],reference,0.94443,0.93,,True


In [None]:
# Emit the selected result columns as a grid-formatted Markdown table.
print(
    results.to_df()[columns].to_markdown(tablefmt="grid")
)

+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|    | ('chunk', 'key')   | ('chunk', 'period')   |   ('f1', 'value') |   ('f1', 'upper_threshold') | ('f1', 'lower_threshold')   | ('f1', 'alert')   |
|  0 | [0:4999]           | reference             |          0.94296  |                        0.93 |                             | True              |
+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|  1 | [5000:9999]        | reference             |          0.940827 |                        0.93 |                             | True              |
+----+--------------------+-----------------------+-------------------+-----------------------------+-----------------------------+-------------------+
|  2 | [10000:14999]      | reference             |          0.943211 |                 

In [None]:
# Build the figure for the current results and render it in the notebook.
metric_fig = results.plot()
metric_fig.show()


In [None]:
# Save the current figure as an SVG file. The path is relative, so it resolves
# against the working directory the notebook is run from.
metric_fig.write_image(
    '../_static/tutorials/thresholds/est_f1_inverted_thresholds.svg'
)