In [1]:
def showrst(df):
    """Print ``df`` as a grid-style text table.

    The table text is produced by ``df.to_markdown(tablefmt="grid")`` and
    written to standard output; nothing is returned.

    Parameters
    ----------
    df : object exposing ``to_markdown`` (a pandas DataFrame in this notebook)
        The table to render.
    """
    grid_table = df.to_markdown(tablefmt="grid")
    print(grid_table)

In [2]:
import pandas as pd
import nannyml as nml
from IPython.display import display

# Load the synthetic multiclass classification dataset bundled with NannyML.
# The loader returns three objects, unpacked here in order.
# NOTE(review): `analysis_gt` presumably holds the analysis-period targets;
# it is not used in the cells below - confirm.
(reference, analysis, analysis_gt) = (
    nml.datasets.load_synthetic_multiclass_classification_dataset()
)

# Preview the first three reference rows.
display(reference.head(n=3))

Unnamed: 0,acq_channel,app_behavioral_score,requested_credit_limit,app_channel,credit_bureau_score,stated_income,is_customer,partition,identifier,timestamp,y_pred_proba_prepaid_card,y_pred_proba_highstreet_card,y_pred_proba_upmarket_card,y_pred,y_true
0,Partner3,1.808232,350,web,309,15000,True,reference,60000,2020-05-02 02:01:30,0.97,0.03,0.0,prepaid_card,prepaid_card
1,Partner2,4.382568,500,mobile,418,23000,True,reference,60001,2020-05-02 02:03:33,0.87,0.13,0.0,prepaid_card,prepaid_card
2,Partner2,-0.787575,400,web,507,24000,False,reference,60002,2020-05-02 02:04:49,0.47,0.35,0.18,prepaid_card,upmarket_card


In [3]:
# Derive model metadata from the reference data for a multiclass classifier,
# leaving the `identifier` column out of the extraction.
metadata = nml.extract_metadata(
    reference,
    exclude_columns=['identifier'],
    model_type=nml.ModelType.CLASSIFICATION_MULTICLASS,
    model_name='credit_card_segment',
)

# Point the metadata at the column that holds the true labels.
metadata.target_column_name = 'y_true'

# Show the completeness check result (recorded output below: `(True, [])`).
metadata_completeness = metadata.is_complete()
display(metadata_completeness)

(True, [])

In [4]:
# Build the CBPE estimator for the ROC AUC and F1 metrics, working on chunks
# of 6000 rows, and fit it on the reference data in one expression.
# `cbpe` ends up bound to whatever `fit` returns, exactly as before.
cbpe = nml.CBPE(
    model_metadata=metadata,
    chunk_size=6000,
    metrics=['roc_auc', 'f1'],
).fit(reference_data=reference)

In [9]:
# Run the fitted estimator on the analysis data only; the result object is
# reused by the display, plotting and export cells below.
est_perf_analysis = cbpe.estimate(analysis)

In [10]:
# Rich-display the first three rows of the estimation result's `data` frame
# (one row per chunk in the recorded output).
display(est_perf_analysis.data.head(3))

Unnamed: 0,key,start_index,end_index,start_date,end_date,partition,confidence_roc_auc,realized_roc_auc,estimated_roc_auc,upper_threshold_roc_auc,lower_threshold_roc_auc,alert_roc_auc,confidence_f1,realized_f1,estimated_f1,upper_threshold_f1,lower_threshold_f1,alert_f1
0,[0:5999],0,5999,2020-09-01 03:10:01,2020-09-13 16:15:10,analysis,0.000827,,0.90921,0.900902,0.913516,False,0.001752,,0.755324,0.741254,0.764944,False
1,[6000:11999],6000,11999,2020-09-13 16:15:32,2020-09-25 19:48:42,analysis,0.000827,,0.912072,0.900902,0.913516,False,0.001752,,0.758419,0.741254,0.764944,False
2,[12000:17999],12000,17999,2020-09-25 19:50:04,2020-10-08 02:53:47,analysis,0.000827,,0.912201,0.900902,0.913516,False,0.001752,,0.760262,0.741254,0.764944,False


In [11]:
# Print the same three rows as a grid-style text table via `showrst`.
showrst(est_perf_analysis.data.head(3))

+----+---------------+---------------+-------------+---------------------+---------------------+-------------+----------------------+--------------------+---------------------+---------------------------+---------------------------+-----------------+-----------------+---------------+----------------+----------------------+----------------------+------------+
|    | key           |   start_index |   end_index | start_date          | end_date            | partition   |   confidence_roc_auc |   realized_roc_auc |   estimated_roc_auc |   upper_threshold_roc_auc |   lower_threshold_roc_auc | alert_roc_auc   |   confidence_f1 |   realized_f1 |   estimated_f1 |   upper_threshold_f1 |   lower_threshold_f1 | alert_f1   |
|  0 | [0:5999]      |             0 |        5999 | 2020-09-01 03:10:01 | 2020-09-13 16:15:10 | analysis    |          0.000827459 |                nan |            0.90921  |                  0.900902 |                  0.913516 | False           |      0.00175158 |          

In [12]:
# Draw one performance plot per metric configured on the estimator, using the
# analysis-only estimates.
for metric in cbpe.metrics:
    analysis_figure = est_perf_analysis.plot(kind='performance', metric=metric)
    analysis_figure.show()

In [13]:
# Stack reference and analysis rows into one frame with a fresh 0..n-1 index,
# then estimate performance over the combined data.
reference_and_analysis = pd.concat([reference, analysis], ignore_index=True)
est_perf_with_ref = cbpe.estimate(reference_and_analysis)

In [15]:
# Draw one performance plot per configured metric, this time from the
# estimates computed over reference + analysis data.
for metric in cbpe.metrics:
    figure = est_perf_with_ref.plot(kind='performance', metric=metric)
    figure.show()

In [16]:
# Image-export backend handed to `write_image`; the following export cell
# reads this variable too, so the name must stay `engine`.
# 'auto' lets plotly use Kaleido when it is installed and fall back to the
# legacy orca executable otherwise, instead of hard-requiring the deprecated
# orca backend.
# NOTE(review): assumes `plot(...)` returns a plotly figure - confirm.
engine = 'auto'

# Write one SVG per configured metric for the analysis-only estimates.
# The target directory `../_static` is relative to the notebook's working
# directory and is expected to exist already.
for metric in cbpe.metrics:
    fig = est_perf_analysis.plot(kind='performance', metric=metric)
    fig.write_image(file=f"../_static/tutorial-perf-est-mc-guide-analysis-{metric}.svg", engine=engine)

In [17]:
# Write one SVG per configured metric for the reference + analysis estimates.
# `engine` is the export backend chosen in the previous export cell.
for metric in cbpe.metrics:
    fig = est_perf_with_ref.plot(kind='performance', metric=metric)
    svg_path = f"../_static/tutorial-perf-est-mc-guide-with-ref-{metric}.svg"
    fig.write_image(file=svg_path, engine=engine)