In [1]:
import nannyml as nml
from IPython.display import display

# Load the synthetic car-loan data. The loader's result unpacks into three
# objects: reference data, analysis data, and the targets for the analysis data.
reference_df, analysis_df, analysis_targets_df = nml.load_synthetic_car_loan_dataset()

# Join the targets onto the analysis rows by index so that predictions and
# targets are available together in a single frame.
analysis_full_df = analysis_df.merge(
    right=analysis_targets_df,
    left_index=True,
    right_index=True,
)

# Columns checked for univariate drift. Besides the input columns, the list
# also contains the prediction columns ('y_pred_proba', 'y_pred') and the
# 'repaid' column.
column_names = [
    'car_value',
    'salary_range',
    'debt_to_income_ratio',
    'loan_length',
    'repaid_loan_on_prev_car',
    'size_of_downpayment',
    'driver_tenure',
    'y_pred_proba',
    'y_pred',
    'repaid',
]

# Two drift methods are configured for continuous columns and two for
# categorical ones; 'y_pred' and 'repaid' are explicitly treated as categorical.
univ_calc = nml.UnivariateDriftCalculator(
    chunk_size=5000,
    timestamp_column_name='timestamp',
    column_names=column_names,
    treat_as_categorical=['y_pred', 'repaid'],
    categorical_methods=['chi2', 'jensen_shannon'],
    continuous_methods=['kolmogorov_smirnov', 'jensen_shannon'],
)

# Fit on the reference data, then calculate drift on the analysis data
# (the merged frame that also carries the targets).
univ_calc.fit(reference_df)
univariate_results = univ_calc.calculate(analysis_full_df)

# Show only the analysis-period results for a single column.
display(
    univariate_results
    .filter(period='analysis', column_names=['debt_to_income_ratio'])
    .to_df()
)

# Rank the monitored columns by number of drift alerts, using only the
# Jensen-Shannon results from the univariate drift calculation.
# only_drifting=False is passed explicitly; presumably this keeps columns
# without alerts in the ranking — verify against the NannyML documentation.
alert_count_ranker = nml.AlertCountRanker()
alert_count_ranked_features = alert_count_ranker.rank(
    univariate_results.filter(methods=['jensen_shannon']),
    only_drifting=False,
)
display(alert_count_ranked_features)

# Configure the CBPE estimator for the binary classification problem with the
# 'roc_auc' and 'recall' metrics, using the same chunk size as the drift step.
estimated_calc = nml.CBPE(
    problem_type='classification_binary',
    y_true='repaid',
    y_pred='y_pred',
    y_pred_proba='y_pred_proba',
    timestamp_column_name='timestamp',
    chunk_size=5000,
    metrics=['roc_auc', 'recall'],
)

# Fit on the reference data, then estimate on the analysis data.
estimated_calc.fit(reference_df)
estimated_perf_results = estimated_calc.estimate(analysis_full_df)

# Show the analysis-period rows of the estimation results.
display(
    estimated_perf_results
    .filter(period='analysis')
    .to_df()
)

# Configure the performance calculator with the same columns, metrics and
# chunk size that were given to the CBPE estimator.
realized_calc = nml.PerformanceCalculator(
    problem_type='classification_binary',
    y_true='repaid',
    y_pred='y_pred',
    y_pred_proba='y_pred_proba',
    timestamp_column_name='timestamp',
    chunk_size=5000,
    metrics=['roc_auc', 'recall'],
)

# Fit on the reference data, then calculate on the analysis data; the merged
# frame is used because it contains the 'repaid' targets.
realized_calc.fit(reference_df)
realized_perf_results = realized_calc.calculate(analysis_full_df)

# Show the analysis-period rows of the calculated performance.
display(
    realized_perf_results
    .filter(period='analysis')
    .to_df()
)

ranker1 = nml.CorrelationRanker()

# The ranker is fitted on a single metric ('roc_auc' from the estimated
# performance results), restricted to the reference period.
ranker1.fit(
    estimated_perf_results.filter(metrics=['roc_auc'], period='reference')
)

# Ranking pairs one drift method ('jensen_shannon') with one performance metric.
# NOTE(review): neither result is filtered by period here, whereas the ranker2
# call further down passes period='analysis' to both inputs — confirm that
# ranking over all periods is intended for this first example.
correlation_ranked_features1 = ranker1.rank(
    univariate_results.filter(methods=['jensen_shannon']),
    estimated_perf_results.filter(metrics=['roc_auc']),
    only_drifting=False,
)

display(correlation_ranked_features1)

ranker2 = nml.CorrelationRanker()

# The ranker is fitted on a single metric ('recall' from the realized
# performance results), restricted to the reference period.
ranker2.fit(
    realized_perf_results.filter(metrics=['recall'], period='reference')
)

# Ranking pairs one drift method ('jensen_shannon') with one performance
# metric; both inputs are limited to the analysis period.
correlation_ranked_features2 = ranker2.rank(
    univariate_results.filter(methods=['jensen_shannon'], period='analysis'),
    realized_perf_results.filter(metrics=['recall'], period='analysis'),
    only_drifting=False,
)

display(correlation_ranked_features2)

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio
Unnamed: 0_level_1,chunk,chunk,chunk,chunk,chunk,chunk,chunk,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon
Unnamed: 0_level_2,key,chunk_index,start_index,end_index,start_date,end_date,period,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert
0,[0:4999],0,0,4999,2018-10-30 18:00:00,2018-11-30 00:27:16.848,analysis,0.01576,0.018584,,False,0.031661,0.039328,,False
1,[5000:9999],1,5000,9999,2018-11-30 00:36:00,2018-12-30 07:03:16.848,analysis,0.01268,0.018584,,False,0.030011,0.039328,,False
2,[10000:14999],2,10000,14999,2018-12-30 07:12:00,2019-01-29 13:39:16.848,analysis,0.01734,0.018584,,False,0.031129,0.039328,,False
3,[15000:19999],3,15000,19999,2019-01-29 13:48:00,2019-02-28 20:15:16.848,analysis,0.0128,0.018584,,False,0.029464,0.039328,,False
4,[20000:24999],4,20000,24999,2019-02-28 20:24:00,2019-03-31 02:51:16.848,analysis,0.01918,0.018584,,True,0.030809,0.039328,,False
5,[25000:29999],5,25000,29999,2019-03-31 03:00:00,2019-04-30 09:27:16.848,analysis,0.00824,0.018584,,False,0.028681,0.039328,,False
6,[30000:34999],6,30000,34999,2019-04-30 09:36:00,2019-05-30 16:03:16.848,analysis,0.01058,0.018584,,False,0.043628,0.039328,,True
7,[35000:39999],7,35000,39999,2019-05-30 16:12:00,2019-06-29 22:39:16.848,analysis,0.01002,0.018584,,False,0.029253,0.039328,,False
8,[40000:44999],8,40000,44999,2019-06-29 22:48:00,2019-07-30 05:15:16.848,analysis,0.01068,0.018584,,False,0.030628,0.039328,,False
9,[45000:49999],9,45000,49999,2019-07-30 05:24:00,2019-08-29 11:51:16.848,analysis,0.0068,0.018584,,False,0.02833,0.039328,,False


Unnamed: 0,number_of_alerts,column_name,rank
0,6,car_value,1
1,5,y_pred_proba,2
2,5,salary_range,3
3,5,repaid_loan_on_prev_car,4
4,5,loan_length,5
5,2,y_pred,6
6,2,repaid,7
7,1,debt_to_income_ratio,8
8,0,size_of_downpayment,9
9,0,driver_tenure,10


Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,roc_auc,roc_auc,roc_auc,roc_auc,roc_auc,roc_auc,recall,recall,recall,recall,recall,recall,recall,recall
Unnamed: 0_level_1,key,chunk_index,start_index,end_index,start_date,end_date,period,value,sampling_error,realized,...,lower_threshold,alert,value,sampling_error,realized,upper_confidence_boundary,lower_confidence_boundary,upper_threshold,lower_threshold,alert
0,[0:4999],0,0,4999,2018-10-30 18:00:00,2018-11-30 00:27:16.848,analysis,0.970744,0.001814,0.970962,...,0.963317,False,0.928723,0.005137,0.930394,0.944133,0.913313,0.941033,0.9171,False
1,[5000:9999],1,5000,9999,2018-11-30 00:36:00,2018-12-30 07:03:16.848,analysis,0.971011,0.001814,0.970248,...,0.963317,False,0.925261,0.005137,0.923922,0.940671,0.909851,0.941033,0.9171,False
2,[10000:14999],2,10000,14999,2018-12-30 07:12:00,2019-01-29 13:39:16.848,analysis,0.971407,0.001814,0.976282,...,0.963317,False,0.929317,0.005137,0.938246,0.944727,0.913907,0.941033,0.9171,False
3,[15000:19999],3,15000,19999,2019-01-29 13:48:00,2019-02-28 20:15:16.848,analysis,0.971091,0.001814,0.967721,...,0.963317,False,0.929713,0.005137,0.92506,0.945123,0.914303,0.941033,0.9171,False
4,[20000:24999],4,20000,24999,2019-02-28 20:24:00,2019-03-31 02:51:16.848,analysis,0.971123,0.001814,0.969886,...,0.963317,False,0.930604,0.005137,0.927577,0.946014,0.915194,0.941033,0.9171,False
5,[25000:29999],5,25000,29999,2019-03-31 03:00:00,2019-04-30 09:27:16.848,analysis,0.96109,0.001814,0.96005,...,0.963317,True,0.88399,0.005137,0.905086,0.8994,0.86858,0.941033,0.9171,True
6,[30000:34999],6,30000,34999,2019-04-30 09:36:00,2019-05-30 16:03:16.848,analysis,0.961825,0.001814,0.95853,...,0.963317,True,0.883528,0.005137,0.89901,0.898938,0.868118,0.941033,0.9171,True
7,[35000:39999],7,35000,39999,2019-05-30 16:12:00,2019-06-29 22:39:16.848,analysis,0.961073,0.001814,0.959041,...,0.963317,True,0.885501,0.005137,0.901718,0.900911,0.870091,0.941033,0.9171,True
8,[40000:44999],8,40000,44999,2019-06-29 22:48:00,2019-07-30 05:15:16.848,analysis,0.962533,0.001814,0.963094,...,0.963317,True,0.885978,0.005137,0.906124,0.901388,0.870568,0.941033,0.9171,True
9,[45000:49999],9,45000,49999,2019-07-30 05:24:00,2019-08-29 11:51:16.848,analysis,0.961316,0.001814,0.957556,...,0.963317,True,0.889808,0.005137,0.905823,0.905218,0.874398,0.941033,0.9171,True


Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,chunk,roc_auc,roc_auc,roc_auc,roc_auc,roc_auc,recall,recall,recall,recall,recall
Unnamed: 0_level_1,key,chunk_index,start_index,end_index,start_date,end_date,period,targets_missing_rate,sampling_error,value,upper_threshold,lower_threshold,alert,sampling_error,value,upper_threshold,lower_threshold,alert
0,[0:4999],0,0,4999,2018-10-30 18:00:00,2018-11-30 00:27:16.848,analysis,0.0,0.001814,0.970962,0.97866,0.963317,False,0.005137,0.930394,0.941033,0.9171,False
1,[5000:9999],1,5000,9999,2018-11-30 00:36:00,2018-12-30 07:03:16.848,analysis,0.0,0.001814,0.970248,0.97866,0.963317,False,0.005137,0.923922,0.941033,0.9171,False
2,[10000:14999],2,10000,14999,2018-12-30 07:12:00,2019-01-29 13:39:16.848,analysis,0.0,0.001814,0.976282,0.97866,0.963317,False,0.005137,0.938246,0.941033,0.9171,False
3,[15000:19999],3,15000,19999,2019-01-29 13:48:00,2019-02-28 20:15:16.848,analysis,0.0,0.001814,0.967721,0.97866,0.963317,False,0.005137,0.92506,0.941033,0.9171,False
4,[20000:24999],4,20000,24999,2019-02-28 20:24:00,2019-03-31 02:51:16.848,analysis,0.0,0.001814,0.969886,0.97866,0.963317,False,0.005137,0.927577,0.941033,0.9171,False
5,[25000:29999],5,25000,29999,2019-03-31 03:00:00,2019-04-30 09:27:16.848,analysis,0.0,0.001814,0.96005,0.97866,0.963317,True,0.005137,0.905086,0.941033,0.9171,True
6,[30000:34999],6,30000,34999,2019-04-30 09:36:00,2019-05-30 16:03:16.848,analysis,0.0,0.001814,0.95853,0.97866,0.963317,True,0.005137,0.89901,0.941033,0.9171,True
7,[35000:39999],7,35000,39999,2019-05-30 16:12:00,2019-06-29 22:39:16.848,analysis,0.0,0.001814,0.959041,0.97866,0.963317,True,0.005137,0.901718,0.941033,0.9171,True
8,[40000:44999],8,40000,44999,2019-06-29 22:48:00,2019-07-30 05:15:16.848,analysis,0.0,0.001814,0.963094,0.97866,0.963317,True,0.005137,0.906124,0.941033,0.9171,True
9,[45000:49999],9,45000,49999,2019-07-30 05:24:00,2019-08-29 11:51:16.848,analysis,0.0,0.001814,0.957556,0.97866,0.963317,True,0.005137,0.905823,0.941033,0.9171,True


  pearsonr(filtered_values.ravel(), filtered_perf_change)


Unnamed: 0,column_name,pearsonr_correlation,pearsonr_pvalue,has_drifted,rank
0,repaid_loan_on_prev_car,0.998626,1.653702e-24,True,1
1,y_pred_proba,0.998586,2.141497e-24,True,2
2,salary_range,0.997379,5.487114000000001e-22,True,3
3,loan_length,0.997314,6.833463e-22,True,4
4,car_value,0.997213,9.529448e-22,True,5
5,size_of_downpayment,0.311427,0.1813554,False,6
6,debt_to_income_ratio,0.256911,0.2741989,True,7
7,y_pred,0.066571,0.7803558,True,8
8,repaid,-0.127146,0.593218,True,9
9,driver_tenure,-0.141105,0.5529204,False,10


  pearsonr(filtered_values.ravel(), filtered_perf_change)


Unnamed: 0,column_name,pearsonr_correlation,pearsonr_pvalue,has_drifted,rank
0,repaid_loan_on_prev_car,0.96897,4e-06,True,1
1,y_pred_proba,0.966157,6e-06,True,2
2,loan_length,0.965298,6e-06,True,3
3,car_value,0.963623,7e-06,True,4
4,salary_range,0.963456,7e-06,True,5
5,size_of_downpayment,0.308948,0.385072,False,6
6,debt_to_income_ratio,0.307373,0.387627,True,7
7,y_pred,-0.357571,0.310383,True,8
8,repaid,-0.395842,0.257495,True,9
9,driver_tenure,-0.575807,0.08152,False,10
