In [None]:
import nannyml as nml
from IPython.display import display

# Load the synthetic car loan data; the third returned item is not needed here.
reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

# Monitor every column of the reference frame except 'timestamp' and 'repaid'.
excluded_columns = ['timestamp', 'repaid']
column_names = [name for name in reference_df.columns if name not in excluded_columns]
print(column_names)

['car_value', 'salary_range', 'debt_to_income_ratio', 'loan_length', 'repaid_loan_on_prev_car', 'size_of_downpayment', 'driver_tenure', 'y_pred_proba', 'y_pred']


In [None]:
# Drift methods to compute, keyed by the calculator keyword they are passed to.
drift_methods = {
    'continuous_methods': ['kolmogorov_smirnov', 'jensen_shannon'],
    'categorical_methods': ['chi2', 'jensen_shannon'],
}

calc = nml.UnivariateDriftCalculator(
    column_names=column_names,
    timestamp_column_name='timestamp',
    **drift_methods,
)

# Fit on the reference data first, then compute drift for the analysis data.
calc.fit(reference_df)
results = calc.calculate(analysis_df)

161 {'car_value': [<nannyml.drift.univariate.methods.KolmogorovSmirnovStatistic object at 0x7fefc694faf0>, <nannyml.drift.univariate.methods.JensenShannonDistance object at 0x7ff0b7585c10>], 'salary_range': [], 'debt_to_income_ratio': [<nannyml.drift.univariate.methods.KolmogorovSmirnovStatistic object at 0x7ff0b758bf40>, <nannyml.drift.univariate.methods.JensenShannonDistance object at 0x7ff0b7648df0>], 'loan_length': [<nannyml.drift.univariate.methods.KolmogorovSmirnovStatistic object at 0x7ff0b76485b0>, <nannyml.drift.univariate.methods.JensenShannonDistance object at 0x7ff0b7648970>], 'repaid_loan_on_prev_car': [], 'size_of_downpayment': [], 'driver_tenure': [<nannyml.drift.univariate.methods.KolmogorovSmirnovStatistic object at 0x7ff0b758b730>, <nannyml.drift.univariate.methods.JensenShannonDistance object at 0x7ff0b758beb0>], 'y_pred_proba': [<nannyml.drift.univariate.methods.KolmogorovSmirnovStatistic object at 0x7ff0b759cf40>, <nannyml.drift.univariate.methods.JensenShannonDist

In [None]:
# Export the drift results to a DataFrame and render it with rich display.
results_df = results.to_df()
display(results_df)

Unnamed: 0_level_0,car_value,car_value,car_value,car_value,car_value,car_value,car_value,car_value,chunk,chunk,...,y_pred,y_pred,y_pred_proba,y_pred_proba,y_pred_proba,y_pred_proba,y_pred_proba,y_pred_proba,y_pred_proba,y_pred_proba
Unnamed: 0_level_1,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,chunk,chunk,...,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov
Unnamed: 0_level_2,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value,chunk_index,end_date,...,upper_threshold,value,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value
0,False,,0.1,0.029674,False,,,0.0103,0,2018-01-31 06:27:16.848,...,0.1,0.00549,False,,0.1,0.013355,False,,,0.00922
1,False,,0.1,0.023785,False,,,0.00732,1,2018-03-02 13:03:16.848,...,0.1,0.00634,False,,0.1,0.021129,False,,,0.01042
2,False,,0.1,0.026469,False,,,0.00802,2,2018-04-01 19:39:16.848,...,0.1,0.004877,False,,0.1,0.02237,False,,,0.0091
3,False,,0.1,0.021747,False,,,0.0085,3,2018-05-02 02:15:16.848,...,0.1,0.00175,False,,0.1,0.017829,False,,,0.00872
4,False,,0.1,0.024108,False,,,0.00892,4,2018-06-01 08:51:16.848,...,0.1,0.003687,False,,0.1,0.021662,False,,,0.00852
5,False,,0.1,0.027559,False,,,0.01456,5,2018-07-01 15:27:16.848,...,0.1,0.00379,False,,0.1,0.017256,False,,,0.01028
6,False,,0.1,0.026782,False,,,0.01284,6,2018-07-31 22:03:16.848,...,0.1,0.000289,False,,0.1,0.025322,False,,,0.01248
7,False,,0.1,0.031213,False,,,0.01348,7,2018-08-31 04:39:16.848,...,0.1,0.004707,False,,0.1,0.027507,False,,,0.0089
8,False,,0.1,0.027301,False,,,0.01572,8,2018-09-30 11:15:16.848,...,0.1,0.001139,False,,0.1,0.024322,False,,,0.00768
9,False,,0.1,0.029698,False,,,0.00924,9,2018-10-30 17:51:16.848,...,0.1,0.002668,False,,0.1,0.030395,False,,,0.00498


In [None]:
# Same export, printed as a grid-formatted markdown table.
results_markdown = results.to_df().to_markdown(tablefmt="grid")
print(results_markdown)


+----+--------------------------------------------+------------------------------------------------------+------------------------------------------------------+--------------------------------------------+------------------------------------------------+----------------------------------------------------------+----------------------------------------------------------+------------------------------------------------+-------------------------------------+----------------------------------+-----------------------------------+-----------------------------+--------------------------------+------------------------------------+-------------------------------------+-------------------------------------------------------+-----------------------------------------------------------------+-----------------------------------------------------------------+-------------------------------------------------------+-----------------------------------------------------------+---------------------------

In [None]:
# Select every column whose third header level is 'value', whatever the
# first two levels are, and list the resulting column index.
any_label = slice(None)
value_columns = results.to_df().loc[:, (any_label, any_label, 'value')].columns
print(value_columns)

MultiIndex([(              'car_value',     'jensen_shannon', 'value'),
            (              'car_value', 'kolmogorov_smirnov', 'value'),
            (   'debt_to_income_ratio',     'jensen_shannon', 'value'),
            (   'debt_to_income_ratio', 'kolmogorov_smirnov', 'value'),
            (          'driver_tenure',     'jensen_shannon', 'value'),
            (          'driver_tenure', 'kolmogorov_smirnov', 'value'),
            (            'loan_length',     'jensen_shannon', 'value'),
            (            'loan_length', 'kolmogorov_smirnov', 'value'),
            ('repaid_loan_on_prev_car',               'chi2', 'value'),
            ('repaid_loan_on_prev_car',     'jensen_shannon', 'value'),
            (           'salary_range',               'chi2', 'value'),
            (           'salary_range',     'jensen_shannon', 'value'),
            (    'size_of_downpayment',               'chi2', 'value'),
            (    'size_of_downpayment',     'jensen_shannon', 'v

In [None]:
# Narrow the results down to one period, one method and one column.
filtered_results = results.filter(
    period='analysis',
    methods=['chi2'],
    column_names=['salary_range'],
)
# Show what kind of object filtering hands back.
filtered_type = type(filtered_results)
print(filtered_type)

<class 'nannyml.drift.univariate.result.Result'>


In [None]:
# Export the filtered results to a DataFrame and render it with rich display.
filtered_df = filtered_results.to_df()
display(filtered_df)

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,salary_range,salary_range,salary_range,salary_range
Unnamed: 0_level_1,chunk,chunk,chunk,chunk,chunk,chunk,chunk,chi2,chi2,chi2,chi2
Unnamed: 0_level_2,chunk_index,end_date,end_index,key,period,start_date,start_index,alert,lower_threshold,upper_threshold,value
0,0,2018-11-30 00:27:16.848,4999,[0:4999],analysis,2018-10-30 18:00:00,0,False,,,1.033683
1,1,2018-12-30 07:03:16.848,9999,[5000:9999],analysis,2018-11-30 00:36:00,5000,False,,,5.762412
2,2,2019-01-29 13:39:16.848,14999,[10000:14999],analysis,2018-12-30 07:12:00,10000,False,,,2.653961
3,3,2019-02-28 20:15:16.848,19999,[15000:19999],analysis,2019-01-29 13:48:00,15000,False,,,0.070843
4,4,2019-03-31 02:51:16.848,24999,[20000:24999],analysis,2019-02-28 20:24:00,20000,False,,,1.005422
5,5,2019-04-30 09:27:16.848,29999,[25000:29999],analysis,2019-03-31 03:00:00,25000,True,,,455.622094
6,6,2019-05-30 16:03:16.848,34999,[30000:34999],analysis,2019-04-30 09:36:00,30000,True,,,428.633384
7,7,2019-06-29 22:39:16.848,39999,[35000:39999],analysis,2019-05-30 16:12:00,35000,True,,,453.247444
8,8,2019-07-30 05:15:16.848,44999,[40000:44999],analysis,2019-06-29 22:48:00,40000,True,,,438.25997
9,9,2019-08-29 11:51:16.848,49999,[45000:49999],analysis,2019-07-30 05:24:00,45000,True,,,474.891775


In [None]:
# Filtered export, printed as a grid-formatted markdown table.
filtered_markdown = filtered_results.to_df().to_markdown(tablefmt="grid")
print(filtered_markdown)


+----+-------------------------------------+----------------------------------+-----------------------------------+-----------------------------+--------------------------------+------------------------------------+-------------------------------------+-------------------------------------+-----------------------------------------------+-----------------------------------------------+-------------------------------------+
|    |   ('chunk', 'chunk', 'chunk_index') | ('chunk', 'chunk', 'end_date')   |   ('chunk', 'chunk', 'end_index') | ('chunk', 'chunk', 'key')   | ('chunk', 'chunk', 'period')   | ('chunk', 'chunk', 'start_date')   |   ('chunk', 'chunk', 'start_index') | ('salary_range', 'chi2', 'alert')   | ('salary_range', 'chi2', 'lower_threshold')   | ('salary_range', 'chi2', 'upper_threshold')   |   ('salary_range', 'chi2', 'value') |
|  0 |                                   0 | 2018-11-30 00:27:16.848000       |                              4999 | [0:4999]                    | an

In [None]:
# multilevel=False requests single-level column names instead of a
# multi-level column header.
flat_filtered_df = filtered_results.to_df(multilevel=False)
display(flat_filtered_df)


Unnamed: 0,chunk_index,chunk_end_date,chunk_end_index,chunk_key,chunk_period,chunk_start_date,chunk_start_index,salary_range_chi2_alert,salary_range_chi2_lower_threshold,salary_range_chi2_upper_threshold,salary_range_chi2_value
0,0,2018-11-30 00:27:16.848,4999,[0:4999],analysis,2018-10-30 18:00:00,0,False,,,1.033683
1,1,2018-12-30 07:03:16.848,9999,[5000:9999],analysis,2018-11-30 00:36:00,5000,False,,,5.762412
2,2,2019-01-29 13:39:16.848,14999,[10000:14999],analysis,2018-12-30 07:12:00,10000,False,,,2.653961
3,3,2019-02-28 20:15:16.848,19999,[15000:19999],analysis,2019-01-29 13:48:00,15000,False,,,0.070843
4,4,2019-03-31 02:51:16.848,24999,[20000:24999],analysis,2019-02-28 20:24:00,20000,False,,,1.005422
5,5,2019-04-30 09:27:16.848,29999,[25000:29999],analysis,2019-03-31 03:00:00,25000,True,,,455.622094
6,6,2019-05-30 16:03:16.848,34999,[30000:34999],analysis,2019-04-30 09:36:00,30000,True,,,428.633384
7,7,2019-06-29 22:39:16.848,39999,[35000:39999],analysis,2019-05-30 16:12:00,35000,True,,,453.247444
8,8,2019-07-30 05:15:16.848,44999,[40000:44999],analysis,2019-06-29 22:48:00,40000,True,,,438.25997
9,9,2019-08-29 11:51:16.848,49999,[45000:49999],analysis,2019-07-30 05:24:00,45000,True,,,474.891775


In [None]:
# Single-level export of the filtered results, printed as a grid-formatted markdown table.
flat_filtered_markdown = filtered_results.to_df(multilevel=False).to_markdown(tablefmt="grid")
print(flat_filtered_markdown)

+----+---------------+----------------------------+-------------------+---------------+----------------+---------------------+---------------------+---------------------------+-------------------------------------+-------------------------------------+---------------------------+
|    |   chunk_index | chunk_end_date             |   chunk_end_index | chunk_key     | chunk_period   | chunk_start_date    |   chunk_start_index | salary_range_chi2_alert   | salary_range_chi2_lower_threshold   | salary_range_chi2_upper_threshold   |   salary_range_chi2_value |
|  0 |             0 | 2018-11-30 00:27:16.848000 |              4999 | [0:4999]      | analysis       | 2018-10-30 18:00:00 |                   0 | False                     |                                     |                                     |                 1.03368   |
+----+---------------+----------------------------+-------------------+---------------+----------------+---------------------+---------------------+---------

In [None]:
# Write the drift results through a DatabaseWriter pointed at a SQLite URL
# for 'nml.db'.
sqlite_url = 'sqlite:///nml.db'
database_writer = nml.DatabaseWriter(connection_string=sqlite_url)
database_writer.write(results)

In [None]:
import sqlite3
# Open the SQLite file directly and keep a cursor around for the queries below.
connection = sqlite3.connect('nml.db')
cursor = connection.cursor()

# List the tables present in the database.
table_names_query = """SELECT name FROM sqlite_master WHERE type='table'"""
table_rows = cursor.execute(table_names_query).fetchall()
print(table_rows)

[('model',), ('run',), ('univariate_drift_metrics',), ('data_reconstruction_feature_drift_metrics',), ('realized_performance_metrics',), ('cbpe_performance_metrics',), ('dle_performance_metrics',)]


In [None]:
# Peek at the first three rows of the univariate drift metrics table.
sample_rows_query = """SELECT * FROM univariate_drift_metrics LIMIT 3"""
sample_rows = cursor.execute(sample_rows_query).fetchall()
print(sample_rows)

[(1, None, 1, '2018-10-30 18:00:00.000000', '2018-11-30 00:27:16.848000', '2018-11-14 21:13:38.424000', 'kolmogorov_smirnov', 0.013080000000022962, 0, 'car_value'), (2, None, 1, '2018-11-30 00:36:00.000000', '2018-12-30 07:03:16.848000', '2018-12-15 03:49:38.424000', 'kolmogorov_smirnov', 0.01106000000000218, 0, 'car_value'), (3, None, 1, '2018-12-30 07:12:00.000000', '2019-01-29 13:39:16.848000', '2019-01-14 10:25:38.424000', 'kolmogorov_smirnov', 0.016619999999999663, 0, 'car_value')]


In [None]:
# Clean up the demo database without relying on a shell `rm`, which is not
# available on every platform.
import os

# The connection opened for `cursor` earlier in the notebook was never closed;
# close it first so the file is not deleted while still held open.
cursor.connection.close()

# Guard against a missing file so re-running this cell does not raise.
if os.path.exists('nml.db'):
    os.remove('nml.db')