In [None]:
# !pip install nannyml scipy pandas numpy scikit-learn frouros

In [70]:
import nannyml as nml

# Load built-in synthetic data
reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

# Define input features (numeric columns only)
feature_cols = [
    'car_value',
    'salary_range',
    'debt_to_income_ratio',
    'loan_length',
    'repaid_loan_on_prev_car',
    'size_of_downpayment',
    'driver_tenure'
]

# Initialize without a timestamp, chunking by rows (e.g., 1000 rows per chunk)
calc = nml.DomainClassifierCalculator(
    feature_column_names=feature_cols,
    chunk_size=1000
)



In [71]:

# Train on reference, detect on analysis
calc.fit(reference_df)
results = calc.calculate(analysis_df)



[LightGBM] [Info] Number of positive: 800, number of negative: 39200
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 797
[LightGBM] [Info] Number of data points in the train set: 40000, number of used features: 7
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.020000 -> initscore=-3.891820
[LightGBM] [Info] Start training from score -3.891820
[LightGBM] [Info] Number of positive: 800, number of negative: 39200
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 797
[LightGBM] [Info] Number of data points in the train set: 40000, number of used features: 7
[LightGBM] [Info] [binary:

In [72]:
# Extract and view drift scores and alerts
df = results.filter(period='analysis').to_df()
dc = df['domain_classifier_auroc'][['value', 'upper_threshold', 'lower_threshold', 'alert']]
print(dc)


       value  upper_threshold  lower_threshold  alert
0   0.491677             0.65             0.45  False
1   0.518979             0.65             0.45  False
2   0.514292             0.65             0.45  False
3   0.497608             0.65             0.45  False
4   0.511237             0.65             0.45  False
5   0.493480             0.65             0.45  False
6   0.509624             0.65             0.45  False
7   0.498208             0.65             0.45  False
8   0.483835             0.65             0.45  False
9   0.513914             0.65             0.45  False
10  0.494633             0.65             0.45  False
11  0.496519             0.65             0.45  False
12  0.485175             0.65             0.45  False
13  0.485592             0.65             0.45  False
14  0.487896             0.65             0.45  False
15  0.513866             0.65             0.45  False
16  0.501248             0.65             0.45  False
17  0.493640             0.6

In [74]:

# Visualize drift over chunks
import matplotlib.pyplot as plt
fig = results.plot()
fig.show()

In [76]:
reference_df

Unnamed: 0,id,car_value,salary_range,debt_to_income_ratio,loan_length,repaid_loan_on_prev_car,size_of_downpayment,driver_tenure,repaid,timestamp,y_pred_proba,y_pred
0,0,39811.0,40K - 60K €,0.632950,19.0,False,40%,0.212653,1.0,2018-01-01 00:00:00.000,0.99,1
1,1,12679.0,40K - 60K €,0.718627,7.0,True,10%,4.927549,0.0,2018-01-01 00:08:43.152,0.07,0
2,2,19847.0,40K - 60K €,0.721724,17.0,False,0%,0.520817,1.0,2018-01-01 00:17:26.304,1.00,1
3,3,22652.0,20K - 40K €,0.705992,16.0,False,10%,0.453649,1.0,2018-01-01 00:26:09.456,0.98,1
4,4,21268.0,60K+ €,0.671888,21.0,True,30%,5.695263,1.0,2018-01-01 00:34:52.608,0.99,1
...,...,...,...,...,...,...,...,...,...,...,...,...
49995,49995,21780.0,20K - 40K €,0.714499,20.0,False,40%,3.960767,1.0,2018-10-30 17:16:24.240,0.99,1
49996,49996,34489.0,0 - 20K €,0.380681,20.0,False,30%,6.804872,0.0,2018-10-30 17:25:07.392,0.03,0
49997,49997,14349.0,40K - 60K €,0.686617,18.0,True,20%,5.101474,1.0,2018-10-30 17:33:50.544,0.98,1
49998,49998,61683.0,0 - 20K €,0.331308,20.0,False,40%,6.738282,0.0,2018-10-30 17:42:33.696,0.00,0


In [77]:
# Remove 'repaid' if not present in analysis_df
feature_cols = [
    col
    for col in reference_df.columns
    if col not in ['timestamp', 'identifier', 'period', 'work_home_actual', 'y_pred_proba', 'y_pred', 'repaid']
    or (col == 'repaid' and col in analysis_df.columns)
]

calc = nml.UnivariateDriftCalculator(
    column_names=feature_cols,
    continuous_methods=['kolmogorov_smirnov'],  # univariate KS test
    chunk_size=5000  # how many rows per chunk
)
calc = calc.fit(reference_df)
results = calc.calculate(analysis_df)


In [80]:
# Ensure columns are sorted for MultiIndex slicing
uni_df = results.filter(period='analysis').to_df()
uni_df = uni_df.sort_index(axis=1)

# Example: select all columns with 'kolmogorov_smirnov' and 'value', 'upper_threshold', 'lower_threshold', 'alert'
ks_cols = [col for col in uni_df.columns if col[1] == 'kolmogorov_smirnov']
ks_df = uni_df.loc[:, ks_cols]
ks_df



Unnamed: 0_level_0,car_value,car_value,car_value,car_value,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,debt_to_income_ratio,driver_tenure,driver_tenure,driver_tenure,driver_tenure,id,id,id,id,loan_length,loan_length,loan_length,loan_length
Unnamed: 0_level_1,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov,kolmogorov_smirnov
Unnamed: 0_level_2,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value,alert,lower_threshold,upper_threshold,value
0,False,,0.019426,0.01308,False,,0.018584,0.01576,True,,0.017342,0.02114,False,,1,1.0,False,,0.016691,0.00884
1,False,,0.019426,0.01106,False,,0.018584,0.01268,False,,0.017342,0.00994,False,,1,1.0,False,,0.016691,0.01418
2,False,,0.019426,0.01662,False,,0.018584,0.01734,True,,0.017342,0.02362,False,,1,1.0,False,,0.016691,0.0124
3,False,,0.019426,0.01434,False,,0.018584,0.0128,False,,0.017342,0.0143,False,,1,1.0,False,,0.016691,0.01298
4,False,,0.019426,0.01116,True,,0.018584,0.01918,False,,0.017342,0.00906,False,,1,1.0,False,,0.016691,0.01022
5,True,,0.019426,0.4353,False,,0.018584,0.00824,False,,0.017342,0.00698,False,,1,1.0,True,,0.016691,0.17992
6,True,,0.019426,0.43028,False,,0.018584,0.01058,False,,0.017342,0.00826,False,,1,1.0,True,,0.016691,0.18032
7,True,,0.019426,0.43772,False,,0.018584,0.01002,False,,0.017342,0.01382,False,,1,1.0,True,,0.016691,0.19572
8,True,,0.019426,0.43602,False,,0.018584,0.01068,False,,0.017342,0.0088,False,,1,1.0,True,,0.016691,0.18212
9,True,,0.019426,0.43838,False,,0.018584,0.0068,False,,0.017342,0.0062,False,,1,1.0,True,,0.016691,0.19872


In [82]:
fig = results.plot(kind='drift')
fig.show()
