In [1]:
import torch
import pandas as pd
import polars as pl
import nannyml as nml

from nannyml.thresholds import ConstantThreshold

In [2]:
cur_df = pl.read_csv('data/preprocessed/train.csv')
old_df = pl.read_csv('data/preprocessed/val.csv')

# Add noise to prediction to simulate prediction drift
cur_df = cur_df.with_columns(pl.col('Pawpularity').add(3).alias('Prediction')).to_pandas()
old_df = old_df.with_columns(pl.col('Pawpularity').add(5).alias('Prediction')).to_pandas()

In [3]:
# All columns except image id
all_cols = [col for col in cur_df.columns if col != 'Id']
# Target and prediction
cont_cols = ['Pawpularity', 'Prediction']
# Binary features, can be treated as categorical or continuous
bin_cols = [col for col in all_cols if col not in cont_cols]

In [4]:
calc = nml.UnivariateDriftCalculator(
    column_names = all_cols,
    # treat_as_categorical = bin_cols,
    continuous_methods = ['jensen_shannon'],
    # categorical_methods = ['jensen_shannon'],
    chunk_size = 100,
    thresholds = {
        # Jensen-Shannon score is not the same as p-value
        # 0 means identical data and 1 means very different
        'jensen_shannon': ConstantThreshold(upper = 0.1)
    }
)

calc.fit(old_df)
drift_result = calc.calculate(cur_df)

with pd.option_context('display.max_columns', None): 
    display(drift_result.to_df(multilevel = True))

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,Accessory,Accessory,Accessory,Accessory,Action,Action,Action,Action,Blur,Blur,Blur,Blur,Collage,Collage,Collage,Collage,Eyes,Eyes,Eyes,Eyes,Face,Face,Face,Face,Group,Group,Group,Group,Human,Human,Human,Human,Info,Info,Info,Info,Near,Near,Near,Near,Occlusion,Occlusion,Occlusion,Occlusion,Pawpularity,Pawpularity,Pawpularity,Pawpularity,Prediction,Prediction,Prediction,Prediction,Subject Focus,Subject Focus,Subject Focus,Subject Focus
Unnamed: 0_level_1,chunk,chunk,chunk,chunk,chunk,chunk,chunk,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon,jensen_shannon
Unnamed: 0_level_2,key,chunk_index,start_index,end_index,start_date,end_date,period,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert,value,upper_threshold,lower_threshold,alert
0,[0:99],0,0,99,,,reference,0.053003,0.1,,False,0.024846,0.1,,False,0.008464,0.1,,False,0.01198,0.1,,False,0.020184,0.1,,False,0.006855,0.1,,False,0.028332,0.1,,False,0.005236,0.1,,False,0.009522,0.1,,False,0.017722,0.1,,False,0.005386,0.1,,False,0.138204,0.1,,True,0.138204,0.1,,True,0.088597,0.1,,False
1,[100:199],1,100,199,,,reference,0.04243,0.1,,False,0.050045,0.1,,False,0.008773,0.1,,False,0.01118,0.1,,False,0.020855,0.1,,False,0.007001,0.1,,False,0.031344,0.1,,False,0.005284,0.1,,False,0.009122,0.1,,False,0.018501,0.1,,False,0.005333,0.1,,False,0.087201,0.1,,False,0.087201,0.1,,False,0.06292,0.1,,False
2,[0:99],0,0,99,,,analysis,0.057581,0.1,,False,0.059304,0.1,,False,0.008773,0.1,,False,0.031656,0.1,,False,0.031569,0.1,,False,0.032997,0.1,,False,0.014486,0.1,,False,0.005236,0.1,,False,0.009522,0.1,,False,0.087914,0.1,,False,0.016322,0.1,,False,0.130759,0.1,,True,0.16129,0.1,,True,0.121485,0.1,,True
3,[100:199],1,100,199,,,analysis,0.098767,0.1,,False,0.050045,0.1,,False,0.039785,0.1,,False,0.01118,0.1,,False,0.053652,0.1,,False,0.108234,0.1,,True,0.014486,0.1,,False,0.03575,0.1,,False,0.057581,0.1,,False,0.006074,0.1,,False,0.056212,0.1,,False,0.198702,0.1,,True,0.213911,0.1,,True,0.175127,0.1,,True
4,[200:299],2,200,299,,,analysis,0.057581,0.1,,False,0.024846,0.1,,False,0.027399,0.1,,False,0.067251,0.1,,False,0.0,0.1,,False,0.06911,0.1,,False,0.054424,0.1,,False,0.154114,0.1,,True,0.030007,0.1,,False,0.040322,0.1,,False,0.062666,0.1,,False,0.207823,0.1,,True,0.170869,0.1,,True,0.039095,0.1,,False
5,[300:399],3,300,399,,,analysis,0.098767,0.1,,False,0.085208,0.1,,False,0.024585,0.1,,False,0.031656,0.1,,False,0.0,0.1,,False,0.036694,0.1,,False,0.066793,0.1,,False,0.098328,0.1,,False,0.053003,0.1,,False,0.005988,0.1,,False,0.114645,0.1,,True,0.148334,0.1,,True,0.147374,0.1,,True,0.121485,0.1,,True
6,[400:499],4,400,499,,,analysis,0.057581,0.1,,False,0.050045,0.1,,False,0.047818,0.1,,False,0.073625,0.1,,False,0.031569,0.1,,False,0.036694,0.1,,False,0.054424,0.1,,False,0.038103,0.1,,False,0.079721,0.1,,False,0.018501,0.1,,False,0.100975,0.1,,True,0.275556,0.1,,True,0.269516,0.1,,True,0.018639,0.1,,False
7,[500:599],5,500,599,,,analysis,0.009522,0.1,,False,0.050045,0.1,,False,0.047818,0.1,,False,0.067251,0.1,,False,0.010337,0.1,,False,0.007001,0.1,,False,0.028332,0.1,,False,0.06123,0.1,,False,0.009522,0.1,,False,0.018501,0.1,,False,0.062666,0.1,,False,0.212023,0.1,,True,0.192284,0.1,,True,0.062021,0.1,,False
8,[600:699],6,600,699,,,analysis,0.053003,0.1,,False,0.024846,0.1,,False,0.024585,0.1,,False,0.050193,0.1,,False,0.010337,0.1,,False,0.052763,0.1,,False,0.041622,0.1,,False,0.015573,0.1,,False,0.009122,0.1,,False,0.044624,0.1,,False,0.05064,0.1,,False,0.163397,0.1,,True,0.204167,0.1,,True,0.062021,0.1,,False
9,[700:799],7,700,799,,,analysis,0.04243,0.1,,False,0.024846,0.1,,False,0.024585,0.1,,False,0.050193,0.1,,False,0.122706,0.1,,True,0.036694,0.1,,False,0.028332,0.1,,False,0.111486,0.1,,True,0.030007,0.1,,False,0.040322,0.1,,False,0.05064,0.1,,False,0.171522,0.1,,True,0.235969,0.1,,True,0.121485,0.1,,True


In [5]:
with pd.option_context('display.max_columns', None): 
    test = drift_result.filter(period = 'analysis').to_df(multilevel = True)
    # If True more than 30% (0.3) then assume the data drifted
    display(test['Accessory','jensen_shannon','alert'].value_counts(normalize = True).get(True, 0))

0

In [6]:
fig = drift_result.plot(kind = 'drift')
fig.update_layout(width = 800)
fig.show()

In [7]:
fig = drift_result.plot(kind = 'distribution')
fig.update_layout(width = 800)
fig.show()

Red color means drift is detected, while purple is considered normal (no drift)

Now let's see the performance calculator

In [8]:
calc = nml.PerformanceCalculator(
    metrics = ['rmse'],
    problem_type = 'regression',
    y_pred = 'Prediction',
    y_true = 'Pawpularity',
    chunk_size = 100,
    thresholds = {
        # Pawpularity range is from 0 to 100
        # Error of 25 is bad enough, or even very bad
        'rmse': ConstantThreshold(upper = 25)
    }
)

calc.fit(old_df)
perf_result = calc.calculate(cur_df)

with pd.option_context('display.max_columns', None): 
    display(perf_result.to_df(multilevel = True))

Unnamed: 0_level_0,chunk,chunk,chunk,chunk,chunk,chunk,chunk,chunk,rmse,rmse,rmse,rmse,rmse
Unnamed: 0_level_1,key,chunk_index,start_index,end_index,start_date,end_date,period,targets_missing_rate,sampling_error,value,upper_threshold,lower_threshold,alert
0,[0:99],0,0,99,,,reference,0.0,0.0,5.0,25,,False
1,[100:199],1,100,199,,,reference,0.0,0.0,5.0,25,,False
2,[0:99],0,0,99,,,analysis,0.0,0.0,3.0,25,,False
3,[100:199],1,100,199,,,analysis,0.0,0.0,3.0,25,,False
4,[200:299],2,200,299,,,analysis,0.0,0.0,3.0,25,,False
5,[300:399],3,300,399,,,analysis,0.0,0.0,3.0,25,,False
6,[400:499],4,400,499,,,analysis,0.0,0.0,3.0,25,,False
7,[500:599],5,500,599,,,analysis,0.0,0.0,3.0,25,,False
8,[600:699],6,600,699,,,analysis,0.0,0.0,3.0,25,,False
9,[700:799],7,700,799,,,analysis,0.0,0.0,3.0,25,,False


In [9]:
fig = perf_result.plot(kind = 'performance')
fig.update_layout(width = 800)
fig.show()

Now let's see the rank

In [10]:
ranker = nml.CorrelationRanker()
ranker.fit(perf_result.filter(period = 'reference', metrics = ['rmse']))

rank_df = ranker.rank(
    drift_result.filter(period = 'analysis', metrics = ['jensen_shannon']),
    perf_result.filter(period = 'analysis', metrics = ['rmse']),
    only_drifting = False
)

rank_df


An input array is constant; the correlation coefficient is not defined.



Unnamed: 0,column_name,pearsonr_correlation,pearsonr_pvalue,has_drifted,rank
0,Accessory,,,False,1
1,Action,,,False,2
2,Blur,,,False,3
3,Collage,,,False,4
4,Eyes,,,True,5
5,Face,,,True,6
6,Group,,,False,7
7,Human,,,True,8
8,Info,,,False,9
9,Near,,,False,10


This rank is not usable, sometimes it's nan or constantly at 1.0

Currently I don't have time to debug this, so we will just make our own table later as a workaround