# Drift detection (LSDD and KS)

In [1]:
# Reload modules every time before executing the Python code typed
%load_ext autoreload
%autoreload 2

# Import from vm notebook dir
import sys; sys.path.insert(0, '../../')

from ExplainingDriftTextEmbeddings.access.interim_storage import InterimStorage
from alibi_detect.cd import LSDDDrift
from alibi_detect.cd import KSDrift
import numpy as np

2022-02-10 10:25:27.402770: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-10 10:25:27.402815: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Get installed Alibi version
#import alibi_detect
#alibi_detect.__version__
# -> '0.7.0'

## Read data

In [14]:
# Load the stored embeddings; the result is iterated as a mapping of
# split key -> collection of embeddings (see the .items() loop below).
splits_embeddings = InterimStorage('tmp_embeddings_list').read()

# Report how many embeddings each split contains.
for split_key, embeddings in splits_embeddings.items():
    print(split_key, len(embeddings))

3 10000
4 10000


## Drift detection

### LSDD - Least-Squares Density Difference

In [4]:
# https://docs.seldon.io/projects/alibi-detect/en/v0.7.0/methods/lsdddrift.html
# https://docs.seldon.io/projects/alibi-detect/en/stable/cd/methods/lsdddrift.html?highlight=lsdddrift#Initialize
# https://github.com/EML4U/Drift-detector-comparison/blob/1.0.0/detectors/AlibiLSDD.py
def lsdd_predict(lsdd, list_):
    """Run an LSDD drift detector on ``list_`` and print the outcome.

    Args:
        lsdd: Detector object exposing ``predict``; its result must provide
            a ``'data'`` entry holding ``'is_drift'`` and ``'p_val'``.
        list_: Samples to test; converted with ``np.array`` before being
            passed to the detector.

    Returns:
        None. The drift flag and the p-value are printed to stdout.
    """
    samples = np.array(list_)
    data = lsdd.predict(samples).get('data')
    print('is_drift', data.get('is_drift'))
    print('p_val   ', data.get('p_val'))

In [5]:
# Build the LSDD detector with split 3 as reference data.
lsdd = LSDDDrift(
    np.array(splits_embeddings[3]),
    backend='pytorch',
    p_val=.05,
)

# First test the reference split against itself, then split 4 against it.
for split_key in (3, 4):
    lsdd_predict(lsdd, splits_embeddings[split_key])

No GPU detected, fall back on CPU.
is_drift 0
p_val    1.0
is_drift 1
p_val    0.0


### KS - Kolmogorov-Smirnov

In [6]:
# https://docs.seldon.io/projects/alibi-detect/en/v0.7.0/methods/ksdrift.html
# https://docs.seldon.io/projects/alibi-detect/en/stable/cd/methods/ksdrift.html
# https://github.com/EML4U/Drift-detector-comparison/blob/1.0.0/detectors/AlibiKSDetector.py
def ks_predict(ks, list_):
    """Run a KS drift detector on ``list_`` and print the outcome.

    Args:
        ks: Detector object exposing ``predict``; its result must provide
            a ``'data'`` entry holding ``'is_drift'`` and ``'p_val'``.
        list_: Samples to test; converted with ``np.array`` before being
            passed to the detector.

    Returns:
        None. The drift flag and the mean of the returned p-values are
        printed to stdout.
    """
    samples = np.array(list_)
    data = ks.predict(samples).get('data')
    print('is_drift', data.get('is_drift'))
    # 'p_val' is averaged before printing (presumably one p-value per
    # feature -- confirm against the alibi-detect KSDrift documentation).
    mean_p_val = np.mean(data.get('p_val'))
    print('p_val   ', mean_p_val)

In [7]:
# Build the KS detector with split 3 as reference data (no extra arguments).
ks = KSDrift(np.array(splits_embeddings[3]))

# First test the reference split against itself, then split 4 against it.
for split_key in (3, 4):
    ks_predict(ks, splits_embeddings[split_key])

is_drift 0
p_val    1.0
is_drift 1
p_val    0.0009217585
