In [None]:
import json

import alibi
from alibi_detect.cd import ChiSquareDrift, TabularDrift
from alibi_detect.utils.saving import save_detector, load_detector
from sklearn import metrics
from sklearn.datasets import load_wine
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [None]:
# Load the scikit-learn wine dataset and pull out the pieces used below:
# the feature matrix, the target vector and the feature names.
wine_data = load_wine()
X = wine_data.data
y = wine_data.target
feature_names = wine_data.feature_names

# Split the samples in half: one half becomes the reference set handed to the
# drift detector, the other half is the test set. The seed is fixed so the
# split is repeatable.
X_ref, X_test, y_ref, y_test = train_test_split(
    X, y, test_size=0.50, random_state=42
)

In [None]:
# Build the tabular drift detector around the reference half of the data,
# passing 0.05 as its p-value setting.
cd = TabularDrift(
    X_ref=X_ref,
    p_val=0.05,
)

In [None]:
# Run the detector on the unmodified test half and print the overall verdict.
# The detector's is_drift value is used as an index into `labels`.
labels = ['No', 'Yes']
preds = cd.predict(X_test)
print(
    'Drift: {}'.format(
        labels[preds['data']['is_drift']]
    )
)

In [None]:
# Simulate a calibration error by scaling every test value by 1.1, then run
# the detector on the scaled copy and print the overall verdict.
labels = ['No', 'Yes']
X_test_cal_error = 1.1 * X_test
preds = cd.predict(X_test_cal_error)
print(
    'Drift: {}'.format(
        labels[preds['data']['is_drift']]
    )
)

In [None]:
# Feature-level drift check: every value in the test half is offset by 4 and
# the detector is asked for per-feature results instead of a single verdict.
fpreds = cd.predict(
    X_test + 4,
    drift_type='feature',
)

In [None]:
# Collect one summary record per feature from the feature-level predictions
# in `fpreds`. Each record holds the feature name, the statistic label, the
# test statistic, a Yes/No drift verdict and the p-value.
# Relies on `cd`, `fpreds`, `feature_names` and `labels` from earlier cells.
stat = 'K-S'  # all features in this dataset are numeric, so one label fits every row
results = []
for f in range(cd.n_features):
    fname = feature_names[f]
    is_drift = fpreds['data']['is_drift'][f]
    stat_val = fpreds['data']['distance'][f]
    p_val = fpreds['data']['p_val'][f]

    results.append(
        {
            'feature': fname,
            'statistic': stat,  # reuse the single label instead of a second literal
            # float() turns the values into plain Python floats for the record.
            'statisticValue': float(stat_val),
            'driftResult': labels[is_drift],
            'pValue': float(p_val),
        }
    )

In [None]:
# Pretty-print the per-feature drift records as JSON, indented by four spaces
# with the keys of each record sorted. `json` is the standard-library module
# and must be imported before this cell runs.
print(json.dumps(results, indent=4, sort_keys=True))