# Drift Detection - Alibi Detect Examples

Adult Data Example https://docs.seldon.io/projects/alibi-detect/en/latest/examples/cd_chi2ks_adult.html

In [1]:
import alibi
import matplotlib.pyplot as plt
import numpy as np

from alibi_detect.cd import ChiSquareDrift, TabularDrift
from alibi_detect.utils.saving import save_detector, load_detector

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


# Load Data

In [2]:
adult = alibi.datasets.fetch_adult()
X, y = adult.data, adult.target
feature_names = adult.feature_names
category_map = adult.category_map
X.shape, y.shape

((32561, 12), (32561,))

In [3]:
X

array([[39,  7,  1, ...,  0, 40,  9],
       [50,  6,  1, ...,  0, 13,  9],
       [38,  4,  4, ...,  0, 40,  9],
       ...,
       [58,  4,  4, ...,  0, 40,  9],
       [22,  4,  4, ...,  0, 20,  9],
       [52,  5,  4, ...,  0, 40,  9]], dtype=int64)

In [4]:
y

array([0, 0, 0, ..., 0, 0, 1])

In [5]:
feature_names

['Age',
 'Workclass',
 'Education',
 'Marital Status',
 'Occupation',
 'Relationship',
 'Race',
 'Sex',
 'Capital Gain',
 'Capital Loss',
 'Hours per week',
 'Country']

In [6]:
category_map

{1: ['?',
  'Federal-gov',
  'Local-gov',
  'Never-worked',
  'Private',
  'Self-emp-inc',
  'Self-emp-not-inc',
  'State-gov',
  'Without-pay'],
 2: ['Associates',
  'Bachelors',
  'Doctorate',
  'Dropout',
  'High School grad',
  'Masters',
  'Prof-School'],
 3: ['Married', 'Never-Married', 'Separated', 'Widowed'],
 4: ['?',
  'Admin',
  'Blue-Collar',
  'Military',
  'Other',
  'Professional',
  'Sales',
  'Service',
  'White-Collar'],
 5: ['Husband',
  'Not-in-family',
  'Other-relative',
  'Own-child',
  'Unmarried',
  'Wife'],
 6: ['Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Black', 'Other', 'White'],
 7: ['Female', 'Male'],
 11: ['?',
  'British-Commonwealth',
  'China',
  'Euro_1',
  'Euro_2',
  'Latin-America',
  'Other',
  'SE-Asia',
  'South-America',
  'United-States',
  'Yugoslavia']}

# Split data into reference and x2 test sets

In [7]:
n_ref = 10000
n_test = 10000

X_ref, X_t0, X_t1 = X[:n_ref], X[:n_ref:n_ref + n_test], X[n_ref + n_test:n_ref + 2 * n_test]
X_ref.shape, X_t0.shape, X_t1.shape

((10000, 12), (1, 12), (10000, 12))

In [8]:
X_ref

array([[39,  7,  1, ...,  0, 40,  9],
       [50,  6,  1, ...,  0, 13,  9],
       [38,  4,  4, ...,  0, 40,  9],
       ...,
       [21,  4,  4, ...,  0, 40,  9],
       [37,  4,  3, ...,  0, 53,  5],
       [39,  4,  4, ...,  0, 40,  9]], dtype=int64)

In [9]:
X_t0

array([[  39,    7,    1,    1,    1,    1,    4,    1, 2174,    0,   40,
           9]], dtype=int64)

In [10]:
X_t1

array([[63,  4,  4, ...,  0, 40,  9],
       [36,  4,  1, ...,  0, 50,  9],
       [25,  4,  1, ...,  0, 50,  9],
       ...,
       [61,  2,  4, ...,  0, 35,  9],
       [41,  4,  4, ...,  0, 40,  9],
       [36,  4,  4, ...,  0, 50,  9]], dtype=int64)

# Let detector infer categories - si set as 'None'

In [11]:
categories_per_feature = {f: None for f in list(category_map.keys())}

In [12]:
categories_per_feature

{1: None, 2: None, 3: None, 4: None, 5: None, 6: None, 7: None, 11: None}

# Initialise detector for tabular data

In [13]:
cd = TabularDrift(p_val=.05, x_ref=X_ref, categories_per_feature=categories_per_feature)

# Check if x2 test sets are drifting from reference data

In [14]:
preds = cd.predict(X_t0)
labels = ['No!', 'Yes!']
print('Drift? {}'.format(labels[preds['data']['is_drift']]))

Drift? Yes!
