# Data Tracker usage example on tabular data

In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from alibi.datasets import fetch_adult

from odcd.metrics import DataTracker

## Load dataset
We fetch the adult dataset and get the categorical map used to initialize the `DataTracker`.

In [2]:
# Fetch the adult dataset using the `fetch_adult` helper imported from alibi.
data = fetch_adult()

In [3]:
# Unpack the feature data and the categorical map from the fetched dataset.
X, cat_vars = data.data, data.category_map

## Initialize `DataTracker` object

In [4]:
# Track one feature per column of X; cat_vars is the dataset's category map.
dt = DataTracker(n_features=X.shape[1], cat_vars=cat_vars)

## Simulate updating data
The `DataTracker` supports batch updating, so we can simulate a sequence of events by just passing the whole dataset to the `update` function at once.

In [5]:
# Feed the entire dataset to the tracker as a single batch update.
dt.update(X)

## Inspect metrics
We can use the `get` function to retrieve the data metrics held by the `DataTracker`. Setting `serialize=True` (it is `False` by default) converts all `numpy` types to Python native types.

In [6]:
# serialize=False leaves numpy types unconverted; serialize=True would convert
# them to Python native types.
stats = dt.get(serialize=False)

In [7]:
# Bare last expression: the notebook renders the metrics dict as the cell output.
stats

{0: {'mean': 38.581646755321145,
  'variance': 186.06140024880193,
  'median': 37.01144792313534,
  'histogram': {0: {'left': 17, 'right': 17, 'count': 395},
   1: {'left': 18, 'right': 18, 'count': 550},
   2: {'left': 19, 'right': 19, 'count': 712},
   3: {'left': 20, 'right': 20, 'count': 753},
   4: {'left': 21, 'right': 21, 'count': 720},
   5: {'left': 22, 'right': 22, 'count': 765},
   6: {'left': 23, 'right': 23, 'count': 877},
   7: {'left': 24, 'right': 24, 'count': 798},
   8: {'left': 25, 'right': 25, 'count': 841},
   9: {'left': 26, 'right': 26, 'count': 785},
   10: {'left': 27, 'right': 27, 'count': 835},
   11: {'left': 28, 'right': 28, 'count': 867},
   12: {'left': 29, 'right': 29, 'count': 813},
   13: {'left': 30, 'right': 30, 'count': 861},
   14: {'left': 31, 'right': 31, 'count': 888},
   15: {'left': 32, 'right': 32, 'count': 828},
   16: {'left': 33, 'right': 33, 'count': 875},
   17: {'left': 34, 'right': 34, 'count': 886},
   18: {'left': 35, 'right': 35, 'c