In [57]:
import numpy as np
from scipy.special import rel_entr, kl_div
from scipy.stats import entropy, ks_2samp
from scipy.spatial.distance import jensenshannon

# Lifelong learning

###### Continual Lifelong Learning with Neural Networks: A Review
https://arxiv.org/abs/1802.07569

# Feature drift

## Total Variation Distance
### $TVD(P, Q) = \frac{1}{2}\sum_x |P(x) - Q(x)|$

http://data8.org/fa15/text/3_inference.html \
https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-dg.pdf#clarify-data-bias-metric-total-variation-distance

In [20]:
# Two example discrete probability distributions defined over the same five
# bins; the metrics in this notebook compare dist1 against dist2.
dist1 = np.array([0.15, 0.18, 0.12, 0.54, 0.01])
dist2 = np.array([0.26, 0.08, 0.08, 0.54, 0.04])

# Fail fast if either vector stops being a valid probability distribution,
# e.g. after someone edits the numbers above.
assert dist1.shape == dist2.shape, "distributions must share the same bins"
assert (dist1 >= 0).all() and (dist2 >= 0).all(), "probabilities must be non-negative"
assert np.isclose(dist1.sum(), 1.0) and np.isclose(dist2.sum(), 1.0), "probabilities must sum to 1"

In [24]:
# Total variation distance: half of the summed absolute per-bin differences.
abs_diff = np.abs(dist1 - dist2)
tvd = sum(abs_diff) / 2
tvd

0.14

## Kullback-Leibler Divergence

### $D_{KL}(P||Q) = \sum P(x)\log(\frac{P(x)}{Q(x)})$

https://machinelearningmastery.com/divergence-between-probability-distributions/

In [44]:
# Three SciPy routes to D_KL(dist1 || dist2), printed one per line.
kl_estimates = [
    ('rel_entr: ', sum(rel_entr(dist1, dist2))),
    # kl_div = rel_entr(x, y) - x + y, so its sum matches rel_entr when both
    # inputs have the same total.
    ('kl_div: ', sum(kl_div(dist1, dist2))),
    ('entropy: ', entropy(dist1, dist2)),
]
for label, value in kl_estimates:
    print(label, value)

rel_entr:  0.09825335774282917
kl_div:  0.0982533577428292
entropy:  0.09825335774282917


## Jensen-Shannon Divergence

$D_{JS}(P || Q) = \frac{1}{2} * D_{KL}(P || M) + \frac{1}{2} * D_{KL}(Q || M)$ \
$M = \frac{Q+P}{2}$ \
$DIST_{JS} = \sqrt{D_{JS}}$

In [53]:
def js_div(x, y):
    """Jensen-Shannon divergence between two discrete distributions.

    Parameters
    ----------
    x, y : array_like
        Non-negative weights over the same bins. Each is rescaled to sum to 1
        before the mixture is formed.

    Returns
    -------
    float
        0.5 * D_KL(P || M) + 0.5 * D_KL(Q || M) with M = (P + Q) / 2, using the
        natural logarithm. Its square root is the Jensen-Shannon distance.
    """
    p = np.asarray(x, dtype=float)
    q = np.asarray(y, dtype=float)
    # Normalize before averaging: `entropy` rescales each of its arguments
    # independently, so a mixture built from unnormalized inputs would lean
    # toward whichever input has the larger total.
    p = p / p.sum()
    q = q / q.sum()
    mixture = (p + q) / 2
    return (entropy(p, mixture) + entropy(q, mixture)) / 2
# SciPy's jensenshannon yields the JS *distance* (square root of the
# divergence), while js_div yields the divergence itself.
js_distance = jensenshannon(dist1, dist2)
js_divergence = js_div(dist1, dist2)
print('jensenshannon: ', js_distance)
print('js_div: ', js_divergence)

jensenshannon:  0.15546410214041506
js_div:  0.024169087054325404


## Kolmogorov-Smirnov test

https://www.datadoghq.com/blog/engineering/robust-statistical-distances-for-machine-learning/

In [61]:
# NOTE: ks_2samp expects two arrays of raw sample observations, whereas
# dist1/dist2 hold per-bin probabilities. Calling ks_2samp(dist1, dist2)
# directly ranks the ten probability values themselves (it reported
# statistic=0.4, pvalue=0.873), which does not measure how far apart the two
# distributions are.
# For binned distributions the KS statistic is the largest absolute gap
# between the two cumulative distribution functions. No p-value is reported
# because that requires the underlying sample sizes.
# Assumes the five bins are ordered (ordinal categories or intervals) — TODO confirm.
ks_stat = np.max(np.abs(np.cumsum(dist1) - np.cumsum(dist2)))
ks_stat

# Concept drift