In [20]:
from pureml_evaluate.drift_metrics.tabular.hellinger_distance.hellinger_distance import HellingerDistance
from pureml_evaluate.drift_metrics.tabular.kolmogorov_smirnov.kolmogorov_smirnov_statistic import KolmogorovSmirnov
from pureml_evaluate.drift_metrics.tabular.jensen_shannon_distance.jensen_shannon_distance import JensenShannonDistance
from pureml_evaluate.drift_metrics.tabular.l_infinity_distance.l_infinity_distance import LInfinityDistance
from pureml_evaluate.drift_metrics.tabular.wasserstein_distance.wasserstein_distance import WassersteinDistance

In [21]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the global NumPy seed so every sample drawn below is reproducible.
np.random.seed(42)

# Size of each generated sample.
N = 1000

# Pair 1: both samples come from the same normal distribution (no drift).
mean1 = 0
std1 = 1
data1_a = np.random.normal(loc=mean1, scale=std1, size=N)
data1_b = np.random.normal(loc=mean1, scale=std1, size=N)

# Pair 2: same spread, means 0.5 apart (mild drift).
mean2_a = 0
mean2_b = 0.5
std2 = 1
data2_a = np.random.normal(loc=mean2_a, scale=std2, size=N)
data2_b = np.random.normal(loc=mean2_b, scale=std2, size=N)

# Pair 3: same spread, means 5 apart (strong drift).
mean3_a = 0
mean3_b = 5
std3 = 1
data3_a = np.random.normal(loc=mean3_a, scale=std3, size=N)
data3_b = np.random.normal(loc=mean3_b, scale=std3, size=N)

# Pair 4: a normal sample against a uniform sample (change of shape).
mean4 = 0
std4 = 1
data4_a = np.random.normal(loc=mean4, scale=std4, size=N)
# The uniform bounds [-3, 3) were picked by the original author to make the comparison interesting.
data4_b = np.random.uniform(low=-3, high=3, size=N)



In [22]:
# KolmogorovSmirnov metric on pair 1 (both samples drawn from N(0, 1)).
# The `ks` instance created here is reused by the following three cells.
ks = KolmogorovSmirnov()
ks.compute(data1_a, data1_b)

{'kolmogorov_smirnov': {'value': {'ks_stat': 0.045,
   'p_value': 0.26347172719864703}}}

In [23]:
# Pair 2: N(0, 1) vs N(0.5, 1). Relies on `ks` from the earlier KolmogorovSmirnov cell,
# so that cell must have run first on a fresh kernel.
ks.compute(data2_a, data2_b)

{'kolmogorov_smirnov': {'value': {'ks_stat': 0.205,
   'p_value': 8.493582764331036e-19}}}

In [24]:
# Pair 3: N(0, 1) vs N(5, 1). Relies on `ks` from the earlier KolmogorovSmirnov cell,
# so that cell must have run first on a fresh kernel.
ks.compute(data3_a, data3_b)

{'kolmogorov_smirnov': {'value': {'ks_stat': 0.992, 'p_value': 0.0}}}

In [25]:
# Pair 4: N(0, 1) vs Uniform(-3, 3). Relies on `ks` from the earlier KolmogorovSmirnov cell,
# so that cell must have run first on a fresh kernel.
ks.compute(data4_a, data4_b)

{'kolmogorov_smirnov': {'value': {'ks_stat': 0.183,
   'p_value': 4.808958234324042e-15}}}

In [26]:
# WassersteinDistance metric on pair 1 (both samples drawn from N(0, 1)); serves as the no-drift baseline.
wd = WassersteinDistance()
wd.compute(data1_a, data1_b)

{'wasserstein_distance': {'value': 0.014660278706218702}}

In [27]:
# WassersteinDistance metric on pair 2: N(0, 1) vs N(0.5, 1).
# NOTE(review): for raw samples whose means differ by 0.5 a 1-D Wasserstein distance
# near 0.5 would be expected; the recorded value (~0.017) is far smaller, so the metric
# presumably rescales or bins its inputs — confirm against the implementation.
wd = WassersteinDistance()
wd.compute(data2_a, data2_b)

{'wasserstein_distance': {'value': 0.01698880644136155}}

In [28]:
# WassersteinDistance metric on pair 3: N(0, 1) vs N(5, 1).
# NOTE(review): the recorded value for this pair (~0.0076) is LOWER than the one recorded
# for the same-distribution pair 1 (~0.0147). A 1-D Wasserstein distance between raw samples
# whose means differ by 5 should be close to 5 (cf. scipy.stats.wasserstein_distance), so
# this result looks wrong as a drift signal — verify how WassersteinDistance preprocesses
# its inputs before relying on it.
wd = WassersteinDistance()
wd.compute(data3_a, data3_b)

{'wasserstein_distance': {'value': 0.007627492259999467}}

In [29]:
# WassersteinDistance metric on pair 4: N(0, 1) vs Uniform(-3, 3).
wd = WassersteinDistance()
wd.compute(data4_a, data4_b)

{'wasserstein_distance': {'value': 0.08975700659407804}}

In [30]:
# HellingerDistance metric on pair 1 (both samples drawn from N(0, 1)); serves as the no-drift baseline.
hd = HellingerDistance()
hd.compute(data1_a, data1_b)

{'hellinger_distance': {'value': 0.14371687035258263}}

In [31]:
# HellingerDistance metric on pair 2: N(0, 1) vs N(0.5, 1).
hd = HellingerDistance()
hd.compute(data2_a, data2_b)


{'hellinger_distance': {'value': 0.3421912257595368}}

In [32]:
# HellingerDistance metric on pair 3: N(0, 1) vs N(5, 1).
# NOTE(review): the Hellinger distance between two probability distributions is at most 1
# with the usual 1/sqrt(2) normalisation and at most sqrt(2) (~1.4142) without it. The
# recorded value (~1.4421) exceeds both bounds, which suggests the inputs are not
# normalised to proper probability masses inside HellingerDistance — confirm.
hd = HellingerDistance()
hd.compute(data3_a, data3_b)

{'hellinger_distance': {'value': 1.442056132754212}}

In [33]:
# HellingerDistance metric on pair 4: N(0, 1) vs Uniform(-3, 3).
hd = HellingerDistance()
hd.compute(data4_a, data4_b)

{'hellinger_distance': {'value': 0.6128921016490295}}

In [34]:
# LInfinityDistance metric on pair 1 (both samples drawn from N(0, 1)); serves as the no-drift baseline.
# NOTE(review): the recorded value for two samples of the SAME distribution is ~4.28, and
# the values for the other pairs (~4.98, ~10.1, ~5.61) are all on the scale of the largest
# difference between individual, unpaired sample values. Presumably the metric takes
# max|a_i - b_i| over the raw arrays rather than comparing estimated distributions — confirm,
# since that quantity depends on sample order and is large even without drift.
li = LInfinityDistance()
li.compute(data1_a, data1_b)

{'l_infinity_distance': {'value': 4.28120393757531}}

In [35]:
# LInfinityDistance metric on pair 2: N(0, 1) vs N(0.5, 1).
li = LInfinityDistance()
li.compute(data2_a, data2_b)

{'l_infinity_distance': {'value': 4.978529822933488}}

In [36]:
# LInfinityDistance metric on pair 3: N(0, 1) vs N(5, 1).
li = LInfinityDistance()
li.compute(data3_a, data3_b)

{'l_infinity_distance': {'value': 10.10548141220476}}

In [37]:
# LInfinityDistance metric on pair 4: N(0, 1) vs Uniform(-3, 3).
li = LInfinityDistance()
li.compute(data4_a, data4_b)

{'l_infinity_distance': {'value': 5.610066767066234}}

In [38]:
# JensenShannonDistance metric on pair 1 (both samples drawn from N(0, 1)); serves as the no-drift baseline.
jd = JensenShannonDistance()
jd.compute(data1_a, data1_b)

{'jensen_shannon_distance': {'value': 0.005625206345051155}}

In [39]:
# JensenShannonDistance metric on pair 2: N(0, 1) vs N(0.5, 1).
jd = JensenShannonDistance()
jd.compute(data2_a, data2_b)

{'jensen_shannon_distance': {'value': 0.03200074128432203}}

In [40]:
# JensenShannonDistance metric on pair 3: N(0, 1) vs N(5, 1).
# NOTE(review): the recorded value (~0.680) sits just under ln 2 (~0.693), the upper bound
# of the Jensen-Shannon *divergence* in nats. The Jensen-Shannon *distance* (its square
# root) would approach ~0.83 for nearly disjoint samples like these, so the metric may be
# returning the divergence despite its name — confirm against the implementation.
jd = JensenShannonDistance()
jd.compute(data3_a, data3_b)

{'jensen_shannon_distance': {'value': 0.6803589258772007}}

In [41]:
# JensenShannonDistance metric on pair 4: N(0, 1) vs Uniform(-3, 3).
jd = JensenShannonDistance()
jd.compute(data4_a, data4_b)

{'jensen_shannon_distance': {'value': 0.09180071392935041}}