In [None]:
import numpy as np
from scipy import stats as st
import seaborn as sns
import panel as pn
from matplotlib.figure import Figure

pn.extension('ipywidgets')
pn.config.template = 'material'

In [None]:
def get_d6_hist(log_n_samples=0, kde=False):
    """Plots histogram of a D6"""
    
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()

    n_samples = int(10**log_n_samples)
    samples = np.floor(st.uniform(0, 6).rvs(n_samples)).astype(int) + 1
    sns.histplot(samples, bins=range(1, 8), ax=ax, kde=kde, stat="probability")
    ax.set_title(f"Histogram of a D6 with {n_samples} samples\nEstimated Mean: {samples.mean():.2f}")
    return fig

n_samples_widget = pn.widgets.FloatSlider(name="Log base 10 of the number of samples", value=0, start=0, end=5)
rv_widgets = pn.WidgetBox(n_samples_widget)
rv_plot = pn.bind(get_d6_hist, log_n_samples=n_samples_widget)
rv_pane = pn.panel(rv_plot);

In [None]:
def get_two_dist_hist(log_n_samples=0, kde=False):
    """Plots Two Hists"""
    
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()

    n_samples = int(10**log_n_samples)
    def _gen_samples(loc, scale, n_samples):
        return np.floor(st.uniform(loc, scale).rvs(n_samples)).astype(int)
    samples_x =_gen_samples(1, 6, n_samples)
    samples_y = np.concatenate([_gen_samples(1, 4, n_samples), _gen_samples(3, 4, n_samples)])
    sns.histplot(samples_x, bins=range(1, 8), ax=ax, kde=kde, stat="probability")
    sns.histplot(samples_y, bins=range(1, 8), ax=ax, kde=kde, stat="probability")
    ax.set_title(f"Histogram of a X (D6) and Y (D3 + 3) with {n_samples} samples\nEstimated Mean of X: {samples_x.mean():.2f} and Y: {samples_y.mean():.2f}")
    return fig

dist_plot = pn.bind(get_two_dist_hist, log_n_samples=n_samples_widget)
dist_pane = pn.panel(dist_plot)

In [None]:
def get_kl_hist(log_n_samples=0, mean=0, sd=1, kde=False, bins=10):
    """Plots Two Hists"""
    
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()
    bins = range(-10, 11)
    n_samples = int(10**log_n_samples)

    samples_x = st.norm(mean, sd).rvs(n_samples)
    samples_y = st.norm(1.4, 2.7).rvs(n_samples)

    sns.histplot(samples_x, bins=bins, ax=ax, kde=kde, stat="probability")
    sns.histplot(samples_y, bins=bins, ax=ax, kde=kde, stat="probability")

    hist_x, _ = np.histogram(samples_x, bins=bins, density=True)
    hist_y, _ = np.histogram(samples_y, bins=bins, density=True)

    ent_samples = hist_x * np.log(hist_x / hist_y)
    ent_samples = np.where(np.isfinite(ent_samples), ent_samples, 0)

    divergence = np.sum(ent_samples)
    ax.set_title(f"Histogram of two normals with {n_samples} samples\nEstimated KL Divergence from Y to X: {divergence:.2f}")
    return fig

mean_widget = pn.widgets.FloatSlider(name="Mean", value=0, start=-5, end=5)
sd_widget = pn.widgets.FloatSlider(name="SD", value=1, start=0.1, end=5)
kl_plot = pn.bind(get_kl_hist, log_n_samples=n_samples_widget, mean=mean_widget, sd=sd_widget)
kl_widgets = pn.WidgetBox(n_samples_widget, mean_widget, sd_widget).servable(target="sidebar")

kl_pane = pn.panel(kl_plot)

In [None]:
def zero_runs(a, axis=0):
    # Create an array that is 1 where a is 0, and pad each end with an extra 0.
    diffs = np.diff(np.equal(a, 0).view(np.int8), prepend=0, append=0)
    absdiff = np.abs(diffs)
    # Runs start and end where absdiff is 1.
    ranges = np.where(absdiff == 1)[0].reshape(-1, 2)
    return np.max(ranges[:, 1] - ranges[:, 0], axis=-1) if ranges.shape[0] > 0 else 0


In [None]:
from functools import partial


def get_coins(bins=10, statistic="mean", sequence=None, p=0.4):
    """Coin flip histogram"""

    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()

    # Manually generated sample
    sequence = (
        "00011000011100011010100010100001100101010101001010"
        if sequence is None
        else sequence
    )
    numeric_sequence = np.array([int(c) for c in sequence])
    # print(p)
    numeric_sequence = st.bernoulli(p).rvs(size=50)
    # print(numeric_sequence)
    def _statistic(data, statistic, axis=None):
        if statistic == "mean":
            return data.mean(axis=axis)
        elif statistic == "variance":
            return data.var(axis=axis)
        elif statistic == "entropy":
            return st.entropy(data, axis=axis)
        elif statistic == "0_run_length":
            return zero_runs(data, axis=axis)
        elif statistic == "1_run_length":
            return zero_runs(np.where(data, 0, 1), axis=axis)
        elif statistic == "changes":
            return np.logical_xor(data[:-1], data[1:]).sum(axis=axis)
        elif statistic == "autocorrelation":
            return np.correlate(data[:-1], data[1:]).item()
        else:
            raise ValueError("Invalid statistic")

    null_rv = st.bernoulli(0.5)
    stat_fn = partial(_statistic, statistic=statistic)
    sample_stat = stat_fn(numeric_sequence)
    res = st.monte_carlo_test(
        data=numeric_sequence, rvs=lambda size: null_rv.rvs(size), statistic=stat_fn, vectorized=False
    )
    sns.histplot(res.null_distribution, bins=bins, ax=ax, stat="probability")
    ax.axvline(sample_stat, color="red", linestyle="--")

    ax.set_title(
        f"Histogram of the distribution of the {statistic} with {res.null_distribution.shape[0]} samples.\nSample {statistic}: {sample_stat:.2f} | p-value: {res.pvalue:.2f}"
    )
    return fig


statistics_widget = pn.widgets.Select(
    name="Statistic",
    options=[
        "mean",
        "variance",
        "entropy",
        "0_run_length",
        "1_run_length",
        "changes",
        "autocorrelation",
    ],
    value="mean",
)
bins_widget = pn.widgets.IntSlider(name="Bins", start=1, end=100, value=10)
bernoulli_widget = pn.widgets.FloatSlider(name="p", start=0, end=1, value=0.5, step=0.01)
sequence_widget = pn.widgets.TextInput(
    name="Sequence", value="00011000011100011010100010100001100101010101001010"
)
coin_plot = pn.bind(
    get_coins,
    statistic=statistics_widget,
    bins=bins_widget,
    sequence=sequence_widget,
    p=bernoulli_widget,
)
coin_widgets = pn.WidgetBox(statistics_widget, bins_widget, sequence_widget, bernoulli_widget).servable(target="sidebar")

coin_pane = pn.panel(coin_plot)

In [None]:
# sequence = "01010011000100101101001110001010011010001101111001"
sequence = "00011000011100011010100010100001100101010001001010"
numeric_sequence = np.array([int(c) for c in sequence])
np.correlate(numeric_sequence[:-1], numeric_sequence[1:]).item()

9

In [None]:
tabs = pn.Tabs(
    ('RV', rv_pane),
    ('Two Dists', dist_pane),
    ('KL', kl_pane),
    ('Coins', coin_pane),
    dynamic=True, sizing_mode='stretch_both'
).servable();

In [None]:
def update_sidebar_widgets(target, event):
    print (event.new)
    print (target)
tabs.link(coin_widgets, callbacks={'active': update_sidebar_widgets})


array([ True,  True,  True,  True, False,  True, False,  True, False,
       False,  True,  True, False,  True,  True,  True, False,  True,
        True,  True, False,  True, False, False,  True, False, False,
        True,  True,  True,  True, False,  True, False,  True,  True,
        True, False, False,  True, False,  True,  True, False, False,
       False,  True, False,  True])

In [42]:
_zero_runs = zero_runs(numeric_sequence)
np.max(_zero_runs[:, 1] - _zero_runs[:, 0])

np.int64(3)