# Visualizer for all SciPy distributions

The awesome ``scipy.stats`` subpackage holds a lot of continuous and discrete distributions that you might never have heard of. To quickly familiarize oneself with an unknown distribution, plotting and *experiencing* the distribution helps a lot.
This visualizer, based on [Bokeh](http://bokeh.pydata.org/) and [ipywidgets](http://ipywidgets.readthedocs.org/), tries to make this as easy and comfortable as possible.

**TL;DR**: Just run all cells and be stunned!

In [1]:
from collections import OrderedDict, defaultdict

import numpy as np
from scipy import stats

from bokeh.io import output_notebook, show, push_notebook
from bokeh.plotting import figure
from bokeh.io import show

from ipywidgets import widgets, interact, interactive
from IPython.display import display 

import warnings
# Ignore every DeprecationWarning from here on (applies process-wide).
warnings.simplefilter('ignore', DeprecationWarning)

In [2]:
# Set up Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

We start with a little introspection to get lists of all continuous and discrete distributions.

In [3]:


# Resolve every name listed by dir(stats) once, then keep the objects that are
# instances of the continuous / discrete distribution base classes.
_stats_members = [getattr(stats, attr_name) for attr_name in dir(stats)]
dist_continuous = [obj for obj in _stats_members if isinstance(obj, stats.rv_continuous)]
dist_discrete = [obj for obj in _stats_members if isinstance(obj, stats.rv_discrete)]
print('number of continuous distributions:', len(dist_continuous))
print('number of discrete distributions:  ', len(dist_discrete))

number of continuous distributions: 89
number of discrete distributions:   13


Since a lot of distributions need additional shape parameters, we use a nested ``defaultdict`` to define shape parameters as we go. For an undefined distribution, ``DEFAULT_SHAPES`` will return ``1.0`` for all shape parameters.

In [4]:
# Labels of the two distribution families; they double as the option values of
# the "type" dropdown and as the keys the option helpers dispatch on.
CONTINUOUS = 'continuous'
DISCRETE = 'discrete'

def make_default_shape_dict():
    """Create the nested mapping ``distribution name -> shape name -> value``.

    Looking up an unknown distribution name yields a new inner mapping, and
    looking up an unknown shape name in such a mapping yields ``1.0``.

    Every distribution name gets its *own* inner mapping, so a value stored
    for one distribution cannot leak into the defaults of another one.

    Returns:
        A ``defaultdict`` of ``defaultdict`` objects.
    """
    return defaultdict(lambda: defaultdict(lambda: 1.0))

# Hand-picked shape parameters, keyed by distribution name. Distributions that
# are not listed here fall back to the defaults of the nested mapping.
DEFAULT_SHAPES = make_default_shape_dict()
DEFAULT_SHAPES.update({
    'alpha': {'a': 1.3},
    'beta': {'a': 1.5, 'b': 2.},
    'bernoulli': {'p': 0.7},
    'binom': {'n': 10, 'p': 0.7},
    'logser': {'p': 0.3},
})

Some utility functions to:
* create an ordered dict of (distribution name, object) pairs,
* flatten a list of lists,
* calculate the support of a distribution.

In [5]:
def make_dict(dists):
    """Map each distribution's ``name`` attribute to the distribution itself.

    The result is an ``OrderedDict`` that keeps the order of ``dists``; when a
    name occurs more than once, the last distribution with that name wins.
    """
    by_name = OrderedDict()
    for dist in dists:
        by_name[dist.name] = dist
    return by_name

def flatten(lst):
    """Concatenate the items of all sub-sequences of ``lst`` into one list.

    Only one nesting level is removed; deeper nesting is left as it is.
    """
    flat = []
    for sublist in lst:
        flat.extend(sublist)
    return flat

def support(dist, *shapeargs):
    """Return the end points of the interval containing all probability mass.

    This is ``dist.interval`` evaluated at ``1.0`` with the given shape
    parameters passed positionally.
    """
    full_mass = 1.0
    return dist.interval(full_mass, *shapeargs)

In [6]:
def get_dist_func_xy(dist, func, *shapeargs, **params):
    """Evaluate ``dist.<func>`` on a grid of x values spanning the support.

    Args:
        dist: distribution object; must provide the method named ``func`` and
            work with ``support``.
        func: name of the method to evaluate, e.g. ``'pdf'`` or ``'pmf'``.
        *shapeargs: shape parameters, passed positionally.
        **params: keyword arguments forwarded to the evaluated method. The
            presence of ``'scale'`` selects the continuous branch; without it
            the distribution is treated as discrete.

    Returns:
        Tuple ``(x, y)`` of the grid and the function values on that grid.
    """
    # due to bug in scipy.levy_stable no keyword args
    # Infinite end points are clipped to +/-100 to get a finite plotting range.
    bounds = [np.sign(b) * 100 if np.isinf(b) else b
              for b in support(dist, *shapeargs)]
    if 'scale' in params:  # continuous
        # Move both end points slightly inwards so the function is not
        # evaluated exactly on the boundary of the support.
        left, right = bounds[0] + 1e-3, bounds[1] - 1e-3
        # About 100 samples per unit length; np.linspace needs an int count.
        x = np.linspace(left, right, int(100 * (right - left)))
    else:
        left, right = bounds
        # NOTE(review): the +1 shift presumably compensates for the lower end
        # of the discrete interval lying one below the first support point.
        x = np.arange(left + 1, right + 1)
    y = getattr(dist, func)(x, *shapeargs, **params)
    return x, y

In [7]:
def y_range_end(y):
    """Choose the upper end of the y axis for the values ``y``.

    The result is 10% above the maximum of ``y``, but never more than 5 above
    the minimum of ``y``.
    """
    cap_above_min = np.min(y) + 5
    padded_peak = 1.1*np.max(y)
    return min(cap_above_min, padded_peak)


def x_range(x, lim=5):
    """Choose the visible x range for the values ``x``.

    The range extends 0.1 beyond the smallest and largest value of ``x`` and
    is clipped to ``[-lim, lim]``.

    Returns:
        Tuple ``(start, end)``.
    """
    offset = 1e-1
    start = max(-lim, np.min(x) - offset)
    end = min(lim, np.max(x) + offset)
    return start, end

In [8]:
def get_dist_fig_data(dist, func, *args, **kwargs):
    """Create the figure together with handles to its two data mappings.

    ``args`` and ``kwargs`` are forwarded to ``get_dist_func_xy``; the
    resulting curve is drawn as a line. A patches glyph is added as well,
    filled with placeholder polygons.

    Returns:
        Tuple ``(fig, line_data, patch_data)`` where the last two are the
        ``data`` mappings of the line and patches data sources.
    """
    x, y = get_dist_func_xy(dist, func, *args, **kwargs)
    fig = figure(
        width=600,
        height=600,
        title=None,
        y_range=(0, y_range_end(y)),
        x_range=x_range(x),
    )
    # Placeholder polygons; only the glyph and its data source are needed here.
    placeholder_xs = [[1, 3, 2], [3, 4, 6, 6]]
    placeholder_ys = [[2, 1, 4], [4, 7, 8, 5]]
    patch_renderer = fig.patches(placeholder_xs, placeholder_ys, line_width=3, alpha=0.3)
    line_renderer = fig.line(x, y, line_width=3)
    return fig, line_renderer.data_source.data, patch_renderer.data_source.data

In [9]:
def dist_options(dist_type):
    """Return the name -> distribution mapping for the given family.

    Raises:
        RuntimeError: if ``dist_type`` is neither ``CONTINUOUS`` nor ``DISCRETE``.
    """
    if dist_type == CONTINUOUS:
        return make_dict(dist_continuous)
    if dist_type == DISCRETE:
        return make_dict(dist_discrete)
    raise RuntimeError("Unknown distribution type: {}".format(dist_type))

def func_options(dist_type):
    """Return the names of the plottable functions for the given family.

    Continuous distributions offer ``pdf`` and ``cdf``, discrete ones ``pmf``
    and ``cdf``.

    Raises:
        RuntimeError: if ``dist_type`` is neither ``CONTINUOUS`` nor ``DISCRETE``.
    """
    if dist_type == CONTINUOUS:
        density_name = 'pdf'
    elif dist_type == DISCRETE:
        density_name = 'pmf'
    else:
        raise RuntimeError("Unknown distribution type: {}".format(dist_type))
    return [density_name, 'cdf']

In [10]:
def shape_params(dist):
    """Return the names of the shape parameters of ``dist``.

    Returns:
        A list of names parsed from the comma-separated ``dist.shapes``
        string, or ``None`` when ``dist.shapes`` is ``None``.
    """
    if dist.shapes is None:
        return None
    # Split on the comma alone and strip the pieces, so that "a, b" and "a,b"
    # both yield ['a', 'b'].
    return [name.strip() for name in dist.shapes.split(',')]
    
def has_shape_params(dist):
    """Tell whether ``shape_params`` yields a result other than ``None`` for ``dist``."""
    names = shape_params(dist)
    return names is not None

In [11]:
def make_patches(x, y, width=0.5):
    """Turn points into rectangle outlines, one bar per point.

    Each bar is centered on its x value, is ``width`` wide and reaches from 0
    up to its y value.

    Returns:
        Tuple ``(xs, ys)`` of lists holding the four corner coordinates of
        every bar.
    """
    half = width/2
    xs = []
    for center in x:
        left, right = center - half, center + half
        xs.append([left, left, right, right])
    ys = [[0, height, height, 0] for height in y]
    return xs, ys

In [12]:
def update_type_sel():
    """Refill the distribution dropdown after the family selection changed."""
    available = dist_options(type_sel.value)
    dist_sel.options = available
    
def update_dist_sel():
    """Rebuild the dependent widgets after the distribution selection changed.

    Refreshes the function dropdown, creates one numeric input per shape
    parameter of the selected distribution (pre-filled from
    ``DEFAULT_SHAPES``), shows the loc/scale sliders for continuous
    distributions only, and finally redraws the plot.
    """
    func_sel.options = func_options(type_sel.value)
    dist = dist_sel.value
    if has_shape_params(dist):
        defaults = DEFAULT_SHAPES[dist.name]
        text_inputs = []
        for param in shape_params(dist):
            text_input = widgets.BoundedFloatText(value=defaults[param], description='{}:'.format(param))
            text_input.on_trait_change(update_dist_params, name='value')
            text_inputs.append(text_input)
        shape_param_container.children = text_inputs
    else:
        shape_param_container.children = []
    # The family is held by type_sel; dist_sel.value is the selected
    # distribution object and never equals the CONTINUOUS label.
    if type_sel.value == CONTINUOUS:
        param_container.children = [loc_slider, scale_slider]
    else:
        param_container.children = []
    update_dist_params()
    

def update_continuous(fig, data, *shapeargs):
    """Recompute the curve of the selected continuous distribution.

    Writes the new points into ``data['x']`` / ``data['y']`` and adapts both
    axis ranges of ``fig`` to them. The current loc and scale slider values
    are used.
    """
    curve = get_dist_func_xy(
        dist_sel.value,
        func_sel.value,
        *shapeargs,
        loc=loc_slider.value,
        scale=scale_slider.value
    )
    data['x'], data['y'] = curve
    fig.y_range.end = y_range_end(data['y'])
    x_start, x_end = x_range(data['x'])
    fig.x_range.start = x_start
    fig.x_range.end = x_end
    
    
def update_discrete(fig, data, *shapeargs):
    """Recompute the bars of the selected discrete distribution.

    Writes the bar outlines into ``data['xs']`` / ``data['ys']``, sets the top
    of the y axis to 10% above the tallest bar, and limits the x axis to the
    bars whose height exceeds 0.001 (plus a margin of 1 on both sides).
    """
    points, probs = get_dist_func_xy(dist_sel.value, func_sel.value, *shapeargs)
    data['xs'], data['ys'] = make_patches(points, probs)
    fig.y_range.end = 1.1*max(max(data['ys']))
    # x coordinates of all bar corners that lie visibly above the axis
    visible_xs = []
    for corner_x, corner_y in zip(flatten(data['xs']), flatten(data['ys'])):
        if corner_y > 0.001:
            visible_xs.append(corner_x)
    x_start, x_end = min(visible_xs) - 1, max(visible_xs) + 1
    fig.x_range.start = x_start
    fig.x_range.end = x_end
    
    
def update_dist_params():
    """Redraw the plot for the current widget state.

    Reads the shape parameters from the shape input widgets, clears both data
    mappings, and refills the one that belongs to the selected distribution
    family. Any exception raised while evaluating the distribution is shown
    in ``error_text`` instead of propagating. The notebook output is
    refreshed at the end in either case.
    """
    shapeargs = [c.value for c in shape_param_container.children]
    # Empty both glyphs first so the one of the other family disappears.
    l_data['x'], l_data['y'] = [], []
    p_data['xs'], p_data['ys'] = [], []
    try:
        # The family is held by type_sel; dist_sel.value is the selected
        # distribution object and never equals the CONTINUOUS label.
        if type_sel.value == CONTINUOUS:
            update_continuous(fig, l_data, *shapeargs)
        else:
            update_discrete(fig, p_data, *shapeargs)
    except Exception as e:
        # UI boundary: invalid parameters are reported to the user.
        error_text.value = "Wrong parameters! Choose again.<br>ERROR: {}".format(e)
        error_text.visible = True
    else:
        # Drop the stale message as well, so nothing outdated stays on screen
        # irrespective of how the `visible` attribute is honored.
        error_text.value = ""
        error_text.visible = False
    push_notebook()
    
# Selection widgets: distribution family, concrete distribution, function to plot.
type_sel = widgets.Dropdown(options=[CONTINUOUS, DISCRETE], value=CONTINUOUS, description='type:')
type_sel.on_trait_change(update_type_sel, name='value')
# The distribution dropdown starts on the 'norm' entry of the initial family.
dist_sel = widgets.Dropdown(options=dist_options(type_sel.value), value=dist_options(type_sel.value)['norm'], description='dist:')
dist_sel.on_trait_change(update_dist_sel, name='value')
func_sel = widgets.Dropdown(options=func_options(type_sel.value), value='pdf', description='func:')
# Changing the function runs the same handler as changing the distribution.
func_sel.on_trait_change(update_dist_sel, name='value')

# Sliders whose values are passed as loc / scale when a continuous distribution is evaluated.
loc_slider = widgets.FloatSlider(value=0., min=-5.0, max=5.0, step=0.1, description='loc:')
loc_slider.on_trait_change(update_dist_params, name='value')
scale_slider = widgets.FloatSlider(value=1., min=0.01, max=10.0, step=0.01, description='scale:')
scale_slider.on_trait_change(update_dist_params, name='value')

# Row holding the three dropdowns.
dist_container = widgets.HBox()  
dist_container.children = [type_sel, dist_sel, func_sel]

# Column holding the loc/scale sliders; update_dist_sel replaces its children.
param_container = widgets.VBox()
param_container.children = [loc_slider, scale_slider]

# Filled by update_dist_sel with one input per shape parameter; empty at start.
shape_param_container = widgets.HBox()
# Receives the error message written by update_dist_params.
error_text = widgets.HTML()

display(dist_container)
display(param_container)
display(shape_param_container)
display(error_text)

# Build the initial figure; l_data and p_data are the line and patches data
# mappings that the update callbacks modify as module-level globals.
fig, l_data, p_data = get_dist_fig_data(dist_sel.value, func_sel.value, loc=loc_slider.value, scale=scale_slider.value)
show(fig)