# Bokeh boxplot
A `boxplot` function, built on Bokeh, that draws one box plot per column of a pandas DataFrame.

In [1]:
import pandas as pd

from bokeh.plotting import figure, show
from bokeh.io import output_notebook

# Send Bokeh output to the notebook so that show() renders plots inline.
output_notebook()

In [2]:
def boxplot(df, y_label='value'):
    """Draw a box plot for each column in the dataframe and show it.

    Each column gets a box spanning the first to third quartile (split at
    the median), stems and whisker caps, and a marker for every outlier.
    A value is an outlier when it lies more than 1.5 * IQR above the third
    quartile or below the first quartile. The whiskers are drawn at those
    1.5 * IQR fences, shortened to the column maximum / minimum when the
    data does not reach the fence.

    Args:
        df: pandas DataFrame; every column is plotted, so all columns must
            support ``DataFrame.quantile``.
        y_label: label for the y axis.

    Returns:
        None. The figure is displayed with ``bokeh.plotting.show``.
    """
    # based on https://bokeh.pydata.org/en/latest/docs/gallery/boxplot.html

    # A categorical x axis needs string factors, so stringify the column
    # labels; this keeps frames with integer (or other) labels plottable.
    labels = [str(column) for column in df.columns]

    # find the quartiles and IQR for each column
    q1 = df.quantile(q=0.25)
    q2 = df.quantile(q=0.5)
    q3 = df.quantile(q=0.75)
    iqr = q3 - q1
    upper_fence = q3 + 1.5 * iqr
    lower_fence = q1 - 1.5 * iqr

    # collect one (x, y) coordinate pair for every outlier of every column
    outlier_x = []
    outlier_y = []
    for column, label in zip(df.columns, labels):
        values = df[column]
        beyond_fences = ((values > upper_fence.loc[column])
                         | (values < lower_fence.loc[column]))
        extreme = values[beyond_fences].tolist()
        outlier_x.extend([label] * len(extreme))
        outlier_y.extend(extreme)

    # stems must not extend past the observed maximum / minimum; these get
    # their own names so the fences used for outlier detection stay intact
    upper_whisker = upper_fence.clip(upper=df.quantile(q=1.00))
    lower_whisker = lower_fence.clip(lower=df.quantile(q=0.00))

    tooltips = [("", "$y")]
    p = figure(y_axis_label=y_label,
               tools="pan,wheel_zoom,box_zoom,reset,save",
               background_fill_color="#efefef",
               x_range=labels,
               tooltips=tooltips)

    # stems
    p.segment(x0=labels, y0=upper_whisker, x1=labels, y1=q3,
              line_color="black")
    p.segment(x0=labels, y0=lower_whisker, x1=labels, y1=q1,
              line_color="black")

    # boxes: median-to-Q3 on top, Q1-to-median below
    p.vbar(x=labels, width=0.7, top=q3, bottom=q2,
           fill_color="#E08E79", line_color="black")
    p.vbar(x=labels, width=0.7, top=q2, bottom=q1,
           fill_color="#3B8686", line_color="black")

    # whiskers (almost-0 height rects simpler than segments)
    p.rect(x=labels, y=lower_whisker, width=0.2, height=0.01,
           line_color="black")
    p.rect(x=labels, y=upper_whisker, width=0.2, height=0.01,
           line_color="black")

    # outliers; scatter() draws circle markers by default and replaces the
    # circle(size=...) form that newer Bokeh releases deprecate
    if outlier_y:
        p.scatter(x=outlier_x, y=outlier_y, size=6, color="#F38630",
                  fill_alpha=0.6)

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = "white"
    p.grid.grid_line_width = 2
    p.xaxis.major_label_text_font_size = "12pt"

    show(p)

## Simple box plot example

In [3]:
# Toy data: column A contains a low outlier (-10), column B a high one (21).
df = pd.DataFrame(
    {
        'A': [-10, 5, 6, 7, 8, 9, 10],
        'B': [1, 2, 3, 4, 5, 6, 21],
    }
)

boxplot(df)

## Iris dataset example

In [4]:
from sklearn import datasets
# Load the Iris measurements into a dataframe with one column per feature.
iris = datasets.load_iris()
df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)

In [6]:
# Draw one box per Iris feature column.
boxplot(df_iris) 

## Subset example

In [7]:
# Restrict the plot to two of the Iris features.
subset_columns = ['sepal width (cm)', 'petal length (cm)']
boxplot(df_iris[subset_columns])