# Y-Dotplot Geometry

## Preparation

In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
# Configure Lets-Plot to render plots as HTML output in the notebook.
LetsPlot.setup_html()

In [2]:
def plot_matrix(plots=None, width=400, height=300, columns=2):
    """Arrange plots on a fixed-size grid and display the result.

    Plots are placed left to right, top to bottom: plot number ``i`` goes to
    row ``i // columns`` and column ``i % columns``, and every grid cell is
    ``width`` x ``height``.

    Parameters
    ----------
    plots : iterable of plots, optional
        Plots to lay out, in display order. ``None`` (the default) is
        treated as "no plots".
    width : int
        Width of a single grid cell.
    height : int
        Height of a single grid cell.
    columns : int
        Number of plots per row.

    Returns
    -------
    Whatever ``GGBunch.show()`` returns.
    """
    # NOTE(review): newer Lets-Plot releases offer gggrid() for grid layouts;
    # consider switching if GGBunch is deprecated in the installed version.
    if plots is None:
        # Avoid a shared mutable default argument.
        plots = ()
    bunch = GGBunch()
    for i, plot in enumerate(plots):
        # Integer arithmetic keeps the row index exact (no float division).
        row, column = divmod(i, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()

In [3]:
# Default number of bins, and the matching bin width expressed as a
# fraction of the data range.
DEF_BIN_COUNT = 20
DEF_BINWIDTH_RATIO = 1 / DEF_BIN_COUNT

def get_binwidth(df, column, binwidth_ratio=DEF_BINWIDTH_RATIO):
    """Return the bin width for ``df[column]``.

    The width is ``binwidth_ratio`` times the range (max - min) of the column.
    """
    values = df[column]
    value_range = values.max() - values.min()
    return binwidth_ratio * value_range

def get_bincount(df, column, binwidth):
    """Return how many bins of width ``binwidth`` span ``df[column]``.

    The count is the column range (max - min) divided by ``binwidth``,
    rounded to the nearest integer and never less than 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``column``.
    column : str
        Name of the numeric column to bin.
    binwidth : float
        Width of a single bin; must be non-zero.

    Returns
    -------
    int
        Number of bins, at least 1.
    """
    value_range = df[column].max() - df[column].min()
    # A constant column (or one much narrower than ``binwidth``) would round
    # to 0 bins, which np.histogram rejects; fall back to a single bin.
    return max(1, int(round(value_range / binwidth)))

In [4]:
# Load the mpg dataset, drop the "Unnamed: 0" column and keep a
# reproducible 100-row sample (fixed random_state).
df = (
    pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv")
    .drop(columns=["Unnamed: 0"])
    .sample(n=100, random_state=42, ignore_index=True)
)
df.head()

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
0,dodge,ram 1500 pickup 4wd,4.7,2008,8,manual(m6),4,9,12,e,pickup
1,toyota,toyota tacoma 4wd,4.0,2008,6,auto(l5),4,16,20,r,pickup
2,toyota,camry,2.2,1999,4,auto(l4),f,21,27,r,midsize
3,audi,a4 quattro,2.0,2008,4,manual(m6),4,20,28,p,compact
4,jeep,grand cherokee 4wd,4.7,2008,8,auto(l5),4,14,19,r,suv


In [5]:
# Bin width for "hwy" using the default ratio of get_binwidth().
binwidth = get_binwidth(df, column="hwy")
binwidth

1.6

## Minimalistic example

In [6]:
# Only the y aesthetic is mapped; every geom parameter is left at its default.
(
    ggplot(df, aes(y="hwy"))
    + geom_ydotplot()
    + ggtitle("Simplest example")
)

## Comparison of geoms

In [7]:
# Shared colour: violin fill and dot outline in the comparison plots below.
PACIFIC_BLUE = '#118ed8'

In [8]:
# Violin layer with a y-dotplot drawn on top; no `method` argument is passed.
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_violin(fill=PACIFIC_BLUE, size=0)
    + geom_ydotplot(
        binwidth=binwidth,
        stackratio=.5,
        color=PACIFIC_BLUE,
        fill='white',
    )
    + ggtitle("violin + ydotplot (method='dotdensity')")
)

In [9]:
# Same overlay as the previous plot, but with method='histodot'.
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_violin(fill=PACIFIC_BLUE, size=0)
    + geom_ydotplot(
        method='histodot',
        binwidth=binwidth,
        stackratio=.5,
        color=PACIFIC_BLUE,
        fill='white',
    )
    + ggtitle("violin + ydotplot (method='histodot')")
)

## Parameters

### `stackdir`

In [10]:
# One plot per `stackdir` value, shown side by side on a 2-column grid.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackdir='left')
    + ggtitle("stackdir='left'")
)
p2 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackdir='right')
    + ggtitle("stackdir='right'")
)
p3 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackdir='center')
    + ggtitle("stackdir='center' (default)")
)
p4 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackdir='centerwhole')
    + ggtitle("stackdir='centerwhole'")
)

plot_matrix([p1, p2, p3, p4])

### `stackratio`

In [11]:
# Same plot with three different `stackratio` values.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackratio=1.0)
    + ggtitle("stackratio=1.0 (default)")
)
p2 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackratio=0.5)
    + ggtitle("stackratio=0.5")
)
p3 = (
    p
    + geom_ydotplot(binwidth=binwidth, stackratio=1.5)
    + ggtitle("stackratio=1.5")
)

plot_matrix([p1, p2, p3])

### `dotsize`

In [12]:
# Same plot with `dotsize` unset, 0.5 and 1.5.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(binwidth=binwidth)
    + ggtitle("dotsize=1.0 (default)")
)
p2 = (
    p
    + geom_ydotplot(binwidth=binwidth, dotsize=0.5)
    + ggtitle("dotsize=0.5")
)
p3 = (
    p
    + geom_ydotplot(binwidth=binwidth, dotsize=1.5)
    + ggtitle("dotsize=1.5")
)

plot_matrix([p1, p2, p3])

### `center`

In [13]:
# 'histodot' binning without and with an explicit `center`.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(binwidth=binwidth, method='histodot')
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(binwidth=binwidth, method='histodot', center=11.0)
    + ggtitle("center=11.0")
)

plot_matrix([p1, p2])

### `boundary`

In [14]:
# 'histodot' binning without and with an explicit `boundary`.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(binwidth=binwidth, method='histodot')
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(binwidth=binwidth, method='histodot', boundary=11.0)
    + ggtitle("boundary=11.0")
)

plot_matrix([p1, p2])

### `bins`

In [15]:
# 'histodot' binning with `bins` unset and with bins=25 (no binwidth given).
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(method='histodot')
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(method='histodot', bins=25)
    + ggtitle("bins=25")
)

plot_matrix([p1, p2])

## Grouping

In [16]:
# Dots filled by (discrete) year, without and with stackgroups=True.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        binwidth=binwidth,
        stackratio=.75,
        color='black',
    )
    + ggtitle("method='dotdensity', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        binwidth=binwidth,
        stackratio=.75,
        stackgroups=True,
        color='black',
    )
    + ggtitle("method='dotdensity', stackgroups=True")
)

plot_matrix([p1, p2], width=800, height=400, columns=1)

In [17]:
# Same grouping comparison as above, using method='histodot'.
p = ggplot(df, aes(x="drv", y="hwy"))
p1 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        binwidth=binwidth,
        stackratio=.75,
        method='histodot',
        color='black',
    )
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        binwidth=binwidth,
        stackratio=.75,
        method='histodot',
        stackgroups=True,
        color='black',
    )
    + ggtitle("method='histodot', stackgroups=True")
)

plot_matrix([p1, p2], width=800, height=400, columns=1)

## Tooltips

In [18]:
# Custom tooltip lines built from the x/y aesthetics and the
# ..count.. / ..binwidth.. variables.
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_ydotplot(
        binwidth=binwidth,
        stackratio=.75,
        tooltips=(
            layer_tooltips()
            .line("^x")
            .line("Stack center|^y")
            .line("Number of dots in stack|@..count..")
            .line("Width of the bin|@..binwidth..")
        ),
    )
    + ggtitle("With tooltips")
)

## Facetting

In [19]:
# One facet per value of "year", dots filled by "drv".
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_ydotplot(aes(fill="drv"), binwidth=binwidth, color='black')
    + facet_grid(x="year")
    + ggtitle("facet_grid()")
)

## Flip coordinates

In [20]:
# Same kind of plot with coord_flip() applied.
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_ydotplot(
        aes(fill="drv"),
        binwidth=binwidth,
        stackratio=.5,
        dotsize=.5,
        color='black',
    )
    + coord_flip()
    + ggtitle("Flip coordinates")
)

## "identity" statistic

In [21]:
def _binned_counts(group, drv, binwidth):
    """Histogram ``group.hwy`` and return one row per bin.

    The number of bins comes from ``get_bincount(group, "hwy", binwidth)``.
    Columns of the result: ``count`` (bin count), ``hwy`` (left edge of the
    bin), ``drv`` and ``binwidth`` (both constant for the whole frame).
    """
    counts, edges = np.histogram(group.hwy, bins=get_bincount(group, "hwy", binwidth))
    return pd.DataFrame({
        "count": counts,
        # np.histogram returns one more edge than counts; keep the left edges.
        "hwy": edges[:-1],
        "drv": drv,
        "binwidth": binwidth,
    })

# Pre-computed per-"drv" histograms, used as input for stat='identity'.
# Each group is filtered and its bin count computed exactly once.
identity_df = pd.concat([
    _binned_counts(df[df.drv == drv], drv, binwidth)
    for drv in df.drv.unique()
])
identity_df.head()

Unnamed: 0,count,hwy,drv,binwidth
0,3,12.0,4,1.6
1,5,13.6,4,1.6
2,2,15.2,4,1.6
3,13,16.8,4,1.6
4,5,18.4,4,1.6


In [22]:
# Pre-binned data: stack size and bin width are mapped from columns and
# the geom is told not to compute them (stat='identity').
(
    ggplot(
        identity_df,
        aes(x="drv", y="hwy", stacksize="count", binwidth="binwidth"),
    )
    + geom_ydotplot(
        aes(fill="drv"),
        stat='identity',
        stackratio=.75,
        color="black",
    )
    + ggtitle("stat='identity'")
)

## Additional layers

In [23]:
# Dot plot combined with a Brewer fill scale and the grey theme.
(
    ggplot(df, aes(x="drv", y="hwy"))
    + geom_ydotplot(
        aes(fill=as_discrete("drv")),
        method='histodot',
        bins=15,
        stackdir='center',
        stackratio=.75,
        color='black',
        alpha=.5,
        size=.2,
    )
    + scale_fill_brewer(type='qual', palette='Set1')
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)