In [1]:
from pandas import DataFrame
import numpy as np
from lets_plot import *

# Initialise the lets-plot front end for this notebook.
# Newer lets-plot releases expose this as LetsPlot.setup_html(); the older
# load_lets_plot_js() helper is kept as a fallback so the notebook still runs
# against the library version it was originally written for.
if 'LetsPlot' in globals():
    LetsPlot.setup_html()
else:
    load_lets_plot_js()

In [2]:
# Source of this example: http://www.cookbook-r.com/Graphs/Plotting_distributions_(ggplot2)
#
# Two conditions, 200 samples each, drawn from normal distributions with
# means 0 and 0.8 (unit standard deviation). The seed is fixed so every run
# produces the same frame.
np.random.seed(123)
rating_a = np.random.normal(0, 1, 200)
rating_b = np.random.normal(.8, 1, 200)
data = DataFrame({
    'cond': np.repeat(['A', 'B'], 200),
    'rating': np.concatenate((rating_a, rating_b)),
})

In [3]:
# Base plot reused by the single-distribution examples below:
# 'rating' on the x axis, rendered at 500x250.
p = (
    ggplot(data, aes(x='rating'))
    + ggsize(500, 250)
)

# Basic histogram of "rating" with bins 0.5 wide
p + geom_histogram(binwidth=0.5)

In [4]:
# Histogram with a kernel density curve drawn on top:
#  - the histogram's y axis is mapped to '..density..' rather than the count
#  - the density layer is filled (integer literal 0xFF6666) and mostly transparent
(
    p
    + geom_histogram(aes(y='..density..'), binwidth=0.5, colour="black", fill="white")
    + geom_density(alpha=0.2, fill=0xFF6666)
)

In [5]:
# Histogram with a dashed red vertical line at the overall mean rating
overall_mean = np.mean(data['rating'])
(
    p
    + geom_histogram(binwidth=0.5, colour="black", fill="white")
    + geom_vline(xintercept=overall_mean, color="red", linetype="dashed", size=1)
)

### Histogram and density plots with multiple groups

In [6]:
# Base plot for the grouped histograms: 'rating' on x, fill mapped to 'cond'.
p1 = (
    ggplot(data, aes(x='rating', fill='cond'))
    + ggsize(500, 250)
)

# Histogram with the default position (stacked)
p1 + geom_histogram(binwidth=0.5, alpha=0.5)

In [7]:
# Overlaid histograms: position="identity" replaces the default stacking,
# and alpha keeps both groups visible where they overlap.
p1 + geom_histogram(
    position="identity",
    binwidth=0.5,
    alpha=0.5,
)

In [8]:
# Interleaved histograms: same bins as above, with position="dodge"
p1 + geom_histogram(
    position="dodge",
    binwidth=0.5,
    alpha=0.5,
)

In [9]:
# Base plot for the grouped density examples: 'rating' on x, line colour
# mapped to 'cond'.
p2 = (
    ggplot(data, aes(x='rating', color='cond'))
    + ggsize(500, 250)
)

# Density plot, one curve per condition
p2 + geom_density()

In [10]:
# Density curves with a semi-transparent fill, also mapped to 'cond'
p2 + geom_density(
    aes(fill='cond'),
    alpha=0.3,
)

In [11]:
# Mean of each group. as_index=False keeps 'cond' as an ordinary column,
# so cdat can be passed to a layer as its own data source
# with both 'cond' and 'rating' available for mapping.
ratings_by_cond = data.groupby(['cond'], as_index=False)
cdat = ratings_by_cond.mean()
cdat

Unnamed: 0,cond,rating
0,A,0.003787
1,B,0.685638


In [12]:
# Overlaid histograms with means
#  - binwidth=.5 matches every other histogram in this notebook; without it the
#    layer falls back to its default binning and the bars are not comparable
#    with the earlier overlaid histogram
#  - size=0 gives the bars a zero-width outline (p2 maps color to 'cond')
#  - the dashed vertical lines come from cdat: one per condition, at its mean rating
p2 + geom_histogram(aes(fill='cond'), binwidth=.5, alpha=.3, position="identity", size=0) \
+ geom_vline(data=cdat, mapping=aes(xintercept='rating',  color='cond'), linetype="dashed", size=1)

In [13]:
# Density curves per condition, plus a dashed vertical line at each
# condition's mean rating (taken from cdat, coloured by 'cond').
(
    p2
    + geom_density()
    + geom_vline(
        data=cdat,
        mapping=aes(xintercept='rating', color='cond'),
        linetype="dashed",
        size=1,
    )
)

### Using facets

In [14]:
# One histogram panel per condition; 'cond' is passed as facet_grid's
# first (x) argument.
(
    ggplot(data, aes(x='rating'))
    + geom_histogram(binwidth=0.5, colour="black", fill="white")
    + facet_grid(x='cond')
)

In [15]:
# Faceted histograms with a dashed red line at each condition's mean.
# The means come from cdat (computed earlier); here 'cond' is passed as
# facet_grid's second (y) argument.
(
    ggplot(data, aes(x='rating'))
    + geom_histogram(binwidth=0.5, colour="black", fill="white")
    + geom_vline(
        data=cdat,
        mapping=aes(xintercept='rating'),
        linetype="dashed",
        size=1,
        colour="red",
    )
    + facet_grid(y='cond')
    + ggsize(500, 250)
)


### Box plots

In [16]:
# Base plot for the box plot examples: condition on x, rating on y,
# rendered at 300x200.
p3 = (
    ggplot(data, aes(x='cond', y='rating'))
    + ggsize(300, 200)
)

# A basic box plot
p3 + geom_boxplot()

In [17]:
# A basic box plot with each condition's box filled by 'cond'
p3 + geom_boxplot(
    aes(fill='cond'),
)

In [18]:
# Box plot with restyled outlier points: red, shape 8, size 5
p3 + geom_boxplot(
    outlier_color='red',
    outlier_shape=8,
    outlier_size=5,
)