In [1]:
%useLatestDescriptors
%use lets-plot

In [2]:
import java.util.Random

In [3]:
// Synthetic data set with two conditions of `n` observations each:
//   "A": standard Gaussian draws
//   "B": Gaussian draws scaled by 1.5 and shifted by 1.5
// The generator is seeded so the notebook output is reproducible.
val rand = java.util.Random(123)
val n = 200
val data = mapOf<String, Any>(
    // First n labels are "A", the remaining n are "B".
    "cond" to List(2 * n) { i -> if (i < n) "A" else "B" },
    // The initializer runs in index order, so all "A" draws are taken before any "B" draw.
    "rating" to List(2 * n) { i ->
        if (i < n) rand.nextGaussian() else rand.nextGaussian() * 1.5 + 1.5
    },
)

In [4]:
// Shared base plot: "rating" on the x axis, sized 500x250 (reused by the next cells)
val p = letsPlot(data) {
    x = "rating"
} + ggsize(500, 250)

// Basic histogram of "rating" with a bin width of 0.5
p + geomHistogram(binWidth = 0.5)

In [5]:
// Histogram with a kernel density curve drawn over it:
//  - the histogram maps y to "..density.." so it shows density rather than count
//  - the density layer is mostly transparent (alpha = 0.2) so the bars stay visible
p +
    geomHistogram(
        fill = "paper",
        binWidth = 0.5,
    ) { y = "..density.." } +
    geomDensity(
        fill = 0xFF6666,
        alpha = 0.2,
    )

In [6]:
// Histogram of "rating" with a dashed red vertical line at the mean of all ratings
p +
    geomHistogram(fill = "paper", binWidth = 0.5) +
    geomVLine(
        linetype = "dashed",
        color = "red",
        xintercept = (data["rating"] as List<Double>).average(),
    )

### Histogram and density plots with multiple groups

In [7]:
// Base plot for the grouped histograms: "rating" on x, fill mapped to the "cond" column
val p1 = letsPlot(data) {
    x = "rating"
    fill = "cond"
} + ggsize(500, 250)

// Histogram with the default position (stacked)
p1 + geomHistogram(binWidth = 0.5)

In [8]:
// Overlaid histograms: identity position, partially transparent so both groups remain visible
p1 + geomHistogram(
    position = positionIdentity,
    alpha = 0.7,
    binWidth = 0.5,
)

In [9]:
// Interleaved histograms: the groups are dodged instead of stacked
p1 + geomHistogram(
    position = positionDodge(),
    binWidth = 0.5,
)

In [10]:
// Base plot for the grouped density plots: "rating" on x, line color mapped to "cond"
val p2 = ggplot(data) {
    x = "rating"
    color = "cond"
} + ggsize(500, 250)

// Density plot, one curve per condition
p2 + geomDensity()

In [11]:
// Density plot whose semi-transparent fill is also mapped to "cond"
p2 + geomDensity(alpha = 0.3) {
    fill = "cond"
}

In [12]:
// Mean rating per condition: pair every label with its rating, collect the
// ratings under their label, then average each group.
val means = (data["cond"] as List<String>)
    .zip(data["rating"] as List<Double>)
    .groupBy({ it.first }, { it.second })
    .mapValues { (_, ratings) -> ratings.average() }

// Column-oriented data set of the group means, used as layer data by the following cells
val cdat = mapOf(
    "cond" to means.keys,
    "rating" to means.values,
)
cdat

{cond=[A, B], rating=[-0.011843241476365302, 1.5547269440141214]}

In [13]:
// Overlaid 10-bin histograms filled by condition, plus a dashed vertical line
// per condition at the group mean taken from `cdat`
p2 +
    geomHistogram(
        bins = 10,
        size = 0.0,
        alpha = 0.3,
        position = positionIdentity,
    ) { fill = "cond" } +
    geomVLine(linetype = "dashed", data = cdat) {
        xintercept = "rating"
        color = "cond"
    }

In [14]:
// Use freqpoly instead of histogram; the dashed lines mark the group means from `cdat`
p2 +
    geomFreqpoly(bins = 10) { color = "cond" } +
    geomVLine(linetype = "dashed", data = cdat) {
        xintercept = "rating"
        color = "cond"
    }

In [15]:
// Density curves per condition with a dashed vertical line at each group mean from `cdat`
p2 +
    geomDensity() +
    geomVLine(linetype = "dashed", data = cdat) {
        xintercept = "rating"
        color = "cond"
    }

### Using facets

In [16]:
// Histogram of "rating" with a bin width of 0.5, faceted by the "cond" column
ggplot(data) { x = "rating" } +
    geomHistogram(
        fill = "paper",
        color = "pen",
        binWidth = 0.5,
    ) +
    facetGrid("cond")

### Box plots

In [17]:
// Base plot for the box plots: "cond" on the y axis, "rating" on the x axis, sized 500x300
val p3 = ggplot(data) {
    y = "cond"
    x = "rating"
} + ggsize(500, 300)

// A basic box plot
p3 + geomBoxplot()

In [18]:
// A basic box plot with the boxes filled by condition
p3 + geomBoxplot { fill = "cond" }

In [19]:
// Box plot with restyled outliers: red, shape 8, size 1.5
p3 + geomBoxplot(
    outlierSize = 1.5,
    outlierShape = 8,
    outlierColor = "red",
)