# statSummaryBin()

In [1]:
%useLatestDescriptors
%use lets-plot
%use dataframe

In [2]:
// Show which Lets-Plot Kotlin API / Lets-Plot JS versions this notebook runs with.
LetsPlot.getInfo()

Lets-Plot Kotlin API v.4.4.2-alpha4. Frontend: Notebook with dynamically loaded JS. Lets-Plot JS v.4.0.0.

In [3]:
// Load the iris dataset from the lets-plot-docs repository.
// `irisDf` is never reassigned, so it is declared as a read-only `val`.
val irisDf = DataFrame.readCSV("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv")
// Column-name -> values map; this is the form passed to letsPlot(...).
val irisData = irisDf.toMap()
// Last expression of the cell: the head of the data frame is displayed as the cell output.
irisDf.head()

## 1. Default

In [4]:
// statSummaryBin() with no arguments: petal_length on x, sepal_length on y.
letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
} + statSummaryBin()

## 2. Options

### 2.1. `geom` parameter

In [5]:
// Draw the binned summary with crossbars (bin width 0.5) and add the raw points
// as a semi-transparent layer.
letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
} + statSummaryBin(
    geom = Geom.crossbar(),
    binWidth = 0.5,
) + geomPoint(alpha = 1.0 / 3)

### 2.2. `fn`, `fnMin`, `fnMax`

In [6]:
// Same crossbar layer, but the summary functions are selected by name
// through fn, fnMin and fnMax.
letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
} + statSummaryBin(
    geom = Geom.crossbar(),
    binWidth = 0.5,
    fn = "mq",
    fnMin = "lq",
    fnMax = "uq",
) + geomPoint(alpha = 1.0 / 3)

### 2.3. `quantiles`

In [7]:
// As in the previous example, with an explicit `quantiles` list (0.05, 0.5, 0.95)
// passed alongside the "mq"/"lq"/"uq" functions.
letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
} + statSummaryBin(
    geom = Geom.crossbar(),
    binWidth = 0.5,
    fn = "mq",
    fnMin = "lq",
    fnMax = "uq",
    quantiles = listOf(0.05, 0.5, 0.95),
) + geomPoint(alpha = 1.0 / 3)

### 2.4. Arbitrary aesthetics

In [8]:
// Map statistics produced by the stat ("..lq..", "..mq..", "..uq..") to
// aesthetics of the layer: color, shape, size and stroke.
letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
} + statSummaryBin {
    color = "..lq.."
    shape = "..mq.."
    size = "..uq.."
    stroke = "..mq.."
}

### 2.5. `orientation` and `coordFlip()`

In [9]:
// Two variants shown in one grid:
//  1) x/y mappings swapped and orientation = "y" on the stat layer;
//  2) the original mappings with the default orientation, followed by coordFlip().
gggrid(
    listOf(
        letsPlot(irisData) {
            x = "sepal_length"
            y = "petal_length"
        } + statSummaryBin(orientation = "y") { color = "..ymax.." },
        letsPlot(irisData) {
            x = "petal_length"
            y = "sepal_length"
        } + statSummaryBin { color = "..ymax.." } + coordFlip(),
    )
)

### 2.6. Additional grouping

In [10]:
/**
 * Builds a reproducible random dataset with the columns "x", "y" and "g".
 *
 * All values come from one `java.util.Random` seeded with [seed]. The columns are
 * generated one after another: "x" via `nextDouble()`, then "y" via `nextGaussian()`,
 * then "g" by picking an element of [categories] via `nextInt(categories.size)`.
 *
 * @param n number of values in every column
 * @param seed seed of the random generator
 * @param categories labels the "g" column is drawn from
 * @return map from column name to a list of [n] values
 */
fun getData(n: Int = 100, seed: Long = 42, categories: List<String> = listOf("A", "B")): Map<String, Any> {
    val generator = java.util.Random(seed)
    // The three columns share one generator, so the order of these statements
    // determines the generated values.
    val xs = List(n) { generator.nextDouble() }
    val ys = List(n) { generator.nextGaussian() }
    val groups = List(n) { categories[generator.nextInt(categories.size)] }
    return mapOf("x" to xs, "y" to ys, "g" to groups)
}

// The "g" column is mapped to color; the summary layer uses bin width 0.25,
// boundary 0.0 and positionDodge().
letsPlot(getData()) {
    x = "x"
    y = "y"
    color = "g"
} + statSummaryBin(
    binWidth = 0.25,
    boundary = 0.0,
    position = positionDodge(),
)

### 2.7. Other parameters

In [11]:
/**
 * Builds the iris crossbar summary plot (bin width 0.5, fill mapped to "..count..")
 * with white-filled points on top.
 *
 * [center] and [boundary] are passed to `statSummaryBin()` unchanged and are also
 * echoed in the plot title.
 */
fun getPlot(center: Double? = null, boundary: Double? = null): org.jetbrains.letsPlot.intern.Plot =
    letsPlot(irisData) {
        x = "petal_length"
        y = "sepal_length"
    } + statSummaryBin(
        geom = Geom.crossbar(),
        binWidth = 0.5,
        center = center,
        boundary = boundary,
    ) {
        fill = "..count.."
    } + geomPoint(
        shape = 21,
        color = "black",
        fill = "white",
    ) + ggtitle("center = $center; boundary = $boundary")

// Compare three settings: neither center nor boundary set, center = 1.0, boundary = 1.0.
gggrid(
    listOf(
        getPlot(),
        getPlot(center = 1.0),
        getPlot(boundary = 1.0),
    ),
    ncol = 2,
)

## 3. All possible geoms

In [12]:
// Every panel starts from the same petal_length / sepal_length mapping.
val irisSummaryBase = letsPlot(irisData) {
    x = "petal_length"
    y = "sepal_length"
}
// Most panels draw the summary layer (in red) over the raw points;
// the bar and histogram panels use the bare base plot instead.
val irisSummaryPoints = irisSummaryBase + geomPoint()

// One panel per geometry, titled after the corresponding geom function.
gggrid(
    listOf(
        irisSummaryPoints + statSummaryBin(geom = Geom.point(), color = "red") + ggtitle("geomPoint()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.path(), color = "red") + ggtitle("geomPath()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.line(), color = "red") + ggtitle("geomLine()"),
        irisSummaryBase + statSummaryBin(geom = Geom.bar(), fn = "count", fill = "red") + ggtitle("geomBar()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.lollipop(fatten = 1.0), color = "red") +
            ggtitle("geomLollipop()"),
        irisSummaryBase + statSummaryBin(geom = Geom.histogram(), fn = "count", fill = "red") +
            ggtitle("geomHistogram()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.errorbar(), color = "red") + ggtitle("geomErrorBar()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.crossbar(), color = "red", alpha = 0.5) +
            ggtitle("geomCrossBar()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.linerange(), color = "red") + ggtitle("geomLineRange()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.pointrange(), color = "red") + ggtitle("geomPointRange()"),
        irisSummaryPoints + statSummaryBin(
            geom = Geom.boxplot() {
                middle = "..mq.."
                lower = "..lq.."
                upper = "..uq.."
            },
            color = "red",
        ) + ggtitle("geomBoxPlot()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.ribbon(), color = "red") + ggtitle("geomRibbon()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.area(), color = "red") + ggtitle("geomArea()"),
        irisSummaryPoints + statSummaryBin(geom = Geom.step(), color = "red") + ggtitle("geomStep()"),
        irisSummaryPoints + statSummaryBin(
            geom = Geom.segment() {
                xend = "petal_length"
                yend = "..ymax.."
            },
            fn = "min",
            color = "red",
        ) + ggtitle("geomSegment()"),
    ),
    ncol = 3,
)

## 4. Tests

In [13]:
/**
 * Builds a default `statSummaryBin()` plot of the "x" and "y" columns of [dataset],
 * titled with [title].
 */
fun plotTest(dataset: Map<String, Any?>, title: String): org.jetbrains.letsPlot.intern.Plot =
    letsPlot(dataset) {
        x = "x"
        y = "y"
    } + statSummaryBin() + ggtitle(title)

// Edge-case inputs, each paired with the title of its panel.
val summaryBinEdgeCases: List<Pair<String, Map<String, Any?>>> = listOf(
    "Empty data" to mapOf("x" to listOf<String>(), "y" to listOf<Double>()),
    "One value" to mapOf("x" to listOf("A"), "y" to listOf(0)),
    "NaN's in data" to mapOf("x" to listOf("A", "B", "C", null), "y" to listOf(0, null, Double.NaN, 1)),
    "All x's is NaN" to mapOf("x" to listOf(null, null), "y" to listOf(0, 1)),
    "All y's is NaN" to mapOf("x" to listOf("A", "B"), "y" to listOf(null, null)),
)

// Render every case with plotTest(), keeping the order of the list above.
gggrid(
    summaryBinEdgeCases.map { (title, dataset) -> plotTest(dataset, title) },
    ncol = 2,
)

In [14]:
// Map color to the "..x.." and "..y.." stat variables, one panel each.
gggrid(
    listOf(
        letsPlot(irisData) {
            x = "petal_length"
            y = "sepal_length"
        } + statSummaryBin { color = "..x.." } + ggtitle("color='..x..'"),
        letsPlot(irisData) {
            x = "petal_length"
            y = "sepal_length"
        } + statSummaryBin { color = "..y.." } + ggtitle("color='..y..'"),
    )
)