# `statSummary()`

In [1]:
%useLatestDescriptors
%use lets-plot
%use dataframe

In [2]:
// Report the Lets-Plot Kotlin API / JS versions and the frontend used by this notebook session.
LetsPlot.getInfo()

Lets-Plot Kotlin API v.4.4.2-alpha3. Frontend: Notebook with dynamically loaded JS. Lets-Plot JS v.4.0.0.

In [3]:
// Read the mpg dataset straight from the lets-plot-kotlin repository.
// Declared as `val`: the frame is only read after this point, never reassigned.
val mpgDf = DataFrame.readCSV("https://raw.githubusercontent.com/JetBrains/lets-plot-kotlin/master/docs/examples/data/mpg.csv")
// Map form of the same frame; this is what the plots below pass to letsPlot()/ggplot().
val mpgData = mpgDf.toMap()
// Last expression of the cell: show the head of the frame as the cell output.
mpgDf.head()

## 1. Default

In [4]:
// Baseline example: statSummary() with every argument left at its default.
letsPlot(mpgData) {
    x = "drv"
    y = "hwy"
} + statSummary()

## 2. Options

### 2.1. `geom` parameter

In [5]:
// Draw the summary of hwy per drv with a point geom, selected through the `geom` parameter.
letsPlot(mpgData) {
    x = "drv"
    y = "hwy"
} + statSummary(geom = Geom.point())

### 2.2. `fn`, `fnMin`, `fnMax`

In [6]:
// Each plot pairs a geomBar() layer (red fill, nudged by -0.2) with a statSummary() layer
// drawn as bars (blue fill, nudged by +0.2) so the two can be compared per `fl` category.
gggrid(
    listOf(
        // First plot: plain geomBar() next to statSummary(fn = "count") over hwy.
        ggplot(mpgData) { x = "fl" } +
            geomBar(
                width = 0.35,
                position = positionNudge(x = -0.2),
                color = "black",
                fill = "#fbb4ae",
                alpha = 0.5,
            ) +
            statSummary(
                geom = Geom.bar(width = 0.35),
                fn = "count",
                position = positionNudge(x = 0.2),
                color = "black",
                fill = "#b3cde3",
                alpha = 0.5,
            ) { y = "hwy" } +
            ggtitle("Count (red) vs. summary (blue)"),
        // Second plot: geomBar() weighted by hwy next to statSummary(fn = "sum") over hwy.
        ggplot(mpgData) { x = "fl" } +
            geomBar(
                width = 0.35,
                position = positionNudge(x = -0.2),
                color = "black",
                fill = "#fbb4ae",
                alpha = 0.5,
            ) { weight = "hwy" } +
            statSummary(
                geom = Geom.bar(width = 0.35),
                fn = "sum",
                position = positionNudge(x = 0.2),
                color = "black",
                fill = "#b3cde3",
                alpha = 0.5,
            ) { y = "hwy" } +
            ggtitle("Weighted count (red) vs. summary (blue)"),
    )
)

### 2.3. `quantiles`

In [7]:
// Overlay three summaries of hwy per drv: the default one, plus two that use the
// "mq"/"lq"/"uq" functions and are nudged to either side so they do not overlap.
letsPlot(mpgData) {
    x = "drv"
    y = "hwy"
} +
    statSummary() +
    statSummary(
        fn = "mq",
        fnMin = "lq",
        fnMax = "uq",
        position = positionNudge(x = -0.1),
        color = "#1b9e77",
    ) +
    // Same functions again, this time with an explicit `quantiles` list.
    statSummary(
        fn = "mq",
        fnMin = "lq",
        fnMax = "uq",
        position = positionNudge(x = 0.1),
        color = "#d95f02",
        quantiles = listOf(1.0 / 3, 1.0 / 2, 2.0 / 3),
    )

### 2.4. Arbitrary aesthetics

In [8]:
// Map several aesthetics of the summary layer to the ..lq.., ..mq.. and ..uq.. stat variables.
letsPlot(mpgData) {
    x = "drv"
    y = "hwy"
} + statSummary {
    color = "..lq.."
    shape = "..mq.."
    size = "..uq.."
    stroke = "..mq.."
}

### 2.5. `orientation` and `coordFlip()`

In [9]:
// Compare two setups: x/y swapped together with orientation = "y",
// and the usual drv/hwy mapping followed by coordFlip().
gggrid(
    listOf(
        letsPlot(mpgData) {
            x = "hwy"
            y = "drv"
        } + statSummary(orientation = "y") { color = "..ymax.." },
        letsPlot(mpgData) {
            x = "drv"
            y = "hwy"
        } +
            statSummary { color = "..ymax.." } +
            coordFlip(),
    )
)

### 2.6. Other parameters

In [10]:
// Crossbar summary with fn = "mean", dodged and filled by `year` (treated as discrete),
// with the legend hidden and custom tooltip lines for ymax / y / ymin.
letsPlot(mpgData) {
    x = "drv"
    y = "hwy"
} +
    statSummary(
        fn = "mean",
        geom = Geom.crossbar(),
        showLegend = false,
        position = positionDodge(),
        tooltips = layerTooltips()
            .line("max|^ymax")
            .line("mean|^y")
            .line("min|^ymin"),
    ) { fill = asDiscrete("year") } +
    scaleFillBrewer(type = "qual", palette = "Pastel2")

## 3. All possible geoms

In [11]:
// One plot per geom passed to statSummary(), laid out in a three-column grid.
// The first eight plots summarise hwy per drv; the last three use cty on the x axis.
gggrid(
    listOf(
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.point()) +
            ggtitle("geomPoint()"),
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.lollipop()) +
            ggtitle("geomLollipop()"),
        // The bar example is the only one that overrides fn (with "count").
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.bar(), fn = "count") +
            ggtitle("geomBar()"),
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.errorbar()) +
            ggtitle("geomErrorBar()"),
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.crossbar()) +
            ggtitle("geomCrossBar()"),
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.linerange()) +
            ggtitle("geomLineRange()"),
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(geom = Geom.pointrange()) +
            ggtitle("geomPointRange()"),
        // The boxplot geom gets its middle/lower/upper aesthetics from the quantile stat variables.
        letsPlot(mpgData) { x = "drv"; y = "hwy" } +
            statSummary(
                geom = Geom.boxplot() {
                    middle = "..mq.."
                    lower = "..lq.."
                    upper = "..uq.."
                },
            ) +
            ggtitle("geomBoxPlot()"),
        letsPlot(mpgData) { x = "cty"; y = "hwy" } +
            statSummary(geom = Geom.line()) +
            ggtitle("geomLine()"),
        letsPlot(mpgData) { x = "cty"; y = "hwy" } +
            statSummary(geom = Geom.ribbon()) +
            ggtitle("geomRibbon()"),
        letsPlot(mpgData) { x = "cty"; y = "hwy" } +
            statSummary(geom = Geom.step()) +
            ggtitle("geomStep()"),
    ),
    ncol = 3,
)

## 4. Tests

In [12]:
/**
 * Builds a single test plot: the "x" and "y" columns of [dataset] are mapped to the x and y
 * aesthetics, a `statSummary()` layer with default arguments is added, and [title] is used
 * as the plot title.
 */
fun plotTest(dataset: Map<String, Any?>, title: String): org.jetbrains.letsPlot.intern.Plot =
    letsPlot(dataset) {
        x = "x"
        y = "y"
    } + statSummary() + ggtitle(title)

// Edge-case datasets fed through plotTest(): empty columns, a single row,
// null/NaN values mixed into the data, and columns consisting only of nulls.
gggrid(
    listOf(
        plotTest(
            dataset = mapOf("x" to listOf<String>(), "y" to listOf<Double>()),
            title = "Empty data",
        ),
        plotTest(
            dataset = mapOf("x" to listOf("A"), "y" to listOf(0)),
            title = "One value",
        ),
        plotTest(
            dataset = mapOf("x" to listOf("A", "B", "C", null), "y" to listOf(0, null, Double.NaN, 1)),
            title = "NaN's in data",
        ),
        plotTest(
            dataset = mapOf("x" to listOf(null, null), "y" to listOf(0, 1)),
            title = "All x's is NaN",
        ),
        plotTest(
            dataset = mapOf("x" to listOf("A", "B"), "y" to listOf(null, null)),
            title = "All y's is NaN",
        ),
    ),
    ncol = 2,
)

In [13]:
// Check that color can be mapped to the ..x.. and ..y.. stat variables of statSummary().
gggrid(
    listOf(
        letsPlot(mpgData) {
            x = "drv"
            y = "hwy"
        } +
            statSummary { color = "..x.." } +
            ggtitle("color='..x..'"),
        letsPlot(mpgData) {
            x = "drv"
            y = "hwy"
        } +
            statSummary { color = "..y.." } +
            ggtitle("color='..y..'"),
    )
)