[Origin](https://raw.githubusercontent.com/JetBrains/lets-plot-kotlin/master/docs/examples/jupyter-notebooks/distributions.ipynb)

In [36]:
%useLatestDescriptors
%use kandy
import java.util.Random

In [37]:
// Synthetic sample: the first `n` rows belong to condition "A" and take the raw
// Gaussian draw; the next `n` rows belong to condition "B" and take the draw
// scaled by 1.5 and shifted by 1.5. The generator is seeded with 123, so every
// run draws the same sequence.
val rand = java.util.Random(123)
val n = 200
val df = dataFrameOf(
    "cond" to List(2 * n) { index -> if (index < n) "A" else "B" },
    "rating" to List(2 * n) { index ->
        val draw = rand.nextGaussian()
        if (index < n) draw else draw * 1.5 + 1.5
    },
)

In [4]:
// Basic histogram of "rating" (lets-plot reference version).
// `p` is the base plot reused by the following lets-plot cells: x is mapped to
// "rating" and the plot is sized 500x250.
// NOTE(review): `data` is not defined in the visible cells — presumably the
// column map from the original lets-plot notebook; confirm before re-running.
val p = letsPlot(data) { x = "rating" } + ggsize(500, 250)
p + geomHistogram(binWidth=0.5)

In [39]:
// kandy counterpart of the basic histogram: `rating` binned with a fixed bin
// width of 0.5, drawn on a 500x250 plot.
df.plot {
    histogram(rating, binsOption = BinsOption.byWidth(0.5))
    layout {
        size = 500 to 250
    }
}

In [6]:
// Histogram overlaid with kernel density curve
//  - histogram with density instead of count on y-axis
//  - overlay with transparent density plot
// Builds on the base plot `p` defined in the basic-histogram cell.

p + geomHistogram(binWidth=0.5, color="black", fill="white") { y = "..density.." } +
    geomDensity(alpha=0.2, fill=0xFF6666)

In [41]:
// kandy counterpart: white, black-bordered histogram with `Stat.density` on the
// y-axis, overlaid with a translucent density curve.
df.plot {
    histogram(rating, binsOption = BinsOption.byWidth(0.5)) {
        // Map y to the density statistic instead of the default.
        y(Stat.density)
        width = 1.0
        borderLine.color = Color.BLACK
        fillColor = Color.WHITE
    }
    densityPlot(rating) {
        fillColor = Color.hex(0xFF6666)
        alpha = 0.2
    }
    layout {
        size = 500 to 250
    }
}

In [8]:
// Histogram with a dashed red vertical line at the average of "rating"
// (lets-plot reference version, built on the base plot `p`).
// NOTE(review): the unchecked cast assumes data["rating"] holds Doubles — confirm.
p + geomHistogram(binWidth=.5, color="black", fill="white") +
    geomVLine(xintercept=(data["rating"] as List<Double>).average(), color="red", linetype="dashed", size=1.0)

In [47]:
// kandy counterpart: histogram plus a dashed red vertical line at the mean rating.
df.plot {
    // Computed once; passed both to the bin alignment and to the line position.
    val ratingMean = rating.mean()
    histogram(
        rating,
        binsOption = BinsOption.byWidth(0.5),
        binsAlign = BinsAlign.center(ratingMean),
    ) {
        fillColor = Color.WHITE
        borderLine.color = Color.BLACK
        width = 1.0
    }
    vLine {
        xIntercept.constant(ratingMean)
        type = LineType.DASHED
        color = Color.rgb(255, 0, 0)
        width = 1.2
    }
    layout {
        size = 500 to 250
    }
}

### Histogram and density plots with multiple groups

In [10]:
// Base plot for the grouped lets-plot histograms: x mapped to "rating",
// fill mapped to "cond", sized 500x250.
// NOTE(review): `data` is not defined in the visible cells — confirm where it comes from.
val p1 = letsPlot(data) {x = "rating"; fill="cond"} + ggsize(500, 250)

// Default histogram (stacked)
p1 + geomHistogram(binWidth=0.5)

In [48]:
// Rows of `df` grouped by `cond`; the per-condition kandy plots below call
// `grouped.plot` and refer to the grouping column as `key.cond`.
val grouped = df.groupBy {cond}

In [52]:
// kandy counterpart of the default (stacked) histogram: bars filled by
// condition and positioned with Position.stack().
grouped.plot {
    histogram(rating, binsOption = BinsOption.byWidth(0.5)) {
        position = Position.stack()
        width = 1.0
        fillColor(key.cond)
    }
    layout {
        size = 500 to 250
    }
}

In [12]:
// Overlaid histograms
// NOTE(review): the recorded output for this cell is "Unresolved reference: Pos",
// so it did not compile in that session — presumably the lets-plot import that
// provides `Pos` was not loaded; confirm before re-running.
p1 + geomHistogram(binWidth=0.5, alpha=0.7, position=Pos.identity)

Line_27.jupyter.kts (2:54 - 57) Unresolved reference: Pos

In [53]:
// kandy counterpart of the overlaid histograms: bars filled by condition,
// drawn at 0.7 alpha with Position.identity().
grouped.plot {
    histogram(rating, binsOption = BinsOption.byWidth(0.5)) {
        position = Position.identity()
        alpha = 0.7
        width = 1.0
        fillColor(key.cond)
    }
    layout {
        size = 500 to 250
    }
}

In [14]:
// Interleaved histograms
// NOTE(review): the recorded output for this cell is "Unresolved reference: Pos",
// so it did not compile in that session — presumably the lets-plot import that
// provides `Pos` was not loaded; confirm before re-running.
p1 + geomHistogram(binWidth=0.5, position=Pos.dodge)

Line_29.jupyter.kts (2:43 - 46) Unresolved reference: Pos

In [54]:
// kandy counterpart of the interleaved histograms: bars filled by condition
// and positioned with Position.dodge().
grouped.plot {
    histogram(rating, binsOption = BinsOption.byWidth(0.5)) {
        position = Position.dodge()
        width = 1.0
        fillColor(key.cond)
    }
    layout {
        size = 500 to 250
    }
}

In [16]:
// Density plot
// `p2` is the base plot reused by the later lets-plot cells: x mapped to
// "rating", color mapped to "cond", sized 500x250.
// NOTE(review): `data` is not defined in the visible cells — confirm where it comes from.
val p2 = ggplot(data) {x="rating"; color="cond"} + ggsize(500, 250)
p2 + geomDensity()

In [58]:
// kandy counterpart of the per-condition density plot, drawn at 0.3 alpha.
// NOTE(review): the lets-plot cell maps color to "cond" and sets no alpha,
// while this one only sets alpha — confirm the difference is intended.
grouped.plot {
    densityPlot(rating) {
        alpha = 0.3
    }
    layout {
        size = 500 to 250
    }
}

In [18]:
// Density plot with semi-transparent fill
// Adds a fill mapping to "cond" on top of the color mapping already set on `p2`.
p2 + geomDensity(alpha=.3) {fill="cond"} 

In [60]:
// kandy counterpart of the density plot with semi-transparent fill.
// The lets-plot reference maps both the outline colour and the fill to "cond",
// so the fill is mapped to the grouping key here as well; a constant grey fill
// would lose the per-condition colouring this example demonstrates.
// NOTE(review): `trim = true` has no counterpart in the lets-plot call — confirm it is intended.
grouped.plot {
    densityPlot(rating, trim = true) {
        fillColor(key.cond)
        borderLine.color(key.cond)
        alpha = 0.3
    }
    layout.size = 500 to 250
}

In [61]:
// Find the mean of each group
// Pairs every "cond" value with its "rating", collects the ratings per
// condition, and averages each collection.
// NOTE(review): `data` is not defined in the visible cells, and the unchecked
// casts assume String / Double columns — confirm.
val means = (data["cond"] as List<String>)
    .zip(data["rating"] as List<Double>)
    .groupBy({ (cond, _) -> cond }, { (_, rating) -> rating })
    .mapValues { (_, ratings) -> ratings.average() }

// Column-oriented map of the means, passed as `data` to the mean-line layers below.
val cdat = mapOf(
    "cond" to means.keys.toList(),
    "rating" to means.values.toList(),
)
cdat

{cond=[A, B], rating=[-0.011843241476365302, 1.5547269440141214]}

In [63]:
// Overlaid histograms with means
// The vertical lines take their own data from `cdat`: "rating" gives the
// x-intercept and "cond" the colour.
p2 + geomHistogram(alpha=.3, position= positionIdentity, size=0.0, bins=10) {fill="cond"} +
     geomVLine(data=cdat, linetype="dashed", size=1.0) {xintercept="rating"; color="cond"}


In [22]:
// Earlier kandy draft of the overlaid histograms with per-condition mean lines.
// NOTE(review): the recorded output for this cell is a list of compile errors
// ("Unresolved reference: plot", "histogram", "Bins", ..., and "Val cannot be
// reassigned" for `data = ...`), so it did not run in that session. It also
// uses `plot(data)` / `Bins` where the working cells above use `df.plot` /
// `BinsOption` — presumably an older API; confirm and port.
plot(data) {
    histogram(rating, bins = Bins.byNumber(10)) {
        alpha(.3)
        position = Position.Identity
        borderLineWidth(0.0)
        fillColor(cond)
    }
    vLine(){
        data = cdat.toMutableMap()
        x(rating)
        color(cond)
        type(LineType.DASHED)
        width(1.0)
    }
    layout {
        size = 500 to 250
    }
}

Line_39.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_39.jupyter.kts (2:5 - 14) Unresolved reference: histogram
Line_39.jupyter.kts (2:15 - 21) Unresolved reference: rating
Line_39.jupyter.kts (2:30 - 34) Unresolved reference: Bins
Line_39.jupyter.kts (3:9 - 14) Unresolved reference: alpha
Line_39.jupyter.kts (4:9 - 17) Unresolved reference: position
Line_39.jupyter.kts (4:20 - 28) Unresolved reference: Position
Line_39.jupyter.kts (5:9 - 24) Unresolved reference: borderLineWidth
Line_39.jupyter.kts (6:9 - 18) Unresolved reference: fillColor
Line_39.jupyter.kts (6:19 - 23) Unresolved reference: cond
Line_39.jupyter.kts (8:5 - 10) Unresolved reference: vLine
Line_39.jupyter.kts (9:9 - 13) Val cannot be reassigned
Line_39.jupyter.kts (9:16 - 20) Unresolved reference: cdat
Line_39.jupyter.kts (10:9 - 10) Unresolved reference: x
Line_39.jupyter.kts (10:11 - 17) Unresolved reference: rating
Line_39.jupyter.kts (11:9 - 14) Unresolved reference: color
Line_39.jupyter.kts (11:15 - 19) 

In [64]:
// Use freqpoly instead of histogram
// The vertical lines take their own data from `cdat`: "rating" gives the
// x-intercept and "cond" the colour.
p2 + geomFreqpoly(bins=10) {color="cond"} +
     geomVLine(data=cdat, linetype="dashed", size=1.0) {xintercept="rating"; color="cond"}


In [24]:
// Earlier kandy draft of the frequency polygon with per-condition mean lines.
// NOTE(review): the recorded output for this cell is a list of compile errors
// ("Unresolved reference: plot", "freqPoly", "Bins", ..., and "Val cannot be
// reassigned" for `data = ...`), so it did not run in that session. It uses
// `plot(data)` / `Bins` where the working cells above use `df.plot` /
// `BinsOption` — presumably an older API; confirm and port.
plot(data) {
    freqPoly(rating, bins = Bins.byNumber(10)) {
        lineColor(cond)
    }
    vLine(){
        data = cdat.toMutableMap()
        x(rating)
        color(cond)
        type(LineType.DASHED)
        width(1.0)
    }
    layout {
        size = 500 to 250
    }
}

Line_41.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_41.jupyter.kts (2:5 - 13) Unresolved reference: freqPoly
Line_41.jupyter.kts (2:14 - 20) Unresolved reference: rating
Line_41.jupyter.kts (2:29 - 33) Unresolved reference: Bins
Line_41.jupyter.kts (3:9 - 18) Unresolved reference: lineColor
Line_41.jupyter.kts (3:19 - 23) Unresolved reference: cond
Line_41.jupyter.kts (5:5 - 10) Unresolved reference: vLine
Line_41.jupyter.kts (6:9 - 13) Val cannot be reassigned
Line_41.jupyter.kts (6:16 - 20) Unresolved reference: cdat
Line_41.jupyter.kts (7:9 - 10) Unresolved reference: x
Line_41.jupyter.kts (7:11 - 17) Unresolved reference: rating
Line_41.jupyter.kts (8:9 - 14) Unresolved reference: color
Line_41.jupyter.kts (8:15 - 19) Unresolved reference: cond
Line_41.jupyter.kts (9:9 - 13) Expression 'type' cannot be invoked as a function. The function 'invoke()' is not found
Line_41.jupyter.kts (9:9 - 13) Unresolved reference. None of the following candidates is applicable because of r

In [65]:
// Density plots with means
p2 + geomDensity() +
     geomVLine(data=cdat, linetype="dashed", size=1.0) {xintercept="rating"; color="cond"}

In [26]:
// Earlier kandy draft of the density plot with per-condition mean lines.
// NOTE(review): the recorded output for this cell is a list of compile errors
// ("Unresolved reference: plot", "density", "borderLineColor", ..., and "Val
// cannot be reassigned" for `data = ...`), so it did not run in that session.
// The working cells above use `df.plot` / `densityPlot` / `borderLine.color`
// instead — presumably this is an older API; confirm and port.
plot(data) {
    density(rating) {
        borderLineColor(cond)
    }
    vLine(){
        data = cdat.toMutableMap()
        x(rating)
        color(cond)
        type(LineType.DASHED)
        width(1.0)
    }
    layout {
        size = 500 to 250
    }
}

Line_43.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_43.jupyter.kts (2:5 - 12) Unresolved reference: density
Line_43.jupyter.kts (2:13 - 19) Unresolved reference: rating
Line_43.jupyter.kts (3:9 - 24) Unresolved reference: borderLineColor
Line_43.jupyter.kts (3:25 - 29) Unresolved reference: cond
Line_43.jupyter.kts (5:5 - 10) Unresolved reference: vLine
Line_43.jupyter.kts (6:9 - 13) Val cannot be reassigned
Line_43.jupyter.kts (6:16 - 20) Unresolved reference: cdat
Line_43.jupyter.kts (7:9 - 10) Unresolved reference: x
Line_43.jupyter.kts (7:11 - 17) Unresolved reference: rating
Line_43.jupyter.kts (8:9 - 14) Unresolved reference: color
Line_43.jupyter.kts (8:15 - 19) Unresolved reference: cond
Line_43.jupyter.kts (9:9 - 13) Expression 'type' cannot be invoked as a function. The function 'invoke()' is not found
Line_43.jupyter.kts (9:9 - 13) Unresolved reference. None of the following candidates is applicable because of receiver type mismatch: 
public val AnyCol /* = DataCol

### Using facets

In [27]:
// White, black-bordered histogram of "rating" (bin width 0.5), faceted by "cond".
// NOTE(review): `data` is not defined in the visible cells — confirm where it comes from.
ggplot(data) {x="rating"} + 
    geomHistogram(binWidth=.5, color="black", fill="white") +
    facetGrid("cond")

In [28]:
// Earlier kandy draft of the faceted histogram.
// NOTE(review): the recorded output for this cell is a list of "Unresolved
// reference" errors (plot, histogram, Bins, Color, facetGridX, ...), so it did
// not run in that session. It uses `plot(data)` / `Bins` where the working
// cells above use `df.plot` / `BinsOption` — presumably an older API; confirm and port.
plot(data) {
    histogram(rating, bins = Bins.byWidth(.5)) {
        borderLineColor(Color.BLACK)
        fillColor(Color.WHITE)
    }
    
    facetGridX(cond)
}

Line_46.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_46.jupyter.kts (2:5 - 14) Unresolved reference: histogram
Line_46.jupyter.kts (2:15 - 21) Unresolved reference: rating
Line_46.jupyter.kts (2:30 - 34) Unresolved reference: Bins
Line_46.jupyter.kts (3:9 - 24) Unresolved reference: borderLineColor
Line_46.jupyter.kts (3:25 - 30) Unresolved reference: Color
Line_46.jupyter.kts (4:9 - 18) Unresolved reference: fillColor
Line_46.jupyter.kts (4:19 - 24) Unresolved reference: Color
Line_46.jupyter.kts (7:5 - 15) Unresolved reference: facetGridX
Line_46.jupyter.kts (7:16 - 20) Unresolved reference: cond

### Box plots

In [29]:
// A basic box plot
// `p3` is the base plot reused by the following lets-plot box-plot cells:
// x mapped to "cond", y mapped to "rating", sized 300x200.
// NOTE(review): `data` is not defined in the visible cells — confirm where it comes from.
val p3 = ggplot(data) {x="cond"; y="rating"} + ggsize(300, 200)
p3 + geomBoxplot()

In [30]:
// Earlier kandy draft of the basic box plot (`cond` on x, `rating` on y, 300x200).
// NOTE(review): the recorded output for this cell is a list of "Unresolved
// reference" errors (plot, boxplot, cond, rating, layout, ...), so it did not
// run in that session. It uses `plot(data)` where the working cells above use
// `df.plot` — presumably an older API; confirm and port.
plot(data) {
    boxplot(cond, rating) {
        
    }
    layout {
        size = 300 to 200
    }
}

Line_49.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_49.jupyter.kts (2:5 - 12) Unresolved reference: boxplot
Line_49.jupyter.kts (2:13 - 17) Unresolved reference: cond
Line_49.jupyter.kts (2:19 - 25) Unresolved reference: rating
Line_49.jupyter.kts (5:5 - 11) Unresolved reference: layout
Line_49.jupyter.kts (6:9 - 13) Unresolved reference. None of the following candidates is applicable because of receiver type mismatch: 
internal val AnyBaseCol /* = BaseColumn<*> */.size: Int defined in org.jetbrains.kotlinx.dataframe.columns
public val AnyCol /* = DataColumn<*> */.size: Int defined in org.jetbrains.kotlinx.dataframe
internal val AnyFrame /* = DataFrame<*> */.size: DataFrameSize defined in org.jetbrains.kotlinx.dataframe

In [31]:
// A basic box plot with the conditions colored
// Adds a fill mapping to "cond" on top of the base plot `p3`.
p3 + geomBoxplot {fill="cond"}

In [32]:
// Earlier kandy draft of the box plot with boxes filled by condition.
// NOTE(review): the recorded output for this cell is a list of "Unresolved
// reference" errors (plot, boxplot, fillColor, layout, ...), so it did not run
// in that session. It uses `plot(data)` where the working cells above use
// `df.plot` — presumably an older API; confirm and port.
plot(data) {
    boxplot(cond, rating) {
        fillColor(cond)
    }
    layout {
        size = 300 to 200
    }
}

Line_52.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_52.jupyter.kts (2:5 - 12) Unresolved reference: boxplot
Line_52.jupyter.kts (2:13 - 17) Unresolved reference: cond
Line_52.jupyter.kts (2:19 - 25) Unresolved reference: rating
Line_52.jupyter.kts (3:9 - 18) Unresolved reference: fillColor
Line_52.jupyter.kts (3:19 - 23) Unresolved reference: cond
Line_52.jupyter.kts (5:5 - 11) Unresolved reference: layout
Line_52.jupyter.kts (6:9 - 13) Unresolved reference. None of the following candidates is applicable because of receiver type mismatch: 
internal val AnyBaseCol /* = BaseColumn<*> */.size: Int defined in org.jetbrains.kotlinx.dataframe.columns
public val AnyCol /* = DataColumn<*> */.size: Int defined in org.jetbrains.kotlinx.dataframe
internal val AnyFrame /* = DataFrame<*> */.size: DataFrameSize defined in org.jetbrains.kotlinx.dataframe

In [33]:
// Style outliers
// Outlier points are drawn in red with shape code 8 at size 5, on the base plot `p3`.
p3 + geomBoxplot(outlierColor="red", outlierShape=8, outlierSize=5)

In [34]:
// Earlier kandy draft of the box plot with styled outliers (red, asterisk symbol, size 5.0).
// NOTE(review): the recorded output for this cell is a list of "Unresolved
// reference" errors (plot, boxplot, outlier, Color, Symbol, layout, ...), so it
// did not run in that session. It uses `plot(data)` where the working cells
// above use `df.plot` — presumably an older API; confirm and port.
plot(data) {
    boxplot(cond, rating) {
        outlier {
            color(Color.RED)
            symbol(Symbol.ASTERIX)
            size(5.0)
        }
    }
    layout {
        size = 300 to 200
    }
}

Line_55.jupyter.kts (1:1 - 5) Unresolved reference: plot
Line_55.jupyter.kts (2:5 - 12) Unresolved reference: boxplot
Line_55.jupyter.kts (2:13 - 17) Unresolved reference: cond
Line_55.jupyter.kts (2:19 - 25) Unresolved reference: rating
Line_55.jupyter.kts (3:9 - 16) Unresolved reference: outlier
Line_55.jupyter.kts (4:13 - 18) Unresolved reference: color
Line_55.jupyter.kts (4:19 - 24) Unresolved reference: Color
Line_55.jupyter.kts (5:13 - 19) Unresolved reference: symbol
Line_55.jupyter.kts (5:20 - 26) Unresolved reference: Symbol
Line_55.jupyter.kts (6:13 - 17) Unresolved reference. None of the following candidates is applicable because of receiver type mismatch: 
public fun AnyFrame /* = DataFrame<*> */.size(): DataFrameSize defined in org.jetbrains.kotlinx.dataframe
Line_55.jupyter.kts (9:5 - 11) Unresolved reference: layout
Line_55.jupyter.kts (10:9 - 13) Unresolved reference. None of the following candidates is applicable because of receiver type mismatch: 
internal val AnyBas