### Pick Sampling

In [1]:
%useLatestDescriptors
%use lets-plot

In [2]:
import kotlin.random.Random

/**
 * Builds a pseudo-random word made of [length] lowercase letters.
 *
 * Every letter is drawn from 'a'..'z' using [rnd], so the same generator state
 * always yields the same word.
 */
fun genWord(length: Int, rnd: Random): String =
    List(length) { ('a'..'z').random(rnd) }.joinToString(separator = "")

/**
 * Generates a set of [n] distinct pseudo-random 5-letter words.
 *
 * Words are produced by [genWord] from a generator seeded with [seed], so the
 * result is reproducible for a given (n, seed) pair. A non-positive [n] yields
 * an empty set.
 *
 * @throws IllegalArgumentException if [n] exceeds the number of distinct
 *   5-letter lowercase words (26^5), which could never be satisfied.
 */
fun genWordSet(n: Int, seed: Int = 42):Set<String> {
    // Only 26^5 distinct words exist; without this guard a larger request
    // would keep looping forever because the set can never reach size n.
    val maxDistinct = 26 * 26 * 26 * 26 * 26
    require(n <= maxDistinct) {
        "Cannot generate $n distinct 5-letter words; at most $maxDistinct exist"
    }
    val rnd = Random(seed)
    val words = HashSet<String>()
    // Duplicates are silently absorbed by the set, so keep drawing until full.
    while (words.size < n) {
        words.add(genWord(5, rnd))
    }
    return words
}

/**
 * Builds a column-oriented data set with [n] rows.
 *
 * The result has two columns of equal length:
 * - "word": values drawn at random from [words];
 * - "g": group labels drawn at random from 'a', 'b', 'c'.
 *
 * A fixed seed (42) is used, so the output is reproducible. The "word" column
 * is filled completely before the "g" column, both from the same generator.
 *
 * @throws NoSuchElementException if [words] is empty while [n] is positive.
 */
fun data(n: Int, words: Set<String>):Map<String, *> {
    val rnd = Random(42)
    // Snapshot the set once: random() on a non-list collection has to iterate
    // up to the chosen index on every draw. The list keeps the set's iteration
    // order, so exactly the same elements are picked.
    val pool = words.toList()
    val groups = listOf('a','b','c')
    return mapOf(
        "word" to List(n) { pool.random(rnd) },
        "g" to List(n) { groups.random(rnd) }
    )
}

In [3]:
// Number of unique words exceeds threshold (50) of default 'pick' sampling on bar chart.
// Vocabulary: 500 distinct 5-letter words (generated with the default seed).
val words = genWordSet(500)
// 1000 rows, each with a word drawn from the vocabulary and a group label.
val dat = data(1000, words)
// Base plot reused by the cells below: only `x` is mapped here, geoms are added per cell.
val p = ggplot(dat) { x = "word" }

In [4]:
// Disable sampling to see the overplotting.
// `samplingNone` is passed explicitly instead of relying on the geom's default sampling.
p + geomBar(sampling = samplingNone, size = 0)

In [5]:
// Draw plot with default sampling.
// No `sampling` argument is given here, so geomBar uses its default.
p + geomBar(size = 0)

In [6]:
// 'pick' sampling preserves groups on bar chart.
// `fill` is mapped to the group column "g" of the data set.
p + geomBar(size = 0) { fill = "g" }

In [7]:
// Ordering words by `count` might be a good idea.
p + geomBar(size = 0) {
    fill = "g"
    // Override the base plot's `x`: treat "word" as discrete, ordered by "..count..".
    x = asDiscrete("word", orderBy = "..count..")
}