In [1]:
%useLatestDescriptors
%use lets-plot

In [2]:
import java.util.Random

import kotlin.system.measureTimeMillis
import kotlin.math.sqrt
import kotlin.math.floor

import org.jetbrains.letsPlot.intern.Plot

In [3]:
// Default number of timed runs per block used by timeit (one per-op sample per run).
val DEFAULT_RUNS = 10
// Default number of untimed warm-up rounds timeit executes before measuring.
val DEFAULT_WARMUP = 10
// Default number of invocations per timed run; the elapsed time is divided by this value.
val DEFAULT_BATCH_SIZE = 100

In [4]:
/**
 * Singleton sink for benchmark results.
 *
 * Every consumed value is written to a volatile field so that the write is an
 * observable side effect of the measured code.
 */
object Blackhole {
    @Volatile
    private var sink: Any? = null

    /** Stores [x] in the volatile sink, replacing the previously stored value. */
    fun consume(x: Any?) {
        sink = x
    }
}

/**
 * Timing summary for one benchmarked block.
 *
 * All `*Ns` values are nanoseconds per single operation.
 *
 * @property perOpNs one per-operation sample for every timed run
 * @property meanNs arithmetic mean of [perOpNs]
 * @property medianNs median of [perOpNs]
 * @property stddevNs standard deviation of [perOpNs]
 * @property minNs smallest sample
 * @property maxNs largest sample
 */
data class TimeitStats(
    val perOpNs: List<Double>,
    val meanNs: Double,
    val medianNs: Double,
    val stddevNs: Double,
    val minNs: Double,
    val maxNs: Double
) {
    /** [meanNs] converted to milliseconds. */
    val meanMs: Double
        get() = meanNs / 1_000_000.0

    /** [stddevNs] converted to milliseconds. */
    val stddevMs: Double
        get() = stddevNs / 1_000_000.0

    /** [medianNs] converted to milliseconds. */
    val medianMs: Double
        get() = medianNs / 1_000_000.0
}

/**
 * Benchmarks every lambda in [blocks] and returns one [TimeitStats] per lambda,
 * in the same order as [blocks].
 *
 * Procedure:
 *  1. Warm-up: [warmup] rounds, each invoking every block [batchSize] times (untimed).
 *  2. Measurement: [runs] rounds; in each round every block is invoked [batchSize]
 *     times in a row and the elapsed `System.nanoTime()` delta is divided by
 *     [batchSize] to obtain one per-operation sample.
 *
 * The value returned by each block is handed to [Blackhole.consume]. The blocks are
 * typed `() -> Any?` (rather than `() -> Unit`) so that the computed result actually
 * reaches the blackhole instead of being coerced to `Unit` at the call site; lambdas
 * of type `() -> Unit` are still accepted.
 *
 * @param runs number of timed rounds (samples per block); must be positive
 * @param warmup number of untimed warm-up rounds; must be non-negative
 * @param batchSize invocations per sample; must be positive
 * @param gcBetween when true, `System.gc()` is requested before each timed batch
 *   (outside the timed region)
 * @param blocks the code fragments to measure
 * @return statistics per block; the standard deviation is the population one
 *   (squared deviations are averaged over the number of runs)
 * @throws IllegalArgumentException if [runs], [warmup] or [batchSize] is out of range
 */
fun timeit(
    runs: Int = DEFAULT_RUNS,
    warmup: Int = DEFAULT_WARMUP,
    batchSize: Int = DEFAULT_BATCH_SIZE,
    gcBetween: Boolean = false,
    blocks: List<() -> Any?>
): List<TimeitStats> {
    require(runs > 0) { "runs must be positive, but was $runs" }
    require(warmup >= 0) { "warmup must be non-negative, but was $warmup" }
    require(batchSize > 0) { "batchSize must be positive, but was $batchSize" }

    repeat(warmup) {
        repeat(batchSize) {
            blocks.forEach { block -> Blackhole.consume(block()) }
        }
    }

    // perOp[run][blockIndex] = nanoseconds per operation measured in that run.
    val perOp: List<List<Double>> = List(runs) {
        blocks.map { block ->
            // The GC request stays outside the timed region.
            if (gcBetween) System.gc()
            val start = System.nanoTime()
            repeat(batchSize) { Blackhole.consume(block()) }
            val elapsedNs = (System.nanoTime() - start).toDouble()
            elapsedNs / batchSize
        }
    }

    return blocks.indices.map { i ->
        val blockPerOp = perOp.map { it[i] }
        val sorted = blockPerOp.sorted()
        val mean = blockPerOp.average()
        val median = if (runs % 2 == 1) sorted[runs / 2]
                     else (sorted[runs / 2 - 1] + sorted[runs / 2]) / 2.0
        val std = sqrt(blockPerOp.map { val d = it - mean; d * d }.average())
        TimeitStats(
            perOpNs = blockPerOp,
            meanNs = mean,
            medianNs = median,
            stddevNs = std,
            minNs = sorted.first(),
            maxNs = sorted.last()
        )
    }
}

In [5]:
/**
 * A named neighbour-counting implementation that can be checked by `test` and
 * benchmarked by `measureTime`.
 *
 * @property name label printed next to the test verdict and the timing results
 * @property function invoked as `function(xs, ys, r2, xy)`; returns one count per input
 *   point. In this notebook the implementations count the points whose `d2` distance
 *   to the given point is below `r2`.
 */
data class TargetFunction(
    val name: String,
    val function: (List<Double>, List<Double>, Double, Double) -> List<Int>
)

/**
 * Checks [f] against a small hand-computed example and prints the verdict.
 *
 * The example has four points, (0,0), (0,2), (2,0) and (4,4), with `r2 = 6.0` and
 * `xy = 1.0`; the expected neighbour counts (each point counts itself) are 3, 2, 2, 1.
 *
 * The whole result list is compared with the expected one, so a result that is too
 * short (including empty) or too long is reported as a failure instead of passing
 * vacuously or crashing with an index error.
 *
 * @throws IllegalArgumentException if the counts returned by [f] differ from the
 *   expected ones
 */
fun test(f: TargetFunction) {
    val expectedCounts = listOf(3, 2, 2, 1)
    val xs = listOf(0.0, 0.0, 2.0, 4.0)
    val ys = listOf(0.0, 2.0, 0.0, 4.0)
    val r2 = 6.0
    val counts = f.function(xs, ys, r2, 1.0)
    require(counts == expectedCounts) { "\"${f.name}\" is not OK" }
    println("\"${f.name}\" is OK")
}

/**
 * Squared distance between (x1, y1) and (x2, y2) with an axis scaling factor [xy]:
 * the squared x-difference is divided by [xy] and the squared y-difference is
 * multiplied by [xy]. With `xy = 1.0` this is the plain squared Euclidean distance.
 */
fun d2(x1: Double, y1: Double, x2: Double, y2: Double, xy: Double): Double {
    val dx = x1 - x2
    val dy = y1 - y2
    return dx * dx / xy + dy * dy * xy
}

/**
 * Generates [n] pseudo-random 2D points as a pair of coordinate lists (xs, ys).
 *
 * Each coordinate is a Gaussian sample from a `java.util.Random` seeded with [seed],
 * multiplied by [scale]. All x values are drawn first, then all y values, so the
 * output is fully determined by the three arguments.
 */
fun generateData(n: Int, scale: Double = 2.0, seed: Long = 42): Pair<List<Double>, List<Double>> {
    val rng = Random(seed)
    val drawColumn = { List(n) { scale * rng.nextGaussian() } }
    val xs = drawColumn()
    val ys = drawColumn()
    return Pair(xs, ys)
}

/**
 * Builds a 200x150 scatter plot of the points whose coordinates are given by
 * [xs] (x axis) and [ys] (y axis).
 */
fun showData(xs: List<Double>, ys: List<Double>): Plot {
    val data = mapOf("x" to xs, "y" to ys)
    val base = letsPlot(data) {
        x = "x"
        y = "y"
    }
    return base + geomPoint() + ggsize(200, 150)
}

/**
 * Benchmarks every implementation in [versions] on the dataset ([xs], [ys]) and
 * prints the results.
 *
 * First the dataset size and its bounding box are printed, then, for each version,
 * its name followed by mean / median / standard deviation / min / max per call in
 * milliseconds. Timing is done by `timeit` with its default settings; every version
 * is called with the given [r2] and with `xy = 1.0`.
 */
fun measureTime(
    xs: List<Double>,
    ys: List<Double>,
    versions: List<TargetFunction>,
    r2: Double = 1.0
) {
    println("dataset size: ${xs.size}")
    println("dataset dim: [%.1f,%.1f]x[%.1f,%.1f]\n".format(xs.min(), xs.max(), ys.min(), ys.max()))

    // timeit returns one stats entry per block, in the same order as `versions`.
    val results = timeit(blocks = versions.map { version -> { version.function(xs, ys, r2, 1.0) } })

    versions.zip(results).forEach { (version, stats) ->
        println(version.name)
        val summary = "mean = %.3f ms/op, median = %.3f, std = %.3f (min=%.3f, max=%.3f)".format(
            stats.meanMs, stats.medianMs, stats.stddevMs, stats.minNs/1e6, stats.maxNs/1e6
        )
        println(summary)
        println()
    }
}

In [6]:
// Baseline written with collection operators: for every point i, build the list of all
// indices j (including i itself) whose d2 distance to i is below r2 and take its size.
// Checks all n*n ordered pairs and allocates an intermediate list per point.
val v1 = TargetFunction(
    name = "Naive",
    function = { xs, ys, r2, xy ->
        xs.indices.map { i ->
            xs.indices.filter { j ->
                d2(xs[i], ys[i], xs[j], ys[j], xy) < r2
            }.size
        }
    }
)

// Same n*n pair check as v1, but written with explicit index loops and a plain
// counter, so no intermediate list is created per point.
val v2 = TargetFunction(
    name = "C-Like",
    function = { xs, ys, r2, xy ->
        val neighboursCounts: MutableList<Int> = mutableListOf()
        val n = xs.size
        for (i in 0 until n) {
            var count = 0
            for (j in 0 until n) {
                if (d2(xs[i], ys[i], xs[j], ys[j], xy) < r2) {
                    count += 1
                }
            }
            neighboursCounts.add(count)
        }
        neighboursCounts
    }
)

// Visits every unordered pair (i, j) with i < j only once and, when the pair is
// within range, increments the counters of both points.
// Every counter starts at 1 to account for the point itself, which matches v1/v2
// only when d2(p, p) = 0 is below r2, i.e. for r2 > 0.
// NOTE(review): despite the name, no dictionary/map is used here; the result is a
// pre-sized mutable list indexed by point.
val v3 = TargetFunction(
    name = "With dictionary",
    function = { xs, ys, r2, xy ->
        val n = xs.size
        val neighboursCounts: MutableList<Int> = MutableList(n) { 1 }
        for (i in 0 until n) {
            for (j in i + 1 until n) {
                if (d2(xs[i], ys[i], xs[j], ys[j], xy) < r2) {
                    neighboursCounts[i] = neighboursCounts[i] + 1
                    neighboursCounts[j] = neighboursCounts[j] + 1
                }
            }
        }
        neighboursCounts
    }
)

// Same loops as v2, with one shortcut: for i == j the point is counted without
// calling d2 (the `||` short-circuits).
// The self-pair is therefore counted regardless of r2, so the result can differ
// from v2 only when r2 <= 0.
val v4 = TargetFunction(
    name = "C-Like, micro-optimization #1",
    function = { xs, ys, r2, xy ->
        val neighboursCounts: MutableList<Int> = mutableListOf()
        val n = xs.size
        for (i in 0 until n) {
            var count = 0
            for (j in 0 until n) {
                if (i == j || d2(xs[i], ys[i], xs[j], ys[j], xy) < r2) {
                    count += 1
                }
            }
            neighboursCounts.add(count)
        }
        neighboursCounts
    }
)

/**
 * Returns true when the grid cell (xId1, yId1) is the same as, or adjacent to
 * (including diagonally), the cell (xId2, yId2), i.e. when both index differences
 * lie in the closed range [-1, 1].
 */
fun inNeighbourCell(xId1: Double, yId1: Double, xId2: Double, yId2: Double): Boolean {
    val dx = xId1 - xId2
    val dy = yId1 - yId2
    return dx >= -1.0 && dx <= 1.0 && dy >= -1.0 && dy <= 1.0
}
// Same loops as v2, with a cheap grid prefilter in front of the d2 call: every point
// is assigned to a grid cell and d2 is evaluated only for pairs located in the same
// or in adjacent cells.
//
// The cell size must match the metric used by d2, which divides the squared
// x-difference by xy and multiplies the squared y-difference by xy. d2 < r2 implies
// |dx| < r * sqrt(xy) and |dy| < r / sqrt(xy), so these are the cell sizes along x
// and y. Using r for both axes (as an unscaled grid would) rejects true neighbours
// whenever xy != 1. For xy == 1.0 both cell sizes are exactly r.
// NOTE(review): the prefilter reasoning assumes xy > 0 - confirm no caller passes
// a non-positive xy.
val v5 = TargetFunction(
    name = "C-Like, micro-optimization #2",
    function = { xs, ys, r2, xy ->
        val neighboursCounts: MutableList<Int> = mutableListOf()
        val n = xs.size
        val r = sqrt(r2)
        val axisScale = sqrt(xy)
        val xCellSize = r * axisScale
        val yCellSize = r / axisScale
        val xIds = xs.map { floor(it / xCellSize) }
        val yIds = ys.map { floor(it / yCellSize) }
        for (i in 0 until n) {
            var count = 0
            for (j in 0 until n) {
                if (inNeighbourCell(xIds[i], yIds[i], xIds[j], yIds[j]) && d2(xs[i], ys[i], xs[j], ys[j], xy) < r2) {
                    count += 1
                }
            }
            neighboursCounts.add(count)
        }
        neighboursCounts
    }
)

// Sanity-check every variant against the small fixed example inside test()
// before benchmarking it.
test(v1)
test(v2)
test(v3)
test(v4)
test(v5)

"Naive" is OK
"C-Like" is OK
"With dictionary" is OK
"C-Like, micro-optimization #1" is OK
"C-Like, micro-optimization #2" is OK


In [7]:
// Dataset 1: 100 points, default scale (2.0) and seed (42).
val (xs1, ys1) = generateData(100)
showData(xs1, ys1)

In [8]:
// Benchmark all five variants on dataset 1 (r2 defaults to 1.0).
measureTime(xs1, ys1, listOf(v1, v2, v3, v4, v5))

dataset size: 100
dataset dim: [-5.1,4.4]x[-5.2,5.8]

Naive
mean = 0.039 ms/op, median = 0.036, std = 0.009 (min=0.035, max=0.067)

C-Like
mean = 0.014 ms/op, median = 0.013, std = 0.004 (min=0.011, max=0.024)

With dictionary
mean = 0.018 ms/op, median = 0.016, std = 0.005 (min=0.016, max=0.033)

C-Like, micro-optimization #1
mean = 0.017 ms/op, median = 0.013, std = 0.009 (min=0.013, max=0.045)

C-Like, micro-optimization #2
mean = 0.018 ms/op, median = 0.015, std = 0.011 (min=0.013, max=0.052)



In [9]:
// Dataset 2: 1000 points, default scale (2.0) and seed (42).
val (xs2, ys2) = generateData(1000)
showData(xs2, ys2)

In [10]:
// Benchmark all five variants on dataset 2 (r2 defaults to 1.0).
measureTime(xs2, ys2, listOf(v1, v2, v3, v4, v5))

dataset size: 1000
dataset dim: [-6.2,5.8]x[-5.9,6.4]

Naive
mean = 4.561 ms/op, median = 4.555, std = 0.039 (min=4.489, max=4.614)

C-Like
mean = 1.655 ms/op, median = 1.653, std = 0.054 (min=1.588, max=1.783)

With dictionary
mean = 1.918 ms/op, median = 1.929, std = 0.042 (min=1.855, max=1.978)

C-Like, micro-optimization #1
mean = 1.679 ms/op, median = 1.682, std = 0.045 (min=1.616, max=1.766)

C-Like, micro-optimization #2
mean = 6.097 ms/op, median = 6.156, std = 0.113 (min=5.897, max=6.192)



In [11]:
// Dataset 3: 1000 points with scale = 0.5, i.e. a tighter cloud than the default,
// so more pairs fall within the search radius.
val (xs3, ys3) = generateData(1000, scale = 0.5)
showData(xs3, ys3)

In [12]:
// Benchmark all five variants on dataset 3 (r2 defaults to 1.0).
measureTime(xs3, ys3, listOf(v1, v2, v3, v4, v5))

dataset size: 1000
dataset dim: [-1.5,1.4]x[-1.5,1.6]

Naive
mean = 8.910 ms/op, median = 8.853, std = 0.151 (min=8.796, max=9.341)

C-Like
mean = 4.114 ms/op, median = 4.119, std = 0.053 (min=3.982, max=4.182)

With dictionary
mean = 4.637 ms/op, median = 4.629, std = 0.056 (min=4.572, max=4.775)

C-Like, micro-optimization #1
mean = 4.062 ms/op, median = 4.081, std = 0.085 (min=3.947, max=4.236)

C-Like, micro-optimization #2
mean = 5.034 ms/op, median = 5.067, std = 0.079 (min=4.881, max=5.116)



In [13]:
// Dataset 4: 1000 points with scale = 5.0, i.e. a wider cloud than the default,
// so fewer pairs fall within the search radius.
val (xs4, ys4) = generateData(1000, scale = 5.0)
showData(xs4, ys4)

In [14]:
// Benchmark all five variants on dataset 4 (r2 defaults to 1.0).
measureTime(xs4, ys4, listOf(v1, v2, v3, v4, v5))

dataset size: 1000
dataset dim: [-15.5,14.4]x[-14.7,16.1]

Naive
mean = 3.728 ms/op, median = 3.734, std = 0.045 (min=3.666, max=3.785)

C-Like
mean = 1.095 ms/op, median = 1.087, std = 0.032 (min=1.064, max=1.175)

With dictionary
mean = 1.536 ms/op, median = 1.533, std = 0.024 (min=1.512, max=1.594)

C-Like, micro-optimization #1
mean = 1.231 ms/op, median = 1.222, std = 0.023 (min=1.210, max=1.275)

C-Like, micro-optimization #2
mean = 4.471 ms/op, median = 4.467, std = 0.099 (min=4.357, max=4.601)



In [15]:
// Dataset 5: 2000 points, default scale (2.0) and seed (42).
val (xs5, ys5) = generateData(2000)
showData(xs5, ys5)

In [16]:
// Benchmark all five variants on dataset 5 (r2 defaults to 1.0).
measureTime(xs5, ys5, listOf(v1, v2, v3, v4, v5))

dataset size: 2000
dataset dim: [-6.2,6.4]x[-5.9,7.2]

Naive
mean = 18.239 ms/op, median = 18.246, std = 0.111 (min=18.025, max=18.478)

C-Like
mean = 7.050 ms/op, median = 6.579, std = 1.092 (min=6.366, max=10.207)

With dictionary
mean = 7.925 ms/op, median = 7.955, std = 0.074 (min=7.754, max=8.019)

C-Like, micro-optimization #1
mean = 6.826 ms/op, median = 6.840, std = 0.074 (min=6.632, max=6.915)

C-Like, micro-optimization #2
mean = 25.366 ms/op, median = 25.437, std = 0.185 (min=25.005, max=25.580)

