In [None]:
%use dataframe, kandy

# When will it be done?!
Now we'll apply a Monte Carlo simulation to predict when the next milestone of 13 stories will be done.

In [None]:
import kotlinx.datetime.daysUntil
import org.jetbrains.kotlinx.dataframe.api.dropNulls

// Load the raw data from data/a_team.csv; later cells read its workflow-stage
// columns (e.g. `In Production`).
val csv = DataFrame.read("data/a_team.csv")

In [None]:
// Keep only the rows that have a value in every listed workflow-stage column;
// a row with a null in any of them is dropped.
val cleaned = csv.dropNulls {
    `In Analysis` and
        Analyzed and
        `In Development` and
        Developed and
        `In Acceptance` and
        `In Production`
}
cleaned

# What's our 'historical data' here?

That's right, it's our `In Production` dates.
They show when "something was done".

We'll want to know, per date, how many stories were delivered on that day.

In [None]:
// The `In Production` column marks when something was done; its earliest and
// latest entries bound the historical period used by the cells below.
val inProductionTimestamps = cleaned.`In Production`
val oldestInProductionDate = inProductionTimestamps.min().date
val mostRecentInProductionDate = inProductionTimestamps.max().date
println("$oldestInProductionDate..$mostRecentInProductionDate")

In [None]:
// Count the rows of `cleaned` per `In Production` calendar day, ordered by that day.
// The result has two columns: "date" and "storiesDelivered" (the per-day row count).
// Only days that occur in `cleaned` appear here; days without a delivery are absent.
val throughput = cleaned
    .groupBy { `In Production`.convertToLocalDate() }
    .count()
    .sortBy { `In Production` }
    .rename("In Production" to "date", "count" to "storiesDelivered")
throughput

In [None]:
import kotlinx.datetime.DateTimeUnit
import kotlinx.datetime.plus

// One row per calendar day, from the oldest to the most recent `In Production`
// date (both ends included), in a single column named "date".
val allDates = generateSequence(oldestInProductionDate) { day -> day.plus(1, DateTimeUnit.DAY) }
    .takeWhile { day -> day <= mostRecentInProductionDate }
    .toList()
    .toDataFrame()
    .rename("value" to "date")
allDates

In [None]:
// Left-join every calendar day in `allDates` with the per-day counts in `throughput`
// on the "date" column. Days without a match get a null `storiesDelivered`, which is
// then replaced by zero, so every day in the range carries a delivery count.
val historicalThroughput = allDates
    .join(other = throughput, type = JoinType.Left) { date }
    .fillNulls("storiesDelivered").withZero()
    .sortBy { date }
historicalThroughput

In [None]:
import kotlin.random.Random

/** Outcome of one simulated run: the number of days it took to deliver the milestone. */
data class Trial(val totalDays: Int)

/**
 * Forecasts how many days a milestone takes by replaying randomly sampled days
 * from `historicalThroughput`.
 *
 * Each trial repeatedly draws one historical day (with replacement) and adds that
 * day's `storiesDelivered` to a running total until the total reaches
 * [storiesInNextMilestone]; the number of draws is the trial's [Trial.totalDays].
 *
 * @param storiesInNextMilestone number of stories that must be delivered; a value
 *   of zero or less makes every trial finish in 0 days.
 * @param trials number of independent simulation runs.
 * @param rng random source used for sampling; pass a seeded [Random] for
 *   reproducible results.
 * @return a data frame with one [Trial] row per simulation run.
 * @throws IllegalArgumentException if stories have to be delivered but the
 *   historical throughput contains no day with at least one delivered story.
 */
fun monteCarloFromThroughput(
    storiesInNextMilestone: Int = 13,
    trials: Int = 10_000,
    rng: Random = Random,
): DataFrame<Trial> {
    val dailyThroughput = historicalThroughput.storiesDelivered.values().toList()

    // Without at least one positive sample the loop below could never reach the
    // target and would spin forever (or fail on an empty list with an opaque error).
    if (storiesInNextMilestone > 0) {
        require(dailyThroughput.any { it > 0 }) {
            "historicalThroughput must contain at least one day with a delivered story; " +
                "otherwise $storiesInNextMilestone stories can never be reached"
        }
    }

    val runs = List(trials) {
        var done = 0
        var days = 0
        while (done < storiesInNextMilestone) {
            // Sample one historical day: some days delivered 2 stories, others 1, most 0.
            done += dailyThroughput.random(rng)
            days += 1
        }
        Trial(days)
    }
    return runs.toDataFrame()
}

In [None]:
// Run the simulation for a 13-story milestone; `trials` and `rng` keep their defaults.
val mc = monteCarloFromThroughput(storiesInNextMilestone = 13)
mc

In [None]:
// 50th and 85th percentiles of the simulated `totalDays` values.
val p50 = mc.percentile(50.0) { totalDays }
val p85 = mc.percentile(85.0) { totalDays }
println("P50=$p50  P85=$p85")

In [None]:
// Histogram data: how many simulated runs ended on each `totalDays` value,
// ordered by ascending `totalDays`.
val frequencyPerTotal = mc
    .groupBy { totalDays }
    .count()
    .rename("count" to "frequency")
    .sortBy { totalDays }
frequencyPerTotal

In [None]:
import org.jetbrains.kotlinx.statistics.distribution.NormalDistribution
import org.jetbrains.letsPlot.Stat
import org.jetbrains.letsPlot.core.spec.plotson.BinStatOptions

// Bar chart of the simulation outcome: x = total days needed, y = number of
// simulated runs that needed that many days.
val distributionBarChart = plot(frequencyPerTotal) {
    layout { size = 2000 to 900 }
    bars {
        x(totalDays)
        y(frequency)
        alpha = 0.6
    }
    // vertical markers at the P50 and P85 values computed earlier
    vLine { xIntercept.constant(p50); type = LineType.DASHED }
    vLine { xIntercept.constant(p85); type = LineType.DASHED }
}
distributionBarChart

# When will it be done?
Now we can say, with 50% certainty, that the next milestone of 13 stories will be done within 68 days.

If you want a higher certainty (e.g. when planning around a very expensive team you're integrating with), you should tell your manager that the stories will be done within 90 days.

## Caveat
These predictions only stay relevant if your team composition remains the same and you can continue working in the same way as you have been.