In [66]:
%use dataframe, kandy

# When will it be done?!
Now we'll apply a Monte Carlo simulation to predict when the next milestone of 13 stories will be done.

In [67]:
import kotlinx.datetime.daysUntil
import org.jetbrains.kotlinx.dataframe.api.dropNulls

// Load the team's story history from CSV: one row per story, one timestamp column per workflow state.
val csv = DataFrame.read("data/a_team.csv")

In [68]:
// Keep only stories that have a timestamp for every workflow state listed below;
// rows with a null in any of these columns are dropped.
val cleaned = csv.dropNulls { `In Analysis` and Analyzed and `In Development` and Developed and `In Acceptance` and `In Production` }
cleaned

StoryId,Selected,Refinement Started,Backlogged,In Analysis,Analyzed,In Development,Developed,In Acceptance,In Production,Abandoned
ST-0001,2023-01-01T17:00,2023-01-04T10:09,2023-01-09T17:46,2023-01-10T16:03,2023-01-15T18:00,2023-01-16T09:56,2023-01-26T08:15,2023-01-29T12:02,2023-02-07T16:28,
ST-0002,2023-01-08T17:32,2023-01-11T12:28,2023-01-16T16:51,2023-01-16T17:21,2023-01-16T17:51,2023-01-18T10:26,2023-01-27T14:28,2023-01-29T09:42,2023-02-11T14:04,
ST-0003,2023-01-13T10:29,2023-01-14T09:25,2023-01-16T10:42,2023-01-19T10:45,2023-01-20T16:25,2023-01-22T13:05,2023-01-29T08:21,2023-02-01T15:28,2023-02-04T14:21,
ST-0004,2023-01-09T09:16,2023-01-10T08:57,2023-01-14T12:48,2023-01-18T14:54,2023-01-22T14:09,2023-01-24T13:05,2023-02-02T08:51,2023-02-06T14:57,2023-02-16T12:01,
ST-0005,2023-01-16T09:29,2023-01-16T13:35,2023-01-22T12:39,2023-01-22T13:09,2023-01-24T09:10,2023-01-25T12:40,2023-02-01T16:48,2023-02-01T17:18,2023-02-04T18:00,
ST-0006,2023-01-18T08:46,2023-01-19T16:12,2023-01-20T15:15,2023-01-24T09:42,2023-01-26T14:42,2023-01-28T16:19,2023-02-05T11:21,2023-02-09T18:00,2023-02-23T14:22,
ST-0007,2023-01-23T08:05,2023-01-26T16:42,2023-01-26T17:15,2023-01-28T08:29,2023-01-28T10:17,2023-01-30T13:35,2023-02-08T11:02,2023-02-08T11:22,2023-02-13T08:21,
ST-0008,2023-01-26T08:05,2023-01-26T09:09,2023-01-29T17:02,2023-01-30T08:19,2023-02-02T18:00,2023-02-04T10:42,2023-02-09T14:48,2023-02-09T15:18,2023-02-18T17:41,
ST-0009,2023-01-28T17:53,2023-01-31T18:00,2023-02-02T18:00,2023-02-02T18:30,2023-02-06T08:08,2023-02-07T15:35,2023-02-19T18:00,2023-02-21T18:00,2023-02-24T18:00,
ST-0010,2023-01-29T09:38,2023-02-03T13:16,2023-02-09T12:39,2023-02-12T10:00,2023-02-14T08:31,2023-02-18T18:00,2023-03-02T11:43,2023-03-02T12:45,2023-04-13T12:29,


# What's our 'historical data' here?

That's right, it's our `In Production` dates.
They show when "something was done".

We'll want to know, per date, how many stories were delivered on that day.

In [69]:
// Date range covered by the delivery history, taken from the earliest and latest `In Production` timestamps.
val inProductionTimestamps = cleaned.`In Production`
val oldestInProductionDate = inProductionTimestamps.min().date
val mostRecentInProductionDate = inProductionTimestamps.max().date
println(listOf(oldestInProductionDate, mostRecentInProductionDate).joinToString(separator = ".."))

2023-02-04..2025-08-03


In [70]:
// Count how many stories reached `In Production` on each calendar date.
val throughput = cleaned
    .groupBy { `In Production`.convertToLocalDate() } // group by the date part only, ignoring the time of day
    .count() // number of stories per date, in a column named "count"
    .sortBy { `In Production` } // the grouping key is still called "In Production" at this point
    .rename("In Production" to "date", "count" to "storiesDelivered")
throughput

date,storiesDelivered
2023-02-04,2
2023-02-07,1
2023-02-11,1
2023-02-13,1
2023-02-16,1
2023-02-18,1
2023-02-23,1
2023-02-24,1
2023-03-01,1
2023-03-17,2


In [71]:
import kotlinx.datetime.DateTimeUnit
import kotlinx.datetime.plus

// Every calendar day from the first to the last delivery date (both inclusive), one day at a time.
val calendarDays = buildList {
    var day = oldestInProductionDate
    while (day <= mostRecentInProductionDate) {
        add(day)
        day = day.plus(1, DateTimeUnit.DAY)
    }
}
val allDates = calendarDays.toDataFrame().rename("value" to "date")
allDates // incl. weekends

date
2023-02-04
2023-02-05
2023-02-06
2023-02-07
2023-02-08
2023-02-09
2023-02-10
2023-02-11
2023-02-12
2023-02-13


In [72]:
// Expand the throughput to one row per calendar date, including days on which nothing was delivered.
val historicalThroughput = allDates
    .join(other = throughput, type = JoinType.Left) { date } // left join keeps every date from allDates
    .fillNulls("storiesDelivered").withZero() // dates without a match in throughput count as 0 stories
    .sortBy { date }
historicalThroughput

date,storiesDelivered
2023-02-04,2
2023-02-05,0
2023-02-06,0
2023-02-07,1
2023-02-08,0
2023-02-09,0
2023-02-10,0
2023-02-11,1
2023-02-12,0
2023-02-13,1


In [74]:
import kotlin.random.Random

/** Outcome of one simulated run: [totalDays] is the number of sampled days needed to finish the milestone. */
data class Trial(val totalDays: Int)

/**
 * Forecasts how many days the next milestone will take by means of a Monte Carlo simulation.
 *
 * Every trial replays the team's history at random: it repeatedly draws the number of stories
 * delivered on one randomly chosen day of `historicalThroughput` (with replacement) and counts
 * how many draws are needed until [storiesInNextMilestone] stories are done.
 *
 * @param storiesInNextMilestone number of stories that have to be delivered; zero or less
 *   yields trials of 0 days.
 * @param trials number of simulated runs, i.e. the number of rows in the result.
 * @param random source of randomness; pass a seeded [Random] to get reproducible results.
 * @return a data frame with one [Trial] row per simulated run.
 * @throws IllegalArgumentException if [trials] is negative, or if stories have to be delivered
 *   but the history contains no day with at least one delivered story.
 */
fun monteCarloFromThroughput(
    storiesInNextMilestone: Int = 13,
    trials: Int = 10_000,
    random: Random = Random.Default,
): DataFrame<Trial> {
    require(trials >= 0) { "trials must not be negative, but was $trials" }

    val dailyThroughput = historicalThroughput.storiesDelivered.values().toList()
    // Without at least one productive day the sampling loop below would never reach its target
    // (and with an empty history there would be nothing to sample from at all).
    require(storiesInNextMilestone <= 0 || dailyThroughput.any { it > 0 }) {
        "historicalThroughput needs at least one day with delivered stories to simulate a milestone"
    }

    val runs = List(trials) {
        var done = 0
        var days = 0
        while (done < storiesInNextMilestone) {
            // some days we delivered 2 stories, other days 1, most days 0
            done += dailyThroughput[random.nextInt(dailyThroughput.size)]
            days += 1
        }
        Trial(days)
    }
    return runs.toDataFrame()
}

In [75]:
// Run the simulation for the upcoming milestone of 13 stories, using the default number of trials.
val mc = monteCarloFromThroughput(storiesInNextMilestone = 13)
mc

totalDays
77
96
64
93
79
72
75
68
50
83


In [76]:
// Percentiles of the simulated durations: p50 is the median number of days,
// p85 the number of days that 85% of the trials stay at or below.
val p50 = mc.percentile(50.0) { totalDays }
val p85 = mc.percentile(85.0) { totalDays }
println("P50=$p50  P85=$p85")

P50=68.0  P85=90.0


In [77]:
// Histogram data: how many trials ended with each distinct totalDays value, ordered by totalDays.
val frequencyPerTotal = mc.groupBy { totalDays }.count().rename("count" to "frequency").sortBy { totalDays }
frequencyPerTotal

totalDays,frequency
16,1
17,1
19,1
21,1
22,1
23,1
24,5
25,5
26,4
27,9


In [78]:
import org.jetbrains.kotlinx.statistics.distribution.NormalDistribution
import org.jetbrains.letsPlot.Stat
import org.jetbrains.letsPlot.core.spec.plotson.BinStatOptions

// Bar chart of the simulated distribution: days needed on the x-axis, number of trials on the y-axis.
val distributionBarChart = plot(frequencyPerTotal) {
    layout { size = 2000 to 900 }
    bars {
        x(totalDays)
        y(frequency)
        alpha = 0.6
    }
    // vertical markers at the P50 and P85 forecasts
    vLine { xIntercept.constant(p50); type = LineType.DASHED }
    vLine { xIntercept.constant(p85); type = LineType.DASHED }
}
distributionBarChart

# When will it be done?
Now we can say, with 50% certainty, that the next milestone of 13 stories will be done within 68 days.

If you need a higher certainty (e.g. when planning around a very expensive team you're integrating with), tell your manager that the stories will be done within 90 days, which is the 85th percentile of the simulation.

## Caveat
These predictions only stay relevant if your team composition remains the same and you can continue working in the same way as you have been.