In [1]:
%use dataframe
%use multik
%use lets-plot

In [2]:
import java.time.LocalDateTime
import java.time.LocalDate
import java.text.SimpleDateFormat
import java.time.ZoneId

import kotlin.math.PI

In [3]:
// Shared java.util.Random instance. Apart from the seeding call, no visible cell
// draws numbers from it; the mock temperatures further down use mk.rand instead.
val rand = java.util.Random()

In [4]:
// Pin the seed of `rand`.
// NOTE(review): the noise terms later in the notebook come from mk.rand, which is not
// handed this generator, so this seed presumably does not make the mock data
// reproducible — confirm.
rand.setSeed(123)

In [5]:
/**
 * Parses [date] with a [SimpleDateFormat] built from [format] and converts the result
 * to a [LocalDate] in the JVM's default time zone.
 *
 * The return type is declared nullable, but no branch of this body yields `null`;
 * unparseable input surfaces as the exception thrown by [SimpleDateFormat.parse].
 *
 * @param date text to parse, e.g. `"2000-01-01"`.
 * @param format a [SimpleDateFormat] pattern; defaults to `"yyyy-MM-dd"`.
 * @return the calendar date represented by [date].
 */
fun getLocalDateFromDateString(date: String, format: String = "yyyy-MM-dd"): LocalDate? {
    val parsed = SimpleDateFormat(format).parse(date)
    val inDefaultZone = parsed.toInstant().atZone(ZoneId.systemDefault())
    return inDefaultZone.toLocalDate()
}

In [6]:
/**
 * Lists every date from [start] up to, but not including, [end], in ascending order.
 *
 * Delegates to [LocalDate.datesUntil], so the end bound is exclusive and an empty
 * list is produced when [start] equals [end].
 *
 * @param start first date of the result (inclusive).
 * @param end upper bound (exclusive).
 * @return the consecutive dates in `[start, end)`.
 */
fun getDatesBetween(start: LocalDate, end: LocalDate): List<LocalDate> =
    start.datesUntil(end).toList()

In [7]:
// Column of consecutive calendar days used as the time axis of the mock dataset.
// The `!!` is only needed because getLocalDateFromDateString is declared to return LocalDate?.
// NOTE(review): getDatesBetween relies on LocalDate.datesUntil, whose end bound is
// exclusive, so 2001-12-31 itself is presumably not part of this column — confirm
// that stopping at 2001-12-30 is intended.
val time by column<LocalDate>(
    getDatesBetween(
        getLocalDateFromDateString("2000-01-01")!!,
        getLocalDateFromDateString("2001-12-31")!!
    )
)
// Bare expression so the notebook renders the column as the cell output.
time

In [18]:
// Sinusoidal seasonal signal, one value per entry of `time`.
// The 0.28 phase shift puts the sine's trough near day-of-year 11 and its crest near day 194.
val annualCycle = time
    .map { day -> 2 * PI * (day.dayOfYear / 365.25 - 0.28) }
    .toList()
    .toNDArray()
    .sin()

In [19]:
// Seasonal baseline as an (n, 1) column: 10.0 + 15.0 * annualCycle, with n = annualCycle.shape[0].
val base: NDArray<Double, D2> = 
    10.0 + 15.0 * annualCycle.reshape(annualCycle.shape[0], 1)
// array broadcasting workaround: repeat the column three times along axis 1 to get (n, 3)
val broadBase = base.cat(base, 1).cat(base, 1)
// mocking up temperatures: baseline plus mk.rand noise scaled by 3.0; tmax is shifted up by a further 10.0
// NOTE(review): the distribution and range of mk.rand are not visible here, and the draws are
// not tied to the seeded `rand` declared earlier — confirm whether reproducibility is expected.
// NOTE(review): the three columns presumably line up with the three locations added to the
// dataframe later — verify against the cell that builds `df`.
val tMinValues = broadBase + 3.0 * mk.rand(annualCycle.size, 3)
val tMaxValues = broadBase + 10.0 + 3.0 * mk.rand(annualCycle.size, 3)

In [20]:
// "location" to java.util.Collections.nCopies(time.size() / 3, listOf("IA", "IN", "IL")).flatMap { it }

In [21]:
// Start from a single-column frame holding the dates.
var df = dataFrameOf("time" to time.toList())

// Attach the three location codes to every date and explode them into one row per
// (time, location) pair, then fill tmin/tmax by reading the value arrays, reshaped to
// a single column, at each row's index.
df = df
    .add("location") { listOf("IA", "IN", "IL") }
    .explode("location")
    .add("tmin") { row -> tMinValues.reshape(tMinValues.size, 1)[row.index()][0] }
    .add("tmax") { row -> tMaxValues.reshape(tMaxValues.size, 1)[row.index()][0] }
df.head(5)

In [29]:
// Show the describe() summary of df as the cell output.
df.describe()

In [30]:
// Flatten df into a plain map for lets-plot, with each date replaced by its epoch-day number.
val data = df
    .convert { time }
    .with { date -> date.toEpochDay() }
    .toMap()

// tmin is drawn with the default line colour, tmax is overlaid in orange.
ggplot(data) {
    x = "time"
    y = "tmin"
} +
    geomLine() +
    geomLine(color = "orange") {
        x = "time"
        y = "tmax"
    } +
    ylab("Temperature") +
    xlab("Time") +
    ggtitle("Temperature Mockup")


Probability of freeze by calendar month

In [42]:
// Share of days with tmin at or below zero, grouped by calendar month (rows)
// and pivoted by location (columns). Dates are reduced to their month first,
// so the same month of different years falls into one group.
val freeze = df
    .convert { time }.with { date -> date.month }
    .groupBy { time }
    .pivot { location }
    .aggregate {
        // mean of a 0/1 indicator = fraction of freezing days in the group
        tmin.map { t -> if (t <= 0) 1 else 0 }.mean()
    }
    .ungroup { location }
    //.fillNaNs { "IA" and "IN" and "IL" }.withZero()

In [43]:
// Render the freeze table as the cell output.
freeze

In [44]:
// Freeze share per calendar month, one line per location column of `freeze`:
// IA in dark green, IN in dark blue, IL in orange.
ggplot(freeze.toMap()) +
    geomLine(color="dark-green") { x="time" ; y="IA" } +
    geomLine(color="dark-blue") { x="time" ; y="IN" } +
    geomLine(color="orange") { x="time" ; y="IL" }

In [54]:
// Same month-by-location freeze share as above, displayed without ungrouping
// the pivoted location columns.
df
    .convert { time }.with { date -> date.month }
    .groupBy { time }
    .pivot { location }
    .aggregate {
        tmin.map { t -> if (t <= 0) 1 else 0 }.mean()
    }

In [66]:
// Per calendar month and location, keep the first tmin and tmax reading,
// then select only the "time" column and the "IA" column.
// NOTE(review): "IA" presumably ends up as a column group holding tmin/tmax,
// since the plotting cell ungroups it — confirm against the rendered output.
val ia_data = df
    .convert { time }.with { it.month }
    .pivot { location }
    .groupBy { time }
    .aggregate {
        // sampling: only the first value of each group is kept
        tmin.first() into "tmin"
        tmax.first() into "tmax"
    }.ungroup { location }["time", "IA"]
    
// Bare expression so the notebook renders the table as the cell output.
ia_data

In [68]:
// IA sample per calendar month: tmin in dark green, tmax in dark blue, sharing the "time" x axis.
ggplot(ia_data.ungroup { IA }.toMap()) { x = "time" } +
    geomLine(color = "dark-green") { y = "tmin" } +
    geomLine(color = "dark-blue") { y = "tmax" }