# Weather Analysis using Kotlin

This example was taken from Xarray's Fake Weather Analysis
(which can be found at https://mybinder.org/v2/gh/pydata/xarray/main?urlpath=lab/tree/doc/examples/weather-data.ipynb, or in Xarray's repository on GitHub).

In [1]:
%use dataframe
%use multik
%use lets-plot

In [2]:
import java.time.LocalDateTime
import java.time.LocalDate
import java.text.SimpleDateFormat
import java.time.ZoneId

import kotlin.math.PI

In [3]:
// Pseudo-random generator for the notebook.
// NOTE(review): the visible cells only call setSeed on this generator;
// mk.rand(...) is invoked without it — confirm whether it affects the mock data.
val rand: java.util.Random = java.util.Random()

In [4]:
// Fix the seed so that values drawn from `rand` are reproducible between runs.
rand.setSeed(123L)

In [5]:
"""
* Function used for converting a date String to a LocalDate
* (the default format for dates in kotlin/dataframe).
"""
/**
 * Parses [date] according to [format] and returns the matching calendar date.
 *
 * The text is parsed with a non-lenient [SimpleDateFormat], so impossible
 * dates such as "2000-02-31" are rejected instead of silently rolling over
 * into the following month. Parsing and the conversion back to a [LocalDate]
 * both use the JVM's default time zone.
 *
 * @param date text to parse, e.g. "2000-01-01".
 * @param format [SimpleDateFormat] pattern describing [date].
 * @return the parsed date; the return type is nullable, although this
 *   implementation either returns a value or throws.
 * @throws java.text.ParseException if [date] does not match [format] or is
 *   not a valid calendar date.
 */
fun getLocalDateFromDateString(date: String, format: String = "yyyy-MM-dd"): LocalDate? {
    // Strict mode: the default lenient parser would turn "2000-02-31" into 2000-03-02.
    val parser = SimpleDateFormat(format).apply { isLenient = false }
    return parser
        .parse(date)
        .toInstant()
        .atZone(ZoneId.systemDefault())
        .toLocalDate()
}

In [6]:
/**
 * Lists every date from [start] up to, but not including, [end].
 *
 * @param start first date of the result (inclusive).
 * @param end date at which the listing stops (exclusive).
 * @return the consecutive dates in ascending order; empty when [start] equals [end].
 */
fun getDatesBetween(start: LocalDate, end: LocalDate): List<LocalDate> =
    start.datesUntil(end).toList()

In [7]:
// Daily dates of the simulated period, exposed as a column named "time".
// getDatesBetween stops before its end argument.
// NOTE(review): 2001-12-31 itself is therefore not part of the column — confirm this is intended.
val time by column<LocalDate>(
    getDatesBetween(
        start = getLocalDateFromDateString("2000-01-01")!!,
        end = getLocalDateFromDateString("2001-12-31")!!,
    )
)
time

In [8]:
// Annual cycle modelled as a sine wave: each date is turned into a phase
// derived from its day of the year, and the sine is taken element-wise.
val annualCycle = time
    .map { date -> 2 * PI * (date.dayOfYear / 365.25 - 0.28) }
    .toList()
    .toNDArray()
    .sin()

In [9]:
// Base temperature signal: the annual cycle reshaped into a single-column
// (n x 1) matrix, scaled by 15.0 and shifted by 10.0.
val base: NDArray<Double, D2> = 
    10.0 + 15.0 * annualCycle.reshape(annualCycle.shape[0], 1)

// Workaround for the missing array broadcasting: concatenate the column
// with itself along axis 1 until it has three columns.
val broadBase = base.cat(base, 1).cat(base, 1)

// Mock temperatures: base signal plus 3.0 times a random (n x 3) array;
// the maximum is additionally shifted by 10.0.
// NOTE(review): mk.rand is assumed to draw fresh values on every call — confirm its range.
val tMinValues = broadBase + 3.0 * mk.rand(annualCycle.size, 3)
val tMaxValues = broadBase + 10.0 + 3.0 * mk.rand(annualCycle.size, 3)

In [10]:
// "location" to java.util.Collections.nCopies(time.size() / 3, listOf("IA", "IN", "IL")).flatMap { it }

In [11]:
// Flatten the temperature grids into single-column arrays once, instead of
// reshaping them again for every row inside the column expressions below.
val tMinFlat = tMinValues.reshape(tMinValues.size, 1)
val tMaxFlat = tMaxValues.reshape(tMaxValues.size, 1)

// generating the dataframe: start with one row per date
var df = dataFrameOf(
    "time" to time.toList(),
)

// Attach the three locations to every date and explode them into separate rows,
// then pick the temperature of each row from the flattened arrays by row index.
// NOTE(review): this relies on explode keeping the three locations of a date
// adjacent, matching the row-by-row order of the flattened arrays — confirm.
df = df.add("location") { listOf("IA", "IN", "IL") }
    .explode("location")
    .add("tmin") { row -> tMinFlat[row.index()][0] }
    .add("tmax") { row -> tMaxFlat[row.index()][0] }
df.head(5)

In [12]:
// Brief summary information about the dataframe
df.describe()

In [13]:
// Prepare the dataframe for plotting.
// Lets-Plot needs a sortable x value for the dates, so every date is
// replaced by its epoch day (days since 1970-01-01).
val data = df
    .convert { time }.with { date -> date.toEpochDay() }
    .toMap()

ggplot(data) {
    x = "time"
    y = "tmin"
} +
    geomLine() +
    geomLine(color = "orange") {
        x = "time"
        y = "tmax"
    } +
    ylab("Temperature") +
    xlab("Time") +
    // TODO: find a way to change the x-axis scale to months
    ggtitle("Temperature Mockup")


Probability of freeze by calendar month

In [14]:
// Share of rows with tmin <= 0, per calendar month and per location.
val freeze = df
    // group the rows by the month of their date
    .groupBy { time.map { it.month } }
    // one result column per location value
    .pivot { location }
    .aggregate {
        // "Boolean mean": encode tmin <= 0 as 1 or 0 and average it.
        // In Python this is simpler, because mean can be called
        // directly on a boolean vector.
        tmin.map { if (it <= 0) 1 else 0 }.mean()
    // the ungroup call below removes the "location" column group
    }.ungroup { location }

In [15]:
// Show the table as the cell result
freeze

In [16]:
// Plot the probability of freeze along the months, one line per location.
// Every layer maps its own y column so that each location gets its own colour.
ggplot(freeze.toMap()) +
    geomLine(color="dark-green") { x="time" ; y="IA" } +
    geomLine(color="dark-blue") { x="time" ; y="IN" } +
    // third location is "IL"; mapping "IN" here would draw the same line twice
    geomLine(color="orange") { x="time" ; y="IL" }

In [17]:
// Extract one tmin and one tmax sample per month for the "IA" location.
val ia_data = df
    .pivot { location }
    .groupBy { time.map { it.month } }
    .aggregate {
        // sampling: take the first value of each column within the group
        tmin.first() into "tmin"
        tmax.first() into "tmax"
    // Not so efficient: the aggregation is computed for all
    // locations although only "IA" is selected afterwards
    }.ungroup { location }["time", "IA"]
    
ia_data

In [18]:
// Trend of the sampled tmin and tmax values for IA: a line plus square
// markers per series, sharing the "time" column on the x axis.
ggplot(ia_data.ungroup { IA }.toMap()) { x = "time" } +
    geomLine(color = "dark-green") { y = "tmin" } +
    geomPoint(color = "dark-green", shape = 15, size = 3) { y = "tmin" } +
    geomLine(color = "dark-blue") { y = "tmax" } +
    geomPoint(color = "dark-blue", shape = 15, size = 3) { y = "tmax" }

In [96]:
// Climatology: mean values grouped by calendar month, with one set of
// columns per location, flattened out of the "location" column group.
val climatology = df
    .groupBy { time.map { date -> date.month } }
    .pivot { location }
    .mean()
    .ungroup { location }
climatology

### Filling Missing Values

In [108]:
// New frame holding only the time and location columns of df.
val someMissing = dataFrameOf(df.time, df.location)
// throw away half of the month, and fill missing values with zeros