In [1]:
from lets_plot import *
from lets_plot.mapping import as_discrete
import polars as pl

# Configure Lets-Plot for HTML output so that plots render inline in the notebook.
LetsPlot.setup_html()

In [2]:
# Source of the daily Delhi climate dataset (hosted in the lets-plot docs repository).
data_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv"

# Read the CSV straight from the URL and show a column-wise preview.
df = pl.read_csv(data_url)
df.glimpse()

Rows: 1462
Columns: 5
$ date         <str> 2013-01-01, 2013-01-02, 2013-01-03, 2013-01-04, 2013-01-05, 2013-01-06, 2013-01-07, 2013-01-08, 2013-01-09, 2013-01-10
$ meantemp     <f64> 10.0, 7.4, 7.166666666666667, 8.666666666666666, 6.0, 7.0, 7.0, 8.857142857142858, 14.0, 11.0
$ humidity     <f64> 84.5, 92.0, 87.0, 71.33333333333333, 86.83333333333333, 82.8, 78.6, 63.714285714285715, 51.25, 62.0
$ wind_speed   <f64> 0.0, 2.98, 4.633333333333334, 1.2333333333333334, 3.6999999999999997, 1.48, 6.3, 7.142857142857143, 12.5, 7.3999999999999995
$ meanpressure <f64> 1015.6666666666666, 1017.8, 1018.6666666666666, 1017.1666666666666, 1016.5, 1018.0, 1020.0, 1018.7142857142857, 1017.0, 1015.6666666666666



In [3]:
# Parse the "date" strings (formatted as YYYY-MM-DD) into Datetime values.
# The format string is passed positionally on purpose: polars renamed the
# keyword from `fmt` to `format`, so the positional form is the one spelling
# that works on both older and newer releases.
# NOTE: this rebinds `df`; re-running the cell without reloading the data
# will fail because "date" is no longer a string column.
df = df.with_columns([
    pl.col("date").str.strptime(pl.Datetime, "%Y-%m-%d"),
])

<a id="section-two-one"></a>
<p style="font-family:monospace; font-weight:bold; letter-spacing: 2px; color:white; font-size:175%; text-align:left;padding: 0px; border-bottom: 3px solid #98C5F2">Time series Visualizations (distributions)</p>

The most basic plot for a time series is a line plot (`geom_line`).

In [4]:
# Human-readable labels for the measurement columns that appear on the plots.
display_names = {
    "meantemp": "Mean Temperature",
    "wind_speed": "Wind Speed",
    "meanpressure": "Mean Pressure",
}

# Calendar components derived from the parsed "date" column.
date_col = pl.col("date")
calendar_parts = [
    date_col.dt.year().alias("year"),
    date_col.dt.month().alias("month"),
    date_col.dt.day().alias("day"),
]

df_labeled = df.rename(display_names)
df_with_calendar = df_labeled.with_columns(calendar_parts)

# Keep only observations dated before 2017.
df_plot = df_with_calendar.filter(pl.col("year") < 2017)

df_plot.glimpse()


Rows: 1461
Columns: 8
$ date             <datetime[μs]> 2013-01-01 00:00:00, 2013-01-02 00:00:00, 2013-01-03 00:00:00, 2013-01-04 00:00:00, 2013-01-05 00:00:00, 2013-01-06 00:00:00, 2013-01-07 00:00:00, 2013-01-08 00:00:00, 2013-01-09 00:00:00, 2013-01-10 00:00:00
$ Mean Temperature          <f64> 10.0, 7.4, 7.166666666666667, 8.666666666666666, 6.0, 7.0, 7.0, 8.857142857142858, 14.0, 11.0
$ humidity                  <f64> 84.5, 92.0, 87.0, 71.33333333333333, 86.83333333333333, 82.8, 78.6, 63.714285714285715, 51.25, 62.0
$ Wind Speed                <f64> 0.0, 2.98, 4.633333333333334, 1.2333333333333334, 3.6999999999999997, 1.48, 6.3, 7.142857142857143, 12.5, 7.3999999999999995
$ Mean Pressure             <f64> 1015.6666666666666, 1017.8, 1018.6666666666666, 1017.1666666666666, 1016.5, 1018.0, 1020.0, 1018.7142857142857, 1017.0, 1015.6666666666666
$ year                      <i32> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013
$ month                     <u32> 1, 1, 1, 1, 1

In [5]:
# Tick positions for the x axis: the timestamp of the first day of every month.
month_starts = df_plot.filter(pl.col("day") == 1)
date_breaks = month_starts.get_column("date").to_list()

# One line per year, coloured by year (treated as a discrete variable).
yearly_line = geom_line(
    aes(group="year", color=as_discrete("year")),
    size=1,
)

# Label the monthly ticks as abbreviated month name plus year.
monthly_axis = scale_x_datetime(breaks=date_breaks, format="%b %Y")

# One facet column per year, each with free scales.
(
    ggplot(df_plot, aes("date", "Mean Temperature"))
    + yearly_line
    + monthly_axis
    + facet_grid(x="year", scales="free")
    + ggtitle("Mean Temperature Along Period Under Review")
    + theme(legend_position="bottom")
)

In [6]:
# Styling shared by both box-plot panels.
box_style = dict(size=0.5, alpha=0.5)
panel_theme = dict(legend_position="bottom", panel_grid='blank')

# Panel 1: temperature distribution per year.
p1 = (
    ggplot(df_plot, aes("year", "Mean Temperature"))
    + geom_boxplot(aes(fill=as_discrete("year")), **box_style)
    + scale_x_discrete(name="year")
    + ggtitle("Mean Temp Aggregated")
    + theme(**panel_theme)
)

# Panel 2: temperature distribution per month, one facet column per year.
p2 = (
    ggplot(df_plot, aes("month", "Mean Temperature"))
    + geom_boxplot(aes(fill=as_discrete("year")), **box_style)
    + scale_x_discrete(name="month")
    + facet_grid(x="year", scales="free")
    + ggtitle("Mean Temp Aggregated by Month")
    + theme(**panel_theme)
)

# Stack the panels vertically: each is w x h, offset downwards by its row index.
w, h = 1000, 300
bunch = GGBunch()
for row, panel in enumerate((p1, p2)):
    bunch.add_plot(panel, 0, row * h, w, h)
bunch.show()

In [35]:
# Year-to-year comparison: day of month on x, one facet row per month,
# one coloured line per year.

# Tooltip: year as the title, temperature to two decimals, and the full date.
temperature_tooltips = (
    layer_tooltips()
    .title("@year")
    .format("@{Mean Temperature}", ".2f")
    .line("@|@{Mean Temperature}")
    .line("date|@month/@day/@year")
)

# A tick for every possible day of the month (1..31).
day_ticks = list(range(1, 32))

(
    ggplot(df_plot, aes("day", "Mean Temperature"))
    + geom_line(
        aes(group="year", color=as_discrete("year")),
        size=1.5,
        tooltips=temperature_tooltips,
    )
    + geom_point(aes(color=as_discrete("year")), size=3)
    + scale_x_continuous(breaks=day_ticks)
    + facet_grid(y="month", scales="free")
    # The y-axis title is set to "month" to caption the facet rows.
    + ylab("month")
    + ggtitle("Mean Temperature for Each Month")
    + theme(legend_position="bottom")
)


In [37]:
# Histogram of mean temperature for every (year, month) combination.

# Tooltip: bin count, temperature to two decimals, then month and year.
histogram_tooltips = (
    layer_tooltips()
    .line("count|@..count..")
    .format("@{Mean Temperature}", ".2f")
    .line("@|@{Mean Temperature}")
    .line("@|@month")
    .line("@|@year")
)

# Semi-transparent bars filled by year, with a thin black outline.
temperature_bars = geom_histogram(
    aes(group="year", fill=as_discrete("year")),
    color="black",
    bins=15,
    size=.5,
    alpha=.5,
    tooltips=histogram_tooltips,
)

(
    ggplot(df_plot, aes("Mean Temperature"))
    + temperature_bars
    + facet_grid(x="month", y="year")
    # Axis titles are set to the facet variables to caption the grid.
    + xlab("month")
    + ylab("year")
    + ggtitle("Most Common Temperature")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position="bottom")
)