# Time Series Analysis

## Technical requirements

In [82]:
from datetime import date, datetime

import polars as pl

In [83]:
# Build a lazy query over the raw weather CSV; rows are only read when the
# query is materialised further down.
lf = pl.scan_csv(source='../data/toronto_weather.csv')

In [84]:
# Preview the first rows. `head(n).collect()` is used instead of `fetch(n)`:
# fetch is a debugging helper (deprecated in Polars 1.0) and the rest of the
# notebook already previews with head + collect.
lf.head(5).collect()

datetime,temperature,wind_speed,pressure,humidity
str,f64,f64,f64,f64
"""2012-10-01 12:…",,,,
"""2012-10-01 13:…",286.26,3.0,1012.0,82.0
"""2012-10-01 14:…",286.262541,3.0,1011.0,81.0
"""2012-10-01 15:…",286.269518,3.0,1011.0,79.0
"""2012-10-01 16:…",286.276496,3.0,1010.0,77.0


In [85]:
# Shift the temperature column by 273.15; the raw readings (~286) look like
# Kelvin, so this yields degrees Celsius (~13).
# NOTE: `lf` is reassigned from itself, so re-running this cell without
# re-running the scan cell subtracts the offset a second time.
lf = lf.with_columns(
    pl.col('temperature') - 273.15
)

In [86]:
# Confirm the converted temperatures. `head(n).collect()` replaces the
# debug-only `fetch(n)` (deprecated in Polars 1.0) and matches the preview
# style used by the later cells.
lf.head(5).collect()

datetime,temperature,wind_speed,pressure,humidity
str,f64,f64,f64,f64
"""2012-10-01 12:…",,,,
"""2012-10-01 13:…",13.11,3.0,1012.0,82.0
"""2012-10-01 14:…",13.112541,3.0,1011.0,81.0
"""2012-10-01 15:…",13.119518,3.0,1011.0,79.0
"""2012-10-01 16:…",13.126496,3.0,1010.0,77.0


## Working with date and time

In [87]:
# Re-scan the CSV, this time letting the reader try to parse date-like
# columns automatically (the `datetime` column comes back as datetime[μs]
# instead of str).
lf_date_parsed = pl.scan_csv(
    '../data/toronto_weather.csv',
    try_parse_dates=True,
)
# Preview with head + collect rather than the debug-only `fetch`
# (deprecated in Polars 1.0).
lf_date_parsed.head(5).collect()

datetime,temperature,wind_speed,pressure,humidity
datetime[μs],f64,f64,f64,f64
2012-10-01 12:00:00,,,,
2012-10-01 13:00:00,286.26,3.0,1012.0,82.0
2012-10-01 14:00:00,286.262541,3.0,1011.0,81.0
2012-10-01 15:00:00,286.269518,3.0,1011.0,79.0
2012-10-01 16:00:00,286.276496,3.0,1010.0,77.0


In [88]:
# Show the inferred types two ways: the column-name -> dtype mapping, and
# the bare list of dtypes in column order.
(
    lf_date_parsed.schema,
    lf_date_parsed.dtypes,
)

(OrderedDict([('datetime', Datetime(time_unit='us', time_zone=None)),
              ('temperature', Float64),
              ('wind_speed', Float64),
              ('pressure', Float64),
              ('humidity', Float64)]),
 [Datetime(time_unit='us', time_zone=None),
  Float64,
  Float64,
  Float64,
  Float64])

In [89]:
# Parse the string `datetime` column into a proper Datetime column, as an
# explicit alternative to `try_parse_dates=True` at scan time.
# NOTE: `lf` is reassigned from itself; once the column is a Datetime the
# `.str` namespace no longer applies, so re-running this cell on the
# already-converted frame is expected to fail rather than be a no-op.
lf = lf.with_columns(
    pl.col('datetime').str.to_datetime()
)
# Preview with head + collect rather than the debug-only `fetch`
# (deprecated in Polars 1.0).
lf.head(5).collect()

datetime,temperature,wind_speed,pressure,humidity
datetime[μs],f64,f64,f64,f64
2012-10-01 12:00:00,,,,
2012-10-01 13:00:00,13.11,3.0,1012.0,82.0
2012-10-01 14:00:00,13.112541,3.0,1011.0,81.0
2012-10-01 15:00:00,13.119518,3.0,1011.0,79.0
2012-10-01 16:00:00,13.126496,3.0,1010.0,77.0


In [153]:
# Break the timestamp into calendar components via the `dt` namespace.
(
    lf
    .select(
        'datetime',
        pl.col('datetime').dt.year().alias('year'),
        pl.col('datetime').dt.month().alias('month'),
        pl.col('datetime').dt.day().alias('day'),
        pl.col('datetime').dt.time().alias('time'),
    )
    # head + collect instead of the debug-only `fetch` (deprecated in
    # Polars 1.0); consistent with the previews later in the notebook.
    .head(5)
    .collect()
)

datetime,year,month,day,time
datetime[μs],i32,i8,i8,time
2012-10-01 12:00:00,2012,10,1,12:00:00
2012-10-01 13:00:00,2012,10,1,13:00:00
2012-10-01 14:00:00,2012,10,1,14:00:00
2012-10-01 15:00:00,2012,10,1,15:00:00
2012-10-01 16:00:00,2012,10,1,16:00:00


In [187]:
# Keep only readings from calendar year 2017 taken before noon.
# The two predicates passed to `filter` are combined with AND.
filtered_lf = lf.filter(
    # `dt.date()` yields a Date, so compare against `date` bounds (not
    # `datetime`) to keep both sides of the comparison the same type.
    # `is_between` is inclusive on both ends by default.
    pl.col('datetime').dt.date().is_between(
        date(2017, 1, 1), date(2017, 12, 31)
    ),
    pl.col('datetime').dt.hour() < 12,
)
filtered_lf.head().collect()

datetime,temperature,wind_speed,pressure,humidity
datetime[μs],f64,f64,f64,f64
2017-01-01 00:00:00,2.44,3.0,1001.0,92.0
2017-01-01 01:00:00,2.19,9.0,1003.0,86.0
2017-01-01 02:00:00,2.41,7.0,1003.0,86.0
2017-01-01 03:00:00,2.42,5.0,1003.0,74.0
2017-01-01 04:00:00,1.77,6.0,1006.0,69.0


In [188]:
# Sanity-check the filter: count the distinct years and distinct hours that
# survive it. `n_unique()` gives the distinct count directly, replacing the
# roundabout unique -> implode -> list.len chain. The aggregate is a single
# row, so no `head()` is needed before collecting.
(
    filtered_lf
    .select(
        pl.col('datetime').dt.year().n_unique().alias('year_cnt'),
        pl.col('datetime').dt.hour().n_unique().alias('hour_cnt'),
    )
    .collect()
)

year_cnt,hour_cnt
u32,u32
1,12


In [217]:
# Contrast the two time-zone operations on the naive `datetime` column:
#   * replace_time_zone attaches the zone and keeps the wall-clock time
#     (12:00 stays 12:00 EDT in the output below);
#   * convert_time_zone shifts the wall-clock time into the target zone
#     (12:00 becomes 08:00 EDT below, i.e. the naive input is treated as UTC).
time_zones_lf = lf.select(
    pl.col('datetime'),
    pl.col('datetime')
    .dt.replace_time_zone('America/Toronto')
    .alias('replaced_time_zone_toronto'),
    pl.col('datetime')
    .dt.convert_time_zone('America/Toronto')
    .alias('converted_time_zone_toronto'),
)
time_zones_lf.head(5).collect()

datetime,replaced_time_zone_toronto,converted_time_zone_toronto
datetime[μs],"datetime[μs, America/Toronto]","datetime[μs, America/Toronto]"
2012-10-01 12:00:00,2012-10-01 12:00:00 EDT,2012-10-01 08:00:00 EDT
2012-10-01 13:00:00,2012-10-01 13:00:00 EDT,2012-10-01 09:00:00 EDT
2012-10-01 14:00:00,2012-10-01 14:00:00 EDT,2012-10-01 10:00:00 EDT
2012-10-01 15:00:00,2012-10-01 15:00:00 EDT,2012-10-01 11:00:00 EDT
2012-10-01 16:00:00,2012-10-01 16:00:00 EDT,2012-10-01 12:00:00 EDT
