In [2]:
%reload_ext autoreload
%autoreload 2

import polars as pl

In [3]:
# Load the PeMS holiday workbook; its "date" column is used further down to
# flag holiday rows in the daily VMT table.
pems_holidays = pl.read_excel("../../data/raw/pems/PeMS Holidays.xlsx")

# Quick look at the first two rows.
# NOTE(review): no output is shown for this preview in the export — if it shares
# a cell with the code below it is not the cell's last expression and will not
# render; confirm it lives in its own cell.
pems_holidays.head(2)


# Ordered weekday categories used for the `weekday` Enum column (Monday first,
# matching the ISO ordering returned by `dt.weekday()`).
WEEKDAY_NAMES = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Build the cleaned daily VMT table: read every export workbook, normalise the
# column names, de-duplicate by date, then derive calendar flags (weekday,
# weekend, holiday) and per-year completeness columns.
daily_vmt = (
    # Read all excel files in export folder.
    pl.read_excel(
        "../../data/raw/pems/Aggregate Daily VMT Export/*", sheet_name="Report Data"
    )
    .rename(
        {
            "Day": "date",
            "VMT (Veh-Miles)": "vmt",
            "# Lane Points": "observations",
            "% Observed": "pct_observed",
        }
    )
    # Drop any duplicate dates (overlapping exports), keeping the last one read.
    .unique(subset="date", keep="last")
    .sort("date")
    .with_columns(
        weekday=(
            pl.col("date")
            # "%A" formats the full weekday name (e.g. "Monday") natively,
            # avoiding a per-row Python lambda via `map_elements`.
            .dt.strftime("%A")
            # The Enum cast fixes the category order and raises if any
            # formatted name falls outside WEEKDAY_NAMES.
            .cast(pl.Enum(categories=WEEKDAY_NAMES))
        )
    )
    .with_columns(
        is_weekend=pl.col("weekday").is_in(["Saturday", "Sunday"]),
        is_holiday=pl.col("date").is_in(pems_holidays["date"].to_list()),
        # Number of rows (one per date after de-duplication) present per year.
        days_in_year=(pl.col("date").len().over(pl.col("date").dt.year())),
        # 365, or 366 in leap years. `is_leap_year()` already depends only on
        # the row's own year, so no window is needed; the explicit cast makes
        # the int + bool arithmetic unambiguous and keeps the column Int32.
        expected_days_in_year=(
            365 + pl.col("date").dt.is_leap_year().cast(pl.Int32)
        ),
    )
    .with_columns(
        # True only for years in which every calendar day has a row.
        is_full_year=(pl.col("days_in_year") == pl.col("expected_days_in_year"))
    )
)

In [4]:
# Display the cleaned table (polars truncates the repr to the head and tail rows).
daily_vmt

date,vmt,observations,pct_observed,weekday,is_weekend,is_holiday,days_in_year,expected_days_in_year,is_full_year
date,f64,i64,f64,enum,bool,bool,u32,i32,bool
2003-01-01,1.7464e7,293184,84.8,"""Wednesday""",false,true,365,365,true
2003-01-02,2.6949e7,293184,83.3,"""Thursday""",false,false,365,365,true
2003-01-03,2.8446e7,293184,85.5,"""Friday""",false,false,365,365,true
2003-01-04,2.3763e7,293184,85.3,"""Saturday""",true,false,365,365,true
2003-01-05,2.0565e7,293184,84.6,"""Sunday""",true,false,365,365,true
…,…,…,…,…,…,…,…,…,…
2025-03-27,4.1487e7,870336,54.8,"""Thursday""",false,false,90,365,false
2025-03-28,4.2445e7,870336,54.9,"""Friday""",false,false,90,365,false
2025-03-29,3.8472e7,870336,55.2,"""Saturday""",true,false,90,365,false
2025-03-30,3.3752e7,870336,55.7,"""Sunday""",true,false,90,365,false


In [5]:
# Persist the cleaned daily VMT table as Parquet for downstream use.
# NOTE(review): assumes ../../data/clean/pems/ already exists — TODO confirm.
daily_vmt.write_parquet("../../data/clean/pems/pems.parquet")