In [9]:
%reload_ext autoreload
%autoreload 2

from pathlib import Path

import polars as pl

In [10]:
# Load the PeMS holiday calendar; its `date` column is used further down to
# flag holidays in the daily VMT table.
pems_holidays = pl.read_excel("../../data/raw/pems/PeMS Holidays.xlsx")

# Preview the first rows. `display` is used because a bare expression only
# renders when it is the last statement of a cell.
display(pems_holidays.head(2))


# Ordered weekday dtype (Monday first), so the `weekday` column sorts and
# groups in calendar order rather than alphabetically.
weekday_dtype = pl.Enum(
    [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
)

# Cleaned daily VMT table: one row per distinct `date` value, with calendar
# flags (weekday / weekend / holiday) and per-year completeness columns.
daily_vmt = (
    # Read all excel files in export folder.
    pl.read_excel(
        "../../data/raw/pems/Aggregate Daily VMT Export/*", sheet_name="Report Data"
    )
    .rename(
        {
            "Day": "date",
            "VMT (Veh-Miles)": "vmt",
            "# Lane Points": "observations",
            "% Observed": "pct_observed",
        }
    )
    # Drop any duplicate dates.
    .unique(subset="date", keep="last")
    .sort("date")
    .with_columns(
        # "%A" yields the full weekday name ("Monday" ... "Sunday") using a
        # native expression, avoiding a per-row Python callback.
        weekday=pl.col("date").dt.strftime("%A").cast(weekday_dtype)
    )
    .with_columns(
        is_weekend=pl.col("weekday").is_in(["Saturday", "Sunday"]),
        # True when the date appears in the holiday calendar.
        # NOTE(review): assumes `pems_holidays["date"]` has the same dtype as
        # this frame's `date` column — confirm.
        is_holiday=pl.col("date").is_in(pems_holidays["date"].to_list()),
        # Number of rows present for each calendar year (rows are already
        # de-duplicated on `date` above).
        days_in_year=(pl.col("date").len().over(pl.col("date").dt.year())),
        # 365, plus 1 when the year is a leap year (the boolean is added to
        # the integer).
        expected_days_in_year=(
            365
            + (pl.col("date").dt.is_leap_year().first().over(pl.col("date").dt.year()))
        ),
    )
    .with_columns(
        # A year is "full" when it has as many rows as the calendar has days.
        is_full_year=(pl.col("days_in_year") == pl.col("expected_days_in_year"))
    )
)

In [11]:
# Persist the cleaned table. The destination folder is created first so the
# write also succeeds when it does not exist yet (e.g. on a fresh checkout).
clean_pems_path = Path("../../data/clean/pems/pems.parquet")
clean_pems_path.parent.mkdir(parents=True, exist_ok=True)
daily_vmt.write_parquet(clean_pems_path)