In [61]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests
from pathlib import Path
from datetime import datetime

# Do not truncate long text columns when DataFrames are rendered.
pd.set_option('display.max_colwidth', None)

# Indicator key; used for the config lookup here and for the docs.* calls
# and output file name further down.
INDICATOR = 'vmt'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Normalise backslashes so a Windows path still works as a markdown link,
# then render the heading and the link to the raw-data directory.
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(
    Markdown('## Raw data path'),
    Markdown(f"[{raw_dir_path}]({raw_dir_path})"),
)


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/transportation/vmt/vmt](C:/Users/tan/src/regional-pm-2023/data/raw/transportation/vmt/vmt)

# Transportation: VMT

## Vehicle Miles Traveled

In [62]:
# Render the indicator's description from the workbook at
# CONFIG['indicators_xlsx_path'].
docs.describe_indicator(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Vehicle miles traveled (VMT) on monitored highways.

nan

In [63]:
# Show the column schema (name, description, type) for this indicator.
docs.list_schema(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
vmt,Vehicle Miles Traveled,Vehicle miles traveled (VMT) on monitored highways in a given year.,float


In [64]:
# Show the data sources registered for this indicator.
docs.list_sources(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
pems_vmt,Peformance Measurement System (PeMS),Caltrans,True,"All VMT sources have big limitiations, this one is that it only monitors a subset of all freeways in the region."


In [65]:
# Keep the update steps in `steps`: the step sections below look up
# individual rows by index label.
steps = docs.list_update_steps(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)
steps

Unnamed: 0,step
0,Download PeMS data from region.
1,Calculate total estimated VMT.


In [66]:
# Show any author remarks recorded for this indicator.
docs.list_remarks(
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
    indicator=INDICATOR,
)

Unnamed: 0,author,note
0,TAN,


### Step 0: Download PeMS data from region

This was done by running [this report](https://pems.dot.ca.gov/?dnode=County&content=loops&tab=det_summary&county_id=73) every year.

In [67]:
# Show the update step with index label 0 (the step this section covers).
display(steps.loc[0])

step    Download PeMS data from region.
Name: 0, dtype: object

### Step 1: Calculate VMT

In [68]:
# Show the update step with index label 1 (the step this section covers).
display(steps.loc[1])

step    Calculate total estimated VMT.
Name: 1, dtype: object

In [69]:
def parse_vmt(pems_report_path: Path) -> pd.DataFrame:
    """Read one yearly PeMS report and tag every row with its report year.

    The year is not read from the workbook; it is taken from the file name,
    so the file stem must be an integer year (e.g. ``2022.xlsx`` -> 2022).

    Parameters
    ----------
    pems_report_path
        Path to the Excel report. Column B of its ``Report Data`` sheet is
        read (presumably the VMT column -- confirm against the export).

    Returns
    -------
    pd.DataFrame
        The rows of column B, indexed by ``year``, where every row carries
        January 1st of the report year.

    Raises
    ------
    ValueError
        If the file stem cannot be interpreted as a year.
    """
    # Resolve the year first so a misnamed file fails immediately with a
    # message naming the file, instead of after the slower Excel read.
    try:
        report_year = datetime(int(pems_report_path.stem), 1, 1)
    except ValueError as err:
        raise ValueError(
            f"Cannot derive the report year from file name "
            f"{pems_report_path.name!r}; the file stem must be a year, "
            f"e.g. '2022'."
        ) from err

    report = pd.read_excel(
        pems_report_path,
        sheet_name='Report Data',
        usecols='B',
    )
    return report.assign(year=report_year).set_index('year')

In [70]:
# Only plain files whose stem is a year (e.g. "2022.xlsx") are treated as
# reports. This skips Excel lock files ("~$2022.xlsx"), hidden files and
# sub-directories, any of which would make parse_vmt raise.
report_paths = sorted(
    path
    for path in CONFIG['raw_dir'].iterdir()
    if path.is_file() and path.stem.isdecimal()
)
assert report_paths, f"No yearly PeMS reports found in {CONFIG['raw_dir']}"

# Stack all reports, then collapse each year's rows into a single total.
vmt = (
    pd.concat([parse_vmt(path) for path in report_paths])
    .groupby(['year'])
    .sum()
)
vmt.tail(3)

Unnamed: 0_level_0,VMT
year,Unnamed: 1_level_1
2020-01-01,11838130000.0
2021-01-01,13452790000.0
2022-01-01,13844460000.0


### Save Data

In [71]:
# Normalise backslashes so a Windows path still works as a markdown link,
# then render the heading and the link to the clean-data directory.
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(
    Markdown('#### Clean data path'),
    Markdown(f"[{clean_dir_path}]({clean_dir_path})"),
)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/transportation/vmt/vmt](C:/Users/tan/src/regional-pm-2023/data/clean/transportation/vmt/vmt)

In [72]:
# Write the yearly totals to "<indicator>_odp.csv" in the clean-data directory.
clean_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
# to_csv does not create missing folders; make sure the target exists so a
# fresh checkout can run the notebook end to end.
clean_csv_path.parent.mkdir(parents=True, exist_ok=True)
vmt.to_csv(clean_csv_path)