In [32]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to project modules are picked up without a
# kernel restart.
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

# Do not truncate long cell text when DataFrames are rendered.
pd.set_option('display.max_colwidth', None)

# Indicator key for this notebook and its settings, looked up through the
# project's config helper using '../config.toml'.
INDICATOR = 'corridor_time'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Swap backslashes for forward slashes so the directory can be used as a
# Markdown link target, then show a heading followed by the link itself.
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(
    Markdown('## Raw data path'),
    Markdown(f"[{raw_dir_path}]({raw_dir_path})"),
)


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/transportation/commute/corridor_time](C:/Users/tan/src/regional-pm-2023/data/raw/transportation/commute/corridor_time)

# Transportation Policy: Commute

## Corridor Times

In [33]:
# Project helper; judging by the cell output it renders the indicator's
# description from the indicators workbook.
docs.describe_indicator(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Average commute times on select corridors.

nan

In [34]:
# Project helper; the output is a table of this indicator's columns
# (name, description, type).
docs.list_schema(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
corridor,Corridor,Traffic corridor of record.,string
peak,Peak,"Traffic peak time of a record (""AM"" for 8:00 A.M. departure or ""PM"" for 5:00 P.M. departure).",string
travel_times,Travel Times,Average commute times on select corridors,float


In [35]:
# Project helper; the output is a table of the data sources recorded for this
# indicator (name, organization, active flag, notes).
docs.list_sources(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
corridor_time,State of the Commute,SANDAG,True,Numbers pulled from table on ODP.


In [36]:
# Keep the update-steps table in `steps`: later cells show individual rows
# with steps.loc[...]. The bare name on the last line renders the full table.
steps = docs.list_update_steps(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])
steps

Unnamed: 0,step
0,Extract from legacy PM data.
1,Extract new data from latest State of Commute report and combine with legacy PM data.


In [37]:
# Project helper; the output is a table of remarks (author, note) recorded
# for this indicator.
docs.list_remarks(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0,author,note
0,TAN,
1,TAN,


### Step 0: Get new data and add it to the legacy data

[Data was taken from the numbers reported in the latest State of the Commute report](https://opendata.sandag.org/Transportation/SOC-Peak-Period-Highway-Congestion-Volumes-Map-PeM/ntjg-7y5y).

In [38]:
# Show the update step stored under index label 0.
step_0 = steps.loc[0]
display(step_0)

step    Extract from legacy PM data.
Name: 0, dtype: object

### Step 1: Extract legacy data

In [39]:
# Show the update step stored under index label 1.
step_1 = steps.loc[1]
display(step_1)

step    Extract new data from latest State of Commute report and combine with legacy PM data.
Name: 1, dtype: object

In [41]:
# Labels pandas gives the three identifier columns (A:C) -> working names.
am_id_names = {
    'Unnamed: 0': 'corridor_id',
    'Corridor': 'freeway',
    'Unnamed: 2': 'route',
}

# Wide AM table from the legacy workbook: identifier columns A:C plus the
# value columns F:V, skipping the first 4 rows and reading 12 data rows.
am_wide = pd.read_excel(
    CONFIG['legacy_xlsx_path'],
    sheet_name=CONFIG['legacy_sheet'],
    usecols='A:C,F:V',
    skiprows=4,
    nrows=12,
).rename(columns=am_id_names)

# Wide -> long: one row per (corridor, year column) holding a travel time.
am_long = am_wide.melt(
    id_vars=list(am_id_names.values()),
    var_name='year',
    value_name='travel_times',
)

am_data = (
    am_long
    .assign(
        # Column labels are parsed as four-digit years.
        year=pd.to_datetime(am_long['year'], format='%Y'),
        # Human-readable corridor label, e.g. "<route> via <freeway>".
        corridor=am_long['route'] + ' via ' + am_long['freeway'],
        peak='AM',
    )
    .drop(columns=['freeway', 'route'])
    .set_index(['year', 'corridor_id', 'corridor', 'peak'])
)
am_data.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,travel_times
year,corridor_id,corridor,peak,Unnamed: 4_level_1
2022-01-01,10,San Ysidro to Downtown SD via I-5,AM,18.0
2022-01-01,11,El Cajon to Sorrento Valley via I-8,AM,26.0
2022-01-01,12,Poway to Carmel Valley via SR 56,AM,13.0


In [43]:
# Labels pandas gives the three identifier columns (A:C) -> working names.
pm_id_names = {
    'Unnamed: 0': 'corridor_id',
    'Corridor': 'freeway',
    'Unnamed: 2': 'route',
}

# Wide PM table from the legacy workbook: identifier columns A:C plus the
# value columns X:AN, skipping the first 4 rows and reading 12 data rows.
pm_wide = pd.read_excel(
    CONFIG['legacy_xlsx_path'],
    sheet_name=CONFIG['legacy_sheet'],
    usecols='A:C,X:AN',
    skiprows=4,
    nrows=12,
).rename(columns=pm_id_names)

# Wide -> long: one row per (corridor, year column) holding a travel time.
pm_long = pm_wide.melt(
    id_vars=list(pm_id_names.values()),
    var_name='year',
    value_name='travel_times',
)

pm_data = (
    pm_long
    .assign(
        # The format expects labels like "2006.1". NOTE(review): presumably the
        # ".1" is the suffix pandas appends to repeated header names because the
        # PM block reuses the AM year headers -- confirm against the workbook.
        year=pd.to_datetime(pm_long['year'], format='%Y.1'),
        # Human-readable corridor label, e.g. "<route> via <freeway>".
        corridor=pm_long['route'] + ' via ' + pm_long['freeway'],
        peak='PM',
    )
    .drop(columns=['freeway', 'route'])
    .set_index(['year', 'corridor_id', 'corridor', 'peak'])
)
pm_data.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,travel_times
year,corridor_id,corridor,peak,Unnamed: 4_level_1
2022-01-01,10,San Ysidro to Downtown SD via I-5,PM,17.0
2022-01-01,11,El Cajon to Sorrento Valley via I-8,PM,38.0
2022-01-01,12,Poway to Carmel Valley via SR 56,PM,46.0


In [44]:
# Stack the AM rows on top of the PM rows; both frames share the same
# (year, corridor_id, corridor, peak) index and a single travel_times column.
corridor_time = pd.concat(objs=[am_data, pm_data], axis=0)
corridor_time

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,travel_times
year,corridor_id,corridor,peak,Unnamed: 4_level_1
2006-01-01,1,Oceanside to Downtown SD via I-5,AM,56.909639
2006-01-01,2,Escondido to Downtown SD via I-15,AM,47.756218
2006-01-01,3,Escondido to Carlsbad via SR 78,AM,17.773055
2006-01-01,4,El Cajon to Downtown SD via SR 94,AM,19.817705
2006-01-01,5,El Cajon to Downtown SD via I-8,AM,20.916404
...,...,...,...,...
2022-01-01,8,Chula Vista to Sorrento Valley via I-805,PM,49.000000
2022-01-01,9,Chula Vista to Downtown SD via I-805,PM,14.000000
2022-01-01,10,San Ysidro to Downtown SD via I-5,PM,17.000000
2022-01-01,11,El Cajon to Sorrento Valley via I-8,PM,38.000000


### Save Data

In [45]:
# Swap backslashes for forward slashes so the directory can be used as a
# Markdown link target, then show a heading followed by the link itself.
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(
    Markdown('#### Clean data path'),
    Markdown(f"[{clean_dir_path}]({clean_dir_path})"),
)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/transportation/commute/corridor_time](C:/Users/tan/src/regional-pm-2023/data/clean/transportation/commute/corridor_time)

In [46]:
# Write the combined AM/PM table to the clean-data directory. With to_csv's
# defaults the index levels are written as the leading columns.
clean_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
corridor_time.to_csv(clean_csv_path)