In [37]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests
from datetime import datetime

pd.set_option('display.max_colwidth', None)

INDICATOR = 'transit_boardings'
CONFIG = config.get_config(INDICATOR, '../config.toml')

display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(Markdown(f"[{raw_dir_path}]({raw_dir_path})"))


## Raw data path

[C:/Users/tan/OneDrive - San Diego Association of Governments/Performance Monitoring/Regional Performance Monitoring/2023/raw/transportation/transit/transit_boardings](C:/Users/tan/OneDrive - San Diego Association of Governments/Performance Monitoring/Regional Performance Monitoring/2023/raw/transportation/transit/transit_boardings)

# Transportation Policy: Transit

## Transit Boardings

In [38]:
docs.describe_indicator(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Number of transit boardings.

nan

In [39]:
docs.list_schema(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
boardings,Boardings,Number of transit boardings in a given year.,int


In [40]:
docs.list_sources(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
boardings_xlsx,MTS and NTCD,Caltrans,True,Best source we have was a workbook we've manually saved numbers from the operaters in.


In [41]:
steps = docs.list_update_steps(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)
steps

Unnamed: 0,step
0,Extract from legacy PM data.
1,Calculate new fiscal year from workbooks for monthly transit operator boardings.


In [42]:
docs.list_remarks(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0,author,note
0,TAN,
1,TAN,


### Step 0: Extract legacy data

In [43]:
display(steps.loc[0])

step    Extract from legacy PM data.
Name: 0, dtype: object

In [44]:
legacy_boardings_data = (
    pd.read_excel(
        CONFIG['legacy_xlsx_path'],
        CONFIG['legacy_sheet'],
        skiprows=3,
        nrows=17,
        usecols='A,B',
    )
    .rename(columns={'Unnamed: 0': 'year', 'Boardings': 'boardings'})
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
    .set_index('year')
)
legacy_boardings_data

Unnamed: 0_level_0,boardings
year,Unnamed: 1_level_1
2004-01-01,89097814
2005-01-01,89215818
2006-01-01,94816260
2007-01-01,97073041
2008-01-01,102930179
2009-01-01,104070965
2010-01-01,93552921
2011-01-01,95674258
2012-01-01,100083143
2013-01-01,96974848


### Step 1: Caclulate and combine

In [45]:
display(steps.loc[1])

step    Calculate new fiscal year from workbooks for monthly transit operator boardings.
Name: 1, dtype: object

In [46]:
mts_boardings_fy2021 = (
    pd.read_excel(
        CONFIG['raw_dir']/'Ridership by Month FY19-FY22_MTS.xlsx',
        nrows=48,
        usecols='A:D',
    )
    .melt('YRMO', var_name='mode', value_name='boardings')
    .query("`YRMO` >= '2020-07' and `YRMO` <= '2021-06'")
    .drop(columns='mode')
    .rename(columns={'YRMO': 'year'})
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y-%m'))
    .set_index('year')
    .sum()
)
mts_boardings_fy2021

boardings    39214848.0
dtype: float64

In [47]:
mts_boardings_fy2022 = (
    pd.read_excel(
        CONFIG['raw_dir']/'Ridership by Month FY19-FY22_MTS.xlsx',
        nrows=48,
        usecols='A:D',
    )
    .melt('YRMO', var_name='mode', value_name='boardings')
    .query("`YRMO` >= '2021-07' and `YRMO` <= '2022-06'")
    .drop(columns='mode')
    .rename(columns={'YRMO': 'year'})
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y-%m'))
    .set_index('year')
    .sum()
)
mts_boardings_fy2022

boardings    58428534.05
dtype: float64

In [48]:
ntcd_boardings_fy2021 = (
    pd.read_excel(
        CONFIG['raw_dir']/'Ridership by Month FY19-FY22_NCTD.xlsx',
        sheet_name='FY 2022 vs Prior Year',
        skiprows=7,
        nrows=12,
        usecols='AC',
    )
    .rename(columns={'FY21.5': 'boardings'})
    .sum()
)
ntcd_boardings_fy2021

boardings    4457760
dtype: int64

In [49]:
ntcd_boardings_fy2022 = (
    pd.read_excel(
        CONFIG['raw_dir']/'Ridership by Month FY19-FY22_NCTD.xlsx',
        sheet_name='FY 2022 vs Prior Year',
        skiprows=7,
        nrows=12,
        usecols='AB',
    )
    .rename(columns={'FY22.5': 'boardings'})
    .sum()
)
ntcd_boardings_fy2022

boardings    5935648
dtype: int64

In [50]:
new_boardings_data = pd.DataFrame(
    data={
        'boardings': [
            (mts_boardings_fy2021[0] + ntcd_boardings_fy2021[0]),
            (mts_boardings_fy2022[0] + ntcd_boardings_fy2022[0]),
        ]
    },
    index=(
        pd.Index(
            [
                datetime(2021, 1, 1),
                datetime(2022, 1, 1),
            ],
            name='year'
        )
    )
)
new_boardings_data

Unnamed: 0_level_0,boardings
year,Unnamed: 1_level_1
2021-01-01,43672608.0
2022-01-01,64364182.05


In [51]:
transit_boardings = pd.concat([legacy_boardings_data, new_boardings_data])

### Save Data

In [52]:
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(Markdown(f"[{clean_dir_path}]({clean_dir_path})"))

#### Clean data path

[C:/Users/tan/OneDrive - San Diego Association of Governments/Performance Monitoring/Regional Performance Monitoring/2023/clean/transportation/transit/transit_boardings](C:/Users/tan/OneDrive - San Diego Association of Governments/Performance Monitoring/Regional Performance Monitoring/2023/clean/transportation/transit/transit_boardings)

In [53]:
transit_boardings.to_csv(
    CONFIG['clean_dir']
    / f'{INDICATOR}_odp.csv'
)