In [None]:
import pandas as pd
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"
import yaml

from IPython.display import Markdown, display
import visualizer_helpers as vh

In [None]:
# NOTE: SPECIALLY TAGGED PARAMETERS CELL
# Assignments in this cell may be overwritten at compile-time
# Otherwise, the below defaults to not filtering any zones
#
# zone_set and how_method are passed, together with each zone list, to
# vh.get_filters in the next cell to build the TAZ and MAZ filters.
# how_method also drives the wording of the tour filtering message further down:
# 'all' is reported as "origin and destination", 'any' as "origin or destination".
# NOTE(review): the accepted zone_set values and the expected contents of the two
# lists are defined by vh.get_filters, which is not visible here -- confirm there.
zone_set: str = 'all'
how_method: str = 'any'
affected_tazs: list = []
affected_mazs: list = []

In [None]:
# Build the zone filters from the parameters cell, once for the TAZ list and once
# for the MAZ list. Each call yields a pair: a filter applied to one zone column
# and a filter applied to a list of zone columns (see their uses further down).
(single_filter_tazs, multi_filter_tazs), (single_filter_mazs, multi_filter_mazs) = [
    vh.get_filters(zone_set, how_method, zone_ids)
    for zone_ids in (affected_tazs, affected_mazs)
]

In [None]:
# Input locations: the base and build output directories are listed under the
# `sources` key of the Quarto project file.
with open('_quarto.yml') as quarto_config_file:
    config = yaml.safe_load(quarto_config_file)

sources = config['sources']
base_dir, build_dir = sources['base'], sources['build']

## Coordinated Daily Activity Pattern

In [None]:
# Both scenarios are read with the same options: rows keyed by person_id, keeping
# only the CDAP result and the home zone used for filtering.
cdap_read_options = dict(
    index_col="person_id",
    usecols=["person_id", "cdap_activity", "home_zone_id"],
)

base_persons_df = pd.read_csv(f"{base_dir}/final_persons.csv", **cdap_read_options)
build_persons_df = pd.read_csv(f"{build_dir}/final_persons.csv", **cdap_read_options)

In [None]:
# The counts printed before and after are for the base scenario only.
print(f"Filtering persons by home MAZ. Original Persons: {len(base_persons_df)}")

# Keep only persons whose home zone passes the MAZ filter, in both scenarios.
base_home_mask = single_filter_mazs(base_persons_df.home_zone_id)
build_home_mask = single_filter_mazs(build_persons_df.home_zone_id)
base_persons_df = base_persons_df[base_home_mask]
build_persons_df = build_persons_df[build_home_mask]

print(f"Persons after filtering: {len(base_persons_df)}")

In [None]:
# Cross-tabulate the base activity pattern (rows) against the build activity
# pattern (columns); the two series are paired on their shared person_id index.
base_pattern = base_persons_df.cdap_activity
build_pattern = build_persons_df.cdap_activity

df = pd.crosstab(
    index=base_pattern,
    columns=build_pattern,
    rownames=['Base'],
    colnames=['Build'],
    margins=True,
    margins_name='Total',
)

df

## Mandatory Tour Frequency

In [None]:
# Mandatory tour frequency is a decision made by persons, so the zone filter is
# applied on persons (by home zone) rather than on tours.

# ptype codes kept for this comparison, grouped by the label they are reported under
WORKER_PTYPES = [1, 2]
STUDENT_PTYPES = [3, 6, 7, 8]


def _read_workers_and_students(source_dir):
    """Read final_persons.csv from ``source_dir``, keeping only workers and students.

    Parameters
    ----------
    source_dir : str
        Directory containing ``final_persons.csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``person_id`` with columns ``mandatory_tour_frequency``,
        ``home_zone_id``, ``ptype`` and an added ``person_type`` column holding
        "workers" or "students".
    """
    persons = pd.read_csv(
        f"{source_dir}/final_persons.csv",
        index_col="person_id",
        usecols=[
            "person_id",
            "mandatory_tour_frequency",
            "home_zone_id",
            "ptype",
        ],
    )
    # keep workers and students; .copy() gives an independent frame so the label
    # assignments below do not raise SettingWithCopyWarning in the rendered report
    persons = persons[persons.ptype.isin(WORKER_PTYPES + STUDENT_PTYPES)].copy()
    persons.loc[persons.ptype.isin(WORKER_PTYPES), 'person_type'] = "workers"
    persons.loc[persons.ptype.isin(STUDENT_PTYPES), 'person_type'] = "students"
    return persons


base_persons_df = _read_workers_and_students(base_dir)
build_persons_df = _read_workers_and_students(build_dir)

In [None]:
# The counts printed before and after are for the base scenario only.
print(f"Filtering persons by home MAZ. Original Persons: {len(base_persons_df)}")

# Restrict both scenarios to persons whose home zone passes the MAZ filter.
base_persons_df, build_persons_df = (
    base_persons_df[single_filter_mazs(base_persons_df.home_zone_id)],
    build_persons_df[single_filter_mazs(build_persons_df.home_zone_id)],
)

print(f"Persons after filtering: {len(base_persons_df)}")

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

for person_type in ('workers', 'students'):
    # Tour frequency of this person type in each scenario; missing values are
    # relabelled 'None' so they appear as their own row/column in the table.
    base_frequency = base_persons_df.loc[
        base_persons_df.person_type == person_type, 'mandatory_tour_frequency'
    ].fillna('None')
    build_frequency = build_persons_df.loc[
        build_persons_df.person_type == person_type, 'mandatory_tour_frequency'
    ].fillna('None')

    df = pd.crosstab(
        index=base_frequency,
        columns=build_frequency,
        rownames=['Base'],
        colnames=['Build'],
        margins=True,
        margins_name='Total',
        dropna=False,
    )

    # one sub-tab per person type: heading first, then the table
    display(Markdown(f"### {person_type.capitalize()} Mandatory Tour Frequency"))
    display(df)

## Mandatory Tour Scheduling

In [None]:
# Both scenarios are read with identical options and then reduced to the
# mandatory purposes (work and school tours).
tour_read_options = dict(
    index_col="tour_id",
    usecols=["tour_id", "tour_type", "start", "end", "origin", "destination"],
)
mandatory_tour_types = ['work', 'school']

base_tours_df = pd.read_csv(
    f"{base_dir}/final_tours.csv", **tour_read_options
).loc[lambda tours: tours.tour_type.isin(mandatory_tour_types)]
# ids of every base work/school tour, captured before the zone filter is applied
base_tour_idx = base_tours_df.index

build_tours_df = pd.read_csv(
    f"{build_dir}/final_tours.csv", **tour_read_options
).loc[lambda tours: tours.tour_type.isin(mandatory_tour_types)]

In [None]:
# Wording of the message follows how_method: 'all' -> "and", 'any' -> "or".
conjunction = {'all': 'and', 'any': 'or'}.get(how_method, '???')
print(
    f"Filtering tours by origin {conjunction} destination MAZ.\n"
    f"Original tours in base: {len(base_tours_df)}\tbuild: {len(build_tours_df)}"
)

# base tours: keep those whose origin/destination pass the MAZ filter
base_tours_df = base_tours_df[multi_filter_mazs([base_tours_df.origin, base_tours_df.destination])]

# Build tours are kept when either
#   - the tour also exists in the base and its base version survived the filter above, or
#   - the tour is new in the build and its own origin/destination pass the filter.
# A tour present in both scenarios is therefore judged by its base origin/destination.
existed_in_base = build_tours_df.index.isin(base_tour_idx)
kept_in_base = build_tours_df.index.isin(base_tours_df.index)
passes_zone_filter = multi_filter_mazs([build_tours_df.origin, build_tours_df.destination])

# Assign back to build_tours_df so the filter actually takes effect and the
# count reported below reflects the filtered build tours.
build_tours_df = build_tours_df[
    (existed_in_base & kept_in_base)
    | (~existed_in_base & passes_zone_filter)
]

print(f"After filtering, tours in base: {len(base_tours_df)}\tbuild: {len(build_tours_df)}")

In [None]:
# translate time bin to time period index
# .assign returns a new frame instead of writing columns onto the filtered
# slices produced by the earlier cells, which would raise SettingWithCopyWarning
# in the rendered report.
base_tours_df = base_tours_df.assign(
    start_period=base_tours_df.start.apply(vh.get_time_period_index),
    end_period=base_tours_df.end.apply(vh.get_time_period_index),
)
build_tours_df = build_tours_df.assign(
    start_period=build_tours_df.start.apply(vh.get_time_period_index),
    end_period=build_tours_df.end.apply(vh.get_time_period_index),
)

In [None]:
# Pair each tour with itself across scenarios (inner join on tour_id, so only
# tours present in both base and build are compared), then derive build-minus-base
# shifts in start/end time bin and in start/end time period.
df = base_tours_df.merge(
    build_tours_df,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build'),
).assign(
    start_bin_difference=lambda tours: tours.start_build - tours.start_base,
    end_bin_difference=lambda tours: tours.end_build - tours.end_base,
    start_period_difference=lambda tours: tours.start_period_build - tours.start_period_base,
    end_period_difference=lambda tours: tours.end_period_build - tours.end_period_base,
)

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

for purpose in ['work', 'school']:
    # matched tours whose base tour type is this purpose
    purpose_tours = df[df.tour_type_base == purpose]

    for metric in ['start', 'end']:
        difference_col = f'{metric}_bin_difference'
        display(Markdown(f"### {purpose.capitalize()} Tour {metric.capitalize()} Time Bin Difference"))

        # number of tours at each time-bin shift, ordered by shift
        purpose_df = (
            purpose_tours
            .groupby(difference_col)
            .size()
            .reset_index(name='count')
            .sort_values(by=difference_col)
        )

        fig = vh.create_bar_chart(
            source_data=purpose_df,
            source=difference_col,
            col=difference_col,
            plot_col='count',
        )
        fig.show()
        # blank markdown output after each chart
        display(Markdown(" "))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# one chart per (purpose, metric) pair, in the order work/start, work/end, school/start, school/end
for purpose, metric in [(p, m) for p in ['work', 'school'] for m in ['start', 'end']]:
    shift_col = f'{metric}_period_difference'
    display(Markdown(f"### {purpose.capitalize()} Tour {metric.capitalize()} Time Period Difference"))

    # matched tours whose base tour type is this purpose, counted per time-period shift
    shift_counts = df.loc[df.tour_type_base == purpose].groupby(shift_col).size()
    purpose_df = shift_counts.reset_index(name='count').sort_values(by=shift_col)

    fig = vh.create_bar_chart(
        source_data=purpose_df,
        source=shift_col,
        col=shift_col,
        plot_col='count',
    )
    fig.show()
    # blank markdown output after each chart
    display(Markdown(" "))