In [None]:
import pandas as pd
import numpy as np
import yaml
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"

from IPython.display import Markdown, display

import visualizer_helpers as vh

pd.set_option('display.max_rows',100)

In [None]:
# NOTE: SPECIALLY TAGGED PARAMETERS CELL
# Assignments in this cell may be overwritten at compile-time
# Otherwise, the below defaults to not filtering any zones
# These values are passed to vh.get_filters in the next cell to build the zone filters.
zone_set: str = 'all'  # 'all' is the no-filtering default (see note above)
how_method: str = 'any'  # 'any' is reported as origin "or" destination, 'all' as origin "and" destination
affected_tazs: list = []  # TAZ ids handed to vh.get_filters; presumably the zones of interest -- confirm in visualizer_helpers
affected_mazs: list = []  # MAZ ids handed to vh.get_filters; presumably the zones of interest -- confirm in visualizer_helpers

In [None]:
# Build the zone filters implied by the parameters cell.
# Each call yields a pair: a filter that is applied to one zone column and a
# filter that is applied to a list of zone columns at once.
taz_filters = vh.get_filters(zone_set, how_method, affected_tazs)
maz_filters = vh.get_filters(zone_set, how_method, affected_mazs)

single_filter_tazs, multi_filter_tazs = taz_filters
single_filter_mazs, multi_filter_mazs = maz_filters

In [None]:
# Resolve the base and build run directories from the Quarto project config.
with open('_quarto.yml') as f:
    config = yaml.safe_load(f)

sources = config['sources']
base_dir, build_dir = sources['base'], sources['build']

In [None]:
# Only the columns used by the destination and scheduling sections are loaded.
tour_columns = ['tour_id', 'tour_category', 'origin', 'destination', 'primary_purpose',
                'person_id', 'start', 'end', 'tour_type']

base_tour = pd.read_csv(f"{base_dir}/final_tours.csv", usecols=tour_columns, index_col='tour_id')

# ids of every base tour before any zone filtering; the filter cell uses this
# to tell tours that already existed in the base from tours that are new in the build
base_tour_idx = base_tour.index

build_tour = pd.read_csv(f"{build_dir}/final_tours.csv", usecols=tour_columns, index_col='tour_id')


In [None]:
# Wording for the log line: 'all' -> origin and destination, 'any' -> origin or destination.
if how_method == 'all':
    conjunction = 'and'
elif how_method == 'any':
    conjunction = 'or'
else:
    conjunction = '???'

n_base, n_build = len(base_tour), len(build_tour)
print(f"Filtering tours by origin {conjunction} destination MAZ.\nOriginal tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

# base tours whose origin/destination pass the zone filter
base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])]

# Build tours are kept when either
#   * the tour already existed in the base and its base version passed the filter, or
#   * the tour is new in the build and its own origin/destination pass the filter.
existed_in_base = build_tour.index.isin(base_tour_idx)
kept_in_base = build_tour.index.isin(base_tour.index)
build_in_zone_set = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

n_base, n_build = len(base_tour), len(build_tour)
print(f"After filtering, tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

In [None]:
# Display label for each primary_purpose code; the insertion order is the
# order in which the per-purpose sections are rendered.
name_dict = dict(
    escort="Escorting",
    work="Work",
    school="School",
    othmaint="Other-Maintenance",
    othdiscr="Other-Discretionary",
    shopping="Shopping",
    atwork="At-work Subtour",
    eatout="Eat Out",
    social="Social",
    univ="University",
)

## Non-Mandatory Tour Destinations

In [None]:
# Pair base and build non-mandatory tours on tour_id, keeping tours that
# appear in only one of the two runs (outer join).
base_nm = base_tour[base_tour.tour_category == 'non_mandatory']
build_nm = build_tour[build_tour.tour_category == 'non_mandatory']

df = pd.merge(base_nm, build_nm,
              how='outer',
              left_index=True,
              right_index=True,
              suffixes=('_base','_build'))



In [None]:
# Label each tour by what happened to its destination between base and build.
# Later assignments take precedence, so a missing side always wins over the
# equal / not-equal comparison.
same_destination = df.destination_base.eq(df.destination_build)
df['Case'] = "Changed"
df.loc[same_destination, 'Case'] = "Unchanged"

df.loc[df.destination_base.isna(), 'Case'] = "Newly Created"   # tour only exists in the build
df.loc[df.destination_build.isna(), 'Case'] = "Removed"        # tour only exists in the base


In [None]:
# Collapse the tour-level table into the number of tours per case
# (from here on df is a Series of counts, not the merged table).
df = df['Case'].value_counts()

df

In [None]:
# Pie chart of the case counts, with cases in alphabetical order.
case_table = df.to_frame().sort_index()
fig = vh.create_pie_chart(case_table, ["count"])
fig.show()

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# One sub-section per purpose: counts and a pie chart of what happened to the
# destination of each non-mandatory tour of that purpose.
for key, value in name_dict.items():

    # non-mandatory tours of this purpose in each run
    base_purpose = base_tour[
        (base_tour.tour_category == 'non_mandatory')
        & (base_tour.primary_purpose == key)
        ]
    build_purpose = build_tour[
        (build_tour.tour_category == 'non_mandatory')
        & (build_tour.primary_purpose == key)
        ]

    # nothing to report when neither run has a tour of this purpose
    if len(base_purpose) == 0 and len(build_purpose) == 0:
        continue

    print(f"#### {value}")

    # Outer join, matching the all-purpose summary: a left join dropped every
    # tour that only exists in the build, so "Newly Created" could never appear.
    purpose_df = base_purpose.merge(build_purpose,
                                    how='outer',
                                    left_index=True,
                                    right_index=True,
                                    suffixes=('_base','_build'))

    purpose_df.loc[purpose_df.destination_base == purpose_df.destination_build,'Case'] = "Unchanged"
    purpose_df.loc[purpose_df.destination_base != purpose_df.destination_build,'Case'] = "Changed"

    # a missing side overrides the comparison above
    purpose_df.loc[purpose_df.destination_base.isna(),'Case'] = "Newly Created"
    purpose_df.loc[purpose_df.destination_build.isna(),"Case"] = "Removed"

    case_counts = purpose_df.Case.value_counts()

    # <br> keeps the plain-text table on separate lines in the asis output
    print(str(case_counts).replace("\n","<br>"))

    fig = vh.create_pie_chart(case_counts.to_frame().sort_index(),["count"])

    fig.show()
    display(Markdown(" "))

## Mandatory Tour Scheduling

In [None]:
# keep only work and school tours
# .copy() gives independent frames, so the period columns added in the next
# cell are not written onto slices of base_tour / build_tour
# (which triggers pandas' SettingWithCopyWarning).
base_tours_df = base_tour[base_tour.tour_type.isin(['work', 'school'])].copy()

build_tours_df = build_tour[build_tour.tour_type.isin(['work', 'school'])].copy()

# NOTE: base_tour_idx is deliberately NOT reassigned here. It must keep holding
# the ids of all base tours: later zone-filter cells use it to decide whether a
# build tour is new, and overwriting it with the filtered work/school ids made
# every other existing tour look new.

In [None]:
# translate time bin to time period index
# assign() returns new frames instead of setting columns on what may be a
# slice of the tour tables, so no SettingWithCopyWarning is raised and the
# cell can be re-run safely.
base_tours_df = base_tours_df.assign(
    start_period=base_tours_df.start.apply(vh.get_time_period_index),
    end_period=base_tours_df.end.apply(vh.get_time_period_index),
)
build_tours_df = build_tours_df.assign(
    start_period=build_tours_df.start.apply(vh.get_time_period_index),
    end_period=build_tours_df.end.apply(vh.get_time_period_index),
)

In [None]:
# Inner join on tour_id: only tours present in both runs can be compared.
df = base_tours_df.merge(
    build_tours_df,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build'),
)

# build minus base, first in raw time bins ...
for edge in ('start', 'end'):
    df[f'{edge}_bin_difference'] = df[f'{edge}_build'] - df[f'{edge}_base']

# ... then in time-period indices
for edge in ('start', 'end'):
    df[f'{edge}_period_difference'] = df[f'{edge}_period_build'] - df[f'{edge}_period_base']

In [None]:
# Count tours by the sign of their start/end bin shift (negative = earlier in build).
start_shift = df.start_bin_difference
end_shift = df.end_bin_difference

summary_lines = [
    "**Mandatory tour scheduling changes:**",
    f"    Tours which departed earlier in build: {len(df[start_shift < 0])}",
    f"    Tours which departed in the same bin in build: {len(df[start_shift == 0])}",
    f"    Tours which departed later in build: {len(df[start_shift > 0])}",
    "",
    f"    Tours which arrived earlier in build: {len(df[end_shift < 0])}",
    f"    Tours which arrived in the same bin in build: {len(df[end_shift == 0])}",
    f"    Tours which arrived later in build: {len(df[end_shift > 0])}",
    "",
]
summary_text = "\n".join(summary_lines)

# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

### Tour departures

In [None]:
# time-period index -> period label
mapper = {
    0: "EA",
    1: "AM",
    2: "MD",
    3: "PM",
    4: "EV"
}
# display order for rows and columns ('Total' is the crosstab margin)
tp_order = ['EA','AM','MD','PM','EV','Total']

# base period (rows) vs build period (columns) of tour departure
xtab = pd.crosstab(
    df.start_period_base.replace(mapper),
    df.start_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# reindex instead of .loc: a period with no tours in the filtered set is shown
# as a row/column of zeros rather than raising KeyError
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# Departure-period crosstab and shift counts, one sub-section per mandatory purpose.
for purpose in ['work', 'school']:
    for metric in ['start']:
        purpose_df = df[df.tour_type_base == purpose]
        display(Markdown(f"### {purpose.capitalize()}"))

        xtab = pd.crosstab(
            purpose_df[f'{metric}_period_base'].replace(mapper),
            purpose_df[f'{metric}_period_build'].replace(mapper),
            margins=True,
            margins_name='Total'
        )
        # reindex instead of .loc: a period with no tours for this purpose is
        # shown as zeros rather than raising KeyError
        display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

        # wording depends on which end of the tour is being compared
        verb = 'departed' if metric=='start' else 'arrived' if metric=='end' else '???'
        bin_diff = purpose_df[f'{metric}_bin_difference']  # build minus base, in time bins

        summary_text = f"""**{purpose.capitalize()} tour {metric} changes:**
            Tours which {verb} earlier in build: {len(purpose_df[bin_diff < 0])}
            Tours which {verb}  in the same bin in build: {len(purpose_df[bin_diff == 0])}
            Tours which {verb}  later in build: {len(purpose_df[bin_diff > 0])}
        """
        display(Markdown(summary_text.replace("\n","<br>")))

        display(Markdown(" "))

### Tour arrivals

In [None]:
# base period (rows) vs build period (columns) of tour arrival
xtab = pd.crosstab(
    df.end_period_base.replace(mapper),
    df.end_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# reindex instead of .loc: a period with no tours in the filtered set is shown
# as a row/column of zeros rather than raising KeyError
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# Arrival-period crosstab and shift counts, one sub-section per mandatory purpose.
for purpose in ['work', 'school']:
    for metric in ['end']:
        purpose_df = df[df.tour_type_base == purpose]
        display(Markdown(f"### {purpose.capitalize()}"))

        xtab = pd.crosstab(
            purpose_df[f'{metric}_period_base'].replace(mapper),
            purpose_df[f'{metric}_period_build'].replace(mapper),
            margins=True,
            margins_name='Total'
        )
        # reindex instead of .loc: a period with no tours for this purpose is
        # shown as zeros rather than raising KeyError
        display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

        # wording depends on which end of the tour is being compared
        verb = 'departed' if metric=='start' else 'arrived' if metric=='end' else '???'
        bin_diff = purpose_df[f'{metric}_bin_difference']  # build minus base, in time bins

        summary_text = f"""**{purpose.capitalize()} tour {metric} changes:**
            Tours which {verb} earlier in build: {len(purpose_df[bin_diff < 0])}
            Tours which {verb}  in the same bin in build: {len(purpose_df[bin_diff == 0])}
            Tours which {verb}  later in build: {len(purpose_df[bin_diff > 0])}
        """
        display(Markdown(summary_text.replace("\n","<br>")))

        display(Markdown(" "))

## Non-Mandatory Tour Scheduling

In [None]:
# start with the filtered tours, get non-mandatory tours only
# .copy() gives independent frames, so the period columns added in the next
# cell are not written onto slices of base_tour / build_tour
# (which triggers pandas' SettingWithCopyWarning).
base_tours_df = base_tour[base_tour.tour_category == 'non_mandatory'].copy()
build_tours_df = build_tour[build_tour.tour_category == 'non_mandatory'].copy()

In [None]:
# translate time bin to time period index
# assign() returns new frames instead of setting columns on what may be a
# slice of the tour tables, so no SettingWithCopyWarning is raised and the
# cell can be re-run safely.
base_tours_df = base_tours_df.assign(
    start_period=base_tours_df.start.apply(vh.get_time_period_index),
    end_period=base_tours_df.end.apply(vh.get_time_period_index),
)
build_tours_df = build_tours_df.assign(
    start_period=build_tours_df.start.apply(vh.get_time_period_index),
    end_period=build_tours_df.end.apply(vh.get_time_period_index),
)

In [None]:
# Inner join on tour_id: only tours present in both runs can be compared.
df = base_tours_df.merge(
    build_tours_df,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build'),
)

# build minus base, first in raw time bins ...
for edge in ('start', 'end'):
    df[f'{edge}_bin_difference'] = df[f'{edge}_build'] - df[f'{edge}_base']

# ... then in time-period indices
for edge in ('start', 'end'):
    df[f'{edge}_period_difference'] = df[f'{edge}_period_build'] - df[f'{edge}_period_base']

In [None]:
# Count tours by the sign of their start/end bin shift (negative = earlier in build).
start_shift = df.start_bin_difference
end_shift = df.end_bin_difference

summary_lines = [
    "**Non-mandatory tour scheduling changes:**",
    f"    Tours which departed earlier in build: {len(df[start_shift < 0])}",
    f"    Tours which departed in the same bin in build: {len(df[start_shift == 0])}",
    f"    Tours which departed later in build: {len(df[start_shift > 0])}",
    "",
    f"    Tours which arrived earlier in build: {len(df[end_shift < 0])}",
    f"    Tours which arrived in the same bin in build: {len(df[end_shift == 0])}",
    f"    Tours which arrived later in build: {len(df[end_shift > 0])}",
    "",
]
summary_text = "\n".join(summary_lines)

# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

### Tour departures

In [None]:
# base period (rows) vs build period (columns) of tour departure
xtab = pd.crosstab(
    df.start_period_base.replace(mapper),
    df.start_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# Show periods in chronological order (tp_order), as the mandatory tables do,
# instead of the alphabetical order crosstab produces; reindex also shows a
# period with no tours as zeros.
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# Departure-period crosstab and shift counts, one sub-section per purpose.
metric='start'
for key, value in name_dict.items():
    purpose_df = df[df.primary_purpose_base == key]
    if len(purpose_df) == 0:
        continue
    # Use the display label from name_dict (e.g. "Other-Maintenance"), as the
    # destination and mode sections do, rather than the raw code ("Othmaint").
    display(Markdown(f"### {value}"))

    xtab = pd.crosstab(
        purpose_df[f'{metric}_period_base'].replace(mapper),
        purpose_df[f'{metric}_period_build'].replace(mapper),
        margins=True,
        margins_name='Total'
    )
    # chronological period order; a period with no tours is shown as zeros
    display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

    bin_diff = purpose_df[f'{metric}_bin_difference']  # build minus base, in time bins

    summary_text = f"""**{value} tour {metric} changes:**
        Tours which departed earlier in build: {len(purpose_df[bin_diff < 0])}
        Tours which departed in the same bin in build: {len(purpose_df[bin_diff == 0])}
        Tours which departed later in build: {len(purpose_df[bin_diff > 0])}
    """
    display(Markdown(summary_text.replace("\n","<br>")))

    display(Markdown(" "))

### Tour arrivals

In [None]:
# base period (rows) vs build period (columns) of tour arrival
xtab = pd.crosstab(
    df.end_period_base.replace(mapper),
    df.end_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# Show periods in chronological order (tp_order), as the mandatory tables do,
# instead of the alphabetical order crosstab produces; reindex also shows a
# period with no tours as zeros.
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# Arrival-period crosstab and shift counts, one sub-section per purpose.
metric='end'
for key, value in name_dict.items():
    purpose_df = df[df.primary_purpose_base == key]
    if len(purpose_df) == 0:
        continue
    # Use the display label from name_dict (e.g. "Other-Maintenance"), as the
    # destination and mode sections do, rather than the raw code ("Othmaint").
    display(Markdown(f"### {value}"))

    xtab = pd.crosstab(
        purpose_df[f'{metric}_period_base'].replace(mapper),
        purpose_df[f'{metric}_period_build'].replace(mapper),
        margins=True,
        margins_name='Total'
    )
    # chronological period order; a period with no tours is shown as zeros
    display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

    bin_diff = purpose_df[f'{metric}_bin_difference']  # build minus base, in time bins

    summary_text = f"""**{value} tour {metric} changes:**
        Tours which arrived earlier in build: {len(purpose_df[bin_diff < 0])}
        Tours which arrived in the same bin in build: {len(purpose_df[bin_diff == 0])}
        Tours which arrived later in build: {len(purpose_df[bin_diff > 0])}
    """

    display(Markdown(summary_text.replace("\n","<br>")))

    display(Markdown(" "))

## Tour Mode Choice

In [None]:
# xtab base-vs build mode choice for all NM tours
# xtab base-vs build mode choice for NM tours by purpose

In [None]:
# columns needed for the tour mode choice comparison
# ('tour_category' was listed twice; one entry is enough)
usecols = ['tour_id','tour_category','origin','destination','tour_mode', 'primary_purpose']
base_tour = pd.read_csv(f"{base_dir}/final_tours.csv", 
                        index_col='tour_id', 
                        usecols=usecols)

# ids of every base tour before zone filtering. The filter in the next cell
# uses this to decide whether a build tour is new; it has to be refreshed here
# because earlier sections leave base_tour_idx holding only a subset of tours.
base_tour_idx = base_tour.index

build_tour = pd.read_csv(f"{build_dir}/final_tours.csv", 
                         index_col='tour_id',
                         usecols=usecols)

In [None]:
# Wording for the log line: 'all' -> origin and destination, 'any' -> origin or destination.
if how_method == 'all':
    conjunction = 'and'
elif how_method == 'any':
    conjunction = 'or'
else:
    conjunction = '???'

print(f"Filtering tours by origin {conjunction} destination MAZ.\nOriginal tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

# base tours whose origin/destination pass the zone filter
base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])]

# Build tours are kept when either
#   * the tour id is in base_tour_idx and its base version passed the filter, or
#   * the tour id is not in base_tour_idx and its own origin/destination pass the filter.
existed_in_base = build_tour.index.isin(base_tour_idx)
kept_in_base = build_tour.index.isin(base_tour.index)
build_in_zone_set = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

print(f"After filtering, tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

In [None]:
# Pair base and build tours on tour_id, keeping tours found in only one run.
df = base_tour.merge(
    build_tour,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=['_base', '_build'],
)

# Label each tour by what happened to its mode; later assignments take
# precedence, so a missing side wins over the equal / not-equal comparison.
same_mode = df.tour_mode_base.eq(df.tour_mode_build)
df['Case'] = "Changed"
df.loc[same_mode, 'Case'] = "Unchanged"

df.loc[df.tour_mode_base.isna(), 'Case'] = "Newly Created"   # tour only exists in the build
df.loc[df.tour_mode_build.isna(), 'Case'] = "Removed"        # tour only exists in the base

df.Case.value_counts()

In [None]:
# Pie chart of the mode-change cases, with cases in alphabetical order.
mode_case_table = df.Case.value_counts().to_frame().sort_index()
fig = vh.create_pie_chart(mode_case_table, ["count"])
fig

In [None]:
# Detailed tour mode -> aggregated mode shown in the crosstabs.
# Modes that are not listed here keep their own name.
combiners = {mode: "MICROMOBILITY" for mode in ("EBIKE", "ESCOOTER")}
combiners.update({
    f"{access}_{service}": f"{access}_TRANSIT"
    for access in ("WALK", "PNR", "KNR", "TNC")
    for service in ("LOC", "PRM", "MIX")
})
combiners.update({mode: "RIDESHARE" for mode in ("TAXI", "TNC_SHARED", "TNC_SINGLE")})

# Row/column display order for the aggregated crosstab, including the
# placeholder labels for unmatched tours and the margin.
order = [
    'DRIVEALONE', 'SHARED2', 'SHARED3',
    'WALK', 'BIKE', 'MICROMOBILITY', 'WALK_TRANSIT',
    'PNR_TRANSIT', 'KNR_TRANSIT', 'TNC_TRANSIT',
    'RIDESHARE', 'SCH_BUS', 'Newly created', 'Removed', "Total",
]


In [None]:
# Aggregated base mode (rows) vs build mode (columns).
# Tours missing on one side get a placeholder label so they stay in the table.
order = pd.Series(order)

base_modes = df.tour_mode_base.replace(combiners).fillna('Newly created')
build_modes = df.tour_mode_build.replace(combiners).fillna('Removed')
xtab = pd.crosstab(base_modes, build_modes, margins=True, margins_name='Total')

# keep the preferred display order, limited to labels that actually occur
row_labels = order[order.isin(xtab.index)]
col_labels = order[order.isin(xtab.columns)]
display(xtab.loc[row_labels, col_labels])

In [None]:
# Mode groups used by the "To transit" summaries.
# transit: every access mode combined with every transit service type
transit_modes = [f'{access_mode}_{transit_mode}' 
                 for access_mode in ['WALK','PNR','KNR','TNC'] 
                 for transit_mode in ['LOC','PRM','MIX']]
auto_modes = ['DRIVEALONE','SHARED2','SHARED3']
# WALK added: it was in none of the groups, so walk-to-transit switches were
# counted in the subtotal but in none of the "from ..." lines.
nmot_modes = ['WALK','BIKE','EBIKE','ESCOOTER']
rideshare_modes = ['TAXI','TNC_SHARED','TNC_SINGLE']

In [None]:
# Split the comparison table by tour category.
# Tours that only exist in the build have no base category (NaN after the
# outer join), so fall back to the build category; otherwise they drop out of
# both subsets and "from new tour" is always 0.
tour_category = df.tour_category_base.fillna(df.tour_category_build)
mand_df = df[tour_category == 'mandatory']
nm_df = df[tour_category == 'non_mandatory']
whole_df = df  # keep a handle on the full table; df is rebound to the subsets below

In [None]:
# point df at the mandatory subset; the summary in the next cell reads df
df = mand_df

In [None]:
# Row masks shared by the counts below.
from_transit = df.tour_mode_base.isin(transit_modes)   # base mode is transit
to_transit = df.tour_mode_build.isin(transit_modes)    # build mode is transit

# The subtotal previously took len() of the boolean mask itself, which is the
# number of rows in df rather than the number of tours that moved to transit.
summary_text = f"""**Mandatory tour mode changes:**
To transit
<p style="margin-left: 40px">
from auto: {len(df[df.tour_mode_base.isin(auto_modes) & to_transit])}
from non-motorized: {len(df[df.tour_mode_base.isin(nmot_modes) & to_transit])}
from rideshare: {len(df[df.tour_mode_base.isin(rideshare_modes) & to_transit])}
from school bus: {len(df[df.tour_mode_base.isin(['SCH_BUS']) & to_transit])}
from new tour: {len(df[df.tour_mode_base.isna() & to_transit])}
*Subtotal: {len(df[to_transit & ~from_transit])}*
</p>
Stayed on transit: {len(df[from_transit & to_transit])}
Stayed on something else: {len(df[~(from_transit | to_transit)])}
Switched to something else: {len(df[from_transit & ~to_transit])}
"""
# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

In [None]:
# point df at the non-mandatory subset; the summary in the next cell reads df
df = nm_df

In [None]:
# Row masks shared by the counts below.
from_transit = df.tour_mode_base.isin(transit_modes)   # base mode is transit
to_transit = df.tour_mode_build.isin(transit_modes)    # build mode is transit

# The subtotal previously took len() of the boolean mask itself, which is the
# number of rows in df rather than the number of tours that moved to transit.
summary_text = f"""**Non-mandatory tour mode changes:**
To transit
<p style="margin-left: 40px">
from auto: {len(df[df.tour_mode_base.isin(auto_modes) & to_transit])}
from non-motorized: {len(df[df.tour_mode_base.isin(nmot_modes) & to_transit])}
from rideshare: {len(df[df.tour_mode_base.isin(rideshare_modes) & to_transit])}
from school bus: {len(df[df.tour_mode_base.isin(['SCH_BUS']) & to_transit])}
from new tour: {len(df[df.tour_mode_base.isna() & to_transit])}
*Subtotal: {len(df[to_transit & ~from_transit])}*
</p>
Stayed on transit: {len(df[from_transit & to_transit])}
Stayed on something else: {len(df[~(from_transit | to_transit)])}
Switched to something else: {len(df[from_transit & ~to_transit])}
"""
# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

In [None]:
# restore df to the full base/build comparison table for the per-purpose loop that follows
df = whole_df

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# Only tours present in the base run are reported here: rows that exist only in
# the build have NaN in primary_purpose_base after the outer join, so they
# never match a key from name_dict.
for key, value in name_dict.items():
    df_purp = df[df.primary_purpose_base == key]
    if not len(df_purp):
        continue

    print(f"#### {value}")

    # tours per mode-change case for this purpose
    df_purp_cases = df_purp['Case'].value_counts()
    print(str(df_purp_cases).replace("\n","<br>"))

    purpose_case_table = df_purp_cases.to_frame().sort_index()
    fig = vh.create_pie_chart(purpose_case_table, ["count"])

    fig.show()
    display(Markdown(" "))

## At-Work Subtour Frequency

In [None]:
# At-work subtours are decisions made by work tours, so the zone filter is
# applied to the parent work tour's destination.

# Load every tour (no zone filtering yet) for both runs.
atwork_freq_columns = ['tour_id','tour_category','primary_purpose','destination', 'parent_tour_id']

base_tour = pd.read_csv(f"{base_dir}/final_tours.csv",
                        usecols=atwork_freq_columns,
                        index_col='tour_id')
build_tour = pd.read_csv(f"{build_dir}/final_tours.csv",
                         usecols=atwork_freq_columns,
                         index_col='tour_id')

# Work tours only; remember the ids of all base work tours before filtering.
base_work_tour = base_tour.loc[base_tour.primary_purpose == 'work']
build_work_tour = build_tour.loc[build_tour.primary_purpose == 'work']
base_work_tour_idx = base_work_tour.index


In [None]:
n_base, n_build = len(base_work_tour), len(build_work_tour)
print(f"Filtering work tours by destination MAZ.\nOriginal tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

# base work tours whose destination passes the zone filter
base_work_tour = base_work_tour[single_filter_mazs(base_work_tour.destination)]

# Build work tours are kept when either
#   * the tour already existed in the base and its base version passed the filter, or
#   * the tour is new in the build and its own destination passes the filter.
existed_in_base = build_work_tour.index.isin(base_work_tour_idx)
kept_in_base = build_work_tour.index.isin(base_work_tour.index)
build_in_zone_set = single_filter_mazs(build_work_tour.destination)
build_work_tour = build_work_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

n_base, n_build = len(base_work_tour), len(build_work_tour)
print(f"After filtering, tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

In [None]:
# At-work subtours come from the unfiltered tour tables; they are matched to
# the filtered work tours through parent_tour_id in the next cell.
base_atwork_tours_df = base_tour.loc[base_tour.primary_purpose.eq('atwork')]
build_atwork_tours_df = build_tour.loc[build_tour.primary_purpose.eq('atwork')]

In [None]:
# Number of at-work subtours per parent work tour, in each run.
base_atwork_tour_counts_df = (
    base_atwork_tours_df.groupby('parent_tour_id').size().rename('at-work')
)
build_atwork_tour_counts_df = (
    build_atwork_tours_df.groupby('parent_tour_id').size().rename('at-work')
)

# Align to the filtered work tours; a work tour without subtours counts as 0.
base_work_atwork_counts_df = base_atwork_tour_counts_df.reindex(base_work_tour.index, fill_value=0)
build_work_atwork_counts_df = build_atwork_tour_counts_df.reindex(build_work_tour.index, fill_value=0)

display(Markdown("### At-Work Subtour Frequency\n"))

# subtours per work tour: base (rows) vs build (columns)
df = pd.crosstab(
    index=base_work_atwork_counts_df,
    columns=build_work_atwork_counts_df,
    rownames=['Base'],
    colnames=['Build'],
    margins=True,
    margins_name='Total',
)
df

## At-Work Subtour Destinations

In [None]:
# columns needed for the at-work subtour destination comparison
atwork_dest_columns = ['tour_id','tour_category','origin','destination','primary_purpose','start','end']

base_tour = pd.read_csv(f"{base_dir}/final_tours.csv", 
                        index_col='tour_id', 
                        usecols=atwork_dest_columns)

# ids of every base tour before zone filtering. The filter in the next cell
# uses this to decide whether a build tour is new; it has to be refreshed here
# because earlier sections leave base_tour_idx holding only a subset of tours.
base_tour_idx = base_tour.index

build_tour = pd.read_csv(f"{build_dir}/final_tours.csv", 
                         index_col='tour_id', 
                         usecols=atwork_dest_columns)


In [None]:
# Wording for the log line: 'all' -> origin and destination, 'any' -> origin or destination.
if how_method == 'all':
    conjunction = 'and'
elif how_method == 'any':
    conjunction = 'or'
else:
    conjunction = '???'

n_base, n_build = len(base_tour), len(build_tour)
print(f"Filtering tours by origin {conjunction} destination MAZ.\nOriginal tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

# base tours whose origin/destination pass the zone filter
base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])]

# Build tours are kept when either
#   * the tour id is in base_tour_idx and its base version passed the filter, or
#   * the tour id is not in base_tour_idx and its own origin/destination pass the filter.
existed_in_base = build_tour.index.isin(base_tour_idx)
kept_in_base = build_tour.index.isin(base_tour.index)
build_in_zone_set = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

print(f"After filtering, tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

In [None]:
# Pair base and build at-work subtours on tour_id, keeping subtours that
# appear in only one of the two runs (outer join).
base_atwork = base_tour[base_tour.tour_category == 'atwork']
build_atwork = build_tour[build_tour.tour_category == 'atwork']

df = pd.merge(base_atwork, build_atwork,
              how='outer',
              left_index=True,
              right_index=True,
              suffixes=('_base','_build'))


In [None]:
# Label each subtour by what happened to its destination between base and build.
# Later assignments take precedence, so a missing side always wins over the
# equal / not-equal comparison.
same_destination = df.destination_base.eq(df.destination_build)
df['Case'] = "Changed"
df.loc[same_destination, 'Case'] = "Unchanged"

df.loc[df.destination_base.isna(), 'Case'] = "Newly Created"   # subtour only exists in the build
df.loc[df.destination_build.isna(), 'Case'] = "Removed"        # subtour only exists in the base


In [None]:
# Collapse the subtour-level table into the number of subtours per case
# (from here on df is a Series of counts, not the merged table).
df = df['Case'].value_counts()

df

In [None]:
# Pie chart of the case counts, with cases in alphabetical order.
case_table = df.to_frame().sort_index()
fig = vh.create_pie_chart(case_table, ["count"])
fig.show()

## At-Work Subtour Scheduling

In [None]:
# Load the tours again and keep only at-work subtours for the scheduling comparison.
atwork_sched_columns = ['tour_id','tour_category','origin','destination','primary_purpose','start','end']

base_tour = pd.read_csv(f"{base_dir}/final_tours.csv",
                        usecols=atwork_sched_columns,
                        index_col='tour_id')
base_tour = base_tour.loc[base_tour.tour_category == 'atwork']

# ids of every base at-work subtour before zone filtering
base_tour_idx = base_tour.index

build_tour = pd.read_csv(f"{build_dir}/final_tours.csv",
                         usecols=atwork_sched_columns,
                         index_col='tour_id')
build_tour = build_tour.loc[build_tour.tour_category == 'atwork']

In [None]:
# Wording for the log line: 'all' -> origin and destination, 'any' -> origin or destination.
if how_method == 'all':
    conjunction = 'and'
elif how_method == 'any':
    conjunction = 'or'
else:
    conjunction = '???'

n_base, n_build = len(base_tour), len(build_tour)
print(f"Filtering tours by origin {conjunction} destination MAZ.\nOriginal tours in base: {n_base}\tbuild: {n_build}\tdiff: {n_build - n_base}")

# base subtours whose origin/destination pass the zone filter
base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])]

# Build subtours are kept when either
#   * the subtour already existed in the base and its base version passed the filter, or
#   * the subtour is new in the build and its own origin/destination pass the filter.
existed_in_base = build_tour.index.isin(base_tour_idx)
kept_in_base = build_tour.index.isin(base_tour.index)
build_in_zone_set = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

print(f"After filtering, tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

In [None]:
# start with the filtered tours (already limited to at-work subtours when loaded)

# translate time bin to time period index
# base_tour / build_tour are filtered selections at this point; assign()
# returns new frames instead of setting columns on them in place, so no
# SettingWithCopyWarning is raised and the cell can be re-run safely.
base_tour = base_tour.assign(
    start_period=base_tour.start.apply(vh.get_time_period_index),
    end_period=base_tour.end.apply(vh.get_time_period_index),
)
build_tour = build_tour.assign(
    start_period=build_tour.start.apply(vh.get_time_period_index),
    end_period=build_tour.end.apply(vh.get_time_period_index),
)

In [None]:
# Inner join on tour_id: only subtours present in both runs can be compared.
df = base_tour.merge(
    build_tour,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build'),
)

# build minus base, first in raw time bins ...
for edge in ('start', 'end'):
    df[f'{edge}_bin_difference'] = df[f'{edge}_build'] - df[f'{edge}_base']

# ... then in time-period indices
for edge in ('start', 'end'):
    df[f'{edge}_period_difference'] = df[f'{edge}_period_build'] - df[f'{edge}_period_base']

### Tour departures

In [None]:
# base period (rows) vs build period (columns) of subtour departure
xtab = pd.crosstab(
    df.start_period_base.replace(mapper),
    df.start_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# reindex instead of .loc: a period with no at-work subtours is shown as a
# row/column of zeros rather than raising KeyError
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

### Tour arrivals

In [None]:
# base period (rows) vs build period (columns) of subtour arrival
xtab = pd.crosstab(
    df.end_period_base.replace(mapper),
    df.end_period_build.replace(mapper),
    margins=True,
    margins_name='Total'
)
# reindex instead of .loc: a period with no at-work subtours is shown as a
# row/column of zeros rather than raising KeyError
display(xtab.reindex(index=tp_order, columns=tp_order, fill_value=0))

In [None]:
# Count subtours by the sign of their start/end bin shift (negative = earlier in build).
start_shift = df.start_bin_difference
end_shift = df.end_bin_difference

summary_lines = [
    "**At-work subtour scheduling changes:**",
    f"    Tours which departed earlier in build: {len(df[start_shift < 0])}",
    f"    Tours which departed in the same bin in build: {len(df[start_shift == 0])}",
    f"    Tours which departed later in build: {len(df[start_shift > 0])}",
    "",
    f"    Tours which arrived earlier in build: {len(df[end_shift < 0])}",
    f"    Tours which arrived in the same bin in build: {len(df[end_shift == 0])}",
    f"    Tours which arrived later in build: {len(df[end_shift > 0])}",
    "",
]
summary_text = "\n".join(summary_lines)

# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

## At-Work Subtour Mode Choice

In [None]:
# columns needed for the at-work subtour mode comparison
# ('tour_category' was listed twice; one entry is enough)
usecols = ['tour_id','tour_category','origin','destination', 'tour_mode']
base_tour = pd.read_csv(f"{base_dir}/final_tours.csv", 
                        index_col='tour_id', 
                        usecols=usecols)

# ids of every base tour before zone filtering. The filter in the next cell
# uses this to decide whether a build tour is new, so it must describe the
# table that was just loaded rather than one left over from an earlier section.
base_tour_idx = base_tour.index

build_tour = pd.read_csv(f"{build_dir}/final_tours.csv", 
                         index_col='tour_id', 
                         usecols=usecols)

In [None]:
# Wording for the log line: 'all' -> origin and destination, 'any' -> origin or destination.
if how_method == 'all':
    conjunction = 'and'
elif how_method == 'any':
    conjunction = 'or'
else:
    conjunction = '???'

print(f"Filtering tours by origin {conjunction} destination MAZ.\nOriginal tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

# base tours whose origin/destination pass the zone filter
base_tour = base_tour[multi_filter_mazs([base_tour.origin, base_tour.destination])]

# Build tours are kept when either
#   * the tour id is in base_tour_idx and its base version passed the filter, or
#   * the tour id is not in base_tour_idx and its own origin/destination pass the filter.
existed_in_base = build_tour.index.isin(base_tour_idx)
kept_in_base = build_tour.index.isin(base_tour.index)
build_in_zone_set = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[(existed_in_base & kept_in_base) | ((~existed_in_base) & build_in_zone_set)]

print(f"After filtering, tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

In [None]:
# Pair base and build at-work subtours on tour_id, keeping subtours found in only one run.
base_atwork = base_tour[base_tour.tour_category == "atwork"]
build_atwork = build_tour[build_tour.tour_category == "atwork"]
df = base_atwork.merge(
    build_atwork,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=['_base', '_build'],
)

# Label each subtour by what happened to its mode; later assignments take
# precedence, so a missing side wins over the equal / not-equal comparison.
same_mode = df.tour_mode_base.eq(df.tour_mode_build)
df['Case'] = "Changed"
df.loc[same_mode, 'Case'] = "Unchanged"

df.loc[df.tour_mode_base.isna(), 'Case'] = "Newly Created"   # subtour only exists in the build
df.loc[df.tour_mode_build.isna(), 'Case'] = "Removed"        # subtour only exists in the base

df.Case.value_counts()

In [None]:
# Pie chart of the mode-change cases, with cases in alphabetical order.
mode_case_table = df.Case.value_counts().to_frame().sort_index()
fig = vh.create_pie_chart(mode_case_table, ["count"])
fig

In [None]:
# Aggregated base mode (rows) vs build mode (columns) for at-work subtours.
# Subtours missing on one side get a placeholder label so they stay in the table.
order = pd.Series(order)

base_modes = df.tour_mode_base.replace(combiners).fillna('Newly created')
build_modes = df.tour_mode_build.replace(combiners).fillna('Removed')
xtab = pd.crosstab(base_modes, build_modes, margins=True, margins_name='Total')

# keep the preferred display order, limited to labels that actually occur
row_labels = order[order.isin(xtab.index)]
col_labels = order[order.isin(xtab.columns)]
display(xtab.loc[row_labels, col_labels])

In [None]:
# Row masks shared by the counts below.
from_transit = df.tour_mode_base.isin(transit_modes)   # base mode is transit
to_transit = df.tour_mode_build.isin(transit_modes)    # build mode is transit

# The subtotal previously took len() of the boolean mask itself, which is the
# number of rows in df rather than the number of subtours that moved to transit.
summary_text = f"""**At-work subtour mode changes:**
To transit
<p style="margin-left: 40px">
from auto: {len(df[df.tour_mode_base.isin(auto_modes) & to_transit])}
from non-motorized: {len(df[df.tour_mode_base.isin(nmot_modes) & to_transit])}
from rideshare: {len(df[df.tour_mode_base.isin(rideshare_modes) & to_transit])}
from school bus: {len(df[df.tour_mode_base.isin(['SCH_BUS']) & to_transit])}
from new tour: {len(df[df.tour_mode_base.isna() & to_transit])}
*Subtotal: {len(df[to_transit & ~from_transit])}*
</p>
Stayed on transit: {len(df[from_transit & to_transit])}
Stayed on something else: {len(df[~(from_transit | to_transit)])}
Switched to something else: {len(df[from_transit & ~to_transit])}
"""
# Markdown ignores single newlines, so force line breaks explicitly
display(Markdown(summary_text.replace("\n","<br>")))

## Stop Frequency

In [None]:
# Reload the tour tables of both runs with only the columns this section needs.
usecols = ['tour_id','tour_category','origin','destination','stop_frequency', 'primary_purpose']

# identical read options for both runs; tour_id becomes the index
tour_read_opts = dict(index_col='tour_id', usecols=usecols)
base_tour = pd.read_csv(f"{base_dir}/final_tours.csv", **tour_read_opts)
build_tour = pd.read_csv(f"{build_dir}/final_tours.csv", **tour_read_opts)

In [None]:
# Report the unfiltered sizes, then keep only the tours selected by the MAZ filter.
# The word in the message is 'and' for how_method == 'all' and 'or' for 'any'.
od_conjunction = {'all': 'and', 'any': 'or'}.get(how_method, '???')
print(f"""Filtering tours by origin {od_conjunction} destination MAZ.\nOriginal tours in base: {len(base_tour)}\tbuild: {len(build_tour)}""")

# base tours: keep those whose origin/destination pass the filter
base_in_zone = multi_filter_mazs([base_tour.origin, base_tour.destination])
base_tour = base_tour[base_in_zone]

# build tours need special handling:
#   - a tour id that was present in the unfiltered base run is kept only if the
#     base tour with that id survived the base filter above
#   - a tour id that is new in the build run is kept if its own
#     origin/destination pass the filter
build_id_in_base_run = build_tour.index.isin(base_tour_idx)
build_id_in_base_set = build_tour.index.isin(base_tour.index)
build_in_zone = multi_filter_mazs([build_tour.origin, build_tour.destination])
build_tour = build_tour[
    (build_id_in_base_run & build_id_in_base_set)
    | (~build_id_in_base_run & build_in_zone)
]

print(f"After filtering, tours in base: {len(base_tour)}\tbuild: {len(build_tour)}")

In [None]:
# Line up the tours of both runs on their index (tour_id). The outer join keeps
# tours that exist in only one run; their columns for the other run are NaN.
df = base_tour.merge(
    build_tour,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=['_base', '_build'],
)

# Label every tour. Rules are applied in order, so a later rule overwrites an
# earlier one: tours missing from one run first land in "Changed" (NaN never
# compares equal) and are then relabelled "Newly Created" / "Removed".
case_rules = [
    ("Unchanged", df.stop_frequency_base == df.stop_frequency_build),
    ("Changed", df.stop_frequency_base != df.stop_frequency_build),
    ("Newly Created", df.stop_frequency_base.isna()),
    ("Removed", df.stop_frequency_build.isna()),
]
for case_label, case_mask in case_rules:
    df.loc[case_mask, 'Case'] = case_label

df.Case.value_counts()

In [None]:
def _total_stops(stop_frequency):
    """Return the per-tour stop total parsed from a stop_frequency column.

    The digits at string positions 0 and 5 are added together (presumably the
    outbound and inbound counts of labels such as '1out_2in' -- confirm against
    the data). Tours with no value get -inf as a sentinel, and totals are
    capped at 4.
    """
    first_count = stop_frequency.str[0].astype(float)
    second_count = stop_frequency.str[5].astype(float)
    return (first_count + second_count).fillna(-np.inf).clip(upper=4)


total_stops_base = _total_stops(df.stop_frequency_base)
total_stops_build = _total_stops(df.stop_frequency_build)



In [None]:
# Pie chart of the number of tours in each Case, categories in alphabetical order.
case_counts = df.Case.value_counts().to_frame().sort_index()
fig = vh.create_pie_chart(case_counts, ["count"])
fig

In [None]:
# Change in total stops per tour (build minus base).
# This is computed from the UNCLIPPED totals: total_stops_base/_build are capped
# at 4 for the "4+" bucket of the table below, and differencing the capped
# values would report any change within that bucket (e.g. 4 -> 6 stops) as zero.
# Tours missing from one run carry -inf, so a tour only in build yields +inf,
# a tour only in base yields -inf, and a tour with no value in either yields NaN.
raw_stops_base = (df.stop_frequency_base.str[0].astype(float)
                  + df.stop_frequency_base.str[5].astype(float)
                  ).fillna(-np.inf)
raw_stops_build = (df.stop_frequency_build.str[0].astype(float)
                   + df.stop_frequency_build.str[5].astype(float)
                   ).fillna(-np.inf)
diff = raw_stops_build - raw_stops_base

# Base-vs-build table of (capped) stop totals. Floats are rendered as "0".."3",
# the cap as "4+", and the -inf sentinel as a text label. regex=False makes the
# ".0" removal a literal match regardless of the pandas default.
xtab = pd.crosstab(
    total_stops_base.replace({4:"4+",-np.inf:"Newly created"}).astype(str).str.replace(".0","",regex=False),
    total_stops_build.replace({4:"4+",-np.inf:"Removed"}).astype(str).str.replace(".0","",regex=False),
    margins=True,
    margins_name='Total')

display(xtab.sort_index())

In [None]:
# Count tours by the sign of the build-minus-base stop difference.
# NaN differences compare False everywhere and fall into none of the groups.
n_fewer_stops = int((diff < 0).sum())
n_same_stops = int((diff == 0).sum())
n_more_stops = int((diff > 0).sum())

summary_text = f"""**Stop frequency changes:**
    Tours with fewer stops in build: {n_fewer_stops}
    Tours with the same stops in build: {n_same_stops}
    Tours with more stops in build: {n_more_stops}
"""
# Markdown collapses single newlines, so force explicit line breaks.
display(Markdown(summary_text.replace("\n","<br>")))

In [None]:
# | output: asis
# above comment is needed for Quarto to render subtabs correctly

# One subsection per tour purpose, keyed on the purpose recorded in the base run.
# Tours whose base purpose is NaN after the outer join never match a key of
# name_dict, so they are not reported here.
for purpose_key, purpose_label in name_dict.items():
    purpose_tours = df[df.primary_purpose_base == purpose_key]

    # purposes with no tours in the current selection get no subsection
    if len(purpose_tours) > 0:
        print(f"#### {purpose_label}")

        purpose_case_counts = purpose_tours.Case.value_counts()
        print(str(purpose_case_counts).replace("\n","<br>"))

        fig = vh.create_pie_chart(purpose_case_counts.to_frame().sort_index(),["count"])
        fig.show()

        display(Markdown(" "))