In [1]:
import os
import pandas as pd
import yaml
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook_connected"

from IPython.display import Markdown, display

# Notebook is in notebooks dir
os.chdir('..')
import visualizer_helpers as vh

In [2]:
# NOTE: SPECIALLY TAGGED PARAMETERS CELL
# Assignments in this cell may be overwritten at compile-time
# Otherwise, the below defaults to not filtering any zones
zone_set: str = 'all'
how_method: str = 'any'
affected_tazs: list = []
affected_mazs: list = []

In [3]:
# get corresponding filter from zone_set parameter
single_filter_tazs, multi_filter_tazs = vh.get_filters(zone_set, how_method, affected_tazs)
single_filter_mazs, multi_filter_mazs = vh.get_filters(zone_set, how_method, affected_mazs)

In [4]:
# file IO locations
with open('_quarto.yml') as f:
    config = yaml.safe_load(f)
base_dir = config['sources']['base']
build_dir = config['sources']['build']

## Auto Ownership

In [5]:
base_hh = pd.read_csv(f"{base_dir}/final_households.csv",
                       index_col='household_id', 
                       usecols=['household_id','auto_ownership','home_zone_id'])

build_hh = pd.read_csv(f"{build_dir}/final_households.csv", 
                       index_col='household_id', 
                       usecols=['household_id','auto_ownership','home_zone_id'])


In [6]:
print(f"Filtering households by TAZ.\nOriginal HHs: {len(base_hh)}")
base_hh = base_hh[single_filter_mazs(base_hh.home_zone_id)]
build_hh = build_hh[single_filter_mazs(build_hh.home_zone_id)]
print(f"HHs after filtering: {len(base_hh)}")

Filtering households by TAZ.
Original HHs: 49
HHs after filtering: 49


In [7]:
df = pd.crosstab(base_hh.auto_ownership, 
                 build_hh.auto_ownership, 
                 rownames=['Base'],
                 colnames=['Build'], 
                 margins=True, 
                 margins_name='Total')
df

Build,0,1,2,3,4,Total
Base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,0,0,0,0,1
1,0,19,0,0,0,19
2,0,0,19,0,0,19
3,1,0,0,5,0,6
4,0,0,0,0,4,4
Total,2,19,19,5,4,49


## Work from Home

In [8]:
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=['person_id','work_from_home','home_zone_id'])

build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=['person_id','work_from_home','home_zone_id'])

In [9]:
print(f"Filtering persons by home TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.home_zone_id)]
build_per = build_per[single_filter_mazs(build_per.home_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by home TAZ. Original Persons: 49
Persons after filtering: 49


In [10]:
df = pd.crosstab(base_per.work_from_home, 
                 build_per.work_from_home, 
                 rownames=['Base'], 
                 colnames=['Build'], 
                 margins=True, 
                 margins_name='Total')
df

Build,False,True,Total
Base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,48,1,49
Total,48,1,49


## School Location Changes

In [11]:
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=['person_id','is_student','school_zone_id','home_zone_id'])
build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=['person_id','is_student','school_zone_id','home_zone_id'])

In [12]:
print(f"Filtering persons by home TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.home_zone_id)]
build_per = build_per[single_filter_mazs(build_per.home_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by home TAZ. Original Persons: 49
Persons after filtering: 49


In [13]:
df = base_per[base_per.is_student].merge(build_per[build_per.is_student],
                                         how='left',
                                         left_index=True,
                                         right_index=True,
                                         suffixes=('_base','_build'))

df = (df.school_zone_id_base == df.school_zone_id_build).value_counts()
df.index = df.index.map({True:'Unchanged',False:'Changed'})
df

Unchanged    11
Changed       1
Name: count, dtype: int64

In [14]:
fig = vh.create_pie_chart(df.to_frame().sort_index(),["count"])
fig.show()

## Workplace Location Changes

In [15]:
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=['person_id','is_worker','workplace_zone_id','home_zone_id'])

build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=['person_id','is_worker','workplace_zone_id','home_zone_id'])

In [16]:
print(f"Filtering persons by home TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.home_zone_id)]
build_per = build_per[single_filter_mazs(build_per.home_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by home TAZ. Original Persons: 49
Persons after filtering: 49


In [17]:
df = base_per[base_per.is_worker].merge(build_per[build_per.is_worker],
                                        how='left',
                                        left_index=True,
                                        right_index=True, 
                                        suffixes=('_base','_build'))

df = (df.workplace_zone_id_base == df.workplace_zone_id_build).value_counts()
df.index = df.index.map({True:'Unchanged',False:'Changed'})
df

Unchanged    15
Changed       1
Name: count, dtype: int64

In [18]:
fig = vh.create_pie_chart(df.to_frame().sort_index(),["count"])
fig.show()

## Transit Pass Subsidy

In [19]:
usecols = ['person_id','transit_pass_ownership','transit_pass_subsidy', 'home_zone_id']
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=usecols)

build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=usecols)

In [20]:
print(f"Filtering persons by home TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.home_zone_id)]
build_per = build_per[single_filter_mazs(build_per.home_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by home TAZ. Original Persons: 49
Persons after filtering: 49


In [21]:
# Note: subsidy does not appear to be conditioned on ownership
df = base_per[['transit_pass_subsidy']].merge(
    build_per[['transit_pass_subsidy']],
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build')
)

In [22]:
df0 = pd.crosstab(
    base_per.transit_pass_subsidy,
    build_per.transit_pass_subsidy,
    rownames=['base'],
    colnames=['build'],
    margins=True,
    margins_name='Total'
)
df0

build,0,1,Total
base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,46,1,47
1,0,2,2
Total,46,3,49


In [23]:
df = (df.transit_pass_subsidy_base == df.transit_pass_subsidy_build).value_counts()
df.index = df.index.map({True: 'Unchanged', False: 'Changed'})
df

Unchanged    48
Changed       1
Name: count, dtype: int64

In [24]:
fig = vh.create_pie_chart(df.to_frame().sort_index(),["count"])
fig.show()

## Transit Pass Ownership

In [25]:
df = base_per[['transit_pass_ownership']].merge(
    build_per[['transit_pass_ownership']],
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('_base', '_build')
)

In [26]:
df0 = pd.crosstab(
    base_per.transit_pass_subsidy,
    build_per.transit_pass_subsidy,
    rownames=['base'],
    colnames=['build'],
    margins=True,
    margins_name='Total'
)
df0

build,0,1,Total
base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,46,1,47
1,0,2,2
Total,46,3,49


In [27]:
df = (df.transit_pass_ownership_base == df.transit_pass_ownership_build).value_counts()
df.index = df.index.map({True: 'Unchanged', False: 'Changed'})
df

Unchanged    48
Changed       1
Name: count, dtype: int64

In [28]:
fig = vh.create_pie_chart(df.to_frame().sort_index(),["count"])
fig.show()

## Free Parking

In [29]:
usecols = ['person_id', 'is_worker', 'free_parking_at_work', 'workplace_zone_id']
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=usecols)

build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=usecols)

In [30]:
print(f"Filtering persons by workplace TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.workplace_zone_id)]
build_per = build_per[single_filter_mazs(build_per.workplace_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by workplace TAZ. Original Persons: 49
Persons after filtering: 49


In [31]:
df = base_per[base_per.is_worker].merge(build_per[build_per.is_worker],
                                        how='left',
                                        left_index=True,
                                        right_index=True, 
                                        suffixes=('_base','_build'))

In [32]:
df0 = pd.crosstab(
    base_per.free_parking_at_work,
    build_per.free_parking_at_work,
    rownames=['base'],
    colnames=['build'],
    margins=True,
    margins_name='Total'
)
df0

build,False,True,Total
base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,42,1,43
True,0,6,6
Total,42,7,49


In [33]:
df = (df.free_parking_at_work_base == df.free_parking_at_work_build).value_counts()
df.index = df.index.map({True:'Unchanged',False:'Changed'})
df

Unchanged    15
Changed       1
Name: count, dtype: int64

In [34]:
fig = vh.create_pie_chart(df.to_frame().sort_index(),["count"])
fig.show()

## Telecommute Frequency

In [35]:
usecols = ['person_id', 'is_worker', 'telecommute_frequency', 'workplace_zone_id']
base_per = pd.read_csv(f"{base_dir}/final_persons.csv",
                       index_col='person_id',
                       usecols=usecols)

build_per = pd.read_csv(f"{build_dir}/final_persons.csv",
                        index_col='person_id',
                        usecols=usecols)

# nicer names, please. grab the first char to get magnitudes.
tc_labels = {
    'No_Telecommute': '0 (No Telecommute)',
    '1_days_week': '1 Day per Week',
    '2_3_days_week': '2-3 Days per Week',
    '4_days_week': '4 Days per Week'
}

base_per.telecommute_frequency = base_per.telecommute_frequency.map(tc_labels)
build_per.telecommute_frequency = build_per.telecommute_frequency.map(tc_labels)

In [36]:
print(f"Filtering persons by workplace TAZ. Original Persons: {len(base_per)}")
base_per = base_per[single_filter_mazs(base_per.workplace_zone_id)]
build_per = build_per[single_filter_mazs(build_per.workplace_zone_id)]
print(f"Persons after filtering: {len(base_per)}")

Filtering persons by workplace TAZ. Original Persons: 49
Persons after filtering: 49


In [94]:
df = base_per[base_per.is_worker].merge(build_per[build_per.is_worker],
                                        how='left',
                                        left_index=True,
                                        right_index=True, 
                                        suffixes=('_base','_build'))

In [95]:
xtab = pd.crosstab(
    df.telecommute_frequency_base,
    df.telecommute_frequency_build,
    rownames=['base'],
    colnames=['build'],
    margins=True,
    margins_name='Total'
).sort_index()
xtab

build,0 (No Telecommute),2-3 Days per Week,4 Days per Week,Total
base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0 (No Telecommute),11,0,0,11
2-3 Days per Week,0,3,0,3
4 Days per Week,0,0,2,2
Total,11,3,2,16


In [102]:
df1 = df.loc[:, ["telecommute_frequency_base", "telecommute_frequency_build"]]

base_tc_magnitude = df1.telecommute_frequency_base.str[0]
build_tc_magnitude = df1.telecommute_frequency_build.str[0]

df1.loc[base_tc_magnitude == build_tc_magnitude, 'case'] = 'Unchanged'
df1.loc[base_tc_magnitude > build_tc_magnitude, 'case'] = 'Decreased'
df1.loc[base_tc_magnitude < build_tc_magnitude, 'case'] = 'Increased'

df1 = df1.case.value_counts()
fig = vh.create_pie_chart(df1.to_frame().sort_index(), ["count"])
fig.show()

## Transponder Ownership

In [39]:
base_hh = pd.read_csv(f"{base_dir}/final_households.csv", 
                      index_col='household_id', 
                      usecols=['household_id','transponder_ownership','home_zone_id'])

build_hh = pd.read_csv(f"{build_dir}/final_households.csv", 
                       index_col='household_id', 
                       usecols=['household_id','transponder_ownership', 'home_zone_id'])


In [40]:
print(f"Filtering households by TAZ. Original HHs: {len(base_hh)}")
base_hh = base_hh[single_filter_mazs(base_hh.home_zone_id)]
build_hh = build_hh[single_filter_mazs(build_hh.home_zone_id)]
print(f"HHs after filtering: {len(base_hh)}")

Filtering households by TAZ. Original HHs: 49
HHs after filtering: 49


In [41]:
df = pd.crosstab(base_hh.transponder_ownership,
                 build_hh.transponder_ownership, 
                 rownames=['Base'],
                 colnames=['Build'], 
                 margins=True, 
                 margins_name='Total')
df

Build,False,True,Total
Base,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,36,1,37
True,0,12,12
Total,36,13,49
