# Transit Boardings Report - version 2
This notebook generates two types of report:
- Standard: A single or comparison report for the overall scenario(s)
    - This includes sub-mode daily boardings and TOD totals (AM, MD, PM, NT) 
- Detailed: A detailed report for selected routes.
    - This includes Daily and TOD boardings for user-specified route(s)


### This notebook is a work in progress and, as of 9/24/2021, should ONLY be used by a qualified developer!

In [None]:
# Import required packages
import pandas as pd
import numpy as np
import os
import glob
from functools import reduce
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly import tools

In [None]:
# Scenario(s) to report on: one ('Base Model') or two ('Base Model' plus 'Comparative Model').
# Each value is the root folder of a scenario's model output. Keep the trailing '/':
# the code below builds sub-paths by appending to this string.
# To produce a comparison report, uncomment the 'Comparative Model' line.
#
# This variable ('scenarios') was called 'bases' in the first version of this notebook.
#
scenarios = {'Base Model':r'G:/Regional_Modeling/1A_Archives/LRTP_2018/2040 NB Scen 01_MoDXoutputs/'
             #,'Comparative Model':r'G:/Regional_Modeling/1A_Archives/LRTP_2018/2016 Scen 00_08March2019_MoDXoutputs/'
            } 

In [None]:
# Reference data: CSV file containing the list of _ALL_ transit routes.
# NOTE(review): this variable does not appear to be referenced by the code shown in this
# notebook (set_up_metamode_table() builds its own path under each scenario folder) -
# confirm whether it is still needed.
all_transit_routes_csv_fn = \
r'G:\Regional_Modeling\1A_Archives\LRTP_2018\2016 Scen 00_08March2019_MoDXoutputs\Databases\Statewide_Routes_2018S.csv'

#### User input required: supply name of CSV file with list of routes on which to report.

In [None]:
# Two types of reports:
# Standard (all routes in the input CSV file you supply) or detailed (in-line list of routes).

# CSV file containing the list of transit routes for which to generate this report.
# It must contain a 'Route_ID' column (read below).
routes_csv_fn = r'G:/Data_Resources/DataStore/transit info.csv'

# Read this CSV file into a pandas dataframe:
routes_df = pd.read_csv(routes_csv_fn)

# Specify the list of routes for which to generate the report.
# By default this is every route in the 'routes_df' dataframe:
route_list = routes_df['Route_ID']

In [None]:
# Import transit assignment result CSV files.
# 
# *** 9/29/2021 - The current version of this function DOESN'T compute the daily sum.
#                 For the moment, we will attempt to do this as a post-processing step.
#
# Old comment:
# Stackoverflow article used by Margaret as reference for summing the data frames:
# https://stackoverflow.com/questions/11106823/adding-two-pandas-dataframes
# 
def import_transit_assignment(scenario, routes=None):
    '''Load one scenario's transit-assignment CSV output and sum it per time-of-day.

    For each time-of-day (TOD) period - AM, MD, PM, NT - every CSV file found in
    <scenario>/out/<TOD>/ is read, indexed by ('ROUTE', 'STOP'), restricted to the
    routes being reported on, and added to the other tables of that period.
    A row that is missing from one table counts as 0 when the tables are added.

    NOTE (9/29/2021): the daily total is NOT computed here; it is computed by the
    caller as a post-processing step.

    Parameters:
        scenario: path of the scenario's root folder (with or without a trailing
                  path separator).
        routes:   optional collection of ROUTE ids to keep. When omitted, the
                  notebook-level 'route_list' is used.

    Returns:
        dict mapping each TOD name ('AM', 'MD', 'PM', 'NT') to a dataframe, indexed
        by ('ROUTE', 'STOP'), holding the summed values for that period.

    Raises:
        FileNotFoundError: if no CSV file is found for one of the TOD periods.
    '''
    if routes is None:
        # Default: the routes selected in the notebook's configuration cell.
        routes = route_list

    base = os.path.join(scenario, 'out')
    tods = ["AM", "MD", "PM", "NT"]
    # 'TODsums' receives one key-value pair per TOD and is the return value.
    TODsums = {}

    for tod in tods:
        # Full paths of _all_ CSV files for the current TOD (a.k.a. 'time period');
        # sorted so that the tables are always combined in the same order.
        fq_csv_fns = sorted(glob.glob(os.path.join(base, tod, r'*.csv')))
        if not fq_csv_fns:
            # Without this check, reduce() below fails with an unhelpful TypeError.
            raise FileNotFoundError(
                'No transit assignment CSV files found in ' + os.path.join(base, tod))

        # 'tablist': one dataframe per CSV file, already filtered to the selected routes.
        tablist = []
        for csv_file in fq_csv_fns:
            table = pd.read_csv(csv_file).set_index(['ROUTE', 'STOP'])
            keep = table.index.get_level_values('ROUTE').isin(routes)
            tablist.append(table[keep])

        # Sum the tables for the current TOD, aligning rows on ('ROUTE', 'STOP').
        # Reference used by Margaret for summing data frames:
        # https://stackoverflow.com/questions/11106823/adding-two-pandas-dataframes
        TODsums[tod] = reduce(lambda a, b: a.add(b, fill_value=0), tablist)
    # end_for over all tod's

    # ROUTE and STOP deliberately remain index levels (9/29/2021): the daily-total
    # post-processing joins the TOD tables on them.
    return TODsums
#

In [None]:
# Load the per-time-of-day (TOD) boarding tables for the base scenario.
results = import_transit_assignment(scenarios['Base Model'])

am_results = results['AM']
md_results = results['MD']
pm_results = results['PM']
nt_results = results['NT']

# Count columns that are expected in every TOD table and are summed into the daily total.
_BOARDING_COUNT_COLS = ['DirectTransferOff', 'DirectTransferOn', 'DriveAccessOn',
                        'EgressOff', 'Off', 'On',
                        'WalkAccessOn', 'WalkTransferOff', 'WalkTransferOn']


def _sum_boarding_tables(left, right, suffixes):
    '''Outer-join two boarding tables on ('ROUTE', 'STOP') and add their count columns.

    The tables can - and do - have different lengths, so they are joined first;
    a (ROUTE, STOP) pair present in only one table contributes 0 for the other.

    Parameters:
        left, right: dataframes carrying ROUTE and STOP plus the columns listed in
                     _BOARDING_COUNT_COLS.
        suffixes:    (left_suffix, right_suffix) used to tell the two sets of count
                     columns apart while they are being added.

    Returns:
        The joined dataframe with one summed column per count column; the suffixed
        working columns are removed.
    '''
    joined = pd.merge(left, right, on=['ROUTE', 'STOP'], how='outer', suffixes=suffixes)
    joined = joined.fillna(0)
    left_sfx, right_sfx = suffixes
    for col in _BOARDING_COUNT_COLS:
        joined[col] = joined[col + left_sfx] + joined[col + right_sfx]
    suffixed_cols = [col + sfx for col in _BOARDING_COUNT_COLS for sfx in suffixes]
    return joined.drop(columns=suffixed_cols)


# AM + MD, then PM + NT, then the two halves together give the daily total.
j1 = _sum_boarding_tables(am_results, md_results, ('_am', '_md'))
j2 = _sum_boarding_tables(pm_results, nt_results, ('_pm', '_nt'))
daily_df = _sum_boarding_tables(j1, j2, ('_j1', '_j2'))

results['daily'] = daily_df

# Sanity check: display the daily table.
daily_df

In [None]:
# Lookup from a TransCAD 'Mode' code to the 'Meta-mode' it is reported under.
# The mode codes are listed once per meta-mode and then flattened into the
# mode -> meta-mode dict (ordered by mode code) used by mode_to_metamode().
_metamode_to_modes = {
    'MBTA_Bus':        (1, 2, 3),
    'Light_Rail':      (4, 12, 13),
    'Heavy_Rail':      (5, 6, 7, 8),
    'Commuter_Rail':   (9,) + tuple(range(32, 45)),
    'Ferry':           (10, 11),
    'Shuttle_Express': (14, 15, 16),
    'RTA':             tuple(range(17, 23)),
    'Private':         tuple(range(23, 32)),
    'Walk':            (70,),
}

_mode_to_metamode_mapping_table = dict(sorted(
    (mode_code, metamode_name)
    for metamode_name, mode_codes in _metamode_to_modes.items()
    for mode_code in mode_codes
))

def mode_to_metamode(mode):
    '''Return the meta-mode name for a mode code, or the string 'None' if the code is unknown.'''
    return _mode_to_metamode_mapping_table.get(mode, 'None')

In [None]:
def set_up_metamode_table(scenario):
    '''Build the route -> mode -> meta-mode lookup table for one scenario.

    Reads the 'Routes_ID' and 'Mode' columns of
    <scenario>/Databases/Statewide_Routes_2018S.csv, removes duplicate rows, and
    adds a 'metaMode' column obtained by passing each 'Mode' to mode_to_metamode().

    Parameters:
        scenario: path of the scenario's root folder (with or without a trailing
                  path separator).

    Returns:
        dataframe with the columns 'Routes_ID', 'Mode' and 'metaMode'.
    '''
    routes_csv_fn = os.path.join(scenario, 'Databases', 'Statewide_Routes_2018S.csv')
    routemode = pd.read_csv(routes_csv_fn, usecols=["Routes_ID", "Mode"]).drop_duplicates()
    # Map the 'Mode' column directly: a row-wise DataFrame.apply returns a DataFrame
    # (not a Series) when the table is empty, which cannot be assigned to one column.
    routemode['metaMode'] = routemode['Mode'].map(mode_to_metamode)
    return routemode

In [None]:
def join_and_agg(TODSums, routemode):
    '''Aggregate the on/off counts of every time-of-day table by route or by meta-mode.

    When the notebook-level 'route_list' is non-empty (detailed report) each table is
    joined to 'routes_df' and to 'routemode' and grouped by 'ROUTE'; otherwise
    (standard report) it is joined to 'routemode' only and grouped by 'metaMode'.

    Parameters:
        TODSums:   dict of dataframes keyed by time-of-day, as returned by
                   import_transit_assignment(). The dict is updated in place.
        routemode: route/mode/meta-mode table from set_up_metamode_table().

    Returns:
        The same dict, each value replaced by its aggregated table: one row per
        group, holding the group key and the summed count columns.
    '''
    # Set the group-by field depending on whether this is a standard or detailed report.
    detailed = len(route_list) > 0
    agg = 'ROUTE' if detailed else 'metaMode'
    count_cols = ['DirectTransferOff', 'DirectTransferOn', 'DriveAccessOn', 'EgressOff', 'Off', 'On',
                  'WalkAccessOn', 'WalkTransferOff', 'WalkTransferOn']

    # Work on the dict that was passed in (not on a notebook-level variable).
    for tod in TODSums:
        table = TODSums[tod]
        if detailed:
            # Attach the descriptive route information to the assignment results.
            table = routes_df.merge(table, how='outer', left_on='Route_ID', right_on='ROUTE')
            # Replace the route id by the leading part of the route name.
            # NOTE(review): how str.split interprets the pattern '.:()' (literal vs.
            # regular expression) depends on the pandas version - confirm the intent.
            table['ROUTE'] = table['Route_Name'].str.split('.:()').str[0]
            # Join to the route/mode table.
            table = routemode.merge(table, how='right', left_on='Routes_ID', right_on='Route_ID')
        else:
            table = routemode.merge(table, how='right', left_on='Routes_ID', right_on='ROUTE')
        # Sum all On/Off fields per group.
        TODSums[tod] = table.groupby([agg])[count_cols].agg('sum').reset_index()
    return TODSums

In [None]:
def plots(scen2, g):
    '''Build one stacked-bar boardings chart per time-of-day (TOD) table.

    With a 'Comparative Model' configured in 'scenarios', the base and comparative
    tables of each TOD are stacked into one table and drawn as a chart faceted by
    scenario; otherwise one chart per TOD is drawn for the base model alone.
    The x axis is 'ROUTE' when 'route_list' is non-empty, else 'metaMode'.

    Parameters:
        scen2: dict of results keyed by scenario name ('Base Model' and, optionally,
               'Comparative Model'); each value is a dict of dataframes keyed by TOD.
        g:     not used; retained so that existing calls keep working.

    Returns:
        dict mapping each TOD key to its plotly figure.

    Side effects:
        Comparison: a 'Scenario' column is added to every input table and the
        stacked tables are stored in scen2['compGraph'].
        Base only: the x-axis column of every base table is converted to str.
    '''
    onfdict = {}
    # Detailed vs. standard report: use the matching aggregation field for the x axis.
    xVal = 'ROUTE' if len(route_list) > 0 else 'metaMode'

    if 'Comparative Model' in scenarios:
        # Faceted graph for base and comparative scenario together.
        scen2['compGraph'] = {}
        for tod in scen2['Base Model']:
            # Flag each table with its scenario so both can be stacked into one table.
            scen2['Base Model'][tod]['Scenario'] = 'Base'
            scen2['Comparative Model'][tod]['Scenario'] = 'Comparative'
            # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
            scen2['compGraph'][tod] = pd.concat(
                [scen2['Base Model'][tod], scen2['Comparative Model'][tod]])

        TODsums = scen2['compGraph']

        for z in TODsums:
            # Wide to long format, keeping only the boarding ('...On') components;
            # 'Scenario' is the facet field.
            lng = TODsums[z].drop(['DirectTransferOff', 'EgressOff', 'Off', 'On', 'WalkTransferOff'],
                                  axis=1).melt(id_vars=[xVal, 'Scenario'], value_name='Count',
                                               ignore_index=False)
            # Make sure ids are strings for graphing purposes.
            lng[xVal] = lng[xVal].astype(str)
            onfdict[z] = px.bar(lng, x=xVal, y='Count', color='variable', facet_col='Scenario',
                                title='Base and Comparative Model: '+z+' Boardings')
    else:
        # Base model only.
        TODsums = scen2['Base Model']
        for z in TODsums:
            TODsums[z][xVal] = TODsums[z][xVal].astype(str)  # make safe for graphing
            onfdict[z] = px.bar(TODsums[z], x=xVal,
                                y=['DirectTransferOn', 'DriveAccessOn', 'WalkAccessOn', 'WalkTransferOn'],
                                title='Base Model '+z+' Boardings')
    return onfdict

In [None]:
def diftab(scen):
    '''Add base-minus-comparative difference tables and charts to the results dict.

    When exactly two scenarios are configured, the aggregated 'Comparative Model'
    table of each time-of-day (TOD) is subtracted from the 'Base Model' table
    (rows aligned on 'ROUTE' or 'metaMode', 'Scenario' flag column dropped) and a
    bar chart of the boarding differences is built.

    Parameters:
        scen: dict of results keyed by scenario name; the 'Base Model' and
              'Comparative Model' entries are dicts of dataframes keyed by TOD.

    Returns:
        The same dict with scen['Difference'] = [difference tables by TOD,
        difference charts by TOD]; both are empty unless two scenarios are configured.
    '''
    # Detailed vs. standard report: use the matching aggregation field.
    xVal = 'ROUTE' if len(route_list) > 0 else 'metaMode'

    # Collected locally so that the scenarios' own tables are left untouched.
    diff_tables = {}
    diff_graphs = {}

    if len(scenarios) == 2:
        base_tables = scen['Base Model']
        comp_tables = scen['Comparative Model']
        for z in base_tables:
            base = base_tables[z].set_index(xVal).drop('Scenario', axis=1)
            comp = comp_tables[z].set_index(xVal).drop('Scenario', axis=1)
            diff = (base - comp).reset_index()
            # Make sure ids are strings for graphing purposes.
            diff[xVal] = diff[xVal].astype(str)
            diff_tables[z] = diff
            diff_graphs[z] = px.bar(diff, x=xVal,
                                    y=['DirectTransferOn', 'DriveAccessOn', 'WalkAccessOn', 'WalkTransferOn'],
                                    title='Difference in '+z+' Boardings')

    scen['Difference'] = [diff_tables, diff_graphs]  # difference data and graphs
    return scen

In [None]:
# Run the whole pipeline for every configured scenario.
#
# 'scen' is a two-level dict in which the full set of results is accumulated.
# Level 1 = scenario
# Level 2 = tod ('AM', 'MD', 'PM', 'NT' and 'daily')
scen = {}

for g in scenarios:
    # Total boardings per route and stop, per TOD.
    TODsums = import_transit_assignment(scenarios[g])
    # Post-processing step: the daily total is the sum of the four TOD tables, aligned
    # on (ROUTE, STOP); a row missing from a period counts as 0. Without this entry
    # the 'daily' charts requested at the end of the notebook do not exist.
    TODsums['daily'] = reduce(lambda a, b: a.add(b, fill_value=0),
                              [TODsums[tod] for tod in ('AM', 'MD', 'PM', 'NT')]).fillna(0)
    # A route-to-mode-to-metamode mapping table has to be generated for _each scenario_
    # because the list of routes MAY NOT be the same for each scenario!
    routemode = set_up_metamode_table(scenarios[g])
    TODsums = join_and_agg(TODsums, routemode)  # aggregate by mode or route
    scen[g] = TODsums

# Make the graphs (one per TOD) once all scenarios have been processed.
scen['compGraph'] = plots(scen, g)

# With two scenarios, also build the difference tables and graphs.
if len(scenarios) == 2:
    scen = diftab(scen)


## Look at Results by TOD

In [None]:
# AM period: boardings chart.
scen['compGraph']['AM'].show()
# Comparison run only: chart of the boarding differences (base - comparative).
if len(scenarios) == 2:
    scen['Difference'][1]['AM'].show()


In [None]:
# MD period: boardings chart.
scen['compGraph']['MD'].show()
# Comparison run only: chart of the boarding differences (base - comparative).
if len(scenarios) == 2:
    scen['Difference'][1]['MD'].show()

In [None]:
# PM period: boardings chart.
scen['compGraph']['PM'].show()
# Comparison run only: chart of the boarding differences (base - comparative).
if len(scenarios) == 2:
    scen['Difference'][1]['PM'].show()

In [None]:
# NT period: boardings chart.
scen['compGraph']['NT'].show()
# Comparison run only: chart of the boarding differences (base - comparative).
if len(scenarios) == 2:
    scen['Difference'][1]['NT'].show()

In [None]:
# Whole day: boardings chart.
scen['compGraph']['daily'].show()
# Comparison run only: chart of the boarding differences (base - comparative).
if len(scenarios) == 2:
    scen['Difference'][1]['daily'].show()