# Fast-Trips Tutorial 2: Test Stochastic Dispersion and Overlap
The purpose of this tutorial is to illustrate the effects of the dispersion and path overlap parameters on stochastic path choice.

In [None]:
import os,datetime
import pandas as pd
import fasttrips
from fasttrips import Run

# Specify Input Networks

In [None]:
# Root folder of the tutorial files.  Set the FASTTRIPS_TUTORIAL_DIR environment
# variable to point at your own copy of the tutorial; when it is unset (or empty)
# the original author's location is used, so existing setups keep working.
TUTORIAL_DIR     = os.environ.get("FASTTRIPS_TUTORIAL_DIR") or r"/Users/elizabeth/Documents/urbanlabs/MTC/SHRP2/tutorial/tta"

# Network and demand folders passed to every Fast-Trips run below.
INPUT_NETWORKS   = os.path.join(TUTORIAL_DIR, "input", "network-simple")
INPUT_DEMAND     = os.path.join(TUTORIAL_DIR, "input", "demand-single")

# Examine Effect of Dispersion
Note that if you run this more than once, you will probably need to restart the kernel (the `kernel>>restart` menu) so that Fast-Trips doesn't get confused by variables still held in memory.

In [None]:
# Where the run results are written.  The tutorial root can be overridden with the
# FASTTRIPS_TUTORIAL_DIR environment variable; when it is unset (or empty) the
# original author's location is used.
OUTPUT_DIR       = os.path.join(
    os.environ.get("FASTTRIPS_TUTORIAL_DIR") or r"/Users/elizabeth/Documents/urbanlabs/MTC/SHRP2/tutorial/tta",
    "output")

# Settings shared by every dispersion run below.
ITERATIONS       = 1
PATHFINDING_TYPE = "stochastic"
OVERLAP          = "None"   # the string "None", not the Python None object

# Dispersion values exercised by the run cells below (one cell per value).
DISPERSION_TESTS = [0.50,0.60,0.80,1.00]

## **Running Multiple FT Runs in iPy Notebook**
Because iPython keeps variables lingering in memory between runs, you must restart the kernel after each run in order to run multiple Fast-Trips runs in the same notebook.

In [None]:
# TODO: run the dispersion tests in a loop in future.  For now, iPython has
# "secretly saved variables" that make this difficult.

# Dispersion test: 0.50
DISPERSION       = 0.50
OUTPUT_FOLDER    = r"test_simplenet_wDispersion050"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    overlap_variable=OVERLAP,
    dispersion=DISPERSION,
)

In [None]:
# Dispersion test: 0.60
DISPERSION       = 0.60
OUTPUT_FOLDER    = r"test_simplenet_wDispersion060"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    overlap_variable=OVERLAP,
    dispersion=DISPERSION,
)

In [None]:
# Dispersion test: 0.80
DISPERSION       = 0.80
OUTPUT_FOLDER    = r"test_simplenet_wDispersion080"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    overlap_variable=OVERLAP,
    dispersion=DISPERSION,
)

In [None]:
# Dispersion test: 1.00
DISPERSION       = 1.00
OUTPUT_FOLDER    = r"test_simplenet_wDispersion100"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    overlap_variable=OVERLAP,
    dispersion=DISPERSION,
)

# Examine Dispersion Results

In [None]:
# Plotting imports used by the results cells below.
# NOTE(review): `bokeh.charts` appears to be available only in older Bokeh
# releases -- confirm which Bokeh version this tutorial is pinned to.
from bokeh.charts import Bar, Histogram, TimeSeries, output_file, Line
from bokeh.models import ColumnDataSource
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
# Presumably makes Bokeh render its plots inline in this notebook -- verify against the Bokeh docs.
output_notebook()

In [None]:
# Output folder written by each dispersion run above, keyed by the dispersion value used.
run_collection    = {0.50: {"out":r"test_simplenet_wDispersion050"},
                     0.60: {"out":r"test_simplenet_wDispersion060"},
                     0.80: {"out":r"test_simplenet_wDispersion080"},
                     1.00: {"out":r"test_simplenet_wDispersion100"}}

pd.set_option('display.max_colwidth',160) #widen so you can see whole description
PATHS  = r"pathset_paths.csv"

# Columns kept from each run's path-set file.
PATH_COLUMNS = ["person_id","person_trip_id","pathnum","description","sim_cost","probability"]

# Read every run's path set, tag it with its dispersion value, and stack the
# results into one long table.  `.items()` works on both Python 2 and 3
# (`.iteritems()` exists only on Python 2), and concatenating once at the end
# avoids special-casing the first run.
run_frames = []
for dispersion, run_info in run_collection.items():
    pathset_file = os.path.join(OUTPUT_DIR, run_info["out"], PATHS)
    run_df = pd.read_csv(pathset_file, sep=",")[PATH_COLUMNS]
    run_df["dispersion"] = dispersion
    run_frames.append(run_df)

longpaths_df = pd.concat(run_frames, ignore_index=True, axis=0)

#longpaths_df

In [None]:
# Bar chart of path probability, labelled by dispersion value and grouped by path number.
p = Bar(
    longpaths_df,
    title="Probability by Dispersion",
    label='dispersion',
    group='pathnum',
    values='probability',
)
show(p, notebook_handle=True)

We can see that as the dispersion parameter increases, the good path looks even better relative to the less good path.  

Looking at the table, it is evident that only dispersion parameter = 0.5 even assigns a non-zero probability to waiting for another green bus.  

## Examine Effect of Overlap Calculation

In [None]:
# Where the run results are written.  The tutorial root can be overridden with the
# FASTTRIPS_TUTORIAL_DIR environment variable; when it is unset (or empty) the
# original author's location is used.
OUTPUT_DIR       = os.path.join(
    os.environ.get("FASTTRIPS_TUTORIAL_DIR") or r"/Users/elizabeth/Documents/urbanlabs/MTC/SHRP2/tutorial/tta",
    "output")

# Settings shared by every overlap run below; each run cell sets its own OVERLAP.
ITERATIONS       = 1
PATHFINDING_TYPE = "stochastic"
OVERLAP          = "None"   # the string "None", not the Python None object
DISPERSION       = 0.50

In [None]:
# Overlap test: 'count'
# Possible overlap methods are ['None','count','distance','time'], each
# combined with the split-transit boolean.
OVERLAP_SPLIT_TRANSIT = False
OVERLAP               = 'count'
OUTPUT_FOLDER         = r"test_simplenet_wDispersion050_wOverlapCount"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    dispersion=DISPERSION,
    overlap_variable=OVERLAP,
    overlap_split_transit=OVERLAP_SPLIT_TRANSIT,
)



In [None]:
# Overlap test: 'distance'
# Possible overlap methods are ['None','count','distance','time'], each
# combined with the split-transit boolean.
OVERLAP_SPLIT_TRANSIT = False
OVERLAP               = 'distance'
OUTPUT_FOLDER         = r"test_simplenet_wDispersion050_wOverlapDistance"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    dispersion=DISPERSION,
    overlap_variable=OVERLAP,
    overlap_split_transit=OVERLAP_SPLIT_TRANSIT,
)


In [None]:
# Overlap test: 'time'
# Possible overlap methods are ['None','count','distance','time'], each
# combined with the split-transit boolean.
OVERLAP_SPLIT_TRANSIT = False
OVERLAP               = 'time'
OUTPUT_FOLDER         = r"test_simplenet_wDispersion050_wOverlapTime"

Run.run_fasttrips(
    # inputs
    input_net_dir=INPUT_NETWORKS,
    input_demand_dir=INPUT_DEMAND,
    # outputs
    output_dir=OUTPUT_DIR,
    output_folder=OUTPUT_FOLDER,
    # run settings
    iters=ITERATIONS,
    pathfinding_type=PATHFINDING_TYPE,
    dispersion=DISPERSION,
    overlap_variable=OVERLAP,
    overlap_split_transit=OVERLAP_SPLIT_TRANSIT,
)

In [None]:
## Analyze Results of Overlap Calculation

In [None]:
# Output folder of each overlap run, keyed by the overlap variable used.
# The "none" entry points at the folder written by the dispersion = 0.50 run.
run_collection    = {"none"    : r"test_simplenet_wDispersion050",
                     "count"   : r"test_simplenet_wDispersion050_wOverlapCount",
                     "distance": r"test_simplenet_wDispersion050_wOverlapDistance",
                     "time"    : r"test_simplenet_wDispersion050_wOverlapTime"}

pd.set_option('display.max_colwidth',160) #widen so you can see whole description
PATHS  = r"pathset_paths.csv"

# Columns kept from each run's path-set file.
PATH_COLUMNS = ["person_id","person_trip_id","pathnum","description","sim_cost","logsum","probability"]

# Read every run's path set, tag it with its overlap variable, and stack the
# results into one long table.  `.items()` works on both Python 2 and 3
# (`.iteritems()` exists only on Python 2), and concatenating once at the end
# avoids special-casing the first run.
run_frames = []
for overlap_var, run_folder in run_collection.items():
    pathset_file = os.path.join(OUTPUT_DIR, run_folder, PATHS)
    run_df = pd.read_csv(pathset_file, sep=",")[PATH_COLUMNS]
    run_df["overlap_var"] = overlap_var
    run_frames.append(run_df)

longpaths_df = pd.concat(run_frames, ignore_index=True, axis=0)
longpaths_df

In [None]:
# Bar chart of path probability, labelled by overlap variable and grouped by path number.
p = Bar(
    longpaths_df,
    title="Probability by Overlap",
    label='overlap_var',
    group='pathnum',
    values='probability',
)
show(p, notebook_handle=True)

As you can see, even in this very simple example, not accounting for overlap can have a fairly significant [ ~20%+ ] effect on the probabilities.  However, since the distances and times of the links do not vary significantly in this example, there is not a great deal of variation among the different overlap variables.

The other variable that can greatly affect the overlap calculations is whether the transit line is considered to be the same based on having the same on/off pattern, or if we consider overlapping transit segments.  Because this example does not have any intermediate stops, the difference here would be null, but this is another degree of freedom that we should consider in later tutorials.