# Fast-Trips Tutorial 5: Test Stochastic Dispersion and Overlap
The purpose of this tutorial is to illustrate how the dispersion and path-overlap parameters affect the path probabilities produced by stochastic pathfinding.

In [None]:
import os,datetime
import pandas as pd
import fasttrips
from fasttrips import Run

# Specify Input Networks

In [None]:
### REPLACE THIS WITH YOUR OWN DIRECTORY
BASE_DIR = r"YOUR PATH TO THE TUTORIAL DIRECTORY"

# The network and demand inputs share the <BASE_DIR>/tta/input prefix.
_TTA_INPUT_DIR = os.path.join(BASE_DIR, "tta", "input")
INPUT_NETWORKS = os.path.join(_TTA_INPUT_DIR, "network-simple")
INPUT_DEMAND   = os.path.join(_TTA_INPUT_DIR, "demand-single")

# Examine Effect of Dispersion
Note that if you run this more than once, you will probably need to restart the kernel first (the `Kernel > Restart` menu) so that fast-trips doesn't get confused by variables still held in memory.

In [None]:
# Settings for the dispersion experiment: everything is held fixed except the
# dispersion value, which is swept over DISPERSION_TESTS.

# The path weights and run configuration both sit in the single-demand input folder.
_DEMAND_SINGLE_DIR = os.path.join(BASE_DIR, "tta", "input", "demand-single")
INPUT_WEIGHTS      = os.path.join(_DEMAND_SINGLE_DIR, "pathweight_ft.txt")
RUN_CONFIG         = os.path.join(_DEMAND_SINGLE_DIR, "config_ft.txt")
OUTPUT_DIR         = os.path.join(BASE_DIR, "tta", "output")

ITERATIONS         = 1
PATHFINDING_TYPE   = "stochastic"
# The string "None" (not Python's None); presumably this turns the overlap
# correction off -- confirm against the Fast-Trips documentation.
OVERLAP            = "None"

# Each run's output folder name is this prefix followed by the dispersion value.
OUTPUT_FOLDER_BASE = r"test_simplenet_wDispersion"
DISPERSION_TESTS   = [0.50, 0.60, 0.80, 1.00]

In [None]:
# Run Fast-Trips once per dispersion value. Only `dispersion` and the output
# folder name change between runs; every other argument is held fixed.
for dispersion_value in DISPERSION_TESTS:
    # Folder name is the shared prefix followed by the value with two decimals.
    OUTPUT_FOLDER = OUTPUT_FOLDER_BASE + "%2.2f" % dispersion_value

    run_arguments = dict(
        input_network_dir = INPUT_NETWORKS,
        input_demand_dir  = INPUT_DEMAND,
        run_config        = RUN_CONFIG,
        input_weights     = INPUT_WEIGHTS,
        output_dir        = OUTPUT_DIR,
        output_folder     = OUTPUT_FOLDER,
        pathfinding_type  = PATHFINDING_TYPE,
        iters             = ITERATIONS,
        overlap_variable  = OVERLAP,
        dispersion        = dispersion_value,
    )
    Run.run_fasttrips(**run_arguments)

# Examine Dispersion Results

In [None]:
# Plotting imports for the results cells below. Of these names, only Bar, show and
# output_notebook are used in the visible cells.
# NOTE(review): `bokeh.charts` was split out of Bokeh into the separate `bkcharts`
# package in later releases -- confirm the installed Bokeh version still provides it.
from bokeh.charts import Bar, Histogram, TimeSeries, output_file, Line
from bokeh.models import ColumnDataSource
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
# Configure Bokeh to render its output inside the notebook.
output_notebook()

In [None]:
# Widen the column display so the whole path description is visible.
pd.set_option('display.max_colwidth', 160)
PATHS = r"pathset_paths.csv"
PATH_COLUMNS = ["person_id","person_trip_id","pathnum","description","sim_cost","probability"]

# Read the path set written by each dispersion run, keep the columns of interest,
# and tag every row with the dispersion value that produced it.
# The frames are collected in a list and concatenated once. This avoids
# re-copying the accumulated frame on every iteration, and it avoids detecting
# the "first" run by value, which would silently reset the accumulated result
# if the first dispersion value appeared in the list more than once.
dispersion_frames = []
for d in DISPERSION_TESTS:
    full_output_directory = os.path.join(OUTPUT_DIR, OUTPUT_FOLDER_BASE + "%2.2f" % (d))
    df = pd.read_csv(os.path.join(full_output_directory, PATHS), sep=",")[PATH_COLUMNS]
    dispersion_frames.append(df.assign(dispersion=d))

# One combined table with a fresh 0..n-1 index.
paths_df = pd.concat(dispersion_frames, ignore_index=True, axis=0)

paths_df

In [None]:
# Bar chart of path probability, labelled by dispersion value and grouped by path number.
dispersion_chart = Bar(
    paths_df,
    label='dispersion',
    values='probability',
    group='pathnum',
    title="Probability by Dispersion",
)
show(dispersion_chart, notebook_handle=True)

We can see that as the dispersion parameter increases, the better path receives an even higher probability relative to the worse path.  

Looking at the table, it is evident that a dispersion parameter of 0.5 is the only tested value that assigns a non-zero probability to waiting for another green bus.  

## Examine Effect of Overlap Calculation

In [None]:
# Settings for the overlap experiment: dispersion is held fixed and the overlap
# variable is swept over OVERLAP_METHODS.

# The path weights and run configuration both sit in the single-demand input folder.
_DEMAND_SINGLE_DIR = os.path.join(BASE_DIR, "tta", "input", "demand-single")
INPUT_WEIGHTS      = os.path.join(_DEMAND_SINGLE_DIR, "pathweight_ft.txt")
RUN_CONFIG         = os.path.join(_DEMAND_SINGLE_DIR, "config_ft.txt")
OUTPUT_DIR         = os.path.join(BASE_DIR, "tta", "output")

ITERATIONS         = 1
PATHFINDING_TYPE   = "stochastic"
DISPERSION         = 0.50

# Each run's output folder name is this prefix followed by the overlap method name.
OUTPUT_FOLDER_BASE = r"test_simplenet_wDispersion50_wOverlap"
# 'None' is a string, not Python's None.
OVERLAP_METHODS    = ['None', 'count', 'distance', 'time']

In [None]:
# Run Fast-Trips once per overlap method. Only `overlap_variable` and the output
# folder name change between runs; every other argument is held fixed.
for overlap_method in OVERLAP_METHODS:
    # Folder name is the shared prefix followed by the method name.
    OUTPUT_FOLDER         = OUTPUT_FOLDER_BASE + "%s" % overlap_method
    OVERLAP_SPLIT_TRANSIT = False

    run_arguments = dict(
        input_network_dir     = INPUT_NETWORKS,
        input_demand_dir      = INPUT_DEMAND,
        run_config            = RUN_CONFIG,
        input_weights         = INPUT_WEIGHTS,
        output_dir            = OUTPUT_DIR,
        output_folder         = OUTPUT_FOLDER,
        pathfinding_type      = PATHFINDING_TYPE,
        iters                 = ITERATIONS,
        overlap_variable      = overlap_method,
        overlap_split_transit = OVERLAP_SPLIT_TRANSIT,
        dispersion            = DISPERSION,
    )
    Run.run_fasttrips(**run_arguments)



## Analyze Results of Overlap Calculation

In [None]:
# Widen the column display so the whole path description is visible.
pd.set_option('display.max_colwidth', 160)
PATHS = r"pathset_paths.csv"
PATH_COLUMNS = ["person_id","person_trip_id","pathnum","description","sim_cost","probability"]

# Read the path set written by each overlap run, keep the columns of interest,
# and tag every row with the overlap method that produced it.
# The frames are collected in a list and concatenated once. This avoids
# re-copying the accumulated frame on every iteration, and it avoids detecting
# the "first" run by value, which would silently reset the accumulated result
# if the first overlap method appeared in the list more than once.
overlap_frames = []
for o in OVERLAP_METHODS:
    full_output_directory = os.path.join(OUTPUT_DIR, OUTPUT_FOLDER_BASE + "%s" % (o))
    df = pd.read_csv(os.path.join(full_output_directory, PATHS), sep=",")[PATH_COLUMNS]
    overlap_frames.append(df.assign(overlap_var=o))

# One combined table with a fresh 0..n-1 index.
overlap_paths_df = pd.concat(overlap_frames, ignore_index=True, axis=0)

overlap_paths_df

In [None]:
# Bar chart of path probability, labelled by overlap method and grouped by path number.
overlap_chart = Bar(
    overlap_paths_df,
    label='overlap_var',
    values='probability',
    group='pathnum',
    title="Probability by Overlap",
)
show(overlap_chart, notebook_handle=True)

## QUESTIONS

**2-a:** What is the effect of using overlap?  
**2-b:** What network characteristics would make overlap more or less important?