Single-Layer Effects of OD Demand
==============

Argues the following claim: the organized, partially-optimized demand structure of transportation networks (given by OD data), rather than edge weights, tends to dominate single-layer global behavior, such as travel times and congestion.

- **Methods:**
    - Computation of congested edge weights using ITA + BPR
- **Figs:**   
    - Shortest path distributions -- plots and summary stats
    - Betweenness distributions 


In [1]:
% matplotlib inline

# homebrewed modules
from metro import multiplex as mx
from metro import utility       # for manipulating multiplex
from metro import analysis      # analytical functions
from metro import viz           # for bubble_plot()
from metro import assignment    # for reading and manipulating OD data
from metro import ita

# Other modules
import networkx as nx
import random
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import pylab
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import warnings 
import itertools
import math
from matplotlib import cm
import fiona
from shapely.geometry import MultiPolygon, Point, shape
from descartes import PolygonPatch
from matplotlib.collections import PatchCollection




In [2]:
# Notebook-wide display settings.
warnings.filterwarnings('ignore')   # silence warning output from every later cell
sns.set_style('white')              # plain white figure background
plt.rcParams['savefig.dpi'] = 100   # raise to 300 for final publication figures

In [3]:
# Build the multiplex from its node and edge tables.
m = mx.read_multi(nodes_file_name = '3_throughput/mx_flow_nodes.txt',
                  edges_file_name = '3_throughput/mx_flow_edges.txt')

# Drop the metro layer and the metro--streets layer; the summary printed
# below then lists only streets, taz--streets and taz.
for unused_layer in ('metro', 'metro--streets'):
    m.remove_layer(unused_layer)

# Attach the OD data, keyed on the 'taz' attribute of nodes in the 'taz' layer.
m.read_od(layer = 'taz',
          key = 'taz',
          od_file = '1_data/taz_od/0_1.txt',
          sep = ' ')   # entries in the OD file are space-separated

# Print per-layer node and edge counts.
m.summary()

OD: loaded

layer           	nodes 	edges
----------------------------------------
streets          	10728 	21508
taz--streets     	0 	8486
taz              	4243 	0


In [4]:
# Some analyses in this notebook are very processor intensive, so they can
# optionally be run on a random subset of the street nodes instead of all of them.
# Set `n_nodes` to an integer to sample that many nodes;
# leave it as `None` to run the full computation.

n_nodes = None

# Weighted and Unweighted Shortest Paths

In [5]:
# Convert the multiplex to an igraph graph `g`. `to_igraph` also returns a second
# value, bound to `od` (presumably the OD demand -- confirm in metro.multiplex).
g, od = m.to_igraph()

In [6]:
# Edge-weight attributes under which shortest paths are computed.
weights = ['congested_time_m_100', 'free_flow_time_m', 'uniform_time_m', 'dist_km']

# Indices of the vertices whose 'layer' attribute is 'streets'; shortest paths
# are computed among these.
nodes = np.array([v.index for v in g.vs if v['layer'] == 'streets'])

# Optional sampling (see `n_nodes`): draw without replacement from a dedicated,
# seeded generator so that a sampled run gives the same subset every time and
# the global numpy random state is left untouched.
if n_nodes:
    sample_seed = 0
    nodes = np.random.RandomState(sample_seed).choice(nodes, size = n_nodes, replace = False)

In [7]:
# Shortest-path lengths among `nodes` under every edge weight, gathered into one
# dataframe with a column per weight. Passing `columns` fixes the column order
# to that of `weights` (purely cosmetic).
uniform_demand = pd.DataFrame(
    {w : analysis.path_lengths_igraph(g, nodes, w, mode = 'array') for w in weights},
    columns = weights)

# Drop every row in which any weight reports an infinite length, i.e. paths
# between unreachable nodes.
reachable = (uniform_demand != np.inf).all(axis = 1)
uniform_demand = uniform_demand[reachable]

In [8]:
# Precomputed shortest-path tables, one file per edge weight. Later cells read
# the `base_cost` and `flow` columns of each.
shortest_path_files = ('3_throughput/shortest_congested_time_m_100.csv',
                       '3_throughput/shortest_free_flow_time_m.csv',
                       '3_throughput/shortest_uniform_time_m.csv')

congested, free, constant = map(pd.read_csv, shortest_path_files)

In [None]:
# Flow-weighted mean of `base_cost` in the constant-edge-weight table.
np.average(constant['base_cost'], weights = constant['flow'])

22.08729573883674

In [None]:
# Compare shortest-path distributions under uniform demand (dashed KDE curves)
# with the flow-weighted distributions from the precomputed tables (labelled
# "observed"), for each time-based edge weight.
# Left panel: raw values. Right panel: the same comparison, standardized.
current_palette = sns.color_palette()

fig = plt.figure(figsize = (12, 4))

# edge-weight attribute -> short display name used in legends
edge_weights = {'congested_time_m_100' : 'congested',
                'free_flow_time_m' : 'free flow', 
                'uniform_time_m' : 'constant'}

# display name -> precomputed shortest-path table for that edge weight
dfs = {'congested' : congested,
      'free flow' : free,
      'constant' : constant}

# Explicit plotting order. Iterating `edge_weights` directly yields an arbitrary
# order on interpreters whose dicts are unordered, which would shuffle the
# colour assignment and the legend from run to run.
weight_order = ['congested_time_m_100', 'free_flow_time_m', 'uniform_time_m']

ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

for i, w in enumerate(weight_order):
    name = edge_weights[w]
    observed = dfs[name]
    # Pin the colour so both curves for one edge weight always match,
    # independent of the state of the axes' colour cycle.
    color = current_palette[i]

    # raw values
    sns.kdeplot(uniform_demand[w], 
                label = name + ' -- uniform', 
                ls = '--',
                color = color,
                ax = ax1)

    viz.weighted_hist(ax = ax1, 
                      measure = observed['base_cost'], 
                      weights = observed['flow'], 
                      label = name + ' -- observed',  
                      color = color)

    # standardized values
    sns.kdeplot(analysis.standardize(uniform_demand[w]), 
                label = name + ' -- uniform', 
                ls = '--',
                color = color,
                ax = ax2)

    viz.weighted_hist(ax = ax2, 
                      measure = observed['base_cost'], 
                      weights = observed['flow'], 
                      label = name + ' -- observed',  
                      standardized = True,
                      color = color)

ax1.legend()
ax2.legend()

ax1.set(xlim = (0, 100), 
        xlabel = 'time (m)', 
        ylabel = 'density', 
        title = 'Shortest paths under uniform and observed ODs')

ax2.set(xlim = (-3, 5), 
        xlabel = 'z-score (standard deviations from the mean)', 
        ylabel = 'density',
        title = 'Uniform and observed demand, standardized')

sns.despine()

In [None]:
# Text table of mean shortest-path cost per edge weight: the plain mean under
# uniform demand, then the flow-weighted mean from the precomputed tables.
# Each print call takes a single parenthesized string, which prints identically
# under Python 2 and Python 3. Weights are listed in sorted order so the rows
# appear in the same order on every run.
print('__SUMMARY TABLE__')
print('{0: <30}'.format('UNIFORM DEMAND') + 'MEAN')
for w in sorted(edge_weights):
    print('{0: <30}'.format(' ' + w) + str(round(uniform_demand[w].mean(), 1)))

print('OD DEMAND')
for w in sorted(edge_weights):
    observed_paths = dfs[edge_weights[w]]
    weighted_mean = np.average(observed_paths['base_cost'], weights = observed_paths['flow'])
    print('{0: <30}'.format(' ' + w) + str(round(weighted_mean, 1)))


# Weighted and unweighted distance between nodes

### Weighted

In [None]:
# Flow-weighted mean of the `dist_km` column in the precomputed distance table.
df = pd.read_csv('3_throughput/shortest_dist_km.csv')
np.average(df['dist_km'], weights = df['flow'])

### Uniform

In [None]:
# Unweighted mean of the `dist_km` column of `uniform_demand`: takes the mean of
# every column, then selects the 'dist_km' entry.
uniform_demand.mean()['dist_km']

In [None]:
# Draw the 'flow_1' attribute of the multiplex on a single square axes.
# `viz` is already imported in the notebook's first cell, so it is not
# re-imported here.
fig, ax = plt.subplots(figsize = (5, 5))

viz.flow_plot(m, 'flow_1', ax)

# Scrap work

## Distance mean

In [None]:
# Reload the congested-time shortest-path table for the scrap analysis below.
# NOTE: this rebinds `df`, replacing the table loaded in the distance section.
df = pd.read_csv('3_throughput/shortest_congested_time_m_100.csv')

In [None]:
# Sum of the `flow` column over all rows of the table.
df.flow.sum()

In [None]:
# Keep only the rows with strictly positive `gamma`.
dg = df[df.gamma > 0]

# Joint distribution of gamma * dist_km against gradient. x and y are passed by
# keyword: recent seaborn releases no longer accept them positionally, and the
# keyword form also works on older releases. The low alpha keeps overplotted
# regions readable.
sns.jointplot(x = dg.gamma * dg.dist_km, y = dg.gradient, alpha = .05)

In [None]:
# Label every row with one of 100 equal-width `gamma` intervals.
# Adds a 'bins' column to `df` in place.
df['bins'] = pd.cut(df['gamma'], bins = 100)

In [None]:
# Preview the first rows of the table, including the new 'bins' column.
df.head()

In [None]:
# Column means within each gamma bin.
binned = df.groupby('bins').mean()

# Per-bin speed as mean distance over mean congested time. This is a ratio of
# means, not a mean of per-row speeds (presumably km per minute, judging by the
# column names -- confirm units).
binned['speed'] = binned['dist_km'] / binned['congested_time_m']

In [None]:
# Preview the first few gamma bins and their derived speed.
binned.head()

In [None]:
# Joint distribution of distance against congested time for the gamma > 0 rows.
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.jointplot(x = dg.dist_km, y = dg.congested_time_m, alpha = .05)

In [None]:
# Summary statistics for the gamma > 0 subset.
dg.describe()

Hypothesis: some of these are crazy because they go through a few, very specific, high-congestion edges. This would explain both the few huge congested travel times and the existence of banding. 

In [None]:
# Sum of the `flow` column over the gamma > 0 subset (compare with `df.flow.sum()`).
dg.flow.sum()