# Lesson 1 - Hyperpath-Based Path-Finding

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
from bqplot import (
    OrdinalScale, LinearScale, LogScale, Bars, Lines, Axis, Figure, Scatter, FlexLine,
)

from bqplot.marks import Graph
from ipywidgets import Layout

import bqplot
#%matplotlib inline
#import matplotlib.pyplot as plt
#import seaborn as sns
#from IPython import display
#%matplotlib notebook

## Define A Simple Network with Competing Options


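```
B  ---  ----  -->  D
B  -->  C     -->  D
        C     -->  D
        C     -->  D
```

The direct link B→D competes with a transfer at C, from which three parallel links (`cd1`, `cd2`, `cd3`) continue to D. The `access` (orig→B) and `egress` (D→dest) links are not shown in the sketch.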


In [2]:
# Directed links of the toy network: 'A' is the from-node, 'B' the to-node.
# 'IVT' and 'xfers' are the two attributes that enter the link utility later on
# (presumably in-vehicle time and number of transfers -- confirm units).
links=[{'A':'orig','B':'b','name':'access' , 'IVT':0  ,  'xfers': 0},
 {'A':'c','B':'d', 'name':'cd1', 'IVT':8  ,  'xfers': 0},
 {'A':'c','B':'d', 'name':'cd2', 'IVT':10  ,  'xfers': 0},
 {'A':'c','B':'d', 'name':'cd3', 'IVT':30 ,  'xfers': 0},
 {'A':'b','B':'c', 'name':'bc' , 'IVT':3  ,  'xfers': 1},
 {'A':'b','B':'d', 'name':'bd' , 'IVT':15  ,  'xfers': 0},
 {'A':'d','B':'dest','name':'egress' , 'IVT':0  ,  'xfers': 0},
]

n_df = pd.DataFrame(links)

# Node coordinates, used only for drawing the network.
nodes  = [{"label":"orig","x":0,"y":0},
          {"label":"b","x":1,"y":0},
          {"label":"c","x":3,"y":-.25},
          {"label":"d","x":5,"y":0},
          {"label":"dest","x":6,"y":0},
         ]

# Same coordinates as a frame keyed by 'N'. Derived from `nodes` instead of being
# typed out a second time, so the two representations cannot drift apart.
pos_df = pd.DataFrame(nodes).rename(columns={"label": "N"})[['N', 'x', 'y']]
#n_df, pos_df


### Examine Network to Make Sure it is what you want

In [3]:
# Attach the coordinates of each link's A-end and B-end. Both merges bring in the
# node key 'N', so pandas disambiguates the two copies as 'N_x' / 'N_y'.
net_df = (
    n_df
    .merge(pos_df, left_on=['A'], right_on=['N'], how='left')
    .rename(columns={"x":"Ax","y":"Ay"})
    .merge(pos_df, left_on=['B'], right_on=['N'], how='left')
    .rename(columns={"x":"Bx","y":"By"})
)

# Running index (0, 1, 2, ...) among links that share the same (A, B) pair;
# used to separate parallel links when drawing.
net_df['ab_num'] = net_df.groupby(['A','B']).cumcount()

def make_link(row):
    """Return the (x, y) polyline used to draw one link.

    *row* is any mapping/Series providing "Ax", "Ay", "Bx", "By" (end-point
    coordinates) and "ab_num" (index of the link among links sharing the same
    A and B nodes).

    Returns a tuple ``(x_coords, y_coords)`` of two 50-point numpy arrays
    running from the A end to the B end.
    """
    ##TODO calculate x and Y based on arcs based on how many links are overlapping
    ##...or just offset a convenient number
    # Parallel links are separated by scaling both end points by the same factor.
    # Note this is a multiplicative offset: a coordinate equal to 0 is not moved.
    scale = (1+(row["ab_num"]/30))

    x0=row["Ax"]*scale
    x1=row["Bx"]*scale
    y0=row["Ay"]*scale
    y1=row["By"]*scale

    x_coords = np.linspace(x0,x1, 50)
    y_coords = np.linspace(y0,y1, 50)
    return x_coords, y_coords

# make_link returns an (x, y) pair per row; unzip the pairs into two columns.
link_coords = net_df.apply(make_link, axis=1)
net_df['link_x'], net_df['link_y'] = zip(*link_coords)
net_df

Unnamed: 0,A,B,IVT,name,xfers,N_x,Ax,Ay,N_y,Bx,By,ab_num,link_x,link_y
0,orig,b,0,access,0,orig,0,0.0,b,1,0.0,0,"[0.0, 0.0204081632653, 0.0408163265306, 0.0612...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,c,d,8,cd1,0,c,3,-0.25,d,5,0.0,0,"[3.0, 3.04081632653, 3.08163265306, 3.12244897...","[-0.25, -0.244897959184, -0.239795918367, -0.2..."
2,c,d,10,cd2,0,c,3,-0.25,d,5,0.0,1,"[3.1, 3.14217687075, 3.1843537415, 3.226530612...","[-0.258333333333, -0.25306122449, -0.247789115..."
3,c,d,30,cd3,0,c,3,-0.25,d,5,0.0,2,"[3.2, 3.24353741497, 3.28707482993, 3.33061224...","[-0.266666666667, -0.261224489796, -0.25578231..."
4,b,c,3,bc,1,b,1,0.0,c,3,-0.25,0,"[1.0, 1.04081632653, 1.08163265306, 1.12244897...","[0.0, -0.00510204081633, -0.0102040816327, -0...."
5,b,d,15,bd,0,b,1,0.0,d,5,0.0,0,"[1.0, 1.08163265306, 1.16326530612, 1.24489795...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6,d,dest,0,egress,0,d,5,0.0,dest,6,0.0,0,"[5.0, 5.02040816327, 5.04081632653, 5.06122448...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [4]:
#print(net_df)
def plt_network_(nodes_df,links_df):
    """Draw the network with matplotlib/seaborn.

    *nodes_df* needs columns 'x', 'y' and 'N' (node name, used for the hue).
    *links_df* needs the pre-computed polyline columns 'link_x' and 'link_y'
    (i.e. a frame shaped like ``net_df``).

    NOTE(review): relies on ``plt`` and ``sns`` being in scope; their imports are
    commented out at the top of the notebook, so this cannot run until they are
    restored.
    """
    plt.figure(figsize=(12,8))

    #nodes
    ax1 = sns.pointplot(x='x', y='y', hue='N',data=nodes_df)

    #links
    ##TODO - fix these links to not be offset
    # Iterate over the frame that was passed in rather than the global net_df,
    # so the function draws whatever network the caller supplies.
    for _, row in links_df.iterrows():
        plt.plot(row['link_x'], row['link_y'])

    ##TODO - add in labels for links and values like IVT, etc.
    ax1.set_title("Network")
    ax1.set_ylim(-5,5)
    plt.show()
    
#plt_network(pos_df,n_df)

In [5]:
def bq_network(net_df, pos_df):
    """Build a bqplot Figure of the network: one line per link plus a node scatter.

    *net_df* needs columns 'link_x', 'link_y' (polyline coordinates) and 'name'
    (legend label). *pos_df* needs columns 'x', 'y' and 'N' (node name).

    Returns the bqplot ``Figure``; nothing is displayed by this function.
    """
    x_sc = LinearScale()
    y_sc  = LinearScale()

    # Reversed so the first ten links keep the colours the previous
    # pop()-from-the-end implementation gave them; indexing modulo the palette
    # length lets networks with more links than colours reuse colours instead of
    # failing on an exhausted list.
    palette = list(bqplot.CATEGORY10)[::-1]

    def as_link(position, row):
        return Lines(x=row["link_x"], y=row["link_y"], scales={'x': x_sc, 'y': y_sc},
                 stroke_width=3, colors = [palette[position % len(palette)]],
                 display_legend=True, labels=[row["name"]])

    links = [as_link(position, row)
             for position, (_, row) in enumerate(net_df.iterrows())]

    nodes = Scatter(x = pos_df['x'], y = pos_df['y'],
                      scales = {'x': x_sc, 'y': y_sc},
                      default_opacities = [0.4], default_size = 600,
                      names   = pos_df['N'])

    marks = links + [nodes]

    ax_x = Axis(scale=x_sc, grid_lines='solid', label='X')
    ax_y = Axis(scale=y_sc, orientation='vertical',
                grid_lines='solid', label='Y')

    fig = Figure(marks=marks,
           axes=[ax_x, ax_y],
           title='Simple Network',
           legend_location='bottom-right',
           padding_x = 0.05,
           padding_y = 0.05)
    return fig

# Build the figure, then end the cell with it so the notebook renders the widget.
f = bq_network(net_df, pos_df)

f

In [27]:
def bq_graph(net_df,nodes):
    """Build a bqplot Figure holding a directed ``Graph`` mark of the network.

    *net_df* supplies the links through its 'A' (source) and 'B' (target)
    columns. *nodes* is a list of dicts with 'label', 'x' and 'y'.

    Prints the first link record as a sanity check and returns the ``Figure``.

    NOTE(review): source/target are passed as node labels -- confirm that the
    installed bqplot ``Graph`` accepts labels rather than node indices.
    """
    edges = net_df.rename(columns={"A":"source","B":"target"})[['source','target']]
    link_data = edges.to_dict('records')
    print(link_data[0])

    nodes_df = pd.DataFrame(nodes)
    scales = {'x': LinearScale(), 'y': LinearScale() }

    graph = Graph(
        node_data=list(nodes_df['label']),
        link_data=link_data,
        scales=scales,
        directed=True,
        link_type='arc',
        x=list(nodes_df['x']),
        y=list(nodes_df['y']),
    )

    return Figure(marks=[graph], layout=Layout(width='960px', height='500px'))

# Last expression of the cell: the returned Figure is what gets rendered.
bq_graph(net_df,nodes)

{'source': 'orig', 'target': 'b'}


## Initialize Parameters and Variables

 * Coefficients on variables
 * Dispersion parameter, Theta
 * Direction of the shortest-path (SP) search (backward SP is the only one currently implemented)
 * Start and end nodes
 * Initial labels
 * Scan-eligible queue

In [7]:

# Utility coefficients: a transfer is weighted as 15 units of IVT.
C_IVT = -.1
C_xfer= C_IVT*15
THETA = 0.5

BEGIN = 'dest'
END   = 'orig' 
FORWARD = False     #TODO implement forward SP
INIT_LABEL = -9999  #should be same sign as the C_IVT

current_node   = BEGIN
# Labels are stored as floats from the start: the search later writes float
# logsums into these columns, and seeding them as ints would force pandas to
# upcast the column on assignment (deprecated behaviour in recent pandas).
n_df['label_j']= float(INIT_LABEL)
n_df['label_i']= float(INIT_LABEL)

# Initialize labels of starting node to zero
if FORWARD:
    n_df.loc[n_df['A']==current_node,'label_i'] = 0.0
else:
    n_df.loc[n_df['B']==current_node,'label_j'] = 0.0

# Calculate link-based utility
n_df['u_link']=(C_IVT*n_df['IVT']) + (C_xfer*n_df['xfers'])

# Initialize the queue of nodes(stops) to be examined
import queue
scan_eligible = queue.PriorityQueue()
scan_eligible.put((0,current_node))

#store results of each step in df
df_iters = {}

### Define Helper Functions

  * Logsum Calculation

In [8]:
def calculate_logsum(net_df, current_node, theta, forward = False, flag_positive = True, flag_negative = False):
    '''
    Calculates and returns the logsum of the labels on the links attached to *current_node*:
    ``theta * log(sum(exp(label / theta)))``.

    *net_df* is a pandas dataframe of the network with columns 'A', 'B', 'name' and the
             label column being read ('label_i' for backward, 'label_j' for forward).
             The exponentiated labels are written back into its 'exp_label' column.
    *current_node* is the name of the node that the logsum is being calculated for
    *theta* is the dispersion parameter
    *forward* is a boolean value to flag whether the shortest path is forward or backward.
              Backward uses links whose 'A' is *current_node* and reads 'label_i';
              forward uses links whose 'B' is *current_node* and reads 'label_j'.
    *flag_positive* is a boolean. If the labels are based on utilities, the logsum should always
              be negative, so a positive logsum is reported with "FLAG POSITIVE".
    *flag_negative* is a boolean. If the labels are based on costs, the logsum should always
              be positive, so a negative logsum is reported with "FLAG NEGATIVE".

    If no link matches *current_node* the sum is empty and the result is ``-inf``.
    '''

    if forward: 
        predecessor_node = 'B'
        predecessor_label = 'label_j'
        print("Calculating Forward SP Logsum")
    else:
        predecessor_node = 'A'
        predecessor_label = 'label_i'
        print("Calculating Backward SP Logsum")

    # Row selection is done on the frame that was passed in, not on a notebook
    # global, so the function works for any network frame.
    at_node = net_df[predecessor_node] == current_node

    ## Exponentiate the label from the predecessor link
    print("Using these links: ",list(net_df.loc[at_node,'name']))
    net_df.loc[at_node,'exp_label'] = np.exp((1/theta)*net_df.loc[at_node,predecessor_label])

    ## Logsum of exponentiated labels of predecessor links
    logsum = theta*np.log(np.sum(net_df.loc[at_node,'exp_label']))
    print("Logsum: ", logsum)

    if flag_positive and logsum>0:
        print("FLAG POSITIVE")

    # A *negative* logsum is the anomaly when labels are costs.
    if flag_negative and logsum<0:
        print("FLAG NEGATIVE")

    return logsum

## Backwards Hyperpath Calculation Algorithm

Calculates hyperlink utilities weighted based on log-sum formulations.

 * Implemented for very simple network of links and nodes.  
 * Does not consider transfer times.  
 * Does not consider timing, preferred arrival/departure times.  
 * Does not consider fares.  

**TODO** 

 * Implement true version of scan eligible list  
 * Calculate Probabilities

In [9]:
def backward_sp(n_df,scan_eligible,theta, steps_df = True):
    """Run the backward hyperpath labelling over *n_df*, updating it in place.

    *n_df* is the link frame with columns 'A', 'B', 'u_link', 'label_i' and
           'label_j' (plus whatever ``calculate_logsum`` reads).
    *scan_eligible* is a queue of ``(priority, node)`` tuples already seeded with
           the starting node; it is consumed until empty.
    *theta* is the dispersion parameter handed to ``calculate_logsum``.
    *steps_df* when true, a snapshot of *n_df* is stored per step in the
           module-level ``df_iters`` dict and a frame with columns
           'step', 'current_node', 'logsum' is returned; otherwise returns None.

    Nodes with no outgoing link are recorded with the placeholder logsum -99999.
    """
    step = 0
    steps = []
    while not scan_eligible.empty():
        step += 1

        current_node = scan_eligible.get()[1]
        print("---Current node: ", current_node)

        # Links arriving at the current node; their labels are what this step updates.
        arriving = n_df['B']==current_node

        # update label based on labels predecessors (if there are any), store in label_j
        logsum=-99999
        if (n_df['A']==current_node).any():
            logsum = calculate_logsum(n_df, current_node, theta, forward=False)
            n_df.loc[arriving,'label_j'] = logsum

        # label at predecessor node should be the label at current node + link cost
        n_df.loc[arriving,'label_i'] = n_df.loc[arriving,'label_j'] + n_df.loc[arriving,'u_link']

        # store the results of this iteration if desired
        if steps_df:
            steps.append([step, current_node,logsum])
            df_iters[step] = n_df.copy(deep=True)

        # add nodes to scan-eligible and prioritize based on label
        ## TODO make this real
        # Hard-coded visiting order for the example network: dest -> d -> c -> b.
        if current_node == 'dest': scan_eligible.put((0,'d'))
        if current_node == 'd':
            scan_eligible.put((1,'c'))
            scan_eligible.put((2,'b'))

    if steps_df:
        return pd.DataFrame(steps, columns=['step','current_node','logsum'])
    return None
        
    

In [10]:
# Run the search on the seeded queue. Besides returning the per-step summary,
# this updates n_df in place and fills df_iters with one snapshot per step.
steps_df = backward_sp(n_df,scan_eligible,THETA, steps_df=True)

---Current node:  dest
---Current node:  d
Calculating Backward SP Logsum
Using these links:  ['egress']
Logsum:  0.0
---Current node:  c
Calculating Backward SP Logsum
Using these links:  ['cd1', 'cd2', 'cd3']
Logsum:  -0.539830668726
---Current node:  b
Calculating Backward SP Logsum
Using these links:  ['bc', 'bd']
Logsum:  -1.41452260684


## Examine Hyperpath Algorithm's Process

  * All logsums should have the same sign as the C_IVT coefficient
  * All labels should be "improving" for each node at each step

In [11]:
## Reshape the per-step snapshots into one long frame
# Concatenating the dict keys the rows by (dict key, original row index);
# name those two index levels and turn them into ordinary columns.
stacked_iters = pd.concat(df_iters,axis=0)
stacked_iters.index.names = ['step', 'link_num']
iters_df = stacked_iters.reset_index()
#iters_df

# Only the last expression of a cell is rendered, i.e. iters_df.
steps_df
iters_df

Unnamed: 0,step,link_num,A,B,IVT,exp_label,label_i,label_j,name,u_link,xfers
0,1,0,orig,b,0,,-9999.0,-9999.0,access,-0.0,0
1,1,1,c,d,8,,-9999.0,-9999.0,cd1,-0.8,0
2,1,2,c,d,10,,-9999.0,-9999.0,cd2,-1.0,0
3,1,3,c,d,30,,-9999.0,-9999.0,cd3,-3.0,0
4,1,4,b,c,3,,-9999.0,-9999.0,bc,-1.8,1
5,1,5,b,d,15,,-9999.0,-9999.0,bd,-1.5,0
6,1,6,d,dest,0,,0.0,0.0,egress,-0.0,0
7,2,0,orig,b,0,,-9999.0,-9999.0,access,-0.0,0
8,2,1,c,d,8,,-0.8,0.0,cd1,-0.8,0
9,2,2,c,d,10,,-1.0,0.0,cd2,-1.0,0


In [12]:
def plt_logsums_each_step(steps_df):
    """Point-plot the logsum recorded at each step, coloured by node.

    *steps_df* needs columns 'step', 'logsum' and 'current_node'.

    NOTE(review): relies on ``plt`` and ``sns`` being in scope; their imports are
    commented out at the top of the notebook.
    """
    plt.figure(figsize=(12,8))
    logsum_ax = sns.pointplot(data=steps_df, x='step', y='logsum', hue='current_node')
    logsum_ax.set_title("Logsums by Node")
    logsum_ax.set_ylim(-5,5)
    plt.show()


def bq_logsums_each_step(steps_df):
    """Return a bqplot bar chart of the logsum recorded at each step.

    *steps_df* needs columns 'step' (x, ordinal) and 'logsum' (y).
    """
    step_scale = OrdinalScale()
    logsum_scale = LinearScale(min=-9)

    axes = [
        Axis(label='step', scale=step_scale),
        Axis(label='logsum', scale=logsum_scale, orientation='vertical',
             tick_format='0.0f', grid_lines='solid', min=-9),
    ]

    bars = Bars(x=steps_df["step"], y=steps_df["logsum"],
                scales={'x': step_scale, 'y': logsum_scale})

    return Figure(axes=axes, marks=[bars], padding_x=0, title="Logsum by Step")
#plot_logsums_each_step(steps_df)  

# Last expression of the cell: renders the returned bar-chart Figure.
bq_logsums_each_step(steps_df)

In [13]:
#sns.set_style("whitegrid")
def plt_labels_by_step(iters_df):
    """Point-plot ``label_i`` of every link across the algorithm's steps.

    *iters_df* needs columns 'step', 'label_i' and 'name' (link name, used as hue).

    NOTE(review): relies on ``plt`` and ``sns`` being in scope; their imports are
    commented out at the top of the notebook.
    """
    plt.figure(figsize=(12,8))
    ax3 = sns.pointplot(x='step', y='label_i', hue='name',data=iters_df)
    ax3.set_title("Label i over each step")
    # The on/off switch is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in matplotlib, so the positional form is the one
    # spelling that works on both old and new releases.
    ax3.grid(True, which='major', color='#d3d3d3', linewidth=1.0)
    ax3.grid(True, which='minor', color='#d3d3d3', linewidth=0.5)
    ax3.set_ylim(-5,5)
    plt.show()
#plt_labels_by_step(iters_df)

def bq_labels_by_step(iters_df):
    """Return a bqplot Figure tracing ``label_i`` against the step number.

    *iters_df* needs columns 'step' (x, ordinal), 'label_i' (y) and 'A'
    (passed as the FlexLine colour data).
    """
    step_scale = OrdinalScale()
    label_scale = LinearScale(min=-9)

    axes = [
        Axis(label='step', scale=step_scale),
        Axis(label='logsum', scale=label_scale, orientation='vertical',
             tick_format='0.0f', grid_lines='solid', min=-9),
    ]

    label_trace = FlexLine(
        x=iters_df["step"],
        y=iters_df["label_i"],
        color = list(iters_df["A"]),
        scales={'x': step_scale, 'y': label_scale},
    )

    return Figure(axes=axes, marks=[label_trace], padding_x=0, title="Labels by Step")

# Last expression of the cell: renders the returned Figure.
bq_labels_by_step(iters_df)