# Lesson 1 - Hyperpath-Based Path-Finding

In [None]:
import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from IPython import display
%matplotlib notebook

## Define A Simple Network with Competing Options


B	---	----	-->	D    
B	-->	C	-->	D    
		C	-->	D    
        C	-->	D    


In [None]:
# Link table for the toy network, one record per directed link:
# (from-node A, to-node B, link name, IVT, number of transfers).
# NOTE(review): IVT is presumably in-vehicle time; units are not stated here.
_LINK_FIELDS = ('A', 'B', 'name', 'IVT', 'xfers')
_LINK_RECORDS = (
    ('orig', 'b', 'access', 0, 0),
    ('c', 'd', 'cd1', 8, 0),
    ('c', 'd', 'cd2', 10, 0),
    ('c', 'd', 'cd3', 30, 0),
    ('b', 'c', 'bc', 3, 1),
    ('b', 'd', 'bd', 15, 0),
    ('d', 'dest', 'egress', 0, 0),
)
n = [dict(zip(_LINK_FIELDS, record)) for record in _LINK_RECORDS]

n_df = pd.DataFrame(n)

# Plotting coordinates of every node, keyed by node name N.
pos_df = pd.DataFrame({
    'N': ['orig', 'b', 'c', 'd', 'dest'],
    'x': [0, 1, 3, 5, 6],
    'y': [0, 0, -.5, 0, 0],
})
#n_df, pos_df


### Examine Network to Make Sure it is what you want

In [None]:
def make_link(row):
    """Build the coordinates used to draw one link.

    *row* is a mapping (e.g. one dataframe row) holding the end-point
    coordinates under the keys "Ax", "Ay", "Bx" and "By".

    Returns a tuple (x_coords, y_coords) of 50 points each. The x values are
    linearly interpolated from Ax to Bx (numpy array). The y values are a
    list of (1/r)*sin(r*y) evaluated on the linear interpolation from Ay to
    By, with r = 200.
    """
    import math

    n_points = 50
    r = 200
    ##TODO calculate x and Y based on arcs based on how many links are overlapping
    ##...or just offset a convenient number
    x_coords = np.linspace(row["Ax"], row["Bx"], n_points)
    y_coords = []
    for y in np.linspace(row["Ay"], row["By"], n_points):
        y_coords.append((1/r)*math.sin(r*y))
    return x_coords, y_coords

# Attach node coordinates to every link: first those of the A end (Ax, Ay),
# then those of the B end (Bx, By). Left joins keep the link order of n_df.
net_df = (
    n_df
    .merge(pos_df.rename(columns={"x": "Ax", "y": "Ay"}),
           left_on=['A'], right_on=['N'], how='left')
    .merge(pos_df.rename(columns={"x": "Bx", "y": "By"}),
           left_on=['B'], right_on=['N'], how='left')
)

# One (x, y) polyline per link, split into two columns.
link_shapes = net_df.apply(make_link, axis=1)
net_df['link_x'], net_df['link_y'] = zip(*link_shapes)

#print(net_df)
def plot_network(nodes_df,links_df):
    """Plot the nodes and links of the network on one figure.

    *nodes_df* is a dataframe of nodes with columns 'N' (node name), 'x' and 'y'.
    *links_df* is a dataframe of links with columns 'link_x' and 'link_y', each
    holding the sequence of coordinates to draw for that link.
    """
    plt.figure(figsize=(12,8))

    #nodes
    # NOTE(review): seaborn's pointplot treats x as a categorical axis, which is
    # probably why the links look offset from the nodes -- confirm before fixing.
    ax1 = sns.pointplot(x='x', y='y', hue='N',data=nodes_df)

    #links
    ##TODO - fix these links to not be offset
    # Draw from the links_df argument rather than a module-level dataframe.
    for _, link in links_df.iterrows():
        plt.plot(link['link_x'], link['link_y'])

    ##TODO - add in labels for links and values like IVT, etc.
    ax1.set_title("Network")
    ax1.set_ylim(-5,5)
    plt.show()


# net_df (not n_df) is the link table that carries the link_x/link_y geometry.
plot_network(pos_df,net_df)

## Initialize Parameters and Variables

 * Coefficients on variables
 * Dispersion Parameter, Theta
 * Direction of SP (Backward SP is the one currently implemented)
 * Start and End Nodes
 * Initial Labels 
 * Scan-eligible queue

In [None]:

import queue

# Coefficients on the link variables
C_IVT = -.1
C_xfer= C_IVT*15    # each transfer is weighted as 15 units of IVT
THETA = 0.5         # dispersion parameter

BEGIN = 'dest'
END   = 'orig'
FORWARD = False     #TODO implement forward SP
INIT_LABEL = -9999  #should be same sign as the C_IVT

current_node   = BEGIN
# The labels later receive non-integer logsums and utilities, so the columns
# are created as floats up front instead of relying on pandas silently
# upcasting an integer column on assignment.
n_df['label_j']= float(INIT_LABEL)
n_df['label_i']= float(INIT_LABEL)

# Initialize labels of starting node to zero
if FORWARD:
    n_df.loc[n_df['A']==current_node,'label_i'] = 0.0
else:
    n_df.loc[n_df['B']==current_node,'label_j'] = 0.0

# Calculate link-based utility
n_df['u_link']=(C_IVT*n_df['IVT']) + (C_xfer*n_df['xfers'])

# Initialize the queue of nodes(stops) to be examined,
# as (priority, node) tuples
scan_eligible = queue.PriorityQueue()
scan_eligible.put((0,current_node))

#store results of each step in df
df_iters = {}

### Define Helper Functions

  * Logsum Calculation

In [None]:
def calculate_logsum(net_df, current_node, theta, forward = False, flag_positive = True, flag_negative = False):
    '''
    Calculates and returns logsum of the labels of the predecessor links.

    *net_df* is a pandas dataframe of the network with columns 'A', 'B', 'name',
    'label_i' and 'label_j'. It is modified in place: the exponentiated labels
    of the links used are written to its 'exp_label' column.
    *current_node* is the name of the node that the logsum is being calculated for
    *theta* is the dispersion parameter
    *forward* is a boolean value to flag whether the shortest path is forward or backward
    *flag_positive* is a boolean. If the labels are based on utilities, the logsum
    should never be positive, so a positive logsum is reported.
    *flag_negative* is a boolean. If the labels are based on costs, the logsum
    should never be negative, so a negative logsum is reported.

    Returns theta * ln(sum(exp(label / theta))) over the links used. If no link
    matches *current_node* the sum is zero and the result is -inf.
    '''

    if forward:
        predecessor_node = 'B'
        predecessor_label = 'label_j'
        print("Calculating Forward SP Logsum")
    else:
        predecessor_node = 'A'
        predecessor_label = 'label_i'
        print("Calculating Backward SP Logsum")

    # Select the links once, from the dataframe that was passed in (not from a
    # module-level dataframe), so the function works on any network table.
    is_predecessor = net_df[predecessor_node] == current_node
    print("Using these links: ", list(net_df.loc[is_predecessor, 'name']))

    ## Exponentiate the label from the predecessor link
    net_df.loc[is_predecessor, 'exp_label'] = np.exp((1/theta)*net_df.loc[is_predecessor, predecessor_label])

    ## Logsum of exponentiated labels of predecessor links
    logsum = theta*np.log(np.sum(net_df.loc[is_predecessor, 'exp_label']))
    print("Logsum: ", logsum)

    if flag_positive and logsum > 0:
        print("FLAG POSITIVE")

    if flag_negative and logsum < 0:
        print("FLAG NEGATIVE")

    return logsum

## Backwards Hyperpath Calculation Algorithm

Calculates hyperlink utilities weighted based on log-sum formulations.

 * Implemented for very simple network of links and nodes.  
 * Does not consider transfer times.  
 * Does not consider timing, preferred arrival/departure times.  
 * Does not consider fares.  

**TODO** 

 * Implement true version of scan eligible list  
 * Calculate Probabilities

In [None]:
def backward_sp(n_df,scan_eligible,theta, steps_df = True):
    """Run the backward hyperpath labelling over the scan-eligible queue.

    *n_df* is the link dataframe with columns 'A', 'B', 'label_i', 'label_j'
    and 'u_link'; its labels are updated in place.
    *scan_eligible* is a queue of (priority, node) tuples; nodes are processed
    until the queue is empty.
    *theta* is the dispersion parameter passed on to calculate_logsum.
    *steps_df* is a boolean. If True, a deep copy of n_df is stored after every
    step in the module-level dict df_iters (keyed by step number) and a
    dataframe with columns 'step', 'current_node' and 'logsum' is returned.
    If False, nothing is recorded and None is returned.
    """
    step = 0
    steps = []
    while not scan_eligible.empty():
        step += 1

        current_node = scan_eligible.get()[1]
        print("---Current node: ", current_node)

        # links that end at the current node; the 'B' column is never modified
        # here, so the mask is computed once per step
        inbound = n_df['B'] == current_node

        # update label based on labels predecessors (if there are any), store in label_j
        # (-99999 is the placeholder recorded when the node has no outgoing links)
        logsum=-99999
        if (n_df['A'] == current_node).any():
            logsum = calculate_logsum(n_df, current_node, theta, forward=False)
            n_df.loc[inbound,'label_j'] = logsum

        # label at predecessor node should be the label at current node + link cost
        n_df.loc[inbound,'label_i'] = n_df.loc[inbound,'label_j'] + n_df.loc[inbound,'u_link']

        # store the results of this iteration if desired
        if steps_df:
            steps.append([step, current_node,logsum])
            df_iters[step] = n_df.copy(deep=True)

        # add nodes to scan-eligible and prioritize based on label
        ## TODO make this real
        if current_node == 'dest':
            scan_eligible.put((0,'d'))
        if current_node == 'd':
            scan_eligible.put((1,'c'))
            scan_eligible.put((2,'b'))

    if steps_df:
        return pd.DataFrame(steps, columns=['step','current_node','logsum'])
    return None
        
    

In [None]:
# Run the backward pass; with steps_df=True the result has one row per
# processed node (step, current_node, logsum).
steps_df = backward_sp(n_df,scan_eligible,THETA, steps_df=True)

## Examine Hyperpath Algorithm's Process

  * All logsums should have the same sign as the C_IVT coefficient
  * All labels should be "improving" for each node for each step

In [None]:
## Reshape df_iters: stack the per-step snapshots into one long dataframe
## with the step number and the link's row number as ordinary columns
iters_df = (
    pd.concat(df_iters, axis=0)
    .rename_axis(['step', 'link_num'])
    .reset_index()
)
#iters_df

steps_df

In [None]:
def plot_logsums_each_step(steps_df):
    """Plot the logsum recorded at each step, with one hue per node.

    *steps_df* is a dataframe with columns 'step', 'current_node' and 'logsum'.
    """
    plt.figure(figsize=(12, 8))
    axes = sns.pointplot(data=steps_df, x='step', y='logsum', hue='current_node')
    axes.set_title("Logsums by Node")
    axes.set_ylim(-5, 5)
    plt.show()


plot_logsums_each_step(steps_df)

In [None]:
#sns.set_style("whitegrid")
def plot_labels_by_step(iters_df):
    """Plot label_i of every link at each step, with one hue per link name.

    *iters_df* is a dataframe with columns 'step', 'label_i' and 'name'.
    """
    plt.figure(figsize=(12,8))
    ax3 = sns.pointplot(x='step', y='label_i', hue='name',data=iters_df)
    ax3.set_title("Label i over each step")
    # Grid visibility is passed positionally: the keyword was `b` in older
    # matplotlib releases and `visible` in newer ones, so neither name works
    # everywhere.
    ax3.grid(True, which='major', color='#d3d3d3', linewidth=1.0)
    ax3.grid(True, which='minor', color='#d3d3d3', linewidth=0.5)
    ax3.set_ylim(-5,5)
    plt.show()


plot_labels_by_step(iters_df)