# Creating the OD Matrices
- Matrix 0: shortest trips between centroids
- Baseline: gravity-model demand, i.e. the product of the origin and destination population densities damped by exp(-normalized distance), like Yap et al.
- Matrix set 1: equalizing for median income, education level, number of schools and number of jobs SEPARATELY
- Matrix set 2: equalizing for different attributes at the origin (O) and the destination (D). O/D pairs: education level (O) / number of schools (D), and median income (O) / number of jobs (D)

In [1]:
import timeit
# Wall-clock timer for the run; it is read again after the Matrix 0 cell.
start = timeit.default_timer()
import pandas as pd
import os
# Set before geopandas is imported on the next line.
# NOTE(review): presumably selects the shapely backend over PyGEOS - confirm for the installed geopandas.
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import matplotlib.pyplot as plt
# Silence pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'
# NOTE(review): osmnx is bound to `nx`, yet the network cell calls
# nx.from_pandas_edgelist, which is a networkx function. Presumably
# packages.py (run with %run -i) rebinds `nx` to networkx - verify.
import osmnx as nx
import shapely
import multiprocess as mp
import multiprocessing
import numpy as np
import math
import igraph as ig

# EPSG code of the projected CRS (EPSG:2154, RGF93 / Lambert-93) passed to to_crs below.
crs_fr = 2154


In [2]:
#--- Custom function (Anastassia)
%run -i packages.py
def make_attr_dict(*args, **kwargs):
    """Bundle the keyword arguments into an attribute dictionary.

    Positional arguments are accepted but ignored.

    Returns:
        A new dict mapping every keyword name to its value, or ``None`` when
        no keyword arguments are given.
    """
    if not kwargs:
        return None  # no attributes were given
    return dict(kwargs)

In [3]:
#--- Custom function (Jin)
# equalize an OD for the same attribute in O and D
def equalization_all(od, variable, colname, delta, centroids):
    """Equalize an OD matrix for one attribute applied to both O and D.

    Each centroid gets the weight ``(value / mean) ** -delta``, where
    ``value`` is its entry in ``variable[colname]`` and ``mean`` is the
    average of that column. The centroid's column and row of the OD matrix
    are both multiplied by that weight, so for a positive ``delta``
    below-average centroids are boosted and above-average ones are damped.

    Args:
        od: Square OD matrix as a DataFrame whose row and column labels are
            the positions ``0 .. len(centroids) - 1``.
        variable: DataFrame with an ``'ig'`` column (node ids, matched
            against ``centroids``) and the attribute column ``colname``.
            ``'ig'`` values are expected to be unique.
        colname: Name of the attribute column in ``variable``.
        delta: Exponent controlling the strength of the equalization.
        centroids: Sequence of node ids; the position of an id in this
            sequence is the row/column label of that centroid in ``od``.

    Returns:
        A float copy of ``od`` with the weights applied. Neither ``od`` nor
        ``variable`` is modified.

    Raises:
        KeyError: If ``od`` has no row/column for a centroid position.
    """
    # Work on a float copy: the weights are fractional, and writing float
    # rows into an integer frame is warned about or rejected by recent pandas.
    od_ = od.astype(float)

    values = variable[colname]
    weights = (values / np.mean(values)) ** -delta

    # Position of every centroid id; the first occurrence wins, like list.index.
    position = {}
    for pos, node in enumerate(centroids):
        position.setdefault(node, pos)

    for node, weight in zip(variable['ig'], weights):
        pos = position.get(node)
        if pos is None:
            # This node is not a centroid of the OD matrix: nothing to scale.
            continue
        od_[pos] *= weight      # column of the centroid
        od_.loc[pos] *= weight  # row of the centroid

    return od_

In [4]:
#--- Custom function to use the function above in a batch
def clean_data_with_od_matrices(nodes_carbike_centroids_RER_complete, baseline_df, centroids, COLOFINTEREST, delta):
    """Equalize the baseline OD matrix for one attribute and label the result.

    The node table is reduced to its centroid rows and to the columns
    ``osmid``, ``ig``, ``CODE_IRIS`` and ``COLOFINTEREST`` before being handed
    to ``equalization_all`` together with ``baseline_df``.

    Returns:
        A one-item dict ``{"OD_equalization_<COLOFINTEREST>_<delta>": matrix}``.
    """
    nodes = nodes_carbike_centroids_RER_complete
    is_centroid = nodes['centroid'] == True  # noqa: E712 - element-wise comparison
    centroid_rows = nodes.loc[is_centroid].copy()
    centroid_attributes = centroid_rows[['osmid', 'ig', 'CODE_IRIS', COLOFINTEREST]]

    equalized = equalization_all(baseline_df, centroid_attributes, COLOFINTEREST, delta, centroids)

    label = "_".join(("OD_equalization", COLOFINTEREST, str(delta)))
    return {label: equalized}

In [31]:
#--- Custom function (adapted from Jin)
# equalize an OD for DIFFERENT attributes in O and D

def equalization_all_2attributes(od, variable, colnameO, colnameD, delta, centroids):
    """Equalize an OD matrix with different attributes for origin and destination.

    Each centroid gets two weights:

    * origin weight ``(valueO / meanO) ** -delta``, applied to its row.
      The negative exponent penalizes high values (e.g. low income is
      prioritized).
    * destination weight ``(valueD / meanD) ** delta``, applied to its
      column. The positive exponent penalizes low values (e.g. a high number
      of jobs in an area is prioritized).

    Args:
        od: Square OD matrix as a DataFrame whose row and column labels are
            the positions ``0 .. len(centroids) - 1`` (rows = origins,
            columns = destinations).
        variable: DataFrame with an ``'ig'`` column (node ids, matched
            against ``centroids``) and the columns ``colnameO`` and
            ``colnameD``. ``'ig'`` values are expected to be unique.
        colnameO: Attribute column used for the origin weight.
        colnameD: Attribute column used for the destination weight.
        delta: Exponent controlling the strength of the equalization.
        centroids: Sequence of node ids; the position of an id in this
            sequence is the row/column label of that centroid in ``od``.

    Returns:
        A float copy of ``od`` with the weights applied. Neither ``od`` nor
        ``variable`` is modified.

    Raises:
        KeyError: If ``od`` has no row/column for a centroid position.
    """
    # Work on a float copy: the weights are fractional, and writing float
    # rows into an integer frame is warned about or rejected by recent pandas.
    od_ = od.astype(float)

    origin_values = variable[colnameO]
    destination_values = variable[colnameD]
    origin_weights = (origin_values / np.mean(origin_values)) ** -delta
    destination_weights = (destination_values / np.mean(destination_values)) ** delta

    # Position of every centroid id; the first occurrence wins, like list.index.
    position = {}
    for pos, node in enumerate(centroids):
        position.setdefault(node, pos)

    for node, weightO, weightD in zip(variable['ig'], origin_weights, destination_weights):
        pos = position.get(node)
        if pos is None:
            # This node is not a centroid of the OD matrix: nothing to scale.
            continue
        od_.loc[pos] *= weightO  # row = origin
        od_[pos] *= weightD      # column = destination

    return od_


In [39]:
#--- Custom function to use the function above in a batch
def equalization_with_2attributes(nodes_carbike_centroids_RER_complete, baseline_df, centroids, COLOFINTEREST1, COLOFINTEREST2, delta):
    """Equalize the baseline OD matrix for an origin/destination attribute pair.

    The node table is reduced to its centroid rows and to the columns
    ``osmid``, ``ig``, ``CODE_IRIS``, ``COLOFINTEREST1`` and ``COLOFINTEREST2``
    before being handed to ``equalization_all_2attributes``, with
    ``COLOFINTEREST1`` as the origin attribute and ``COLOFINTEREST2`` as the
    destination attribute.

    Returns:
        A one-item dict whose key is
        ``"OD_equalization_<COLOFINTEREST1>_O_<COLOFINTEREST2>_D_delta_<delta>"``
        and whose value is the equalized matrix.
    """
    nodes = nodes_carbike_centroids_RER_complete
    is_centroid = nodes['centroid'] == True  # noqa: E712 - element-wise comparison
    centroid_rows = nodes.loc[is_centroid].copy()
    centroid_attributes = centroid_rows[['osmid', 'ig', 'CODE_IRIS', COLOFINTEREST1, COLOFINTEREST2]]

    equalized = equalization_all_2attributes(
        baseline_df, centroid_attributes, COLOFINTEREST1, COLOFINTEREST2, delta, centroids
    )

    label = "".join(
        ("OD_equalization_", COLOFINTEREST1, "_O_", COLOFINTEREST2, "_D_delta_", str(delta))
    )
    return {label: equalized}


In [7]:
#--- Shapes

# GPM outline, reprojected to the project CRS (crs_fr)
GPM = gpd.read_file('data/raw/GPM.geojson').to_crs(crs_fr)

# IRIS codes and shapes
# NOTE(review): unlike GPM, this layer keeps the CRS stored in the file - confirm that is intended.
IRIS_GPM = gpd.read_file('data/raw/IRIS_GPM.geojson')

## Creating the network and adding igraph IDs to the node table

In [8]:
#--- Create the network in NetworkX
# Retrieve edges: geometries are stored as WKT text, declared as EPSG:4326
# and reprojected to the project CRS (crs_fr).
edges_with_id = pd.read_csv('data/clean/initial_network_edges.csv')
edges_with_id["geometry"] = edges_with_id["geometry"].apply(shapely.wkt.loads)
edges_with_id = gpd.GeoDataFrame(edges_with_id, geometry='geometry', crs=4326).to_crs(crs_fr)

# Retrieve nodes: geometries are stored as WKT text, declared to be in the
# project CRS already (no reprojection).
nodes_carbike_centroids_RER_complete = pd.read_csv('data/clean/initial_network_nodes_complete.csv')
nodes_carbike_centroids_RER_complete["geometry"] = nodes_carbike_centroids_RER_complete["geometry"].apply(shapely.wkt.loads)
nodes_carbike_centroids_RER_complete = gpd.GeoDataFrame(nodes_carbike_centroids_RER_complete, geometry='geometry', crs=crs_fr)

# Create the attr_dict: one dictionary of node attributes per row, keyed by
# the attribute names used on the graph (e.g. pop_density is stored as 'pop_dens').
nodes_carbike_centroids_RER_complete["attr_dict"] = nodes_carbike_centroids_RER_complete.apply(
    lambda row: make_attr_dict(
        nodetype=row.nodetype,
        centroid=row.centroid,
        RER=row.RER,
        IRIS=row.CODE_IRIS,
        pop_dens=row.pop_density,
        active_pop_density=row.active_pop_density,
        school_pop_density=row.school_pop_density,
        school_count=row.school_count,
        num_jobs=row.num_jobs,
    ),
    axis=1,
)

# Create Graph with all nodes and edges: edges come from the 'x'/'y' columns
# with every other column kept as an edge attribute; nodes are added as
# (osmid, attr_dict) pairs.
G = nx.from_pandas_edgelist(edges_with_id, source='x', target='y', edge_attr=True)
G.add_nodes_from(nodes_carbike_centroids_RER_complete.loc[:, ["osmid", "attr_dict"]].itertuples(index=False))

In [9]:
#--- Moving from NetworkX to igraph
# literal_eval is not imported anywhere else in the visible cells; importing it
# here keeps this cell self-contained.
from ast import literal_eval

networkx_graph = G
g_igraph = ig.Graph.from_networkx(networkx_graph)

# eids: "conversion table" for edge ids from igraph to nx
# The "edge_id" attribute is parsed with literal_eval and stored as a sorted
# tuple; the igraph id of an edge is its position in g_igraph.es.
eids_nx = [tuple(sorted(literal_eval(edge_id))) for edge_id in g_igraph.es["edge_id"]]
eids_ig = list(range(g_igraph.ecount()))
eids_conv = pd.DataFrame({"nx": eids_nx, "ig": eids_ig})

# nids: "conversion table" for node ids from igraph to nx
# "_nx_name" is read as the original NetworkX node name; the igraph id of a
# vertex is its position in g_igraph.vs.
nids_nx = list(g_igraph.vs["_nx_name"])
nids_ig = list(range(g_igraph.vcount()))
nids_conv = pd.DataFrame({"nx": nids_nx, "ig": nids_ig})


In [10]:
# Cast the NetworkX node names to int before they are used as the merge key
# against "osmid".
nids_conv['nx'] = nids_conv['nx'].astype(int)

# Attach the igraph id ("ig") of every node to the node table; the helper key
# column "nx" is dropped again after the left join.
nodes_carbike_centroids_RER_complete = (
    nodes_carbike_centroids_RER_complete
    .merge(nids_conv, left_on="osmid", right_on="nx", how="left")
    .drop(columns=["nx"])
)

In [11]:
# Isolate centroids
from itertools import combinations
# Vertices whose "centroid" attribute equals True
seq = g_igraph.vs.select(centroid_eq = True)
# igraph vertex indices of the centroids
centroids = [v.index for v in seq]
# NOTE(review): only the first two centroids are kept, so every matrix built
# from `centroids` below is 2x2. Remove this line for a full run.
centroids = centroids[0:2] #for testing purposes 
# All unordered pairs of centroids
node_combinations = list(combinations(centroids, 2))

## Matrix 0: shortest path between each pair of centroids

In [12]:
# Create OD matrix
def process_node(args):
    """Return the weighted shortest-path length between two vertices.

    Args:
        args: ``(start_node, end_node)`` pair of igraph vertex indices, packed
            into a single tuple so the function can be used with ``Pool.map``.

    Returns:
        ``(start_node, end_node, shortest_path_length)``; the length is
        computed on the module-level ``g_igraph`` with the ``'weight'`` edge
        attribute.
    """
    start_node, end_node = args
    # g_igraph is only read here, so no `global` declaration is needed.
    # `distances` is the replacement for the deprecated `shortest_paths_dijkstra`.
    shortest_path_length = g_igraph.distances(source=start_node, target=end_node, weights='weight')[0][0]
    return (start_node, end_node, shortest_path_length)

if __name__ == '__main__':
    # Number of processes (cores) to use for parallel processing
    num_processes = 4

    # Compute the shortest path of every centroid pair in parallel. The `with`
    # block shuts the pool down even when a worker raises.
    with mp.Pool(processes=num_processes) as pool:
        results = pool.map(process_node, node_combinations)

    # Nested dictionary {start_node: {end_node: shortest path length}}
    output = {}
    for start_node, end_node, shortest_path_length in results:
        output.setdefault(start_node, {})[end_node] = shortest_path_length

    # Distance matrix in the order of `centroids`; the diagonal stays 0
    matrix = np.zeros((len(centroids), len(centroids)))

    # Each pair was computed once, so mirror it to both (i, j) and (j, i)
    centroid_position = {node: idx for idx, node in enumerate(centroids)}
    for start_node, end_node, shortest_path_length in results:
        i = centroid_position[start_node]
        j = centroid_position[end_node]
        matrix[i, j] = shortest_path_length
        matrix[j, i] = shortest_path_length

print(matrix.shape)

stop = timeit.default_timer()

print('Time: ', stop - start)


  shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]


(2, 2)
Time:  28.87299732500105


## Baseline: population densities and exponential term with normalised distance

In [13]:
# Calculate demand between each origin and destination
# NO MULTIPROCESSING
maxtrips = 100
dist_decay = 1

# 'pop_dens' of every centroid, in the same order as the rows/columns of `matrix`
pop_dens = np.array([g_igraph.vs[c]['pop_dens'] for c in centroids], dtype=float)

# normalize the travel times versus the largest travel time between nodes in
# the matrix (the maximum is computed once, not once per cell)
normalized_dist = matrix / matrix.max()

# here, demand is a function of the product of the population of the origin and
# the destination - but reduced by the distance between them. 'Gravity demand'
baseline = np.outer(pop_dens, pop_dens) * dist_decay * np.exp(-1 * normalized_dist)

# do not insert demand down the spine - no trips where origin = destination
np.fill_diagonal(baseline, 0)

# we normalize the matrix to the number of maxtrips
baseline = ((baseline / baseline.max()) * maxtrips)

# we round up - to ensure each journey is made at least once
baseline = np.ceil(baseline).astype(int)
baseline_df = pd.DataFrame(baseline)

In [14]:
baseline_df

Unnamed: 0,0,1
0,0,100
1,100,0


## Matrix Set 1: equalizing for median income, education level, number of schools and number of jobs SEPARATELY


In [15]:
# Equalize the baseline once per attribute and per delta. Each entry of
# `results` is a one-item dict {matrix name: equalized OD matrix}.
# Note: `results` rebinds the name that held the pool output in the Matrix 0 cell.
# NOTE(review): the section heading also lists education level, which is not
# in the attribute list below - confirm whether that is intended.
results = []
for COLOFINTEREST in ['median_income', 'school_count', 'num_jobs']:
    for delta in (0.5, 1, 1.5):
        result = clean_data_with_od_matrices(nodes_carbike_centroids_RER_complete, baseline_df, centroids, COLOFINTEREST, delta)
        results.append(result)

## Matrix Set 2: equalize for O/D attribute pairs: median income (O) / number of jobs (D), education level (O) / number of schools (D)

In [41]:
# (origin attribute, destination attribute) pairs to equalize for.
# The list is named `attribute_pairs` so that it does not overwrite the
# `combinations` function imported from itertools in the centroid cell.
attribute_pairs = [['median_income', 'num_jobs']]#,['edu_level', 'num_schools']]

# Each entry of `Results` is a one-item dict {matrix name: equalized OD matrix}
Results = []
for origin_attribute, destination_attribute in attribute_pairs:
    for delta in (0.5, 1, 1.5):
        result = equalization_with_2attributes(nodes_carbike_centroids_RER_complete, baseline_df, centroids, origin_attribute, destination_attribute, delta)
        Results.append(result)

Results

[{'OD_equalization_median_income_O_num_jobs_D_delta_0.5':            0           1
  0   0.000000  139.714224
  1  52.024671    0.000000},
 {'OD_equalization_median_income_O_num_jobs_D_delta_1':            0           1
  0   0.000000  195.200644
  1  27.065664    0.000000},
 {'OD_equalization_median_income_O_num_jobs_D_delta_1.5':            0           1
  0   0.000000  272.723065
  1  14.080823    0.000000}]