# Creating the OD Matrices
- Matrix 0: shortest trips between centroids
- Baseline: pop density and exp(normalized distance) -> gravity model baseline like Yap et al.
- Scenario 1: Attribute to equalize for: median income
- Scenario 2: attribute to equalize for: education level and school access 
- Scenario 3: attribute to equalize for: job access ( = #available jobs, maybe add competition )

In [11]:
import timeit
start = timeit.default_timer()
import pandas as pd
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'
import osmnx as nx
import shapely
import multiprocess as mp
import multiprocessing
import numpy as np
import math
import igraph as ig

crs_fr = 2154


In [2]:
#--- Custom function (Anastassia)
%run -i packages.py
def make_attr_dict(*args, **kwargs): 
    
    argCount = len(kwargs)
    
    if argCount > 0:
        attributes = {}
        for kwarg in kwargs:
            attributes[kwarg] = kwargs.get(kwarg, None)
        return attributes
    else:
        return None # (if no attributes are given)

In [3]:
def equalization_all(od, variable, colname, delta, centroids): #For equalisation matrices (Jin)
    
    od_ = od.copy()
    variable_ = variable.copy()
    
    variable_average = np.mean(variable_[colname]) 
    
    variable_['weight'] = variable_[colname].apply(lambda x: (x/variable_average)**-delta)

    i =0
    for val in variable_['ig']:
        weight = variable_.loc[variable_['ig']==val]['weight'].iloc[0]
        try:
            od_[centroids.index(val)] *= weight 
            od_.loc[centroids.index(val)] *= weight 
        except:
            continue
#             print(val, ' not found')
        i +=1
    
    return od_

In [4]:
#--- Shapes

# GPM outline
GPM = gpd.read_file('data/raw/GPM.geojson').to_crs(crs_fr)

# IRIS codes and shapes 
IRIS_GPM = gpd.read_file('data/raw/IRIS_GPM.geojson')

## Creating the network and adding igraph IDs to the node table

In [5]:
#--- Create the network in NetworkX
# Retrieve edges
edges_with_id = pd.read_csv('data/clean/initial_network_edges_complete.csv')
edges_with_id["geometry"] = edges_with_id.apply(lambda x: shapely.wkt.loads(x.geometry), axis = 1)
edges_with_id = gpd.GeoDataFrame(edges_with_id, geometry = 'geometry', crs = 4326).to_crs(2154)

# Retrieve nodes
nodes_carbike_centroids_RER_complete = pd.read_csv('data/clean/initial_network_nodes_complete.csv')
nodes_carbike_centroids_RER_complete["geometry"] = nodes_carbike_centroids_RER_complete.apply(lambda x: shapely.wkt.loads(x.geometry), axis = 1)
nodes_carbike_centroids_RER_complete = gpd.GeoDataFrame(nodes_carbike_centroids_RER_complete, geometry = 'geometry', crs = 2154)

# Create the attr_dict
nodes_carbike_centroids_RER_complete["attr_dict"] = nodes_carbike_centroids_RER_complete.apply(lambda x: make_attr_dict(
                                                                  nodetype = x.nodetype,
                                                                  centroid = x.centroid,
                                                                  RER = x.RER,
                                                                  IRIS = x.CODE_IRIS,
                                                                  pop_dens = x.pop_density,
                                                                  active_pop_density = x.active_pop_density,
                                                                  school_pop_density = x.school_pop_density,
                                                                  school_count = x.school_count,
                                                                  num_jobs = x.num_jobs,
                                                                  ),
                                                                  axis = 1) 

# Create Graph with all nodes and edges
G = nx.from_pandas_edgelist(edges_with_id, source='x', target='y', edge_attr=True)
G.add_nodes_from(nodes_carbike_centroids_RER_complete.loc[:,["osmid", "attr_dict"]].itertuples(index = False))

In [6]:
#--- Moving from NetworkX to igraph
g_igraph = ig.Graph()
networkx_graph = G
g_igraph = ig.Graph.from_networkx(networkx_graph)

# eids: "conversion table" for edge ids from igraph to nx 
eids_nx = [tuple(sorted(literal_eval(g_igraph.es(i)["edge_id"][0]))) for i in range(len(g_igraph.es))]
eids_ig = [i for i in range(len(g_igraph.es))]
eids_conv = pd.DataFrame({"nx": eids_nx, "ig": eids_ig})

# nids: "conversion table" for node ids from igraph to nx
nids_nx = [g_igraph.vs(i)["_nx_name"][0] for i in range(len(g_igraph.vs))]
nids_ig = [i for i in range(len(g_igraph.vs))]
nids_conv = pd.DataFrame({"nx": nids_nx, "ig": nids_ig})


In [7]:
nids_conv['nx'] = nids_conv['nx'].astype(int)

# combine the conversion table with nodes_carbike_centroids_RER_complete
nodes_carbike_centroids_RER_complete = nodes_carbike_centroids_RER_complete.merge(nids_conv, left_on = "osmid", right_on = "nx", how = "left")
nodes_carbike_centroids_RER_complete = nodes_carbike_centroids_RER_complete.drop(columns = ["nx"])

In [17]:
# Isolate centroids
from itertools import combinations
seq = g_igraph.vs.select(centroid_eq = True)
centroids = [v.index for v in seq]
centroids = centroids[0:500] #for testing purposes 
node_combinations = list(combinations(centroids, 2))

## Matrix 0: shortest path between each pair of centroids

In [18]:
# Create OD matrix
def process_node(args):
    start_node, end_node = args
    global g_igraph
    shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]
    return (start_node, end_node, shortest_path_length)

if __name__ == '__main__':
    # Number of processes (cores) to use for parallel processing
    num_processes = 4
    global g_igraph

    # Create a pool of processes
    pool = mp.Pool(processes=num_processes)

    # Apply the function to each node combination using parallel processing
    results = pool.map(process_node, node_combinations)

    # Create a dictionary to store the shortest path lengths
    output = {}
    for start_node, end_node, shortest_path_length in results:
        if start_node not in output:
            output[start_node] = {}
        output[start_node][end_node] = shortest_path_length

    # Create an empty adjacency matrix
    matrix = np.zeros((len(centroids), len(centroids)))

    # Fill the adjacency matrix with shortest path lengths
    for i, start_node in enumerate(centroids):
        for j, end_node in enumerate(centroids):
            if start_node in output and end_node in output[start_node]:
                matrix[i, j] = output[start_node][end_node]
                matrix[j, i] = output[start_node][end_node]

    # Close the pool
    pool.close()
    pool.join()

print(matrix.shape)

stop = timeit.default_timer()

print('Time: ', stop - start)  


  shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]
  shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]
  shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]
  shortest_path_length = g_igraph.shortest_paths_dijkstra(source=start_node, target=end_node, weights='weight')[0][0]


## Baseline: population densities and exponential term with normalised distance

In [None]:
# Calculate demand between each origin and destination
# NO MULTIPROCESSING
baseline = np.zeros((len(centroids), len(centroids)))
maxtrips = 100
dist_decay = 1

for o in range(0, len(centroids)):
    for d in range(0, len(centroids)):
        if o == d:
            # do not insert demand down the spine - no trips where origin = destination
            baseline[o][d] = 0
        else:
            # normalize the current travel time versus the largest travel time between nodes in the matrix
            normalized_dist = matrix[o][d] / matrix.max()

            #  here, demand is a function of the product of the population of the origin and
            #  the destination - but reduced by the distance between them. 'Gravity demand'
            baseline[o][d] = ((g_igraph.vs[centroids[o]]['pop_dens'] * g_igraph.vs[centroids[d]]['pop_dens']) * dist_decay * np.exp(-1 * normalized_dist))

# we normalize the matrix to the number of maxtrips
baseline = ((baseline / baseline.max()) * maxtrips)

# we round up - to ensure each journey is made at least once
baseline = np.ceil(baseline).astype(int)
baseline_df = pd.DataFrame(baseline)

In [None]:
baseline_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0,4,2,7,4,1,3,1,4,6,...,6,1,1,4,4,6,1,5,3,1
1,4,0,3,16,12,2,6,1,10,12,...,14,2,2,9,11,13,2,10,7,1
2,2,3,0,6,4,1,2,1,4,5,...,5,1,1,3,3,5,1,4,3,1
3,7,16,6,0,18,3,10,1,18,22,...,24,3,4,15,15,23,4,17,12,2
4,4,12,4,18,0,2,6,1,12,14,...,16,3,2,10,11,15,3,11,8,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,6,13,5,23,15,2,9,1,15,18,...,22,3,3,13,13,0,3,16,10,2
96,1,2,1,4,3,1,2,1,3,4,...,3,1,1,2,2,3,0,2,2,1
97,5,10,4,17,11,2,8,1,12,13,...,16,2,3,10,10,16,2,0,8,2
98,3,7,3,12,8,1,4,1,8,11,...,10,2,2,7,7,10,2,8,0,1


## Matrix 1: Equalize for the median income TODO

## Matrix 2: Equalize for number of schools present (TODO add education level?)


In [12]:
col_tokeep = ['osmid', 'ig', 'CODE_IRIS', 'school_count']

school_count_df = nodes_carbike_centroids_RER_complete.loc[nodes_carbike_centroids_RER_complete['centroid'] == True].copy()
school_count_df = school_count_df[col_tokeep]

In [13]:
OD_equalization_schoolcount_05 = equalization_all(baseline_df, school_count_df, 'school_count', 0.5, centroids)
OD_equalization_schoolcount_1 = equalization_all(baseline_df, school_count_df, 'school_count', 1, centroids)
OD_equalization_schoolcount_1_5 = equalization_all(baseline_df, school_count_df, 'school_count', 1.5, centroids)

## Matrix 3: Equalize for the number of jobs (TODO add median income?)

In [22]:
col_tokeep = ['osmid', 'ig', 'CODE_IRIS', 'num_jobs']

job_count_df = nodes_carbike_centroids_RER_complete.loc[nodes_carbike_centroids_RER_complete['centroid'] == True].copy()
job_count_df = job_count_df[col_tokeep]

In [24]:
OD_equalization_jobcount_05 = equalization_all(baseline_df, job_count_df, 'num_jobs', 0.5, centroids)
OD_equalization_jobcount_1 = equalization_all(baseline_df, job_count_df, 'num_jobs', 1, centroids)
OD_equalization_jobcount_1_5 = equalization_all(baseline_df, job_count_df, 'num_jobs', 1.5, centroids)