# Creating the OD Matrices
- Matrix 0: shortest trips between centroids
- Baseline: pop density and exp(-normalized distance) -> gravity model baseline like Yap et al.
- Matrix set 1: equalizing for median income, education level, number of schools and number of jobs SEPARATELY
- Matrix set 2: equalizing for different attributes in O and D. O/D equalized for education level/number of schools, median income/number of jobs

In [1]:
import pandas as pd
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'
import networkx as nx
import shapely
import multiprocess as mp
import numpy as np
import igraph as ig
from ta_lab.assignment.assign import frank_wolfe
from ta_lab.assignment.line import *
from ta_lab.assignment.graph import *
from ta_lab.assignment.shortest_path import ShortestPath as SPP

crs_fr = 2154


## Custom functions

In [2]:
#--- Custom function (Anastassia)
# Create a dictionary of attributes (useful for networkX)
%run -i packages.py
def make_attr_dict(*args, **kwargs):
    """Bundle the keyword arguments into a plain attribute dictionary.

    Positional arguments are accepted but ignored.

    Returns:
        A dict mapping every keyword name to its value, or None when no
        keyword arguments were supplied.
    """
    if not kwargs:
        return None  # no attributes were given

    return {name: value for name, value in kwargs.items()}

In [3]:
#--- Custom function (adapted from Jin)
# equalize an OD for DIFFERENT attributes in O and D
# multiply baseline with the number of opportunities in the destination and with the attribute of i over the avg **-delta

def equalization_all_2attributes(od, variable, colnameO, colnameD, delta, centroids):
    """Equalize an OD matrix with different attributes for origins and destinations.

    For every node listed in ``variable['ig']`` that is also in ``centroids``:
    the origin row is multiplied by ``(attribute / mean attribute) ** -delta``
    (attribute = ``colnameO``) and the destination column is multiplied by the
    raw value of ``colnameD``.

    Args:
        od: square OD DataFrame whose row/column labels are positions in
            ``centroids`` (rows = origins, columns = destinations).
        variable: DataFrame with an ``'ig'`` column plus ``colnameO`` and
            ``colnameD``.
        colnameO: column of ``variable`` used for the origin weight.
        colnameD: column of ``variable`` used for the destination weight.
        delta: exponent of the origin weight.
        centroids: list of node ids; the position of an id in this list is the
            row/column label in ``od``.

    Returns:
        A new float DataFrame; ``od`` and ``variable`` are left untouched.
    """
    # Work on a float copy: the weights are floats, and writing them into an
    # integer frame relies on implicit upcasting, which newer pandas rejects.
    od_ = od.astype(float)
    variable_ = variable.copy()

    variable_average1 = np.mean(variable_[colnameO])

    # attribute of i over the average, raised to -delta
    variable_['weightO'] = (variable_[colnameO] / variable_average1) ** -delta

    # number of opportunities at j
    variable_['weightD'] = variable_[colnameD]

    # position of each centroid id in the OD matrix (first occurrence wins,
    # like list.index)
    position = {}
    for pos, node_id in enumerate(centroids):
        position.setdefault(node_id, pos)

    # weights of the first row found for each node id
    first_rows = variable_.drop_duplicates(subset='ig')
    weights = {
        node_id: (weight_o, weight_d)
        for node_id, weight_o, weight_d in zip(
            first_rows['ig'], first_rows['weightO'], first_rows['weightD']
        )
    }

    for val in variable_['ig']:
        pos = position.get(val)
        if pos is None:
            continue  # node is not one of the centroids of this OD matrix
        if pos not in od_.index or pos not in od_.columns:
            continue  # centroid has no row/column in this OD matrix
        weightO, weightD = weights[val]
        od_.loc[pos] = od_.loc[pos] * weightO  # row = origin
        od_[pos] = od_[pos] * weightD  # column = destination

    return od_


In [4]:
#--- Custom function to use the function above in a batch
def equalization_with_2attributes(nodes_carbike_centroids_RER_complete, baseline_df, centroids, COLOFINTEREST1, COLOFINTEREST2, delta):
    """Run equalization_all_2attributes for one (origin, destination) attribute pair.

    Keeps only the centroid rows of the node table, restricted to the id
    columns and the two attribute columns, and equalizes ``baseline_df`` with
    them.

    Returns:
        A one-entry dict ``{name: equalized OD DataFrame}`` where the name
        encodes both attribute columns and ``delta``.
    """
    nodes = nodes_carbike_centroids_RER_complete
    is_centroid = nodes['centroid'].eq(True)
    wanted_columns = ['osmid', 'ig', 'CODE_IRIS', COLOFINTEREST1, COLOFINTEREST2]
    centroid_attributes = nodes.loc[is_centroid, wanted_columns].copy()

    label = f"OD_equalization_{COLOFINTEREST1}_O_{COLOFINTEREST2}_D_delta_{delta}"
    equalized = equalization_all_2attributes(
        baseline_df,
        centroid_attributes,
        COLOFINTEREST1,
        COLOFINTEREST2,
        delta,
        centroids,
    )

    return {label: equalized}


In [5]:
#--- Shapes
# Load the study-area geometries used by the notebook.

# GPM outline, reprojected to the CRS code stored in crs_fr
GPM = gpd.read_file('data/raw/GPM.geojson').to_crs(crs_fr)

# IRIS codes and shapes
# NOTE(review): unlike GPM this layer is not reprojected here and keeps the
# CRS stored in the file -- confirm that is intended.
IRIS_GPM = gpd.read_file('data/raw/IRIS_GPM.geojson')

## Creating the network in both NetworkX and igraph

In [6]:
#--- Create the network in NetworkX

def _node_attributes(row):
    """Build the NetworkX attribute dict for one row of the node table."""
    return make_attr_dict(
        nodetype=row.nodetype,
        centroid=row.centroid,
        RER=row.RER,
        IRIS=row.CODE_IRIS,
        pop_dens=row.pop_density,
        active_pop_density=row.active_pop_density,
        school_pop_density=row.school_pop_density,
        num_schools=row.school_count,
        num_jobs=row.num_jobs,
    )


# Edges: parse the WKT geometries, declare them as EPSG:4326 and reproject to 2154
edges_with_id = pd.read_csv('data/clean/initial_network_edges.csv')
edges_with_id["geometry"] = [shapely.wkt.loads(wkt) for wkt in edges_with_id["geometry"]]
edges_with_id = gpd.GeoDataFrame(edges_with_id, geometry='geometry', crs=4326)
edges_with_id = edges_with_id.to_crs(2154)
edges_with_id = edges_with_id.rename(columns={"id": "G"})

# Nodes: parse the WKT geometries, declared directly as CRS 2154
nodes_carbike_centroids_RER_complete = pd.read_csv('data/clean/initial_network_nodes_complete.csv')
nodes_carbike_centroids_RER_complete["geometry"] = [
    shapely.wkt.loads(wkt) for wkt in nodes_carbike_centroids_RER_complete["geometry"]
]
nodes_carbike_centroids_RER_complete = gpd.GeoDataFrame(
    nodes_carbike_centroids_RER_complete, geometry='geometry', crs=2154
)

# One attribute dict per node, attached to the graph below
nodes_carbike_centroids_RER_complete["attr_dict"] = nodes_carbike_centroids_RER_complete.apply(
    _node_attributes, axis=1
)

# Graph from the edge list (all edge columns become edge attributes), then
# add every node together with its attribute dict
G = nx.from_pandas_edgelist(edges_with_id, source='x', target='y', edge_attr=True)
G.add_nodes_from(
    zip(
        nodes_carbike_centroids_RER_complete["osmid"],
        nodes_carbike_centroids_RER_complete["attr_dict"],
    )
)

In [7]:
#--- Moving from NetworkX to igraph
networkx_graph = G
g_igraph = ig.Graph.from_networkx(networkx_graph)

# eids: "conversion table" for edge ids from igraph to nx.
# The nx side is the "G" edge attribute; the igraph side is the edge position.
eids_nx = [edge["G"] for edge in g_igraph.es]
eids_ig = list(range(len(g_igraph.es)))
eids_conv = pd.DataFrame({"nx": eids_nx, "ig": eids_ig})

# nids: "conversion table" for node ids from igraph to nx.
# The nx side is the "_nx_name" vertex attribute, cast to int.
nids_nx = [vertex["_nx_name"] for vertex in g_igraph.vs]
nids_ig = list(range(len(g_igraph.vs)))
nids_conv = pd.DataFrame({"nx": nids_nx, "ig": nids_ig})
nids_conv['nx'] = nids_conv['nx'].astype(int)

# attach the igraph id ("ig") to the node table via the osmid, then drop the helper column
nodes_carbike_centroids_RER_complete = (
    nodes_carbike_centroids_RER_complete
    .merge(nids_conv, left_on="osmid", right_on="nx", how="left")
    .drop(columns=["nx"])
)

In [8]:
# Isolate centroids

from itertools import combinations

# Number of centroids kept for test runs.
# TODO: set to None to build the matrices for every centroid.
CENTROID_TEST_LIMIT = 2

seq = g_igraph.vs.select(centroid_eq = True)
centroids = [v.index for v in seq]
if CENTROID_TEST_LIMIT is not None:
    centroids = centroids[:CENTROID_TEST_LIMIT]  # for testing purposes

# every unordered pair of centroids (igraph vertex ids)
node_combinations = list(combinations(centroids, 2))

## Baselines
- shortest path length
- school population density in i * deterrence factor
- active population density in i * deterrence factor

In [9]:
#--- Shortest path length 

def process_node(args):
    """Compute the weighted shortest-path length for one centroid pair.

    Args:
        args: ``(start_node, end_node)`` igraph vertex ids.

    Returns:
        ``(start_node, end_node, length)`` where the length comes from
        ``g_igraph.distances`` using the ``'weight'`` edge attribute.
    """
    source_vertex, target_vertex = args
    distance_table = g_igraph.distances(source=source_vertex, target=target_vertex, weights='weight')
    return (source_vertex, target_vertex, distance_table[0][0])

if __name__ == '__main__':
    # Number of processes (cores) to use for parallel processing
    num_processes = mp.cpu_count()

    # Create a pool of processes; always shut it down, even if a worker fails
    pool = mp.Pool(processes=num_processes)
    try:
        # Apply the function to each node combination using parallel processing
        results = pool.map(process_node, node_combinations)
    finally:
        pool.close()
        pool.join()

    # Nested dictionary: output[start_node][end_node] = shortest path length
    output = {}
    for start_node, end_node, shortest_path_length in results:
        output.setdefault(start_node, {})[end_node] = shortest_path_length

    # Square matrix indexed by position in `centroids`
    matrix = np.zeros((len(centroids), len(centroids)))

    # Each pair is stored once in `output`, so mirror it to keep the matrix symmetric
    for i, start_node in enumerate(centroids):
        for j, end_node in enumerate(centroids):
            if start_node in output and end_node in output[start_node]:
                matrix[i, j] = output[start_node][end_node]
                matrix[j, i] = output[start_node][end_node]


In [10]:
#--- School population density in i, deterrence factor

def process_node(args):
    """Compute the baseline demand from origin ``o`` to destination ``d``.

    Reads the module-level ``matrix``, ``g_igraph``, ``centroids`` and
    ``dist_decay``. The demand is the school population density of the origin
    centroid times ``dist_decay`` times ``exp(-distance / max distance)``.

    Args:
        args: ``(o, d)`` positions in ``centroids``.

    Returns:
        ``(o, d, demand)``; the demand is 0 when ``o == d``.
    """
    o, d = args
    if o == d:
        return (o, d, 0)

    normalized_dist = matrix[o][d] / matrix.max()
    origin_density = g_igraph.vs[centroids[o]]['school_pop_density']
    demand = origin_density * dist_decay * np.exp(-1 * normalized_dist)
    return (o, d, demand)

if __name__ == '__main__':
    baseline = np.zeros((len(centroids), len(centroids)))
    maxtrips = 100
    dist_decay = 1

    # Create node combinations: every ordered (origin, destination) position pair
    node_combinations = [(o, d) for o in range(len(centroids)) for d in range(len(centroids))]

    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # Calculate demand for each node combination using multiprocessing
        results = pool.map(process_node, node_combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

    # Update baseline matrix with calculated demand (row = origin, column = destination)
    for o, d, demand in results:
        baseline[o][d] = demand

    # Normalize the matrix to the number of maxtrips
    baseline = ((baseline / baseline.max()) * maxtrips)

    # Round up so that every non-zero demand becomes at least one trip
    baseline = np.ceil(baseline).astype(int)
    baseline_schoolpopdens = pd.DataFrame(baseline)

In [11]:
#--- Active population density in i, deterrence factor

def process_node(args):
    """Compute the baseline demand from origin ``o`` to destination ``d``.

    Reads the module-level ``matrix``, ``g_igraph``, ``centroids`` and
    ``dist_decay``. The demand is the active population density of the origin
    centroid times ``dist_decay`` times ``exp(-distance / max distance)``.

    Args:
        args: ``(o, d)`` positions in ``centroids``.

    Returns:
        ``(o, d, demand)``; the demand is 0 when ``o == d``.
    """
    o, d = args
    if o == d:
        return (o, d, 0)

    normalized_dist = matrix[o][d] / matrix.max()
    origin_density = g_igraph.vs[centroids[o]]['active_pop_density']
    demand = origin_density * dist_decay * np.exp(-1 * normalized_dist)
    return (o, d, demand)

if __name__ == '__main__':
    baseline = np.zeros((len(centroids), len(centroids)))
    maxtrips = 100
    dist_decay = 1

    # Create node combinations: every ordered (origin, destination) position pair
    node_combinations = [(o, d) for o in range(len(centroids)) for d in range(len(centroids))]

    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # Calculate demand for each node combination using multiprocessing
        results = pool.map(process_node, node_combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

    # Update baseline matrix with calculated demand (row = origin, column = destination)
    for o, d, demand in results:
        baseline[o][d] = demand

    # Normalize the matrix to the number of maxtrips
    baseline = ((baseline / baseline.max()) * maxtrips)

    # Round up so that every non-zero demand becomes at least one trip
    baseline = np.ceil(baseline).astype(int)
    baseline_activepopdens = pd.DataFrame(baseline)

## Matrix Set 1: No equalization yet, only appropriate population density and relevant POIs

In [12]:
#--- Multiply the columns of baseline_schoolpopdens with the number of schools in j

def process_node(args):
    """Scale the school baseline demand o -> d by the number of schools at d.

    Reads the module-level ``baseline_schoolpopdens``, ``g_igraph`` and
    ``centroids``.

    Args:
        args: ``(o, d)`` positions in ``centroids``.

    Returns:
        ``(o, d, demand)``; the demand is 0 when ``o == d``.
    """
    o, d = args
    if o == d:
        return (o, d, 0)

    # The baseline stores origins as rows and destinations as columns.
    # `df[o][d]` would select column o / row d, i.e. the transposed cell,
    # so address the cell positionally as (row o, column d).
    base_demand = baseline_schoolpopdens.iat[o, d]
    demand = (base_demand * (g_igraph.vs[centroids[d]]['num_schools']))
    return (o, d, demand)

if __name__ == '__main__':
    school_pop_dens_school_count_noEQ  = np.zeros((len(centroids), len(centroids)))

    # Create node combinations: every ordered (origin, destination) position pair
    node_combinations = [(o, d) for o in range(len(centroids)) for d in range(len(centroids))]

    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # Calculate demand for each node combination using multiprocessing
        results = pool.map(process_node, node_combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

    # Fill the matrix with the calculated demand (row = origin, column = destination)
    for o, d, demand in results:
        school_pop_dens_school_count_noEQ[o][d] = demand

In [13]:
#--- Multiply the columns of baseline_activepopdens with the number of jobs in j

def process_node(args):
    """Scale the active-population baseline demand o -> d by the number of jobs at d.

    Reads the module-level ``baseline_activepopdens``, ``g_igraph`` and
    ``centroids``.

    Args:
        args: ``(o, d)`` positions in ``centroids``.

    Returns:
        ``(o, d, demand)``; the demand is 0 when ``o == d``.
    """
    o, d = args
    if o == d:
        return (o, d, 0)

    # The baseline stores origins as rows and destinations as columns.
    # `df[o][d]` would select column o / row d, i.e. the transposed cell,
    # so address the cell positionally as (row o, column d).
    base_demand = baseline_activepopdens.iat[o, d]
    demand = (base_demand * (g_igraph.vs[centroids[d]]['num_jobs']))
    return (o, d, demand)

if __name__ == '__main__':
    active_pop_dens_job_count_noEQ  = np.zeros((len(centroids), len(centroids)))

    # Create node combinations: every ordered (origin, destination) position pair
    node_combinations = [(o, d) for o in range(len(centroids)) for d in range(len(centroids))]

    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # Calculate demand for each node combination using multiprocessing
        results = pool.map(process_node, node_combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

    # Fill the matrix with the calculated demand (row = origin, column = destination)
    for o, d, demand in results:
        active_pop_dens_job_count_noEQ[o][d] = demand

## Matrix Set 2: Same as above but equalizing for median income (jobs) and education level (schools)

In [14]:
#--- Schools/education level/school pop density

def process_combination(combination):
    """Equalize the school baseline for one (origin, destination) attribute pair.

    Runs equalization_with_2attributes on ``baseline_schoolpopdens`` for the
    deltas 0.5, 1 and 1.5.

    Args:
        combination: ``(origin attribute column, destination attribute column)``.

    Returns:
        A dict with one ``{name: equalized OD}`` entry per delta.
    """
    origin_column, destination_column = combination
    equalized_by_name = {}
    for delta in (0.5, 1, 1.5):
        equalized_by_name.update(
            equalization_with_2attributes(
                nodes_carbike_centroids_RER_complete,
                baseline_schoolpopdens,
                centroids,
                origin_column,
                destination_column,
                delta,
            )
        )
    return equalized_by_name

if __name__ == '__main__':
    # (origin attribute, destination attribute) pairs to equalize.
    # NOTE: this rebinds `combinations`, shadowing itertools.combinations
    # imported in an earlier cell.
    combinations = [['edu_level', 'school_count']]
    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # one dict of {name: equalized OD} per attribute pair
        Results = pool.map(process_combination, combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()


In [15]:
#--- Jobs/median income/active pop density
def process_combination(combination):
    """Equalize the active-population baseline for one attribute pair.

    Runs equalization_with_2attributes on ``baseline_activepopdens`` for the
    deltas 0.5, 1 and 1.5.

    Args:
        combination: ``(origin attribute column, destination attribute column)``.

    Returns:
        A dict with one ``{name: equalized OD}`` entry per delta.
    """
    origin_column, destination_column = combination
    equalized_by_name = {}
    for delta in (0.5, 1, 1.5):
        equalized_by_name.update(
            equalization_with_2attributes(
                nodes_carbike_centroids_RER_complete,
                baseline_activepopdens,
                centroids,
                origin_column,
                destination_column,
                delta,
            )
        )
    return equalized_by_name

if __name__ == '__main__':
    # (origin attribute, destination attribute) pairs to equalize.
    # NOTE: this rebinds `combinations`, shadowing itertools.combinations
    # imported in an earlier cell.
    combinations = [['median_income', 'num_jobs']]
    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        # one dict of {name: equalized OD} per attribute pair
        RResults = pool.map(process_combination, combinations)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

## Assign traffic flow

In [20]:
#--- Create dataframe of edges compatible with frank_wolfe function

# goal columns: edge name, source, target, free flow time, capacity, alpha, beta

def _edge_label(number):
    """Format an edge number as 'E' + zero-padded (width 4) number."""
    return 'E' + str(number).zfill(4)


def _node_label(osm_id):
    """Format a node id as 'N' + zero-padded (width 5) id."""
    return 'N' + str(osm_id).zfill(5)


g_df = nx.to_pandas_edgelist(G)

# Edge names numbered from 1 in edge-list order
g_df['edge'] = [_edge_label(number) for number in g_df.index + 1]

# Keep the columns we need and add the ones the NetworkX network lacks
g_df = g_df[['edge', 'source', 'target', 'length', 'geometry', 'G']].assign(
    capacity=1e10,
    alpha=0.15,  # no idea how this is set
    beta=4.0,  # same here
)

# Node names based on the osmIDs
g_df['source'] = g_df['source'].map(_node_label)
g_df['target'] = g_df['target'].map(_node_label)
g_df = g_df.reset_index()

# We have to explicitly say, and assume, that each link is a two-way road:
# duplicate every edge with source and target swapped and a fresh edge name
g_df2 = g_df.rename(columns={'source': 'target', 'target': 'source'})[list(g_df.columns)]
g_df2['edge'] = [_edge_label(number) for number in g_df2.index + 1 + len(g_df)]
g_df = pd.concat([g_df, g_df2])
geoms = g_df[['edge', 'geometry', 'index']]

# Clean-up, then put the columns in the order written to the CSV
g_df = g_df.drop(columns=['geometry', 'index'])
g_df = g_df[['edge', 'source', 'target', 'length', 'capacity', 'alpha', 'beta', 'G']]

g_df.to_csv('data/clean/network.csv', index=False)


In [21]:
#--- Create network compatible with frank_wolfe function
nt = Network('net')
node = Vertex("a")

# Feed every data row of the CSV written above into the network, one edge per row
with open("data/clean/network.csv") as fo:
    next(fo, None)  # skip the header row
    for ln in fo:
        nt.add_edge(Edge(ln.split(',')))

nt.init_cost()

In [22]:
#--- Make it a batch run

# Gather all result OD matrices.
# NOTE: extend() mutates Results in place, so re-running this cell appends
# RResults a second time.
Results.extend(RResults)
# Results.extend(school_pop_dens_school_count_noEQ)
# Results.extend(active_pop_dens_job_count_noEQ)

# Flatten the list of {name: OD} dicts into two parallel lists
OD_matrices_names = [name for result in Results for name in result]
OD_matrices = [od_frame for result in Results for od_frame in result.values()]

# create dictionary of centroid position -> modified osmID (node name)
centroid_igraph_to_mod_osmID = {}
for i, centroid_ig in enumerate(centroids):
    osmid = nodes_carbike_centroids_RER_complete.loc[
        nodes_carbike_centroids_RER_complete['ig'] == centroid_ig, 'osmid'
    ].values[0]
    centroid_igraph_to_mod_osmID[i] = 'N' + (str(osmid) + '.0').zfill(5)

# Create dictionary of run index -> matrix name
dict_index = dict(enumerate(OD_matrices_names))

# run frank-wolfe on all of them; dicts[k] is the result for OD_matrices_names[k]
dicts = []
for name, OD in zip(OD_matrices_names, OD_matrices):
    # Pair each name with its own matrix: looking the matrix up by name would
    # return the first match for repeated names.

    # Rename the columns and rows according to the modified osmID
    OD = OD.rename(columns={i: centroid_igraph_to_mod_osmID[i] for i in range(len(OD))})
    OD.index = OD.columns

    # From all centroids to all centroids
    origins = OD.columns
    destinations = origins

    vol2 = frank_wolfe(nt, OD, origins, destinations)
    dicts.append(vol2)


## Compute benefit metric for all gaps

In [28]:

# def process_chunk(chunk):
#         chunk['path'] = chunk['path'].apply(eval)
#         for j in range(len(dicts)):
#                 try:
#                         chunk["B_star"+str(j)] = chunk.apply(lambda x: 
#                                         np.sum([dicts[j][g_df.loc[g_df['id'] == i]['edge'].values[0]] * edge_lengths[i] for i in x.path]), 
#                                         axis=1)
#                         chunk["B"+str(j)] = chunk["B_star"+str(j)] / chunk["length"]
#                 except:
#                        continue
#         try:            
#                 rows_to_delete = chunk[['B'+str(j) for j in range(len(dicts))]].apply(lambda x: all(val == 0.000000 for val in x), axis=1)
#                 chunk = chunk[~rows_to_delete]
#         except:
#                 pass
#         print('processed one chunk!')
#         return chunk

### TEST, only one dict ###
import ast

def process_row(row, edge_lengths, dicts, g_df):
    """Compute the benefit metric of one gap row for the first result (j = 0).

    Parses ``row['path']`` (a string literal of igraph edge ids), and for every
    edge multiplies the value stored in ``dicts[0]`` under the edge's name by
    the edge length. The sum is stored as ``B_star0`` and ``B_star0 / length``
    as ``B0``. Edge names are looked up in ``g_df`` through the module-level
    ``eids_conv_dict``.

    Returns:
        The updated row, or None when anything fails (the error is printed).
    """
    try:
        row['path'] = ast.literal_eval(row['path'])
        j = 0
        star_key = f"B_star{j}"
        #TODO if still fails, try row.path_nx put into G.edges instead of row.path
        weighted_flows = []
        for i in row.path:
            edge_name = g_df.loc[g_df['G'] == eids_conv_dict[i]]['edge'].values[0]
            weighted_flows.append(dicts[j][edge_name] * edge_lengths[i])
        row[star_key] = np.sum(weighted_flows)
        row[f"B{j}"] = row[star_key] / row["length"]
        return row
    except Exception as e:
        print("Exception occurred at index:", row.name)
        print("Exception message:", str(e))
        return None

def process_chunk(chunk):
    """Apply process_row to every row of a chunk and rebuild a DataFrame.

    Uses the module-level ``edge_lengths``, ``dicts`` and ``g_df``.
    """
    processed_rows = []
    for _, row in chunk.iterrows():
        processed_rows.append(process_row(row, edge_lengths, dicts, g_df))
    return pd.DataFrame(processed_rows)

if __name__ == '__main__':
    file_path = './data/clean/identified_gaps_under50.csv'
    # chunked reader: yields DataFrames of up to 50000 rows
    mygaps = pd.read_csv(file_path, chunksize=50000)

    # igraph edge id -> nx edge id ("G"), and igraph edge id -> edge length
    eids_conv_dict = eids_conv.set_index('ig')['nx'].to_dict()
    edge_lengths = {i: g_igraph.es[i]["length"] for i in range(len(g_igraph.es))}

    num_processes = mp.cpu_count()
    pool = mp.Pool(processes=num_processes)
    try:
        results = pool.map(process_chunk, mygaps)
    finally:
        # always release the worker processes, even if a worker raised
        pool.close()
        pool.join()

    # Concatenate all the processed chunks back into a single DataFrame
    final_result = pd.concat(results)


KeyboardInterrupt: 

In [None]:
# Sort the gaps by each benefit metric and save the top 1500 in separate csvs.
# `mygaps` is the chunked CSV reader and cannot be sorted; the processed rows
# are in `final_result`.
for j in range(len(dicts)):
    metric = 'B' + str(j)
    if metric not in final_result.columns:
        # process_row currently only computes the metric for j = 0
        print('Benefit metric ' + metric + ' was not computed; skipping')
        continue
    final_result.sort_values(by=[metric], ascending=False).head(1500).to_csv('./data/clean/mygaps_B'+str(j)+'.csv', index=False)

In [None]:
# Save the mapping stored in dict_index as a CSV (one row per key).
# NOTE: this rebinds dict_index from the mapping to a DataFrame.
dict_index = pd.DataFrame.from_dict(dict_index, orient='index')
dict_index.to_csv('data/clean/dict_index.csv')

# OLD

In [None]:
# #--- Custom function
# # equalize an OD for the same attribute in O and D
# def equalization_all(od, variable, colname, delta, centroids): 
    
#     od_ = od.copy()
#     variable_ = variable.copy()
    
#     variable_average = np.mean(variable_[colname]) 
    
#     variable_['weight'] = variable_[colname].apply(lambda x: (x/variable_average)**-delta)

#     i =0
#     for val in variable_['ig']:
#         weight = variable_.loc[variable_['ig']==val]['weight'].iloc[0]
#         try:
#             od_[centroids.index(val)] *= weight 
#             od_.loc[centroids.index(val)] *= weight 
#         except:
#             continue
# #             print(val, ' not found')
#         i +=1
    
#     return od_

In [None]:
# #--- Custom function to use the function above in a batch
# def clean_data_with_od_matrices(nodes_carbike_centroids_RER_complete, baseline_df, centroids, COLOFINTEREST, delta):
#     col_tokeep = ['osmid', 'ig', 'CODE_IRIS', COLOFINTEREST]
#     COLOFINTEREST_df = nodes_carbike_centroids_RER_complete.loc[nodes_carbike_centroids_RER_complete['centroid'] == True].copy()
#     COLOFINTEREST_df = COLOFINTEREST_df[col_tokeep]
    
#     OD_equalization = equalization_all(baseline_df, COLOFINTEREST_df, COLOFINTEREST, delta, centroids)
    
#     OD_equalization_name = "OD_equalization_" + COLOFINTEREST + "_" + str(delta)
    
#     return {OD_equalization_name: OD_equalization}

## Matrix Set 1: equalizing for median income, education level, number of schools and number of jobs SEPARATELY (and multiplying with the population density of j)


### Baseline: population density of i and j and exponential term with normalised distance

In [None]:
# %%time

# def process_node(args):
#     global matrix
#     o, d = args
#     if o == d:
#         return (o, d, 0)
#     else:
#         normalized_dist = matrix[o][d] / matrix.max()
#         demand = (
#             (g_igraph.vs[centroids[o]]['pop_dens'] * g_igraph.vs[centroids[d]]['pop_dens'])
#             * dist_decay * np.exp(-1 * normalized_dist)
#         )
#         return (o, d, demand)

# if __name__ == '__main__':
#     baseline = np.zeros((len(centroids), len(centroids)))
#     maxtrips = 100
#     dist_decay = 1

#     num_processes = mp.cpu_count()
#     pool = mp.Pool(processes=num_processes)

#     # Create node combinations
#     node_combinations = [(o, d) for o in range(len(centroids)) for d in range(len(centroids))]

#     # Calculate demand for each node combination using multiprocessing
#     results = pool.map(process_node, node_combinations)

#     # Update baseline matrix with calculated demand
#     for o, d, demand in results:
#         baseline[o][d] = demand

#     # Normalize the matrix to the number of maxtrips
#     baseline = ((baseline / baseline.max()) * maxtrips)

#     # Round up to ensure each journey is made at least once
#     baseline = np.ceil(baseline).astype(int)
#     baseline_df1 = pd.DataFrame(baseline)

#     pool.close()
#     pool.join()


### Running the equalisation code like in Jin's paper

In [None]:
# %%time

# def process_data(args):
#     COLOFINTEREST = args
#     delta_list = [0.5, 1, 1.5]
#     results = {}
#     for delta in delta_list:
#         result = clean_data_with_od_matrices(nodes_carbike_centroids_RER_complete, baseline_df1, centroids, COLOFINTEREST, delta)
#         results.update(result)
#     return results

# if __name__ == '__main__':
#     num_processes = mp.cpu_count()

#     # Create a pool of processes
#     pool = mp.Pool(processes=num_processes)    
#     COLOFINTEREST_list = ['median_income', 'school_count', 'num_jobs', 'edu_level']
#     arguments = [COLOFINTEREST for COLOFINTEREST in COLOFINTEREST_list]
#     results = pool.map(process_data, arguments)
#     pool.close()
#     pool.join()


In [None]:
# results[0]['OD_equalization_median_income_1.5']
# results_JinEQ = results

## Matrix Set 2: equalize for median income/ education level when looking at number of jobs/number of schools, use total pop density

In [None]:
# #--- Total Population density at origin, num_jobs and school_count at destination, Median income and edu level at origin
# def process_combination(combination):
#     COLOFINTEREST1, COLOFINTEREST2 = combination
#     delta_list = [0.5, 1, 1.5]
#     results = {}
#     for delta in delta_list:
#         result = equalization_with_2attributes(nodes_carbike_centroids_RER_complete, baseline_df2, centroids, COLOFINTEREST1, COLOFINTEREST2, delta)
#         results.update(result)
#     return results

# if __name__ == '__main__':
#     combinations = [['median_income', 'num_jobs'], ['edu_level', 'school_count']]
#     num_processes = mp.cpu_count()
#     pool = mp.Pool(processes=num_processes)
#     Results = pool.map(process_combination, combinations)
#     pool.close()
#     pool.join()

# for i in range(len(Results)):
#     for old_key in Results[i]:
#         Results[i][old_key+'_totpopdens'] = Results[i].pop(old_key)
