In [2]:
# parameters.ipynb

import geopandas as gdp
import numpy as np
import pandas as pd
import itertools
from scenarios import scenarios  # Import scenario configurations
from scipy.spatial.distance import cdist

In [3]:
# Select Scenario
# `scenario_name` is used as the lookup key into the imported `scenarios` object.
# Later cells read their settings (location file, worker bounds, demand rates, ...)
# from the resulting `params`.
scenario_name = "pugnido_baseline"  # Change this to switch scenarios
params = scenarios[scenario_name]  # Load selected scenario parameters

In [4]:
# Load the GeoJSON file
# The path is taken from the selected scenario's "location_file" entry
# (`gdp` is this notebook's alias for geopandas).
location_nodes =  gdp.read_file(params["location_file"])
# Display the loaded table
location_nodes

Unnamed: 0,Name,Camp,type_f,geometry
0,Pugnido1,Pugnido,demand_point,POINT (34.22348 7.6806)
1,Pugnido2,Pugnido,demand_point,POINT (34.25126 7.67723)
2,Pugnido3,Pugnido,demand_point,POINT (34.23382 7.67988)
3,Pugnido4,Pugnido,demand_point,POINT (34.25482 7.68938)
4,Pugnido5,Pugnido,demand_point,POINT (34.24193 7.67457)
...,...,...,...,...
205,HP_CandidateLocation6,Pugnido,HP,POINT (34.22076 7.68178)
206,HC_CandidateLocation1,Pugnido,HC,POINT (34.23015 7.6799)
207,HC_CandidateLocation2,Pugnido,HC,POINT (34.25528 7.6843)
208,HC_CandidateLocation3,Pugnido,HC,POINT (34.24948 7.67685)


In [5]:
# Store the point coordinates of every node as plain columns
location_nodes['x'] = location_nodes.geometry.x
location_nodes['y'] = location_nodes.geometry.y

# Order the nodes by y, then x, and renumber the rows from 0
location_nodes = location_nodes.sort_values(by=['y', 'x']).reset_index(drop=True)

# Demand points: take an independent copy and label them i1, i2, ... in sorted order
demand_points_gdf = location_nodes[location_nodes["type_f"] == "demand_point"].copy()
demand_points_gdf['label'] = [f"i{n}" for n in range(1, len(demand_points_gdf) + 1)]

# Demand point labels as a NumPy array
dps = demand_points_gdf['label'].to_numpy()

# Candidate locations by facility type (HP / HC), plus the combined set of candidates
# with repeated geometries removed; reset_index(drop=False) keeps the position in
# `location_nodes` as an 'index' column
hps_gdf = location_nodes.loc[location_nodes["type_f"] == "HP"]
hcs_gdf = location_nodes.loc[location_nodes["type_f"] == "HC"]
hfs_gdf = (
    location_nodes[location_nodes["type_f"].isin(["HC", "HP"])]
    .drop_duplicates(subset='geometry')
    .reset_index(drop=False)
)

# Candidate locations are labelled j1, j2, ... in sorted order
hfs_gdf['label'] = [f"j{n}" for n in range(1, len(hfs_gdf) + 1)]

# Label arrays: all candidates, and those whose geometry is an HP / HC candidate site
hfs = hfs_gdf['label'].to_numpy()
hps = hfs_gdf.loc[hfs_gdf['geometry'].isin(hps_gdf['geometry']), 'label'].to_numpy()
hcs = hfs_gdf.loc[hfs_gdf['geometry'].isin(hcs_gdf['geometry']), 'label'].to_numpy()


In [6]:
# Inspect the labelled demand points; the index shown is each row's position in the sorted `location_nodes` table
demand_points_gdf

Unnamed: 0,Name,Camp,type_f,geometry,x,y,label
0,Pugnido83,Pugnido,demand_point,POINT (34.24178 7.66798),34.241778,7.667985,i1
2,Pugnido118,Pugnido,demand_point,POINT (34.24213 7.67027),34.242135,7.670269,i2
3,Pugnido16,Pugnido,demand_point,POINT (34.24039 7.67059),34.240385,7.670593,i3
4,Pugnido177,Pugnido,demand_point,POINT (34.2377 7.67126),34.237696,7.671259,i4
5,Pugnido96,Pugnido,demand_point,POINT (34.24089 7.67145),34.240893,7.671448,i5
...,...,...,...,...,...,...,...
205,Pugnido49,Pugnido,demand_point,POINT (34.25615 7.6903),34.256152,7.690303,i196
206,Pugnido93,Pugnido,demand_point,POINT (34.25403 7.69047),34.254035,7.690471,i197
207,Pugnido120,Pugnido,demand_point,POINT (34.25545 7.69117),34.255450,7.691167,i198
208,Pugnido23,Pugnido,demand_point,POINT (34.25652 7.69118),34.256524,7.691175,i199


In [7]:
# Every camp name that occurs in the location table
camps = set(location_nodes["Camp"].unique())
camps
# Camp -> set of demand point labels located in that camp
camp_demand_labels = {
    camp_name: set(labels)
    for camp_name, labels in demand_points_gdf.groupby("Camp")["label"]
}
camp_demand_labels
# Camp -> set of candidate facility labels located in that camp
camp_candidate_location_labels = {
    camp_name: set(labels)
    for camp_name, labels in hfs_gdf.groupby("Camp")["label"]
}
camp_candidate_location_labels

{'Pugnido': {'j1', 'j10', 'j2', 'j3', 'j4', 'j5', 'j6', 'j7', 'j8', 'j9'}}

In [8]:
# This is _just_ to have insights on which values would be adequate for t'max and t''max
# All distances in this cell are measured after reprojecting to EPSG:3857.


# Initialize dictionaries to store results
avg_dist_demand_to_hp = {}
avg_dist_demand_to_hc = {}
min_intercamp_distance = {} # For distances between different camps
max_withincamp_distance = {} # For maximum distance within the same camp
overall_min_distance = None # Stays None when there is only one camp

# Projected geometries of all locations of each camp, cached so the inter-camp
# comparison below does not have to reproject them again
projected_locations_by_camp = {}

# Compute distances for each camp
for camp in camps:
    # Filter locations by camp
    camp_demand_points = demand_points_gdf[demand_points_gdf["Camp"] == camp]
    camp_hps = hps_gdf[hps_gdf["Camp"] == camp]
    camp_hcs = hcs_gdf[hcs_gdf["Camp"] == camp]

    # Reproject ONCE per camp. Calling to_crs inside the per-point lambdas would
    # reproject the whole layer again for every single demand point.
    camp_demand_geoms = camp_demand_points.to_crs(epsg=3857).geometry

    # Average (over demand points) of the mean distance to the camp's HP candidates
    if not camp_hps.empty:
        camp_hp_geoms = camp_hps.to_crs(epsg=3857).geometry
        avg_distances_per_demand_point_to_hp = camp_demand_geoms.apply(
            lambda dp: camp_hp_geoms.distance(dp).mean()
        )
        avg_dist_demand_to_hp[camp] = avg_distances_per_demand_point_to_hp.mean()
    else:
        avg_dist_demand_to_hp[camp] = None  # No HPs in this camp

    # Average (over demand points) of the mean distance to the camp's HC candidates
    if not camp_hcs.empty:
        camp_hc_geoms = camp_hcs.to_crs(epsg=3857).geometry
        avg_distances_per_demand_point_to_hc = camp_demand_geoms.apply(
            lambda dp: camp_hc_geoms.distance(dp).mean()
        )
        avg_dist_demand_to_hc[camp] = avg_distances_per_demand_point_to_hc.mean()
    else:
        avg_dist_demand_to_hc[camp] = None  # No HCs in this camp

    # Maximum within-camp distance (largest distance between any two locations of the camp)
    locations_camp = location_nodes[location_nodes["Camp"] == camp]
    locations_camp_geoms = locations_camp.to_crs(epsg=3857).geometry
    projected_locations_by_camp[camp] = locations_camp_geoms

    max_within_distance = locations_camp_geoms.apply(
        lambda loc: locations_camp_geoms.distance(loc).max()
    ).max()

    # Store the maximum distance within the camp
    max_withincamp_distance[camp] = max_within_distance


# Compute minimum distance between any location in different camps
if len(camps)>1:
    # Ordered pairs of distinct camps, in the same order as two nested loops over `camps`
    for camp1, camp2 in itertools.permutations(camps, 2):
        if (camp2, camp1) in min_intercamp_distance:
            # The minimum distance is symmetric, so reuse the value of the mirrored pair
            min_distance = min_intercamp_distance[(camp2, camp1)]
        else:
            geoms_camp1 = projected_locations_by_camp[camp1]
            geoms_camp2 = projected_locations_by_camp[camp2]
            min_distance = geoms_camp1.apply(lambda loc: geoms_camp2.distance(loc).min()).min()
        min_intercamp_distance[(camp1, camp2)] = min_distance

    # Now find the overall minimum distance across all inter-camp pairs
    overall_min_distance = min(min_intercamp_distance.values())

# Find the overall maximum within-camp distance
overall_max_withincamp_distance = max(max_withincamp_distance.values())


# # Convert results to DataFrames for better visualization
# # (the overall_* values are scalars; the per-camp / per-pair dictionaries are the ones to tabulate)
# df_avg_dist_hp = pd.DataFrame(list(avg_dist_demand_to_hp.items()), columns=["Camp", "Avg_Dist_Demand_to_HP"])
# df_avg_dist_hc = pd.DataFrame(list(avg_dist_demand_to_hc.items()), columns=["Camp", "Avg_Dist_Demand_to_HC"])
# df_min_intercamp = pd.DataFrame(list(min_intercamp_distance.items()), columns=["Camp_Pair", "Min_Distance"])
# df_max_withincamp = pd.DataFrame(list(max_withincamp_distance.items()), columns=["Camp", "Max_Distance"])

# Display results
print(avg_dist_demand_to_hp)
print(avg_dist_demand_to_hc)
if len(camps)>1: print(min_intercamp_distance)
if len(camps)>1: print(overall_min_distance)
print(max_withincamp_distance)
print(overall_max_withincamp_distance)



{'Pugnido': np.float64(1476.0151363558032)}
{'Pugnido': np.float64(1650.1297532405165)}
{'Pugnido': np.float64(4341.792464199426)}
4341.792464199426


In [9]:
def compute_distance_matrix(demand_points_gdf, hfs_gdf):
    """
    Compute the Euclidean distance matrix between demand points and candidate
    health facility locations.

    Distances are computed directly on the x/y coordinates of the geometries, so
    they are expressed in the coordinate units of the input data (e.g. degrees for
    longitude/latitude data), not in metres. Use `compute_distance_matrix_meters`
    for distances in a projected CRS.

    Parameters:
    - demand_points_gdf: GeoDataFrame containing demand points with point 'geometry'.
      If it has a 'label' column, that column labels the rows of the result;
      otherwise the frame's index is used.
    - hfs_gdf: GeoDataFrame containing candidate health facility locations with point
      'geometry'. If it has a 'label' column, that column labels the columns of the
      result; otherwise the frame's index is used.

    Returns:
    - distance_df: pandas DataFrame of distances (rows: demand points, columns: health facilities).
    """
    # Collect (x, y) pairs; reshape keeps the (n, 2) shape cdist needs even for empty input
    demand_coords = np.array(
        [(point.x, point.y) for point in demand_points_gdf.geometry], dtype=float
    ).reshape(-1, 2)
    hfs_coords = np.array(
        [(point.x, point.y) for point in hfs_gdf.geometry], dtype=float
    ).reshape(-1, 2)

    # Compute the distance matrix using cdist with Euclidean metric
    distance_matrix = cdist(demand_coords, hfs_coords, metric='euclidean')

    # Label the matrix from the inputs themselves rather than from notebook-level
    # variables, so the labels always describe the frames that were actually passed in
    if 'label' in demand_points_gdf.columns:
        row_labels = demand_points_gdf['label'].to_numpy()
    else:
        row_labels = demand_points_gdf.index.to_numpy()
    if 'label' in hfs_gdf.columns:
        column_labels = hfs_gdf['label'].to_numpy()
    else:
        column_labels = hfs_gdf.index.to_numpy()

    distance_df = pd.DataFrame(distance_matrix, index=row_labels, columns=column_labels)

    return distance_df

In [10]:
# Example usage:
# NOTE: compute_distance_matrix works on the raw geometry coordinates, so the values
# shown below are in coordinate units rather than metres. `distance_df` is reassigned
# further down with the output of compute_distance_matrix_meters.
distance_df = compute_distance_matrix(demand_points_gdf, hfs_gdf)

# To save the above matrix into an Excel file to subsequently read
# distance_df.to_excel('distance_matrix_refcamps.xlsx', sheet_name='DistanceMatrixRefCamps')#, float_format="%.2f")

# Distance matrix
# distance_matrix = pd.read_excel('distance_matrix_ij.xlsx', index_col=0)
# distance_matrix = pd.read_excel('distance_matrix_refcamps.xlsx', index_col=0)
# Display the labelled matrix (rows: demand points, columns: candidate facilities)
distance_df

Unnamed: 0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10
i1,0.003094,0.006034,0.008028,0.009596,0.011742,0.012383,0.016645,0.017742,0.025143,0.021179
i2,0.002702,0.003881,0.006143,0.007805,0.009861,0.010809,0.015371,0.016345,0.024280,0.019228
i3,0.001104,0.003409,0.007404,0.006643,0.011039,0.009448,0.013830,0.014873,0.022592,0.020243
i4,0.002145,0.004212,0.009613,0.005369,0.013043,0.007542,0.011469,0.012630,0.019940,0.021894
i5,0.002037,0.002511,0.006563,0.006133,0.010145,0.009101,0.013665,0.014633,0.022632,0.019293
...,...,...,...,...,...,...,...,...,...,...
i196,0.026281,0.022335,0.018589,0.023040,0.015019,0.024502,0.028004,0.027475,0.036408,0.006068
i197,0.025126,0.021081,0.017778,0.021481,0.014366,0.022774,0.026119,0.025541,0.034396,0.006299
i198,0.026525,0.022517,0.019015,0.023016,0.015515,0.024342,0.027695,0.027112,0.035943,0.006872
i199,0.027192,0.023229,0.019531,0.023862,0.015966,0.025262,0.028682,0.028119,0.036982,0.006988


In [11]:
def compute_distance_matrix_meters(demand_points_gdf, hfs_gdf, crs_epsg=3857):
    """
    Compute the distance matrix between demand points and candidate health facility
    locations after reprojecting both layers to a projected CRS.

    Distances are expressed in the units of the target CRS (metres for the default
    EPSG:3857). Note that EPSG:3857 (Web Mercator) does not preserve distances exactly;
    pass a local projected CRS (e.g. the matching UTM zone) through `crs_epsg` when
    accuracy matters.

    Parameters:
    - demand_points_gdf: GeoDataFrame containing the demand points with geometry (usually point geometries).
      If it has a 'label' column, that column labels the rows of the result; otherwise
      the frame's index is used.
    - hfs_gdf: GeoDataFrame containing the candidate health facility locations with geometry.
      If it has a 'label' column, that column labels the columns of the result; otherwise
      the frame's index is used.
    - crs_epsg: The EPSG code to which the geometries will be reprojected. Default is 3857 (Web Mercator).

    Returns:
    - distance_df: A pandas DataFrame where the rows are demand points, the columns are health facilities,
                   and the values are the distances between them.
    """
    # Reproject both layers to the target CRS; the caller's frames are left untouched
    demand_projected = demand_points_gdf.to_crs(epsg=crs_epsg)
    hfs_projected = hfs_gdf.to_crs(epsg=crs_epsg)

    # Materialise the facility geometries once so the inner loop does not re-read the column
    hf_geometries = list(hfs_projected.geometry)

    # Pairwise geometry distances; the reshape guarantees an (n_demand, n_facilities)
    # array even when one of the inputs is empty
    distance_matrix = np.array(
        [
            [demand_point.distance(hf_location) for hf_location in hf_geometries]
            for demand_point in demand_projected.geometry
        ],
        dtype=float,
    ).reshape(len(demand_projected), len(hf_geometries))

    # Label the matrix from the inputs themselves rather than from notebook-level
    # variables, so the labels always describe the frames that were actually passed in
    if 'label' in demand_projected.columns:
        row_labels = demand_projected['label'].to_numpy()
    else:
        row_labels = demand_projected.index.to_numpy()
    if 'label' in hfs_projected.columns:
        column_labels = hfs_projected['label'].to_numpy()
    else:
        column_labels = hfs_projected.index.to_numpy()

    distance_df = pd.DataFrame(distance_matrix, index=row_labels, columns=column_labels)

    return distance_df


In [13]:
# Example usage: distances after reprojection to the function's default CRS (EPSG:3857)
distance_df = compute_distance_matrix_meters(demand_points_gdf, hfs_gdf)

# The stored matrix is looked up by scenario name, so that changing `scenario_name`
# cannot silently combine one scenario's locations with another scenario's distances.
# For "pugnido_baseline" this is 'individual_refugee_camps/distance_matrix_pugnido_baseline.xlsx'.
distance_matrix_file = f'individual_refugee_camps/distance_matrix_{scenario_name}.xlsx'

# To (re)generate the stored matrix for the current scenario, uncomment:
# distance_df.to_excel(distance_matrix_file, sheet_name='DistanceMatrix')#, float_format="%.2f")

# Distance matrix used by the model
distance_matrix = pd.read_excel(distance_matrix_file, index_col=0)

# The model indexes this matrix with the demand point / facility labels built above,
# so a file generated for different locations must be rejected rather than used.
if list(distance_matrix.index) != list(dps) or list(distance_matrix.columns) != list(hfs):
    raise ValueError(
        f"Labels in '{distance_matrix_file}' do not match the demand points / candidate "
        f"locations of scenario '{scenario_name}'; regenerate the file from distance_df."
    )

distance_matrix

Unnamed: 0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10
i1,345.722068,677.592011,898.547986,1076.018780,1313.874971,1387.058632,1861.493982,1985.263877,2806.506604,2370.257570
i2,300.840601,435.594977,686.337335,874.050102,1102.166207,1209.168680,1717.223534,1827.176102,2708.313651,2150.761595
i3,123.167035,382.839891,826.020954,744.959786,1232.410738,1057.996936,1545.887689,1663.608652,2520.522939,2262.842077
i4,239.500254,470.632701,1071.067558,603.120296,1454.374352,846.061480,1283.232965,1414.177887,2225.344266,2445.041827
i5,227.749762,282.005082,731.709038,687.100095,1132.216866,1018.243493,1526.491119,1635.720912,2524.099955,2156.334749
...,...,...,...,...,...,...,...,...,...,...
i196,2941.422219,2498.422099,2083.385028,2573.009205,1684.115368,2733.561605,3121.349708,3061.448581,4054.959048,681.471812
i197,2813.781465,2359.814684,1994.079527,2400.307182,1612.233753,2541.801127,2911.850095,2846.507006,3831.163542,707.267517
i198,2969.774864,2519.832210,2131.945474,2571.364969,1740.496299,2716.716301,3087.638468,3021.695069,4003.637687,771.879229
i199,3043.656765,2598.759168,2189.006009,2665.284901,1790.306224,2818.874825,3197.359304,3133.643754,4119.267256,784.760510


In [14]:
######################################

# Health services and workers
# NOTE: the order of these lists matters. The scenario tables below are read
# positionally (row/column k of a table is paired with element k of the list).
services = ['basic','maternal1','maternal2']
health_workers = ['doctor','nurse','midwife']
levels = ['hp', 'hc']



# Assign scenario parameters
HFs_to_locate = params["HFs_to_locate"]
t1max = params["t1max"]
t2max = params["t2max"]
workers_to_allocate = params["workers_to_allocate"]
working_hours = params["working_hours"]
service_time = params["service_time"]

# Lower bound workers per HF type, keyed by (health worker, level)
lb_workers_df = pd.DataFrame(params["lb_workers"], index=health_workers)
lb_workers = {(health_workers[p], levels[l]): lb_workers_df.iloc[p, l]
      for p, l in itertools.product(range(len(health_workers)), range(len(levels)))}

# Upper bound workers per HF type, keyed by (health worker, level)
ub_workers_df = pd.DataFrame(params["ub_workers"], index=health_workers)
ub_workers = {(health_workers[p], levels[l]): ub_workers_df.iloc[p, l]
      for p, l in itertools.product(range(len(health_workers)), range(len(levels)))}

# Where can each service be provided? Keyed by (service, level)
services_at_HFs_df = pd.DataFrame(params["services_at_HFs"], index=services)
a_HF = {(services[s], levels[l]): services_at_HFs_df.iloc[s, l]
      for s, l in itertools.product(range(len(services)), range(len(levels)))}

# Which health worker can deliver each service? Keyed by (health worker, service)
services_per_worker_df = pd.DataFrame(params["services_per_worker"], index=health_workers)
a_W = {(health_workers[p], services[s]): services_per_worker_df.iloc[p, s]
      for p, s in itertools.product(range(len(health_workers)), range(len(services)))}

# Demand rates
# Every demand point is assigned the same scenario value params["total_population"]
total_population = {key: params["total_population"] for key in dps}

# Opening hours: the scenario's rate row is replicated for every demand point,
# then flattened into a dict keyed by (demand point, service)
demand_rate_opening_hours_df = pd.DataFrame([params["demand_rate_opening_hours"]] * len(dps), index=dps, columns=services)
dr_oh = {(dps[i], services[s]): demand_rate_opening_hours_df.iloc[i, s]
      for i, s in itertools.product(range(len(dps)), range(len(services)))}

# Demand per (demand point, service) = population of THAT demand point x its rate,
# rounded to an integer. The population is looked up through the key's own demand
# point (key[0]) instead of an unrelated outer loop over all demand points.
dd_oh = {key: int(round(total_population[key[0]] * rate)) for key, rate in dr_oh.items()}

# Closing hours: same construction as for opening hours
demand_rate_closing_hours_df = pd.DataFrame([params["demand_rate_closing_hours"]] * len(dps), index=dps, columns=services)
dr_ch = {(dps[i], services[s]): demand_rate_closing_hours_df.iloc[i, s]
      for i, s in itertools.product(range(len(dps)), range(len(services)))}

dd_ch = {key: int(round(total_population[key[0]] * rate)) for key, rate in dr_ch.items()}


In [17]:
# Define the sets and parameters to use in the model
I = dps                # demand point labels
J = hfs                # candidate facility labels
J_HP = hps             # candidate labels whose geometry is an HP candidate site
J_HC = hcs             # candidate labels whose geometry is an HC candidate site

# Camps and per-camp subsets are built from Python sets, whose iteration order for
# strings can change between kernel restarts. Order them explicitly so the model
# inputs are identical on every run: camps by name, members in the order of I / J.
C = np.array(sorted(camps, key=str), dtype=object)
I_c = {key: np.array([i for i in dps if i in value], dtype=object) for key, value in camp_demand_labels.items()}
J_c = {key: np.array([j for j in hfs if j in value], dtype=object) for key, value in camp_candidate_location_labels.items()}
t = distance_matrix # travel time (distances) between demand points and HFs
S = services
P = health_workers
L = levels
n_HF = dict(zip(levels, HFs_to_locate))              # level -> entry of HFs_to_locate (paired by position)
n_W = dict(zip(health_workers, workers_to_allocate)) # health worker -> entry of workers_to_allocate (paired by position)
Pi = total_population #new Feb 19
r1 = dr_oh #new Feb 19
r2 = dr_ch #new Feb 19
d1 = dd_oh #new Feb 19; either this is necessary, or Pi and r1
d2 = dd_ch #new Feb 19; either this is necessary, or Pi and r2
lb = lb_workers
ub = ub_workers
q = dict(zip(services, service_time))        # service -> entry of service_time (paired by position)
h = dict(zip(health_workers, working_hours)) # health worker -> entry of working_hours (paired by position)


# Bundle everything under the symbol names defined above
model_data = {
    "I": I,
    "J": J,
    "J_HP": J_HP,
    "J_HC": J_HC,
    "C": C,
    "I_c": I_c,
    "J_c": J_c,
    "S": S,
    "P": P,
    "L": L,
    "t": t,
    "t1max": t1max,
    "t2max": t2max,
    "n_HF": n_HF,
    "n_W": n_W,
    "a_HF": a_HF,
    "a_W": a_W,
    "Pi": Pi,
    "r1": r1,
    "r2": r2,
    "d1": d1,
    "d2": d2,
    "lb": lb,
    "ub": ub,
    "q": q,
    "h": h
}