In [None]:
# SELECT runtime { local | colab }
# NOTE(review): this switch is commented out and is not read by any visible cell.
# runtime = 'local'

# SELECT Road Network Data { osm | estrada | combined }
# NOTE(review): no visible cell reads road_path; the road file name is hard-coded
# in the cell that loads the network -- confirm before relying on this switch.
road_path = 'osm'

# SELECT Population Data { facebook | worldpop | census }
# NOTE(review): the population-loading cell only loads data for 'facebook' and 'worldpop'.
pop_path = 'worldpop'

# SELECT Distance Threshold in KM
distance_thresholds = [10]

# Daily probability of having a stroke
prob_stroke  = 0.0000114 # 0.00114% per day: 1 in 3 people gets a stroke over an assumed 80-year life, i.e. (1/3) / (80 * 365)

# Average number of days a patient stays in a stroke center
average_days = 4 # 4 days, source: "Processes of Stroke Unit Care and Outcomes at Discharge in Vietnam: Findings from the Registry of Stroke Care Quality (RES-Q) in a Major Public Hospital"

# Utilization rate. In the optimization functions it multiplies the bed capacity
# (utilization_rate * (c[j] + Z[j])), i.e. it acts as the maximal allowed bed utilization.
# NOTE(review): the original note described it as the share of people with a stroke
# who actually go to the hospital -- confirm which meaning is intended.
utilization_rate = 0.8 # 80%, -> TO TEST, NO REFERENCES

# Numbers of additional stroke facilities to try in the optimization
# (re-assigned with the same values in the cells that run the optimizations)
nhospitals = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000]

# Numbers of additional beds to try in the optimization
# NOTE(review): nbeds is not read by any visible cell; the run cell defines its own nmax_beds.
nbeds = [100, 200, 300, 400, 500, 600]

# SELECT potential stroke facilities { existing hospitals }
# NOTE(review): not read by any visible cell; candidate sites are always loaded
# from hospitals_vietnam.geojson.
potential_facilities = 'existing hospitals'

In [None]:
%%time
import pandas as pd
import geopandas as gpd

import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px

import folium
import itertools
import geopy.distance

import warnings
warnings.filterwarnings('ignore')

import json
import time
from datetime import datetime

# Locations are relative to the notebook's working directory:
# data_path is prefixed to every input file name read below,
# code_path holds the project modules imported in this cell.
data_path = '../../Vietnam project/0906Data/'
code_path = '../Scripts/'

import sys
# Make the project modules importable by adding their folder to the module search path.
sys.path.append(code_path)
# The data folder is added to the module search path as well.
# NOTE(review): no visible import needs this -- confirm whether it can be dropped.
sys.path.append(data_path)

# Import python functions
from optimization_model import OptimizationModel
from distance_matrix import DistanceCalculation, haversine_vectorize
from network_gpbp import get_nodes_and_edges
from InputDataPreprocessingv1 import CurrentHospitals, NewHospitals, NewHospitalsGrid, NewHospitalsCSV, Population, PopulationFB
from Visualization_GPBP import ParetoCurve, CreateMap

%load_ext line_profiler

# Import data

In [None]:
%%time
# Preprocessing of the road network: get_nodes_and_edges returns the nodes, the edges
# (with attributes) and the network object used by the later cells.
# NOTE(review): the file name is hard-coded to the OSM extract; the road_path setting
# from the configuration cell is not consulted here.
nodes, edges_attr, network = get_nodes_and_edges(data_path+'road_osm_preprocessed.geojson')

In [None]:
# Draw the road edges in grey on a 10x10 inch figure with the axes hidden.
f, ax = plt.subplots(1, 1, figsize=(10, 10))
edges_attr.plot(color='grey', zorder=1, ax=ax)
ax.axis('off')
plt.show()

In [None]:
# Display the shape of edges_attr (the road edges returned by get_nodes_and_edges).
edges_attr.shape

In [None]:
%%time
# Existing stroke facilities: the row position becomes an explicit 'index' column,
# only the identifier, coordinates and English name are kept, and the result is
# passed to CurrentHospitals together with the road network and its nodes.
health_facilities = (
    pd.read_csv(data_path + 'stroke-facs.csv')
    .reset_index()
    .loc[:, ['index', 'longitude', 'latitude', 'Name_English']]
)
current_hospitals_ID, current_hospitals = CurrentHospitals(health_facilities, network, nodes)
# Number of existing facilities
len(current_hospitals)

In [None]:
%%time
# Location of new potential hospitals.
# Candidate sites are read from hospitals_vietnam.geojson and passed to NewHospitals
# together with the current facilities, the road network and its nodes.
# NOTE(review): an earlier note listed "1kmGrid", "5kmGrid", "10kmGrid" or
# "existing hospitals" as choices, but this cell always uses the hospitals file and
# does not read the potential_facilities setting.

hospitals = gpd.read_file(data_path+'hospitals_vietnam.geojson')
new_hospitals_ID, new_hospitals = NewHospitals(current_hospitals, hospitals, network, nodes)

# Number of candidate locations
len(new_hospitals)

In [None]:
%%time

# Population Data
# digits_rounding is forwarded to PopulationFB; per the original note the coordinates
# are rounded to this many digits to cluster the population (8 digits: no rounding).
digits_rounding = 1

if pop_path == 'facebook':
    read_population = pd.read_csv(data_path+'pop_fb_2020.csv').reset_index()
    read_population = read_population[['index','longitude','latitude','population_2020']]
elif pop_path == 'worldpop':
    read_population = pd.read_csv(data_path+'WorldPop_2020_1km.csv').reset_index()
else:
    # Fail early with a clear message instead of a NameError on read_population below
    # (e.g. for 'census', which is listed as an option but has no loader here).
    raise ValueError(
        "Unsupported pop_path %r: only 'facebook' and 'worldpop' can be loaded" % (pop_path,)
    )

array_household, population = PopulationFB(digits_rounding, read_population, network, nodes)

# Number of population records returned by PopulationFB
len(population)

# Distance matrix

In [None]:
# Smallest and largest of the configured distance thresholds
distance_threshold_smallest, distance_threshold_largest = (
    min(distance_thresholds),
    max(distance_thresholds),
)

In [None]:
%%time
# Distances from every population record to the current hospitals, computed with the
# largest threshold; only the columns used later are kept and 'ID' becomes 'Pop_ID'.
pop_subset = population.loc[:, ['nearest_node', 'xcoord', 'ycoord']]
matrix_current = (
    DistanceCalculation(network, current_hospitals, population, pop_subset, distance_threshold_largest)
    [[
        'ID', 'pop_dist_road_estrada', 'household_count', 'xcoord', 'ycoord',
        'Hosp_ID', 'hosp_dist_road_estrada', 'Longitude', 'Latitude',
        'shortest_path_length', 'euclidean_distance', 'total_network_distance', 'distance_corrected',
    ]]
    .rename(columns={'ID': 'Pop_ID'})
)


In [None]:
%%time
# Population still lacking access: collect the Pop_IDs that already have a current
# hospital within the smallest threshold (by total_network_distance) and drop those
# records from the population table.
withinSkm = matrix_current.loc[
    matrix_current['total_network_distance'] <= distance_threshold_smallest, 'Pop_ID'
].unique()
population_new = population.drop(index=population.index[population['ID'].isin(withinSkm)])

In [None]:
%%time
# Calculate distance to new hospitals, using only the population records that are not
# yet within the smallest threshold of a current hospital (population_new).
pop_subset = population_new[['nearest_node','xcoord','ycoord']]
# NOTE(review): the third argument is the full `population` while pop_subset comes from
# `population_new`; the cell for current hospitals passes matching tables. Whether
# DistanceCalculation expects `population_new` here cannot be told from this notebook -- confirm.
matrix_new = DistanceCalculation(network, new_hospitals, population, pop_subset, distance_threshold_largest)
# Keep only the columns used later; candidate sites are identified by 'Cluster_ID'.
matrix_new = matrix_new[['ID','pop_dist_road_estrada','household_count','xcoord','ycoord',
                                 'Cluster_ID','hosp_dist_road_estrada','Longitude','Latitude',
                                 'shortest_path_length','euclidean_distance','total_network_distance', 'distance_corrected']]
matrix_new = matrix_new.rename(columns={'ID':'Pop_ID'})

# Optimization

In [None]:
%%time
# Bring both distance tables to the common layout (Pop_ID, HospCluster, distance),
# stack the current-hospital rows on top of the candidate rows and sort by Pop_ID.
matrix_current1 = matrix_current[['Pop_ID', 'Hosp_ID', 'distance_corrected']].rename(
    columns={'Hosp_ID': 'HospCluster', 'distance_corrected': 'distance'}
)
matrix_new1 = matrix_new[['Pop_ID', 'Cluster_ID', 'distance_corrected']].rename(
    columns={'Cluster_ID': 'HospCluster', 'distance_corrected': 'distance'}
)
df_combined = pd.concat([matrix_current1, matrix_new1], axis=0).sort_values(by='Pop_ID')

# Model




$\textrm{Maximize } \quad \sum_{i \in I} \sum_{j \in J} v_iy_{ij} $ 

$\textrm{Subject to } \quad (1) x_j = 1 \quad \quad \quad \quad \quad\forall j = 1, ... m $

$ \quad \quad \quad \quad \quad (2) \sum^{M}_{j=m+1} x_j \leq p  $

$ \quad \quad \quad \quad \quad  (3) \sum_{j=1}^M z_j \leq q $

$ \quad \quad \quad \quad \quad  (4) \sum_{i \in I} y_{ij} \leq nx_j \quad \quad \quad \quad \quad \forall j \in J $

$ \quad \quad \quad \quad \quad  (5) z_j \leq q x_j \quad \quad \quad \quad \quad \forall j \in J $

$ \quad \quad \quad \quad \quad (6) \sum_{j \in J} y_{ij} \leq 1 \quad \quad \quad \quad \quad \forall i \in I$

$ \quad \quad \quad \quad \quad (7) a * s * \sum_{i \in I} v_iy_{ij} \leq u(c_j + z_j)  \quad \quad \quad \quad \quad \forall j \in J  $

$ \quad \quad \quad \quad \quad  (8) y_{ij} = 0 \quad \quad \quad \quad \quad \forall i \in I, \forall j \in J, d_{ij} > S $

$ \quad \quad \quad \quad \quad x_j \in \{0,1\}, \; y_{ij} \in [0,1], \; z_j \in \mathbb{Z}_{\geq 0} \quad \quad\quad \quad \quad\forall i \in I, \forall j \in J $

where: 

- $I$ = the index set of households, or clusters of households, indexed by $i = 1, ..., n$. 

- $J$ = the index set of all healthcare sites, where indices $j = 1, ..., m$ are corresponding to the already existing healthcare facilities and indices $j = m+1, ..., M$ are corresponding to potential hospital locations

- $v_i$ = the number of people in (cluster of) household(s) $i$

- $d_{ij}$ = the travel distance from (cluster of) household(s) $i$ to hospital facility $j$

- $S$ = the maximum travel distance from a household (or cluster) to a health care facility

- $p$ = the number of additional stroke centers 

- $q$ = the total number of additional capacity (in #beds)

- $c_j$ = the current capacity at stroke center $j$ (in # beds)

- $u$ = maximal allowed utilization rate (between 0 and 1)

- $s$ = average number of days a patient occupies a bed in a stroke center

- $a$ = probability (per day) that a person has to visit a stroke center

Decision variables:
$
x_j = \begin{cases} 1 & \text{ if hospital } j \text{ is opened} \\
0 & \text{ otherwise}
\end{cases} \\
$
$
y_{ij} = \text{the fraction of the people in (cluster of) household(s) } i \text{ that is assigned to health facility } j \text{; an assignment is only} \\ \text{allowed when facility } j \text{ is opened and lies within } S \text{ kilometers travel distance of } i \\
$
$
z_j = \text{extra capacity at stroke center j}
$


In [None]:
def getvariables(n, m, X, Y, Z, II, JJ):
    """Extract the solution values of a solved facility-location model.

    Parameters
    ----------
    n : int
        Number of households; kept for interface compatibility, not used.
    m : int
        Number of facilities; X and Z are read for j = 0, ..., m-1.
    X, Z : mapping j -> variable
        Opening and extra-capacity variables; the solution value is read from `.x`.
    Y : mapping (i, j) -> variable
        Assignment variables; the solution value is read from `.x`.
    II, JJ : parallel iterables
        The k-th elements form the (household, facility) pair of one Y variable.

    Returns
    -------
    (Xvalues, Yvalues, Zvalues)
        Xvalues and Zvalues are NumPy arrays of length m. Yvalues maps every
        facility j to the list of households i with a positive assignment to j.
    """
    import numpy as np

    # Assignments are continuous in [0, 1]; anything above this tolerance counts as assigned.
    assigned_tol = 1e-6

    Xvalues = np.zeros(m)
    Zvalues = np.zeros(m)
    for j in range(m):
        Xvalues[j] = X[j].x
        Zvalues[j] = Z[j].x

    Yvalues = {j: [] for j in range(m)}
    for i, j in zip(II, JJ):
        # Only report pairs that are actually used in the solution; listing every
        # candidate pair would say nothing about the optimizer's decision.
        if Y[i, j].x > assigned_tol:
            Yvalues[j].append(i)

    return (Xvalues, Yvalues, Zvalues)

In [None]:
import gurobipy as gb
# Readable labels for Gurobi status codes: each gb.GRB status constant maps to its
# own name in lower case.
gurobicode = {
    getattr(gb.GRB, status_name): status_name.lower()
    for status_name in (
        'LOADED',
        'OPTIMAL',
        'INFEASIBLE',
        'INF_OR_UNBD',
        'UNBOUNDED',
        'CUTOFF',
        'ITERATION_LIMIT',
        'NODE_LIMIT',
        'TIME_LIMIT',
        'SOLUTION_LIMIT',
        'INTERRUPTED',
        'NUMERIC',
        'SUBOPTIMAL',
        'INPROGRESS',
        'USER_OBJ_LIMIT',
    )
}

In [None]:
# Optimization 1 runs for 1 number of max beds, 1 number of added hospitals.
def Optimization1(max_beds, array_household, current_hospitals_ID, new_hospitals_ID, distance_matrix, S, extra_hospitals, maxTimeInSeconds = 100, mipGap = 0.001, trace=False, seed = 2021 ):
    """Build and solve the capacitated facility-location model once.

    Besides its arguments the function reads the notebook globals
    `matrix_current1`, `population`, `prob_stroke`, `average_days`,
    `utilization_rate` and `gurobicode`.

    Parameters
    ----------
    max_beds : int
        Upper bound on the total number of extra beds (sum of Z); also used as
        the big-M that links Z[j] to X[j].
    array_household : indexable
        Population weight per Pop_ID; used as objective coefficient.
    current_hospitals_ID, new_hospitals_ID : sized
        Only their lengths are used. Existing facilities are assumed to carry
        the IDs 0 .. len(current_hospitals_ID)-1.
    distance_matrix : DataFrame
        Columns 'Pop_ID', 'HospCluster' and 'distance'.
    S : number
        Distance threshold; only pairs with distance <= S get a variable.
    extra_hospitals : int
        Maximum number of new facilities that may be opened.
    maxTimeInSeconds, mipGap, trace
        Gurobi TimeLimit, MIPGap and OutputFlag.
    seed : int
        Seed for the global NumPy generator used to sample the capacity factors.

    Returns
    -------
    (obj_val, tModel, tSolving, status)
        Covered population (NaN when Gurobi found no solution), model-building
        time and solving time in seconds, and the Gurobi status as text.
    """
    import time
    import gurobipy as gb
    import numpy as np

    # The global generator is seeded (rather than a local one) so that the sampled
    # capacities stay identical to earlier runs with the same seed.
    np.random.seed( seed )

    tStart = time.time()

    distances = distance_matrix[distance_matrix.distance <= S]
    existinghosp = len(current_hospitals_ID)
    n = len(array_household)
    # Z[j] can never exceed the total bed budget, so max_beds is a valid (and tight) big-M.
    big_m = max_beds

    # Create capacity: c[j] starts as the population within S of existing facility j ...
    currently_served = matrix_current1[matrix_current1.distance <= S]
    currently_served = currently_served.merge(population[['ID','household_count']], how='left', left_on='Pop_ID', right_on='ID')
    c = [0]*(len(current_hospitals_ID) + len(new_hospitals_ID))
    for hosp in currently_served['HospCluster'].unique():
        c[hosp] = currently_served.loc[currently_served.HospCluster == hosp, 'household_count'].sum()
    # ... people within S of several facilities are counted once per facility in sum(c);
    # dividing by the de-duplicated total gives that average multiplicity.
    served_totals = currently_served[['Pop_ID', 'household_count']].drop_duplicates().sum()
    average_served = sum(c) / served_totals['household_count']
    # Scale to beds with a random factor in [0.8, 1.0) per facility.
    ratio = np.random.uniform(low=0.8, high=1.0, size=len(c)) * prob_stroke * average_days * (1/utilization_rate) / average_served
    c = np.round(np.multiply(ratio, c), decimals = 0)

    # One assignment variable per (household, facility) pair within S.
    combinations = gb.tuplelist(zip(distances['Pop_ID'], distances['HospCluster']))
    II = frozenset(distances['Pop_ID'])
    JJ = frozenset(distances['HospCluster'])

    # Existing facilities that actually appear in the model.
    exJ = frozenset(range(existinghosp)) & JJ
    # Only facilities in JJ get an X variable, so the budget must count the existing
    # facilities in exJ; counting all existing ones would silently allow more than
    # `extra_hospitals` new facilities whenever an existing facility has no
    # household within S.
    p = len(exJ) + extra_hospitals

    M = gb.Model("Facility location problem")

    X = M.addVars(JJ, vtype=gb.GRB.BINARY)
    Z = M.addVars(JJ, lb = 0, vtype=gb.GRB.INTEGER)
    # Gurobi minimizes, so the population weights enter with a negative sign.
    Y = M.addVars(combinations, lb=0.0, ub=1.0, vtype=gb.GRB.CONTINUOUS, obj=[-array_household[i] for i,j in combinations])

    # Per facility: population weights and Y variables; per household: Y variables.
    ah = { j : [] for j in JJ }
    JI = { j : [] for j in JJ }
    IJ = { i : [] for i in II }
    for i,j in combinations:
        ah[j].append(array_household[i])
        JI[j].append(Y[i,j])
        IJ[i].append(Y[i,j])

    # Set parameters
    M.Params.OutputFlag = int(trace)
    M.Params.mipgap     = mipGap
    M.Params.timelimit  = maxTimeInSeconds

    # Constraints
    # 1 Existing hospitals are opened
    M.addConstrs( (X[j] == 1 for j in exJ), name="Existing hospitals" )

    # 2 Number of opened hospitals less or equal to existing + additional maximum
    M.addLConstr( gb.LinExpr( [ (1,X[j]) for j in JJ ] ) <= p )

    # 3 Extra beds less or equal to maximum
    M.addLConstr( gb.LinExpr( [ (1,Z[j]) for j in JJ ] ) <= max_beds )

    # 5 Only add beds when the hospital is opened
    # 4 Only assign beneficiaries if the hospital is opened
    # 7 Utilization constraint
    for j in JJ:
        M.addLConstr( Z[j] <= big_m*X[j] )
        M.addLConstr( gb.LinExpr( [1]*len(JI[j]), JI[j] ) <= n*X[j] )
        M.addLConstr( prob_stroke*average_days*gb.LinExpr( ah[j], JI[j] ) <= utilization_rate*(c[j]+Z[j]) )

    # 6 One can only be assigned once
    for i in II:
        M.addLConstr( gb.LinExpr( [1]*len(IJ[i]), IJ[i] ) <= 1 )

    tModel = time.time() - tStart
    tStart = time.time()

    # Optimize and extract solution
    M.optimize()
    tSolving = time.time() - tStart
    # objVal is only available when at least one solution was found (e.g. not when
    # the time limit is hit before the first incumbent).
    obj_val = -M.objVal if M.SolCount > 0 else float('nan')

    # Fall back to the numeric code for statuses missing from the lookup table.
    return obj_val, tModel, tSolving, gurobicode.get(M.status, str(M.status))

In [None]:
# Single trial solve at the largest threshold: 2000 extra beds, the last entry of
# nhospitals as the number of additional facilities, and a 300 s time limit.
S = distance_threshold_largest
v,tm,ts,status = Optimization1(2000, array_household, current_hospitals_ID, new_hospitals_ID, df_combined, S, nhospitals[-1], maxTimeInSeconds = 300, mipGap = 0.001, trace=False)
# Show (objective value, modelling time, solving time, solver status)
(v,tm,ts,status)

In [None]:
# Line-profile Optimization1 (-f) on the same trial call as the previous cell and
# write the text report to opt.txt (-T); -u sets the timer unit of the report.
%lprun -u 1e-3 -T opt.txt -f Optimization1 Optimization1(2000, array_household, current_hospitals_ID, new_hospitals_ID, df_combined, S, nhospitals[-1], maxTimeInSeconds = 300, mipGap = 0.001, trace=False)

In [None]:
# Optimize directly for all number of beds in nmax_beds and all additional number
# of hospitals in hosp_count
def Optimization2(nmax_beds, array_household, current_hospitals_ID, new_hospitals_ID, distance_matrix, S, hosp_count, maxTimeInSeconds = 100, mipGap = 0.001, trace=False, seed = 2021 ):
    """Solve the capacitated facility-location model for a grid of budgets.

    The model is built once. It is solved first without extra facilities and
    beds, then for every bed budget without extra facilities, and finally for
    every combination of `hosp_count` and `nmax_beds`; between solves only the
    right-hand sides of the two budget constraints change.

    Besides its arguments the function reads the notebook globals
    `matrix_current1`, `population`, `prob_stroke`, `average_days`,
    `utilization_rate` and `gurobicode`.

    Parameters
    ----------
    nmax_beds : iterable of int
        Budgets for the total number of extra beds.
    array_household : indexable
        Population weight per Pop_ID; used as objective coefficient.
    current_hospitals_ID, new_hospitals_ID : sized
        Only their lengths are used. Existing facilities are assumed to carry
        the IDs 0 .. len(current_hospitals_ID)-1.
    distance_matrix : DataFrame
        Columns 'Pop_ID', 'HospCluster' and 'distance'.
    S : number
        Distance threshold; only pairs with distance <= S get a variable.
    hosp_count : iterable of int
        Budgets for the number of additional facilities.
    maxTimeInSeconds, mipGap, trace
        Gurobi TimeLimit (applies to every single solve), MIPGap and OutputFlag.
    seed : int
        Seed for the global NumPy generator used to sample the capacity factors.

    Returns
    -------
    (df_opt_array, modelling_time, solving_time, status)
        One row per solve with the columns 'km', 'number_of_new_facilities',
        'number_of_extra_beds', 'count' (covered population, NaN when no
        solution was found), 'number_of_facilities' and '%'; the times are in
        seconds; the status text belongs to the LAST solve only.
    """
    import time
    import gurobipy as gb
    import numpy as np
    import pandas as pd

    # The global generator is seeded (rather than a local one) so that the sampled
    # capacities stay identical to earlier runs with the same seed.
    np.random.seed( seed )

    tStart = time.time()
    obj_val_array = []

    # Materialize the budgets so they can be iterated more than once.
    bed_counts = list(nmax_beds)
    hospital_counts = list(hosp_count)

    distances = distance_matrix[distance_matrix.distance <= S]
    existinghosp = len(current_hospitals_ID)
    m = len(current_hospitals_ID) + len(new_hospitals_ID)
    n = len(array_household)
    # Big-M for Z[j] <= big_m * X[j]: the largest bed budget is always sufficient.
    # A huge constant (the former 1e9) lets Z[j] become positive while X[j] is
    # "zero" within Gurobi's integrality tolerance.
    big_m = max(bed_counts, default=0)

    # Create existing capacity: c[j] starts as the population within S of existing facility j ...
    currently_served = matrix_current1[matrix_current1.distance <= S]
    currently_served = currently_served.merge(population[['ID','household_count']], how='left', left_on='Pop_ID', right_on='ID')
    c = [0]*(len(current_hospitals_ID) + len(new_hospitals_ID))
    for hosp in currently_served['HospCluster'].unique():
        c[hosp] = currently_served.loc[currently_served.HospCluster == hosp, 'household_count'].sum()
    # ... people within S of several facilities are counted once per facility in sum(c);
    # dividing by the de-duplicated total gives that average multiplicity.
    served_totals = currently_served[['Pop_ID', 'household_count']].drop_duplicates().sum()
    average_served = sum(c) / served_totals['household_count']
    # Scale to beds with a random factor in [0.8, 1.0) per facility.
    ratio = np.random.uniform(low=0.8, high=1.0, size=len(c)) * prob_stroke * average_days * (1/utilization_rate) / average_served
    c = np.round(np.multiply(ratio, c), decimals = 0)

    # One assignment variable per (household, facility) pair within S.
    II           = distances['Pop_ID']
    JJ           = distances['HospCluster']
    combinations = gb.tuplelist(zip(II,JJ))

    M = gb.Model("Facility location problem")

    # Add variables AND objective (Gurobi minimizes, hence the negative weights)
    X = M.addVars(m, vtype=gb.GRB.BINARY)
    Z = M.addVars(m, lb = 0, vtype=gb.GRB.INTEGER)
    Y = M.addVars(combinations, lb=0, ub=1, vtype=gb.GRB.CONTINUOUS, obj=[-array_household[i] for i,j in combinations])

    # Per facility: population weights and Y variables; per household: Y variables.
    ah = { j : [] for j in range(m) }
    JI = { j : [] for j in range(m) }
    IJ = { i : [] for i in range(n) }
    for i,j in combinations:
        ah[j].append(array_household[i])
        JI[j].append(Y[i,j])
        IJ[i].append(Y[i,j])

    # Set parameters
    M.Params.OutputFlag = int(trace)
    M.Params.mipgap     = mipGap
    M.Params.timelimit  = maxTimeInSeconds

    # Constraints
    # 1 Existing hospitals are opened
    M.addConstrs( (X[j] == 1 for j in range(existinghosp)), name="Existing hospitals" )

    # 2 Number of opened hospitals; starts without additional hospitals
    s1 = M.addLConstr( gb.LinExpr( [ (1,X[j]) for j in range(m) ] ) <= existinghosp )

    # 3 Extra beds less or equal to maximum; starts without extra beds
    s2 = M.addLConstr( gb.LinExpr( [ (1,Z[j]) for j in range(m) ] ) <= 0 )

    # 5 Only add beds when the hospital is opened
    # 4 Only assign beneficiaries if the hospital is opened
    # 7 Utilization constraint
    for j in range(m):
        M.addLConstr( Z[j] <= big_m*X[j] )
        M.addLConstr( gb.LinExpr( [1]*len(JI[j]), JI[j] ) <= n*X[j] )
        M.addLConstr( prob_stroke*average_days*gb.LinExpr( ah[j], JI[j] ) <= utilization_rate*(c[j]+Z[j]) )

    # 6 One can only be assigned once
    for i in set(II):
        M.addLConstr( gb.LinExpr( [1]*len(IJ[i]), IJ[i] ) <= 1 )

    modelling_time = time.time() - tStart
    tStart = time.time()

    def _solve():
        # objVal is only available when at least one solution was found.
        M.optimize()
        return -M.objVal if M.SolCount > 0 else float('nan')

    # Baseline: no additional hospitals, no extra beds
    obj_val_array.append([S, 0, 0, _solve()])

    # All bed budgets, first without additional hospitals and then for every
    # hospital budget. Changing the right-hand side keeps the model (and Gurobi's
    # solution information) intact instead of removing and re-adding constraints.
    for each_hosp_count in [0] + hospital_counts:
        s1.RHS = existinghosp + each_hosp_count
        for each_bed_count in bed_counts:
            s2.RHS = each_bed_count
            obj_val_array.append([S, each_hosp_count, each_bed_count, _solve()])

    solving_time = time.time() - tStart

    df_opt_array = pd.DataFrame(obj_val_array, columns=['km','number_of_new_facilities','number_of_extra_beds','count'])
    df_opt_array['number_of_facilities'] = df_opt_array['number_of_new_facilities'] + existinghosp
    df_opt_array['%'] = (df_opt_array['count']*100/sum(array_household)).round(1)

    # Fall back to the numeric code for statuses missing from the lookup table.
    return df_opt_array, modelling_time, solving_time, gurobicode.get(M.status, str(M.status))

In [None]:
%%time
S = distance_threshold_largest
# Multiple optimizations, multiple numbers of additional hospitals, multiple numbers of additional beds
nmax_beds = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000,2000]
max_beds=1000
nhospitals = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000]
# Collect one result table per threshold and concatenate once at the end
# (DataFrame.append no longer exists in pandas 2.x).
threshold_frames = []
for each_threshold in distance_thresholds:
    # Solve for the threshold of this iteration; passing S here would repeat the
    # largest threshold for every entry of distance_thresholds.
    opt_array, tModelling, tSolving, _status = Optimization2(nmax_beds, array_household, current_hospitals_ID, new_hospitals_ID, df_combined, each_threshold, nhospitals, maxTimeInSeconds = 300, mipGap = 0.001, trace=False)
    df_opt_outputs = pd.DataFrame(opt_array)
    threshold_frames.append(df_opt_outputs)
    print("Threshold distance: " + str(each_threshold))
    print("Solving time in minutes: " + str(tSolving/60) + ", modelling time: " + str(tModelling/60))
df_combined_output = pd.concat(threshold_frames) if threshold_frames else pd.DataFrame()
    

In [None]:
%%time
# Just one optimization (one number of beds, number of hospitals)
extra_hospitals = 100
max_beds = 10
for each_threshold in distance_thresholds:
    # Use the threshold of this iteration rather than the S left over from an earlier cell.
    obj_val, tModelling, tSolving, status = Optimization1(max_beds, array_household, current_hospitals_ID, new_hospitals_ID, df_combined, each_threshold, extra_hospitals )
    print("Threshold distance: " + str(each_threshold))
    print("Solving time (in minutes): " + str(tSolving/60) + ", modelling time (in minutes): " + str(tModelling/60))
    print("Objective value: " + str(obj_val))

In [None]:
# Show the first 10 rows of the collected optimization results.
df_combined_output.head(10)

# Optimization without capacity

## Model

$\textrm{Maximize } \quad \sum_i v_iy_i $ 

$\textrm{Subject to } \quad x_j = 1 \quad \quad \quad \quad \quad\forall j = 1, ... m $

$ \quad \quad \quad \quad \quad \sum^{M}_{j=m+1} x_j \leq p  $

$ \quad \quad \quad \quad \quad y_i \leq \sum_{j|d_{ij}\leq S} x_j \quad  \forall i \in I$

$ \quad \quad \quad \quad \quad y_i, x_j \in \{0,1\} \quad \quad\forall i \in I, \forall j \in J $

where: 

- $I$ = the index set of households, or clusters of households, indexed by $i = 1, ..., n$. 

- $J$ = the index set of all healthcare sites, where indices $j = 1, ..., m$ are corresponding to the already existing healthcare facilities and indices $j = m+1, ..., M$ are corresponding to potential hospital locations

- $v_i$ = the number of people in (cluster of) household(s) $i$

- $d_{ij}$ = the travel distance from (cluster of) household(s) $i$ to hospital facility $j$

- $S$ = the maximum travel distance from a household (or cluster) to a health care facility

- $p$ = the number of additional hospitals located.

Decision variables:
$
x_j = \begin{cases} 1 & \text{ if hospital } j \text{ is opened} \\
0 & \text{ otherwise}
\end{cases} \\
$
$
y_i = \begin{cases} 1 & \text{ if there is an opened health facility within } S \\ & \text{ kilometers travel distance away from the (cluster of) household(s) } i\\
0 & \text{ otherwise}
\end{cases}
$

It would be interesting to check whether this model gives the same results as the previous (capacitated) model when the number of extra beds allowed there is set to a very large value.


In [None]:
nhospitals = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000]
# Collect one result table per threshold and concatenate once at the end
# (DataFrame.append no longer exists in pandas 2.x).
threshold_frames = []
for each_threshold in distance_thresholds:
    opt_array, tModelling, tSolving = OptimizationModel(array_household, current_hospitals_ID, new_hospitals_ID, df_combined, each_threshold, nhospitals)
    df_opt_outputs = pd.DataFrame(opt_array)
    threshold_frames.append(df_opt_outputs)
    print("Threshold distance: " + str(each_threshold))
    print("Solving time: " + str(tSolving/60) + ", modelling time: " + str(tModelling/60))
df_combined_output = pd.concat(threshold_frames) if threshold_frames else pd.DataFrame()
    

In [None]:
# Show the results collected by the previous cell (one table per distance threshold).
df_combined_output