In [3]:
import pandas as pd
import numpy as np
from gurobipy import Model, GRB, quicksum

## Data loading and cleasing

In [23]:
employment = pd.read_csv('new_employment.csv')
income = pd.read_csv('new_income.csv')
pop =  pd.read_csv('new_population.csv')
fac = pd.read_csv('new_child_care.csv')
loc = pd.read_csv('new_potential_loc.csv')

# population is uniform within each slot
pop['10-12'] = pop['10-14'] * 3 / 5

# identifying high-demand zipcodes
zip = income.merge(employment, how="left", left_on="zip_code", right_on="zip_code")
zip['is_high_demand'] = (zip['employment rate'] >= 0.6) | (zip['average income'] < 60000)

# arranging cols to make a zip dataframe containing information on the area
slots = fac.groupby("zip_code").sum()[['infant_capacity', 'toddler_capacity', 'preschool_capacity', 'school_age_capacity', 'children_capacity', 'total_capacity']]
zip = zip.merge(slots, how="left", left_on="zip_code", right_on="zip_code")
zip = zip.merge(pop[['zip_code', '-5', '5-9', '10-12']], left_on="zip_code", right_on="zip_code")

#identifying deserts
zip['is_desert'] = (zip['is_high_demand'] & (zip['total_capacity'] <= 1/2 * (zip['-5'] + zip['5-9'] + zip['10-12']))) | \
                   (~zip['is_high_demand'] & (zip['total_capacity'] <= 1/3 * (zip['-5'] + zip['5-9'] + zip['10-12'])))


#print(pop)
fac = fac.merge(pop,how="left",left_on="zip_code", right_on="zip_code")
#test = fac.groupby('facility_id').count()
#print(test[test["zip_code"]>1])



# cleaning data
fac = fac[fac['total_capacity'] > 0]
zip['demand_child'] = zip['-5'] + zip['5-9'] + zip['10-12']
zip['under5_capacity'] = zip['infant_capacity'] + zip['toddler_capacity'] + zip['preschool_capacity']
fac['under5_capacity'] = fac['infant_capacity'] + fac['toddler_capacity'] + fac['preschool_capacity']
zip = zip[zip['demand_child'] > 0]

new_fac_cost = {
    'size': ['small', 'medium', 'large'],
    'total_slots': [100, 200, 400],
    'under5_slots': [50, 100, 200],
    'cost': [65000, 95000, 115000]
}
new_fac_cost = pd.DataFrame(new_fac_cost)
new_fac_cost


fac = fac.reset_index(drop=True)
zip = zip.reset_index(drop=True)

zip.head(20)
fac.head(10)



# identifying key elements
zipcode = zip['zip_code'].tolist()
new_fac_size = ['small', 'medium', 'large']
exist_fac = fac['facility_id'].unique()
age = ['under5', 'over5']


## Solving for question 1

In [13]:
m = Model("child_solution")

Set parameter Username
Academic license - for non-commercial use only - expires 2025-09-09


In [14]:
# Decision Variables：new facilities & slot expansion
x = {}
for i in zipcode:
    for j in new_fac_size:
        x[i, j] = m.addVar(vtype = GRB.INTEGER, name = f"x_{i}_{j}")

y = {}
for f in exist_fac:
    for a in age:
        y[f, a] = m.addVar(vtype = GRB.INTEGER, name = f"y_{f}_{a}")


In [15]:
# Objective Function：Min the cost of...
new_fac_cost_expr = quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['cost'].values[0] for i in zipcode for j in new_fac_size)
expand_cost_expr = sum(((20000 + (y[f, 'under5'] + y[f, 'over5']) * 200) * (y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] + 100 * y[f, 'under5']) for f in exist_fac)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)


In [16]:
# Constraints:
# (1) no desert in both high demand and normal demand region
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/2 * zip[zip['zip_code'] == i]['is_high_demand'].values[0], f"no_desert_high_demand_{i}")
    
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/3, f"no_desert_low_demand_{i}")


In [17]:
# (2) under5 take up a higher coverage percantage
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['under5_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, 'under5'] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist()) +
         zip[zip['zip_code'] == i]['under5_capacity'].values[0]) >=
        2/3 * zip[zip['zip_code'] == i]['-5'].values[0], f"under5_demand_{i}")


In [18]:
# (3) expansion maximum scale
for f in exist_fac:
    m.addConstr(quicksum(y[f, a] for a in age) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.20, f"expansion_restriction_{f}")


In [19]:
# Solve
m.optimize()

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.0.0 23A344)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 17815 rows, 32570 columns and 112450 nonzeros
Model fingerprint: 0xfa870b48
Model has 44265 quadratic objective terms
Variable types: 0 continuous, 32570 integer (0 binary)
Coefficient statistics:
  Matrix range     [4e-05, 2e+02]
  Objective range  [2e+01, 1e+05]
  QObjective range [4e-01, 3e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e-04, 8e+03]
Found heuristic solution: objective 5.610000e+17
Presolve removed 16523 rows and 22896 columns
Presolve time: 2.22s
Presolved: 1292 rows, 9674 columns, 12651 nonzeros
Presolved model has 10079 quadratic objective terms
Found heuristic solution: objective 9.573037e+08
Variable types: 0 continuous, 9674 integer (552 binary)
Found heuristic solution: objective 3.460081e+08

Root relaxation: objective 3.272711e+08, 3407 i

In [20]:
if m.status == GRB.OPTIMAL:
    print("Optimal solution found. Results:")
    # 新设施的建设情况
    for i in zipcode:
        for j in new_fac_size:
            if x[i, j].x > 0:
                print(f"Region {i}: Build {x[i, j].x} {j} size new facilities.")
    
    # 现有设施kuojian情况
    for f in exist_fac:
        for a in age:
            if y[f, a].x > 0:
                print(f"Facility {f}: Expand capacity for age group {a} by {y[f, a].x}.")
elif m.status == GRB.INFEASIBLE:
    print("The model is infeasible. Check constraints or data.")
elif m.status == GRB.UNBOUNDED:
    print("The model is unbounded. Consider revising the model.")
else:
    print(f"Optimization was stopped with status {m.status}")


Optimal solution found. Results:
Region 10001: Build 2.0 large size new facilities.
Region 10002: Build 3.0 large size new facilities.
Region 10003: Build 3.0 large size new facilities.
Region 10004: Build 1.0 small size new facilities.
Region 10004: Build 1.0 large size new facilities.
Region 10005: Build 2.0 large size new facilities.
Region 10006: Build 1.0 small size new facilities.
Region 10007: Build 2.0 large size new facilities.
Region 10009: Build 5.0 large size new facilities.
Region 10010: Build 5.0 large size new facilities.
Region 10011: Build 2.0 large size new facilities.
Region 10012: Build 1.0 small size new facilities.
Region 10012: Build 2.0 large size new facilities.
Region 10013: Build 4.0 large size new facilities.
Region 10014: Build 1.0 large size new facilities.
Region 10016: Build 1.0 small size new facilities.
Region 10016: Build 5.0 large size new facilities.
Region 10017: Build 1.0 large size new facilities.
Region 10019: Build 1.0 small size new facilities

## Solving for question 2: Realistic Capacity Expansion and Distance

In [27]:
# defining usful functions in question 2
from haversine import haversine, Unit

potential_loc = pd.read_csv("new_potential_loc.csv")
potential_loc['location_string'] = potential_loc['latitude'].astype(str)+","+potential_loc['longitude'].astype(str)
potential_loc.head(10)


#print(cost_func(300,13))
#print(calc_dist([70,40],[68,38]))


Unnamed: 0,zip_code,latitude,longitude,location_string
0,10001,40.741893,-74.00014,"40.7418931677079,-74.00014023998185"
1,10001,40.752007,-74.005436,"40.7520069733203,-74.00543584763324"
2,10001,40.750545,-73.997147,"40.75054498748023,-73.99714706994247"
3,10001,40.74408,-74.001932,"40.74407999953524,-74.00193173438153"
4,10001,40.74869,-73.999341,"40.74869025486597,-73.99934144838926"
5,10001,40.74753,-73.99995,"40.74753039782525,-73.99995045236471"
6,10001,40.75781,-74.004031,"40.75780987387191,-74.00403121136509"
7,10001,40.740336,-73.999832,"40.74033607906357,-73.99983216542418"
8,10001,40.750221,-73.993516,"40.75022053801741,-73.99351647056301"
9,10001,40.746345,-73.986714,"40.74634513494336,-73.98671444012156"


### setting variables

In [28]:
m = Model("Realistic Capacity Expansion and Distance")

# expansion to existing factories, binary
y = {}
for f in exist_fac:
    for a in age:
        y[f, a] = m.addVar(vtype = GRB.INTEGER, name = f'y_{f}_{a}')

# expansion steps to existing factories, binary
steps = ['step1', 'step2', 'step3']
z = {}
for f in exist_fac:
    for s in steps:
        z[f, s] = m.addVar(vtype = GRB.BINARY, name = f'step_{f}_{s}')


# tf1 
tf1 = {}
tf2 = {}
tf3 = {}
for f in exist_fac:
    tf1[f] = m.addVar(vtype = GRB.INTEGER, name = f'total1_{f}')
    tf2[f] = m.addVar(vtype = GRB.INTEGER, name = f'total2_{f}')
    tf3[f] = m.addVar(vtype = GRB.INTEGER, name = f'total3_{f}')


# the amount of total expansion from each steps (<10%, 10%~15%, >15%)
c1 = {}
c2 = {}
c3 = {}
for f in exist_fac:
    c1[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c1_{f}')
    c2[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c2_{f}')
    c3[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c3_{f}')


x = {}
loc_list = []
loc_str_list = []

for row in range(len(potential_loc)):
    #print()
    loc = potential_loc.loc[row,['latitude','longitude']].tolist()
    loc_list.append(loc)
    lat = loc[0]
    lon = loc[1]
    loc_string = str(lat)+","+str(lon)
    loc_str_list.append(loc_string)
    #print(loc)
    for size in new_fac_size:
        x[loc_string, size] = m.addVar(vtype = GRB.BINARY, name = f"x_{loc_string}_{size}")
        #print(loc_string)
        #print(zipzip,loc,size)
        
        

### target function

In [29]:
expand_cost_expr = quicksum(20000+200*c1[f]*z[f, 'step1']
                       +400*c2[f]*z[f, 'step2']
                       +1000*c3[f]*z[f, 'step2'] for f in exist_fac)
    
new_fac_cost_expr = quicksum(x[j, k] * new_fac_cost[new_fac_cost['size'] == k]['cost'].values[0] for j in loc_str_list for k in new_fac_size)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)

### adding constrains

In [37]:
from gurobipy import quicksum, min_, Var, LinExpr


#(1) no desert
for code in zipcode:
    i_ = potential_loc[potential_loc["zip_code"]==code]["location_string"].to_list()
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == code]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == code]['total_capacity'].values[0]) / zip[zip['zip_code'] == code]['demand_child'].values[0] >=
        1/2 * zip[zip['zip_code'] == code]['is_high_demand'].values[0], f"no_desert_high_demand_{code}")
    
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == code]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == code]['total_capacity'].values[0]) / zip[zip['zip_code'] == code]['demand_child'].values[0] >=
        1/3, f"no_desert_low_demand_{code}")
    
#(2) under5
for code in zipcode:
    i_ = potential_loc[potential_loc["zip_code"]==code]["location_string"].to_list()
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['under5_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, 'under5'] for f in fac[fac['zip_code'] == code]['facility_id'].unique().tolist()) +
         zip[zip['zip_code'] == code]['under5_capacity'].values[0]) >=
        2/3 * zip[zip['zip_code'] == code]['-5'].values[0], f"under5_demand_{code}")
    
#(3) expansion maximum
for f in exist_fac:
    m.addConstr(quicksum(y[f, a] for a in age) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.20, f'expansion_restriction_{f}')

for i in potential_loc["location_string"].to_list():
    m.addConstr(quicksum(x[i, j] for j in new_fac_size) <= 1, f'new_facility_num_{i}')

# constraining on the locations: for all news built facilities, distance to the nearest existing facility must be greater than 0.06miles

def calc_dist(latlon1,latlon2):
    #notice that latlon1 comes in as a string
    latlon1 = latlon1.split(",")
    lat1 = float(latlon1[0])
    lon1 = float(latlon1[1])
    lat2 = latlon2[0]
    lon2 = latlon2[1]
    distance = haversine((lat1, lon1), (lat2, lon2), unit=Unit.MILES)
    return distance

for row in range(len(fac)):
    zip_now = fac.loc[row,"zip_code"]
    latlon2 = fac.loc[row,["latitude","longitude"]].tolist()
    print(latlon2)
    for j in potential_loc[potential_loc["zip_code"]==zip_now]["location_string"].to_list():
        distance = quicksum(x[j, i]*calc_dist(j,latlon2) for i in new_fac_size)
        m.addConstr(distance >= 0.06)

# defining the amount of facilities built within each steps using constraint
for f in exist_fac:
    m.addConstr(tf1[f] == y[f, 'under5'] + y[f, 'over5'])
    m.addConstr(tf2[f] == y[f, 'under5'] + y[f, 'over5'] - 0.1 * fac[fac['facility_id'] == f]['total_capacity'].values[0])
    m.addConstr(tf3[f] == y[f, 'under5'] + y[f, 'over5'] - 0.15 * fac[fac['facility_id'] == f]['total_capacity'].values[0])
for f in exist_fac:
    m.addConstr(c1[f] == min_(tf1[f], constant = 0.1 * fac[fac['facility_id'] == f]['total_capacity'].values[0]))
    m.addConstr(c2[f] == min_(tf2[f], constant =  0.15 * fac[fac['facility_id'] == f]['total_capacity'].values[0]))
    m.addConstr(c3[f] == min_(tf3[f], constant =  0.05 * fac[fac['facility_id'] == f]['total_capacity'].values[0]))

m.update()

#(5) constraint for var z
for f in exist_fac:
    m.addConstr((y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.1 + (1 - z[f, 'step1']) * 1000, 'step1_limit')
    m.addConstr((y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] >= 0.1 + z[f, 'step2'], 'step2_limit')
    m.addConstr((y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.15 + (1 - z[f, 'step2']) * 1000, 'step2_upper_limit')
    m.addConstr((y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] >= 0.15 + z[f, 'step3'], 'step3_limit')
    m.addConstr((y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.2 + (1 - z[f, 'step3']) * 1000, 'step3_upper_limit')


[40.748836, -73.99981]
[40.748911, -74.001546]
[40.752093, -74.002588]
[40.748296, -74.001263]
[40.749247, -74.001598]
[40.748836, -73.99981]
[40.747845, -73.989419]
[40.755382, -73.99933]
[40.874106, -73.833117]
[40.720388, -73.990992]
[40.72131, -73.986567]
[40.712202, -73.996677]
[40.717059, -73.990634]
[40.711922, -73.986388]
[40.710371, -73.98999]
[40.716446, -73.993532]
[40.719132, -73.979369]
[40.712107, -73.992011]
[40.714323, -73.995529]
[40.71162, -73.989382]
[40.714515, -73.983172]
[40.712323, -73.989475]
[40.716809, -73.985242]
[40.718631, -73.993771]
[40.717675, -73.982399]
[40.710808, -73.994435]
[40.712201, -73.983522]
[40.712331, -73.990589]
[40.71162, -73.989382]
[40.718949, -73.981744]
[40.720527, -73.985777]
[40.718498, -73.984215]
[40.718193, -73.97541]
[40.717367, -73.988462]
[40.714323, -73.995529]
[40.716882, -73.988856]
[40.712107, -73.992011]
[40.71171, -73.981341]
[40.712107, -73.992011]
[40.713158, -73.991349]
[40.71816, -73.981774]
[40.716635, -73.980113]
[4

In [38]:
m.optimize()

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.0.0 23A344)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 6540615 rows, 469205 columns and 20923895 nonzeros
Model fingerprint: 0x547cd24a
Model has 44265 quadratic objective terms
Model has 177060 general constraints
Variable types: 44265 continuous, 424940 integer (351165 binary)
Coefficient statistics:
  Matrix range     [4e-05, 1e+03]
  Objective range  [6e+04, 1e+05]
  QObjective range [4e+02, 2e+03]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e-04, 8e+03]
  GenCon const rng [2e-01, 1e+02]
Presolve time: 0.20s

Explored 0 nodes (0 simplex iterations) in 0.44 seconds (0.13 work units)
Thread count was 1 (of 10 available processors)

Solution count 0

Model is infeasible or unbounded
Best objective -, best bound -, gap -
