In [1]:
import pandas as pd
import numpy as np
from gurobipy import Model, GRB, quicksum

## Data loading and cleasing

In [2]:
employment = pd.read_csv('new_employment.csv')
income = pd.read_csv('new_income.csv')
pop =  pd.read_csv('new_population.csv')
fac = pd.read_csv('new_child_care.csv')
loc = pd.read_csv('new_potential_loc.csv')

# population is uniform within each slot
pop['10-12'] = pop['10-14'] * 3 / 5

# identifying high-demand zipcodes
zip = income.merge(employment, how="left", left_on="zip_code", right_on="zip_code")
zip['is_high_demand'] = (zip['employment rate'] >= 0.6) | (zip['average income'] < 60000)

# arranging cols to make a zip dataframe containing information on the area
slots = fac.groupby("zip_code").sum()[['infant_capacity', 'toddler_capacity', 'preschool_capacity', 'school_age_capacity', 'children_capacity', 'total_capacity']]
zip = zip.merge(slots, how="left", left_on="zip_code", right_on="zip_code")
zip = zip.merge(pop[['zip_code', '-5', '5-9', '10-12']], left_on="zip_code", right_on="zip_code")

#identifying deserts
zip['is_desert'] = (zip['is_high_demand'] & (zip['total_capacity'] <= 1/2 * (zip['-5'] + zip['5-9'] + zip['10-12']))) | \
                   (~zip['is_high_demand'] & (zip['total_capacity'] <= 1/3 * (zip['-5'] + zip['5-9'] + zip['10-12'])))


#print(pop)
fac = fac.merge(pop,how="left",left_on="zip_code", right_on="zip_code")
#test = fac.groupby('facility_id').count()
#print(test[test["zip_code"]>1])



# cleaning data
fac = fac[fac['total_capacity'] > 0]
zip['demand_child'] = zip['-5'] + zip['5-9'] + zip['10-12']
zip['under5_capacity'] = zip['infant_capacity'] + zip['toddler_capacity'] + zip['preschool_capacity']
fac['under5_capacity'] = fac['infant_capacity'] + fac['toddler_capacity'] + fac['preschool_capacity']
zip = zip[zip['demand_child'] > 0]

new_fac_cost = {
    'size': ['small', 'medium', 'large'],
    'total_slots': [100, 200, 400],
    'under5_slots': [50, 100, 200],
    'cost': [65000, 95000, 115000]
}
new_fac_cost = pd.DataFrame(new_fac_cost)
new_fac_cost


fac = fac.reset_index(drop=True)
zip = zip.reset_index(drop=True)

zip.head(20)
fac.head(10)



# identifying key elements
zipcode = zip['zip_code'].tolist()
new_fac_size = ['small', 'medium', 'large']
exist_fac = fac['facility_id'].unique()
age = ['under5', 'over5']


## Solving for question 1

In [13]:
m = Model("child_solution")

Set parameter Username
Academic license - for non-commercial use only - expires 2025-09-09


In [14]:
# Decision Variables：new facilities & slot expansion
x = {}
for i in zipcode:
    for j in new_fac_size:
        x[i, j] = m.addVar(vtype = GRB.INTEGER, name = f"x_{i}_{j}")

y = {}
for f in exist_fac:
    for a in age:
        y[f, a] = m.addVar(vtype = GRB.INTEGER, name = f"y_{f}_{a}")


In [15]:
# Objective Function：Min the cost of...
new_fac_cost_expr = quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['cost'].values[0] for i in zipcode for j in new_fac_size)
expand_cost_expr = sum(((20000 + (y[f, 'under5'] + y[f, 'over5']) * 200) * (y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] + 100 * y[f, 'under5']) for f in exist_fac)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)


In [16]:
# Constraints:
# (1) no desert in both high demand and normal demand region
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/2 * zip[zip['zip_code'] == i]['is_high_demand'].values[0], f"no_desert_high_demand_{i}")
    
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/3, f"no_desert_low_demand_{i}")


In [17]:
# (2) under5 take up a higher coverage percantage
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['under5_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, 'under5'] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist()) +
         zip[zip['zip_code'] == i]['under5_capacity'].values[0]) >=
        2/3 * zip[zip['zip_code'] == i]['-5'].values[0], f"under5_demand_{i}")


In [18]:
# (3) expansion maximum scale
for f in exist_fac:
    m.addConstr(quicksum(y[f, a] for a in age) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.20, f"expansion_restriction_{f}")


In [19]:
# Solve
m.optimize()

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.0.0 23A344)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 17815 rows, 32570 columns and 112450 nonzeros
Model fingerprint: 0xfa870b48
Model has 44265 quadratic objective terms
Variable types: 0 continuous, 32570 integer (0 binary)
Coefficient statistics:
  Matrix range     [4e-05, 2e+02]
  Objective range  [2e+01, 1e+05]
  QObjective range [4e-01, 3e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e-04, 8e+03]
Found heuristic solution: objective 5.610000e+17
Presolve removed 16523 rows and 22896 columns
Presolve time: 2.22s
Presolved: 1292 rows, 9674 columns, 12651 nonzeros
Presolved model has 10079 quadratic objective terms
Found heuristic solution: objective 9.573037e+08
Variable types: 0 continuous, 9674 integer (552 binary)
Found heuristic solution: objective 3.460081e+08

Root relaxation: objective 3.272711e+08, 3407 i

In [20]:
if m.status == GRB.OPTIMAL:
    print("Optimal solution found. Results:")
    # 新设施的建设情况
    for i in zipcode:
        for j in new_fac_size:
            if x[i, j].x > 0:
                print(f"Region {i}: Build {x[i, j].x} {j} size new facilities.")
    
    # 现有设施kuojian情况
    for f in exist_fac:
        for a in age:
            if y[f, a].x > 0:
                print(f"Facility {f}: Expand capacity for age group {a} by {y[f, a].x}.")
elif m.status == GRB.INFEASIBLE:
    print("The model is infeasible. Check constraints or data.")
elif m.status == GRB.UNBOUNDED:
    print("The model is unbounded. Consider revising the model.")
else:
    print(f"Optimization was stopped with status {m.status}")


Optimal solution found. Results:
Region 10001: Build 2.0 large size new facilities.
Region 10002: Build 3.0 large size new facilities.
Region 10003: Build 3.0 large size new facilities.
Region 10004: Build 1.0 small size new facilities.
Region 10004: Build 1.0 large size new facilities.
Region 10005: Build 2.0 large size new facilities.
Region 10006: Build 1.0 small size new facilities.
Region 10007: Build 2.0 large size new facilities.
Region 10009: Build 5.0 large size new facilities.
Region 10010: Build 5.0 large size new facilities.
Region 10011: Build 2.0 large size new facilities.
Region 10012: Build 1.0 small size new facilities.
Region 10012: Build 2.0 large size new facilities.
Region 10013: Build 4.0 large size new facilities.
Region 10014: Build 1.0 large size new facilities.
Region 10016: Build 1.0 small size new facilities.
Region 10016: Build 5.0 large size new facilities.
Region 10017: Build 1.0 large size new facilities.
Region 10019: Build 1.0 small size new facilities

## Solving for question 2: Realistic Capacity Expansion and Distance

In [3]:
# defining usful functions in question 2
from haversine import haversine, Unit

potential_loc = pd.read_csv("new_potential_loc.csv")
potential_loc['location_string'] = potential_loc['latitude'].astype(str)+","+potential_loc['longitude'].astype(str)
potential_loc.head(10)


Unnamed: 0,zip_code,latitude,longitude,location_string
0,10001,40.741893,-74.00014,"40.7418931677079,-74.00014023998185"
1,10001,40.752007,-74.005436,"40.7520069733203,-74.00543584763324"
2,10001,40.750545,-73.997147,"40.75054498748023,-73.99714706994247"
3,10001,40.74408,-74.001932,"40.74407999953524,-74.00193173438153"
4,10001,40.74869,-73.999341,"40.74869025486597,-73.99934144838926"
5,10001,40.74753,-73.99995,"40.74753039782525,-73.99995045236471"
6,10001,40.75781,-74.004031,"40.75780987387191,-74.00403121136509"
7,10001,40.740336,-73.999832,"40.74033607906357,-73.99983216542418"
8,10001,40.750221,-73.993516,"40.75022053801741,-73.99351647056301"
9,10001,40.746345,-73.986714,"40.74634513494336,-73.98671444012156"


### setting variables

In [4]:
m = Model("Realistic Capacity Expansion and Distance")

# expansion to existing factories, binary
y = {}
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    # notice that facilities shouldn't have slot greater than 500
    for a in age:
        y[f, a] = m.addVar(vtype = GRB.INTEGER,lb=0, name = f'y_{f}_{a}')

# expansion steps to existing factories, binary
steps = ['step2', 'step3']
z = {}
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    for s in steps:
        z[f, s] = m.addVar(vtype = GRB.BINARY, name = f'step_{f}_{s}')


# tf1 
tf1 = {}
tf2 = {}
tf3 = {}

tf1_super = {}
tf2_super = {}
tf3_super = {}

for f in exist_fac:
    tf1[f] = m.addVar(vtype = GRB.CONTINUOUS, lb = -10000, name = f'total1_{f}')
    tf2[f] = m.addVar(vtype = GRB.CONTINUOUS, lb = -10000, name = f'total2_{f}')
    tf3[f] = m.addVar(vtype = GRB.CONTINUOUS, lb = -10000, name = f'total3_{f}')
    tf1_super[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'total1_super_{f}')
    tf2_super[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'total2_super_{f}')
    tf3_super[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'total3_super_{f}')


# the amount of total expansion from each steps (<10%, 10%~15%, >15%)
c1 = {}
c2 = {}
c3 = {}
for f in exist_fac:
    c1[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c1_{f}')
    c2[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c2_{f}')
    c3[f] = m.addVar(vtype = GRB.CONTINUOUS, name = f'c3_{f}')


x = {}
loc_list = []
loc_str_list = []

for row in range(len(potential_loc)):
    #print()
    loc = potential_loc.loc[row,['latitude','longitude']].tolist()
    loc_list.append(loc)
    lat = loc[0]
    lon = loc[1]
    loc_string = str(lat)+","+str(lon)
    loc_str_list.append(loc_string)
    #print(loc)
    for size in new_fac_size:
        x[loc_string, size] = m.addVar(vtype = GRB.BINARY, name = f"x_{loc_string}_{size}")
        #print(loc_string)
        #print(zipzip,loc,size)
        
        

Set parameter Username
Academic license - for non-commercial use only - expires 2025-09-09


### target function

In [5]:
expand_cost_expr = quicksum(
                      (20000 * c1[f]/fac[fac['facility_id'] == f]['total_capacity'].values[0] + 200 * c1[f]) * 1 +
                      (20000 * c2[f]/fac[fac['facility_id'] == f]['total_capacity'].values[0] + 400 * c2[f]) * z[f, 'step2'] +
                      (20000 * c3[f]/fac[fac['facility_id'] == f]['total_capacity'].values[0] + 1000 * c3[f]) * z[f, 'step3'] +
                      y[f, 'under5'] * 100
                      for f in fac[fac["total_capacity"]<=500]['facility_id'].unique()) 
    
new_fac_cost_expr = quicksum(x[j, k] * new_fac_cost[new_fac_cost['size'] == k]['cost'].values[0] for j in loc_str_list for k in new_fac_size)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)

### adding constrains

In [6]:
from gurobipy import quicksum, min_, Var, LinExpr, max_


# distance between any two new-built facilities should be greater than 0.06 (consider only within zipcodes)

def calc_dist(latlon1,latlon2):
    #notice that latlon1 comes in as a string
    latlon1 = latlon1.split(",")
    lat1 = float(latlon1[0])
    lon1 = float(latlon1[1])
    lat2 = latlon2[0]
    lon2 = latlon2[1]
    distance = haversine((lat1, lon1), (lat2, lon2), unit=Unit.MILES)
    return distance

def loc_distance(lat1, lon1, lat2,lon2):
    distance = haversine((lat1, lon1), (lat2, lon2), unit=Unit.MILES)
    return distance

for code in zipcode:
    loc_in_this_zip = potential_loc[potential_loc["zip_code"]==code]['location_string'].tolist()
    x_sum = {l: quicksum(x[l, j] for j in new_fac_size) for l in loc_in_this_zip}
    for l1 in loc_in_this_zip:
        for l2 in loc_in_this_zip:
            if l1 < l2:
                dist = loc_distance(float(l1.split(",")[0]),float(l1.split(",")[1]), float(l2.split(",")[0]),float(l2.split(",")[1]))
                m.addConstr(
                    dist <= 0.06 + (1 - x_sum[l1]) * 1000 + (1 - x_sum[l2]) * 1000,
                    name=f'distance_{l1}_{l2}'
                )

print('4 done')


#(1) no desert
for code in zipcode:
    i_ = potential_loc[potential_loc["zip_code"]==code]["location_string"].to_list()
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[(fac['zip_code'] == code) & (fac['total_capacity']<=500)]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == code]['total_capacity'].values[0]) / zip[zip['zip_code'] == code]['demand_child'].values[0] >=
        1/2 * zip[zip['zip_code'] == code]['is_high_demand'].values[0], f"no_desert_high_demand_{code}")
    
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[(fac['zip_code'] == code) & (fac['total_capacity']<=500)]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == code]['total_capacity'].values[0]) / zip[zip['zip_code'] == code]['demand_child'].values[0] >=
        1/3, f"no_desert_low_demand_{code}")

print('1 done')

#(2) under5
for code in zipcode:
    i_ = potential_loc[potential_loc["zip_code"]==code]["location_string"].to_list()
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['under5_slots'].values[0] for i in i_ for j in new_fac_size) +
         quicksum(y[f, 'under5'] for f in fac[(fac['zip_code'] == code) & (fac['total_capacity']<=500)]['facility_id'].unique().tolist()) +
         zip[zip['zip_code'] == code]['under5_capacity'].values[0]) >=
        2/3 * zip[zip['zip_code'] == code]['-5'].values[0], f"under5_demand_{code}")

print('2 done')

#(3) expansion maximum + new facility limit
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    total_expansion = (y[f,'under5']+y[f,'over5'])
    total_cap = fac[(fac['facility_id' ] == f)]['total_capacity'].values[0]

    m.addConstr(total_expansion<=0.2*total_cap,name=f"expansion_ratio_lim_{f}")
    m.addConstr(total_expansion+total_cap<=500, name=f"total_cap_lim_{f}")



for i in potential_loc["location_string"].to_list():
    m.addConstr(quicksum(x[i, j] for j in new_fac_size) <= 1, f'new_facility_num_{i}')

# constraining on the locations: for all news built facilities, distance to the nearest existing facility must be greater than 0.06miles

print('3 done')



# defining the amount of facilities built within each steps using constraint
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    m.addConstr(tf1[f] == y[f, 'under5'] + y[f, 'over5'], f'tf1_{f}')
    m.addConstr(tf2[f] == y[f, 'under5'] + y[f, 'over5'] - 0.1 * fac[fac['facility_id'] == f]['total_capacity'].values[0], f'tf2_{f}')
    m.addConstr(tf3[f] == y[f, 'under5'] + y[f, 'over5'] - 0.15 * fac[fac['facility_id'] == f]['total_capacity'].values[0], f'tf3_{f}')

    m.addConstr(tf1_super[f] == max_(tf1[f], constant = 0))
    m.addConstr(tf2_super[f] == max_(tf2[f], constant = 0))
    m.addConstr(tf3_super[f] == max_(tf3[f], constant = 0))


    
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    m.addConstr(c1[f] == min_(tf1_super[f], constant = 0.1 * fac[fac['facility_id'] == f]['total_capacity'].values[0]), f'c1_{f}')
    m.addConstr(c2[f] == min_(tf2_super[f], constant =  0.05 * fac[fac['facility_id'] == f]['total_capacity'].values[0]), f'c2_{f}')
    m.addConstr(c3[f] == min_(tf3_super[f], constant =  0.05 * fac[fac['facility_id'] == f]['total_capacity'].values[0]), f'c3_{f}')

print('5 done')
m.update()

#(5) constraint for var z
for f in fac[fac["total_capacity"]<=500]['facility_id'].unique():
    total_capacity = fac[fac['facility_id'] == f]['total_capacity'].values[0]
    total_y = y[f, 'under5'] + y[f, 'over5']
    # 公共表达式
    #ratio = total_y / total_capacity
    ratio_1 = c1[f] / total_capacity
    ratio_2 = c2[f] / total_capacity
    ratio_3 = c3[f] / total_capacity


    # 添加约束
    #m.addConstr(0.1*total_capacity*z[f,"step2"]<=c1[f])
    #m.addConstr(0.05*total_capacity*z[f,"step3"]<=c2[f])
    #m.addConstr(0.05*total_capacity*z[f,"step2"]>=c2[f])
    #m.addConstr(c3[f]<=0.05*total_capacity*z[f,"step3"])
    # 添加约束
    m.addConstr(ratio_1 <= 0.1 , 'step1_upper_limit')
    m.addConstr(ratio_1 >= 0.1 * z[f, 'step2'], 'step1_lower_limit')
    m.addConstr(ratio_2 <= 0.05 * z[f, 'step2'], 'step2_upper_limit')
    m.addConstr(ratio_2 >= 0.05 * z[f, 'step3'], 'step2_lower_limit')
    m.addConstr(ratio_3 <= 0.05 * z[f, 'step3'], 'step3_upper_limit')
    m.addConstr(ratio_3 >= 0 , 'step3_lower_limit')

# 添加约束

print('6 done')


4 done
1 done
2 done
3 done
5 done
6 done


In [99]:
m.optimize()
m.computeIIS()



m.write('infeasible4.ilp')

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.0.0 23A344)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 5316511 rows, 498659 columns and 31931633 nonzeros
Model fingerprint: 0x0d3596f5
Model has 29482 quadratic objective terms
Model has 88446 general constraints
Variable types: 132795 continuous, 365864 integer (336382 binary)
Coefficient statistics:
  Matrix range     [4e-05, 1e+03]
  Objective range  [1e+02, 1e+05]
  QObjective range [9e+02, 2e+04]
  Bounds range     [1e+00, 1e+04]
  RHS range        [1e-04, 8e+03]
  GenCon const rng [2e-01, 5e+01]
Presolve removed 423893 rows and 83911 columns (presolve time = 5s) ...
Presolve removed 810896 rows and 158601 columns (presolve time = 10s) ...
Presolve removed 2285944 rows and 158601 columns (presolve time = 15s) ...
Presolve removed 4449752 rows and 158601 columns (presolve time = 20s) ...
Presolve added 0 rows and 280386 column