In [52]:
import pandas as pd
import numpy as np
from gurobipy import Model, GRB, quicksum

## Data loading and cleasing

In [53]:
employment = pd.read_csv('new_employment.csv')
income = pd.read_csv('new_income.csv')
pop =  pd.read_csv('new_population.csv')
fac = pd.read_csv('new_child_care.csv')
loc = pd.read_csv('new_potential_loc.csv')

In [54]:

# population is uniform within each slot
pop['10-12'] = pop['10-14'] * 3 / 5

In [55]:
# identifying high-demand zipcodes
zip = income.merge(employment, how="left", left_on="zip_code", right_on="zip_code")
zip['is_high_demand'] = (zip['employment rate'] >= 0.6) | (zip['average income'] < 60000)


In [56]:
# arranging cols

slots = fac.groupby("zip_code").sum()[['infant_capacity', 'toddler_capacity', 'preschool_capacity', 'school_age_capacity', 'children_capacity', 'total_capacity']]
zip = zip.merge(slots, how="left", left_on="zip_code", right_on="zip_code")
zip = zip.merge(pop[['zip_code', '-5', '5-9', '10-12']], left_on="zip_code", right_on="zip_code")


In [57]:
#identifying deserts
zip['is_desert'] = (zip['is_high_demand'] & (zip['total_capacity'] <= 1/2 * (zip['-5'] + zip['5-9'] + zip['10-12']))) | \
                   (~zip['is_high_demand'] & (zip['total_capacity'] <= 1/3 * (zip['-5'] + zip['5-9'] + zip['10-12'])))
zip.head(20)

Unnamed: 0,zip_code,average income,employment rate,is_high_demand,infant_capacity,toddler_capacity,preschool_capacity,school_age_capacity,children_capacity,total_capacity,-5,5-9,10-12,is_desert
0,10001,102878.033603,0.595097,False,0,0,0,585,24,609,744,784,565.2,True
1,10002,59604.041165,0.520662,True,0,0,18,4508,203,4729,2142,3046,1918.8,False
2,10003,114273.049645,0.497244,False,0,0,0,1995,0,1995,1440,1034,571.8,False
3,10004,132004.310345,0.506661,False,0,0,0,263,0,263,433,182,96.6,False
4,10005,121437.713311,0.665833,True,0,0,0,39,0,39,484,204,137.4,True
5,10006,126377.118644,0.631692,True,0,0,14,142,0,156,128,96,45.0,False
6,10007,138853.904282,0.52891,False,0,0,0,284,0,284,605,451,174.0,True
7,10009,77133.233533,0.514567,False,0,0,18,1660,106,1784,1896,1658,1101.0,False
8,10010,116272.69881,0.492749,False,0,0,0,234,0,234,1422,1592,568.8,True
9,10011,120420.792079,0.557,False,0,0,42,1908,6,1956,1209,1200,818.4,False


In [58]:
#print(pop)
fac = fac.merge(pop,how="left",left_on="zip_code", right_on="zip_code")
#test = fac.groupby('facility_id').count()
#print(test[test["zip_code"]>1])
fac

Unnamed: 0,zip_code,facility_id,program_type,facility_status,facility_name,city,school_district_name,infant_capacity,toddler_capacity,preschool_capacity,...,45-49,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85+,10-12
0,10001,837597,SACC,Registration,I Have a Dream Foundation,New York,Manhattan 2,0,0,0,...,1903,1704,1225,1323,933,815,616,488,576,565.2
1,10001,661697,GFDC,License,Chelsea Little Angels Day Care,New York,Manhattan 2,0,0,0,...,1903,1704,1225,1323,933,815,616,488,576,565.2
2,10001,837329,SACC,Registration,Bright Horizons at Hudson Yards,New York,Manhattan 2,0,0,0,...,1903,1704,1225,1323,933,815,616,488,576,565.2
3,10001,350076,FDC,Registration,GRAMMAS HANDS,New York,Manhattan 2,0,0,0,...,1903,1704,1225,1323,933,815,616,488,576,565.2
4,10001,292419,SACC,Registration,The Hudson Guild @26th Street,New York,Manhattan 2,0,0,0,...,1903,1704,1225,1323,933,815,616,488,576,565.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14751,14770,115279,SACC,Registration,Young Men's Christian Association of Olean NY ...,Portville,Portville,0,0,15,...,218,210,177,181,120,191,83,80,41,133.8
14752,14772,782407,SACC,Registration,"The Relief Zone, Inc.",Randolph,Randolph,0,0,32,...,334,238,272,311,306,170,196,55,58,134.4
14753,14772,888218,GFDC,License,Main Street Munchkins,Randolph,Randolph,0,0,0,...,334,238,272,311,306,170,196,55,58,134.4
14754,14805,866946,FDC,Registration,Alpine Children's House,Alpine,Trumansburg,0,0,0,...,31,29,143,105,36,141,12,8,10,17.4


In [60]:
fac = fac[fac['total_capacity'] > 0]

zip['demand_child'] = zip['-5'] + zip['5-9'] + zip['10-12']
zip['under5_capacity'] = zip['infant_capacity'] + zip['toddler_capacity'] + zip['preschool_capacity']
fac['under5_capacity'] = fac['infant_capacity'] + fac['toddler_capacity'] + fac['preschool_capacity']
zip = zip[zip['demand_child'] > 0]

In [61]:
new_fac_cost = {
    'size': ['small', 'medium', 'large'],
    'total_slots': [100, 200, 400],
    'under5_slots': [50, 100, 200],
    'cost': [65000, 95000, 115000]
}
new_fac_cost = pd.DataFrame(new_fac_cost)
new_fac_cost

Unnamed: 0,size,total_slots,under5_slots,cost
0,small,100,50,65000
1,medium,200,100,95000
2,large,400,200,115000


In [62]:
zipcode = zip['zip_code'].tolist()
new_fac_size = ['small', 'medium', 'large']
exist_fac = fac['facility_id'].unique()
age = ['under5', 'over5']

## Solving for question 1

In [13]:
m = Model("child_solution")

Set parameter Username
Academic license - for non-commercial use only - expires 2025-09-09


In [14]:
# Decision Variables：new facilities & slot expansion
x = {}
for i in zipcode:
    for j in new_fac_size:
        x[i, j] = m.addVar(vtype = GRB.INTEGER, name = f"x_{i}_{j}")

y = {}
for f in exist_fac:
    for a in age:
        y[f, a] = m.addVar(vtype = GRB.INTEGER, name = f"y_{f}_{a}")


In [15]:
# Objective Function：Min the cost of...
new_fac_cost_expr = quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['cost'].values[0] for i in zipcode for j in new_fac_size)
expand_cost_expr = sum(((20000 + (y[f, 'under5'] + y[f, 'over5']) * 200) * (y[f, 'under5'] + y[f, 'over5']) / fac[fac['facility_id'] == f]['total_capacity'].values[0] + 100 * y[f, 'under5']) for f in exist_fac)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)


In [16]:
# Constraints:
# (1) no desert in both high demand and normal demand region
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/2 * zip[zip['zip_code'] == i]['is_high_demand'].values[0], f"no_desert_high_demand_{i}")
    
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['total_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, a] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist() for a in age) +
         zip[zip['zip_code'] == i]['total_capacity'].values[0]) / zip[zip['zip_code'] == i]['demand_child'].values[0] >=
        1/3, f"no_desert_low_demand_{i}")


In [17]:
# (2) under5 take up a higher coverage percantage
for i in zipcode:
    m.addConstr(
        (quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['under5_slots'].values[0] for j in new_fac_size) +
         quicksum(y[f, 'under5'] for f in fac[fac['zip_code'] == i]['facility_id'].unique().tolist()) +
         zip[zip['zip_code'] == i]['under5_capacity'].values[0]) >=
        2/3 * zip[zip['zip_code'] == i]['-5'].values[0], f"under5_demand_{i}")


In [18]:
# (3) expansion maximum scale
for f in exist_fac:
    m.addConstr(quicksum(y[f, a] for a in age) / fac[fac['facility_id'] == f]['total_capacity'].values[0] <= 0.20, f"expansion_restriction_{f}")


In [19]:
# Solve
m.optimize()

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[arm] - Darwin 23.0.0 23A344)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 17815 rows, 32570 columns and 112450 nonzeros
Model fingerprint: 0xfa870b48
Model has 44265 quadratic objective terms
Variable types: 0 continuous, 32570 integer (0 binary)
Coefficient statistics:
  Matrix range     [4e-05, 2e+02]
  Objective range  [2e+01, 1e+05]
  QObjective range [4e-01, 3e+02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e-04, 8e+03]
Found heuristic solution: objective 5.610000e+17
Presolve removed 16523 rows and 22896 columns
Presolve time: 2.22s
Presolved: 1292 rows, 9674 columns, 12651 nonzeros
Presolved model has 10079 quadratic objective terms
Found heuristic solution: objective 9.573037e+08
Variable types: 0 continuous, 9674 integer (552 binary)
Found heuristic solution: objective 3.460081e+08

Root relaxation: objective 3.272711e+08, 3407 i

In [20]:
if m.status == GRB.OPTIMAL:
    print("Optimal solution found. Results:")
    # 新设施的建设情况
    for i in zipcode:
        for j in new_fac_size:
            if x[i, j].x > 0:
                print(f"Region {i}: Build {x[i, j].x} {j} size new facilities.")
    
    # 现有设施kuojian情况
    for f in exist_fac:
        for a in age:
            if y[f, a].x > 0:
                print(f"Facility {f}: Expand capacity for age group {a} by {y[f, a].x}.")
elif m.status == GRB.INFEASIBLE:
    print("The model is infeasible. Check constraints or data.")
elif m.status == GRB.UNBOUNDED:
    print("The model is unbounded. Consider revising the model.")
else:
    print(f"Optimization was stopped with status {m.status}")


Optimal solution found. Results:
Region 10001: Build 2.0 large size new facilities.
Region 10002: Build 3.0 large size new facilities.
Region 10003: Build 3.0 large size new facilities.
Region 10004: Build 1.0 small size new facilities.
Region 10004: Build 1.0 large size new facilities.
Region 10005: Build 2.0 large size new facilities.
Region 10006: Build 1.0 small size new facilities.
Region 10007: Build 2.0 large size new facilities.
Region 10009: Build 5.0 large size new facilities.
Region 10010: Build 5.0 large size new facilities.
Region 10011: Build 2.0 large size new facilities.
Region 10012: Build 1.0 small size new facilities.
Region 10012: Build 2.0 large size new facilities.
Region 10013: Build 4.0 large size new facilities.
Region 10014: Build 1.0 large size new facilities.
Region 10016: Build 1.0 small size new facilities.
Region 10016: Build 5.0 large size new facilities.
Region 10017: Build 1.0 large size new facilities.
Region 10019: Build 1.0 small size new facilities

## Solving for question 2: Realistic Capacity Expansion and Distance

In [63]:
# defining usful functions in question 2
from haversine import haversine, Unit

potential_loc = pd.read_csv("new_potential_loc.csv")
potential_loc['location_string'] = potential_loc['latitude'].astype(str)+","+potential_loc['longitude'].astype(str)
print(potential_loc.head(10))


'''
def cost_func(existing,expands):
    if expands/existing < 0.1:
        cost = 20000 + 200*expands
    elif 0.1 < expands/existing < 0.15:
        cost = 20000 + 200*0.1*existing + (expands-0.1*existing)*400
    elif 0.15 < expands/existing < 0.2:
        cost = 20000 + 200*0.1*existing + 400*0.05*existing +(expands-0.15*existing)*1000
    return cost

def cost_func_2(existing,expands):
    cost = (20000 + 200*expands)*(expands/existing < 0.1)
    +(20000 + (200*0.1*existing + (expands-0.1*existing)*400))*(0.1 < expands/existing < 0.15)
    +(20000 + 200*0.1*existing + 400*0.05*existing +(expands-0.15*existing)*1000)*(expands/existing < 0.2)
    return cost
'''

def calc_dist(latlon1,latlon2):
    #duplet
    lat1 = latlon1[0]
    lon1 = latlon1[1]
    lat2 = latlon2[0]
    lon2 = latlon2[1]
    distance = haversine((lat1, lon1), (lat2, lon2), unit=Unit.MILES)

    return distance

#print(cost_func(300,13))
#print(calc_dist([70,40],[68,38]))


   zip_code   latitude  longitude                       location_string
0     10001  40.741893 -74.000140   40.7418931677079,-74.00014023998185
1     10001  40.752007 -74.005436   40.7520069733203,-74.00543584763324
2     10001  40.750545 -73.997147  40.75054498748023,-73.99714706994247
3     10001  40.744080 -74.001932  40.74407999953524,-74.00193173438153
4     10001  40.748690 -73.999341  40.74869025486597,-73.99934144838926
5     10001  40.747530 -73.999950  40.74753039782525,-73.99995045236471
6     10001  40.757810 -74.004031  40.75780987387191,-74.00403121136509
7     10001  40.740336 -73.999832  40.74033607906357,-73.99983216542418
8     10001  40.750221 -73.993516  40.75022053801741,-73.99351647056301
9     10001  40.746345 -73.986714  40.74634513494336,-73.98671444012156


### setting variables

In [64]:
m = Model("Realistic Capacity Expansion and Distance")


# adding slots to existing facilities
# add
expansions_types = ['expand1','expand2','expand3']


y = {}
for f in exist_fac:
    for a in age:
        for expansion in expansions_types:
            y[f, a, expansion] = m.addVar(vtype = GRB.INTEGER, name = f"y_{f}_{a}_{expansion}")

# adding new fac of given location with binary ()
w1 = m.addVar(vtype = GRB.BINARY, name = f"w_{1}")  # denotes expansion proportion greater than 10% but smaller than 15%
w2 = m.addVar(vtype = GRB.BINARY, name = f"w_{2}")  # denotes expansion proportion greater than 15% but smaller than 20%

x = {}
loc_list = []
loc_str_list = []
for zipzip in potential_loc['zip_code'].unique():
    #print(zipzip)
    now_locs = potential_loc[potential_loc['zip_code']==zipzip].reset_index(drop=True)
    #print(now_locs)
    for row in range(len(now_locs)):
        #print()
        loc = now_locs.loc[row,['latitude','longitude']].tolist()
        loc_list.append(loc)
        lat = loc[0]
        lon = loc[1]
        loc_string = str(lat)+","+str(lon)
        loc_str_list.append(loc_string)
        #print(loc)
        for size in new_fac_size:
            x[zipzip, loc_string, size] = m.addVar(vtype = GRB.BINARY, name = f"x_{zipzip}_{loc_string}_{size}")
            #print(loc_string)
            #print(zipzip,loc,size)
        
        

### target function

In [67]:

'''
(y[f,"under5",'expand1']+y[f,"over5",'expand1'])  # built below 10%
(y[f,"under5",'expand2']+y[f,"over5",'expand2'])  # built from  10%~15%
(y[f,"under5",'expand2']+y[f,"over5",'expand3'])  # built from  15%~20%

fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1 # threshold 10%
fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1 # threshold 15%
fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1 # threshold 20%
'''

expand_cost_expr = sum(20000+200*(y[f,"under5",'expand1']+y[f,"over5",'expand1'])
                       +400*(y[f,"under5",'expand2']+y[f,"over5",'expand2'])
                       +1000*(y[f,"under5",'expand2']+y[f,"over5",'expand3']) for f in exist_fac)
    
new_fac_cost_expr = quicksum(x[i, j, k] * new_fac_cost[new_fac_cost['size'] == k]['cost'].values[0] for i in zipcode for j in potential_loc[potential_loc['zip_code']==i]['location_string'].unique() for k in new_fac_size)

#new_fac_cost_expr = quicksum(x[i, j] * new_fac_cost[new_fac_cost['size'] == j]['cost'].values[0] for i in zipcode for j in new_fac_size)

m.setObjective(new_fac_cost_expr + expand_cost_expr, GRB.MINIMIZE)

### adding constrains

In [75]:
# expanding facility constraints

for f in exist_fac:
    #print(fac)
    print(fac[fac['facility_id'] == f]['total_capacity'].values[0])
    m.addConstr((fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1)*w1 < (y[f,"under5",'expand1']+y[f,"over5",'expand1']) < (fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1))
    m.addConstr((fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.15)*w2 < (y[f,"under5",'expand2']+y[f,"over5",'expand2']) < (fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.1)*w1)
    m.addConstr(0 < (y[f,"under5",'expand3']+y[f,"over5",'expand3']) < (fac[fac['facility_id'] == f]['total_capacity'].values[0]*0.2))

84


NotImplementedError: 