In [1]:
import pandas as pd
import gurobipy as grb

In [2]:
## Read the five raw input tables from ./Data, in the same order as the names on the left
df_income, df_care, df_employment, df_population, df_locations = map(
    pd.read_csv,
    (
        "./Data/income.csv",
        "./Data/child_care.csv",
        "./Data/employment.csv",
        "./Data/population.csv",
        "./Data/potential_loc.csv",
    ),
)

In [3]:
## Child population per zip code, derived from the three youngest age brackets
##   population_total : 13/15 of the 0-14 head count (truncated to an integer)
##   population_5-12  : population_total minus the under-5 bracket
##   population_0-5   : the under-5 bracket, renamed
df_children = (
    df_population[['zip_code', '-5', '5-9', '10-14']]
    .assign(**{
        'population_total': lambda d: ((13 / 15) * (d['-5'] + d['5-9'] + d['10-14'])).astype(int),
        'population_5-12': lambda d: d['population_total'] - d['-5'],
    })
    .drop(columns = ['5-9', '10-14'])
    .rename(columns = {'-5': 'population_0-5'})
)

In [4]:
## Display the per-zip-code child population table built in the previous cell
df_children

Unnamed: 0,zip_code,population_0-5,population_total,population_5-12
0,10001,744,2140,1396
1,10002,2142,7267,5125
2,10003,1440,2970,1530
3,10004,433,672,239
4,10005,484,794,310
...,...,...,...,...
1018,14767,101,422,321
1019,14770,137,482,345
1020,14772,256,635,379
1021,14805,31,65,34


In [5]:
## Demographic table per zip code: income left-joined with employment,
## then inner-joined with the child population counts.
## A region is flagged "high demand" (1) when average income exceeds 60000
## and the employment rate exceeds 0.6; otherwise the flag is 0.
df_demo = (
    df_income
    .merge(df_employment, how = "left", on = "zip_code")
    .merge(df_children, on = "zip_code")
    .assign(**{
        'high demand': lambda d: (
            (d['average income'] > 60000) & (d['employment rate'] > 0.6)
        ).astype(int)
    })
)

In [6]:
## Display the merged demographic table, including the 0/1 "high demand" flag
df_demo

Unnamed: 0,zip_code,average income,employment rate,population_0-5,population_total,population_5-12,high demand
0,10001,102878.033603,0.595097,744,2140,1396,0
1,10002,59604.041165,0.520662,2142,7267,5125,0
2,10003,114273.049645,0.497244,1440,2970,1530,0
3,10004,132004.310345,0.506661,433,672,239,0
4,10005,121437.713311,0.665833,484,794,310,1
...,...,...,...,...,...,...,...
1018,14767,54623.287671,0.322296,101,422,321,0
1019,14770,55523.255814,0.446676,137,482,345,0
1020,14772,57164.634146,0.410719,256,635,379,0
1021,14805,59375.000000,0.679739,31,65,34,0


In [7]:
## Split each facility's existing capacity into the two age bands.
## (Latitude and longitude could be carried along here if coordinates are needed.)

## 0-5 slots: infant + toddler + preschool capacity plus 5/12 of 'children_capacity'
## (presumably that column spans ages 0-12 -- TODO confirm), truncated to an integer
df_care["current_0-5"] = (
    df_care['infant_capacity']
    .add(df_care['toddler_capacity'])
    .add(df_care['preschool_capacity'])
    .add(df_care['children_capacity'].mul(5 / 12))
    .astype(int)
)

## 5-12 slots: whatever remains of the 'capacities' column after the 0-5 share
## NOTE(review): a later cell reads 'total_capacity' instead of 'capacities' -- confirm both columns exist
df_care["current_5-12"] = df_care['capacities'].sub(df_care['current_0-5']).astype(int)

In [8]:
## Per-facility view of the current capacities
df_current = df_care.loc[:, ["zip_code", "current_0-5", "current_5-12", "total_capacity"]]

## For existing facilities with 0 capacity, we cannot estimate the cost of expanding,
## thus we drop them (rows with missing values are dropped as well)
df_current = df_current[df_current['total_capacity'] != 0].dropna().astype(int)

## Upper bound for x + y: the headroom left under the 500-slot cap,
## floored at 0 for facilities whose total_capacity is already 500 or more
df_current['upper_bound'] = (500 - df_current['total_capacity']).clip(lower = 0)

In [9]:
## Build the main table: demographics, current capacities and the constraint right-hand sides

## Existing 0-5 and 5-12 slots summed over all facilities of a zip code
df_current_capacity = (
    df_current
    .groupby("zip_code")[["current_0-5", "current_5-12"]]
    .sum()
    .reset_index()
)

## Columns added below:
##   demand_policy     : 0-5 slots needed to cover 2/3 of the 0-5 population
##   gap_policy        : 0-5 slots still to be created to reach demand_policy
##   demand_not_desert : slots needed so the region is not a child care desert
##                       (1/2 of population_total in high-demand regions, 1/3 otherwise)
##   gap_not_desert    : slots still to be created to eliminate the desert; a negative gap
##                       means no new slots are needed, so it is set to 0
## NOTE(review): gap_not_desert subtracts demand_policy rather than current_0-5 -- confirm
## this matches the intended formulation of the "not_desert" constraint.
df_main = (
    df_demo
    .merge(df_current_capacity, on = "zip_code")
    .assign(**{
        'demand_policy': lambda d: (2 * d['population_0-5'] / 3).astype(int),
        'gap_policy': lambda d: d['demand_policy'] - d['current_0-5'],
        'demand_not_desert': lambda d: (
            (d['population_total'] / 2)
            .where(d['high demand'].astype(bool), d['population_total'] / 3)
            .astype(int)
        ),
        'gap_not_desert': lambda d: (
            d['demand_not_desert'] - d['demand_policy'] - d['current_5-12']
        ).clip(lower = 0),
    })
    .drop(columns = ["average income", "employment rate"])
    .set_index("zip_code")
    .astype(int)
)
df_main

Unnamed: 0_level_0,population_0-5,population_total,population_5-12,high demand,current_0-5,current_5-12,demand_policy,gap_policy,demand_not_desert,gap_not_desert
zip_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10001,744,2140,1396,0,9,600,496,487,713,0
10002,2142,7267,5125,0,95,4634,1428,1333,2422,0
10003,1440,2970,1530,0,0,1995,960,960,990,0
10004,433,672,239,0,0,263,288,288,224,0
10005,484,794,310,1,0,39,322,322,397,36
...,...,...,...,...,...,...,...,...,...,...
14767,101,422,321,0,5,11,67,62,140,62
14770,137,482,345,0,15,55,91,76,160,14
14772,256,635,379,0,37,71,170,133,211,0
14805,31,65,34,0,2,6,20,18,21,0


In [10]:
## Empty Gurobi model that the following cells fill with variables, objective and constraints
model_pt1 = grb.Model("Part1")

Set parameter Username
Academic license - for non-commercial use only - expires 2025-09-06


In [11]:
## Regions of the model: one per zip code in df_main
zip_codes = df_main.index.tolist()

## Right-hand sides of the coverage constraints, as dicts keyed by zip code
gap_policy = df_main['gap_policy'].to_dict()
gap_not_desert = df_main['gap_not_desert'].to_dict()

## Constants describing the existing facilities, as dicts keyed by zip code (ascending)
facilities_by_zip = df_current.groupby("zip_code")
num_facilities = facilities_by_zip.size().to_dict()                                             ## Number of existing facilities in each region
capacities = {z: grp['total_capacity'].tolist() for z, grp in facilities_by_zip}                ## Capacities of existing facilities, one list per region
upper_bounds = {z: grp['upper_bound'].tolist() for z, grp in facilities_by_zip}                 ## Upper bounds of x + y, one list per region

## Placeholders for the expansion variables: one slot per existing facility
decision_varsX = {z: [0 for _ in range(n)] for z, n in num_facilities.items()}                  ## Decision variables: x
decision_varsY = {z: [0 for _ in range(n)] for z, n in num_facilities.items()}                  ## Decision variables: y

## Containers for the new-facility variables, filled when the variables are created
decision_varsS = {z: [] for z in zip_codes}                                                     ## Decision variables: s
decision_varsM = {z: [] for z in zip_codes}                                                     ## Decision variables: m
decision_varsL = {z: [] for z in zip_codes}                                                     ## Decision variables: l

In [12]:
## Expansion variables: for facility j of zip code z, decision_varsX[z][j] = x_{zj}
## and decision_varsY[z][j] = y_{zj}, both non-negative integers
for z in zip_codes:
    x_slots, y_slots = decision_varsX[z], decision_varsY[z]
    for j in range(num_facilities[z]):
        x_slots[j] = model_pt1.addVar(lb = 0, vtype = grb.GRB.INTEGER, name = f"X_{z}_{j}")
        y_slots[j] = model_pt1.addVar(lb = 0, vtype = grb.GRB.INTEGER, name = f"Y_{z}_{j}")

## New-facility variables: 100 candidate slots per zip code, each with three 0/1
## indicators (S, M, L) stored in decision_varsS / decision_varsM / decision_varsL
for z in zip_codes:
    s_slots, m_slots, l_slots = decision_varsS[z], decision_varsM[z], decision_varsL[z]
    for k in range(100):
        s_slots.append(model_pt1.addVar(lb = 0, ub = 1, vtype = grb.GRB.INTEGER, name = f"S_{z}_{k}"))
        m_slots.append(model_pt1.addVar(lb = 0, ub = 1, vtype = grb.GRB.INTEGER, name = f"M_{z}_{k}"))
        l_slots.append(model_pt1.addVar(lb = 0, ub = 1, vtype = grb.GRB.INTEGER, name = f"L_{z}_{k}"))

## Make the new variables visible to the model before they are used
model_pt1.update()

In [13]:
## Objective: minimise total funding = cost of expanding existing facilities
##            + cost of building new facilities.
## Each part is a single flat grb.quicksum over all its terms, instead of nesting
## Python's built-in sum() inside quicksum. The terms and coefficients are unchanged.

## Expanding facility j of region i
expansion_cost = grb.quicksum(
    (20000 + 200 * capacities[i][j])                                            ## Cost of expanding 100%
    * ((decision_varsX[i][j] + decision_varsY[i][j]) / capacities[i][j])        ## Ratio of expansion
    + 100 * decision_varsX[i][j]                                                ## Additional cost for 0-5 slots
    for i in zip_codes                                                          ## All regions
    for j in range(num_facilities[i])                                           ## All facilities in a region
)

## New-built facilities: fixed cost per S / M / L indicator in each of the 100 slots of a region
construction_cost = grb.quicksum(
    65000 * decision_varsS[i][k] + 95000 * decision_varsM[i][k] + 115000 * decision_varsL[i][k]
    for i in zip_codes
    for k in range(100)
)

model_pt1.setObjective(expansion_cost + construction_cost, grb.GRB.MINIMIZE)

In [14]:
## Policy constraint, one per region: the added 0-5 slots (x of every existing facility
## plus 50 / 100 / 200 per new S / M / L facility) must cover gap_policy
model_pt1.addConstrs(
    (
        grb.quicksum(decision_varsX[z])
        + grb.quicksum(
            50 * decision_varsS[z][slot] + 100 * decision_varsM[z][slot] + 200 * decision_varsL[z][slot]
            for slot in range(100)
        )
        >= gap_policy[z]
        for z in zip_codes
    ),
    name = "policy"
)

## No-desert constraint, one per region: all added slots (x and y of every existing
## facility plus 100 / 200 / 400 per new S / M / L facility) must cover gap_not_desert
model_pt1.addConstrs(
    (
        grb.quicksum(decision_varsX[z])
        + grb.quicksum(decision_varsY[z])
        + grb.quicksum(
            100 * decision_varsS[z][slot] + 200 * decision_varsM[z][slot] + 400 * decision_varsL[z][slot]
            for slot in range(100)
        )
        >= gap_not_desert[z]
        for z in zip_codes
    ),
    name = "not_desert"
)

## Constraints on expansions of existing facilities, added region by region.
## The zip code is part of each constraint name so that names are unique across
## regions; without it every region would reuse "total_capacity[0]", "ratio[0]", ...
for z, n_facilities in num_facilities.items():

    ## Maximum capacity: 500 (upper_bounds holds the headroom left for x + y)
    model_pt1.addConstrs(
        (decision_varsX[z][j] + decision_varsY[z][j] <= upper_bounds[z][j] for j in range(n_facilities)),
        name = f"total_capacity_{z}"
    )

    ## Ratio of expansion is no more than 20%: 5 * (x + y) <= current capacity
    model_pt1.addConstrs(
        (5 * (decision_varsX[z][j] + decision_varsY[z][j]) <= capacities[z][j] for j in range(n_facilities)),
        name = f"ratio_{z}"
    )

## Each of the 100 candidate slots of a region can hold at most one new facility,
## i.e. at most one of its S / M / L indicators may be 1
model_pt1.addConstrs(
    (
        decision_varsS[z][slot] + decision_varsM[z][slot] + decision_varsL[z][slot] <= 1
        for z in zip_codes
        for slot in range(100)
    ),
    name = "no_more_than_one"
)

## Register all pending constraints with the model
model_pt1.update()

In [15]:
## Sanity check: how many linear constraints the model holds after the update
print(f"Number of constraints: {model_pt1.NumConstrs}")

133856

In [16]:
## Solve the model; Gurobi writes its progress log to the cell output
model_pt1.optimize()

Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 133856 rows, 336410 columns and 1023985 nonzeros
Model fingerprint: 0x080a60f5
Variable types: 0 continuous, 336410 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 4e+02]
  Objective range  [2e+02, 1e+05]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 8e+03]
Found heuristic solution: objective 4.360224e+08
Presolve removed 133730 rows and 336054 columns (presolve time = 11s) ...
Presolve removed 133730 rows and 336062 columns
Presolve time: 11.31s
Presolved: 126 rows, 348 columns, 1020 nonzeros
Found heuristic solution: objective 3.131091e+08
Variable types: 0 continuous, 348 integer (306 binary)

Root simplex log...

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    3.1299409e+08   2.012500

In [17]:
## Report the optimal objective value; if the solver did not finish with a proven
## optimum, say so explicitly instead of printing nothing
if model_pt1.Status == grb.GRB.OPTIMAL:
    print(f"The minimal funding is {model_pt1.ObjVal:.2f}")
else:
    print(f"No proven optimal solution available (Gurobi status code {model_pt1.Status})")

The minimal funding is 313109090.63


In [None]:
## Collect the solution values of the expansion variables, one row per existing facility.
## x is the variable that carries the extra 0-5 cost in the objective; y is reported as the
## 5-12 expansion (presumably the remaining age band -- confirm against the model write-up).
zip_code_cols = []
x_cols = []
y_cols = []

for z in zip_codes:
    for j in range(num_facilities[z]):
        zip_code_cols.append(z)
        x_cols.append(decision_varsX[z][j].X)
        y_cols.append(decision_varsY[z][j].X)

## Show both expansion columns (y was collected but previously left out of the table)
pd.DataFrame({
    "zip_code": zip_code_cols,
    "expanded_0-5": x_cols,
    "expanded_5-12": y_cols,
})