In [2]:
import os
import math
import pandas as pd
import numpy as np
from itertools import combinations
from gurobipy import Model, GRB, quicksum

### Problem 2 Full Solution

In [3]:
# Input CSVs (paths are relative to the notebook's working directory) and the
# folder that receives every file this notebook writes.
fac_path, pot_path, zip_path = (
    "child_care_regulated.csv",
    "potential_locations.csv",
    "zipcode_summary.csv",
)
outdir = "outputs_problem2_solution2"

# Create the output folder first; exist_ok makes re-running the cell harmless.
os.makedirs(outdir, exist_ok=True)

# fac: existing facilities, pot: candidate build sites, zipdf: per-zipcode summary.
fac, pot, zipdf = (pd.read_csv(csv_path) for csv_path in (fac_path, pot_path, zip_path))

In [4]:
# Give all three tables the same key column: accept the alternate spelling
# 'zip_code', then store zipcodes as zero-padded strings of at least 5 characters
# so they compare equal across tables.
for df in (fac, pot, zipdf):
    only_alt_spelling = 'zip_code' in df.columns and 'zipcode' not in df.columns
    if only_alt_spelling:
        df.rename(columns={'zip_code':'zipcode'}, inplace=True)
    zip_as_text = df['zipcode'].astype(str)
    df['zipcode'] = zip_as_text.str.zfill(5)

def pick_col(df, cand, default_name):
    """Return the first column of `df` whose name appears in `cand`, NaN filled with 0.

    Candidates are tried in the order given. If none of them is a column of
    `df`, a new all-zero column called `default_name` is added to `df`
    (mutating it in place) and that column is returned instead.
    """
    present = [name for name in cand if name in df.columns]
    if present:
        return df[present[0]].fillna(0)
    # No candidate matched: fall back to an explicit zero column.
    df[default_name] = 0
    return df[default_name]

# Candidate column names per age band, keyed by the zero-filled fallback column
# that pick_col adds to `fac` when none of the candidates exists.
_band_candidates = {
    '_tmp_infant': ['infant_capacity','infant_cap','infant'],
    '_tmp_toddler': ['toddler_capacity','toddler_cap','toddler'],
    '_tmp_preschool': ['preschool_capacity','preschool_cap','preschool_c','preschool'],
    '_tmp_school_age': ['school_age_capacity','school_age_cap','school_age'],
}
infant, toddler, preschool, school_age = (
    pick_col(fac, names, fallback) for fallback, names in _band_candidates.items()
)

# Stats: per-facility totals used as the 0-12 and 0-5 capacity later on.
_under5_total = infant + toddler + preschool
fac['_012_cap'] = _under5_total + school_age
fac['_05_cap']  = _under5_total

### Distance function
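First, remove every potential location that lies less than 0.06 miles from an existing facility in the same zipcode;\
Then, add conflict constraints so that two remaining potential locations in the same zipcode that are less than 0.06 miles apart cannot both be built.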



In [5]:
# Distance function
def haversine_miles(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in miles between points given in degrees.

    Each argument may be a scalar or an array-like; all four are converted to
    float arrays and combined with NumPy broadcasting, so a single point can be
    compared against many points in one call. The Earth radius used is
    3958.7613 miles.
    """
    R = 3958.7613
    lat1, lon1, lat2, lon2 = (
        np.asarray(v, dtype=float) for v in (lat1, lon1, lat2, lon2)
    )
    half_dlat = np.radians(lat2 - lat1) / 2
    half_dlon = np.radians(lon2 - lon1) / 2
    cos_lat1 = np.cos(np.radians(lat1))
    cos_lat2 = np.cos(np.radians(lat2))
    # Haversine term: squared half-chord length on the unit sphere.
    a = np.sin(half_dlat)**2 + cos_lat1*cos_lat2*np.sin(half_dlon)**2
    return 2 * R * np.arcsin(np.sqrt(a))

In [6]:

# Data wrangling: filter ALL potential locations too close to existing facilities.
# A candidate site is dropped when an existing facility in the SAME zipcode lies
# less than 0.06 miles away. Candidates with missing coordinates, or in a zipcode
# with no geocoded facility, are kept.
fac_valid = fac.dropna(subset=['latitude','longitude']).copy()
mask = np.ones(len(pot), dtype=bool)

# group by zip for speed: zipcode -> (n_facilities, 2) array of [lat, lon]
fac_groups = {z: g[['latitude','longitude']].to_numpy() for z, g in fac_valid.groupby('zipcode')}

# `mask` is positional, so enumerate the rows instead of using the DataFrame
# index label (the two only coincide for a default RangeIndex). Iterating over
# the three columns directly also avoids building a Series per row.
for pos, (z, lat, lon) in enumerate(zip(pot['zipcode'], pot['latitude'], pot['longitude'])):
    pts = fac_groups.get(z)
    if pts is None or len(pts) == 0 or np.isnan(lat) or np.isnan(lon):
        continue
    d = haversine_miles(lat, lon, pts[:,0], pts[:,1])
    if np.any(d < 0.06):
        mask[pos] = False
pot_f = pot[mask].copy()

# Outcome to_csv: keep an audit trail of what was kept and what was removed.
pot_kept = pot[mask].copy()
pot_removed = pot[~mask].copy()

pot_kept.to_csv(os.path.join(outdir, "potential_locations_filtered.csv"), index=False)
pot_removed.to_csv(os.path.join(outdir, "potential_locations_removed.csv"), index=False)



In [7]:

# Conflict edges between the remaining potential locations: for every zipcode,
# list each pair of candidate sites (by pot_f index label) that are less than
# 0.06 miles apart. These pairs later become "at most one of the two" constraints.
zip_conflicts = {}
for z, g in pot_f.groupby('zipcode'):
    site_lat = g['latitude']
    site_lon = g['longitude']
    zip_conflicts[z] = [
        (a, b)
        for a, b in combinations(g.index.to_list(), 2)
        if haversine_miles(site_lat.loc[a], site_lon.loc[a],
                           site_lat.loc[b], site_lon.loc[b]) < 0.06
    ]

In [8]:

# Model setup
m = Model("Project2_Problem2_pandas")
m.Params.MIPGap = 1e-4      # relative optimality gap at which the solver may stop
m.Params.TimeLimit = 600    # wall-clock limit in seconds


def _rows_by_zip(frame, zip_keys):
    """Map every zipcode in `zip_keys` to the list of `frame` index labels in it.

    Labels keep the row order of `frame`; zipcodes with no rows map to an empty
    list, and rows whose zipcode is not in `zip_keys` are ignored. One pass over
    `frame` replaces a full boolean scan per zipcode.
    """
    buckets = {z: [] for z in zip_keys}
    for label, z in zip(frame.index.tolist(), frame['zipcode'].tolist()):
        bucket = buckets.get(z)
        if bucket is not None:
            bucket.append(label)
    return buckets


# Parameters preparation
zips = sorted(zipdf['zipcode'].unique())

# Per-zipcode demand data, keyed by zipcode (index built once and reused).
_zip_indexed = zipdf.set_index('zipcode')
pop012 = _zip_indexed['pop_0_12'].astype(float).to_dict()
pop05  = _zip_indexed['pop_0_5'].astype(float).to_dict()
highd  = _zip_indexed['high_demand'].astype(int).to_dict()

# Existing capacity per zipcode (sum over facilities).
base012 = fac.groupby('zipcode')['_012_cap'].sum().to_dict()
base05  = fac.groupby('zipcode')['_05_cap'].sum().to_dict()

# n012: current 0-12 capacity of each facility, as float.
fac['n012'] = fac['_012_cap'].astype(float)
fac_by_zip = _rows_by_zip(fac, zips)
pot_by_zip = _rows_by_zip(pot_f, zips)


# New-facility sizes: capacity added for ages 0-12, for ages 0-5, and build cost.
types = ['S', 'M', 'L']
cap012 = {'S': 100, 'M': 200, 'L': 400}
cap05 = {'S': 50, 'M': 100, 'L': 200}
cost_build = {'S': 65000, 'M': 95000, 'L': 115000}

# --------------
# Decision variables (SLAB pricing for expansion)
# --------------
# Per existing facility f (all dicts are keyed by the fac index label):
#   s[f]            expansion as a fraction of current capacity n, 0 <= s <= U
#   x[f]            added slots, tied to s by x = n * s
#   u1/u2/u3[f]     binary bracket selectors (at most one is 1)
#   sp1/sp2/sp3[f]  s "routed" to the chosen bracket: sp_k = s if u_k = 1 else 0
s, x = {}, {}
u1, u2, u3 = {}, {}, {}
sp1, sp2, sp3 = {}, {}, {}

# Filled in after the expansion loop: build decisions and per-zip 0-5 capacity.
y = {}
e05, b05 = {}, {}

U = 0.20      # maximum expansion fraction; also used as the big-M constant below
eps = 1e-6    # small offset that turns the strict bracket lower bounds into >=

for f in fac.index:
    n = float(fac.loc[f, 'n012'])
    if n <= 0:
        # Facility with no 0-12 capacity: s and x are fixed at 0 (lb = ub = 0)
        # and no bracket variables are created, so f is absent from u*/sp*.
        s[f] = m.addVar(lb=0.0, ub=0.0, vtype=GRB.INTEGER)
        x[f] = m.addVar(lb=0.0, ub=0.0, vtype=GRB.INTEGER)
        continue

    s[f]   = m.addVar(lb=0.0, ub=U, vtype=GRB.CONTINUOUS, name=f"s[{f}]")
    x[f]   = m.addVar(lb=0.0, ub=U*n, vtype=GRB.CONTINUOUS,  name=f"x[{f}]")
    # Added slots are the chosen fraction of the current capacity.
    m.addConstr(x[f] == n * s[f])

    # bracket choices: one binary per slab, plus a copy of s per slab
    u1[f] = m.addVar(vtype=GRB.BINARY)
    u2[f] = m.addVar(vtype=GRB.BINARY)
    u3[f] = m.addVar(vtype=GRB.BINARY)
    sp1[f] = m.addVar(lb=0.0, ub=U, vtype=GRB.CONTINUOUS,  name=f"sp1[{f}]")
    sp2[f] = m.addVar(lb=0.0, ub=U, vtype=GRB.CONTINUOUS,  name=f"sp2[{f}]")
    sp3[f] = m.addVar(lb=0.0, ub=U, vtype=GRB.CONTINUOUS,  name=f"sp3[{f}]")

    # At most one bracket is active; with none active s is forced to 0.
    m.addConstr(u1[f] + u2[f] + u3[f] <= 1)
    m.addConstr(s[f] <= U * (u1[f] + u2[f] + u3[f]))

    # Bracket ranges, each enforced only when its selector is 1 (big-M = U):
    #   u1 = 1  ->  eps        <= s <= 0.10
    #   u2 = 1  ->  0.10 + eps <= s <= 0.15
    #   u3 = 1  ->  0.15 + eps <= s <= 0.20
    m.addConstr(s[f] <= 0.10 + U*(1 - u1[f]))
    m.addConstr(s[f] >= eps * u1[f])
    m.addConstr(s[f] >= (0.10 + eps)*u2[f])
    m.addConstr(s[f] <= 0.15 + U*(1 - u2[f]))
    m.addConstr(s[f] >= (0.15 + eps)*u3[f])
    m.addConstr(s[f] <= 0.20 + U*(1 - u3[f]))

    # Linearisation of sp_k = s * u_k: sp_k is 0 when u_k = 0 and equals s when
    # u_k = 1, so the objective can price s at the active bracket's rate.
    for sp, uk in [(sp1[f], u1[f]), (sp2[f], u2[f]), (sp3[f], u3[f])]:
        m.addConstr(sp <= s[f])
        m.addConstr(sp <= U*uk)
        m.addConstr(sp >= s[f] - U*(1 - uk))
    # The per-bracket copies must add up to the total expansion fraction.
    m.addConstr(sp1[f] + sp2[f] + sp3[f] == s[f])

# build site choices: y[(i, t)] = 1 when a facility of size t ('S', 'M' or 'L')
# is built at candidate site i (a pot_f index label).
for i in pot_f.index:
    for t in types:
        y[(i, t)] = m.addVar(vtype=GRB.BINARY)

# Per-zipcode non-negative amounts counted toward the 0-5 requirement:
# e05 from expansions and b05 from new builds. Their upper limits are added
# together with the other constraints.
for z in zips:
    e05[z] = m.addVar(lb=0, vtype=GRB.CONTINUOUS)
    b05[z] = m.addVar(lb=0, vtype=GRB.CONTINUOUS)

# Flush the pending variable/constraint additions into the model.
m.update()

Set parameter Username
Set parameter LicenseID to value 2722020
Academic license - for non-commercial use only - expires 2026-10-14
Set parameter MIPGap to value 0.0001
Set parameter TimeLimit to value 600


In [None]:
# constraints
# At most one facility size may be built at each candidate site.
for i in pot_f.index:
    m.addConstr(quicksum(y[(i, t)] for t in types) <= 1)
# Two candidate sites flagged as a conflict pair (less than 0.06 miles apart)
# cannot both be built, whatever the sizes.
for z, edges in zip_conflicts.items():
    for a, b in edges:
        m.addConstr(quicksum(y[(a, t)] for t in types) +
                    quicksum(y[(b, t)] for t in types) <= 1)

# Upper limits on the 0-5 amounts per zipcode:
#   b05 <= 0-5 capacity of the facilities built in the zipcode
#   e05 <= total slots added by expansions in the zipcode
for z in zips:
    m.addConstr(b05[z] <= quicksum(cap05[t]*y[(i, t)] for i in pot_by_zip.get(z, []) for t in types))
    m.addConstr(e05[z] <= quicksum(x[f] for f in fac_by_zip.get(z, [])))

# Coverage requirements per zipcode.
for z in zips:
    # Required share of the 0-12 population: 1/2 where high_demand == 1, else 1/3.
    thr = 0.50 if highd.get(z, 0) == 1 else (1.0/3.0)
    # 0-12 capacity = existing + expansion slots + capacity of new builds.
    lhs012 = base012.get(z, 0.0) + quicksum(x[f] for f in fac_by_zip.get(z, [])) + quicksum(cap012[t]*y[(i, t)] for i in pot_by_zip.get(z, []) for t in types)
    # 0-5 capacity = existing + the expansion and build amounts counted for 0-5.
    lhs05 = base05.get(z, 0.0) + e05[z] + b05[z]
    m.addConstr(lhs012 >= thr*pop012[z])
    # 0-5 capacity must reach two thirds of the 0-5 population.
    m.addConstr(lhs05 >= (2.0/3.0)*pop05[z])

# objective: expansion cost + build cost, minimised.
# Each slab prices the expansion fraction at (20000 + rate * n), where n is the
# facility's current 0-12 capacity; only the active slab's sp variable is non-zero.
_slab_rates = ((200.0, sp1), (400.0, sp2), (1000.0, sp3))
expand_terms = []
for f in fac.index:
    n = float(fac.loc[f, 'n012'])
    if n > 0:
        # Facilities with n <= 0 have no slab variables and cost nothing.
        expand_terms.append(
            quicksum((20000.0 + rate*n) * slab[f] for rate, slab in _slab_rates)
        )

exp_cost = quicksum(expand_terms)
build_cost = quicksum(
    cost_build[t]*y[(i, t)]
    for i in pot_f.index
    for t in types
)
m.setObjective(exp_cost + build_cost, GRB.MINIMIZE)

# optimize
m.optimize()

# Record the solver status and, when at least one solution exists, its objective.
status = int(m.Status)
if m.SolCount > 0:
    obj = float(m.ObjVal)
else:
    obj = float('nan')
objective_path = os.path.join(outdir, "overall_objective.txt")
with open(objective_path, "w") as fh:
    fh.write(f"Status={status}\nObj={obj}\n")

# === Facility-level expansion slab output ===
# Variable values (.X) only exist when the solver found at least one solution;
# fail with a clear message instead of an attribute error deep in the loop.
if m.SolCount == 0:
    raise RuntimeError(
        f"No solution available (Gurobi status {int(m.Status)}); "
        "cannot export facility expansion results."
    )

rows = []
for f in fac.index:
    n = float(fac.loc[f, 'n012'])

    # x is the number of added slots, reported rounded to a whole slot.
    x_val  = int(round(x[f].X)) if f in x else 0
    # sp1..sp3 are expansion FRACTIONS in [0, 0.20]; rounding them to int would
    # always give 0, so keep them as fractions. Facilities without capacity
    # have no slab variables and report 0.0.
    sp1_v  = round(sp1[f].X, 6) if f in sp1 else 0.0
    sp2_v  = round(sp2[f].X, 6) if f in sp2 else 0.0
    sp3_v  = round(sp3[f].X, 6) if f in sp3 else 0.0

    # Active pricing bracket, read from the binary selectors.
    if f in u1 and u1[f].X > 0.5:
        tier = "0-10%"
    elif f in u2 and u2[f].X > 0.5:
        tier = "10-15%"
    elif f in u3 and u3[f].X > 0.5:
        tier = "15-20%"
    else:
        tier = "none"

    # Reported slots relative to current capacity (0 when there is no capacity).
    ratio = (x_val / n) if n > 0 else 0.0

    rows.append({
        "facility_id": f,
        "zipcode": fac.loc[f, "zipcode"],
        "cap_now_0_12": n,
        "x_total": x_val,
        "sp1_0_10": sp1_v,
        "sp2_10_15": sp2_v,
        "sp3_15_20": sp3_v,
        "tier": tier,
        "x_ratio": round(ratio, 4)
    })

df_fac_slab = pd.DataFrame(rows).sort_values(["zipcode","facility_id"])
df_fac_slab.to_csv(os.path.join(outdir, "facility_expansion_slab.csv"), index=False)
print("Saved:", os.path.join(outdir, "facility_expansion_slab.csv"))

Gurobi Optimizer version 12.0.3 build v12.0.3rc0 (mac64[arm] - Darwin 25.0.0 25A354)

CPU model: Apple M4
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Non-default parameters:
TimeLimit  600

Optimize a model with 575860 rows, 755850 columns and 2432668 nonzeros
Model fingerprint: 0x71bffb8d
Variable types: 76055 continuous, 679795 integer (678165 binary)
Coefficient statistics:
  Matrix range     [1e-06, 9e+02]
  Objective range  [2e+04, 9e+05]
  Bounds range     [2e-01, 2e+02]
  RHS range        [2e-01, 1e+04]
Presolve removed 440755 rows and 626833 columns (presolve time = 36s)...
Presolve removed 440743 rows and 626821 columns
Presolve time: 35.63s
Presolved: 135117 rows, 129029 columns, 507753 nonzeros
Variable types: 42314 continuous, 86715 integer (86708 binary)
Deterministic concurrent LP optimizer: primal simplex, dual simplex, and barrier
Showing barrier log only...

Root barrier log...

Ordering time: 0.01s

Barrier statistics:
 AA' NZ     :