In [1]:
import pulp

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
# 'seaborn-darkgrid' was renamed to 'seaborn-v0_8-darkgrid' in matplotlib 3.6
# and the old name was later removed; use whichever name this install provides.
plt.style.use(
    'seaborn-v0_8-darkgrid'
    if 'seaborn-v0_8-darkgrid' in plt.style.available
    else 'seaborn-darkgrid'
)

import time

In [2]:
# Load the household table; the first CSV column becomes the row index.
# Only the subset file is read here. The full dataset is at
# '../../data/data.csv'; it used to be loaded first and then immediately
# overwritten, which was wasted I/O.
df = pd.read_csv('../../data/subset_data.csv', index_col=0)

# Count rows per `Real` code over ALL rows, i.e. before the `Used` filter
# below. These counts are later used as the per-type capacity limits.
capacity_df = df['Real'].value_counts()

# Keep only the rows whose `Used` flag is set.
df = df[df['Used']]

print(df.shape)
df.head()

(10129, 10)


Unnamed: 0,HouseholdID,Real,Reentered,ES,PSH,TH,RRH,PREV,Week,Used
2,2036520,2,0,0.32755,0.188743,0.266543,0.337444,0.218234,283,True
3,2108682,2,0,0.186973,0.760772,0.137805,0.178759,0.106168,205,True
4,2116994,2,0,0.234827,0.21769,0.183896,0.239388,0.148293,244,True
7,200001139,5,1,0.570296,0.476207,0.618977,0.634829,0.541514,161,True
8,200001156,5,0,0.274172,0.193441,0.227518,0.225883,0.162229,271,True


In [3]:
# Show the row counts per `Real` code (taken before the `Used` filter);
# these are the right-hand sides of the capacity constraints in the model.
capacity_df

5    6202
1    4441
3    2451
4     846
2     271
Name: Real, dtype: int64

In [4]:
# Columns of `df` holding the per-type coefficient of each household.
# Order matters: the capacity constraint maps position k (1-based) to the
# code k in `capacity_df`.
types = [
    'ES',
    'PSH',
    'TH',
    'RRH',
    'PREV',
]

# Row labels of the filtered frame identify the households to assign.
households = df.index.tolist()

In [5]:
# Binary decision variables: x[(household, type_)] is 1 when the household
# is assigned to that type and 0 otherwise.
x = pulp.LpVariable.dicts(
    'assignment',
    [(household, type_) for household in households for type_ in types],
    cat='Binary'
)

prob = pulp.LpProblem('homelessness', pulp.LpMinimize)

# Pull the coefficients out of the DataFrame once, as
# {household: {type_: value}}, instead of doing one scalar `df.loc` lookup
# per variable inside the objective.
costs = df[types].to_dict('index')

# Objective: minimise the sum of the selected (household, type) coefficients.
prob += pulp.lpSum(x[(household, type_)] * costs[household][type_]
                   for household in households for type_ in types)

# Every household receives exactly one type.
for household in households:
    prob += pulp.lpSum(x[(household, type_)] for type_ in types) == 1

# Capacity per type. `capacity_df` is keyed by the integer codes found in the
# `Real` column; presumably code k corresponds to the k-th entry of `types`
# (ES=1, ..., PREV=5) -- TODO confirm against the data dictionary.
for type_code, type_ in enumerate(types, start=1):
    prob += pulp.lpSum(x[(household, type_)] for household in households) <= capacity_df.loc[type_code]

In [6]:
# Disabled: the same timed solve, but calling prob.solve() without an explicit solver.
# t0 = time.time()
# prob.solve()
# print(pulp.LpStatus[prob.status])
# print(f'Took {time.time() - t0:.4f} seconds')

In [7]:
# Solve with the Gurobi command-line solver, then print the status and the
# elapsed wall-clock time.
# The solver class is taken from the top-level `pulp` namespace: the
# `pulp.solvers` submodule no longer exists in PuLP 2.x, while
# `pulp.GUROBI_CMD` is available in both old and new releases.
t0 = time.time()
prob.solve(solver=pulp.GUROBI_CMD())
print(pulp.LpStatus[prob.status])
print(f'Took {time.time() - t0:.4f} seconds')

Optimal
Took 1.6466 seconds


In [8]:
# Collect the solved value of every assignment variable into a
# households x types table. The frame is built in a single constructor call
# rather than grown cell by cell with `.loc`, which is much slower and leaves
# the columns with object dtype.
optimal_df = pd.DataFrame(
    [[x[(household, type_)].varValue for type_ in types] for household in households],
    index=households,
    columns=types,
)

optimal_df.head()

Unnamed: 0,ES,PSH,TH,RRH,PREV
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0
7,1.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,1.0


In [9]:
# Write the households x types assignment table (solver variable values) to disk.
optimal_df.to_csv('../../data/optimal_subset.csv')

In [10]:
# Objective value at the solution currently stored on the problem's variables.
print(pulp.value(prob.objective))

2302.525906114997


In [11]:
# Re-solve the same problem with the GLPK command-line solver and print the
# status and the elapsed wall-clock time.
# The solver class is taken from the top-level `pulp` namespace: the
# `pulp.solvers` submodule no longer exists in PuLP 2.x, while
# `pulp.GLPK_CMD` is available in both old and new releases.
t0 = time.time()
prob.solve(solver=pulp.GLPK_CMD())
print(pulp.LpStatus[prob.status])
print(f'Took {time.time() - t0:.4f} seconds')

Optimal
Took 11.3892 seconds


In [12]:
# Objective value at the solution currently stored on the problem's variables.
print(pulp.value(prob.objective))

2302.525906114997


In [13]:
# Rebuild the households x types table from the variable values left by the
# most recent solve. The frame is built in a single constructor call rather
# than grown cell by cell with `.loc`, which is much slower and leaves the
# columns with object dtype.
optimal_df = pd.DataFrame(
    [[x[(household, type_)].varValue for type_ in types] for household in households],
    index=households,
    columns=types,
)

optimal_df.head()

Unnamed: 0,ES,PSH,TH,RRH,PREV
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0
7,1.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,1.0


In [14]:
# Sanity check: recompute the objective by hand from the solved variable
# values and the coefficients in `df`. Terms are summed per household first
# and then across households.
running_sum = sum(
    sum(x[(household, type_)].varValue * df.loc[household, type_] for type_ in types)
    for household in households
)

running_sum

2302.525906114997