In [1]:
import pandas as pd
import numpy as np
np.random.seed(0)
import pulp

import sys
sys.path.insert(0, '../..')
import assignment

import matplotlib.pyplot as plt
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8-*' in matplotlib 3.6
# and the old names were removed afterwards. Use the new name when this matplotlib
# provides it and fall back to the legacy name otherwise.
_darkgrid_style = (
    'seaborn-v0_8-darkgrid'
    if 'seaborn-v0_8-darkgrid' in plt.style.available
    else 'seaborn-darkgrid'
)
plt.style.use(_darkgrid_style)
import matplotlib
# Larger default font for every figure drawn in this notebook.
matplotlib.rcParams.update({'font.size': 15})

import time
from tqdm import tqdm

import warnings

In [29]:
# Columns of the usage file that the rest of the notebook joins onto the cost frames.
usage_columns = ['Real', 'Reentered', 'Week', 'Used']

# Read the usage file keyed by household, discard the stray index column that was
# written into the CSV, and keep only the columns listed above.
usage_df = (
    pd.read_csv('../../data/subset_data.csv', index_col='HouseholdID')
    .drop(columns=['Unnamed: 0'])
    .loc[:, usage_columns]
)
usage_df.head()

Unnamed: 0_level_0,Real,Reentered,Week,Used
HouseholdID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2028039,2,0,33,False
2036520,2,0,283,True
2108682,2,0,205,True
2116994,2,0,244,True
2154639,2,0,107,False


In [51]:
path_to_file = '../../data/dat_all_preds_100_without_022720.csv'
types = ['ES', 'TH', 'RRH', 'PREV']
# Maps each code found in the 'Real' column to the position of its column in `types`.
assignment_map = {1: 0, 3: 1, 4: 2, 5: 3}

# Load the per-type cost columns and attach the usage columns; columns of the loaded
# frame whose names clash with usage_df are kept with an '_old' suffix.
df = pd.read_csv(path_to_file, index_col='HouseholdID').drop('Unnamed: 0', axis=1)
print('Shape before join:', df.shape)
df = df.join(usage_df, lsuffix='_old')
print('Shape after join:', df.shape)

# NOTE(review): capacities are counted over the full joined frame and reused as-is for
# the 'Used households' subset, so they sum to more than that subset's size -- confirm
# this is intended.
capacity_df = df['Real'].value_counts()
# Codes ordered by the column index they map to, so the capacity vector stays aligned
# with `types` even when a code has no households at all.
codes_in_type_order = sorted(assignment_map, key=assignment_map.get)

# The loop variable has its own name so that `df` still refers to the full joined
# frame after this cell has run.
for name, households in [('All households', df), ('Used households', df[df['Used']])]:
    print()
    print(name)
    print('- Number of households:', households.shape[0])

    cost_matrix = households[types].to_numpy()
    capacities = capacity_df.reindex(codes_in_type_order, fill_value=0).to_numpy()

    # Compute the assignment
    assigner = assignment.AssignmentHelperV2(cost_matrix, capacities)
    assignments = assigner.ip_solve()

    total_cost = assigner.get_cost(assignments, cost_matrix)

    # Compare with the real assignment; fail loudly on codes that have no column
    # instead of silently producing None entries.
    original_assignments = households['Real'].map(assignment_map)
    if original_assignments.isna().any():
        raise ValueError("'Real' contains codes that are missing from assignment_map")
    original_assignments = original_assignments.astype(int).to_numpy()

    original_total_cost = assigner.get_cost(original_assignments, cost_matrix)

    print('- Optimized cost:', total_cost)
    print('- Original cost:', households['Reentered'].sum())
    print('- Original estimated cost:', original_total_cost)

Shape before join: (13940, 6)
Shape after join: (13940, 10)

All households
- Number of households: 13940
- Optimized cost: 2989.9553964065
- Original cost: 3987
- Original estimated cost: 3875.4206969328216

Used households
- Number of households: 10043
- Optimized cost: 1827.261717924134
- Original cost: 2765
- Original estimated cost: 2687.600345811662


In [50]:
path_to_file = '../../data/dat_all_preds_100_withpsh_022720.csv'
types = ['ES', 'PSH', 'TH', 'RRH', 'PREV']
# Maps each code found in the 'Real' column (1..5) to the position of its column in `types`.
assignment_map = {i: i - 1 for i in range(1, 6)}

# Load the per-type cost columns and attach the usage columns; columns of the loaded
# frame whose names clash with usage_df are kept with an '_old' suffix.
df = pd.read_csv(path_to_file, index_col='HouseholdID').drop('Unnamed: 0', axis=1)
print('Shape before join:', df.shape)
df = df.join(usage_df, lsuffix='_old')
print('Shape after join:', df.shape)

# NOTE(review): capacities are counted over the full joined frame and reused as-is for
# the 'Used households' subset, so they sum to more than that subset's size -- confirm
# this is intended.
capacity_df = df['Real'].value_counts()
# Codes ordered by the column index they map to, so the capacity vector stays aligned
# with `types` even when a code has no households at all.
codes_in_type_order = sorted(assignment_map, key=assignment_map.get)

# The loop variable has its own name so that `df` still refers to the full joined
# frame after this cell has run.
for name, households in [('All households', df), ('Used households', df[df['Used']])]:
    print()
    print(name)
    print('- Number of households:', households.shape[0])

    cost_matrix = households[types].to_numpy()
    capacities = capacity_df.reindex(codes_in_type_order, fill_value=0).to_numpy()

    # Compute the assignment
    assigner = assignment.AssignmentHelperV2(cost_matrix, capacities)
    assignments = assigner.ip_solve()

    total_cost = assigner.get_cost(assignments, cost_matrix)

    # Compare with the real assignment; fail loudly on codes that have no column
    # instead of silently producing None entries.
    original_assignments = households['Real'].map(assignment_map)
    if original_assignments.isna().any():
        raise ValueError("'Real' contains codes that are missing from assignment_map")
    original_assignments = original_assignments.astype(int).to_numpy()

    original_total_cost = assigner.get_cost(original_assignments, cost_matrix)

    print('- Optimized cost:', total_cost)
    print('- Original cost:', households['Reentered'].sum())
    print('- Original estimated cost:', original_total_cost)

Shape before join: (14211, 7)
Shape after join: (14211, 11)

All households
- Number of households: 14211
- Optimized cost: 3650.061983372012
- Original cost: 4082
- Original estimated cost: 4084.472900467309

Used households
- Number of households: 10129
- Optimized cost: 2300.510196034292
- Original cost: 2796
- Original estimated cost: 2793.895941862182


In [61]:
path_to_file = '../../data/weekly_OOSProbs_dat_030120_noPSH.csv'
types = ['ES', 'TH', 'RRH', 'Prev']
# Maps each code found in the 'Real' column to the position of its column in `types`.
assignment_map = {1: 0, 3: 1, 4: 2, 5: 3}

# Load the per-type cost columns and attach the usage columns; columns of the loaded
# frame whose names clash with usage_df are kept with an '_old' suffix.
df = pd.read_csv(path_to_file, index_col='HouseholdID').drop('Unnamed: 0', axis=1)
print('Shape before join:', df.shape)
df = df.join(usage_df, lsuffix='_old')
print('Shape after join:', df.shape)

# NOTE(review): capacities are counted over the full joined frame and reused as-is for
# the 'Used households' subset, so they sum to more than that subset's size -- confirm
# this is intended (a per-subset recount was previously left commented out here).
capacity_df = df['Real'].value_counts()
# Codes ordered by the column index they map to, so the capacity vector stays aligned
# with `types` even when a code has no households at all.
codes_in_type_order = sorted(assignment_map, key=assignment_map.get)

# The loop variable has its own name so that `df` still refers to the full joined
# frame after this cell has run.
for name, households in [('All households', df), ('Used households', df[df['Used']])]:
    print()
    print(name)
    print('- Number of households:', households.shape[0])

    cost_matrix = households[types].to_numpy()
    capacities = capacity_df.reindex(codes_in_type_order, fill_value=0).to_numpy()

    # Compute the assignment
    assigner = assignment.AssignmentHelperV2(cost_matrix, capacities)
    assignments = assigner.ip_solve()

    total_cost = assigner.get_cost(assignments, cost_matrix)

    # Compare with the real assignment; fail loudly on codes that have no column
    # instead of silently producing None entries.
    original_assignments = households['Real'].map(assignment_map)
    if original_assignments.isna().any():
        raise ValueError("'Real' contains codes that are missing from assignment_map")
    original_assignments = original_assignments.astype(int).to_numpy()

    original_total_cost = assigner.get_cost(original_assignments, cost_matrix)

    print('- Optimized cost:', total_cost)
    print('- Original cost:', households['Reentered'].sum())
    print('- Original estimated cost:', original_total_cost)

Shape before join: (13940, 6)
Shape after join: (13940, 10)

All households
- Number of households: 13940
- Optimized cost: 2983.887127661806
- Original cost: 3987
- Original estimated cost: 3900.5806508812657

Used households
- Number of households: 10043
- Optimized cost: 1837.0828789611237
- Original cost: 2765
- Original estimated cost: 2724.7732631535696


In [57]:
# Show the capacity vector left in the kernel by whichever comparison cell ran last.
# NOTE(review): this relies on leftover kernel state, so the value depends on cell run order.
capacities

array([4441, 2451,  846, 6202])

In [56]:
# Count how many entries of the most recent `assignments` result fall on each distinct value.
# NOTE(review): `assignments` is whatever the last executed comparison loop left behind.
np.unique(assignments, return_counts=True)

(array([0, 1, 2, 3]), array([ 544, 2451,  846, 6202]))

In [53]:
path_to_file = '../../data/weekly_OOSProbs_dat_030120_notthinned.csv'
types = ['ES', 'PSH', 'TH', 'RRH', 'Prev']
# Maps each code found in the 'Real' column (1..5) to the position of its column in `types`.
assignment_map = {i: i - 1 for i in range(1, 6)}

# Load the per-type cost columns and attach the usage columns; columns of the loaded
# frame whose names clash with usage_df are kept with an '_old' suffix.
df = pd.read_csv(path_to_file, index_col='HouseholdID').drop('Unnamed: 0', axis=1)
print('Shape before join:', df.shape)
df = df.join(usage_df, lsuffix='_old')
print('Shape after join:', df.shape)

# NOTE(review): capacities are counted over the full joined frame and reused as-is for
# the 'Used households' subset, so they sum to more than that subset's size -- confirm
# this is intended.
capacity_df = df['Real'].value_counts()
# Codes ordered by the column index they map to, so the capacity vector stays aligned
# with `types` even when a code has no households at all.
codes_in_type_order = sorted(assignment_map, key=assignment_map.get)

# The loop variable has its own name so that `df` still refers to the full joined
# frame after this cell has run.
for name, households in [('All households', df), ('Used households', df[df['Used']])]:
    print()
    print(name)
    print('- Number of households:', households.shape[0])

    cost_matrix = households[types].to_numpy()
    capacities = capacity_df.reindex(codes_in_type_order, fill_value=0).to_numpy()

    # Compute the assignment
    assigner = assignment.AssignmentHelperV2(cost_matrix, capacities)
    assignments = assigner.ip_solve()

    total_cost = assigner.get_cost(assignments, cost_matrix)

    # Compare with the real assignment; fail loudly on codes that have no column
    # instead of silently producing None entries.
    original_assignments = households['Real'].map(assignment_map)
    if original_assignments.isna().any():
        raise ValueError("'Real' contains codes that are missing from assignment_map")
    original_assignments = original_assignments.astype(int).to_numpy()

    original_total_cost = assigner.get_cost(original_assignments, cost_matrix)

    print('- Optimized cost:', total_cost)
    print('- Original cost:', households['Reentered'].sum())
    print('- Original estimated cost:', original_total_cost)

Shape before join: (14211, 7)
Shape after join: (14211, 11)

All households
- Number of households: 14211
- Optimized cost: 3575.6969635077476
- Original cost: 4082
- Original estimated cost: 4082.91515836435

Used households
- Number of households: 10129
- Optimized cost: 2281.7630233549735
- Original cost: 2796
- Original estimated cost: 2816.5312889986076
