In [15]:
import pandas as pd
import numpy as np
import pulp

import matplotlib.pyplot as plt
import matplotlib

# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and 3.8 removed the old names. Prefer the new spelling when this install
# provides it, and fall back to the original name on older versions.
_DARK_STYLE = next(
    (name for name in ('seaborn-v0_8-dark', 'seaborn-dark')
     if name in plt.style.available),
    'seaborn-dark',
)
plt.style.use(_DARK_STYLE)
matplotlib.rcParams.update({'font.size': 13})

import time

import sys
sys.path.insert(0, '../..')

import assignment
import bottleneck_assignment

In [16]:
# Column names of prob_df that the helpers and the cells below operate on.
types = ['ES', 'PSH', 'TH', 'RRH', 'PREV']

# Full table; the first CSV column becomes the index.
prob_df = pd.read_csv('../../data/subset_data.csv', index_col=0)

# Rows selected by the 'Used' column form the subset analysed further down.
used_mask = prob_df['Used']
subset_prob_df = prob_df.loc[used_mask]

# Row count per value of 'Real' in the FULL table; passed to the helpers as capacities.
capacity_df = prob_df['Real'].value_counts()

# Displayed as the cell result: shapes of the full table and of the subset.
(prob_df.shape, subset_prob_df.shape)

((14211, 10), (10129, 10))

In [17]:
# Row count per value of 'Real' in the full table.
prob_df.loc[:, 'Real'].value_counts()

5    6202
1    4441
3    2451
4     846
2     271
Name: Real, dtype: int64

In [18]:
# Row count per value of 'Real' among the rows kept in the subset.
subset_prob_df.loc[:, 'Real'].value_counts()

5    4737
1    2997
3    1469
4     840
2      86
Name: Real, dtype: int64

# Full data set

In [19]:
# Baseline solve on the full table: the only constraint supplied is capacity_df.
helper = assignment.AssignmentHelper(types=types, prob_df=prob_df)
helper.update_constraints(capacity_df=capacity_df)

sol_df = helper.ip_solve()
solver_status = pulp.LpStatus[helper.prob.status]
print(solver_status)

# compare_reentry returns three totals; the first and the third feed the
# relative reduction reported below.
real_n_reentries, bart_n_reentries, uncon_sol_n_reentries = helper.compare_reentry(sol_df)
pct_reduction = (real_n_reentries - uncon_sol_n_reentries) / real_n_reentries * 100
print(f'Percentage reduction in reentry probability: {pct_reduction}')

# Largest value in the 'Input_to_best' column of the comparison table.
prob_compare_df = helper.compare_prob(sol_df)
max_increase = prob_compare_df["Input_to_best"].max()
print(f'Max increase in reentry probability from best: {max_increase}')

Optimal
Total number of reentered households: 4082
BART-estimated: 4085.7049
Input assignment: 3627.0457
Percentage reduction in reentry probability: 11.14537832824632
Max increase in reentry probability from best: 0.5050538149999999


In [20]:
# For every row, subtract the row-wise minimum over the `types` columns from
# each of those columns, so each row's smallest entry becomes 0.
best_per_row = prob_df[types].min(axis=1)
increase_from_best_df = prob_df[types].sub(best_per_row, axis=0)

# Capacities are ordered by the 'Real' value (sort_index) before being handed over.
helper = bottleneck_assignment.BottleneckAssignmentHelperV2(
    increase_from_best_df.to_numpy(),
    capacity_df.sort_index().to_numpy()
)

# Wall-clock timing of the solve.
t0 = time.time()
bottleneck_result = helper.solve(verbose=True)
print(bottleneck_result)
print(f'Took {time.time() - t0:.4f} seconds')



Searching between 0.021094955 and 0.801554275




Searching between 0.09667532000000001 and 0.801554275




Searching between 0.203843269 and 0.801554275




Searching between 0.31724527799999996 and 0.801554275




Searching between 0.409406607 and 0.801554275
Searching between 0.409406607 and 0.48743338199999997
Searching between 0.409406607 and 0.44247619600000004




Searching between 0.42505548300000007 and 0.44247619600000004
Searching between 0.42505548300000007 and 0.4337425020000001




Searching between 0.42918097399999994 and 0.4337425020000001




Searching between 0.43151666199999994 and 0.4337425020000001
Searching between 0.43151666199999994 and 0.432711237
Searching between 0.43151666199999994 and 0.432126689




Searching between 0.431810356 and 0.432126689




Searching between 0.4320012750000001 and 0.432126689
Searching between 0.4320012750000001 and 0.43212075400000005
[0.43200128 0.43200557]
Took 67.2226 seconds


In [14]:
# Values tried for `fairness_best_constraint` on the full table.
# The first two are the pair printed by the bottleneck solve in the previous
# cell's recorded output; update them if that solve is rerun on other data.
constraints = [0.43200128, 0.43200557, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7]

# Parallel to `constraints`: objective value / comparison table per run,
# or False when ip_solve() reports failure by returning False.
summed_probs = []
compare_dfs = []

for constraint in constraints:
    # Build the problem exactly like the unconstrained full-data solve
    # (same prob_df, types and capacities) so the sweep only adds the
    # fairness bound. Previously the helper was created with no arguments
    # and no capacities, i.e. on whatever AssignmentHelper defaults to.
    helper = assignment.AssignmentHelper(prob_df=prob_df, types=types)
    helper.update_constraints(
        fairness_best_constraint=constraint,
        capacity_df=capacity_df
    )

    sol_df = helper.ip_solve()
    if sol_df is False:
        summed_probs.append(False)
        compare_dfs.append(False)
    else:
        summed_probs.append(pulp.value(helper.prob.objective))
        compare_dfs.append(helper.compare_prob(sol_df))

    print(constraint, pulp.LpStatus[helper.prob.status])

KeyboardInterrupt: 

In [None]:
f, ax1 = plt.subplots(figsize=(13, 10))

# Constraints are placed at integer x positions 0..len-1 and labelled afterwards.
positions = list(range(len(constraints)))

# One box per constraint that produced a solution (failed runs are stored as False).
for pos in positions:
    compare_df = compare_dfs[pos]
    if compare_df is False:
        continue
    ax1.boxplot(compare_df['Input_to_best'], positions=[pos])

# Line through the largest 'Input_to_best' of each run; NaN leaves a gap for failed runs.
worst_case = [
    np.nan if compare_df is False else compare_df['Input_to_best'].max()
    for compare_df in compare_dfs
]
ax1.plot(positions, worst_case)

ax1.set_xlabel('Maximum allowed probability increase', fontsize=16)
ax1.set_ylabel('Increase from ind. best allocations', fontsize=16)
ax1.set_xlim(-1, len(constraints))

ax1.set_xticks(positions)
ax1.set_xticklabels(constraints)

plt.show()

In [None]:
print(summed_probs)

# Relative reduction of each run's objective against 4082, the total printed
# by the full-data solve; failed runs (False) are carried through unchanged.
reductions = [
    False if total is False else (4082 - total) / 4082
    for total in summed_probs
]

print(reductions)

# Subset of the data (rows where `Used` is true)

In [23]:
# Baseline solve on the subset: the only constraint supplied is capacity_df.
# NOTE(review): capacity_df is counted on the full table, not on the subset,
# and `types` is not passed here unlike in the full-data solve — confirm both
# are intended.
helper = assignment.AssignmentHelper(prob_df=subset_prob_df)
helper.update_constraints(capacity_df=capacity_df)

sol_df = helper.ip_solve()
solver_status = pulp.LpStatus[helper.prob.status]
print(solver_status)

# compare_reentry returns three totals; the first and the third feed the
# relative reduction reported below.
real_n_reentries, bart_n_reentries, uncon_sol_n_reentries = helper.compare_reentry(sol_df)
pct_reduction = (real_n_reentries - uncon_sol_n_reentries) / real_n_reentries * 100
print(f'Percentage reduction in reentry probability: {pct_reduction}')

# Largest value in the 'Input_to_best' column of the comparison table.
prob_compare_df = helper.compare_prob(sol_df)
max_increase = prob_compare_df["Input_to_best"].max()
print(f'Max increase in reentry probability from best: {max_increase}')

Optimal
Total number of reentered households: 2796
BART-estimated: 2795.9972
Input assignment: 2302.5259
Percentage reduction in reentry probability: 17.649288050250473
Max increase in reentry probability from best: 0.4348843519999999


In [24]:
# For every subset row, subtract the row-wise minimum over the `types` columns
# from each of those columns, so each row's smallest entry becomes 0.
best_per_row = subset_prob_df[types].min(axis=1)
increase_from_best_df = subset_prob_df[types].sub(best_per_row, axis=0)

# Capacities are ordered by the 'Real' value (sort_index) before being handed over.
helper = bottleneck_assignment.BottleneckAssignmentHelperV2(
    increase_from_best_df.to_numpy(),
    capacity_df.sort_index().to_numpy()
)

# Wall-clock timing of the solve.
t0 = time.time()
bottleneck_result = helper.solve(verbose=True)
print(bottleneck_result)
print(f'Took {time.time() - t0:.4f} seconds')



Searching between 0.021094955 and 0.801554275




Searching between 0.093918491 and 0.801554275




Searching between 0.19467070600000003 and 0.801554275




Searching between 0.307160741 and 0.801554275
Searching between 0.307160741 and 0.401839292




Searching between 0.348290327 and 0.401839292




Searching between 0.37268020100000004 and 0.401839292




Searching between 0.38588789500000004 and 0.401839292
Searching between 0.38588789500000004 and 0.39406465600000007




Searching between 0.390127914 and 0.39406465600000007




Searching between 0.3920954670000001 and 0.39406465600000007
Searching between 0.3920954670000001 and 0.392969303




Searching between 0.39272293700000005 and 0.392969303
Searching between 0.39272293700000005 and 0.39286463200000005




Searching between 0.392810789 and 0.39286463200000005
[0.39281079 0.39285834]
Took 37.7723 seconds


In [None]:
# Values tried for `fairness_best_constraint` on the subset.
# TODO: the first two values mirror the pair printed by the subset bottleneck
# solve; change them whenever that solve is rerun.
constraints = [0.39281079, 0.39285834, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7]

# Parallel to `constraints`; an entry is False when ip_solve() returned False.
summed_probs = []
compare_dfs = []

for constraint in constraints:
    helper = assignment.AssignmentHelper(prob_df=subset_prob_df)
    helper.update_constraints(
        capacity_df=capacity_df,
        fairness_best_constraint=constraint,
    )

    sol_df = helper.ip_solve()
    solved = sol_df is not False

    # Objective value and comparison table for solved runs, False otherwise.
    summed_probs.append(pulp.value(helper.prob.objective) if solved else False)
    compare_dfs.append(helper.compare_prob(sol_df) if solved else False)

    print(constraint, pulp.LpStatus[helper.prob.status])

In [None]:
f, ax1 = plt.subplots(figsize=(13, 10))

# Constraints are placed at integer x positions 0..len-1 and labelled afterwards.
positions = list(range(len(constraints)))

# One box per constraint that produced a solution (failed runs are stored as False).
for pos in positions:
    compare_df = compare_dfs[pos]
    if compare_df is False:
        continue
    ax1.boxplot(compare_df['Input_to_best'], positions=[pos])

# Line through the largest 'Input_to_best' of each run; NaN leaves a gap for failed runs.
worst_case = [
    np.nan if compare_df is False else compare_df['Input_to_best'].max()
    for compare_df in compare_dfs
]
ax1.plot(positions, worst_case)

ax1.set_xlabel('Maximum allowed probability increase', fontsize=16)
ax1.set_ylabel('Increase from ind. best allocations', fontsize=16)
ax1.set_xlim(-1, len(constraints))

ax1.set_xticks(positions)
ax1.set_xticklabels(constraints)

plt.show()

In [None]:
print(summed_probs)

# Relative reduction of each subset run's objective against the subset's own
# observed total. `real_n_reentries` is the first value returned by
# compare_reentry in the subset solve above (2796 in the recorded output).
# The previous hard-coded 4082 is the FULL data set's total and made these
# ratios wrong for the subset.
# Failed runs (False) are carried through unchanged.
reductions = []
for prob in summed_probs:
    if prob is False:
        reductions.append(False)
    else:
        reductions.append((real_n_reentries - prob) / real_n_reentries)

print(reductions)