In [56]:
import pandas as pd
import numpy as np
from pulp import *
import os
import pulp as lp

In [46]:
##Load datasets

# Change the working directory to the notebook's directory
notebook_dir = "C:\\Users\\Anu Zan\\Desktop\\GeorgiaTech Spring 2024\\ISYE 6740 CDA\\project"
os.chdir(notebook_dir)

# Now, define the relative path to the dataset
dataset_path = os.path.join("..", "Dataset")

Optimization_dataset=pd.read_csv(os.path.join(dataset_path, "OptimizationData_04012024.csv"))
Intervention_dataset=pd.read_csv(os.path.join(dataset_path,"InterventionData_04052024.csv"))

#removed last column(P_NOTX) since it does not appear in the intervention dataset
#Household_dataset=Household_dataset.iloc[:, :-1]

In [47]:
## Data cleaning and conversions to datetype
Optimization_dataset['intervention_eligibility_start'] = pd.to_datetime(Optimization_dataset['intervention_eligibility_start'])
Optimization_dataset['intervention_eligibility_end'] = pd.to_datetime(Optimization_dataset['intervention_eligibility_end'])
Intervention_dataset['EnrollmentDate'] = pd.to_datetime(Intervention_dataset['EnrollmentDate'])
Optimization_dataset

Unnamed: 0.1,Unnamed: 0,ID,intervention_eligibility_start,intervention_eligibility_end,subpopulation,P_RRH,P_TSH,P_PSH,P_NOTX
0,0,5346,2020-08-12,2020-11-12,0,0.403523,0.365126,0.317578,0.038237
1,1,10810,2020-06-14,2020-09-14,1,0.486524,0.312525,0.308390,0.086466
2,2,13875,2021-07-25,2022-11-08,0,0.390293,0.380058,0.247997,0.040080
3,3,13440,2020-06-20,2021-08-21,0,0.343476,0.388433,0.295636,0.026810
4,4,11434,2020-06-17,2020-09-17,1,0.340353,0.415589,0.271931,0.026012
...,...,...,...,...,...,...,...,...,...
20676,20890,257,2020-06-10,2022-03-09,1,0.614422,0.373384,0.398029,0.238472
20677,20891,257,2022-11-09,2023-01-03,1,0.563452,0.379900,0.383452,0.252566
20678,20892,12780,2020-06-21,2020-09-21,1,0.483045,0.375423,0.282675,0.034415
20679,20893,2256,2020-06-09,2020-09-09,1,0.495911,0.397076,0.272658,0.052616


In [48]:
## Remove rows where intervention ends in 2023
Training_Optimization_dataset=Optimization_dataset[Optimization_dataset['intervention_eligibility_end'] < "2021-12-31"]
Training_intervention_dataset=Intervention_dataset[Intervention_dataset['EnrollmentDate']< "2021-12-31" ]


In [49]:
## Groupe intervention type per week 
Training_intervention_dataset.sort_values('EnrollmentDate', inplace=True)
Training_intervention_dataset_grouped=Training_intervention_dataset.groupby([pd.Grouper(key='EnrollmentDate', freq='W'), 'ProjectTypeName']).size().unstack(fill_value=0)
Training_intervention_dataset_grouped

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Training_intervention_dataset.sort_values('EnrollmentDate', inplace=True)


ProjectTypeName,PH - Permanent Supportive Housing,PH - Rapid Re-Housing,Transitional Housing
EnrollmentDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-08,1,1,0
2017-01-15,0,2,0
2017-01-22,0,2,0
2017-02-05,0,1,0
2017-02-12,1,3,0
...,...,...,...
2022-12-04,1,4,3
2022-12-11,1,3,0
2022-12-18,2,1,0
2022-12-25,2,5,0


In [54]:
Training_Optimization_dataset.sort_values('intervention_eligibility_start', inplace=True)

# Now, grouping by week and applying the provided lambda function to reset index within each group, but ensuring 'Week_Start_Date' is preserved
Training_Optimization_dataset_grouped = Training_Optimization_dataset.set_index("intervention_eligibility_start").groupby(pd.Grouper(freq='W')).apply(lambda _df: _df.assign(Week_Start=_df.name)).reset_index(drop=True)

# The 'Week_Start' column now contains the start of the week for the 'intervention_eligibility_start' date
Training_Optimization_dataset_grouped

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Training_Optimization_dataset.sort_values('intervention_eligibility_start', inplace=True)


Unnamed: 0.1,Unnamed: 0,ID,intervention_eligibility_end,subpopulation,P_RRH,P_TSH,P_PSH,P_NOTX,Week_Start
0,7681,13861,2017-03-31,1,0.605708,0.392315,0.390282,0.081395,2017-01-01
1,18382,12417,2017-04-01,1,0.384913,0.323653,0.331396,0.078390,2017-01-01
2,19079,9994,2017-06-28,1,0.364034,0.396487,0.285617,0.032998,2017-01-08
3,18327,4867,2017-10-23,1,0.459237,0.369800,0.362774,0.101124,2017-01-08
4,8521,5981,2018-10-03,1,0.387764,0.385844,0.360858,0.066253,2017-01-08
...,...,...,...,...,...,...,...,...,...
17410,18790,10317,2020-10-05,0,0.486860,0.435341,0.424961,0.373232,2023-12-03
17411,10530,11974,2017-06-01,1,0.423834,0.379707,0.309554,0.148024,2023-12-10
17412,14315,2622,2021-08-01,0,0.417905,0.394632,0.310507,0.113796,2023-12-17
17413,10406,15750,2017-09-08,1,0.474584,0.403793,0.272180,0.126019,2023-12-24


In [55]:
## Mapping 2 datasets on intervention type
intervention_mapping = {
    'PH - Rapid Re-Housing': 'P_RRH',
    'Transitional Housing': 'P_TSH',
    'PH - Permanent Supportive Housing': 'P_PSH',
    #  P_NOTX 
}

In [57]:
intervention_types = list(intervention_mapping.values())
intervention_types

['P_RRH', 'P_TSH', 'P_PSH']

In [None]:
# Initialize the problem
prob = lp.LpProblem("Housing_Intervention_Optimization", lp.LpMaximize)

In [58]:
household_ids = Training_Optimization_dataset_grouped['ID'].unique()
intervention_types = ['PH - Permanent Supportive Housing', 'PH - Rapid Re-Housing', 'Transitional Housing']  # Assuming these are your interventions
weeks = pd.date_range(start=Training_Optimization_dataset_grouped['Week_Start'].min(), end=Training_Optimization_dataset_grouped['Week_Start'].max(), freq='W-MON')  # Generating weeks from the range of your dataset

In [60]:
# Initialize your optimization problem
prob = LpProblem("Housing_Intervention_Optimization", LpMaximize)

# Define decision variables
x = LpVariable.dicts("Assignment", (household_ids, intervention_types, weeks.format()), cat='Binary')



  x = LpVariable.dicts("Assignment", (household_ids, intervention_types, weeks.format()), cat='Binary')


In [65]:
##construct the probability dictionary
probabilities = {}
# Iterate over each row in the DataFrame to populate the dictionary
for index, row in Training_Optimization_dataset_grouped.iterrows():
    # Format the 'Week_Start' date as a string for consistent dictionary keys
    week = row['Week_Start'].strftime('%Y-%m-%d')
    
    # Add probabilities for each intervention type to the dictionary
    probabilities[(row['ID'], 'PH - Rapid Re-Housing', week)] = row['P_RRH']
    probabilities[(row['ID'], 'Transitional Housing', week)] = row['P_TSH']
    probabilities[(row['ID'], 'PH - Permanent Supportive Housing', week)] = row['P_PSH']


In [67]:
# Define the objective function
prob += lp.lpSum(probabilities[(i, j, t)] * x[(i, j, t.strftime('%Y-%m-%d'))]
                 for i in household_ids
                 for j in intervention_types
                 for t in weeks
                 if (i, j, t.strftime('%Y-%m-%d')) in probabilities), "Total_Probability_of_Exiting_Homelessness"

In [71]:
##capacity Dictionary

Training_intervention_dataset_grouped.index = pd.to_datetime(Training_intervention_dataset_grouped.index)

capacities = {}
for t, row in Training_intervention_dataset_grouped.iterrows():
    week_str = t.strftime('%Y-%m-%d')
    for j in intervention_types[:]:  # Skipping 'No Intervention'
        capacities[(j, week_str)] = row[j]

In [72]:
capacities

{('PH - Permanent Supportive Housing', '2017-01-08'): 1,
 ('PH - Rapid Re-Housing', '2017-01-08'): 1,
 ('Transitional Housing', '2017-01-08'): 0,
 ('PH - Permanent Supportive Housing', '2017-01-15'): 0,
 ('PH - Rapid Re-Housing', '2017-01-15'): 2,
 ('Transitional Housing', '2017-01-15'): 0,
 ('PH - Permanent Supportive Housing', '2017-01-22'): 0,
 ('PH - Rapid Re-Housing', '2017-01-22'): 2,
 ('Transitional Housing', '2017-01-22'): 0,
 ('PH - Permanent Supportive Housing', '2017-02-05'): 0,
 ('PH - Rapid Re-Housing', '2017-02-05'): 1,
 ('Transitional Housing', '2017-02-05'): 0,
 ('PH - Permanent Supportive Housing', '2017-02-12'): 1,
 ('PH - Rapid Re-Housing', '2017-02-12'): 3,
 ('Transitional Housing', '2017-02-12'): 0,
 ('PH - Permanent Supportive Housing', '2017-02-19'): 0,
 ('PH - Rapid Re-Housing', '2017-02-19'): 5,
 ('Transitional Housing', '2017-02-19'): 0,
 ('PH - Permanent Supportive Housing', '2017-02-26'): 0,
 ('PH - Rapid Re-Housing', '2017-02-26'): 2,
 ('Transitional Housin