In [1]:
import numpy as np
import pandas as pd
import os
import re

In [24]:
def getDischargeColNames(df: pd.DataFrame) -> list:
    """Return the labels of every column whose name mentions "discharge".

    The match is a case-insensitive regex search, so the word may appear
    anywhere in the column label and in any capitalisation.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose column labels are inspected; the data is not read.

    Returns
    -------
    list
        Matching column labels, in the order they appear in ``df``.
    """
    discharge_only = df.filter(regex='(?i)discharge')
    return discharge_only.columns.tolist()


In [25]:
# File names of the two quarterly CSV extracts (prefixed WA and OR).
wa_file = 'WA_Hospital_Financial_Quarterly_Aggregate_Report_20250520.csv'
or_file = 'OR_quarterly_aggregation.csv'

# Both files are read from <current working directory>/data/input.
input_dir = os.path.join(os.getcwd(), 'data', 'input')

# header=0: the first row of each CSV supplies the column names.
wa_data = pd.read_csv(
    os.path.join(input_dir, wa_file),
    header=0,
)
or_data = pd.read_csv(
    os.path.join(input_dir, or_file),
    header=0,
)

In [26]:
# Side-by-side listing of the discharge-related column names found in the WA
# and OR frames, each sorted alphabetically, for comparing the two schemes.
_discharge_cols = {
    'WA': sorted(getDischargeColNames(wa_data)),
    'OR': sorted(getDischargeColNames(or_data)),
}

# pd.DataFrame needs equal-length columns. Pad whichever list is shorter with
# leading empty strings instead of assuming WA is exactly one name short;
# '' sorts first, so this matches the old `sorted(names + [''])` result when
# WA has one fewer column, and no longer raises for any other difference.
_n_rows = max(len(names) for names in _discharge_cols.values())
discharge_cols_df = pd.DataFrame({
    state: [''] * (_n_rows - len(names)) + names
    for state, names in _discharge_cols.items()
})

In [27]:
# Display the WA / OR discharge column-name comparison frame.
discharge_cols_df

Unnamed: 0,WA,OR
0,,Discharges of Acute Inpatient
1,Acute_Care_Commercial_Discharges,Discharges of Acute Inpatient (Commercial)
2,Acute_Care_Medicaid_Discharges,Discharges of Acute Inpatient (Medicaid)
3,Acute_Care_Medicare_Discharges,Discharges of Acute Inpatient (Medicare)
4,Acute_Care_Other_Discharges,Discharges of Acute Inpatient (Others)
5,Acute_Care_Self_Pay_Discharges,Discharges of Acute Inpatient (Self Pay)
6,Acute_Care_Total_Discharges,Discharges of DPU
7,Psych_Rehab_CDU_Commercial_Discharges,Discharges of DPU (Commercial)
8,Psych_Rehab_CDU_Medicaid_Discharges,Discharges of DPU (Medicaid)
9,Psych_Rehab_CDU_Medicare_Discharges,Discharges of DPU (Medicare)


In [28]:
# One-to-one mappings: WA column name (key) -> OR column name (value).
wa_to_or_map = {
    # hospital-wide discharge totals
    'Total_Discharges': 'Total Discharges',
    'Total_Medicaid_Discharges': 'Total Discharges (Medicaid)',
    'Total_Medicare_Discharges': 'Total Discharges (Medicare)',
    'Total_Commercial_Discharges': 'Total Discharges (Commercial)',
    'Total_Self_Pay_Discharges': 'Total Discharges (Self Pay)',
    'Total_Other_Discharges': 'Total Discharges (Others)',
    # swing bed
    'Swing_Bed_Total_Discharges': 'Discharges of Swing Bed',
    'Swing_Bed_Medicaid_Discharges': 'Discharges of Swing Bed (Medicaid)',
    'Swing_Bed_Medicare_Discharges': 'Discharges of Swing Bed (Medicare)',
    'Swing_Bed_Commercial_Discharges': 'Discharges of Swing Bed (Commercial)',
    'Swing_Bed_Self_Pay_Discharges': 'Discharges of Swing Bed (Self Pay)',
    'Swing_Bed_Other_Discharges': 'Discharges of Swing Bed (Others)',
    # Psych_Rehab_CDU (WA) <-> DPU (OR)
    'Psych_Rehab_CDU_Total_Discharges': 'Discharges of DPU',
    'Psych_Rehab_CDU_Commercial_Discharges': 'Discharges of DPU (Commercial)',
    'Psych_Rehab_CDU_Medicaid_Discharges': 'Discharges of DPU (Medicaid)',
    'Psych_Rehab_CDU_Medicare_Discharges': 'Discharges of DPU (Medicare)',
    'Psych_Rehab_CDU_Self_Pay_Discharges': 'Discharges of DPU (Self Pay)',
    'Psych_Rehab_CDU_Other_Discharges': 'Discharges of DPU (Others)',
    # Acute_Care (WA) <-> Acute Inpatient (OR)
    'Acute_Care_Commercial_Discharges': 'Discharges of Acute Inpatient (Commercial)',
    'Acute_Care_Medicaid_Discharges': 'Discharges of Acute Inpatient (Medicaid)',
    'Acute_Care_Medicare_Discharges': 'Discharges of Acute Inpatient (Medicare)',
    'Acute_Care_Other_Discharges': 'Discharges of Acute Inpatient (Others)',
    'Acute_Care_Self_Pay_Discharges': 'Discharges of Acute Inpatient (Self Pay)',
    'Acute_Care_Total_Discharges': 'Discharges of Acute Inpatient',
}

# SNF and LTC are close enough to the same concept that we'll map them
# together, but then rename them (see new_snf_ltc_names below).
wa_to_or_map.update({
    'SNF_Commercial_Discharges': 'Discharges of Subacute & LTC (Commercial)',
    'SNF_Medicaid_Discharges': 'Discharges of Subacute & LTC (Medicaid)',
    'SNF_Medicare_Discharges': 'Discharges of Subacute & LTC (Medicare)',
    'SNF_Other_Discharges': 'Discharges of Subacute & LTC (Others)',
    'SNF_Self_Pay_Discharges': 'Discharges of Subacute & LTC (Self Pay)',
    'SNF_Total_Discharges': 'Discharges of Subacute & LTC',
})

# Replacement names for the SNF columns. Not applied in this cell; kept for a
# later step.
new_snf_ltc_names = {
    'SNF_Commercial_Discharges': 'SNF_Subacute_LTC_Commercial_Discharges',
    'SNF_Medicaid_Discharges': 'SNF_Subacute_LTC_Medicaid_Discharges',
    'SNF_Medicare_Discharges': 'SNF_Subacute_LTC_Medicare_Discharges',
    'SNF_Other_Discharges': 'SNF_Subacute_LTC_Other_Discharges',
    'SNF_Self_Pay_Discharges': 'SNF_Subacute_LTC_Self_Pay_Discharges',
    'SNF_Total_Discharges': 'SNF_Subacute_LTC_Total_Discharges',
}

# Other (non-discharge) maps.
wa_to_or_map.update({
    'Total_Births': 'Births',
    'Number_of_Admissions_from_ER': 'Admissions from ED',
    'Emergency_Room_Visits': 'Emergency Department Visits',
    'Outpatient_Surgery_Visits': 'Ambulatory Surgery Visits',
    'Total_Inpatient_Surgeries': 'Inpatient Surgeries',
    'Observation_Visits': 'Observation Visits',
    'Home_Health_Visits': 'Home Health Visits',
    'Total_Outpatient_Visits': 'Total Outpatient Visits',
})

# Reverse lookup (OR name -> WA name) plus key sets for membership tests.
wa_keys_set = set(wa_to_or_map.keys())
or_to_wa_map = {val: key for key, val in wa_to_or_map.items()}
or_keys_set = set(or_to_wa_map.keys())

# Inverting a dict silently keeps only the last key for a repeated value, so
# fail loudly if two WA columns were ever pointed at the same OR column.
if len(or_to_wa_map) != len(wa_to_or_map):
    raise ValueError(
        "wa_to_or_map is not one-to-one: at least two WA columns map to the "
        "same OR column."
    )

# swapStateFeatureName checks the WA names first, so a name present on both
# sides could never be translated in the OR -> WA direction.
if not wa_keys_set.isdisjoint(or_keys_set):
    raise ValueError(
        "Column names appear on both the WA and OR side of the mapping: "
        "{}".format(sorted(wa_keys_set & or_keys_set))
    )

# Snapshot (shallow copies) of the lookup tables in the order that
# swapStateFeatureName unpacks them.
context = [
    wa_to_or_map.copy(),
    wa_keys_set.copy(),
    or_to_wa_map.copy(),
    or_keys_set.copy(),
]

def swapStateFeatureName(fname, context):
    """Translate a column name between the WA and OR naming schemes.

    Parameters
    ----------
    fname : str
        Column name to translate, in either scheme.
    context : sequence of length 4
        ``(wa_to_or_map, wa_keys_set, or_to_wa_map, or_keys_set)``, in that
        order. The WA names are checked first.

    Returns
    -------
    str
        The OR name when ``fname`` is a WA name, otherwise the WA name when
        ``fname`` is an OR name.

    Raises
    ------
    ValueError
        If ``fname`` is in neither key set. The message names the offending
        column so the failure can be traced.
    """
    wa_to_or, wa_names, or_to_wa, or_names = context

    if fname in wa_names:
        return wa_to_or[fname]
    if fname in or_names:
        return or_to_wa[fname]

    raise ValueError("Invalid column name to map: {!r}".format(fname))

In [29]:
# Keep only the mapped feature columns (in mapping order) followed by the
# hospital name, year and quarter columns of each frame.
wa_selected = wa_data[[*wa_to_or_map, 'Hospital_Name', 'Year', 'Quarter']]
or_selected = or_data[[*or_to_wa_map, 'Hospital Name', 'Year', 'Quarter']]