In [53]:
import pandas as pd

In [54]:
# Directory holding the raw export; it is concatenated with `file` when the CSV is read.
folder = './input_files/'

# Name of the export to process. It is also the lookup key into hosp_map further down,
# so the two spellings have to stay identical.
file = '370512290_GOOD-SHEPHERD-HOSPITAL_STANDARDCHAGES.txt'

In [55]:
# Load every column as text (dtype=str) so nothing is coerced to numbers, and skip the
# first line of the file so the following line is used as the header row.
# 'ansi' is only registered as a codec on Windows (an alias of 'mbcs'); on any other
# platform it raises LookupError. cp1252 is the Windows Western code page that alias
# normally resolves to, and it is available everywhere.
df = pd.read_csv(folder + file, dtype=str, skiprows=1, encoding='cp1252')

In [56]:
# The 'Site' column is not carried forward.
df = df.drop(columns=['Site'])

In [57]:
# Trim stray whitespace around every header name so the renames below match.
df = df.rename(columns=str.strip)

In [58]:
# Export header -> column name used by the rest of the notebook.
column_aliases = {
    'Svc_Cd': 'local_code',
    'HCPC/Cpt_Cd': 'code',
    'CDM_Svc_Descr': 'description',
    'Rev_Cd': 'rev_code',
    'Quantity/Units': 'drug_quantity',
}
df = df.rename(columns=column_aliases)

In [59]:
# Trim the quantity text, then blank out the value '1' so only other quantities remain.
quantity = df['drug_quantity'].str.strip()
df['drug_quantity'] = quantity
df.loc[quantity == '1', 'drug_quantity'] = pd.NA

In [60]:
# Unpivot the frame: the first five columns stay as identifiers, and every remaining
# column becomes one (payer_name, standard_charge) row per line item.
cols = list(df.columns)
id_vars, val_vars = cols[:5], cols[5:]

df = df.melt(
    id_vars=id_vars,
    value_vars=val_vars,
    var_name='payer_name',
    value_name='standard_charge',
)

In [61]:
# Remove surrounding whitespace from the billing code before pattern matching.
df = df.assign(code=df['code'].str.strip())

In [62]:
# True for rows that have a billing code at all.
mask = df['code'].notna()

In [63]:
# A code made of exactly three digits is copied into ms_drg.
three_digit = df['code'].str.match(r'^\d{3}$')
df.loc[mask & three_digit, 'ms_drg'] = df['code']

In [64]:
# Upper-cased copy used only for matching; hcpcs_cpt receives the code as written.
df['temp_code'] = df['code'].str.upper()

# Accepted shapes: letter + 4 digits, 5 digits, or 4 digits + letter.
hcpcs_shape = r'^[A-Z][0-9]{4}$|^[0-9]{5}$|^[0-9]{4}[A-Z]$'
looks_like_hcpcs = df['temp_code'].str.match(hcpcs_shape)
df.loc[mask & looks_like_hcpcs, 'hcpcs_cpt'] = df['code']

In [65]:
# Melted column names with a fixed rate category; any name not listed here is
# classified as 'negotiated'.
payer_map = dict(
    Hospital_Cdm_Chg='gross',
    Self_Pay_Chg='cash',
    Minimum_Negotiated_Chg='min',
    Maximum_Negotiated_Chg='max',
)

known_category = df['payer_name'].map(payer_map)
df['rate_category'] = known_category.fillna('negotiated')

In [66]:
# Remove currency formatting ('$' and thousands separators) from the charge text.
# regex=False is explicit because '$' is a regex end-of-string anchor: whether the
# bare call removes the character depends on the pandas version's default for `regex`.
df['standard_charge'] = (
    df['standard_charge']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)

In [67]:
# The code '1967' is recorded in hcpcs_cpt with its leading zero, as '01967'.
is_1967 = df['code'].eq('1967')
df.loc[is_1967, 'hcpcs_cpt'] = '01967'

In [68]:
# Restore a missing leading zero on 4-character codes. The padded value is only kept
# when it has the same shape that is accepted when hcpcs_cpt is first populated
# (letter + 4 digits, 5 digits, or 4 digits + letter); otherwise an arbitrary
# 4-character value such as 'A123' would be written to hcpcs_cpt as '0A123'.
four_chars = df['code'].str.len() == 4
padded_code = df['code'].str.zfill(5)
padded_is_hcpcs = padded_code.str.upper().str.match(
    r'^[A-Z][0-9]{4}$|^[0-9]{5}$|^[0-9]{4}[A-Z]$', na=False
)
df.loc[four_chars & padded_is_hcpcs, 'hcpcs_cpt'] = padded_code

# Left-pad revenue codes with zeros to four characters; missing values are left alone.
has_rev_code = df['rev_code'].notna()
df.loc[has_rev_code, 'rev_code'] = df['rev_code'].str.zfill(4)


In [69]:
# temp_code was only a matching helper; remove it from the frame.
df = df.drop(columns=['temp_code'])

## Clean up standard charges and split out percent-of-Medicare/Medicaid rates

In [70]:
# Trim whitespace so the exact-match comparisons on the charge text work.
charge_text = df['standard_charge']
df['standard_charge'] = charge_text.str.strip()

In [71]:
# Remove rows that carry no usable charge: blank cells, the literal 'N/A', and a bare
# 'Inpatient Medicare' / 'Inpatient Medicaid' label with nothing after it.
# na=False matters: for blank (NaN) cells .str.match would otherwise return NaN, and
# df.loc refuses a boolean mask that contains NaN.
bare_label = df['standard_charge'].str.match(
    '^Inpatient Medicare$|^Inpatient Medicaid$', na=False
)
not_available = df['standard_charge'] == 'N/A'
df.loc[bare_label | not_available, 'standard_charge'] = pd.NA
df = df.dropna(subset=['standard_charge'])

# Record the care setting when the remaining charge text names one.
charge_lower = df['standard_charge'].str.lower()
df.loc[charge_lower.str.contains('inpatient', na=False), 'setting'] = 'inpatient'
df.loc[charge_lower.str.contains('outpatient', na=False), 'setting'] = 'outpatient'

In [72]:
# For charges quoted against Medicare/Medicaid (e.g. 'Inpatient Medicare +150%'),
# capture the one-to-three digits between '+' and '%'.
quoted_vs_benchmark = df['standard_charge'].str.contains(r'Medicare|Medicaid')
percent_digits = df['standard_charge'].str.extract(r'\+(\d{1,3})\%', expand=False)
df.loc[quoted_vs_benchmark, 'standard_charge_percent'] = percent_digits

In [74]:
# The captured digits are text; convert the column to float (missing stays NaN).
df = df.astype({'standard_charge_percent': float})

In [75]:
# '+150%' is stored as 250.0: the captured markup plus the 100% base.
# Rows without a percentage are NaN and stay NaN under the addition.
df['standard_charge_percent'] = df['standard_charge_percent'].add(100)

In [49]:
# Charges that mention Medicare/Medicaid are descriptions, not amounts: keep the text
# in additional_generic_notes and clear it from standard_charge.
mask = df['standard_charge'].str.contains('Medicare|Medicaid')
benchmark_text = df.loc[mask, 'standard_charge']

df.loc[mask, 'additional_generic_notes'] = benchmark_text
df.loc[mask, 'standard_charge'] = pd.NA

In [50]:
# Inspect the rows whose charge text was moved into the notes column.
df[df['additional_generic_notes'].notna()]

Unnamed: 0,local_code,code,description,rev_code,drug_quantity,payer_name,standard_charge,ms_drg,hcpcs_cpt,rate_category,standard_charge_percent,additional_generic_notes
39489,12000001,,HC DAILY SERVICE GENERAL,120,,"6 DEGREES HEALTH, INC",,,,negotiated,250.0,Inpatient Medicare +150%
39490,12000006,,HC DAILY SERVICE W/TELEMETRY,120,,"6 DEGREES HEALTH, INC",,,,negotiated,250.0,Inpatient Medicare +150%
39491,12200001,,HC DAILY SERVICE OBSTETRICS,122,,"6 DEGREES HEALTH, INC",,,,negotiated,250.0,Inpatient Medicare +150%
39492,12500001,,HC DAILY HOSPICE ACUTE CARE,125,,"6 DEGREES HEALTH, INC",,,,negotiated,250.0,Inpatient Medicare +150%
39493,17100001,,HC DAILY INFANT SERVICE LEV 1,171,,"6 DEGREES HEALTH, INC",,,,negotiated,250.0,Inpatient Medicare +150%
...,...,...,...,...,...,...,...,...,...,...,...,...
463944,12200001,,HC DAILY SERVICE OBSTETRICS,122,,NAPHCARE,,,,negotiated,200.0,Inpatient Medicare +100%
463945,12500001,,HC DAILY HOSPICE ACUTE CARE,125,,NAPHCARE,,,,negotiated,200.0,Inpatient Medicare +100%
463946,17100001,,HC DAILY INFANT SERVICE LEV 1,171,,NAPHCARE,,,,negotiated,200.0,Inpatient Medicare +100%
463947,17200002,,HC DAILY INFANT SERVICE LEV 2,172,,NAPHCARE,,,,negotiated,200.0,Inpatient Medicare +100%


In [52]:
# Inspect rows whose charge is the literal text 'N/A'.
df[df['standard_charge'].eq('N/A')]

Unnamed: 0,local_code,code,description,rev_code,drug_quantity,payer_name,standard_charge,ms_drg,hcpcs_cpt,rate_category,standard_charge_percent,additional_generic_notes
39496,25000003,,ACETYLCYSTEINE 20 % SOLN,250,,"6 DEGREES HEALTH, INC",,,,negotiated,,
39497,25000003,,ALBUTEROL (5 MG/ML) 0.5% NEBU 20 ML BOTTLE,250,,"6 DEGREES HEALTH, INC",,,,negotiated,,
39498,25000003,,ATROPINE 1 % SOLN 5 ML BOTTLE,250,,"6 DEGREES HEALTH, INC",,,,negotiated,,
39499,25000003,,AZTREONAM 1 G SOLR 1 EACH VIAL,250,,"6 DEGREES HEALTH, INC",,,,negotiated,,
39500,25000003,,AZTREONAM 2 G SOLR 1 EACH VIAL,250,,"6 DEGREES HEALTH, INC",,,,negotiated,,
...,...,...,...,...,...,...,...,...,...,...,...,...
542900,98800015,99222,PC INITIAL HOSPITAL INP/OBSERV CARE LVL 2 MODE...,987,,UNITED HEALTH CARE MEDICARE,,,99222,negotiated,,
542901,98800016,99223,PC INITIAL HOSPITAL INP/OBSERV CARE LVL 3 HIGH...,987,,UNITED HEALTH CARE MEDICARE,,,99223,negotiated,,
542902,98800017,99231,PC SUBSQ HOSPITAL INP/OBSERV CARE LVL 1 LOW 25...,987,,UNITED HEALTH CARE MEDICARE,,,99231,negotiated,,
542903,98800018,99232,PC SUBSQ HOSPITAL INP/OBSERV CARE LVL 2 MODERA...,987,,UNITED HEALTH CARE MEDICARE,,,99232,negotiated,,


In [20]:
# Export file name -> hospital_id written to the output. Keys must match `file`
# character for character (including the 'STANDARDCHAGES' spelling of the first one).
hosp_map = {
    '370512290_GOOD-SHEPHERD-HOSPITAL_STANDARDCHAGES.txt': '140019',
    '370661233_ST-ANTHONY-S-MEMORIAL-HOSPITAL_STANDARDCHARGES.txt': '140032',
    '370661238_ST-JOHN-S-HOSPITAL_STANDARDCHARGES.txt': '140053',
    '370792770_HOLY-FAMILY-HOSPITAL_STANDARDCHARGES.txt': '140137',
    '371208459_ST-JOSEPH-S-HOSPITAL-BREESE_STANDARDCHARGES.txt': '140145',
    '370661244_ST-MARY-S-HOSPITAL-DECATUR_STANDARDCHARGES.txt': '140166',
    '370663567_ST-ELIZABETH-S-HOSPITAL_STANDARDCHARGES.txt': '140187',
    '370663568_ST-JOSEPH-S-HOSPITAL-HIGHLAND_STANDARDCHARGES.txt': '141336',
    '370661236_ST-FRANCIS-HOSPITAL_STANDARDCHARGES.txt': '141350',
    '390807060_SACRED-HEART-HOSPITAL_STANDARDCHARGES.txt': '520013',
    '390810545_ST-JOSEPH-S-CHIPPEWA-FALLS_STANDARDCHARGES.txt': '520017',
    '390808480_ST-NICHOLAS-HOSPITAL_STANDARDCHARGES.txt': '520044',
    '390817529_ST-VINCENT-HOSPITAL_STANDARDCHARGES.txt': '520075',
    '390818682_ST-MARY-S-HOSPITAL_STANDARDCHARGES.txt': '520097',
    '390848401_ST-CLARE-MEMORIAL-HOSPITAL_STANDARDCHARGES.txt': '521310',
}

# Raises KeyError when the file being processed has no entry in the table.
hosp_id = hosp_map[file]

# Stamp every row with the id of the hospital this export belongs to.
df = df.assign(hospital_id=hosp_id)

output_folder = './output_files/'

# df.to_csv(output_folder + hosp_id + '_' + file.split('_')[1] + '.csv', index=False)