In [41]:
import pandas as pd
import numpy as np

In [42]:
# Location of the raw standard-charges export.
# Forward slashes are used because they are accepted by Python on both
# Windows and POSIX systems; a backslash-separated path only resolves on
# Windows.
folder = './input_files/'

file = '362174832_silvercrosshospital_standardcharges_033123.csv'

In [43]:
# Load the raw export. The first two lines of the file are skipped, and
# every column is read as text so that code values keep their exact
# characters (no numeric coercion).
input_path = folder + file
df = pd.read_csv(input_path, skiprows=2, dtype=str)

In [44]:
# Discard the hospital_id column that ships with the source file.
df = df.drop(columns=['hospital_id'])

In [45]:
# Normalise the header before renaming: a column label padded with
# whitespace (e.g. ' DRG') would otherwise not match the mapping keys and
# would silently keep its original name.
df = df.rename(columns=str.strip)

# Map the source column names onto the names used by the rest of the
# notebook.
df = df.rename(columns={
    'DRG': 'ms_drg',
    'APR-DRG': 'apr_drg',
    'APC': 'apc',
    'EAPG': 'eapg',
    'item_description': 'description',
    'item_setting': 'setting',
})

In [46]:
# Remove leading/trailing whitespace from every column label.
df = df.rename(columns=str.strip)

In [47]:
# Reshape from wide to long. The first 11 columns are kept as identifier
# columns; every remaining column becomes a (payer_name, standard_charge)
# pair, one row per original row and column.
cols = list(df.columns)
id_vars, value_vars = cols[:11], cols[11:]

df = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='payer_name',
    value_name='standard_charge',
)

In [48]:
# Left-pad MS-DRG codes with zeros to three characters. Missing values
# pass through .str.zfill unchanged, so no explicit null filter is needed.
df['ms_drg'] = df['ms_drg'].str.zfill(3)

In [49]:
# Build a single procedure-code column: use the HCPCS code when present,
# otherwise fall back to the CPT code (missing when both are absent).
df['hcpcs_cpt'] = df['hcpcs'].fillna(df['cpt'])

In [50]:
# Expand the one-letter setting codes into full labels.
# Values that are not a known code (including missing values and labels
# that were already expanded) are left untouched rather than being
# overwritten with a placeholder, which also makes this cell safe to
# re-run.
setting_labels = {'O': 'outpatient', 'I': 'inpatient'}
df['setting'] = df['setting'].map(setting_labels).fillna(df['setting'])

In [51]:
# Drop the modifier columns and the separate cpt/hcpcs code columns.
raw_code_columns = ['cpt_modifier', 'hcpcs_modifier', 'cpt', 'hcpcs']
df = df.drop(columns=raw_code_columns)

In [52]:
# Four payer_name values have a dedicated category; any other payer_name
# is categorised as 'payer'.
payer_mapping = dict(
    gross_charge='gross',
    cash_price='cash',
    de_identified_min='min',
    de_identified_max='max',
)

mapped_category = df['payer_name'].map(payer_mapping)
df['payer_category'] = mapped_category.fillna('payer')

In [53]:
# Remove rows that carry no standard_charge value.
df = df.dropna(subset=['standard_charge'])

In [54]:
# Stamp every row with this hospital's identifier.
# NOTE(review): presumably the hospital's CMS Certification Number - confirm.
HOSPITAL_ID = '140213'
df['hospital_id'] = HOSPITAL_ID

In [55]:
# Left-pad revenue codes and APC codes with zeros to four characters.
# Missing values pass through .str.zfill unchanged.
for code_column, width in (('rev_code', 4), ('apc', 4)):
    df[code_column] = df[code_column].str.zfill(width)

In [56]:
# Show one example row for each distinct standard_charge text that
# contains a percent sign.
percent_rows = df[df['standard_charge'].str.contains('%')]
percent_rows.drop_duplicates(subset=['standard_charge'])

Unnamed: 0,ms_drg,apr_drg,apc,eapg,rev_code,description,setting,payer_name,standard_charge,hcpcs_cpt,payer_category,hospital_id
72125,3,,,,,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,BCBS Indemnity,38% of Gross Charge,,payer,140213
230922,3,,,,,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,MPI Complementary,90% of Gross Charge,,payer,140213
240263,3,,,,,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,MPI Primary,80% of Gross Charge,,payer,140213


In [57]:
# Rows whose charge is expressed as a percentage (e.g. "38% of Gross
# Charge") are moved out of standard_charge into dedicated columns.
# regex=False treats '%' literally; na=False keeps the mask strictly
# boolean even if standard_charge contains missing values.
mask = df['standard_charge'].str.contains('%', regex=False, na=False)

# Capture the full number in front of the '%' sign: any number of digits
# with an optional decimal part, so "5%", "100%" and "38.5%" are all
# extracted intact instead of being truncated or dropped.
percent_values = df['standard_charge'].str.extract(
    r'(\d+(?:\.\d+)?)\s*%', expand=False
)

df.loc[mask, 'standard_charge_percent'] = percent_values
df.loc[mask, 'contracting_method'] = 'percent of total billed charge'
# Keep the original wording as a note before the charge value is blanked.
df.loc[mask, 'additional_payer_specific_notes'] = df['standard_charge']
df.loc[mask, 'standard_charge'] = pd.NA

In [58]:
# Display the rows selected by the percent mask to check the new columns.
df[mask]

Unnamed: 0,ms_drg,apr_drg,apc,eapg,rev_code,description,setting,payer_name,standard_charge,hcpcs_cpt,payer_category,hospital_id,standard_charge_percent,contracting_method,additional_payer_specific_notes
72125,003,,,,,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,BCBS Indemnity,,,payer,140213,38,percent of total billed charge,38% of Gross Charge
72126,004,,,,,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,BCBS Indemnity,,,payer,140213,38,percent of total billed charge,38% of Gross Charge
72127,011,,,,,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,BCBS Indemnity,,,payer,140213,38,percent of total billed charge,38% of Gross Charge
72128,012,,,,,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,BCBS Indemnity,,,payer,140213,38,percent of total billed charge,38% of Gross Charge
72129,013,,,,,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,BCBS Indemnity,,,payer,140213,38,percent of total billed charge,38% of Gross Charge
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242861,,952-2,,,,NONEXTENSIVE PROCEDURE UNRELATED TO PRINCIPAL ...,inpatient,MPI Primary,,,payer,140213,80,percent of total billed charge,80% of Gross Charge
242862,,952-3,,,,NONEXTENSIVE PROCEDURE UNRELATED TO PRINCIPAL ...,inpatient,MPI Primary,,,payer,140213,80,percent of total billed charge,80% of Gross Charge
242863,,952-4,,,,NONEXTENSIVE PROCEDURE UNRELATED TO PRINCIPAL ...,inpatient,MPI Primary,,,payer,140213,80,percent of total billed charge,80% of Gross Charge
242864,,955-0,,,,Principal Diag Invalid,inpatient,MPI Primary,,,payer,140213,80,percent of total billed charge,80% of Gross Charge


In [59]:
# Sanity check: list rows that have neither a standard_charge nor a
# standard_charge_percent.
missing_both = df['standard_charge'].isna() & df['standard_charge_percent'].isna()
df[missing_both]

Unnamed: 0,ms_drg,apr_drg,apc,eapg,rev_code,description,setting,payer_name,standard_charge,hcpcs_cpt,payer_category,hospital_id,standard_charge_percent,contracting_method,additional_payer_specific_notes


In [60]:
# Write the long-format result without the DataFrame index.
# Forward slashes are used because they are accepted by Python on both
# Windows and POSIX systems; a backslash-separated path only resolves on
# Windows.
df.to_csv('./output_files/output.csv', index=False)