In [22]:
import pandas as pd
import polars as pl

In [23]:
# Directory holding the raw hospital standard-charges files.
# Forward slashes are used because they resolve on Windows, macOS and Linux
# alike, whereas a backslash-separated relative path only works on Windows.
folder = './input_files/'

# The text before the first underscore is later used as the lookup key for the
# hospital identifier.
file = '26-0194016_baylor-scott-and-white-medical-center-lake-pointe_standardcharges.csv'

# Read every column as text so codes keep their leading zeros; the first four
# lines are skipped (presumably a preamble above the header row — confirm
# against the raw file).
df = pd.read_csv(folder + file, dtype=str, skiprows=4)

In [24]:
# Translate the source file's column headers into the snake_case names used
# by the rest of the notebook.
column_names = {
    'Patient Type': 'setting',
    'DRG': 'ms_drg',
    'APR-DRG': 'apr_drg',
    'Procedure Code': 'local_code',
    'Procedure Name': 'description',
    'NDC': 'ndc',
    'Default Rev Code': 'rev_code',
    'CPT / HCPCS Code': 'hcpcs_cpt',
    'Service Package Type': 'contracting_method',
}
df = df.rename(columns=column_names)

In [25]:
# Normalise the two free-text category columns to lower case.
for text_col in ('setting', 'contracting_method'):
    df[text_col] = df[text_col].str.lower()

In [26]:
# The gross-charge min/max column is not carried into the reshaped data.
df = df.drop(columns=['Gross Charge Min/Max'])

In [27]:
# Identifier columns that describe each charge line. They are listed by name
# rather than taken as "the first nine columns", so a source file with an
# added, missing, or reordered column makes pd.melt raise instead of being
# silently reshaped with the wrong identifiers.
id_vars = [
    'setting', 'ms_drg', 'apr_drg', 'local_code', 'description',
    'ndc', 'rev_code', 'hcpcs_cpt', 'contracting_method',
]

# Every remaining column is treated as a charge column to be unpivoted.
cols = df.columns.tolist()
value_vars = [col for col in cols if col not in id_vars]

# Wide -> long: one row per (charge line, charge column) pair, with the
# original column header stored in 'payer'.
df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer', value_name='standard_charge')

In [28]:
# Parse charges as numbers; values that cannot be parsed become NaN and the
# corresponding rows are discarded.
numeric_charge = pd.to_numeric(df['standard_charge'], errors='coerce')
df = df.assign(standard_charge=numeric_charge).dropna(subset=['standard_charge'])

In [29]:
# Code columns carry an empty string instead of NaN when no code is present.
empty_cols = ['ms_drg', 'apr_drg', 'local_code', 'ndc', 'rev_code', 'hcpcs_cpt']
df = df.fillna({code_col: '' for code_col in empty_cols})

In [30]:
# Charge-column headers that get a dedicated category label.
mapping = {
    'Gross Charge': 'gross',
    'Discounted Cash Price': 'cash',
    'De-Identified Minimum Reimbursement*': 'min',
    'De-Identified Maximum Reimbursement*': 'max',
}

# Any header not listed above is labelled 'payer'.
df['payer_category'] = df['payer'].map(lambda header: mapping.get(header, 'payer'))

In [31]:
# Lookup from the EIN-style prefix of the file name to the hospital identifier
# (presumably CMS Certification Numbers, given the variable name — confirm).
ccn = {
    '74-1166904': '450054',
    '75-2586857': '450079',
    '74-1161944': '450101',
    '75-1008430': '450137',
    '75-1844139': '450372',
    '75-1777119': '450563',
    '75-2834135': '450851',
    '82-0551704': '450890',
    '74-1595711': '451374',
    '20-2850920': '452105',
    '41-2101361': '670025',
    '20-3749695': '670043',
    '27-4434451': '670088',
    '46-4007700': '670108',
    '81-3040663': '670136',
    '74-2519752': '450187',
    '82-4052186': '450885',
    '26-0194016': '450742',
    '75-1037591': '670082',
    '26-3603862': '450893',
    '75-1837454': '450021',
}

# Everything before the first underscore in the file name is the lookup key.
ein = file.partition('_')[0]

# Stamp every row with the identifier; an unknown key raises KeyError.
df = df.assign(hospital_id=ccn[ein])


# df = pl.from_pandas(df)
# df.write_csv('.\\output_files\\' + ein + '.csv')


In [35]:
# Show the rows whose hcpcs_cpt value is longer than five characters.
long_code = df['hcpcs_cpt'].str.len().gt(5)
df[long_code]

Unnamed: 0,setting,ms_drg,apr_drg,local_code,description,ndc,rev_code,hcpcs_cpt,contracting_method,payer,standard_charge,payer_category,hospital_id
4567,inpatient,,,99800182,HC SURFACTANT ADMINISTRATION,,998,99800182,,Gross Charge,205.6,gross,450742
4568,inpatient,,,99800351,HC INCENTIVE SPIRO,,998,99800351,,Gross Charge,167.2,gross,450742
9150,outpatient,,,99800182,HC SURFACTANT ADMINISTRATION,,998,99800182,,Gross Charge,205.6,gross,450742
9151,outpatient,,,99800351,HC INCENTIVE SPIRO,,998,99800351,,Gross Charge,167.2,gross,450742
35248,inpatient,,,99800182,HC SURFACTANT ADMINISTRATION,,998,99800182,,Discounted Cash Price,123.36,cash,450742
35249,inpatient,,,99800351,HC INCENTIVE SPIRO,,998,99800351,,Discounted Cash Price,100.32,cash,450742
39831,outpatient,,,99800182,HC SURFACTANT ADMINISTRATION,,998,99800182,,Discounted Cash Price,123.36,cash,450742
39832,outpatient,,,99800351,HC INCENTIVE SPIRO,,998,99800351,,Discounted Cash Price,100.32,cash,450742


In [33]:
# Display the reshaped frame (the rendered output elides the middle rows).
df

Unnamed: 0,setting,ms_drg,apr_drg,local_code,description,ndc,rev_code,hcpcs_cpt,contracting_method,payer,standard_charge,payer_category,hospital_id
0,inpatient,,,11000001,HC PRIVATE - GENERAL,,110,,,Gross Charge,1132.60,gross,450742
1,inpatient,,,11100001,HC PRIVATE - MED/SURG/GYN,,111,,,Gross Charge,1063.66,gross,450742
2,inpatient,,,11200001,HC PRIVATE - OB,,112,,,Gross Charge,1134.86,gross,450742
3,inpatient,,,11264451,HC PRIVATE- ANTEPARTUM,,112,,,Gross Charge,1134.62,gross,450742
4,inpatient,,,11264452,HC PRIVATE- POSTPARTUM,,112,,,Gross Charge,1123.38,gross,450742
...,...,...,...,...,...,...,...,...,...,...,...,...,...
642169,inpatient,982,,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,,case rate,Wellmed Medicare,31290.00,payer,450742
642170,inpatient,983,,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,,case rate,Wellmed Medicare,20374.00,payer,450742
642171,inpatient,987,,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,,case rate,Wellmed Medicare,40163.00,payer,450742
642172,inpatient,988,,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,,case rate,Wellmed Medicare,20693.00,payer,450742
