In [194]:
import pandas as pd
import polars as pl

In [195]:
def payer_category(payer):
    payers = {
        'AVERAGE CHARGES': 'gross',
        'CHARGE AMOUNT': 'gross',
        'SELF PAY CHARGE': 'cash',
        'MINIMUM RATE': 'min',
        'MAXIMUM RATE': 'max',
    }

    try:
        return payers[payer]
    except KeyError:
        return 'payer'

In [196]:
hospital_id = {
    '050258896': '410009',
    '050258937': '410010'
}

In [248]:
file = '050258937_W&I Price Transparency 2023.xlsx'
df = pd.read_excel(file, sheet_name='INPATIENT SERVICES')

In [249]:
# Strip columns
# df.rename(columns=lambda x: x.strip().replace('\n', ' '), inplace=True)
df.rename(columns=lambda x: x.strip(), inplace=True)

In [250]:
df.rename(columns={
    'DRG': 'code',
    'DRG TYPE': 'line_type',
    'DESCRIPTION': 'description',
}, inplace=True)

In [251]:
cols = df.columns.tolist()
id_vars = cols[:3]
value_vars = cols[3:]

df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer', value_name='standard_charge')

In [252]:
df.dropna(subset=['standard_charge'], inplace=True)

In [253]:
df.loc[df['line_type'] == 'MS-DRG', 'ms_drg'] = df['code']
df.loc[df['line_type'] == 'APR-DRG', 'apr_drg'] = df['code']

In [254]:
df['payer_category'] = df['payer'].apply(payer_category)
df['setting'] = 'inpatient'

df['hospital_id'] = hospital_id[file.split("_")[0]]

In [255]:
df[['apr_drg', 'ms_drg', 'code']].fillna('', inplace=True)
df['apr_drg'] = df['apr_drg'].astype(str)
df['code'] = df['code'].astype(str)
df['ms_drg'] = df['ms_drg'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['apr_drg', 'ms_drg', 'code']].fillna('', inplace=True)


In [256]:
df.loc[df['code'] == 'nan', 'code'] = ''
df.loc[df['ms_drg'] == 'nan', 'ms_drg'] = ''
df.loc[df['apr_drg'] == 'nan', 'apr_drg'] = ''

In [257]:
df['standard_charge'] = pd.to_numeric(df['standard_charge'], errors='coerce')

df.dropna(subset='standard_charge', inplace=True)

In [258]:
df

Unnamed: 0,code,line_type,description,payer,standard_charge,ms_drg,apr_drg,payer_category,setting,hospital_id
0,001,MS-DRG,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,AVERAGE CHARGES,604888.000000,001,,gross,inpatient,410010
1,002,MS-DRG,HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SY...,AVERAGE CHARGES,313187.000000,002,,gross,inpatient,410010
2,003,MS-DRG,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,AVERAGE CHARGES,408077.232500,003,,gross,inpatient,410010
3,004,MS-DRG,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,AVERAGE CHARGES,318503.163333,004,,gross,inpatient,410010
4,005,MS-DRG,LIVER TRANSPLANT WITH MCC OR INTESTINAL TRANSP...,AVERAGE CHARGES,214125.000000,005,,gross,inpatient,410010
...,...,...,...,...,...,...,...,...,...,...
69508,982,MS-DRG,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,UNITED ACO TIERED,29925.000000,982,,payer,inpatient,410010
69509,983,MS-DRG,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,UNITED ACO TIERED,20214.000000,983,,payer,inpatient,410010
69510,987,MS-DRG,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,UNITED ACO TIERED,46110.000000,987,,payer,inpatient,410010
69511,988,MS-DRG,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,UNITED ACO TIERED,29925.000000,988,,payer,inpatient,410010


In [208]:
dtypes = {
    'code': str,
    'ms_drg': str,
    'apr_drg': str
}
df1 = pl.from_pandas(df, schema_overrides=dtypes)
df1.write_csv(file.split('_')[0] + 'inpatient.csv')

In [209]:
df = pd.read_excel(file, sheet_name='OUTPATIENT SERVICES')

In [210]:
df.rename(columns={
    'CPT': 'code',
    'REVENUE CODE': 'rev_code',
    'CHARGE CODE': 'local_code',
    'DESCRIPTION': 'description',
},
    inplace=True)

In [211]:
cols = df.columns.tolist()
id_vars = cols[:6]
value_vars = cols[6:]

df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer', value_name='standard_charge')

In [212]:
df['payer_category'] = df['payer'].apply(payer_category)

In [213]:
df['hospital_id'] = hospital_id[file.split("_")[0]]
df['setting'] = 'outpatient'

In [214]:
df['standard_charge'] = pd.to_numeric(df['standard_charge'], errors='coerce')

In [215]:
non_null_cols = ['code', 'rev_code', 'local_code']
df[non_null_cols].fillna('', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[non_null_cols].fillna('', inplace=True)


In [216]:
df[non_null_cols] = df[non_null_cols].astype(str)

In [217]:
df.loc[df['code'] == 'nan', 'code'] = ''
df.loc[df['rev_code'] == 'nan','rev_code'] = ''
df.loc[df['local_code'] == 'nan', 'local_code'] = ''

In [218]:
df.drop(columns=['GL DESCRIPTION', 'GL KEY'], inplace=True)

In [219]:
df.dropna(subset='standard_charge', inplace=True)

In [220]:
df['rev_code'] = df['rev_code'].str.zfill(4)

In [221]:
df = df.astype(str)

In [222]:
df1 = pl.from_pandas(df, schema_overrides=dtypes)
df1.write_csv(file.split('_')[0] + 'outpatient.csv')