In [376]:
import pandas as pd
from datetime import datetime
import numpy as np

In [377]:
def payer_category(payer_category):
    """Classify a raw payer column label into a coarse rate category.

    Parameters
    ----------
    payer_category : str
        Column label exactly as it appears in the source file.

    Returns
    -------
    str
        'cash', 'gross', 'min' or 'max' for the known non-payer labels;
        'payer' for every other label (the lookup is an exact,
        case-sensitive match).
    """
    known_labels = {
        'DISCOUNTED CASH PRICE INPATIENT': 'cash',
        'DISCOUNTED CASH PRICE OUTPATIENT': 'cash',
        'Gross Charge': 'gross',
        'DE-IDENTIFIED MIN': 'min',
        'DE-IDENTIFIED MAX': 'max',
    }
    # Any label that is not listed above is treated as a negotiated payer rate.
    return known_labels.get(payer_category, "payer")

In [378]:
# Provenance of the standard-charges file processed by this notebook.
# `url` is where the file was published; `file` is the local copy that is read.
url = 'https://www.southbaldwinrmc.com/Uploads/Public/Documents/charge-masters/621811413_South%20Baldwin_standardcharges.csv'
file = "621811413_South Baldwin_standardcharges.csv"

# Written to the `hospital_ccn` column of the output; kept as a string so the
# leading zero is preserved.
ccn = '010083'

In [379]:
# Load the raw standard-charges CSV with pandas' default type inference.
# NOTE(review): code columns are not forced to str here, so their dtype depends
# on the file contents -- confirm leading zeros survive if that matters.
df = pd.read_csv(filepath_or_buffer=file)

In [380]:
# Map the source file's headers onto the column names used downstream.
# Headers not listed here (the payer / rate columns) keep their original names.
column_aliases = {
    "SVCCD": 'procedure_code',
    'Description': 'description',
    'CPT': 'code',
    'As of Date': 'file_last_updated',
}
df = df.rename(columns=column_aliases)

In [381]:
def _five_char_code_or_na(value):
    """Return `value` unchanged when its string form is exactly five characters, else pd.NA."""
    if len(str(value)) == 5:
        return value
    return pd.NA


# A code is treated as HCPCS/CPT purely on length; anything else becomes missing.
df['hcpcs_cpt'] = df['code'].map(_five_char_code_or_na)

# Parse the "as of" date column into pandas datetimes.
df["file_last_updated"] = pd.to_datetime(df["file_last_updated"])

In [382]:
# Reshape from one column per payer to one row per (item, payer) pair.
# The slicing below relies on the column ORDER of the frame at this point.
cols = list(df.columns)

# Identifier columns: everything up to and including 'code', plus the two
# descriptive columns that sit further right in the frame.
code_position = cols.index('code')
id_vars = [*cols[:code_position + 1], 'file_last_updated', 'hcpcs_cpt']

# Rate columns: from the first cash-price column through the end of the frame,
# minus the final column (the derived 'hcpcs_cpt', which was appended last),
# plus 'Gross Charge', which sits to the left of the slice.
first_rate_position = cols.index('DISCOUNTED CASH PRICE INPATIENT')
payer_cat = [*cols[first_rate_position:-1], 'Gross Charge']

df = df.melt(
    id_vars=id_vars,
    value_vars=payer_cat,
    var_name='payer_orig',
    value_name='rate',
)

['procedure_code', 'description', 'code', 'Gross Charge', 'file_last_updated', 'DISCOUNTED CASH PRICE INPATIENT', 'DISCOUNTED CASH PRICE OUTPATIENT', 'DE-IDENTIFIED MIN', 'DE-IDENTIFIED MAX', 'BCBSAL PPO - All Plans', 'UHC PPO - All Plans ', 'Aetna PPO - All Plans ', 'Cigna PPO - All Plans ', 'Humana PPO - All Plans ', 'COMM Viva Health PPO - All Plans ', 'UHC MA - All Plans ', 'Humana MA - All Plans ', 'BCBSAL MA - All Plans ', 'Healthspring MA - All Plans ', 'Aetna/Coventry MA - All Plans ', 'Viva Health MA - All Plans ', 'Multiplan - All Plans ', 'hcpcs_cpt']


['DISCOUNTED CASH PRICE INPATIENT',
 'DISCOUNTED CASH PRICE OUTPATIENT',
 'DE-IDENTIFIED MIN',
 'DE-IDENTIFIED MAX',
 'BCBSAL PPO - All Plans',
 'UHC PPO - All Plans ',
 'Aetna PPO - All Plans ',
 'Cigna PPO - All Plans ',
 'Humana PPO - All Plans ',
 'COMM Viva Health PPO - All Plans ',
 'UHC MA - All Plans ',
 'Humana MA - All Plans ',
 'BCBSAL MA - All Plans ',
 'Healthspring MA - All Plans ',
 'Aetna/Coventry MA - All Plans ',
 'Viva Health MA - All Plans ',
 'Multiplan - All Plans ',
 'Gross Charge']

In [383]:
def _patient_class(label):
    """Return 'inpatient' / 'outpatient' from the label's suffix, or 'na' when neither applies."""
    if label.endswith('INPATIENT'):
        return 'inpatient'
    if label.endswith('OUTPATIENT'):
        return 'outpatient'
    return 'na'


# Both derived columns are computed from the original (melted) column label.
payer_labels = df["payer_orig"]
df["payer_category"] = payer_labels.map(payer_category)
df['patient_class'] = payer_labels.map(_patient_class)

In [384]:
# Split labels of the form "<payer> - <plan>" into their two parts.
# plan_name: text after the last " - ", stripped; missing when the label has
# no " - " separator (e.g. the cash / gross / min / max columns).
df["plan_name"] = df['payer_orig'].apply(lambda x: x.split(" - ")[-1].strip() if " - " in x else pd.NA)

# payer_name: text before the first " - " (the whole label when there is no
# separator). `.str[0]` takes the first token of EACH row; a plain `[0]` on the
# split result would instead return row 0's list and fail on assignment.
df['payer_name'] = df['payer_orig'].str.split(" - ").str[0].str.strip()

In [385]:
# The leading token of the filename (before the first underscore) is used as
# the hospital TIN; a dash is inserted after its first two characters.
tin_digits = file.split("_", 1)[0]
tin = f"{tin_digits[:2]}-{tin_digits[2:]}"

# Stamp every row with the same provenance / hospital identifiers.
constant_columns = {
    "filename": file,
    "url": url,
    "hospital_ccn": ccn,
    "hospital_tin": tin,
}
for column_name, constant_value in constant_columns.items():
    df[column_name] = constant_value

In [387]:
# Remove rows without a code, then rows whose code (or derived hcpcs_cpt) is
# one of the literal marker values 'SURG' / 'MANUL'.
# NOTE(review): these look like non-billing placeholders -- confirm with the data owner.
df = df.dropna(subset=['code'])

excluded_codes = ['SURG', 'MANUL']
is_excluded = df['code'].isin(excluded_codes) | df['hcpcs_cpt'].isin(excluded_codes)

# .copy() makes the filtered frame independent of the frame it was sliced
# from, so later column assignments on `df` do not raise SettingWithCopyWarning.
df = df[~is_excluded].copy()

In [388]:
# Trim surrounding whitespace from the payer label (several source headers
# carry a trailing space).
df["payer_orig"] = df["payer_orig"].str.strip()


def _keep_if_five_chars(value):
    """Return `value` when its string form has exactly five characters, otherwise pd.NA."""
    return value if len(str(value)) == 5 else pd.NA


# Recompute hcpcs_cpt on the filtered frame.
df['hcpcs_cpt'] = df['code'].map(_keep_if_five_chars)

# Classify each row: rows whose procedure_code is 'DRG' are 'ms-drg'; otherwise
# five-character codes are 'hcpcs_cpt'; everything else is left missing.
is_drg_row = df['procedure_code'] == 'DRG'
has_five_chars = df['code'].astype(str).str.len() == 5
non_drg_type = np.where(has_five_chars, 'hcpcs_cpt', pd.NA)
df['code_type'] = np.where(is_drg_row, 'ms-drg', non_drg_type)

# Left-pad DRG codes with zeros to at least three characters.
drg_mask = df['code_type'] == 'ms-drg'
df.loc[drg_mask, 'code'] = df['code'].astype(str).str.zfill(3)

In [390]:
# Write the long-format result next to the notebook, without the row index.
output_path = "south_baldwin.csv"
df.to_csv(output_path, index=False)