In [3]:
import pandas as pd
import polars as pl
import os
from tqdm import tqdm

In [4]:
# Location of the raw standard-charges export. The folder is built with
# os.path.join so the notebook is not tied to Windows path separators; the
# trailing '' keeps a trailing separator, so `folder + file` remains valid.
folder = os.path.join('.', 'input_files', '')
file = '221494454_StLukesHospitalWarrenCampus_standardcharges.csv'

# Every column is read as text (dtype=str); the charge values are cleaned
# and converted to numbers further down the notebook.
df = pd.read_csv(os.path.join(folder, file), dtype=str)

In [5]:
# Translate the source file's column headers into the names the rest of the
# notebook works with.
column_names = {
    'Record ID': 'local_code',
    'Description': 'description',
    'CPT/DRG': 'code',
}
df = df.rename(columns=column_names)

In [6]:
# Reshape wide -> long: the first three columns identify a line item, and
# each remaining column is unpivoted into one (payer, standard_charge) row.
cols = list(df.columns)
id_vars, value_vars = cols[:3], cols[3:]

df = df.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='payer',
    value_name='standard_charge',
)

In [7]:
# Code length is the discriminator: 3-character codes are copied into
# `ms_drg`, 5-character codes into `hcpcs_cpt`. Rows of any other length
# (or with a missing code) are left unset in both columns.
code_len = df['code'].str.len()
df.loc[code_len == 3, 'ms_drg'] = df['code']
df.loc[code_len == 5, 'hcpcs_cpt'] = df['code']

In [8]:
# Trim leading/trailing whitespace from the payer labels (these came from
# the melted column headers) so exact-match lookups on them work.
df = df.assign(payer=df['payer'].str.strip())

In [9]:
# Payer labels that get a dedicated category. Any label not listed here
# falls back to the generic 'payer' category.
mapping = {
    'Gross Charge': 'gross',
    'Self Pay': 'cash',
    'Min Payment': 'min',
    'Max Payment': 'max',
}

payer_category = df['payer'].map(mapping)
df['payer_category'] = payer_category.fillna('payer')

In [10]:
# Remove currency formatting ('$', thousands separators, parentheses) so the
# values can be parsed as numbers. The pattern is a raw string because the
# non-raw form contained invalid string escapes (backslash-dollar and
# backslash-parenthesis), which newer Python versions warn about. The
# character class matches exactly the same four characters.
# NOTE(review): if parentheses mark negative amounts in this file, dropping
# them loses the sign — confirm against the source data.
df['standard_charge'] = df['standard_charge'].str.replace(r'[$,()]', '', regex=True)

In [11]:
# Convert the cleaned charge strings to floating point; the other columns
# keep their existing dtypes.
df = df.astype({'standard_charge': float})

In [12]:
# Replace missing values in the text columns with empty strings.
# This assigns the result back instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates a temporary Series,
# which pandas warns about and which leaves `df` unchanged under
# Copy-on-Write.
text_cols = ['ms_drg', 'hcpcs_cpt', 'code', 'local_code']
df[text_cols] = df[text_cols].fillna('')

In [13]:
# Checkpoint: keep an independent snapshot of the cleaned frame so later
# cells can restart from this state (see `df = df1.copy()` below).
df1 = df.copy(deep=True)

In [19]:
# Restore the working frame from the `df1` checkpoint; copying keeps the
# checkpoint itself untouched by the edits that follow.
df = df1.copy(deep=True)

In [20]:
# Preview: the first row for each distinct payer label (display only; `df`
# itself is not modified).
df.drop_duplicates(subset=['payer'], keep='first')

Unnamed: 0,local_code,description,code,payer,standard_charge,ms_drg,hcpcs_cpt,payer_category
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Gross Charge,10925.0,,,gross
41770,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Aetna (All Plans) Payment,3159.11,,,payer
83540,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Aetna MC (All Plans) Payment,1157.34,,,payer
125310,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth Caritas (All Plans) Payment,821.81,,,payer
167080,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth NJ (All Plans) Payment,1135.28,,,payer
208850,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth Northeast (All Plans) Payment,785.52,,,payer
250620,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,CBC (Except EPO) Payment,2762.58,,,payer
292390,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,CBC EPO Payment,2584.11,,,payer
334160,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Cigna (All Plans) Payment,2947.14,,,payer
375930,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Freedom Blue (All Plans) Payment,1229.23,,,payer


In [21]:
# Record the plan before the marker is stripped from the payer label: any
# label mentioning 'All Plans' gets plan = 'all plans'; other rows are left
# unset.
df.loc[df['payer'].str.contains('All Plans', regex=False, na=False), 'plan'] = 'all plans'

# Strip the boilerplate from the payer labels. `regex=False` is explicit
# because the patterns contain literal parentheses: on pandas versions where
# `str.replace` defaults to regex=True they would be read as capture groups
# and fail to match the intended text.
df['payer'] = (
    df['payer']
    .str.replace(' (All Plans) Payment', '', regex=False)
    .str.replace(' Payment', '', regex=False)
    .str.replace(' (All Plans)', '', regex=False)
    .str.strip()
)

In [22]:
# Preview: the first row for each distinct payer label after normalisation
# (display only; `df` itself is not modified).
df.drop_duplicates(subset=['payer'], keep='first')

Unnamed: 0,local_code,description,code,payer,standard_charge,ms_drg,hcpcs_cpt,payer_category,plan
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Gross Charge,10925.0,,,gross,
41770,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Aetna,3159.11,,,payer,all plans
83540,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Aetna MC,1157.34,,,payer,all plans
125310,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth Caritas,821.81,,,payer,all plans
167080,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth NJ,1135.28,,,payer,all plans
208850,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Amerihealth Northeast,785.52,,,payer,all plans
250620,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,CBC (Except EPO),2762.58,,,payer,
292390,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,CBC EPO,2584.11,,,payer,
334160,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Cigna,2947.14,,,payer,all plans
375930,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,Freedom Blue,1229.23,,,payer,all plans


In [16]:
# Lookup from the leading token of the input file name (stored in `ein`) to
# the value written to `hospital_id`.
# NOTE(review): the variable name suggests these are CMS Certification
# Numbers keyed by EIN — confirm.
ccn = {
    '844475996': '390162',
    '824432109': '390332',
    '465143606': '390330',
    '454394739': '390326',
    '251550350': '390183',
    # NOTE(review): '231352213' was listed twice ('390049', then '390335').
    # A dict keeps only the last value for a repeated key, so '390049' was
    # never used. The shadowed entry is kept here as a comment; confirm which
    # value is intended, or whether this key alone can identify the hospital.
    # '231352213': '390049',
    '231352213': '390335',
    '231352203': '390035',
    '221494454': '310060',
}

ein = file.split('_')[0]

# Fail with a descriptive message (still a KeyError) when the file's prefix
# has no entry in the lookup.
if ein not in ccn:
    raise KeyError(f'No hospital_id mapping for EIN {ein!r} (from file name {file!r})')

df['hospital_id'] = ccn[ein]

In [17]:
# Export step, currently disabled.
# NOTE(review): if re-enabled as written, this rebinds `df1` (the pandas
# checkpoint created earlier in the notebook) to the converted frame, and the
# output path uses Windows-style separators — consider a distinct name and a
# portable path before turning it back on.
# df1 = pl.from_pandas(df)
# df1.write_csv('.\\output_files\\' + file.split('_')[1] + '.csv')