In [25]:
import pandas as pd

In [26]:
# Location of the raw standard-charges CSV to process.
# Forward slashes are used because they resolve on Windows as well as
# POSIX systems, whereas a backslash-separated string is treated as a
# single literal filename outside Windows.
folder = './input_files/'
file = '56-0554230_novanthealthcharlotteorthopedichospital_standardcharges.csv'

In [27]:
# Load the raw file; dtype=str makes pandas read every column as a string.
df = pd.read_csv(f'{folder}{file}', dtype=str)

In [28]:
# Map the four descriptor headers onto the snake_case names used by the
# rest of the notebook; every other column keeps its original header.
column_renames = {
    'Code': 'code',
    'Code Description': 'description',
    'CPT DRG': 'cpt_drg',
    'Charge Detail': 'contracting_method',
}
df = df.rename(columns=column_renames)

In [29]:
# Reshape from wide (one column per payer) to long (one row per
# code/payer pair, with the payer header in 'payer_name' and the cell
# value in 'standard_charge').
#
# The identifier columns are listed by name instead of being sliced off
# the front of the column list: a positional slice silently melts the
# wrong columns if a file's column order differs, whereas naming them
# makes pd.melt raise if one is missing.
cols = df.columns.tolist()
id_vars = ['code', 'description', 'cpt_drg', 'contracting_method']
val_vars = [col for col in cols if col not in id_vars]

df = pd.melt(df, id_vars=id_vars, value_vars=val_vars, var_name='payer_name', value_name='standard_charge')

In [30]:
# Keep only the rows that actually carry a charge value.
df = df.dropna(subset=['standard_charge'])

In [31]:
# Upper-case the combined code column, then route each value by its length:
# 5 characters -> 'hcpcs_cpt', 3 characters -> 'ms_drg'. Other lengths
# (and missing values) are left out of both columns.
cpt_drg_upper = df['cpt_drg'].str.upper()
df['cpt_drg'] = cpt_drg_upper

code_length = cpt_drg_upper.str.len()
df.loc[code_length.eq(5), 'hcpcs_cpt'] = cpt_drg_upper
df.loc[code_length.eq(3), 'ms_drg'] = cpt_drg_upper

# '(BLAN' is a 5-character placeholder that passes the length test above;
# clear it so it is not kept as a code.
df.loc[df['hcpcs_cpt'].eq('(BLAN'), 'hcpcs_cpt'] = pd.NA

In [32]:
# Populate 'ndc' for rows whose code starts with the literal prefix 'NDC'.
# na=False makes missing codes count as "no match"; without it the mask
# contains NaN and .loc raises ValueError.
is_ndc = df['code'].str.startswith('NDC', na=False)

# Strip only the leading 'NDC' (anchored with ^) so that any later
# occurrence of those letters inside the code is left intact.
df.loc[is_ndc, 'ndc'] = df['code'].str.replace(r'^NDC', '', regex=True)

In [33]:
# The combined column has been split into 'hcpcs_cpt' / 'ms_drg'; remove it.
df = df.drop(columns=['cpt_drg'])

In [34]:
# Normalise 'contracting_method'.
#
# All comparisons are made against one lower-cased copy of the column so
# that recognising a method and nulling out 'standard charge' follow the
# same case-insensitive rule (previously the null-out step only matched the
# exact spelling 'Standard Charge').
#
# NOTE: this cell is not idempotent. On a second run the 'other' values
# written below are themselves unrecognised and would overwrite the notes.
recognised_methods = ['standard charge', 'per diem', 'case rate']
method_lower = df['contracting_method'].str.lower()

# Rows whose method is not one of the recognised values (missing values
# included, since NaN is never "in" the list).
mask = ~method_lower.isin(recognised_methods)

# Preserve the original, un-lowered text in the notes column first...
df.loc[mask, 'additional_payer_specific_notes'] = df['contracting_method']

# ...then store the lower-cased method and label unrecognised ones 'other'.
df['contracting_method'] = method_lower
df.loc[mask, 'contracting_method'] = 'other'

# 'standard charge' is recognised above but is then blanked out rather than kept.
df.loc[method_lower == 'standard charge', 'contracting_method'] = pd.NA

In [35]:
# Inspect the rows that received a value in 'additional_payer_specific_notes'.
df[df['additional_payer_specific_notes'].notna()]

Unnamed: 0,code,description,contracting_method,payer_name,standard_charge,hcpcs_cpt,ms_drg,ndc,additional_payer_specific_notes
9548,360000033 - 3021,HC OR CATEGORY I EA ADDL 15 MIN,other,Gross Charge,297,,,,Surgery Center Alt
10589,360000066 - 3021,HC OR CATEGORY II EA ADDL 15 MIN,other,Gross Charge,410,,,,Surgery Center Alt
11025,360000107 - 3021,HC OR CATEGORY III EA ADDL 15MIN,other,Gross Charge,454,,,,Surgery Center Alt
11063,750000017 - 3021,HC ENDO LEVEL 1 EA ADDL 15 MINS,other,Gross Charge,465,,,,Surgery Center Alt
11211,710000003 - 3021,HC PACU 0-30 MIN PHASE I,other,Gross Charge,492,,,,Surgery Center Alt
...,...,...,...,...,...,...,...,...,...
1388415,312000044_Specimen - 3071,HC IHC PER SPECIMEN ADDL STAIN,other,Amerihealth Caritas,67,88341,,,Specimen Alt
1388416,312000023_Specimen - 3071,HC SURG PATH LEVEL V ADD,other,Amerihealth Caritas,67,88307,,,Specimen Alt
1388417,312000022_Specimen - 3071,HC SURG PATH LEVEL V,other,Amerihealth Caritas,67,88307,,,Specimen Alt
1388826,312000025_Specimen - 3071,HC SURG PATH LEVEL VI ADD,other,Amerihealth Caritas,142,88309,,,Specimen Alt


In [36]:
# Trim surrounding whitespace from the payer names taken from the column headers.
df = df.assign(payer_name=df['payer_name'].str.strip())

In [37]:
# Payer names that denote a charge category rather than an actual payer.
payer_mapping = {
    'Gross Charge': 'gross',
    'De-identified minimum negotiated charge': 'min',
    'De-identified maximum negotiated charge': 'max',
    'Discounted cash price': 'cash',
}

# Any name without an entry above is labelled as a regular 'payer'.
mapped_category = df['payer_name'].map(payer_mapping)
df['payer_category'] = mapped_category.where(mapped_category.notna(), 'payer')

In [38]:
# Tag rows whose description begins with 'SUP' followed by six digits.
# na=False makes missing descriptions count as "no match"; without it the
# mask contains NaN and .loc raises ValueError.
is_supply = df['description'].str.match(r'SUP\d{6}', na=False)
df.loc[is_supply, 'line_type'] = 'sup'

In [24]:
# Input file name -> hospital identifier written to the 'hospital_id' column.
# The keys must match the input file names exactly.
id_mapping = {
    '20-4278130_novanthealthbrunswickmedicalcenter_standardcharges.csv': '340158',
    '56-0554230_novanthealthcharlotteorthopedichospital_standardcharges.csv': '340153',
    '56-0928089_novanthealthforsythmedicalcenter_standardcharges.csv': '340014',
    '56-0554230_novanthealthhuntersvillemedicalcenter_standardcharges.csv': '340183',
    '56-1376368_novanthealthmatthewsmedicalcenter_standardcharges.csv': '340171',
    '56-1340424_novanthealthmedicalparkhospital_standardcharges.csv': '340148',
    '26-0599536_novanthealthminthillmedicalcenter_standardcharges.csv': '340190',
    # NOTE(review): 'newhanorver' looks like a misspelling of 'newhanover' -
    # confirm against the actual file name before changing it.
    '56-0887181_novanthealthnewhanorverregionalmedicalcenter_standardcharges.csv': '340141',
    '56-0653348_novanthealthpendermemorialhospital_standardcharges.csv': '341307',
    '56-0554230_novanthealthpresbyterianmedicalcenter_standardcharges.csv': '340053',
    '56-0547479_novanthealthrowanmedicalcenter_standardcharges.csv': '340015',
    '56-0636250_novanthealththomasvillemedicalcenter_standardcharges.csv': '340085',
}

# Raises KeyError if the current input file has no entry above.
hosp_id = id_mapping[file]
df['hospital_id'] = hosp_id

# Forward slashes resolve on Windows as well as POSIX systems.
output_folder = './output_files/'

# Output name is '<hospital id>_<second underscore-separated part of the input name>.csv'.
# It is stored under its own name instead of overwriting `file`, so this
# cell can be re-run without the id_mapping lookup failing on the
# already-rewritten name.
output_file = hosp_id + '_' + file.split('_')[1] + '.csv'
# df.to_csv(output_folder + output_file, index=False)

In [41]:
# Display the hospital identifiers held in id_mapping (inspection only;
# nothing is assigned or modified here).
id_mapping.values()

dict_values(['340158', '340153', '340014', '340183', '340171', '340148', '340190', '340141', '341307', '340053', '340015', '340085'])