In [152]:
import pandas as pd
import numpy as np

In [153]:
folder = '.\\input_files\\'
file = '815427847_AsanteAshlandCommunityHospital_Standardcharges.csv'

In [154]:
df = pd.read_csv(folder + file, dtype=str, encoding='ansi')

In [155]:
df.drop(columns='Location', inplace=True)

In [156]:
df.rename(columns={
    'Procedure': 'local_code',
    'Code Type': 'line_type', 
    'Code': 'code',
    'NDC': 'ndc',
    'Rev Code': 'rev_code',
    'Procedure Description': 'description',
    'Quantity': 'quantity',
    'Payer': 'payer_name', 
    'Plan(s)': 'plan_name'
}, inplace=True)

In [157]:
df['rev_code'] = df['rev_code'].str.split(' -').str[0]

In [158]:
df.loc[df['quantity'] == "1", 'quantity'] = pd.NA

In [159]:
cols = df.columns.tolist()
id_vars = cols[:9]
value_vars = ['IP Expected Reimbursement', 'OP Expected Reimbursement']

payer_df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer_temp', value_name='standard_charge')

In [160]:
df.drop(columns=['payer_name', 'plan_name', 'IP Expected Reimbursement', 'OP Expected Reimbursement'], inplace=True)

In [161]:
cols = df.columns.tolist()
id_vars = cols[:7]
value_vars = cols[7:]

df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer_name', value_name='standard_charge')

In [162]:
df = pd.concat([df, payer_df])

In [163]:
df

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp
0,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,IP Price,32996.83,,
1,MS784,DRG,MS784,,,Cesarean Section With Sterilization With Cc,,IP Price,33793.32,,
2,MS785,DRG,MS785,,,Cesarean Section With Sterilization Without Cc...,,IP Price,27504.52,,
3,MS786,DRG,MS786,,,Cesarean Section Without Sterilization With McC,,IP Price,37840.81,,
4,MS787,DRG,MS787,,,Cesarean Section Without Sterilization With Cc,,IP Price,37966.57,,
...,...,...,...,...,...,...,...,...,...,...,...
287239,9957,ERX,HCPCS 2500003,67457-0182-00,0250,ESMOLOL 100 MG/10 ML (10 MG/ML) INTRAVENOUS SO...,10 mL,<Self-pay>,46.8,<Self-pay>,OP Expected Reimbursement
287240,99702,ERX,HCPCS J2426,50458-0563-01,0636,PALIPERIDONE PALMITATE 156 MG/ML INTRAMUSCULAR...,1 each,<Self-pay>,4467.4695,<Self-pay>,OP Expected Reimbursement
287241,9972,ERX,HCPCS J1410,00046-0749-05,0636,CONJUGATED ESTROGENS 25 MG SOLUTION FOR INJECTION,25 mg,<Self-pay>,1440.023,<Self-pay>,OP Expected Reimbursement
287242,9974,ERX,HCPCS J7512,00046-1102-81,0250,CONJUGATED ESTROGENS 0.625 MG TABLET,0.5 tablet,<Self-pay>,16.315,<Self-pay>,OP Expected Reimbursement


In [164]:
df['setting'] = np.where(df['payer_name'].str.startswith('IP'), 'inpatient', np.where(df['payer_name'].str.startswith('OP'), 'outpatient', 1))
df['setting'] = np.where(df['payer_temp'].str.startswith('IP'), 'inpatient', np.where(df['payer_temp'].str.startswith('OP'), 'outpatient', 1))

In [165]:
payer_mapping = {
    'IP Price': 'gross',
    'IP De-Identified MIN': 'min',
    'IP De-Identified MAX': 'max',
    'OP Price': 'gross',   
    'OP De-Identified MIN': 'min',
    'OP De-Identified MAX': 'max',
    '<Self-pay>': 'cash'
}

df['payer_category'] = df['payer_name'].map(payer_mapping)

In [166]:
df.loc[df['payer_name'] == '<Self-pay>', 'payer_category'] = 'cash'

In [167]:
df.loc[~df['payer_temp'].isna(), 'payer_category'] = 'payer'

In [168]:
df.drop(columns='payer_temp', inplace=True)

In [169]:
df['code'] = df['code'].str.strip()

In [170]:
df.reset_index(drop=True, inplace=True)

In [171]:
df.loc[~df['code'].isna() & df['code'].str.match(r"MS\d{3}"), 'ms_drg'] = df['code'].str.replace('MS', '')

In [172]:
df['ms_drg'] = df['ms_drg'].astype(str)

In [173]:
df.reset_index(drop=True, inplace=True)

In [174]:
df.loc[~df['code'].isna() & df['code'].str.startswith('HCPCS '), 'hcpcs_cpt'] = df['code'].str.replace('HCPCS ', '')
df.loc[~df['code'].isna() & df['code'].str.startswith('CPT® '), 'hcpcs_cpt'] = df['code'].str.replace('CPT® ', '')

In [175]:
df.loc[df['hcpcs_cpt'].str.len() == 7, 'hcpcs_cpt'] = pd.NA

In [176]:
df.loc[~df['plan_name'].isna() & df['plan_name'].str.contains(',')]

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,setting,payer_category,ms_drg,hcpcs_cpt
899523,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,MODA HEALTH [1070],32996.83,"MODA CONNEXUS NETWORK [100087], MODA CONNEXUS ...",inpatient,payer,783,
899526,MS784,DRG,MS784,,,Cesarean Section With Sterilization With Cc,,MODA HEALTH [1070],27890.66,"MODA CONNEXUS NETWORK [100087], MODA CONNEXUS ...",inpatient,payer,784,
899529,MS785,DRG,MS785,,,Cesarean Section With Sterilization Without Cc...,,MODA HEALTH [1070],23332.56,"MODA CONNEXUS NETWORK [100087], MODA CONNEXUS ...",inpatient,payer,785,
899532,MS786,DRG,MS786,,,Cesarean Section Without Sterilization With McC,,MODA HEALTH [1070],37840.81,"MODA CONNEXUS NETWORK [100087], MODA CONNEXUS ...",inpatient,payer,786,
899535,MS787,DRG,MS787,,,Cesarean Section Without Sterilization With Cc,,MODA HEALTH [1070],27088.98,"MODA CONNEXUS NETWORK [100087], MODA CONNEXUS ...",inpatient,payer,787,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1126295,9957,ERX,HCPCS 2500003,67457-0182-00,0250,ESMOLOL 100 MG/10 ML (10 MG/ML) INTRAVENOUS SO...,10 mL,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],57.6,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",outpatient,payer,,
1126296,99702,ERX,HCPCS J2426,50458-0563-01,0636,PALIPERIDONE PALMITATE 156 MG/ML INTRAMUSCULAR...,1 each,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],5498.42,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",outpatient,payer,,J2426
1126297,9972,ERX,HCPCS J1410,00046-0749-05,0636,CONJUGATED ESTROGENS 25 MG SOLUTION FOR INJECTION,25 mg,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],1772.34,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",outpatient,payer,,J1410
1126298,9974,ERX,HCPCS J7512,00046-1102-81,0250,CONJUGATED ESTROGENS 0.625 MG TABLET,0.5 tablet,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],20.08,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",outpatient,payer,,J7512


In [177]:
df = df.assign(plan_name=df['plan_name'].str.split(', ')).explode('plan_name')

In [178]:
df['standard_charge'] = df['standard_charge'].str.replace(',', '')

In [186]:
df.dropna(subset='standard_charge', inplace=True)

In [185]:
id_mapping = {
 '571181758_AsanteThreeRiversMedicalCenter_Standardcharges.csv': '380002',
 '815427847_AsanteAshlandCommunityHospital_Standardcharges.csv': '380005',
 '930223960_AsanteRogueRegionalMedicalCenter_Standardcharges.csv': '380018'}

hosp_id = id_mapping[file]

df['hospital_id'] = hosp_id

output_folder = '.\\output_files\\'

filename = hosp_id + file.split('_')[1] + '.csv'

df.to_csv(output_folder + filename, index=False)

OSError: Cannot save file into a non-existent directory: 'output_files'