In [97]:
import pandas as pd
import re


In [98]:
def fix_decimal_precision(value):
    """Re-format a plain decimal string so it has exactly three decimal places.

    Parameters
    ----------
    value : object
        Usually a string. Strings made only of digits, one dot and more
        digits (e.g. ``'281.24'``) are converted through ``float`` and
        re-rendered with ``'{:.3f}'`` (``'281.240'``); note that inputs with
        more than three decimals are therefore rounded to three.

    Returns
    -------
    object
        The re-formatted string for plain decimals; any other string
        (e.g. ``'02.110.451'`` or ``'58001987'``) and any non-string value
        (``None``, float NaN, ``pd.NA``) is returned unchanged.
    """
    # Missing cells are not strings (read_csv(dtype=str) still yields float
    # NaN for empty fields) and re.match would raise TypeError on them.
    if not isinstance(value, str):
        return value

    if re.match(r'^\d+\.\d+$', value):
        return "{:.3f}".format(float(value))
    return value

In [99]:
# Name of the standard-charges export to process, and the relative
# directory it is read from (Windows-style separators, trailing slash included).
file = '411924645_centracare-long-prairie-hospital_standardcharges.csv'
folder = '.\\input_files\\'

In [100]:
# Load the export with every column kept as text. The first physical line of
# the file is skipped, so the header is taken from the second line.
source_path = folder + file
df = pd.read_csv(source_path, skiprows=1, dtype=str)

In [101]:
# Map the source file's column headers onto the snake_case names used by the
# rest of the notebook. Columns not listed here keep their original name.
column_renames = {
    'Procedure': 'local_code',
    'Code Type': 'line_type',
    'Code': 'code',
    'NDC': 'ndc',
    'Rev Code': 'rev_code',
    'Procedure Description': 'description',
    'Quantity': 'drug_quantity',
}
df = df.rename(columns=column_renames)

In [102]:
# Normalise line_type to lower case so later comparisons (e.g. 'apr-drg')
# do not depend on the casing used in the source file.
lowered_line_type = df['line_type'].str.lower()
df['line_type'] = lowered_line_type

In [103]:
# The first seven columns are kept as identifier columns (presumably the
# seven renamed earlier -- confirm against the input layout); every remaining
# column is unpivoted.
cols = list(df.columns)
id_vars, value_vars = cols[:7], cols[7:]

# Wide -> long: one row per (identifier columns, original column name), with
# the former column name in 'payer_name' and its cell in 'standard_charge'.
df = df.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='payer_name',
    value_name='standard_charge',
)

In [104]:
# Discard rows that have no value in standard_charge.
df.dropna(axis=0, subset=['standard_charge'], inplace=True)

In [105]:
# Keep only the text that precedes the first ' -' in each rev_code value.
rev_code_parts = df['rev_code'].str.split(' -')
df['rev_code'] = rev_code_parts.str.get(0)

In [106]:
# Rows whose code is present and starts with the 'CPT®' label.
mask = df['code'].notna() & df['code'].str.startswith('CPT®')
# Store the code without its 'CPT® ' label in hcpcs_cpt (literal replacement).
df.loc[mask, 'hcpcs_cpt'] = df['code'].str.replace('CPT® ', '', regex=False)

In [107]:
# Rows whose code is present and starts with the 'HCPCS' label.
mask = df['code'].notna() & df['code'].str.startswith('HCPCS')
# Store the code without its 'HCPCS ' label in hcpcs_cpt (literal replacement).
df.loc[mask, 'hcpcs_cpt'] = df['code'].str.replace('HCPCS ', '', regex=False)

In [108]:
# Rows whose code is present and starts with the 'MS-DRG' label.
mask = (~df['code'].isna()) & (df['code'].str.startswith('MS-DRG'))
# Strip the version label so only the DRG number is stored in ms_drg.
# regex=False is required: the label contains parentheses, which are read as
# a regex group wherever str.replace defaults to regex matching
# (pandas < 2.0), so the literal text would never match and nothing would be
# removed.
df.loc[mask, 'ms_drg'] = df['code'].str.replace('MS-DRG V39 (FY 2022) ', '', regex=False)

In [109]:
# Rows flagged as APR-DRG lines that actually carry a code.
mask = df['code'].notna() & df['line_type'].eq('apr-drg')
# Left-pad the code with zeros to a minimum width of three characters.
df.loc[mask, 'apr_drg'] = df['code'].str.zfill(3)

In [110]:
# A drug_quantity of exactly '1' is replaced by a missing value.
is_unit_quantity = df['drug_quantity'].eq('1')
df.loc[is_unit_quantity, 'drug_quantity'] = pd.NA

In [111]:
# payer_name values that have a dedicated category; every other name is
# categorised as 'payer'.
payer_mapping = dict([
    ('Charge', 'gross'),
    ('Max', 'max'),
    ('Min', 'min'),
    ('Discounted Cash Price', 'cash'),
])

special_categories = df['payer_name'].map(payer_mapping)
df['payer_category'] = special_categories.fillna('payer')

In [112]:
# Keep hcpcs_cpt only when it is a letter + 4 digits, 5 digits, or 4 digits +
# a letter. astype(str) turns missing values into text that fails the
# pattern, so those rows are (re)set to a missing value as well.
mask = ~(df['hcpcs_cpt'].astype(str).str.match(r'^[A-Z][0-9]{4}$|^[0-9]{5}$|^[0-9]{4}[A-Z]$'))
df.loc[mask, 'hcpcs_cpt'] = pd.NA

# For apr_drg values longer than three characters, insert a hyphen after the
# third character. The slicing must go through .str: plain [:3] / [3:] on a
# Series selects rows, and adding those two disjoint row slices yields NaN
# for every row, which wiped out the long codes instead of reformatting them.
is_long_apr_drg = df['apr_drg'].str.len() > 3
df.loc[is_long_apr_drg, 'apr_drg'] = df['apr_drg'].str[:3] + '-' + df['apr_drg'].str[3:]

In [126]:
# Run every local_code through fix_decimal_precision, element by element.
normalized_local_code = df['local_code'].map(fix_decimal_precision)
df['local_code'] = normalized_local_code

In [113]:
# Input file name -> hospital identifier written to the hospital_id column
# and used as the prefix of the output file name.
hosp_id_mapping = {
    '384089454_centracare-redwood-hospital_standardcharges.csv': '241351',
    '463298651_centracare-paynesville-hospital_standardcharges.csv': '241349',
    '461584944_centracare-monticello-hospital_standardcharges.csv': '241362',
    '411924645_centracare-long-prairie-hospital_standardcharges.csv': '241326',
    '411865315_centracare-melrose-hospital_standardcharges.csv': '241330',
    '452438973_centracare-sauk-centre-hospital_standardcharges.csv': '241368',
    '823166379_centracare-rice-memorial-hospital_standardcharges.csv': '240088',
    '410695596_centracare-st-cloud-hospital_standardcharges.csv': '240036',
}

# Raises KeyError when the current file has no entry in the mapping.
hosp_id = hosp_id_mapping[file]
df['hospital_id'] = hosp_id

# Output name: "<hospital id>_<second underscore-separated part of the input name>.csv"
out_folder = '.\\output_files\\'
out_file = f"{hosp_id}_{file.split('_')[1]}.csv"

# Write the frame without its index column.
output_path = out_folder + out_file
df.to_csv(output_path, index=False)

In [125]:
# Spot check: ten random rows, one per distinct local_code, among the codes
# that contain a literal '.' character.
has_dot = df['local_code'].str.contains('.', regex=False)
dotted_codes = df.loc[has_dot].drop_duplicates(subset=['local_code'])
dotted_codes.sample(10)

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,drug_quantity,payer_name,standard_charge,hcpcs_cpt,ms_drg,apr_drg,payer_category,hospital_id
8155,281.240,sup,HCPCS C1713,,278,PLATE DHS STANDARD 38MM 4 HOLE SYNTHES 281.240,,Charge,1240.05,C1713,,,gross,241326
8732,212.101,sup,HCPCS C1713,,278,SCREW SYNTHES 3.5 LOCKING SELF-TAP 10MM 212.101,,Charge,341.07,C1713,,,gross,241326
8204,242.491,sup,HCPCS C1713,,278,PLATE 2.4 LCP VOLAR DISTAL RADIUS LEFT 43MM,,Charge,1825.75,C1713,,,gross,241326
8571,280.660,sup,HCPCS C1713,,278,SCREW ONE STEP LAG 14 X 60 280.660,,Charge,847.7,C1713,,,gross,241326
8462,281.925,sup,HCPCS C1713,,278,PLATE DCS 25MM 12 HOLE 95 DEG 281.92,,Charge,2551.38,C1713,,,gross,241326
8043,209.920,sup,HCPCS C1713,,278,SCREW 7.3 CANN 32X120 THRD 209.920,,Charge,632.73,C1713,,,gross,241326
8397,202.734,sup,HCPCS C1713,,278,SCREW SYNTHES LONG THREAD 3X34MM 202.734,,Charge,556.32,C1713,,,gross,241326
8016,207.750,sup,HCPCS C1713,,278,SCREW CANNULATED 4.0X50MM LONG THR 207.750,,Charge,551.47,C1713,,,gross,241326
8327,02.110.451,sup,HCPCS C1713,,278,PLATE VOL LCP DIST RAD 2.4 8HL,,Charge,2034.95,C1713,,,gross,241326
8446,281.308,sup,HCPCS C1713,,278,PLATE DHS STANDARD 38MM 8 HOLE SYNTHES 281.308,,Charge,1618.5,C1713,,,gross,241326


In [116]:
# Show the resulting DataFrame as this cell's output.
df

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,drug_quantity,payer_name,standard_charge,hcpcs_cpt,ms_drg,apr_drg,payer_category,hospital_id
1320,58001987,eap,,,0490,Flex Sigmoidoscopy,,Charge,1396,,,,gross,241326
1321,25000514,eap,,,0360,Cystourethroscopy,,Charge,5867,,,,gross,241326
1322,25000753,eap,,,0360,Esophagogastroduodenoscopy,,Charge,2750,,,,gross,241326
1323,25000767,eap,,,0360,Esophagogastroduodenoscopy W Dilitation,,Charge,3940,,,,gross,241326
1324,25000908,eap,,,0360,Excision Of Axillary Lymph Node,,Charge,8093,,,,gross,241326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
269363,20025861,eap,HCPCS U0003,,0300,"Covid-19/sars-cov2, Naat (panther)",,Ucare Community Health Plan (PMAP),35.41265,U0003,,,payer,241326
269364,20026076,eap,HCPCS U0003,,0300,"Covid-19/sars-cov2, Pcr (covoo) (mayo)",,Ucare Community Health Plan (PMAP),35.41265,U0003,,,payer,241326
269365,20026618,eap,HCPCS U0005,,0300,Infectious Agent Detect By Nucleic Acid (dna O...,,Ucare Community Health Plan (PMAP),9.7825,U0005,,,payer,241326
269366,20026692,eap,HCPCS U0005,,0300,Infec Agent Detect By Nucleic Acid (dna Or Rna...,,Ucare Community Health Plan (PMAP),9.7825,U0005,,,payer,241326
