In [110]:
import pandas as pd
import json

In [111]:
folder = '.\\input_files\\'
file = '344430849_memorial-hospital_standardcharges.json'

In [112]:
with open(folder + file, 'r') as f:
    jd = json.load(f)
    jd = jd[0]['item']

In [113]:
df = pd.DataFrame(jd)

In [114]:
df.rename(columns={
    'payer': 'payer_name',
    'Associated_Codes': 'code',
    'iobSelection': 'setting',
    'Payer_Allowed_Amount': 'standard_charge',
}, inplace=True)

In [115]:
df['code'] = df['code'].str.strip()

In [116]:
# Extract revenue codes from `code`
df['rev_code'] = df['code'].str.extract(r'.*,(0\d{3})')
# Remove the revenue code from `code`
df['code'] = df['code'].str.replace(r',0\d{3}', '', regex=True)

In [117]:
df = df.assign(code=df['code'].str.split(',')).explode('code')

In [118]:
df['code'] = df['code'].str.strip()

In [119]:
df.reset_index(drop=True, inplace=True)

In [120]:
df[['ms_drg', 'thru']] = df.loc[df['code'].str.contains('-'), 'code'].str.split('-', expand=True)

In [121]:
df_payer = df.copy()
df_payer = df_payer[['payer_name', 'description', 'code', 'standard_charge', 'setting', 'rev_code', 'ms_drg', 'thru']]
df_payer['payer_category'] = 'payer'

In [122]:
df_rates = df.copy()
df_rates = df_rates[['description', 'code', 'setting', 'Gross_Charge', 'Cash_Discount', 'Deidentified_Min_Allowed', 'DeIdentified_Max_Allowed', 'rev_code', 'ms_drg', 'thru']]

In [123]:
cols = df_rates.columns.tolist()
id_vars = ['description', 'code', 'setting', 'rev_code', 'ms_drg', 'thru']
value_vars = [x for x in cols if x not in id_vars]

df_rates = pd.melt(df_rates, id_vars=id_vars, value_vars=value_vars, var_name='payer_name', value_name='standard_charge')

In [126]:
mapping = {
    'Gross_Charge': 'gross',
    'Cash_Discount': 'cash',
    'Deidentified_Min_Allowed': 'min',
    'DeIdentified_Max_Allowed':'max',
    }

df_rates['payer_category'] = df_rates['payer_name'].map(mapping)

In [128]:
df = pd.concat([df_payer, df_rates])

In [129]:
df.reset_index(drop=True, inplace=True)

In [130]:
df['setting'] = df['setting'].str.lower()

In [131]:
df.loc[df['code'].str.match(r'^[A-Z][0-9]{4}$|^[0-9]{5}$|^[0-9]{4}[A-Z]$'), 'hcpcs_cpt'] = df['code']
df.loc[df['code'].str.match(r'^[0-9]{3}$'), 'ms_drg'] = df['code']
df.loc[df['code'].str.len() == 4, 'rev_code'] = df['code']

In [132]:
df.loc[~df['hcpcs_cpt'].isna() & ~df['rev_code'].isna()]

Unnamed: 0,payer_name,description,code,standard_charge,setting,rev_code,ms_drg,thru,payer_category,hcpcs_cpt
11833,,STERILE SUPPLIES - INFUSION CATHETER,C1751,,,0272,,,payer,C1751
11834,,STERILE SUPPLIES - INFUSION CATHETER,C1751,,,0272,,,payer,C1751
11835,,STERILE SUPPLIES - INFUSION CATHETER,C1751,,,0272,,,payer,C1751
11836,,STERILE SUPPLIES - INFUSION CATHETER,C1751,,,0272,,,payer,C1751
11837,,STERILE SUPPLIES - INFUSION CATHETER,C1751,,,0272,,,payer,C1751
...,...,...,...,...,...,...,...,...,...,...
60886,DeIdentified_Max_Allowed,LAB - High Throughput Detection Of COVID-19 Vi...,U0003,,,0300,,,max,U0003
60896,DeIdentified_Max_Allowed,LAB - High Throughput COVID-19 Testing By Non-...,U0004,,,0300,,,max,U0004
60897,DeIdentified_Max_Allowed,LAB - High Throughput Detection Of COVID-19 Vi...,U0005,,,0300,,,max,U0005
60898,DeIdentified_Max_Allowed,STERILE SUPPLIES - INTRAOCULAR LENS - POSTERIO...,V2632,,,0276,,,max,V2632


In [23]:
df.loc[df['standard_charge'] == 'N/A', 'standard_charge'] = pd.NA

In [24]:
df.dropna(subset='standard_charge', inplace=True)

Unnamed: 0,payer_name,description,code,standard_charge,setting,payer_category,hcpcs_cpt,ms_drg,rev_code,hospital_id


In [25]:
id_mapping = {
 '344428256_toledo-hospital_standardcharges copy.json': '360074',
 '341883132_bay-park-hospital_standardcharges.json': '360259',
 '344446484_defiance-regional-hospital_standardcharges.json': '361328',
 '344428256_toledo-hospital_standardcharges.json': '360068',
 '340898745_fostoria-community-hospital_standardcharges.json': '361318',
 '344430849_memorial-hospital_standardcharges.json': '360156',
 '382796005_charles-and-virginia-hickman-hospital_standardcharges.json': '230005',
 '386108110_coldwater-regional-hospital_standardcharges.json': '230022',
 '381984289_monroe-regional-hospital_standardcharges.json': '230099'}

hosp_id = id_mapping[file]

df['hospital_id'] = hosp_id

out_file = hosp_id + file.split('_')[1] + '.csv'

out_folder = '.\\output_files\\'

# df.to_csv(out_folder + out_file, index=False)

In [26]:
df[df.duplicated(subset=['payer_name','description','code','setting','payer_category','hcpcs_cpt','ms_drg','rev_code'], keep=False)]

Unnamed: 0,payer_name,description,code,standard_charge,setting,payer_category,hcpcs_cpt,ms_drg,rev_code,hospital_id
3063,UNITED HEALTHCARE Commercial,Replacement Of Knee Joint,469,2125.4000,inpatient,payer,,469,,360156
3064,UNITED HEALTHCARE Commercial,Replacement Of Knee Joint,470,2125.4000,inpatient,payer,,470,,360156
3099,UNITED HEALTHCARE Commercial,Replacement Of Knee Joint,469,1625.2000,inpatient,payer,,469,,360156
3100,UNITED HEALTHCARE Commercial,Replacement Of Knee Joint,470,1625.2000,inpatient,payer,,470,,360156
12261,Gross_Charge,Lab Test - Identification of DNA from 22 Patho...,0097U,800.0000,outpatient,gross,0097U,,,360156
...,...,...,...,...,...,...,...,...,...,...
60891,DeIdentified_Max_Allowed,Lab Test - COVID-19 Test Panel By Non-CDC Lab ...,U0004,305.5500,outpatient,max,U0004,,,360156
60892,DeIdentified_Max_Allowed,Lab Test - COVID-19 Test Panel By Non-CDC Lab ...,U0004,305.5500,outpatient,max,U0004,,,360156
60893,DeIdentified_Max_Allowed,Lab Test - COVID-19 Test Panel By Non-CDC Lab ...,U0004,305.5500,outpatient,max,U0004,,,360156
60894,DeIdentified_Max_Allowed,Lab Test - COVID-19 Test Panel By Non-CDC Lab ...,U0004,305.5500,outpatient,max,U0004,,,360156


In [27]:
file.split('_')

['344430849', 'memorial-hospital', 'standardcharges.json']