In [97]:
import pandas as pd
from tqdm import tqdm
import numpy as np
# Register the progress_* helpers (e.g. Series.progress_apply) on pandas objects.
# The call is made for its side effect only: it returns nothing, so binding its
# result back to `tqdm` would replace the imported class with None.
tqdm.pandas()


In [77]:
def payer_category(payer_category):
    """Classify a source column header as a charge type or a payer.

    Returns 'cash', 'gross', 'min' or 'max' for the four standard charge
    headers, and 'payer' for any other header.
    """
    # The 'Negotiateed' spelling is intentional: it matches the header as it
    # appears in the source file.
    standard_charges = {
        'Discounted Cash Price': 'cash',
        'Gross Charge': 'gross',
        'Minimum Negotiateed Charge': 'min',
        'Maximum Negotiated Charge': 'max',
    }
    return standard_charges.get(payer_category, "payer")

def plan_name(x):
    """Extract the plan portion of a payer column header.

    Returns pd.NA for the four standard charge headers and for headers that
    carry no plan. Headers with an explicit override return the mapped plan
    name. Any other header containing a space returns its last
    space-separated word.
    """
    charge_headers = (
        'Gross Charge',
        'Discounted Cash Price',
        'Minimum Negotiateed Charge',
        'Maximum Negotiated Charge',
    )
    # Charge-type columns are not payers, so they never have a plan.
    if x in charge_headers:
        return pd.NA

    # Headers whose plan cannot be derived by taking the last word.
    overrides = {
        'Aetna Managed Care': 'Managed Care',
        'BCBS\n(PPO,State Health, Federal Employees, Blue Select)': 'PPO,State Health, Federal Employees, Blue Select',
        'BCBS High Performance Network': 'High Performance Network',
        'Cigna Managed Care': 'Managed Care',
        'Healthgram Lexington Brands': 'Lexington Brands',
        'HealthTeam Advantage': pd.NA,  # Not a plan
        'UHC Managed Care': 'Managed Care',
        'UHC Managed Care Individual Exchange': 'Managed Care Individual Exchange',
    }
    if x in overrides:
        return overrides[x]

    # Fallback: the last word is the plan; a single-word header has none.
    if " " not in x:
        return pd.NA
    return x.split(" ")[-1]

def code_type(code_type):
    """Translate a source code-column tag into its output code-type label.

    Raises KeyError for any tag other than 'eap', 'cpt' or 'drg'.
    """
    label_by_tag = dict(eap='eapg', cpt='hcpcs_cpt', drg='ms-drg')
    return label_by_tag[code_type]


In [78]:
# def update_df(x):
#     if x in ('Gross Charge', 'Discounted Cash Price', 'Minimum Negotiateed Charge', 'Maximum Negotiated Charge'):
#         return pd.Series([payer_category(x), pd.NA], index=['payer_category', 'plan_name'])
#     else:
#         return pd.Series([pd.NA, plan_name(x)], index=['payer_category', 'plan_name'])

In [79]:
# Hospital identifiers, keyed by the first word of the input file name.
# Each value is [CCN, TIN], in the order the later cell unpacks them.
ccn_dict = {"Lexington": ['340096', '56-0543238']}

In [80]:
# Load the raw export. The code-like columns are read as strings so pandas
# does not parse them as numbers.
file = "./input_files/Lexington Transparency CY2022.csv"
df = pd.read_csv(
    file,
    # NOTE(review): 'ansi' is a codec alias that only exists on Windows --
    # confirm the intended encoding before running this elsewhere.
    encoding='ansi',
    # 'CPT/HCPCS' used to be listed twice; a dict keeps only one entry per key.
    dtype={'Revenue Code': str, 'DRG': str, 'CPT/HCPCS': str, 'EAP': str, 'NDC': str},
)  # add nrows=100000 for a quick trial run
# From here on `file` is just the bare file name; later cells use it for the
# hospital lookup, the 'filename' column and the URL.
file = file.split("/")[-1]

  df = pd.read_csv(file, encoding='ansi', dtype={'Revenue Code': str, 'DRG': str, 'CPT/HCPCS': str, 'CPT/HCPCS': str, 'EAP': str, 'NDC': str})#, nrows=100000)


In [81]:
# Map the source headers onto the output column names.
df = df.rename(
    {
        # descriptive fields
        'Procedure Description': 'description',
        'Patient Type': 'patient_class',
        # code fields
        'Revenue Code': 'rev_code',
        'EAP': 'procedure_code',
        'DRG': 'drg',
        'CPT/HCPCS': 'cpt',
        'HCPCS': 'hcpcs',
        'NDC': 'ndc',
    },
    axis='columns',
)

In [82]:
# melt codes: each non-null 'drg' / 'cpt' value becomes its own row, with
# 'code_meta' recording which of the two columns the value came from.
value_vars = ['drg', 'cpt']
id_vars = [x for x in df.columns if x not in value_vars]
df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='code_meta', value_name='code')
# 'code_meta' must stay in the frame: a later cell reads it to derive
# 'code_type'. The former df.drop(subset=['code_meta']) call was removed; it
# was invalid (DataFrame.drop takes no `subset` argument) and targeted this column.
df = df.dropna(subset=['code'])

# Keep the code as read from the file, before any normalisation.
df['code_orig'] = df['code']

In [83]:
# Display the current column labels of df.
df.columns

Index(['procedure_code', 'patient_class', 'rev_code', 'ndc', 'description',
       'Gross Charge', 'Discounted Cash Price', 'Minimum Negotiateed Charge',
       'Maximum Negotiated Charge', 'Aetna Managed Care', 'Aetna Medicare',
       'Aetna Wholehealth', 'Ambetter', 'Amerihealth Medicaid', 'BeechStreet',
       'BCBS\n(PPO,State Health, Federal Employees, Blue Select)',
       'BCBS High Performance Network', 'Blue Local Group',
       'Blue Local Individual', 'Blue Medicare', 'Blue Value',
       'Carolina Complete Medicaid', 'Cigna HealthSprings',
       'Cigna Managed Care', 'Coventry Wellpath', 'Coventry First Health',
       'DirectNet', 'Healthgram', 'Healthgram Lexington Brands',
       'HealthTeam Advantage', 'Healthy Blue Medicaid', 'Humana Choicecare',
       'Humana Medicare', 'Medcost', 'Medcost Ultra', 'Multiplan-PCHS',
       'National Provider Network', 'UHC Managed Care',
       'UHC Managed Care Individual Exchange', 'UHC Medicare',
       'United Community Medicaid

In [84]:
# Bring the columns added by the code melt to the front, so the payer melt can
# split the frame at 'description'. They are picked by name rather than by
# position, so re-running this cell does not rotate the columns again.
cols = ['code_meta', 'code', 'code_orig']
cols = cols + [c for c in df.columns if c not in cols]
df = df[cols]

In [85]:
# melt payers: columns up to and including 'description' identify a row;
# every column after it is melted into a (payer_orig, rate) pair.
desc_index = cols.index("description")
id_vars, value_vars = cols[:desc_index + 1], cols[desc_index + 1:]

df = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='payer_orig', value_name='rate')

In [86]:
# Schema conformance enforcement
# '#REF!' is a spreadsheet error present in the source data; it is treated
# like a missing value and stored as 'na'.
# The results are assigned back to the columns: calling fillna(inplace=True)
# on a column selection does not update the parent frame under pandas
# copy-on-write.
df['rev_code'] = df['rev_code'].replace("#REF!", 'na').fillna('na')
df['ndc'] = df['ndc'].fillna('na')


In [94]:
# Derive the normalised columns with one pass over df per column.
# The plan name is deliberately not extracted (not allowed).
df['payer_category'] = df['payer_orig'].progress_apply(payer_category)
df['code_type'] = df['code_meta'].progress_apply(code_type)
# Left-pad MS-DRG codes with zeros to three characters; other codes are kept as is.
df['code'] = df['code'].mask(df['code_type'] == 'ms-drg', df['code'].str.zfill(3))

100%|██████████| 10881742/10881742 [00:11<00:00, 977563.14it/s]
100%|██████████| 10881742/10881742 [00:08<00:00, 1298072.03it/s]


KeyboardInterrupt: 

In [99]:
# Attach the hospital-level columns. The hospital is looked up by the first
# word of the input file name.
ccn, tin = ccn_dict[file.split()[0]]
df['hospital_ccn'] = ccn
df['hospital_tin'] = tin
df["filename"] = file
df['url'] = (
    'https://cdn.atriumhealth.org/-/media/wakeforest/clinical/files/patient-and-family-resources/transparency-files/2022/'
    + file
)


In [100]:
# Strip thousands separators from the rate, then drop rows with no usable rate.
# NOTE(review): .str.replace yields NaN for any non-string value, and those
# rows are dropped below -- confirm 'rate' holds only strings at this point.
df['rate'] = df['rate'].str.replace(',', '')
# Remove missing rates and spreadsheet error markers in one filter. The explicit
# copy makes df an independent frame, so the assignment below is not a write
# to a slice of the previous frame (and no inplace dropna is needed).
df = df.loc[df['rate'].notna() & ~df['rate'].isin(['#VALUE!', '#NUM!'])].copy()
df['patient_class'] = df['patient_class'].str.lower()
# df['rev_code'] = df['rev_code'].str.zfill(4)

In [90]:
 # This has to be done like this to prevent 'na' from being zero padded
df['rev_code'] = df['rev_code'].progress_apply(
    # the 'na' placeholder passes through; any other value is padded to 4 characters
    lambda rev: rev if rev == 'na' else str(rev).zfill(4)
)

100%|██████████| 10881742/10881742 [00:07<00:00, 1548451.13it/s]


In [91]:
# df = df[df['code'].str.len() < 25]


In [101]:
# Write the result as <first word of the file name>.csv, without the index column.
df.to_csv(
    path_or_buf='F:\\_Bounty\\transparency-in-pricing\\' + file.split()[0] + '.csv',
    index=False,
)

In [93]:
# Display df using the notebook's rich representation.
df

Unnamed: 0,code_meta,code,code_orig,procedure_code,patient_class,rev_code,ndc,description,payer_orig,rate,payer_category,code_type,hospital_ccn,hospital_tin,filename,url
0,drg,571,571,MS571,inpatient,na,na,Skin Debridement With Cc,Gross Charge,20641,gross,ms-drg,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
1,drg,54,54,MS054,inpatient,na,na,Nervous System Neoplasms With McC,Gross Charge,10014,gross,ms-drg,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
2,drg,56,56,MS056,inpatient,na,na,Degenerative Nervous System Disorders With McC,Gross Charge,45019,gross,ms-drg,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
3,drg,57,57,MS057,inpatient,na,na,Degenerative Nervous System Disorders Without McC,Gross Charge,8487,gross,ms-drg,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
4,drg,60,60,MS060,inpatient,na,na,Multiple Sclerosis And Cerebellar Ataxia Witho...,Gross Charge,37345,gross,ms-drg,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11406341,cpt,C1760,C1760,70503402,outpatient,na,na,DEVICE CLOSURE CORDIS EXOSEAL OD7 FR FEMORAL A...,Wellcare Medicare,0,payer,hcpcs_cpt,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
11406342,cpt,C1789,C1789,121464,outpatient,na,na,IMPLANT BREAST MENTOR MEMORYGEL COHESIVE I SIL...,Wellcare Medicare,0,payer,hcpcs_cpt,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
11406343,cpt,C1725,C1725,70504176,outpatient,na,na,COYOTE ES ODSEC2.5 MM L20 MM L143 CM OTW ULTRA...,Wellcare Medicare,0,payer,hcpcs_cpt,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
11406344,cpt,C1725,C1725,212833,outpatient,na,na,COYOTE ES ODSEC4 MM L40 MM L145 CM OTW ULTRA L...,Wellcare Medicare,0,payer,hcpcs_cpt,340096,56-0543238,Lexington Transparency CY2022.csv,https://cdn.atriumhealth.org/-/media/wakefores...
