In [27]:
import pandas as pd
import os

In [45]:
ccns = {
    '250965375': '390217',
    '250965414': '390219',
    '250965612': '390145'
}

In [46]:
def payer_category(payer):
    payers = {
        'GROSS CHARGES': 'gross',
        'DISCOUNTED CASH PRICE': 'cash',
        'De-Identified Minimum Negotiated Price': 'min',
        'De-Identified Maximum Negotiated Price': 'max'
    }

    try:
        return payers[payer]
    except KeyError:
        return 'payer'

def plan_type(payer):
    com = 'Commercial Contracts'
    med = 'Medicare Managed Care Contracts'
    # Medical Assistance does not need to be included because they all have it
    payers = {
        'Aetna (PEBTF)': com,
        "Highmark Children's Health Insurance Program (CHIP)": com,
        'Highmark ACA Products': com,
        'Highmark - Laurel Surgical': com,
        'UPMC ACA (Exchange)': com,
        'AmeriHealth Caritas': med,
        'VA CCN': med,
    }

    if 'Commercial' in payer:
        return com
    elif 'Medicare' in payer:
        return med
    elif 'Medical Assistance' in payer:
        return 'Medical Assistance Managed Care Contracts'

    else:
        try:
            return payers[payer]
        except KeyError:
            return pd.NA

def plan_name(payer):
    payers = {
        'Aetna (PEBTF)': 'Pennsylvania Employees Benefit Trust Fund (PEBTF)',
        # 'Highmark ACA Products': 'Affordable Care Act (ACA)',
        'Highmark Medicare Advantage Community Blue': 'Community Blue Medicare',
        'Highmark Medicare Advantage Security Blue': 'Security Blue HMO-POS',
        'Highmark Medicare Advantage Freedom Blue': 'Freedom Blue PPO',
        "Highmark Children's Health Insurance Program (CHIP)": 'Highmark Healthy Kids (CHIP)',
    }

    try:
        return payers[payer]
    except KeyError:
        return pd.NA
    


    

In [47]:
file = '250965612_WestmorelandRegionalHospital_StandardCharges[3].xlsx'
folder = '.\\input_files\\'

In [49]:
df = pd.read_excel(folder + file, header=2)

In [32]:
df.rename(columns={
    'SERVICE CODE': 'internal_code',
    'SERVICE CODE DESC': 'desc',
    'CPT/HCPCS CODE': 'code_orig',
}, inplace=True)

In [33]:
cols = df.columns.tolist()
col_index = cols.index('GROSS CHARGES')
id_vars = cols[:col_index]
value_vars = cols[col_index:]

df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer_orig', value_name='rate')

In [43]:
df['payer_category'] = df['payer_orig'].apply(payer_category)
df['plan_name'] = df['payer_orig'].apply(plan_name)

df['payer_orig'] = df['payer_orig'].str.strip()

df['code'] = df['code_orig']
df['payer_name'] = df['payer_orig']

# Set code_prefix to 'hcpcs_cpt' if code is not na
df.loc[df['code'].notna(), 'code_prefix'] = 'hcpcs_cpt'
df.loc[df['plan_name'].notna(), 'plan_orig'] = df['payer_name']

df['code'].fillna('na')
df['code_prefix'].fillna('na')

0         hcpcs_cpt
1         hcpcs_cpt
2         hcpcs_cpt
3         hcpcs_cpt
4         hcpcs_cpt
            ...    
165715           na
165716           na
165717           na
165718           na
165719           na
Name: code_prefix, Length: 165720, dtype: object

In [None]:
ein = file.split("_")[0]
ein = ein[:2] + "-" + ein[2:]

df['hospital_ein'] = ein
df['hospital_ccn'] == ccns[file.split("_")[0]]
df['file_last_updated'] = '2022-12-20'
df['filename'] = file
df['url'] = "https://www.excelahealth.org/documents/content/" + file

In [40]:
df.to_csv('test.csv', index=False)

In [44]:
df

Unnamed: 0,internal_code,desc,code_orig,payer_orig,rate,payer_category,plan_name,code,payer_name,code_prefix,plan_orig
0,721331452,IR THYROID FNA 1ST LESION INC US,10005,GROSS CHARGES,1310.0,gross,,10005,GROSS CHARGES,hcpcs_cpt,
1,721331454,IR FNA 1ST LESION/INC US,10005,GROSS CHARGES,1310.0,gross,,10005,GROSS CHARGES,hcpcs_cpt,
2,721331455,ZZ IR FNA ADD LESION/INC US,10006,GROSS CHARGES,1310.0,gross,,10006,GROSS CHARGES,hcpcs_cpt,
3,721331456,IR FNA 1ST LESION/INC FLUORO,10007,GROSS CHARGES,1310.0,gross,,10007,GROSS CHARGES,hcpcs_cpt,
4,720331285,CT FINE NEEDLE ASPIRATION 1ST,10009,GROSS CHARGES,1310.0,gross,,10009,GROSS CHARGES,hcpcs_cpt,
...,...,...,...,...,...,...,...,...,...,...,...
165715,601502895,OBS FRONTLOAD INITIAL HOUR,,De-Identified Maximum Negotiated Price,182.7,max,,,De-Identified Maximum Negotiated Price,,
165716,601502897,R/B OBSERVATION,,De-Identified Maximum Negotiated Price,182.7,max,,,De-Identified Maximum Negotiated Price,,
165717,601502898,OBS FRONTLOAD INITIAL HOUR,,De-Identified Maximum Negotiated Price,182.7,max,,,De-Identified Maximum Negotiated Price,,
165718,601502900,R/B OBSERVATION,,De-Identified Maximum Negotiated Price,182.7,max,,,De-Identified Maximum Negotiated Price,,
