In [245]:
import pandas as pd
import numpy as np

In [246]:
# Input selection: the file processed by this run and the directory it lives in.
file = '930223960_AsanteRogueRegionalMedicalCenter_Standardcharges.csv'
folder = '.\\input_files\\'

In [247]:
# Load the raw file with every column kept as text (dtype=str).
# The codec name 'ansi' is only registered on Windows (alias of 'mbcs'), so the
# original call raises LookupError on other platforms. 'cp1252' names the
# Windows Western-European code page explicitly and works everywhere.
# NOTE(review): assumes the file was written under a cp1252 locale - confirm.
df = pd.read_csv(folder + file, dtype=str, encoding='cp1252')

In [248]:
# The 'Location' column is not carried into the output.
df = df.drop(columns=['Location'])

In [249]:
# Source header -> output column name.
column_renames = {
    'Procedure': 'local_code',
    'Procedure Description': 'description',
    'Code Type': 'line_type',
    'Code': 'code',
    'NDC': 'ndc',
    'Rev Code': 'rev_code',
    'Quantity': 'quantity',
    'Payer': 'payer_name',
    'Plan(s)': 'plan_name',
}
df = df.rename(columns=column_renames)

In [250]:
# Keep only the part of rev_code that precedes the first ' -'.
rev_code_parts = df['rev_code'].str.split(' -')
df['rev_code'] = rev_code_parts.str.get(0)

In [251]:
# A quantity of exactly "1" is stored as missing.
is_unit_quantity = df['quantity'].eq("1")
df.loc[is_unit_quantity, 'quantity'] = pd.NA

In [252]:
# Long-format copy of the two payer-specific reimbursement columns.
# The first nine columns are kept as identifiers; the original column label
# ("IP ..." / "OP ...") lands in payer_temp.
cols = list(df.columns)
id_vars = cols[:9]
value_vars = ['IP Expected Reimbursement', 'OP Expected Reimbursement']

payer_df = df.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='payer_temp',
    value_name='standard_charge',
)

In [253]:
# These columns now live in payer_df; remove them before melting the rest.
already_melted = [
    'payer_name',
    'plan_name',
    'IP Expected Reimbursement',
    'OP Expected Reimbursement',
]
df = df.drop(columns=already_melted)

In [254]:
# Melt every remaining charge column: the first seven columns identify the
# line item, and each other column label becomes a payer_name value.
cols = list(df.columns)
id_vars, value_vars = cols[:7], cols[7:]

df = df.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='payer_name',
    value_name='standard_charge',
)

In [255]:
# Stack the payer-specific rows under the generic charge rows.
frames = [df, payer_df]
df = pd.concat(frames)

In [256]:
# Derive the care setting from the "IP"/"OP" prefix of the melted column label.
# Pass 1 classifies by payer_name; rows matching neither prefix get the
# placeholder 1.
df['setting'] = np.where(
    df['payer_name'].str.startswith('IP', na=False),
    'inpatient',
    np.where(df['payer_name'].str.startswith('OP', na=False), 'outpatient', 1),
)
# Pass 2 refines rows that carry a payer_temp label and leaves all others as
# pass 1 set them. The original overwrote pass 1 unconditionally, and because
# NaN is truthy inside np.where, every row without payer_temp became
# 'inpatient'. na=False makes missing labels count as "no match".
df['setting'] = np.where(
    df['payer_temp'].str.startswith('IP', na=False),
    'inpatient',
    np.where(df['payer_temp'].str.startswith('OP', na=False), 'outpatient', df['setting']),
)

In [257]:
# Preview the combined frame (generic charge rows followed by payer-specific rows).
df

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting
0,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,IP Price,32996.83,,,inpatient
1,MS784,DRG,MS784,,,Cesarean Section With Sterilization With Cc,,IP Price,40323.75,,,inpatient
2,MS785,DRG,MS785,,,Cesarean Section With Sterilization Without Cc...,,IP Price,27123.45,,,inpatient
3,MS786,DRG,MS786,,,Cesarean Section Without Sterilization With McC,,IP Price,40926.46,,,inpatient
4,MS787,DRG,MS787,,,Cesarean Section Without Sterilization With Cc,,IP Price,36724.63,,,inpatient
...,...,...,...,...,...,...,...,...,...,...,...,...
334399,9974,ERX,HCPCS J7512,00046-1102-81,0250,CONJUGATED ESTROGENS 0.625 MG TABLET,0.5 tablet,<Self-pay>,16.315,<Self-pay>,OP Expected Reimbursement,outpatient
334400,99754,ERX,HCPCS 2500003,00456-2405-11,0250,ASENAPINE 5 MG SUBLINGUAL TABLET,1 tablet,<Self-pay>,102.232,<Self-pay>,OP Expected Reimbursement,outpatient
334401,9977,ERX,HCPCS 2500003,00046-0872-21,0250,CONJUGATED ESTROGENS 0.625 MG/GRAM VAGINAL CREAM,60 Applicatorful,<Self-pay>,873.665,<Self-pay>,OP Expected Reimbursement,outpatient
334402,9982,ERX,HCPCS 2500003,54879-0001-01,0250,ETHAMBUTOL 100 MG TABLET,8 tablet,<Self-pay>,36.816,<Self-pay>,OP Expected Reimbursement,outpatient


In [258]:
# Map each known column label to its charge category. Both label orders
# ("IP De-Identified MIN" and "De-Identified IP MIN") are listed.
payer_mapping = {
    label: category
    for category, labels in {
        'gross': ('IP Price', 'OP Price'),
        'min': (
            'IP De-Identified MIN',
            'OP De-Identified MIN',
            'De-Identified IP MIN',
            'De-Identified OP MIN',
        ),
        'max': (
            'IP De-Identified MAX',
            'OP De-Identified MAX',
            'De-Identified IP MAX',
            'De-Identified OP MAX',
        ),
        'cash': ('<Self-pay>',),
    }.items()
    for label in labels
}

# Labels missing from payer_mapping are left as NaN here.
df = df.assign(payer_category=df['payer_name'].map(payer_mapping))

In [259]:
# Self-pay rows are the cash price.
is_self_pay = df['payer_name'].eq('<Self-pay>')
df.loc[is_self_pay, 'payer_category'] = 'cash'

In [260]:
# Rows that carry a payer_temp label came from the payer-specific melt; tag
# them as 'payer'. '<Self-pay>' rows also come from that melt, so they are
# excluded here. The original overwrote their 'cash' category with 'payer'.
from_payer_melt = df['payer_temp'].notna()
is_self_pay = df['payer_name'] == '<Self-pay>'
df.loc[from_payer_melt & ~is_self_pay, 'payer_category'] = 'payer'

In [261]:
# payer_temp is kept for now: later cells still read it when deriving `setting`.
# df.drop(columns='payer_temp', inplace=True)

In [262]:
# Remove surrounding whitespace from the code values.
trimmed_code = df['code'].str.strip()
df['code'] = trimmed_code

In [263]:
# Renumber rows 0..n-1 and discard the old index.
df = df.reset_index(drop=True)

In [264]:
# Codes that start with "MS" plus three digits: store them in ms_drg with the
# "MS" text removed.
looks_like_ms_drg = df['code'].notna() & df['code'].str.match(r"MS\d{3}")
df.loc[looks_like_ms_drg, 'ms_drg'] = df['code'].str.replace('MS', '')

In [265]:
# Cast the extracted DRG numbers to str, but leave rows without a DRG missing.
# A blanket astype(str) turns NaN into the literal text 'nan', which then
# reaches the exported CSV as a value.
has_drg = df['ms_drg'].notna()
df.loc[has_drg, 'ms_drg'] = df.loc[has_drg, 'ms_drg'].astype(str)

In [266]:
# Renumber rows 0..n-1 and discard the old index.
df = df.reset_index(drop=True)

In [267]:
# Codes prefixed with "HCPCS " or "CPT® ": store them in hcpcs_cpt with the
# prefix text removed. Prefixes are processed in the same order as before.
code_present = df['code'].notna()
for prefix in ('HCPCS ', 'CPT® '):
    has_prefix = code_present & df['code'].str.startswith(prefix)
    df.loc[has_prefix, 'hcpcs_cpt'] = df['code'].str.replace(prefix, '')

In [268]:
# Seven-character values (e.g. '2500003' in the output above) are blanked out.
is_seven_chars = df['hcpcs_cpt'].str.len().eq(7)
df.loc[is_seven_chars, 'hcpcs_cpt'] = pd.NA

In [269]:
# Move the literal 'SART' value out of hcpcs_cpt and into alt_hcpcs_cpt.
df.loc[df['hcpcs_cpt'] == 'SART', ['hcpcs_cpt', 'alt_hcpcs_cpt']] = [pd.NA, 'SART']

# Any remaining non-null value that is not shaped like a code (letter + 4
# digits, 5 digits, or 4 digits + letter) is moved to alt_hcpcs_cpt as well.
# The original built the null guard as fillna('').isna(), which is always
# False, so every null row was selected and alt_hcpcs_cpt was overwritten
# with NA. That erased the 'SART' value written just above.
well_formed = df['hcpcs_cpt'].str.match(r'^[A-Z][0-9]{4}$|^[0-9]{5}$|^[0-9]{4}[A-Z]$', na=False)
mask = df['hcpcs_cpt'].notna() & ~well_formed
df.loc[mask, 'alt_hcpcs_cpt'] = df['hcpcs_cpt']
df.loc[mask, 'hcpcs_cpt'] = pd.NA

In [270]:
# Sanity check: no row should still have 'SART' in hcpcs_cpt.
df[df['hcpcs_cpt'].eq('SART')]

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting,payer_category,ms_drg,hcpcs_cpt,alt_hcpcs_cpt


In [271]:
# Show rows whose plan_name contains a comma.
plan_present = df['plan_name'].notna()
df.loc[plan_present & df['plan_name'].str.contains(',')]

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting,payer_category,ms_drg,hcpcs_cpt,alt_hcpcs_cpt
1086811,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,PROVIDENCE HEALTH PLAN [1078],31913.86,"PEBB [100100], PEBB PROVIDENCE CHOICE MEDICAL ...",IP Expected Reimbursement,inpatient,payer,783,,
1086816,MS784,DRG,MS784,,,Cesarean Section With Sterilization With Cc,,PROVIDENCE HEALTH PLAN [1078],18728.48,"PEBB [100100], PEBB PROVIDENCE CHOICE MEDICAL ...",IP Expected Reimbursement,inpatient,payer,784,,
1086821,MS785,DRG,MS785,,,Cesarean Section With Sterilization Without Cc...,,PROVIDENCE HEALTH PLAN [1078],15790.66,"PEBB [100100], PEBB PROVIDENCE CHOICE MEDICAL ...",IP Expected Reimbursement,inpatient,payer,785,,
1086826,MS786,DRG,MS786,,,Cesarean Section Without Sterilization With McC,,PROVIDENCE HEALTH PLAN [1078],26895.56,"PEBB [100100], PEBB PROVIDENCE CHOICE MEDICAL ...",IP Expected Reimbursement,inpatient,payer,786,,
1086831,MS787,DRG,MS787,,,Cesarean Section Without Sterilization With Cc,,PROVIDENCE HEALTH PLAN [1078],18208.82,"PEBB [100100], PEBB PROVIDENCE CHOICE MEDICAL ...",IP Expected Reimbursement,inpatient,payer,787,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1312532,9974,ERX,HCPCS J7512,00046-1102-81,0250,CONJUGATED ESTROGENS 0.625 MG TABLET,0.5 tablet,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],20.08,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",OP Expected Reimbursement,outpatient,payer,,J7512,
1312533,99754,ERX,HCPCS 2500003,00456-2405-11,0250,ASENAPINE 5 MG SUBLINGUAL TABLET,1 tablet,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],125.82,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",OP Expected Reimbursement,outpatient,payer,,,
1312534,9977,ERX,HCPCS 2500003,00046-0872-21,0250,CONJUGATED ESTROGENS 0.625 MG/GRAM VAGINAL CREAM,60 Applicatorful,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],1075.28,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",OP Expected Reimbursement,outpatient,payer,,,
1312535,9982,ERX,HCPCS 2500003,54879-0001-01,0250,ETHAMBUTOL 100 MG TABLET,8 tablet,LIBERTY HEALTHSHARE MEDSOLUTIONS LLC [1159],45.31,"LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]",OP Expected Reimbursement,outpatient,payer,,,


In [272]:
# One row per plan: split multi-plan cells and explode them.
# Splitting on every ', ' also cuts plan names that contain a comma themselves
# (e.g. "LIBERTY HEALTHSHARE MEDSOLUTIONS, LLC [29]"). Split only on a comma
# that directly follows the closing "]" of a plan id instead.
# NOTE(review): assumes every plan in a multi-plan cell ends with "[id]", as
# in the rows displayed above - confirm for the other hospital files.
plan_lists = df['plan_name'].str.split(r'(?<=\]),\s*', regex=True)
df = df.assign(plan_name=plan_lists).explode('plan_name')

In [273]:
# Recompute the category: known labels come from payer_mapping, everything
# else is treated as a payer.
mapped_category = df['payer_name'].map(payer_mapping)
df['payer_category'] = mapped_category.fillna('payer')

In [274]:
# For the Rogue Regional file the IP/OP token is searched anywhere in the label
# (its labels look like 'De-Identified IP MIN'); for every other file it must
# be a prefix.
match_anywhere = file == '930223960_AsanteRogueRegionalMedicalCenter_Standardcharges.csv'


def _has_token(labels, token):
    """Test *labels* for *token*: substring match for the Rogue file, prefix match otherwise."""
    return labels.str.contains(token) if match_anywhere else labels.str.startswith(token)


# Pass 1: classify from payer_name; rows matching neither get the placeholder 1.
df['setting'] = np.where(
    _has_token(df['payer_name'], 'IP'),
    'inpatient',
    np.where(_has_token(df['payer_name'], 'OP'), 'outpatient', 1),
)

# Pass 2: where payer_temp is present it decides; otherwise keep pass 1.
temp_present = df['payer_temp'].notnull()
df['setting'] = np.where(
    temp_present & _has_token(df['payer_temp'], 'IP'),
    'inpatient',
    np.where(temp_present & _has_token(df['payer_temp'], 'OP'), 'outpatient', df['setting']),
)

In [275]:
# One example row per (payer_name, payer_temp) combination.
is_repeat = df.duplicated(subset=['payer_name', 'payer_temp'])
df[~is_repeat]

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting,payer_category,ms_drg,hcpcs_cpt,alt_hcpcs_cpt
0,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,IP Price,32996.83,,,inpatient,gross,783,,
167202,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified IP MIN,13198.73,,,inpatient,min,783,,
334404,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified IP MAX,52122.22,,,inpatient,max,783,,
501606,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,OP Price,,,,outpatient,gross,783,,
668808,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified OP MIN,,,,outpatient,min,783,,
836010,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified OP MAX,,,,outpatient,max,783,,
1003212,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,AETNA [1002],28047.31,AETNA [100003],IP Expected Reimbursement,inpatient,payer,783,,
1011571,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,ASANTE HEALTH PLAN [1014],13198.73,ASANTE PPO HEALTH PLAN [100389],IP Expected Reimbursement,inpatient,payer,783,,
1019930,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,CIGNA [1025],28047.31,CIGNA [100344],IP Expected Reimbursement,inpatient,payer,783,,
1028289,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,HEALTHNET [1047],28047.31,HEALTHNET COMMERCIAL CLAIMS [100662],IP Expected Reimbursement,inpatient,payer,783,,


In [276]:
# Sanity check: rows that ended up without a payer_category.
df[df['payer_category'].isnull()]

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting,payer_category,ms_drg,hcpcs_cpt,alt_hcpcs_cpt


In [277]:
# Remove thousands separators from the charge text.
charge_text = df['standard_charge']
df['standard_charge'] = charge_text.str.replace(',', '', regex=False)

In [278]:
# Rows without a charge value are not exported.
df = df.dropna(subset=['standard_charge'])

In [279]:
# One example row per (payer_category, payer_name) combination.
df.drop_duplicates(subset=['payer_category', 'payer_name'], keep='first')

Unnamed: 0,local_code,line_type,code,ndc,rev_code,description,quantity,payer_name,standard_charge,plan_name,payer_temp,setting,payer_category,ms_drg,hcpcs_cpt,alt_hcpcs_cpt
0,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,IP Price,32996.83,,,inpatient,gross,783.0,,
167202,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified IP MIN,13198.73,,,inpatient,min,783.0,,
334404,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,De-Identified IP MAX,52122.22,,,inpatient,max,783.0,,
502318,3002160,EAP,CPT® 82523,,300.0,"HC C-Telopeptide,Serum S/O",,OP Price,86.9,,,outpatient,gross,,82523.0,
669520,3002160,EAP,CPT® 82523,,300.0,"HC C-Telopeptide,Serum S/O",,De-Identified OP MIN,22.42,,,outpatient,min,,82523.0,
836722,3002160,EAP,CPT® 82523,,300.0,"HC C-Telopeptide,Serum S/O",,De-Identified OP MAX,86.9,,,outpatient,max,,82523.0,
1003212,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,AETNA [1002],28047.31,AETNA [100003],IP Expected Reimbursement,inpatient,payer,783.0,,
1011571,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,ASANTE HEALTH PLAN [1014],13198.73,ASANTE PPO HEALTH PLAN [100389],IP Expected Reimbursement,inpatient,payer,783.0,,
1019930,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,CIGNA [1025],28047.31,CIGNA [100344],IP Expected Reimbursement,inpatient,payer,783.0,,
1028289,MS783,DRG,MS783,,,Cesarean Section With Sterilization With McC,,HEALTHNET [1047],28047.31,HEALTHNET COMMERCIAL CLAIMS [100662],IP Expected Reimbursement,inpatient,payer,783.0,,


In [280]:
# Input file name -> hospital id written to the hospital_id column.
id_mapping = dict([
    ('571181758_AsanteThreeRiversMedicalCenter_Standardcharges.csv', '380002'),
    ('815427847_AsanteAshlandCommunityHospital_Standardcharges.csv', '380005'),
    ('930223960_AsanteRogueRegionalMedicalCenter_Standardcharges.csv', '380018'),
])

# Look up the hospital id for the processed file and stamp it on every row.
hosp_id = id_mapping[file]
df = df.assign(hospital_id=hosp_id)

# Output name: hospital id immediately followed by the second '_'-separated
# piece of the input file name, plus '.csv'.
output_folder = '.\\output_files\\'
filename = f"{hosp_id}{file.split('_')[1]}.csv"

df.to_csv(f"{output_folder}{filename}", index=False)