In [450]:
import pandas as pd
import numpy as np

In [451]:
# Input selection: name of the standard-charges CSV to process and the
# (Windows-style, relative) directory it is read from.
file = '141772971_COBLESKILL-REGIONAL-HOSPITAL_STANDARDCHARGES_0.csv'
folder = '.\\input_files\\'

In [452]:
# Load the raw file with every column kept as text; the first three lines of
# the file are skipped before the header row is read.
df = pd.read_csv(
    f'{folder}{file}',
    dtype=str,
    skiprows=3,
)

In [453]:
# Source header -> working column name. Columns not listed here (the price
# columns) keep their original names.
column_renames = {
    'Payer': 'payer_name',
    'Plan(s)': 'plan_name',
    'Contract': 'additional_payer_notes',
    'Internal Code': 'local_code',
    'Code Type': 'line_type',
    'Code': 'code',
    'NDC': 'ndc',
    'Rev Code': 'rev_code',
    'Procedure Description': 'description',
    'Quantity': 'quantity',
}
df = df.rename(columns=column_renames)

In [454]:
# `plan_name` holds a comma-separated list of plans: turn it into a list and
# give each plan its own row (the other columns are repeated, and so is the
# original row index).
df['plan_name'] = df['plan_name'].str.split(',')
df = df.explode('plan_name')

In [455]:
# Preview the frame after the plan_name explode (index values repeat per plan).
df

Unnamed: 0,payer_name,additional_payer_notes,plan_name,local_code,line_type,code,ndc,rev_code,description,quantity,IP Price,IP XR Detail,OP Price,OP Expected Reimbursement,Cash Price
0,AETNA [1001],CRH AETNA CONTRACT [115],AETNA ALLIED BENEFIT SYSTEM [100101],H128224,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resusitator Bag,1,46.75,Not reimbursed separately,46.75,39.74,46.75
0,AETNA [1001],CRH AETNA CONTRACT [115],AETNA MANAGED CARE PLANS [100102],H128224,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resusitator Bag,1,46.75,Not reimbursed separately,46.75,39.74,46.75
0,AETNA [1001],CRH AETNA CONTRACT [115],AETNA SIGNATURE ADMINISTRATORS GENERIC [100103],H128224,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resusitator Bag,1,46.75,Not reimbursed separately,46.75,39.74,46.75
0,AETNA [1001],CRH AETNA CONTRACT [115],AETNATRADITIONAL PLANS [100104],H128224,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resusitator Bag,1,46.75,Not reimbursed separately,46.75,39.74,46.75
0,AETNA [1001],CRH AETNA CONTRACT [115],1199 NATIONAL BENEFIT FUND(BAR) [100105],H128224,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resusitator Bag,1,46.75,Not reimbursed separately,46.75,39.74,46.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257890,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],NY [3001901],H128209,EAP,,,0272 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Resolution Tips 846,1,225,Not reimbursed separately,225,225,225
257891,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],MEDICAID,H128215,EAP,CPT« 94799,,0410 - RESPIRATORY SERVICES - GENERAL CLASSIFI...,HC Respiratory Evaluation 742,1,169.46,Not reimbursed separately,169.46,169.46,169.46
257891,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],NY [3001901],H128215,EAP,CPT« 94799,,0410 - RESPIRATORY SERVICES - GENERAL CLASSIFI...,HC Respiratory Evaluation 742,1,169.46,Not reimbursed separately,169.46,169.46,169.46
257892,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],MEDICAID,H128216,EAP,,,0270 - MEDICAL/SURGICAL SUPPLIES AND DEVICES -...,HC Respiratory Therapy Instructions 846,1,1.25,Not reimbursed separately,1.25,1.25,1.25


In [456]:
# The explode step left duplicate index labels; replace them with a fresh
# 0..n-1 index and discard the old one.
df = df.reset_index(drop=True)

In [457]:
# Reduce populated `rev_code` values to their first run of four digits
# (e.g. '0270 - MEDICAL/SURGICAL ...' -> '0270'); missing values are left alone.
rev_code_present = ~df['rev_code'].isna()
four_digit_rev_code = df['rev_code'].str.extract(r'(\d{4})', expand=False)
df.loc[rev_code_present, 'rev_code'] = four_digit_rev_code

In [458]:
# 'IP XR Detail' is dropped before the price columns are unpivoted, so it
# does not become a charge row.
df = df.drop(columns=['IP XR Detail'])

In [459]:
# Split the columns by position into the leading identifier columns (repeated
# on every output row) and the trailing price columns that get unpivoted.
# A frame that has a `line_type` column uses ten identifier columns, otherwise nine.
cols = list(df.columns)
id_column_count = 10 if 'line_type' in cols else 9
id_vars, val_vars = cols[:id_column_count], cols[id_column_count:]

# Wide -> long: one row per (identifier columns, price column) pair, with the
# price column's name in `payer_type` and its value in `standard_charge`.
df = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=val_vars,
    var_name='payer_type',
    value_name='standard_charge',
)

In [460]:
# Remove leading/trailing whitespace from the former column headers now held in `payer_type`.
payer_type_trimmed = df['payer_type'].str.strip()
df['payer_type'] = payer_type_trimmed

In [461]:
# Derive `setting` from the prefix of `payer_type`:
#   starts with 'IP ' -> 'inpatient', starts with 'OP ' -> 'outpatient'.
# Rows matching neither prefix (e.g. 'Cash Price') receive the fallback 1, which
# is what the `setting` column shows for those rows in the preview below.
# NOTE(review): confirm that 1 is the intended placeholder for "no specific setting".
is_inpatient = df['payer_type'].str.startswith('IP ')
is_outpatient = df['payer_type'].str.startswith('OP ')
outpatient_or_fallback = np.where(is_outpatient, 'outpatient', 1)
df['setting'] = np.where(is_inpatient, 'inpatient', outpatient_or_fallback)

In [462]:
# Price-column name -> payer category. Both settings' list prices map to
# 'gross', both expected-reimbursement columns to 'payer', and the cash price
# to 'cash'.
payer_mapping = {
    **dict.fromkeys(('IP Price', 'OP Price'), 'gross'),
    **dict.fromkeys(('IP Expected Reimbursement', 'OP Expected Reimbursement'), 'payer'),
    'Cash Price': 'cash',
}

# Translate each row's price-column name into its payer category; names that
# are not keys of payer_mapping come out as missing values.
payer_category = df['payer_type'].map(payer_mapping)
df['payer_category'] = payer_category

In [463]:
# Preview the long-format frame with the derived `setting` and `payer_category` columns.
df

Unnamed: 0,payer_name,additional_payer_notes,plan_name,local_code,line_type,code,ndc,rev_code,description,quantity,payer_type,standard_charge,setting,payer_category
0,AETNA [1001],CRH AETNA CONTRACT [115],AETNA ALLIED BENEFIT SYSTEM [100101],H128224,EAP,,,0270,HC Resusitator Bag,1,IP Price,46.75,inpatient,gross
1,AETNA [1001],CRH AETNA CONTRACT [115],AETNA MANAGED CARE PLANS [100102],H128224,EAP,,,0270,HC Resusitator Bag,1,IP Price,46.75,inpatient,gross
2,AETNA [1001],CRH AETNA CONTRACT [115],AETNA SIGNATURE ADMINISTRATORS GENERIC [100103],H128224,EAP,,,0270,HC Resusitator Bag,1,IP Price,46.75,inpatient,gross
3,AETNA [1001],CRH AETNA CONTRACT [115],AETNATRADITIONAL PLANS [100104],H128224,EAP,,,0270,HC Resusitator Bag,1,IP Price,46.75,inpatient,gross
4,AETNA [1001],CRH AETNA CONTRACT [115],1199 NATIONAL BENEFIT FUND(BAR) [100105],H128224,EAP,,,0270,HC Resusitator Bag,1,IP Price,46.75,inpatient,gross
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3836567,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],NY [3001901],H128209,EAP,,,0272,HC Resolution Tips 846,1,Cash Price,225,1,cash
3836568,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],MEDICAID,H128215,EAP,CPT« 94799,,0410,HC Respiratory Evaluation 742,1,Cash Price,169.46,1,cash
3836569,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],NY [3001901],H128215,EAP,CPT« 94799,,0410,HC Respiratory Evaluation 742,1,Cash Price,169.46,1,cash
3836570,MEDICAID ALTERNATE PAYOR [30019],CRH MEDICAID CONTRACT [118],MEDICAID,H128216,EAP,,,0270,HC Respiratory Therapy Instructions 846,1,Cash Price,1.25,1,cash


In [464]:
# For the '<Self-pay>' payer only the 'Cash Price' rows are kept; every other
# price row for that payer is removed.
is_self_pay = df['payer_name'] == '<Self-pay>'
is_cash_price = df['payer_type'] == 'Cash Price'
df.drop(index=df.index[is_self_pay & ~is_cash_price], inplace=True)

In [465]:
# Blank out quantities recorded as the string '1'.
df.loc[df['quantity'] == '1', 'quantity'] = pd.NA

# Strip currency formatting ('$', thousands separators, surrounding spaces)
# from the charge text. regex=False is spelled out because '$' is a regex
# end-of-string anchor and the default of `regex` differs between pandas
# versions; a literal replacement is what is wanted on all of them.
df['standard_charge'] = (
    df['standard_charge']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .str.strip()
)

In [466]:
# Discard rows that have no value in `standard_charge`.
df.dropna(subset=['standard_charge'], inplace=True)

In [467]:
# Build `hcpcs_cpt`: the bare five-character code taken from `code` values that
# start with an 'HCPCS' or 'CPT' marker.
#
# The CPT marker is matched as 'CPT' followed by any symbol characters instead
# of one specific registered-trademark character: the frame previewed earlier
# in this notebook shows the prefix loaded as 'CPT«', which a literal
# trademark sign never matches and which would leave every CPT row empty.
code_prefix_pattern = r'HCPCS |CPT[^\w\s]* '

# na=False keeps the mask strictly boolean for rows with no `code`.
has_code_prefix = df['code'].str.match(r'HCPCS|CPT', na=False)

df['hcpcs_cpt'] = (
    df['code']
    .str.replace(code_prefix_pattern, '', regex=True)
    .str.strip()
    .where(has_code_prefix)  # rows without a recognised prefix stay missing
)

# Keep only values that are exactly five characters long. This single test
# already rejects 'CUSTOM' and eight-character values, as well as missing ones.
df.loc[df['hcpcs_cpt'].str.len() != 5, 'hcpcs_cpt'] = pd.NA

In [None]:
# `payer_type` has already been turned into `setting` and `payer_category`,
# so the column itself is removed.
del df['payer_type']

In [None]:
# Input file name -> hospital identifier (stored as text).
# NOTE(review): the six-digit values look like CMS certification numbers — confirm.
hospital_map = dict([
    ('150539039_AURELIA-OSBORN-FOX-MEMORIAL-HOSPITAL_STANDARDCHARGES_0.csv', '330408'),
    ('135596796_MARY-IMOGENE-BASSETT-HOSPITAL_STANDARDCHARGES.csv', '330136'),
    ('161540394_OCONNOR-HOSPITAL_STANDARDCHARGES_0.csv', '331305'),
    ('150533578_LITTLE-FALLS-HOSPITAL_STANDARDCHARGES_0.csv', '331311'),
    ('141772971_COBLESKILL-REGIONAL-HOSPITAL_STANDARDCHARGES_0.csv', '331320'),
    ('150539039_AURELIA-OSBORN-FOX-MEMORIAL-HOSPITAL_STANDARDCHARGES_1.csv', '330085'),
])

# Look up the identifier for the file being processed (a file name missing
# from hospital_map raises KeyError) and stamp it on every row.
hosp_id = hospital_map[file]
df.loc[:, 'hospital_id'] = hosp_id

# Export is currently disabled:
# df.to_csv('.\\output_files\\' + hosp_id + file.split('_')[1] + '.csv', index=False)