# Found the PeaceHealth Chargemaster

In [2]:
import pandas as pd
import os, json

In [126]:
ph = pd.read_excel("./peacehealth.xlsx", None)

In [127]:
ph.keys()

dict_keys(['St Joseph Code List', 'Southwest Code List', 'Cottage Grove Code List', 'Riverbend Code List', 'St John Code List', 'University Code List', 'Peace Harbor Code List', 'United General Code List', 'Ketchikan Code List', 'Peace Island Code List', 'St Joseph Output', 'Southwest Output', 'St John Output', 'Riverbend Output', 'University Output', 'United General Output', 'Ketchikan Output', 'Peace Island Output', 'Cottage Grove Output', 'Peace Harbor Output'])

In [29]:
hospital_urls = json.load(open("../data/hospital_urls.json", "r"))

In [31]:
hospital_urls.keys()

dict_keys(['Arbor Health', 'Astria Sunnyside Hospital', 'Astria Toppenish Hospital', 'Cascade Behavioral Health', 'Cascade Medical', 'Cascade Valley Hospital and Clinics', 'Columbia Basin Hospital', 'Columbia County Health System', 'Confluence Health', 'Confluence Health/Central Washington Hospital', 'Confluence Health/Wenatchee Valley Hospital &amp; Clinics', 'Coulee Medical Center', 'East Adams Rural Healthcare', 'Eastern State Hospital', 'EvergreenHealth', 'EvergreenHealth Monroe', 'Fairfax Behavioral Health Everett', 'Fairfax Behavioral Health Kirkland', 'Fairfax Behavioral Health Monroe', 'Ferry County Health', 'Forks Community Hospital', 'Garfield County Hospital District', 'Harbor Regional Health', 'Inland Northwest Behavioral Health', 'Island Hospital', 'Jefferson Healthcare', 'Kadlec Regional Medical Center', 'Kaiser Foundation Health Plan of Washington', 'Kindred Hospital Seattle – First Hill', 'Kittitas Valley Healthcare', 'Klickitat Valley Health', 'Lake Chelan Health', 'Le

hospital, hospital_size, county, drg_code, name, price

In [128]:
ph_list = [
    'PeaceHealth St. Joseph Medical Center',
    'PeaceHealth Southwest Medical Center',
    'PeaceHealth St. John Medical Center',
    'PeaceHealth United General Medical Center',
    'PeaceHealth Peace Island Medical Center', 
    ]

These are the PeaceHealth hospitals that are a part of WSHA. So we need to only get the CDM information from these ones

In [117]:
ph.keys()

dict_keys(['St Joseph Code List', 'Southwest Code List', 'Cottage Grove Code List', 'Riverbend Code List', 'St John Code List', 'University Code List', 'Peace Harbor Code List', 'United General Code List', 'Ketchikan Code List', 'Peace Island Code List', 'St Joseph Output', 'Southwest Output', 'St John Output', 'Riverbend Output', 'University Output', 'United General Output', 'Ketchikan Output', 'Peace Island Output', 'Cottage Grove Output', 'Peace Harbor Output'])

In [129]:
for k in ph.copy():
    if 'Output' not in k:
        ph.pop(k, None)

In [130]:
del_list = ['Riverbend Output', 'University Output', 
            'Ketchikan Output', 'Cottage Grove Output', 
            'Peace Harbor Output']

for invalid_hospital in del_list:
    ph.pop(invalid_hospital, None)

In [131]:
ph.keys()

dict_keys(['St Joseph Output', 'Southwest Output', 'St John Output', 'United General Output', 'Peace Island Output'])

In [132]:
for name, k in zip(ph_list, ph):
    print(f"{name} | {k}")

PeaceHealth St. Joseph Medical Center | St Joseph Output
PeaceHealth Southwest Medical Center | Southwest Output
PeaceHealth St. John Medical Center | St John Output
PeaceHealth United General Medical Center | United General Output
PeaceHealth Peace Island Medical Center | Peace Island Output


Sheet names to hospital names match up

In [133]:
column_names = ph[list(ph.keys())[0]].columns.values.tolist() # a dict of df. ugly translation.
print(column_names)

['Service Line', 'CPT/DRG Code', 'Service Desc.', 'Inpatient / Outpatient', 'Price', 'Min Reimb', 'Max Reimb', 'Self-pay Rate', 'Unnamed: 8', 'Cigna', 'First Health', 'Intercare', 'Premera', 'Meritain', 'United Health Care']


Each sheet has specific insurance information from different providers. Need to remove these.

In [134]:
# Remove insurance related columns
for k,v in ph.items():
    for column in column_names:
        v.drop(v.iloc[:, 8:], inplace=True, axis=1)
        ph[k] = v

The dataset doesn't use NaN values or valid Null/None types. It has "N/A" and "TB" as string literals so can't use dropna(). Have to filter manually.

In [135]:
# Grab updated column names
column_names = ph[list(ph.keys())[0]].columns.values.tolist() # a dict of df. ugly translation.
print(column_names)

['Service Line', 'CPT/DRG Code', 'Service Desc.', 'Inpatient / Outpatient', 'Price', 'Min Reimb', 'Max Reimb', 'Self-pay Rate']


In [136]:
"""
#drop rows that contain specific 'value' in 'column_name'
df = df[df.column_name != value]
"""
invalid_values = ['N/A', 'TB']
for k, v in ph.items():
    for column in column_names:
        v = v[v[column].isin(invalid_values) == False]
    v = v[['CPT/DRG Code', 'Service Desc.', 'Price']]
    ph[k] = v.dropna()

Rename columns

In [138]:
for k, v in ph.items():
    ph[k].columns = ['drg_code', 'name', 'price']
    ph[k]['price'] = ph[k]['price'].astype(float)
    ph[k] = ph[k].round({'price': 4})

In [139]:
ph_dfs = []
for name, k in zip(ph_list, ph):
    county, nbeds = hospital_urls[name]['county'], hospital_urls[name]['nbeds']
    hospital_size = 'Small' if nbeds < 100 else 'Medium'
    hospital_size = 'Large' if nbeds > 175 else 'Medium'
    ph[k].insert(1, 'hospital_size', hospital_size)
    ph[k].insert(2, 'county', county)
    ph_dfs.append(ph[k])

In [140]:
all_phs = pd.concat(ph_dfs)
all_phs

Unnamed: 0,drg_code,hospital_size,county,name,price
0,216,Large,Whatcom,Cardiac Valve & Oth Maj Cardiothoracic Proc W ...,306198.98
1,743,Large,Whatcom,Uterine & Adnexa Proc For Non-Malignancy W/O C...,33923.70
2,470,Large,Whatcom,Major Hip And Knee Joint Replacement Or Reatta...,59529.03
3,460,Large,Whatcom,Spinal Fusion Except Cervical W/O Mcc,70992.56
4,473,Large,Whatcom,Cervical Spinal Fusion W/O Cc/Mcc,70391.22
...,...,...,...,...,...
295,99205,Medium,San Juan,"New Patient Office Or Other Outpatient Visit, ...",481.00
296,99243,Medium,San Juan,"Patient Office Consultation, Typically 40 Minutes",259.00
297,99244,Medium,San Juan,"Patient Office Consultation, Typically 60 Minutes",387.00
298,99385,Medium,San Juan,Initial New Patient Preventive Medicine Evalua...,286.00


In [141]:
all_phs.to_csv("all_peacehealth_fee.csv", index=False)

<100: small

100-175: middle

>175: large