Data from:

Physician & Other Supplier Payments - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Physician-and-Other-Supplier2017

Hospital Outpatient - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Outpatient

APC to CPT/HCPCS crosswalk - Addendum B – January 2020
https://www.cms.gov/Medicare/Medicare-Fee-for-Service-Payment/HospitalOutpatientPPS/Addendum-A-and-Addendum-B-Updates

Zip Code to CBSA
https://www.huduser.gov/portal/datasets/usps_crosswalk.html![image.png](attachment:image.png)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
%matplotlib inline

In [3]:
pd.options.display.max_columns = 25

In [4]:
pd.options.display.max_rows = 25

In [5]:
# Reading in the CBSA to ZIP code crosswalk

cbsa_to_zip = pd.read_excel('../data/CBSA_ZIP_032020.xlsx', 
                            usecols = ['CBSA', 'ZIP'],
                            dtypes={'ZIP':'str'})

In [6]:
#Reading in HCPCS to APC crosswalk with Short Descriptor column

hcpcs_to_apc = pd.read_excel('../data/Addendum_B/2020_january_web_addendum_b.12312019.xlsx', header = 2, usecols = ['HCPCS Code', 'Short Descriptor', 'APC '])

In [43]:
#Reading in the Physician & Other Supplier Payments while discarding some columns

phys_other_payments = pd.read_csv('../data/Medicare_Provider_Util_Payment_PUF_CY2017/Medicare_Provider_Util_Payment_PUF_CY2017.txt', 
    sep='\t', 
    skiprows = [1], 
    usecols = lambda column : column not in ['average_submitted_chrg_amt', 'average_Medicare_payment_amt', 'average_Medicare_standard_amt', 'hcpcs_drug_indicator', 'medicare_participation_indicator'], 
    dtype = {'nppes_provider_zip':'str'},
    low_memory=False)

In [8]:
# Reading in the Hospital Outpatient while discarding some columns

column_exclude_list = ['Outlier\nComprehensive\nAPC\nServices', 'Average\nMedicare\nOutlier\nAmount', 'Average\nEstimated\nTotal\nSubmitted\nCharges', 'Average\nMedicare\nPayment\nAmount']

hosp_payments = pd.read_excel('../data/MUP_OHP_R19_P04_V10_D17_APC_Provider/MUP_OHP_R19_P04_V10_D17_APC_Provider.xlsx', 
                              header = 5, 
                              usecols = lambda column : column not in column_exclude_list)

In [9]:
hcpcs_to_apc.columns = ['hcpcs', 'descriptor', 'apc']

In [10]:
cbsa_to_zip.columns = ['cbsa', 'zip']

In [44]:
phys_other_payments.columns = ['npi', 'last_org_name', 'first_name', 'mi', 'creds', 'gender', 'entity', 'street1', 'street2' , 'city', 'zip','state', 'country', 'provider_type', 'service_loc', 'hcpcs', 'description', 'services_cnt', 'benefic_cnt', 'benefic_d_cnt', 'avg_medi_allowed_amt']

In [12]:
hosp_payments.columns = ['provider_id', 'provider_name', 'street', 'city', 'state', 'zip', 'region', 'apc', 'description', 'benefic_cnt', 'services_cnt', 'avg_medi_allowed_amt']

In [45]:
# Dropping non US

phys_other_payments = phys_other_payments[phys_other_payments.country == 'US']

In [46]:
# Making a dataframe with only office entities to compare with the Hospital Data.

phys_other_payments = phys_other_payments[phys_other_payments['entity'] == 'O']

In [48]:
phys_other_payments = phys_other_payments[phys_other_payments['provider_type'] == 'Ambulatory Surgical Center']

In [49]:
phys_other_payments['zip'] = phys_other_payments['zip'].str[:5]

In [17]:
# Putting leading zero back on zips

cbsa_to_zip['zip'] = cbsa_to_zip['zip'].apply(lambda x: '{0:0>5}'.format(x))

In [50]:
# Merging CBSA to Zip in physcian billing data

phys_asc_cbsa = pd.merge(phys_other_payments, cbsa_to_zip, how='left', on = ['zip'])

del phys_other_payments

In [20]:
# Dropping NaN values in the apc column of hcpcs to apc crosswalk

hcpcs_to_apc = hcpcs_to_apc.dropna(subset=['apc'])

In [23]:
# Putting leading zero back on zips

hosp_payments['zip'] = hosp_payments['zip'].apply(lambda x: '{0:0>5}'.format(x))

In [51]:
# Adding matching APC code to HCPCS in the office dataframe, keeping only rows that have an apc match

phys_asc_apc = pd.merge(phys_asc_cbsa, hcpcs_to_apc, how = 'inner', on = ['hcpcs'])

del phys_asc_cbsa

In [52]:
phys_apc_data = phys_asc_apc.drop(['first_name', 'mi', 'creds', 'gender'], axis = 1)

del phys_asc_apc

In [27]:
# Adding CBSA column to hospital data, keeping only rows with a CBSA

hosp_payments_cbsa = pd.merge(hosp_payments, cbsa_to_zip, how = 'inner', on = ['zip'])

del hosp_payments

In [28]:
cbsa_to_region = hosp_payments_cbsa.loc[:,['region', 'cbsa']]

In [29]:
cbsa_to_region = cbsa_to_region.drop_duplicates(subset=['cbsa'])

In [30]:
apc_to_description = hosp_payments_cbsa.loc[:, ['apc', 'description']]

In [31]:
apc_to_description = apc_to_description.drop_duplicates(subset=['apc'])

In [32]:
list = []

for cbsa in hosp_payments_cbsa['cbsa'].unique():
    for apc in hosp_payments_cbsa['apc'].unique():
        selection = hosp_payments_cbsa[(hosp_payments_cbsa['cbsa'] == cbsa) & (hosp_payments_cbsa['apc'] == apc)]
        max_value = round(selection.avg_medi_allowed_amt.max(), 2)
        avg_value = round(selection.avg_medi_allowed_amt.mean(), 2)
        min_value = round(selection.avg_medi_allowed_amt.min(), 2)
        list.append([cbsa, apc, max_value, avg_value, min_value])

In [33]:
list_df = pd.DataFrame(list)

del list

In [34]:
list_df.columns = ['cbsa','apc', 'hosp_max', 'hosp_avg', 'hosp_min']

In [35]:
list_df_region = pd.merge(list_df, cbsa_to_region, how='left', on = ['cbsa'])

del list_df

In [36]:
hosp_table = pd.merge(list_df_region, apc_to_description, how='left', on=['apc'])

del list_df_region

In [37]:
hosp_table = hosp_table.dropna(subset=['hosp_avg'])

In [53]:
phys_apc_data = phys_apc_data.dropna(subset=['cbsa'])

In [54]:
phys_apc_data['cbsa'] = phys_apc_data['cbsa'].astype('int')

In [55]:
phys_apc_data['apc'] = phys_apc_data['apc'].astype('int')

In [64]:
# Getting all the unique apc's that are in the hospital table

hosp_apc_list = hosp_table['apc'].unique()

In [68]:
# Narrowing the Ambulatory Surgical Center table down to rows that have an apc that is in the hosp table

phys_apc_data = phys_apc_data[phys_apc_data['apc'].isin(hosp_apc_list)]

In [74]:
phys_apc_data = phys_apc_data[phys_apc_data['cbsa'].isin(hosp_table['cbsa'].unique())]

In [92]:
phys_testing = phys_apc_data.loc[:, ['hcpcs', 'avg_medi_allowed_amt', 'cbsa']]

In [94]:
list2 = []

for cbsa in phys_testing['cbsa'].unique():
    for hcpcs in phys_testing['hcpcs'].unique():
        selection = phys_testing[(phys_testing['cbsa'] == cbsa) & (phys_testing['hcpcs'] == hcpcs)]
        max_value = round(selection.avg_medi_allowed_amt.max(), 2)
        avg_value = round(selection.avg_medi_allowed_amt.mean(), 2)
        min_value = round(selection.avg_medi_allowed_amt.min(), 2)
        list2.append([cbsa, hcpcs, max_value, avg_value, min_value])

KeyboardInterrupt: 

In [82]:
list2_df = pd.DataFrame(list2)

del list2

In [87]:
list2_df.columns = ['cbsa','hcpcs', 'office_max', 'office_avg', 'office_min']

In [88]:
list2_df

Unnamed: 0,cbsa,hcpcs,office_max,office_avg,office_min
0,46700,0191T,3549.09,3295.66,3042.24
1,46700,20610,35.35,30.81,26.26
2,46700,29823,837.13,837.13,837.13
3,46700,29824,1016.51,1016.51,1016.51
4,31900,0191T,2320.99,2320.99,2320.99
...,...,...,...,...,...
1627,32900,29824,810.97,810.97,810.97
1631,16580,29824,720.14,720.14,720.14
1635,11620,29824,827.61,827.61,827.61
1639,24940,29824,655.09,655.09,655.09


In [85]:
list2_df = list2_df.dropna(subset=['office_avg'])

In [266]:
hosp_office_data = pd.merge(list2_df, hosp_table, how='inner', on = ['cbsa', 'apc'])

del list2_df
del hosp_table
del phys_apc_data

In [268]:
hosp_office_data

Unnamed: 0,cbsa,apc,office_max,office_avg,office_min,hosp_max,hosp_avg,hosp_min,region,description
0,27740,5113,1050.94,772.29,597.59,1906.12,1906.12,1906.12,TN - Johnson City,Level 3 Musculoskeletal Procedures
1,27740,5114,2211.67,2211.67,2211.67,4285.93,4207.59,4129.25,TN - Johnson City,Level 4 Musculoskeletal Procedures
2,27740,5464,19140.93,19140.93,19140.93,21335.92,21335.92,21335.92,TN - Johnson City,Level 4 Neurostimulator and Related Procedures
3,27740,5491,837.80,825.64,812.00,1497.46,1497.46,1497.46,TN - Johnson City,Level 1 Intraocular Procedures
4,27740,5073,889.35,889.35,889.35,1764.02,1764.02,1764.02,TN - Johnson City,Level 3 Excision/ Biopsy/ Incision and Drainage
...,...,...,...,...,...,...,...,...,...,...
4845,21820,5491,1010.83,1005.72,1000.62,2212.06,2212.06,2212.06,AK - Anchorage,Level 1 Intraocular Procedures
4846,34260,5113,1052.65,852.70,596.51,2275.25,2275.25,2275.25,MO - Springfield,Level 3 Musculoskeletal Procedures
4847,34260,5114,2288.29,2183.22,2078.14,4824.46,4824.46,4824.46,MO - Springfield,Level 4 Musculoskeletal Procedures
4848,34260,5431,668.70,644.60,620.49,1454.97,1454.97,1454.97,MO - Springfield,Level 1 Nerve Procedures


In [96]:
# Selcting a cbsa and a apc and returing the max, mean, and min values for those selections.

selection = hosp_payments_cbsa[(hosp_payments_cbsa['cbsa'] == 20020) & (hosp_payments_cbsa['apc'] == 5072)]
max_value = round(selection.avg_medi_allowed_amt.max(), 2)
avg_value = round(selection.avg_medi_allowed_amt.mean(), 2)
min_value = round(selection.avg_medi_allowed_amt.min(), 2)

print('Region = ', selection['region'][0])
print('Procedure = ', selection['description'][0])
print()
print('Max = ', max_value)
print('Mean = ', avg_value)
print('Min = ', min_value)

Region =  AL - Dothan
Procedure =  Level 2 Excision/ Biopsy/ Incision and Drainage

Max =  1102.92
Mean =  1028.73
Min =  986.77
