Data from:

Physician & Other Supplier Payments - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Physician-and-Other-Supplier2017

Hospital Outpatient - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Outpatient

APC to CPT/HCPCS crosswalk - Addendum B – January 2020
https://www.cms.gov/Medicare/Medicare-Fee-for-Service-Payment/HospitalOutpatientPPS/Addendum-A-and-Addendum-B-Updates

Zip Code to CBSA
https://www.huduser.gov/portal/datasets/usps_crosswalk.html

![image.png](attachment:image.png)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [3]:
# Show at most 25 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 25)

In [4]:
# Show at most 25 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 25)

In [6]:
# Reading in the CBSA to ZIP code crosswalk.
# The keyword is `dtype` (not `dtypes`): ZIP is requested as text so the codes
# are handled as strings from the start.

cbsa_to_zip = pd.read_excel('../data/CBSA_ZIP_032020.xlsx',
                            usecols = ['CBSA', 'ZIP'],
                            dtype = {'ZIP': 'str'})

# Rename by label rather than by position, so the result does not depend on
# the order in which the two columns appear in the workbook.
cbsa_to_zip = cbsa_to_zip.rename(columns = {'CBSA': 'cbsa', 'ZIP': 'zip'})

In [7]:
# Load the HCPCS-to-APC crosswalk together with its Short Descriptor column.
# The column labels are taken from the sheet row selected by `header = 2`;
# note that the source label 'APC ' carries a trailing space.

hcpcs_to_apc = pd.read_excel(
    '../data/Addendum_B/2020_january_web_addendum_b.12312019.xlsx',
    usecols = ['HCPCS Code', 'Short Descriptor', 'APC '],
    header = 2,
)

# Short, lowercase names used by the rest of the notebook.
hcpcs_to_apc.columns = ['hcpcs', 'descriptor', 'apc']

In [8]:
# Load the Physician & Other Supplier Payments file (tab separated), leaving
# out five columns that are not used. The provider ZIP is read as text.
# File line 1 (the line right after the header) is skipped.

phys_other_payments = pd.read_csv(
    '../data/Medicare_Provider_Util_Payment_PUF_CY2017/Medicare_Provider_Util_Payment_PUF_CY2017.txt',
    sep = '\t',
    skiprows = [1],
    usecols = lambda name: name not in ('average_submitted_chrg_amt',
                                        'average_Medicare_payment_amt',
                                        'average_Medicare_standard_amt',
                                        'hcpcs_drug_indicator',
                                        'medicare_participation_indicator'),
    dtype = {'nppes_provider_zip': 'str'},
    low_memory = False,
)

# Positional rename of the 21 remaining columns to short names.
phys_other_payments.columns = [
    'npi', 'last_org_name', 'first_name', 'mi', 'creds', 'gender', 'entity',
    'street1', 'street2', 'city', 'zip', 'state', 'country',
    'provider_type', 'service_loc',
    'hcpcs', 'description',
    'services_cnt', 'benefic_cnt', 'benefic_d_cnt', 'avg_medi_allowed_amt',
]

In [9]:
# Load the Hospital Outpatient workbook, leaving out the columns listed in
# `column_exclude_list` (the source labels contain embedded newlines).

column_exclude_list = [
    'Outlier\nComprehensive\nAPC\nServices',
    'Average\nMedicare\nOutlier\nAmount',
    'Average\nEstimated\nTotal\nSubmitted\nCharges',
    'Average\nMedicare\nPayment\nAmount',
]

hosp_payments = pd.read_excel(
    '../data/MUP_OHP_R19_P04_V10_D17_APC_Provider/MUP_OHP_R19_P04_V10_D17_APC_Provider.xlsx',
    usecols = lambda name: name not in column_exclude_list,
    header = 5,
)

# Positional rename of the 12 remaining columns to short names.
hosp_payments.columns = [
    'provider_id', 'provider_name', 'street', 'city', 'state', 'zip',
    'region', 'apc', 'description',
    'benefic_cnt', 'services_cnt', 'avg_medi_allowed_amt',
]

In [10]:
# Keep only the Non-Hospital rows whose country is 'US'

phys_other_payments = phys_other_payments.loc[phys_other_payments['country'].eq('US')]

In [11]:
# Keep only rows whose entity code is 'O', for comparison with the Hospital Data.
# NOTE(review): 'O' presumably marks providers registered as organizations
# (as opposed to individuals) - confirm against the CMS data dictionary.

phys_other_payments = phys_other_payments.loc[phys_other_payments['entity'].eq('O')]

In [12]:
# Keep only rows whose provider type is 'Ambulatory Surgical Center'

phys_other_payments = phys_other_payments.loc[
    phys_other_payments['provider_type'].eq('Ambulatory Surgical Center')
]

In [13]:
# Keep only the first five characters of each provider ZIP.
phys_other_payments = phys_other_payments.assign(zip = phys_other_payments['zip'].str[:5])

In [14]:
# Left-pad every crosswalk ZIP with '0' to a width of five characters

cbsa_to_zip['zip'] = cbsa_to_zip['zip'].map('{0:0>5}'.format)

In [15]:
# Attach the CBSA to each Non-Hospital row via its ZIP (left join, so rows
# without a crosswalk match are kept with a missing cbsa)

phys_asc_cbsa = phys_other_payments.merge(cbsa_to_zip, on = 'zip', how = 'left')

# The pre-merge frame is no longer needed; release it.
del phys_other_payments

In [16]:
# Keep only the crosswalk rows that actually carry an APC value

hcpcs_to_apc = hcpcs_to_apc[hcpcs_to_apc['apc'].notna()]

In [17]:
# Left-pad every hospital ZIP with '0' to a width of five characters

hosp_payments['zip'] = hosp_payments['zip'].map('{0:0>5}'.format)

In [18]:
# Attach the APC code to each Non-Hospital row via its HCPCS code; the inner
# join keeps only rows that have an APC match

phys_asc_apc = phys_asc_cbsa.merge(hcpcs_to_apc, on = 'hcpcs', how = 'inner')

# The pre-merge frame is no longer needed; release it.
del phys_asc_cbsa

In [19]:
# Drop the person-level name/credential/gender columns.
phys_apc_data = phys_asc_apc.drop(columns = ['first_name', 'mi', 'creds', 'gender'])

# The wider frame is no longer needed; release it.
del phys_asc_apc

In [20]:
# Attach the CBSA to each hospital row via its ZIP; the inner join keeps only
# rows whose ZIP appears in the crosswalk

hosp_payments_cbsa = hosp_payments.merge(cbsa_to_zip, on = 'zip', how = 'inner')

# The pre-merge frame is no longer needed; release it.
del hosp_payments

In [21]:
# Lookup source: the region recorded alongside each CBSA in the hospital data.
cbsa_to_region = hosp_payments_cbsa[['region', 'cbsa']]

In [22]:
# One row per CBSA: keep the first region seen for each.
cbsa_to_region = cbsa_to_region[~cbsa_to_region.duplicated(subset = ['cbsa'], keep = 'first')]

In [23]:
# Lookup source: the description recorded alongside each APC in the hospital data.
apc_to_description = hosp_payments_cbsa[['apc', 'description']]

In [24]:
# One row per APC: keep the first description seen for each.
apc_to_description = apc_to_description[~apc_to_description.duplicated(subset = ['apc'], keep = 'first')]

In [65]:
# Max / mean / min of the average Medicare allowed amount for every
# (cbsa, apc) pair in the hospital data.
hosp_group = (
    hosp_payments_cbsa
    .groupby(['cbsa', 'apc'])['avg_medi_allowed_amt']
    .agg(hosp_max = 'max', hosp_avg = 'mean', hosp_min = 'min')
)

In [66]:
# Turn the (cbsa, apc) index back into ordinary columns, then round to cents.
hosp_group = hosp_group.reset_index()
hosp_group = hosp_group.round(decimals = 2)

In [67]:
# CBSA 99999 is a filler value for unassigned CBSA; exclude those rows

hosp_group = hosp_group.loc[hosp_group['cbsa'].ne(99999)]

In [68]:
# Add the region for each CBSA (left join keeps every grouped row).
hosp_group_region = hosp_group.merge(cbsa_to_region, on = 'cbsa', how = 'left')

# The pre-merge frame is no longer needed; release it.
del hosp_group

In [69]:
# Add the APC description (left join keeps every grouped row).
hosp_table = hosp_group_region.merge(apc_to_description, on = 'apc', how = 'left')

# The pre-merge frame is no longer needed; release it.
del hosp_group_region

In [70]:
# Discard rows that have no hospital average.
hosp_table = hosp_table[hosp_table['hosp_avg'].notna()]

In [42]:
# Discard Non-Hospital rows whose ZIP found no CBSA in the crosswalk.
phys_apc_data = phys_apc_data[phys_apc_data['cbsa'].notna()]

In [43]:
# Store the CBSA code as an integer.
phys_apc_data = phys_apc_data.astype({'cbsa': 'int'})

In [44]:
# Store the APC code as an integer.
phys_apc_data = phys_apc_data.astype({'apc': 'int'})

In [71]:
# Every distinct APC that appears in the hospital table

hosp_apc_list = pd.unique(hosp_table['apc'])

In [46]:
# Keep only the Ambulatory Surgical Center rows whose APC also occurs in the hosp table

phys_apc_data = phys_apc_data.loc[phys_apc_data['apc'].isin(hosp_apc_list)]

In [47]:
# Likewise keep only rows whose CBSA also occurs in the hosp table.
phys_apc_data = phys_apc_data.loc[phys_apc_data['cbsa'].isin(pd.unique(hosp_table['cbsa']))]

In [48]:
# Lookup source: the description recorded alongside each HCPCS code.
hcpcs_to_description = phys_apc_data[['hcpcs', 'description']]

In [49]:
# One row per HCPCS code: keep the first description seen for each.
hcpcs_to_description = hcpcs_to_description[
    ~hcpcs_to_description.duplicated(subset = ['hcpcs'], keep = 'first')
]

In [72]:
# The state code is taken from the first two characters of the region label.
hosp_table = hosp_table.assign(state = hosp_table['region'].str[:2])

In [52]:
# Lookup source: the state recorded alongside each CBSA in the hospital table.
cbsa_to_state = hosp_table[['cbsa', 'state']]

In [53]:
# One row per CBSA: keep the first state seen for each.
cbsa_to_state = cbsa_to_state[~cbsa_to_state.duplicated(subset = ['cbsa'], keep = 'first')]

In [73]:
# Keep the part of the region label that follows the first '-'.
# Splitting only once (n = 1) preserves names that themselves contain a hyphen;
# an unlimited split would cut such a name off at its own hyphen.
hosp_table['region'] = hosp_table['region'].str.split('-', n = 1).str[1]

In [74]:
# Trim surrounding whitespace from the region name.
hosp_table = hosp_table.assign(region = hosp_table['region'].str.strip())

In [75]:
# Display the hospital table: max/avg/min allowed amount per (cbsa, apc),
# with the region, APC description and state columns attached.
hosp_table

Unnamed: 0,cbsa,apc,hosp_max,hosp_avg,hosp_min,region,description,state
0,10100,5072,1216.83,1215.02,1213.21,Sioux Falls,Level 2 Excision/ Biopsy/ Incision and Drainage,SD
1,10100,5073,2114.53,2114.53,2114.53,Sioux Falls,Level 3 Excision/ Biopsy/ Incision and Drainage,SD
2,10100,5091,2459.49,2459.49,2459.49,Sioux Falls,Level 1 Breast/Lymphatic Surgery and Related P...,SD
3,10100,5092,4348.75,4348.75,4348.75,Sioux Falls,Level 2 Breast/Lymphatic Surgery and Related P...,SD
4,10100,5112,1183.42,1183.42,1183.42,Sioux Falls,Level 2 Musculoskeletal Procedures,SD
...,...,...,...,...,...,...,...,...
23297,49780,5462,3827.96,3827.96,3827.96,Columbus,Level 2 Neurostimulator and Related Procedures,OH
23298,49780,5463,17153.82,17153.82,17153.82,Columbus,Level 3 Neurostimulator and Related Procedures,OH
23299,49780,5464,27087.56,27087.56,27087.56,Columbus,Level 4 Neurostimulator and Related Procedures,OH
23300,49780,5491,1832.79,1832.79,1832.79,Columbus,Level 1 Intraocular Procedures,OH


In [62]:
def phys_state(state_list):
    """Summarize allowed amounts per CBSA and HCPCS code for the given state(s).

    Rows of the notebook-level ``phys_apc_data`` frame whose ``state`` is in
    ``state_list`` are grouped by ``cbsa`` and ``hcpcs``. For each group the
    max, mean and min of ``avg_medi_allowed_amt`` are reported as
    ``office_max``, ``office_avg`` and ``office_min``, rounded to two decimals.

    Parameters
    ----------
    state_list : list of str, or str
        State codes to keep, e.g. ``['TN']``. A single code passed as a bare
        string is treated as a one-element list.

    Returns
    -------
    pandas.DataFrame
        Columns ``cbsa``, ``hcpcs``, ``office_max``, ``office_avg``,
        ``office_min``; rows with a missing ``office_avg`` are dropped.

    Notes
    -----
    The result is also bound to the global name ``phys_data_state``, because
    later cells in this notebook read that name directly.
    """
    global phys_data_state

    # Series.isin only accepts list-likes, so wrap a lone state code.
    if isinstance(state_list, str):
        state_list = [state_list]

    state_phys_data = phys_apc_data[phys_apc_data['state'].isin(state_list)]

    phys_data_state = (
        state_phys_data
        .groupby(['cbsa', 'hcpcs'])
        .agg(office_max=('avg_medi_allowed_amt', 'max'),
             office_avg=('avg_medi_allowed_amt', 'mean'),
             office_min=('avg_medi_allowed_amt', 'min'))
        .reset_index()
        .round(2)
        .dropna(subset = ['office_avg'])
    )

    return phys_data_state

In [63]:
# Build and display the Tennessee summary. The call also stores its result in
# the global `phys_data_state`, which the following cells rely on.
phys_state(['TN'])

Unnamed: 0,cbsa,hcpcs,office_max,office_avg,office_min
0,11940,66982,843.80,843.80,843.80
1,11940,66984,843.80,843.80,843.80
2,16860,0191T,2408.89,2396.15,2370.66
3,16860,26055,542.43,531.04,519.65
4,16860,26160,627.50,627.50,627.50
...,...,...,...,...,...
469,46100,66982,835.99,823.67,811.35
470,46100,66984,834.88,833.76,832.64
471,46100,67904,562.85,562.85,562.85
472,46100,67950,544.24,544.24,544.24


In [76]:
# Attach the short descriptor and APC code to each HCPCS row.
# Any 'descriptor'/'apc' columns left over from an earlier run of this cell are
# dropped first: the cell overwrites its own input, so without this a second
# run would produce suffixed duplicates ('apc_x', 'apc_y') and break the merge
# on 'apc' that follows.
phys_data_state = pd.merge(phys_data_state.drop(columns = ['descriptor', 'apc'], errors = 'ignore'),
                           hcpcs_to_apc, how = 'left', on = ['hcpcs'])

In [77]:
# Line up each (cbsa, apc) row with the hospital figures for the same pair;
# the left join keeps rows that have no hospital counterpart.
merged_data = phys_data_state.merge(hosp_table, on = ['cbsa', 'apc'], how = 'left')

In [78]:
# Keep rows whose hospital-side state is 'TN'; rows that found no hospital
# match have a missing state and are therefore dropped as well.
merged_data = merged_data.loc[merged_data['state'].eq('TN')]

In [79]:
# Display the combined table: per-(cbsa, hcpcs) office_* figures next to the
# hosp_* figures of the matching (cbsa, apc) pair.
merged_data

Unnamed: 0,cbsa,hcpcs,office_max,office_avg,office_min,descriptor,apc,hosp_max,hosp_avg,hosp_min,region,description,state
2,16860,0191T,2408.89,2396.15,2370.66,Insert ant segment drain int,5492.0,3148.69,2858.93,2397.92,Chattanooga,Level 2 Intraocular Procedures,TN
3,16860,26055,542.43,531.04,519.65,Incise finger tendon sheath,5112.0,1121.25,1093.91,1070.75,Chattanooga,Level 2 Musculoskeletal Procedures,TN
4,16860,26160,627.50,627.50,627.50,Remove tendon sheath lesion,5112.0,1121.25,1093.91,1070.75,Chattanooga,Level 2 Musculoskeletal Procedures,TN
5,16860,27570,617.54,617.54,617.54,Fixation of knee joint,5112.0,1121.25,1093.91,1070.75,Chattanooga,Level 2 Musculoskeletal Procedures,TN
6,16860,28285,781.26,781.26,781.26,Repair of hammertoe,5113.0,2245.72,2158.94,2076.43,Chattanooga,Level 3 Musculoskeletal Procedures,TN
...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,46100,66982,835.99,823.67,811.35,Xcapsl ctrc rmvl cplx wo ecp,5491.0,1674.79,1636.60,1598.40,Nashville,Level 1 Intraocular Procedures,TN
470,46100,66984,834.88,833.76,832.64,Xcapsl ctrc rmvl w/o ecp,5491.0,1674.79,1636.60,1598.40,Nashville,Level 1 Intraocular Procedures,TN
471,46100,67904,562.85,562.85,562.85,Repair eyelid defect,5503.0,1567.07,1567.07,1567.07,Nashville,"Level 3 Extraocular, Repair, and Plastic Eye P...",TN
472,46100,67950,544.24,544.24,544.24,Revision of eyelid,5503.0,1567.07,1567.07,1567.07,Nashville,"Level 3 Extraocular, Repair, and Plastic Eye P...",TN


In [None]:
# Write the comparison table (including its index) for the visualization step.
merged_data.to_csv(path_or_buf = '../file_for_viz.csv')