Data from:

Physician & Other Supplier Payments - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Physician-and-Other-Supplier2017

Hospital Outpatient - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Outpatient

APC to CPT/HCPCS crosswalk - Addendum B – January 2020
https://www.cms.gov/Medicare/Medicare-Fee-for-Service-Payment/HospitalOutpatientPPS/Addendum-A-and-Addendum-B-Updates

Zip Code to CBSA
https://www.huduser.gov/portal/datasets/usps_crosswalk.html

![image.png](attachment:image.png)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
%matplotlib inline

In [None]:
# Show at most 25 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 25)

In [None]:
# Show at most 25 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 25)

In [None]:
# Reading in the CBSA to ZIP code crosswalk.
# Only the CBSA and ZIP columns are loaded. ZIP is requested as text via the
# `dtype` keyword (`dtypes` is not a read_excel parameter) so that ZIP values
# are handled as strings rather than numbers.

cbsa_to_zip = pd.read_excel('../data/CBSA_ZIP_032020.xlsx',
                            usecols=['CBSA', 'ZIP'],
                            dtype={'ZIP': 'str'})

In [None]:
# Reading in the HCPCS to APC crosswalk together with the Short Descriptor
# column. The header names are taken from row index 2 of the sheet, and the
# 'APC ' name is matched exactly, trailing space included.

hcpcs_to_apc = pd.read_excel(
    '../data/Addendum_B/2020_january_web_addendum_b.12312019.xlsx',
    header=2,
    usecols=['HCPCS Code', 'Short Descriptor', 'APC '],
)

In [None]:
# Reading in the Physician & Other Supplier Payments file (tab separated),
# skipping the file line at index 1 and leaving out the columns named in the
# `usecols` filter below. The provider zip is kept as text.

phys_other_payments = pd.read_csv(
    '../data/Medicare_Provider_Util_Payment_PUF_CY2017/Medicare_Provider_Util_Payment_PUF_CY2017.txt',
    sep='\t',
    skiprows=[1],
    usecols=lambda header: header not in [
        'average_submitted_chrg_amt',
        'average_Medicare_payment_amt',
        'average_Medicare_standard_amt',
        'hcpcs_drug_indicator',
        'medicare_participation_indicator',
    ],
    dtype={'nppes_provider_zip': 'str'},
    low_memory=False,
)

In [None]:
# Reading in the Hospital Outpatient workbook. Header names come from row
# index 5 of the sheet; the multi-line headers listed in column_exclude_list
# are not loaded.

column_exclude_list = [
    'Outlier\nComprehensive\nAPC\nServices',
    'Average\nMedicare\nOutlier\nAmount',
    'Average\nEstimated\nTotal\nSubmitted\nCharges',
    'Average\nMedicare\nPayment\nAmount',
]

hosp_payments = pd.read_excel(
    '../data/MUP_OHP_R19_P04_V10_D17_APC_Provider/MUP_OHP_R19_P04_V10_D17_APC_Provider.xlsx',
    header=5,
    usecols=lambda header: header not in column_exclude_list,
)

In [None]:
# Replacing the source headers with short snake_case names on all four frames.
# Assigning to `.columns` is positional, so every list has to follow the order
# in which the columns were read from the files.

hcpcs_to_apc.columns = ['hcpcs', 'descriptor', 'apc']

cbsa_to_zip.columns = ['cbsa', 'zip']

phys_other_payments.columns = [
    'npi', 'last_org_name', 'first_name', 'mi', 'creds', 'gender', 'entity',
    'street1', 'street2', 'city', 'zip', 'state', 'country',
    'provider_type', 'service_loc',
    'hcpcs', 'description',
    'services_cnt', 'benefic_cnt', 'benefic_d_cnt', 'avg_medi_allowed_amt',
]

hosp_payments.columns = [
    'provider_id', 'provider_name', 'street', 'city', 'state', 'zip', 'region',
    'apc', 'description',
    'benefic_cnt', 'services_cnt', 'avg_medi_allowed_amt',
]

In [None]:
# Narrowing the physician/supplier rows in a single pass. A row is kept only
# when all three conditions hold:
#   * country is 'US' (dropping non US),
#   * entity code is 'O',
#   * provider type is 'Ambulatory Surgical Center'.
# NOTE(review): entity 'O' was described as "office" in the original comment;
# presumably it is the organization entity code - confirm against the CMS data
# dictionary.

phys_other_payments = phys_other_payments[
    (phys_other_payments['country'] == 'US')
    & (phys_other_payments['entity'] == 'O')
    & (phys_other_payments['provider_type'] == 'Ambulatory Surgical Center')
]

# Keep only the first five characters of each zip.
phys_other_payments['zip'] = phys_other_payments['zip'].str.slice(0, 5)

In [None]:
# Putting leading zeros back on zips: each value is rendered as text and
# left-padded with '0' to a width of five characters.

cbsa_to_zip['zip'] = cbsa_to_zip['zip'].map('{0:0>5}'.format)

In [None]:
# Attaching the CBSA to every physician billing row by zip. A left join keeps
# rows whose zip has no CBSA (cbsa is then NaN); a zip listed more than once in
# cbsa_to_zip yields one output row per listing.

phys_asc_cbsa = phys_other_payments.merge(cbsa_to_zip, on='zip', how='left')

# The un-merged frame is no longer needed.
del phys_other_payments

In [None]:
# Keeping only crosswalk rows whose apc value is present (not NaN).

hcpcs_to_apc = hcpcs_to_apc[hcpcs_to_apc['apc'].notna()]

In [None]:
# Putting leading zeros back on hospital zips: each value is rendered as text
# and left-padded with '0' to a width of five characters.

hosp_payments['zip'] = hosp_payments['zip'].map('{0:0>5}'.format)

In [None]:
# Adding the matching APC code to each HCPCS row (inner join, so only rows with
# an apc match survive) and then discarding the first_name / mi / creds /
# gender columns.

phys_apc_data = (
    phys_asc_cbsa
    .merge(hcpcs_to_apc, how='inner', on='hcpcs')
    .drop(columns=['first_name', 'mi', 'creds', 'gender'])
)

# The pre-merge frame is no longer needed.
del phys_asc_cbsa

In [None]:
# Adding the CBSA column to the hospital data by zip. The inner join keeps
# only hospital rows whose zip is present in cbsa_to_zip.

hosp_payments_cbsa = hosp_payments.merge(cbsa_to_zip, on='zip', how='inner')

# The un-merged frame is no longer needed.
del hosp_payments

In [None]:
# Lookup tables derived from the hospital data. drop_duplicates keeps the
# first row seen for each key.

# One region per cbsa.
cbsa_to_region = (
    hosp_payments_cbsa[['region', 'cbsa']]
    .drop_duplicates(subset=['cbsa'])
)

# One description per apc.
apc_to_description = (
    hosp_payments_cbsa[['apc', 'description']]
    .drop_duplicates(subset=['apc'])
)

In [None]:
# Summarising hospital payments: one row per (cbsa, apc) pair that occurs in
# hosp_payments_cbsa, holding the max / mean / min of avg_medi_allowed_amt
# rounded to two decimals.
#
# A single groupby replaces the nested loop over every CBSA x APC combination,
# which re-filtered the whole frame once per pair and shadowed the builtin
# `list`. Pairs with no rows are simply absent here rather than appearing as
# all-NaN rows; groups whose amounts are all NaN still produce a NaN row.

list_df = (
    hosp_payments_cbsa
    .groupby(['cbsa', 'apc'])['avg_medi_allowed_amt']
    .agg(['max', 'mean', 'min'])
    .round(2)
    .reset_index()
)

# reset_index puts the two keys first, followed by max / mean / min.
list_df.columns = ['cbsa', 'apc', 'hosp_max', 'hosp_avg', 'hosp_min']

In [None]:
# Building the hospital table: attach the region for each cbsa and the
# description for each apc (both left joins), then discard rows that have no
# hosp_avg value.

hosp_table = (
    list_df
    .merge(cbsa_to_region, how='left', on='cbsa')
    .merge(apc_to_description, how='left', on='apc')
    .dropna(subset=['hosp_avg'])
)

# The intermediate summary frame is no longer needed.
del list_df

In [None]:
# Dropping rows without a cbsa, then casting the cbsa and apc keys to int.

phys_apc_data = (
    phys_apc_data
    .dropna(subset=['cbsa'])
    .astype({'cbsa': 'int', 'apc': 'int'})
)

In [None]:
hosp_table

In [None]:
# Getting all the unique apc's that are in the hospital table
hosp_apc_list = hosp_table['apc'].unique()

# Narrowing the Ambulatory Surgical Center table down to rows whose apc AND
# cbsa both appear in the hospital table.
phys_apc_data = phys_apc_data[
    phys_apc_data['apc'].isin(hosp_apc_list)
    & phys_apc_data['cbsa'].isin(hosp_table['cbsa'].unique())
]

In [None]:
# One description per hcpcs code; drop_duplicates keeps the first row seen
# for each code.
hcpcs_to_description = (
    phys_apc_data[['hcpcs', 'description']]
    .drop_duplicates(subset=['hcpcs'])
)

In [None]:
# The first two characters of region are stored as the state column.
hosp_table['state'] = hosp_table['region'].str.slice(0, 2)

In [None]:
hosp_table

In [None]:
# One state per cbsa; drop_duplicates keeps the first row seen for each cbsa.
cbsa_to_state = (
    hosp_table[['cbsa', 'state']]
    .drop_duplicates(subset=['cbsa'])
)

In [None]:
# Reducing region to the text that follows the first '-' and trimming the
# surrounding whitespace. The split is limited to one cut (n=1) so that names
# which themselves contain a hyphen (e.g. 'Winston-Salem') are kept whole
# instead of being truncated at their own hyphen.
hosp_table['region'] = (
    hosp_table['region']
    .str.split('-', n=1)
    .str[1]
    .str.strip()
)

In [None]:
hosp_table

In [None]:
phys_apc_data

In [None]:
def phys_state(state_list):
    """Summarise allowed amounts per CBSA and HCPCS code for the given states.

    The module-level ``phys_apc_data`` frame is narrowed to rows whose
    ``state`` is in ``state_list``; for every (cbsa, hcpcs) pair present in
    that subset the max, mean and min of ``avg_medi_allowed_amt`` are
    computed and rounded to two decimals.

    Args:
        state_list: List of state codes to keep, e.g. ``['TN']``.

    Returns:
        A DataFrame with the columns ``cbsa``, ``hcpcs``, ``office_max``,
        ``office_avg`` and ``office_min``. Rows without an ``office_avg``
        are dropped. When no row matches the requested states an empty
        frame with those columns is returned.

    Side effects:
        The result is also stored in the global ``phys_data_state``, which
        later notebook cells read.
    """
    global phys_data_state

    summary_columns = ['cbsa', 'hcpcs', 'office_max', 'office_avg', 'office_min']
    state_phys_data = phys_apc_data[phys_apc_data['state'].isin(state_list)]

    # Nothing to aggregate: return a well-formed empty table instead of
    # failing while assigning column names to an empty frame.
    if state_phys_data.empty:
        phys_data_state = pd.DataFrame(columns=summary_columns)
        return phys_data_state

    # One groupby pass instead of re-filtering the frame for every
    # cbsa x hcpcs combination; pairs with no rows never appear.
    summary = (
        state_phys_data
        .groupby(['cbsa', 'hcpcs'])['avg_medi_allowed_amt']
        .agg(['max', 'mean', 'min'])
        .round(2)
        .reset_index()
    )
    summary.columns = summary_columns

    phys_data_state = summary.dropna(subset=['office_avg'])
    return phys_data_state

In [None]:
# Build the per-(cbsa, hcpcs) summary for Tennessee. Besides returning the
# table (shown as this cell's output), phys_state stores it in the global
# phys_data_state, which the following cells read.
phys_state(['TN'])

In [None]:
# Attaching the crosswalk columns (including apc) to the state summary.
phys_data_state = phys_data_state.merge(hcpcs_to_apc, on='hcpcs', how='left')

# Lining the office figures up with the hospital figures for the same cbsa
# and apc, then keeping only rows whose hospital-side state is 'TN'.
merged_data = (
    phys_data_state
    .merge(hosp_table, on=['cbsa', 'apc'], how='left')
    .loc[lambda frame: frame['state'] == 'TN']
)

# Writing the comparison table out for the visualisation step.
merged_data.to_csv('../file_for_viz.csv')

In [None]:
merged_data

In [None]:
phys_data_state

In [None]:
# Summarising office payments: one row per (cbsa, hcpcs) pair that occurs in
# phys_testing, holding the max / mean / min of avg_medi_allowed_amt rounded
# to two decimals.
#
# A single groupby replaces the nested loop over every cbsa x hcpcs
# combination, which re-filtered the whole frame once per pair. Pairs with no
# rows are simply absent rather than appearing as all-NaN rows.
#
# NOTE(review): `phys_testing` is not defined in the visible part of this
# notebook - confirm which frame is meant (it needs the cbsa, hcpcs and
# avg_medi_allowed_amt columns) before running this cell.

list2_df = (
    phys_testing
    .groupby(['cbsa', 'hcpcs'])['avg_medi_allowed_amt']
    .agg(['max', 'mean', 'min'])
    .round(2)
    .reset_index()
)

# reset_index puts the two keys first, followed by max / mean / min.
list2_df.columns = ['cbsa', 'hcpcs', 'office_max', 'office_avg', 'office_min']

In [None]:
list2_df

In [None]:
# Keeping only rows that have an office_avg value.
list2_df = list2_df[list2_df['office_avg'].notna()]

In [None]:
# Joining the office summary to the hospital table. list2_df is keyed by
# (cbsa, hcpcs) and carries no 'apc' column, so the APC for each hcpcs is
# looked up in the crosswalk first; only then can the frames be joined on
# ['cbsa', 'apc']. Both joins are inner joins, so rows without an APC or
# without a hospital counterpart are dropped.
hosp_office_data = (
    list2_df
    .merge(hcpcs_to_apc, how='inner', on='hcpcs')
    .merge(hosp_table, how='inner', on=['cbsa', 'apc'])
)

# Releasing the large intermediate frames.
del list2_df
del hosp_table
del phys_apc_data

In [None]:
hosp_office_data

In [None]:
# Selecting a cbsa and an apc and returning the max, mean, and min values for
# those selections.

selection = hosp_payments_cbsa[(hosp_payments_cbsa['cbsa'] == 20020) & (hosp_payments_cbsa['apc'] == 5072)]
max_value = round(selection.avg_medi_allowed_amt.max(), 2)
avg_value = round(selection.avg_medi_allowed_amt.mean(), 2)
min_value = round(selection.avg_medi_allowed_amt.min(), 2)

if selection.empty:
    # Nothing matched, so there is no first row to read region/description from.
    print('No rows found for the selected cbsa and apc')
else:
    # .iloc[0] reads the first matching row by position; the filtered frame
    # keeps its original index labels, so a label lookup with [0] only works
    # when the row labelled 0 happens to be part of the selection.
    print('Region = ', selection['region'].iloc[0])
    print('Procedure = ', selection['description'].iloc[0])
    print()
    print('Max = ', max_value)
    print('Mean = ', avg_value)
    print('Min = ', min_value)