Data from:

Physician & Other Supplier Payments - Detailed Data https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Physician-and-Other-Supplier2017

Zip Code to CBSA https://www.huduser.gov/portal/datasets/usps_crosswalk.html

![image.png](attachment:image.png)

Population data https://www.census.gov/data/tables/time-series/demo/popest/2010s-total-metro-and-micro-statistical-areas.html#par_textimage

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Widen DataFrame previews: show up to 25 columns and 100 rows.
pd.set_option('display.max_columns', 25)
pd.set_option('display.max_rows', 100)

In [4]:
# Read the CBSA-to-ZIP crosswalk, keeping only the two key columns.
#
# * The keyword is `dtype` (the original `dtypes` is not a read_excel argument),
#   so ZIP is now actually requested as text instead of being parsed as a number.
# * `usecols` selects columns but does not reorder them; indexing with the same
#   list pins the (CBSA, ZIP) order that the positional rename later in the
#   notebook relies on.
crosswalk_cols = ['CBSA', 'ZIP']

cbsa_to_zip = pd.read_excel('../data/CBSA_ZIP_032020.xlsx',
                            usecols=crosswalk_cols,
                            dtype={'ZIP': 'str'})[crosswalk_cols]

In [5]:
# Load the CY2017 Physician & Other Supplier payments file (tab-delimited),
# leaving out the columns this notebook does not use.
dropped_puf_columns = [
    'average_submitted_chrg_amt',
    'average_Medicare_payment_amt',
    'average_Medicare_standard_amt',
    'hcpcs_drug_indicator',
    'medicare_participation_indicator',
]


def keep_puf_column(column):
    """Return True for every column name that is not in the discard list."""
    return column not in dropped_puf_columns


phys_other_payments = pd.read_csv(
    '../data/Medicare_Provider_Util_Payment_PUF_CY2017/Medicare_Provider_Util_Payment_PUF_CY2017.txt',
    sep='\t',
    skiprows=[1],  # skip the line at index 1, i.e. the one right after the header
    usecols=keep_puf_column,
    dtype={'nppes_provider_zip': 'str'},  # keep ZIP codes as text
    low_memory=False,
)

In [6]:
# Load the population estimates, keeping the CBSA code, the two text columns
# and the 2017 estimate.
#
# The path uses forward slashes like every other load in this notebook: the
# previous backslash form only resolved on Windows and depended on invalid
# string escape sequences being passed through unchanged.
#
# NOTE(review): read_csv returns the selected columns in file order, not in the
# order listed in `usecols`; the positional rename later in the notebook depends
# on that order -- confirm it against the CSV header.
cbsa_population = pd.read_csv('../data/csa-est2019-alldata.csv', engine='python',
                              usecols=['CBSA', 'LSAD', 'NAME', 'POPESTIMATE2017'])


In [7]:
# Replace the raw headers with short snake_case names. The assignment is
# positional, so every list has to follow the column order of its frame.
crosswalk_names = ['cbsa', 'zip']

payment_names = [
    'npi', 'last_org_name', 'first_name', 'mi', 'creds', 'gender', 'entity',
    'street1', 'street2', 'city', 'zip', 'state', 'country',
    'provider_type', 'service_loc', 'hcpcs', 'description',
    'services_cnt', 'benefic_cnt', 'benefic_d_cnt', 'avg_medi_allowed_amt',
]

population_names = ['cbsa', 'lsad', 'name', 'pop2017']

cbsa_to_zip.columns = crosswalk_names
phys_other_payments.columns = payment_names
# NOTE(review): later cells split the 'lsad' column on ',' to get state codes,
# which suggests 'lsad' actually receives the area-name text and 'name' the
# LSAD category -- confirm the loaded column order before relying on the labels.
cbsa_population.columns = population_names

In [8]:
# Drop county-level rows.
# The LSAD category is looked up in both text columns: later cells split the
# 'lsad' column on ',' to obtain state codes, so that column holds "Name, ST"
# strings and comparing it alone against an LSAD category can never match.
is_county_row = cbsa_population[['lsad', 'name']].eq('County or equivalent').any(axis=1)
cbsa_population = cbsa_population[~is_county_row]

In [9]:
# Keep only the rows that carry a CBSA code.
has_cbsa_code = cbsa_population['cbsa'].notna()
cbsa_population = cbsa_population[has_cbsa_code]

In [10]:
# Cast the CBSA codes to integers. `assign` builds a new frame instead of
# writing a column into the filtered one, which avoids pandas'
# SettingWithCopyWarning and leaves the column position unchanged.
cbsa_population = cbsa_population.assign(cbsa=cbsa_population['cbsa'].astype('int'))

In [11]:
# Sum the 2017 population over all rows sharing a CBSA code; the result is a
# one-column frame indexed by 'cbsa'.
cbsa_pop_sum = cbsa_population[['cbsa', 'pop2017']].groupby('cbsa').sum()

In [12]:
# Keep only the billing rows whose country is 'US'.
in_us = phys_other_payments['country'] == 'US'
phys_other_payments = phys_other_payments[in_us]

In [13]:
# Cut every ZIP down to its first five characters.
phys_other_payments['zip'] = phys_other_payments['zip'].str.slice(stop=5)

In [14]:
# Left-pad crosswalk ZIPs with '0' to a width of five characters.
cbsa_to_zip['zip'] = cbsa_to_zip['zip'].map('{0:0>5}'.format)

In [15]:
# Keep the rows whose service location code is 'O'.
is_office_row = phys_other_payments['service_loc'] == 'O'
phys_office = phys_other_payments.loc[is_office_row]

In [16]:
# Narrow the office rows down to providers located in Tennessee.
is_tn_row = phys_office['state'] == 'TN'
phys_office_tn = phys_office.loc[is_tn_row]

In [17]:
# Attach a CBSA code to each Tennessee billing row via its ZIP code.
# Rows whose ZIP is missing from the crosswalk keep a NaN 'cbsa' (left join).
# NOTE(review): if the crosswalk lists one ZIP under several CBSAs, this join
# repeats the billing row once per CBSA -- confirm the crosswalk is one-to-one
# or de-duplicate it before merging.
phys_tn_cbsa = phys_office_tn.merge(cbsa_to_zip, how='left', on='zip')

In [18]:
# Discard billing rows that did not receive a CBSA code in the merge.
matched_cbsa = phys_tn_cbsa['cbsa'].notna()
phys_tn_cbsa = phys_tn_cbsa[matched_cbsa]

In [19]:
# Cast the CBSA codes to integers so they match the integer codes used on the
# population side. `assign` returns a new frame rather than writing into the
# filtered one, which avoids pandas' SettingWithCopyWarning.
phys_tn_cbsa = phys_tn_cbsa.assign(cbsa=phys_tn_cbsa['cbsa'].astype('int'))

In [20]:
# CBSA code 99999 is an 'other' catch-all bucket; leave it out.
is_real_cbsa = phys_tn_cbsa['cbsa'].ne(99999)
phys_tn_cbsa = phys_tn_cbsa.loc[is_real_cbsa]

In [21]:
# Total 'benefic_d_cnt' for each (provider type, CBSA) pair, returned as a flat
# frame with the grouping keys as ordinary columns.
provider_cbsa_sum_visits_tn = phys_tn_cbsa.groupby(
    ['provider_type', 'cbsa'], as_index=False
)[['benefic_d_cnt']].sum()

In [22]:
# Same totals as the provider-first table, but keyed (and therefore sorted)
# by CBSA first and provider type second.
cbsa_provider_sum_visits_tn = phys_tn_cbsa.groupby(
    ['cbsa', 'provider_type'], as_index=False
)[['benefic_d_cnt']].sum()

In [23]:
# Add the 2017 population of each CBSA (left join; unmatched CBSAs get NaN).
provider_cbsa_sum_visits_tn = provider_cbsa_sum_visits_tn.merge(
    cbsa_pop_sum, how='left', on='cbsa'
)

In [24]:
# Add the 2017 population of each CBSA (left join; unmatched CBSAs get NaN).
cbsa_provider_sum_visits_tn = cbsa_provider_sum_visits_tn.merge(
    cbsa_pop_sum, how='left', on='cbsa'
)

In [25]:
# Take the text after the first comma of the 'lsad' column as the state label.
# `assign` returns a new frame instead of adding a column to the filtered one,
# which avoids pandas' SettingWithCopyWarning.
# NOTE(review): splitting on ',' implies that 'lsad' holds "Area name, ST"
# text rather than an LSAD category -- confirm the column labels.
cbsa_population = cbsa_population.assign(
    state=cbsa_population['lsad'].str.split(',').str[1]
)

In [26]:
# Take the text before the first comma of the 'lsad' column as the region name.
# `assign` returns a new frame instead of adding a column to the filtered one,
# which avoids pandas' SettingWithCopyWarning.
cbsa_population = cbsa_population.assign(
    region=cbsa_population['lsad'].str.split(',').str[0]
)

In [27]:
# Trim the whitespace left around both labels by the comma split. Both columns
# are rebuilt in a single `assign`, which returns a new frame rather than
# writing into the filtered one (avoids pandas' SettingWithCopyWarning).
cbsa_population = cbsa_population.assign(
    state=cbsa_population['state'].str.strip(),
    region=cbsa_population['region'].str.strip(),
)

In [28]:
# Lookup table with just the CBSA code and its state / region labels.
lookup_columns = ['cbsa', 'state', 'region']
cbsa_region_state = cbsa_population[lookup_columns]

In [29]:
# Keep the first row for every CBSA code so the lookup has one row per CBSA.
repeated_cbsa = cbsa_region_state.duplicated(subset=['cbsa'])
cbsa_region_state = cbsa_region_state[~repeated_cbsa]

In [30]:
# Attach state and region labels to the provider-first totals (left join).
provider_cbsa_merge = provider_cbsa_sum_visits_tn.merge(
    cbsa_region_state, how='left', on='cbsa'
)

In [31]:
# Attach state and region labels to the CBSA-first totals (left join).
cbsa_provider_merge = cbsa_provider_sum_visits_tn.merge(
    cbsa_region_state, how='left', on='cbsa'
)

In [32]:
# Rate per 1,000 residents: benefic_d_cnt / pop2017 * 1000.
cbsa_provider_merge['cnt_per_1kcapita'] = (
    cbsa_provider_merge['benefic_d_cnt']
    .div(cbsa_provider_merge['pop2017'])
    .mul(1000)
)

In [33]:
# Rate per 1,000 residents: benefic_d_cnt / pop2017 * 1000.
provider_cbsa_merge['cnt_per_1kcapita'] = (
    provider_cbsa_merge['benefic_d_cnt']
    .div(provider_cbsa_merge['pop2017'])
    .mul(1000)
)

In [34]:
# Remove the rows that did not get a state label from the lookup merge.
cbsa_provider_merge = cbsa_provider_merge[cbsa_provider_merge['state'].notna()]
provider_cbsa_merge = provider_cbsa_merge[provider_cbsa_merge['state'].notna()]

In [40]:
# Inspect which state labels are present after the merges.
provider_cbsa_merge['state'].unique()

array(['TN', 'TN-MS-AR', 'TN-GA', 'TN-VA', 'TX', 'UT', 'CT', 'FL'],
      dtype=object)

In [38]:
# Drop the rows labelled with a state other than Tennessee's CBSAs:
# CT, UT, FL and TX are removed in one pass.
excluded_states = ['CT', 'UT', 'FL', 'TX']
cbsa_provider_merge = cbsa_provider_merge[
    ~cbsa_provider_merge['state'].isin(excluded_states)
]

In [41]:
# Drop the rows labelled with a state other than Tennessee's CBSAs:
# CT, UT, FL and TX are removed in one pass.
excluded_states = ['CT', 'UT', 'FL', 'TX']
provider_cbsa_merge = provider_cbsa_merge[
    ~provider_cbsa_merge['state'].isin(excluded_states)
]

In [44]:
# Write the CBSA-first table to disk; with to_csv's defaults the row index is
# written as the first, unnamed column.
cbsa_provider_merge.to_csv(path_or_buf='../cbsa_provider.csv')

In [45]:
# Write the provider-first table to disk; with to_csv's defaults the row index
# is written as the first, unnamed column.
provider_cbsa_merge.to_csv(path_or_buf='../provider_cbsa.csv')