Data from:

Physician & Other Supplier Payments - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Physician-and-Other-Supplier2017

Hospital Outpatient - Detailed Data
https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Outpatient

APC to CPT/HCPCS crosswalk - Addendum B – January 2020
https://www.cms.gov/Medicare/Medicare-Fee-for-Service-Payment/HospitalOutpatientPPS/Addendum-A-and-Addendum-B-Updates

Zip Code to CBSA
https://www.huduser.gov/portal/datasets/usps_crosswalk.html

![image.png](attachment:image.png)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Show up to 25 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 25)

In [4]:
# Show up to 25 rows when a DataFrame is displayed; longer frames are truncated.
pd.set_option('display.max_rows', 25)

In [5]:
# Load the CBSA-to-ZIP crosswalk workbook, keeping only the two columns
# needed to attach a CBSA code to a ZIP code.

cbsa_to_zip = pd.read_excel(
    '../data/CBSA_ZIP_032020.xlsx',
    usecols=['CBSA', 'ZIP'],
)

In [6]:
# Load the HCPCS-to-APC crosswalk (Addendum B), including the Short Descriptor column.
# header=2 means the column names are taken from the third row of the sheet.

hcpcs_to_apc = pd.read_excel(
    '../data/Addendum_B/2020_january_web_addendum_b.12312019.xlsx',
    header=2,
    # 'APC ' carries a trailing space; it must match the sheet's header exactly.
    usecols=['HCPCS Code', 'Short Descriptor', 'APC '],
)

In [7]:
# Load the Physician & Other Supplier payment file (tab-separated),
# leaving out the columns this notebook does not use.

phys_columns_to_skip = (
    'average_submitted_chrg_amt',
    'average_Medicare_payment_amt',
    'average_Medicare_standard_amt',
    'hcpcs_drug_indicator',
    'medicare_participation_indicator',
)

phys_other_payments = pd.read_csv(
    '../data/Medicare_Provider_Util_Payment_PUF_CY2017/Medicare_Provider_Util_Payment_PUF_CY2017.txt',
    sep='\t',
    skiprows=[1],  # skip the file line at index 1, i.e. the line right after the header row
    usecols=lambda name: name not in phys_columns_to_skip,
    low_memory=False,
)

In [8]:
# Load the Hospital Outpatient workbook, skipping the columns listed below.
# The sheet's header cells contain embedded line breaks, hence the '\n' in the names.

column_exclude_list = [
    'Outlier\nComprehensive\nAPC\nServices',
    'Average\nMedicare\nOutlier\nAmount',
    'Average\nEstimated\nTotal\nSubmitted\nCharges',
    'Average\nMedicare\nPayment\nAmount',
]

hosp_payments = pd.read_excel(
    '../data/MUP_OHP_R19_P04_V10_D17_APC_Provider/MUP_OHP_R19_P04_V10_D17_APC_Provider.xlsx',
    header=5,  # column names are taken from the sixth row of the sheet
    usecols=lambda name: name not in column_exclude_list,
)

In [9]:
# Rename by source header rather than by position: read_excel returns the
# usecols columns in file order, not in the order of the usecols list, so a
# positional assignment would silently mislabel columns if the sheet layout changed.
hcpcs_to_apc = hcpcs_to_apc.rename(
    columns={
        'HCPCS Code': 'hcpcs',
        'Short Descriptor': 'descriptor',
        'APC ': 'apc',  # source header has a trailing space
    }
)

In [10]:
# Rename by source header rather than by position: read_excel returns the
# usecols columns in file order, not in the order of the usecols list, so a
# positional assignment would swap 'cbsa' and 'zip' if the sheet listed ZIP first.
cbsa_to_zip = cbsa_to_zip.rename(columns={'CBSA': 'cbsa', 'ZIP': 'zip'})

In [11]:
# Assign short column names by position (21 names, one per column kept when the file was read).
phys_other_payments.columns = [
    # provider identity
    'npi', 'last_org_name', 'first_name', 'mi', 'creds', 'gender', 'entity',
    # provider address
    'street1', 'street2', 'city', 'zip', 'state', 'country',
    # provider type and service
    'provider_type', 'service_loc', 'hcpcs', 'description',
    # volume and payment
    'services_cnt', 'benefic_cnt', 'benefic_d_cnt', 'avg_medi_allowed_amt',
]

In [12]:
# Keep only the first five characters of each zip value.
phys_other_payments['zip'] = phys_other_payments['zip'].str.slice(stop=5)

In [15]:
# Dropping non US

# .copy() makes the filtered result an independent frame, so the later
# assignments to its 'zip' column do not trigger SettingWithCopyWarning.
phys_other_payments = phys_other_payments[phys_other_payments.country == 'US'].copy()

In [16]:
# Convert the zip column from object dtype to a numeric dtype.
# errors='raise' is the default: any value that cannot be parsed stops the cell.

phys_other_payments['zip'] = pd.to_numeric(phys_other_payments['zip'], errors='raise')

In [34]:
# Turn each zip back into text, left-padded with zeros to a width of five.

phys_other_payments['zip'] = phys_other_payments['zip'].map('{0:0>5}'.format)

In [46]:
# Turn each crosswalk zip into text, left-padded with zeros to a width of five,
# so it matches the zip format used in the physician data.

cbsa_to_zip['zip'] = cbsa_to_zip['zip'].map('{0:0>5}'.format)

In [49]:
# Attach a CBSA code to every physician billing row by zip.
# A left join keeps rows whose zip has no crosswalk entry (cbsa is NaN for those).
# NOTE(review): the displayed data shows the same billing row twice with two
# different cbsa values, so a zip appears to map to more than one CBSA and this
# join can duplicate rows. Confirm that is acceptable for downstream statistics.

phys_w_cbsa = phys_other_payments.merge(cbsa_to_zip, how='left', on='zip')

In [58]:
# Keep only rows whose entity code is 'O', for comparison with the hospital data.
# NOTE(review): this filters on `entity`, not `service_loc`; the displayed result
# includes a row with entity 'O' and service_loc 'F'. Confirm which column was intended.

phys_w_cbsa_office = phys_w_cbsa.loc[phys_w_cbsa['entity'].eq('O')]

In [65]:
# Remove crosswalk rows that have no APC code; only the 'apc' column is checked.

hcpcs_to_apc = hcpcs_to_apc.dropna(axis='index', subset=['apc'])

In [69]:
# Assign short column names by position (12 names, one per column kept when the workbook was read).
hosp_payments.columns = [
    # provider identity and address
    'provider_id', 'provider_name', 'street', 'city', 'state', 'zip', 'region',
    # procedure
    'apc', 'description',
    # volume and payment
    'benefic_cnt', 'services_cnt', 'avg_medi_allowed_amt',
]

In [72]:
# Turn each hospital zip into text, left-padded with zeros to a width of five,
# so it matches the zip format used in the crosswalk.

hosp_payments['zip'] = hosp_payments['zip'].map('{0:0>5}'.format)

In [78]:
# Attach the APC code and short descriptor for each HCPCS code in the office dataframe.
# An inner join discards rows whose HCPCS code has no entry in the crosswalk.

phys_w_cbsa_office_apc = phys_w_cbsa_office.merge(hcpcs_to_apc, how='inner', on='hcpcs')

In [82]:
# Drop the individual-person fields, which are not used in the rest of the analysis.
phys_apc_data = phys_w_cbsa_office_apc.drop(columns=['first_name', 'mi', 'creds', 'gender'])

In [89]:
# Attach a CBSA code to every hospital row by zip.
# An inner join discards hospitals whose zip has no crosswalk entry.

hosp_payments_cbsa = hosp_payments.merge(cbsa_to_zip, how='inner', on='zip')

In [135]:
# Selecting a CBSA and an APC and returning the max, mean, and min values for those selections.

selected_cbsa = 20020
selected_apc = 5072

selection = hosp_payments_cbsa[
    (hosp_payments_cbsa['cbsa'] == selected_cbsa) & (hosp_payments_cbsa['apc'] == selected_apc)
]
max_value = round(selection.avg_medi_allowed_amt.max(), 2)
avg_value = round(selection.avg_medi_allowed_amt.mean(), 2)
min_value = round(selection.avg_medi_allowed_amt.min(), 2)

if selection.empty:
    # Nothing matched, so there is no region or procedure to report.
    print('No hospital rows match the selected CBSA and APC.')
else:
    # The filtered frame keeps the index labels of the merged frame, so a label
    # lookup with [0] only works when row 0 happens to match. .iloc[0] takes the
    # first matching row by position regardless of its label.
    print('Region = ', selection['region'].iloc[0])
    print('Procedure = ', selection['description'].iloc[0])
    print()
    print('Max = ', max_value)
    print('Mean = ', avg_value)
    print('Min = ', min_value)

Region =  AL - Dothan
Procedure =  Level 2 Excision/ Biopsy/ Incision and Drainage

Max =  1102.92
Mean =  1028.73
Min =  986.77


In [83]:
# Display the office-level physician data with CBSA and APC attached
# (the rendered table is truncated according to the display.max_rows setting).
phys_apc_data

Unnamed: 0,npi,last_org_name,entity,street1,street2,city,zip,state,country,provider_type,service_loc,hcpcs,description,services_cnt,benefic_cnt,benefic_d_cnt,avg_medi_allowed_amt,cbsa,descriptor,apc
0,1003001322,NODAWAY COUNTY HEALTH CENTER,O,515 N MAIN ST,,MARYVILLE,64468,MO,US,Mass Immunizer Roster Biller,O,G0008,Administration of influenza virus vaccine,203.0,202,203,23.040000,32340.0,Admin influenza virus vac,5691.0
1,1003002254,WALGREEN CO.,O,5104 BOBBY HICKS HWY,,GRAY,37615,TN,US,Centralized Flu,O,G0008,Administration of influenza virus vaccine,162.0,161,162,19.400000,27740.0,Admin influenza virus vac,5691.0
2,1003002254,WALGREEN CO.,O,5104 BOBBY HICKS HWY,,GRAY,37615,TN,US,Centralized Flu,O,G0008,Administration of influenza virus vaccine,162.0,161,162,19.400000,28700.0,Admin influenza virus vac,5691.0
3,1003004938,"CVS STATE CAPITAL, L.L.C.",O,446 SABATTUS ST,,LEWISTON,04240,ME,US,Mass Immunizer Roster Biller,O,G0008,Administration of influenza virus vaccine,180.0,180,180,19.959778,30340.0,Admin influenza virus vac,5691.0
4,1003005059,TORRINGTON AREA HEALTH DISTRICT,O,350 MAIN ST STE A,,TORRINGTON,06790,CT,US,Mass Immunizer Roster Biller,O,G0008,Administration of influenza virus vaccine,162.0,162,162,28.340000,45860.0,Admin influenza virus vac,5691.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190107,1972824142,ILLINOIS PROTON CENTER LLC,O,4455 WEAVER PKWY,,WARRENVILLE,60555,IL,US,Radiation Therapy Center,O,77525,"Proton treatment delivery, complex",923.0,40,923,986.350000,16980.0,Proton treatment complex,5625.0
190108,1972868420,"ADVANCE EYECARE ASSOCIATES, O.D., P.A.",O,15235 JOHN J DELANEY DR STE H,,CHARLOTTE,28277,NC,US,Clinic or Group Practice,O,92014,Eye and medical examination for diagnosis and ...,19.0,19,19,117.310000,16740.0,Eye exam&tx estab pt 1/>vst,5012.0
190109,1982676771,"SHREVEPORT ENDOSCOPY CENTER, A MEDICAL CORPORA...",O,3217 MABEL ST,,SHREVEPORT,71103,LA,US,Ambulatory Surgical Center,F,49082,Drainage of fluid from abdominal cavity,54.0,24,54,357.956296,43340.0,Abd paracentesis,5301.0
190110,1992182703,BRACHYTHERAPY CENTERS OF TEXAS LLC,O,6513 PRESTON RD,SUITE #300,PLANO,75024,TX,US,Radiation Therapy Center,O,0394T,High dose rate electronic brachytherapy,106.0,13,82,213.122736,19100.0,Hdr elctrnc skn surf brchytx,5622.0
