In [1]:
import pandas as pd
import numpy as np

From: https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Marketplace-Products/index.html

File Info:
* '2015_OEP_County-Level_Public_Use_File.xlsx'
    * 2015 OEP County-Level Public Use File
    * The county and demographics PUF includes total health plan selections, as well as data for consumers with a plan selection segmented by age, race/ethnicity, Federal Poverty Level (FPL), consumer type, metal level, Cost-Sharing Reduction (CSR), and Advanced Premium Tax Credit (APTC). 
    * The 2015 Open Enrollment Period (OEP) for the Health Insurance Marketplaces ran between November 15, 2014 and February 15, 2015 and included additional special enrollment period activity reported through February 22, 2015. CMS has prepared a number of public use files summarizing plan selection activity from **November 15, 2014 through February 22, 2015.**
    * This PUF only includes data for the 37 states that used the HealthCare.gov platform in 2015.
* '2016_OEP_County-Level_Public_Use_File.xlsx'
    * 2016 OEP County-Level Public Use File.
    * The county and demographics PUF includes total health plan selections, as well as data for consumers with a plan selection segmented by age, race/ethnicity, Federal Poverty Level (FPL), consumer type, metal level, Cost-Sharing Reduction (CSR), and Advanced Premium Tax Credit (APTC).
    * The 2016 Open Enrollment Period (OEP) for the Health Insurance Marketplaces ran between **November 1, 2015 and January 31, 2016 and included additional Special Enrollment Period activity reported through February 1, 2016.**
    * This PUF only includes data for the 38 states that used the HealthCare.gov platform in 2016.
* '2017_OEP_County-Level_Public_Use_File.xlsx'
    * 2017 OEP ZIP Code-Level Public Use File. 
    * This ZIP code and APTC PUF includes total health plan selections, the count of consumers with APTC, and average APTC among consumers with APTC between **November 1, 2016 and January 31, 2017.**
    * This PUF only includes data for the 39 states that used the HealthCare.gov platform in 2017


In [2]:
# Read .xlsx.
df2015 = pd.read_excel('./data_files/Enrollment_Summary/2015_OEP_County-Level_Public_Use_File.xlsx', None)
df2016 = pd.read_excel('./data_files/Enrollment_Summary/2016_OEP_County-Level_Public_Use_File.xlsx', None)
df2017 = pd.read_excel('./data_files/Enrollment_Summary/2017_OEP_County-Level_Public_Use_File.xlsx', None)

# Append observation date columns.
df2015['(1) Consumer Type']['Observation_Start'] = '2014-11-15'
df2015['(1) Consumer Type']['Observation_End'] = '2015-02-22'
df2015['(1) Consumer Type']['Observation_Period_Name'] = '2015 OEP'

df2016['(1) Consumer Type']['Observation_Start'] = '2015-11-01'
df2016['(1) Consumer Type']['Observation_End'] = '2016-02-01'
df2016['(1) Consumer Type']['Observation_Period_Name'] = '2016 OEP'

df2017['(1) Consumer Type']['Observation_Start'] = '2016-11-01'
df2017['(1) Consumer Type']['Observation_End'] = '2017-01-31'
df2017['(1) Consumer Type']['Observation_Period_Name'] = '2017 OEP'

dfs = [df2015, df2016, df2017]

for frame_dict in dfs:
    for key in list(frame_dict):
        if key in ('Methods', 'Contents', 'Definitions', 'FAQs'):
            del frame_dict[key]
            
list(df2015)

['(1) Consumer Type',
 '(5) Household Income',
 '(2) Financial Assistance',
 '(3) Demographics',
 '(4) Metal Level']

In [3]:
names = []
for frame in dfs:
    names.extend(list(frame['(5) Household Income']))

print(sorted(set(names)))

metal_col_rename = {
    'Bronze': 'Tier Brone',
    'Catastrophic': 'Tier Catastrophic',
    'Gold' : 'Tier Gold',
    'Platinum': 'Tier Platinum',
    'Silver': 'Tier Silver'
}

demo_col_rename = {
    'Age < 18': 'Age 18 or Younger',
    'Age 18-25':'Age 18 to 25',
    'Age 26-34': 'Age 26 to 34',
    'Age 35-44':  'Age 35 to 44',
    'Age 45-54': 'Age 45 to 54',
    'Age 55-64':  'Age 55 to 64',
    'Age 65+': 'Age 65 or Older',
    'Age ≥65': 'Age 65 or Older',
    'Age Unknown': 'Age Unknown',
    'American Indian/ Alaska Native' :'Race American Indian or Alaska Native' ,
    'African-American': 'Race African-American',
    'Asian': 'Race Asian',
    'Hispanic': 'Race Hispanic',
    'Latino':  'Race Latino',
    'Multiracial': 'Race Multiracial',
    'Native Hawaiian/ Pacific Islander': 'Race Native Hawaiian or Pacific Islander',
    'Not Hispanic': 'Race Not Hispanic',
    'White': 'Race White',
    'Unknown': 'Race Unknown',
    'Unknown Race/ Ethnicity': 'Race Unknown',    
}

income_col_rename = {
    '< 100% of FPL': 'FPL 100 or Less',
    '≥ 100%  to ≤ 150% of FPL': 'FPL 100 to 150',
    '≥ 100% and ≤ 138% of FPL': 'FPL 100 to 138',
    '≥ 100% and ≤ 150% of FPL': 'FPL 100 to 150',
    '> 138% and ≤ 150 % of FPL': 'FPL 138 to 150',
    '> 150% and ≤ 200 % of FPL': 'FPL 150 to 200',
    '> 150% to ≤ 200% of FPL': 'FPL 150 to 200',
    '> 200% and ≤ 250% of FPL': 'FPL 200 to 250',
    '> 200% to ≤ 250% of FPL': 'FPL 200 to 250',
    '> 250% and ≤ 300% of FPL': 'FPL 250 to 300',
    '> 250% to ≤ 300% of FPL': 'FPL 250 to 300',
    '> 300% and ≤ 400% of FPL': 'FPL 300 to 400',
    '> 300% to ≤ 400% of FPL': 'FPL 300 to 400',
    '> 400% of FPL': 'FPL 400 or Greater',
    'Other': 'FPL Unknown',
    'Unknown FPL': 'FPL Unknown',
    'Unknown': 'FPL Unknown'
}
income_col_rename

['< 100% of FPL', '> 138% and ≤ 150 % of FPL', '> 150% and ≤ 200 % of FPL', '> 150% to ≤ 200% of FPL ', '> 200% and ≤ 250% of FPL', '> 200% to ≤ 250% of FPL ', '> 250% and ≤ 300% of FPL', '> 250% to ≤ 300% of FPL ', '> 300% and ≤ 400% of FPL', '> 300% to ≤ 400% of FPL ', '> 400% of FPL', 'County FIPS Code', 'County Name', 'Other', 'State', 'State ', 'Total Number of Consumers Who Have Selected a Marketplace Plan', 'Unknown', 'Unknown ', '≥ 100%  to ≤ 150% of FPL', '≥ 100% and ≤ 138% of FPL', '≥ 100% and ≤ 150% of FPL']


{'< 100% of FPL': 'FPL 100 or Less',
 '> 138% and ≤ 150 % of FPL': 'FPL 138 to 150',
 '> 150% and ≤ 200 % of FPL': 'FPL 150 to 200',
 '> 150% to ≤ 200% of FPL': 'FPL 150 to 200',
 '> 200% and ≤ 250% of FPL': 'FPL 200 to 250',
 '> 200% to ≤ 250% of FPL': 'FPL 200 to 250',
 '> 250% and ≤ 300% of FPL': 'FPL 250 to 300',
 '> 250% to ≤ 300% of FPL': 'FPL 250 to 300',
 '> 300% and ≤ 400% of FPL': 'FPL 300 to 400',
 '> 300% to ≤ 400% of FPL': 'FPL 300 to 400',
 '> 400% of FPL': 'FPL 400 or Greater',
 'Other': 'FPL Unknown',
 'Unknown': 'FPL Unknown',
 'Unknown FPL': 'FPL Unknown',
 '≥ 100%  to ≤ 150% of FPL': 'FPL 100 to 150',
 '≥ 100% and ≤ 138% of FPL': 'FPL 100 to 138',
 '≥ 100% and ≤ 150% of FPL': 'FPL 100 to 150'}

In [4]:
cols_to_remove = ['State', 'County Name', 'Total Number of Consumers Who Have Selected a Marketplace Plan']

for frame_dict in dfs:
    fips = frame_dict['(1) Consumer Type'][['County FIPS Code','Total Number of Consumers Who Have Selected a Marketplace Plan']]
    fips.columns = ['County FIPS Code','Sample Size']
    frame_dict['FIPS'] = fips
    
    for key in list(frame_dict):
        frame_dict[key].columns = frame_dict[key].columns.map(lambda x: x.strip())
        for col in frame_dict[key]:
            if col in cols_to_remove:
                frame_dict[key].drop(col, axis=1, inplace=True)
        if key == '(1) Consumer Type':
            frame_dict[key].rename(columns={'Active \nRe-enrollees': 'Active Re-enrollees'}, inplace = True)
        if key == '(3) Demographics':
            frame_dict[key].rename(columns=demo_col_rename, inplace = True)
        if key == '(4) Metal Level':
            frame_dict[key].rename(columns=metal_col_rename, inplace = True)
        if key == '(5) Household Income':
            frame_dict[key].rename(columns=income_col_rename, inplace = True)

In [5]:
# # Add a new data frame to each list of data frames with just FIPS codes.
# for frame_dict in dfs:
#     fips = pd.DataFrame(frame_dict['(1) Consumer Type']['County FIPS Code'])
#     fips.columns = ['FIPS']
#     frame_dict['FIPS'] = fips
# #     print(list(frame_dict))
#     print(fips)

In [6]:
enrollment_summary_df = pd.DataFrame()
for frame in dfs:
#     for df in frame:
#         frame[df].drop('State', axis=1, inplace=True)
    frame['FIPS'] = frame['FIPS'].merge(frame['(1) Consumer Type'], on='County FIPS Code', how='left')
    frame['FIPS'] = frame['FIPS'].merge(frame['(2) Financial Assistance'], on='County FIPS Code', how='left')
    frame['FIPS'] = frame['FIPS'].merge(frame['(3) Demographics'], on='County FIPS Code', how='left')
    frame['FIPS'] = frame['FIPS'].merge(frame['(4) Metal Level'], on='County FIPS Code', how='left')
    frame['FIPS'] = frame['FIPS'].merge(frame['(5) Household Income'], on='County FIPS Code', how='left')
    print(frame['FIPS'].columns.values)
          
#     frame['FIPS'].drop_duplicates('County FIPS Code_y', axis=1, inplace=True)

#     frame['FIPS'] = frame['FIPS'].merge(frame['(1) Consumer Type'], left_on='FIPS', right_on='County FIPS Code')
#     frame['FIPS'] = frame['FIPS'].merge(frame['(2) Financial Assistance'], left_on='FIPS', right_on='County FIPS Code')
#     frame['FIPS'] = frame['FIPS'].merge(frame['(3) Demographics'], left_on='FIPS', right_on='County FIPS Code')
#     frame['FIPS'] = frame['FIPS'].merge(frame['(4) Metal Level'], left_on='FIPS', right_on='County FIPS Code')
#     frame['FIPS'] = frame['FIPS'].merge(frame['(5) Household Income'], left_on='FIPS', right_on='County FIPS Code')
#     frame['FIPS'].drop_duplicates('County FIPS Code_y', axis=1, inplace=True)
    enrollment_summary_df = enrollment_summary_df.append(frame['FIPS'])
print(enrollment_summary_df.columns.values)

['County FIPS Code' 'Sample Size' 'New Consumers' 'Active Re-enrollees'
 'Automatic Re-enrollees' 'Observation_Start' 'Observation_End'
 'Observation_Period_Name' 'Consumers with CSR' 'Consumers with APTC'
 'Age 18 or Younger' 'Age 18 to 25' 'Age 26 to 34' 'Age 35 to 44'
 'Age 45 to 54' 'Age 55 to 64' 'Age 65 or Older' 'Age Unknown'
 'Race American Indian or Alaska Native' 'Race Asian'
 'Race African-American' 'Race Latino'
 'Race Native Hawaiian or Pacific Islander' 'Race Multiracial' 'Race White'
 'Race Unknown' 'Tier Catastrophic' 'Tier Brone' 'Tier Silver' 'Tier Gold'
 'Tier Platinum' 'FPL 100 or Less' 'FPL 100 to 150' 'FPL 150 to 200'
 'FPL 200 to 250' 'FPL 250 to 300' 'FPL 300 to 400' 'FPL 400 or Greater'
 'FPL Unknown']
['County FIPS Code' 'Sample Size' 'New Consumers' 'Active Re-enrollees'
 'Automatic Re-enrollees' 'Observation_Start' 'Observation_End'
 'Observation_Period_Name' 'Consumers with CSR' 'Consumers with APTC'
 'Age 18 or Younger' 'Age 18 to 25' 'Age 26 to 34' 'Age 3

In [7]:
print(enrollment_summary_df.columns.values)

['Active Re-enrollees' 'Age 18 or Younger' 'Age 18 to 25' 'Age 26 to 34'
 'Age 35 to 44' 'Age 45 to 54' 'Age 55 to 64' 'Age 65 or Older'
 'Age Unknown' 'Automatic Re-enrollees' 'Average APTC'
 'Consumers with APTC' 'Consumers with CSR' 'County FIPS Code'
 'FPL 100 or Less' 'FPL 100 to 138' 'FPL 100 to 150' 'FPL 138 to 150'
 'FPL 150 to 200' 'FPL 200 to 250' 'FPL 250 to 300' 'FPL 300 to 400'
 'FPL 400 or Greater' 'FPL Unknown' 'New Consumers' 'Observation_End'
 'Observation_Period_Name' 'Observation_Start' 'Race African-American'
 'Race American Indian or Alaska Native' 'Race Asian' 'Race Hispanic'
 'Race Latino' 'Race Multiracial'
 'Race Native Hawaiian or Pacific Islander' 'Race Not Hispanic'
 'Race Unknown' 'Race White' 'Sample Size' 'Tier Brone' 'Tier Catastrophic'
 'Tier Gold' 'Tier Platinum' 'Tier Silver']
