These plan attributes data includes high-level plan design, marketing and issuer information for all Qualified Health Plans (QHPs) offered by the Federal Health Insurance Marketplace as well as information about some plans offered outside of the Marketplace.

Plan attributes data can be downloaded from the The Center for Consumer Information & Insurance Oversight [Data Resources page](https://www.cms.gov/cciio/resources/data-resources/marketplace-puf.html). See also: [Plan Attributes Data Dictionary](https://www.cms.gov/CCIIO/Resources/Data-Resources/Downloads/Plan-Attributes-Data-Dictionary.pdf).

In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.options.display.max_seq_items = 2000

In [3]:
plan_attributes_2014 = pd.read_csv('./data_files/Plan_Attributes/Plan_Attributes_PUF_2014.csv',encoding = "ISO-8859-1" )
plan_attributes_2015 = pd.read_csv('./data_files/Plan_Attributes/Plan_Attributes_PUF_2015.csv',encoding = "ISO-8859-1" )
plan_attributes_2016 = pd.read_csv('./data_files/Plan_Attributes/Plan_Attributes_PUF_2016.csv',encoding = "ISO-8859-1" )
plan_attributes_2017 = pd.read_csv('./data_files/Plan_Attributes/Plan_Attributes_PUF_2017.csv',encoding = "ISO-8859-1" )
plan_attributes_2018 = pd.read_csv('./data_files/Plan_Attributes/Plan_Attributes_PUF_2018.csv',encoding = "ISO-8859-1" )

plan_attributes_2014.name = 'plan_attributes_2014_PUF'
plan_attributes_2015.name = 'plan_attributes_2015_PUF'
plan_attributes_2016.name = 'plan_attributes_2016_PUF'
plan_attributes_2017.name = 'plan_attributes_2017_PUF'
plan_attributes_2018.name = 'plan_attributes_2018_PUF'

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
dfs = [plan_attributes_2014, plan_attributes_2015, plan_attributes_2016, plan_attributes_2017, plan_attributes_2018]

In [5]:
for frame in dfs:
    print("%r has %r columns and %r rows." % (frame.name, len(list(frame)), len(frame)))

'plan_attributes_2014_PUF' has 126 columns and 18719 rows.
'plan_attributes_2015_PUF' has 126 columns and 31253 rows.
'plan_attributes_2016_PUF' has 151 columns and 26993 rows.
'plan_attributes_2017_PUF' has 152 columns and 21365 rows.
'plan_attributes_2018_PUF' has 152 columns and 13861 rows.


In [6]:
n = 0
for frame in dfs:
    n += len(frame)
print("For a total of %r rows." % (n))

For a total of 112191 rows.


In [7]:
col_rename = {
    'PlanEffictiveDate': 'PlanEffectiveDate'   
}

for frame in dfs:
    frame.rename(columns=col_rename, inplace = True)



In [8]:
consolidated_plan_attr = pd.DataFrame()
for frame in dfs:
    consolidated_plan_attr = consolidated_plan_attr.append(frame)

consolidated_plan_attr.name = 'consolidated_plan_attributes_2014-2018'

    
print("%r has %r columns and %r rows." % (consolidated_plan_attr.name, len(list(consolidated_plan_attr)), len(consolidated_plan_attr)))

'consolidated_plan_attributes_2014-2018' has 182 columns and 112191 rows.


In [9]:
len(list(consolidated_plan_attr))

182

In [10]:
drop_col_names = [
    'SpecialistRequiringReferral', 
    'OutOfCountryCoverageDescription', 
    'OutOfServiceAreaCoverageDescription', 
    'PlanLevelExclusions',
    'VersionNum'
]

consolidated_plan_attr = consolidated_plan_attr.drop(drop_col_names, axis=1)

In [11]:
# Replace inconsistent boolean values

yes_no_col_names = [
    'CompositeRatingOffered',
    'DentalOnlyPlan',
    'HSAOrHRAEmployerContribution',
    'IsGuaranteedRate',
    'IsHSAEligible', 
    'IsNoticeRequiredForPregnancy', 
    'IsReferralRequiredForSpecialist', 
    'MedicalDrugDeductiblesIntegrated',
    'MedicalDrugMaximumOutofPocketIntegrated',
    'MultipleInNetworkTiers', 
    'NationalNetwork', 
    'OutOfCountryCoverage', 
    'OutOfServiceAreaCoverage', 
    'UniquePlanDesign',
    'WellnessProgramOffered'
]

yes_no_replace = ['Allows Adult and Child-Only', 'Estimated Rate', 'Guaranteed Rate', 'no', 'No', 'NO', 'yes', 'Yes','YEs', 'YES']
yes_no_replacement = [None, None, None, False, False, False, True, True, True, True]

consolidated_plan_attr[yes_no_col_names] = consolidated_plan_attr[yes_no_col_names].replace(yes_no_replace,yes_no_replacement)
zipped = zip(yes_no_replace,yes_no_replacement)
for k,v in zipped:
    print(k, v)

Allows Adult and Child-Only None
Estimated Rate None
Guaranteed Rate None
no False
No False
NO False
yes True
Yes True
YEs True
YES True


In [12]:
pct_col_names = [
    'DEHBDedInnTier1Coinsurance', 
    'DEHBDedInnTier2Coinsurance', 
    'FirstTierUtilization', 
    'IssuerActuarialValue', 
    'MEHBDedInnTier1Coinsurance',
    'MEHBDedInnTier2Coinsurance', 
    'SecondTierUtilization', 
    'TEHBDedInnTier1Coinsurance', 
    'TEHBDedInnTier2Coinsurance'
]

consolidated_plan_attr[pct_col_names] = consolidated_plan_attr[pct_col_names].\
    apply(lambda x: x.str.rstrip('%').str.lstrip('$')).astype(float)/100


In [13]:
currency_col_names = ['SBCHavingDiabetesCoinsurance']

consolidated_plan_attr[currency_col_names] = consolidated_plan_attr[currency_col_names].\
    apply(lambda x: x.str.lstrip('$').str.replace(',','')).astype(float)

In [14]:
float_to_int_col_names = [
    'IssuerId2',
    'BenefitPackageId'
]

consolidated_plan_attr[float_to_int_col_names] = consolidated_plan_attr[float_to_int_col_names].apply(np.int64)
consolidated_plan_attr[float_to_int_col_names].head()

Unnamed: 0,IssuerId2,BenefitPackageId
0,21989,1
1,21989,1
2,21989,1
3,21989,1
4,21989,1


In [15]:
csr_names = {
    '73% AV Level Silver Plan': ['73% AV', 4, 'QHP', 1],
    '87% AV Level Silver Plan': ['87% AV', 5, 'QHP', 1],
    '94% AV Level Silver Plan': ['94% AV', 6, 'QHP', 1],
    'Limited Cost Sharing Plan Variation': ['Limited Cost Sharing', 3, 'QHP', 1],
    'Standard  On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Bronze Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Bronze On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Catastrophic Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Catastrophic On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Gold Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Gold On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard High Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard High On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Low Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Low On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Platinum  On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Standard Platinum Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Platinum On Exchange Plan': ['Platinum', 1, 'QHP', 1],
    'Standard Silver Off Exchange Plan': ['Standard', 0, 'Non-QHP', 0],
    'Standard Silver On Exchange Plan': ['Standard', 1, 'QHP', 1],
    'Zero Cost Sharing Plan Variation': ['Zero Cost Sharing', 2, 'QHP', 1]
}

consolidated_plan_attr['CSRName'] = consolidated_plan_attr['CSRVariationType'].map(lambda x: csr_names[x][0])
consolidated_plan_attr['CSRId'] = consolidated_plan_attr['CSRVariationType'].map(lambda x: csr_names[x][1])
consolidated_plan_attr['ExchangeTypeName'] = consolidated_plan_attr['CSRVariationType'].map(lambda x: csr_names[x][2])
consolidated_plan_attr['ExchangeTypeId'] = consolidated_plan_attr['CSRVariationType'].map(lambda x: csr_names[x][3])

In [16]:
metal_names = {
    'Low': 8,
    'High': 9,
    'Bronze': 1,
    'Expanded Bronze': 2,
    'Gold': 4,
    'Silver': 3,
    'Catastrophic': 6,
    'Platinum': 5,
    'None': 0
}

consolidated_plan_attr['MetalId'] = consolidated_plan_attr['MetalLevel'].map(lambda x: metal_names[x])

In [17]:
for column in consolidated_plan_attr:
    print("Column %r has values of: %r" % (consolidated_plan_attr[column].name ,consolidated_plan_attr[column].unique()))

Column 'AVCalculatorOutputNumber' has values of: array([nan, '0.619164943695068', '0.794966459274292', ...,
       0.88008289713796, 0.937938068466923, 0.648318564118744], dtype=object)
Column 'BeginPrimaryCareCostSharingAfterNumberOfVisits' has values of: array([0, 5, 3, 4, 1, 2, 6])
Column 'BeginPrimaryCareDeductibleCoinsuranceAfterNumberOfCopays' has values of: array([ 0,  3,  5,  1,  4,  2,  6, 10,  8])
Column 'BenefitPackageId' has values of: array([                   1,                    2,                    3,
                          4,                    5,                    6,
                          7,                    8,                    9,
                         10,                   18,                   19,
                         31,                   32,                   28,
                         29,                   16,                   17,
                         21,                   22,                   23,
                         15,         

In [18]:
consolidated_plan_attr['BusinessYear'].unique()

array([2014, 2015, 2016, 2017, 2018])

In [19]:
from sqlalchemy import create_engine
engine = create_engine('postgresql://localhost:5432/ifprates')

In [20]:
consolidated_plan_attr.columns = [c.lower() for c in consolidated_plan_attr.columns] #postgres doesn't like capitals or spaces

In [21]:
# consolidated_plan_attr.to_sql("plan_attributes_etl", engine)