## Setup

In [14]:
import json
import os
import sys

from datetime import datetime
from functools import partial

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile

from matplotlib.dates import DateFormatter


# Notebook-wide configuration: display options, import path, project helpers
# and the constants read by the cells below.

# None lifts the column limit so wide frames are displayed in full.
pd.set_option("display.max_columns", None)
# Must run before the `src.*` imports below.
# NOTE(review): presumably "../.." is the repository root that contains the
# `src` package, relative to the notebook's working directory -- confirm.
sys.path.append("../..")

from src.utils.download import download_file_from_url
from src.utils.plot import plot_bar, plot_hist, plot_pie
from src.utils.states import abbrev_to_state

# Directory that receives the downloaded archive and the extracted CSV.
INPUT_DATA_DIR = "./input_data"

# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(INPUT_DATA_DIR, exist_ok=True)

# Open-enrollment year; used to build the local file names and in the
# printed summaries.
YEAR = 2021

In [15]:
# Download the OEP state-level public use file and extract its CSV into
# INPUT_DATA_DIR, leaving the extracted file's path in `enrollment_path`.
#
# URL obtained by clicking "2021 OEP State-Level Public Use File (ZIP)" here:
# https://www.cms.gov/research-statistics-data-systems/marketplace-products/2021-marketplace-open-enrollment-period-public-use-files
url = "https://www.cms.gov/files/zip/2021-oep-state-level-public-use-file.zip"
zip_path = os.path.join(INPUT_DATA_DIR, f"enrollment_puf_{YEAR}.zip")
download_file_from_url(url, zip_path)

# Unzip inside a context manager so the archive handle is closed before the
# zip is deleted below (an open handle makes os.remove fail on Windows and
# otherwise leaks a file descriptor for the life of the kernel).
with zipfile.ZipFile(zip_path) as zipdata:
    zipinfo = zipdata.infolist()[0]  # Single file in each zip

    # Rename file to enforce consistency across years and handle with same boilerplate
    zipinfo.filename = f"enrollment_puf_{YEAR}.csv"
    enrollment_path = zipdata.extract(zipinfo, INPUT_DATA_DIR)

# Delete zip
os.remove(zip_path)

Downloading file from: https://www.cms.gov/files/zip/2021-oep-state-level-public-use-file.zip.
Success.


In [16]:
# Load the extracted PUF and convert the consumer-count column to integers in
# a single chained expression, so the frame is never visible half-cleaned.
# The counts are handled as text from which "," separators are stripped;
# astype(str) keeps the cleaning working even if pandas already parsed the
# column as numeric, and regex=False makes the replacement a literal one.
enrollment_csv = pd.read_csv(enrollment_path).assign(
    Cnsmr=lambda df: (
        df["Cnsmr"].astype(str).str.replace(",", "", regex=False).astype(int)
    )
)

In [17]:
# Partition the enrollment table by the value of its "Pltfrm" column.
# NOTE(review): "HC.gov" presumably marks HealthCare.gov states and "SBM"
# state-based marketplaces -- confirm against the PUF data dictionary.
hc_gov_records = enrollment_csv.loc[enrollment_csv["Pltfrm"].eq("HC.gov")]
sbm_records = enrollment_csv.loc[enrollment_csv["Pltfrm"].eq("SBM")]

In [18]:
# Split the HC.gov records into the per-state rows and the "Total" row, then
# check that the two agree before trusting the total.
component_rows = hc_gov_records[hc_gov_records["State_Abrvtn"] != "Total"]
total_row = hc_gov_records[hc_gov_records["State_Abrvtn"] == "Total"]

# Guard against a vacuous pass: if the platform filter matched nothing, both
# sums are 0 and the consistency check below would succeed on empty data.
assert not total_row.empty, 'No "Total" row found for platform HC.gov'
assert component_rows["Cnsmr"].sum() == total_row["Cnsmr"].sum(), (
    "HC.gov per-state consumer counts do not add up to the reported total: "
    f'{component_rows["Cnsmr"].sum()} != {total_row["Cnsmr"].sum()}'
)

# Population represented by healthcare.gov plans for the configured YEAR
hc_gov_total = total_row["Cnsmr"].sum()
hc_gov_total

8251703

In [19]:
# Split the SBM records into the per-state rows and the "Total" row, then
# check that the two agree before trusting the total.
component_rows = sbm_records[sbm_records["State_Abrvtn"] != "Total"]
total_row = sbm_records[sbm_records["State_Abrvtn"] == "Total"]

# Guard against a vacuous pass: if the platform filter matched nothing, both
# sums are 0 and the consistency check below would succeed on empty data.
assert not total_row.empty, 'No "Total" row found for platform SBM'
assert component_rows["Cnsmr"].sum() == total_row["Cnsmr"].sum(), (
    "SBM per-state consumer counts do not add up to the reported total: "
    f'{component_rows["Cnsmr"].sum()} != {total_row["Cnsmr"].sum()}'
)

# Population represented by SBM-platform plans for the configured YEAR (not
# healthcare.gov). The variable keeps its `sbe_total` name because the cell
# that adds up overall QHP enrollment reads it under that name.
sbe_total = total_row["Cnsmr"].sum()
sbe_total

3752662

In [20]:
# Report the SBM consumer count for every known state abbreviation, skipping
# abbreviations whose summed count is not positive.
for state_abbrev in abbrev_to_state:
    in_state = sbm_records["State_Abrvtn"] == state_abbrev
    n_consumers = sbm_records.loc[in_state, "Cnsmr"].sum()
    if not n_consumers > 0:
        continue
    print(f"{n_consumers} marketplace QHP consumers in {state_abbrev} in {YEAR}")

1625546 marketplace QHP consumers in CA in 2021
179607 marketplace QHP consumers in CO in 2021
104946 marketplace QHP consumers in CT in 2021
16947 marketplace QHP consumers in DC in 2021
68832 marketplace QHP consumers in ID in 2021
166038 marketplace QHP consumers in MD in 2021
294097 marketplace QHP consumers in MA in 2021
112804 marketplace QHP consumers in MN in 2021
81903 marketplace QHP consumers in NV in 2021
269560 marketplace QHP consumers in NJ in 2021
215889 marketplace QHP consumers in NY in 2021
337722 marketplace QHP consumers in PA in 2021
31174 marketplace QHP consumers in RI in 2021
24866 marketplace QHP consumers in VT in 2021
222731 marketplace QHP consumers in WA in 2021


In [21]:
# Overall QHP enrollment: the SBM total plus the HC.gov total.
total_QHP_enrollment = sum((hc_gov_total, sbe_total))
print(f"{total_QHP_enrollment} marketplace QHP consumers in {YEAR}")

12004365 marketplace QHP consumers in 2021
