In [1]:
import requests
import os
import logging
import polars as pl
from pathlib import Path
import json
import time
import random
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

FILE_PATH = Path('__file__').parent.absolute().parent

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

In [2]:
url = 'https://data.cms.gov/data.json'
headers = {'accept': 'application/json'}


data = requests.get(url=url, headers=headers).json()

In [3]:
root_df = pl.DataFrame(data["dataset"], strict=False, nan_to_null=True)

In [4]:
root_df = root_df.explode("distribution"
                            ).explode("theme"
                            ).explode("references"
                            ).explode("programCode"
                            ).explode("language"
                            ).explode("keyword"
                            ).explode("bureauCode"
                            ).unnest(columns=("contactPoint","distribution", "publisher"), separator="_"
                            )
root_df

@type,accessLevel,accessRights,accrualPeriodicity,bureauCode,contactPoint_@type,contactPoint_fn,contactPoint_hasEmail,describedBy,dataQuality,description,distribution_@type,distribution_format,distribution_accessURL,distribution_resourcesAPI,distribution_description,distribution_title,distribution_modified,distribution_temporal,distribution_downloadURL,distribution_mediaType,identifier,keyword,landingPage,language,license,modified,programCode,publisher_@type,publisher_name,references,temporal,theme,title,describedByType
str,str,str,str,str,str,str,str,str,bool,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Shared Savings Program - CM""","""mailto:SharedSavingsProgram@cm…","""http://data.cms.gov/resources/…",true,"""The Accountable Care Organizat…","""dcat:Distribution""","""API""","""http://data.cms.gov/data-api/v…","""https://data.cms.gov/data-api/…","""latest""","""Accountable Care Organization …","""2025-09-10""","""2025-01-01/2025-12-31""",,,"""http://data.cms.gov/data-api/v…","""Medicare""","""http://data.cms.gov/medicare-s…","""en-US""","""https://www.usa.gov/government…","""2025-09-10""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/resources/…","""2014-01-01/2025-12-31""","""Medicare""","""Accountable Care Organization …",
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Shared Savings Program - CM""","""mailto:SharedSavingsProgram@cm…","""http://data.cms.gov/resources/…",true,"""The Accountable Care Organizat…","""dcat:Distribution""","""API""","""http://data.cms.gov/data-api/v…","""https://data.cms.gov/data-api/…","""latest""","""Accountable Care Organization …","""2025-09-10""","""2025-01-01/2025-12-31""",,,"""http://data.cms.gov/data-api/v…","""Value-Based Care""","""http://data.cms.gov/medicare-s…","""en-US""","""https://www.usa.gov/government…","""2025-09-10""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/resources/…","""2014-01-01/2025-12-31""","""Medicare""","""Accountable Care Organization …",
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Shared Savings Program - CM""","""mailto:SharedSavingsProgram@cm…","""http://data.cms.gov/resources/…",true,"""The Accountable Care Organizat…","""dcat:Distribution""","""API""","""http://data.cms.gov/data-api/v…","""https://data.cms.gov/data-api/…","""latest""","""Accountable Care Organization …","""2025-09-10""","""2025-01-01/2025-12-31""",,,"""http://data.cms.gov/data-api/v…","""Coordinated Care""","""http://data.cms.gov/medicare-s…","""en-US""","""https://www.usa.gov/government…","""2025-09-10""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/resources/…","""2014-01-01/2025-12-31""","""Medicare""","""Accountable Care Organization …",
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Shared Savings Program - CM""","""mailto:SharedSavingsProgram@cm…","""http://data.cms.gov/resources/…",true,"""The Accountable Care Organizat…","""dcat:Distribution""","""API""","""http://data.cms.gov/data-api/v…","""https://data.cms.gov/data-api/…","""latest""","""Accountable Care Organization …","""2025-09-10""","""2025-01-01/2025-12-31""",,,"""http://data.cms.gov/data-api/v…","""Payment Models""","""http://data.cms.gov/medicare-s…","""en-US""","""https://www.usa.gov/government…","""2025-09-10""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/resources/…","""2014-01-01/2025-12-31""","""Medicare""","""Accountable Care Organization …",
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Shared Savings Program - CM""","""mailto:SharedSavingsProgram@cm…","""http://data.cms.gov/resources/…",true,"""The Accountable Care Organizat…","""dcat:Distribution""","""API""","""http://data.cms.gov/data-api/v…","""https://data.cms.gov/data-api/…","""latest""","""Accountable Care Organization …","""2025-09-10""","""2025-01-01/2025-12-31""",,,"""http://data.cms.gov/data-api/v…","""Accountable Care Organizations""","""http://data.cms.gov/medicare-s…","""en-US""","""https://www.usa.gov/government…","""2025-09-10""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/resources/…","""2014-01-01/2025-12-31""","""Medicare""","""Accountable Care Organization …",
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Medicare Current Beneficiary S…","""mailto:MCBS@cms.hhs.gov""","""http://data.cms.gov/sites/defa…",true,"""The Medicare Current Beneficia…","""dcat:Distribution""",,,"""https://data.cms.gov/data-api/…",,"""Medicare Current Beneficiary S…","""2024-08-30""","""2013-01-01/2013-12-31""","""http://data.cms.gov/sites/defa…","""application/zip""","""http://data.cms.gov/data-api/v…","""Original Medicare""","""http://data.cms.gov/medicare-c…","""en-US""","""https://www.usa.gov/government…","""2025-12-04""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/sites/defa…","""2013-01-01/2023-12-31""","""Medicare""","""Medicare Current Beneficiary S…","""application/zip"""
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Medicare Current Beneficiary S…","""mailto:MCBS@cms.hhs.gov""","""http://data.cms.gov/sites/defa…",true,"""The Medicare Current Beneficia…","""dcat:Distribution""",,,"""https://data.cms.gov/data-api/…",,"""Medicare Current Beneficiary S…","""2024-08-30""","""2013-01-01/2013-12-31""","""http://data.cms.gov/sites/defa…","""application/zip""","""http://data.cms.gov/data-api/v…","""Medicare Advantage""","""http://data.cms.gov/medicare-c…","""en-US""","""https://www.usa.gov/government…","""2025-12-04""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/sites/defa…","""2013-01-01/2023-12-31""","""Medicare""","""Medicare Current Beneficiary S…","""application/zip"""
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Medicare Current Beneficiary S…","""mailto:MCBS@cms.hhs.gov""","""http://data.cms.gov/sites/defa…",true,"""The Medicare Current Beneficia…","""dcat:Distribution""",,,"""https://data.cms.gov/data-api/…",,"""Medicare Current Beneficiary S…","""2024-08-30""","""2013-01-01/2013-12-31""","""http://data.cms.gov/sites/defa…","""application/zip""","""http://data.cms.gov/data-api/v…","""Medicare Prescription Drug""","""http://data.cms.gov/medicare-c…","""en-US""","""https://www.usa.gov/government…","""2025-12-04""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/sites/defa…","""2013-01-01/2023-12-31""","""Medicare""","""Medicare Current Beneficiary S…","""application/zip"""
"""dcat:Dataset""","""public""","""Open""","""R/P1Y""","""009:38""","""vcard:Contact""","""Medicare Current Beneficiary S…","""mailto:MCBS@cms.hhs.gov""","""http://data.cms.gov/sites/defa…",true,"""The Medicare Current Beneficia…","""dcat:Distribution""",,,"""https://data.cms.gov/data-api/…",,"""Medicare Current Beneficiary S…","""2024-08-30""","""2013-01-01/2013-12-31""","""http://data.cms.gov/sites/defa…","""application/zip""","""http://data.cms.gov/data-api/v…","""Health Equity""","""http://data.cms.gov/medicare-c…","""en-US""","""https://www.usa.gov/government…","""2025-12-04""","""009:000""","""org:Organization""","""Centers for Medicare & Medicai…","""http://data.cms.gov/sites/defa…","""2013-01-01/2023-12-31""","""Medicare""","""Medicare Current Beneficiary S…","""application/zip"""


In [5]:
lf_group = root_df.group_by("keyword").agg(pl.col("distribution_accessURL"))
lf_group

keyword,distribution_accessURL
str,list[str]
"""Original Medicare""","[""http://data.cms.gov/data-api/v1/dataset/1cd9eded-d2c9-4215-a064-aac6dae3b714/data"", null, … null]"
"""Medicare Advantage""","[null, null, … null]"
"""Skilled Nursing""","[""http://data.cms.gov/data-api/v1/dataset/d086edc0-4953-4fb9-a663-b35526371add/data"", null, … null]"
"""Financial Resources""","[""http://data.cms.gov/data-api/v1/dataset/090bac79-c8b2-4e8a-bc3e-6bee002bcd6e/data"", ""http://data.cms.gov/data-api/v1/dataset/090bac79-c8b2-4e8a-bc3e-6bee002bcd6e/data"", … ""http://data.cms.gov/data-api/v1/dataset/6d8bb85c-58d8-48c2-90dc-f9aacbc0294c/data""]"
"""Home Health""","[""http://data.cms.gov/data-api/v1/dataset/fc009b2d-7846-44b1-b4a1-692f0c143879/data"", null, … null]"
…,…
"""Safety of Care""","[""http://data.cms.gov/data-api/v1/dataset/7cf9662e-7c5c-4fe0-a8c6-828edf81a23c/data"", null, … ""http://data.cms.gov/data-api/v1/dataset/d5c2ce96-a45a-4205-8f1a-597d7d3b6557/data""]"
"""Hospital Referral Regions""","[null, null]"
"""Medicare""","[""http://data.cms.gov/data-api/v1/dataset/9767cb68-8ea9-4f0b-8179-9431abc89f11/data"", null, … null]"
"""Long Term Care""","[""http://data.cms.gov/data-api/v1/dataset/129a6503-c0f1-4132-b186-4c0232c2d894/data"", null, … ""http://data.cms.gov/data-api/v1/dataset/e180a723-9a6d-4d58-b1c9-5c113af8a1e7/data""]"


In [6]:
lf_count_values = lf_group.filter(pl.col("keyword") == "Inpatient")
lf_count_values = lf_count_values.explode("distribution_accessURL").select("distribution_accessURL")

lista_de_apis = []

for api in lf_count_values["distribution_accessURL"]:
    if not api == None:
        lista_de_apis.append(api)
total_apis = len(lista_de_apis)
dfs = []
cont = 1

for api in lista_de_apis:
    data = requests.get(url=api, headers=headers)
    print(f"Requisições realizadas: {cont}/{total_apis}")
    df = pl.DataFrame(data.json(), strict=False, nan_to_null=True)
    dfs.append(df)
    cont += 1

final_df = pl.concat(dfs, how="diagonal", rechunk=True)
print("Salvando arquivo cms_inpatient.parquet")
final_df.write_parquet(f"{FILE_PATH}/syncdata/data/cms_inpatient.parquet")

Requisições realizadas: 1/157
Requisições realizadas: 2/157
Requisições realizadas: 3/157
Requisições realizadas: 4/157
Requisições realizadas: 5/157
Requisições realizadas: 6/157
Requisições realizadas: 7/157
Requisições realizadas: 8/157
Requisições realizadas: 9/157
Requisições realizadas: 10/157
Requisições realizadas: 11/157
Requisições realizadas: 12/157
Requisições realizadas: 13/157
Requisições realizadas: 14/157
Requisições realizadas: 15/157
Requisições realizadas: 16/157
Requisições realizadas: 17/157
Requisições realizadas: 18/157
Requisições realizadas: 19/157
Requisições realizadas: 20/157
Requisições realizadas: 21/157
Requisições realizadas: 22/157
Requisições realizadas: 23/157
Requisições realizadas: 24/157
Requisições realizadas: 25/157
Requisições realizadas: 26/157
Requisições realizadas: 27/157
Requisições realizadas: 28/157
Requisições realizadas: 29/157
Requisições realizadas: 30/157
Requisições realizadas: 31/157
Requisições realizadas: 32/157
Requisições reali

KeyboardInterrupt: 

In [None]:
lf = pl.scan_parquet(f"{FILE_PATH}/syncdata/data/cms_inpatient.parquet", rechunk=True, low_memory=True)

In [None]:
lf.collect()

HOSP_ID,ADM_DISC,RATE,INTERVAL_LOWER_LIMIT,INTERVAL_HIGHER_LIMIT,START_QUARTER,START_DATE,END_QUARTER,END_DATE,MEDICARE_PROV_NUM,ZIP_CD_OF_RESIDENCE,TOTAL_DAYS_OF_CARE,TOTAL_CHARGES,TOTAL_CASES,Provider Number,Facility Name,City,State,Zip Code,Certification Date,Medicare Census,Medicaid Census,Other Census,Total Residents,Program Participation Code,Hospital Based,Ownership Type,Multi-Facility Organization,Multi-Facility Organization Name,Number of AIDS Beds,Number of Alzheimer's Disease Beds,Number of Dialysis Beds,Number of Disabled Children/Young Adult Beds,Number of Head Trauma Beds,Number of Hospice Beds,Number of Huntington's Disease Beds,Number of Ventilator Beds,…,Notes and Loans Payable (Short Term),Deferred Income,Other Current Liabilities,Total Current Liabilities,Mortgage Payable,Notes Payable,Unsecured Loans,Other Long Term Liabilities,Total Long Term Liabilities,Total Liabilities,General Fund Balance,Total Fund Balances,Total Liabilities and Fund Balances,DRG Amounts Other Than Outlier Payments,DRG Amounts Before October 1,DRG Amounts After October 1,Outlier Payments For Discharges,Disproportionate Share Adjustment,Allowable DSH Percentage,Managed Care Simulated Payments,Total IME Payment,Inpatient Revenue,Outpatient Revenue,Total Patient Revenue,Less Contractual Allowance and Discounts on Patients' Accounts,Net Patient Revenue,Less Total Operating Expense,Net Income from Service to Patients,Total Other Income,Total Income,Total Other Expenses,Net Income,Cost To Charge Ratio,Net Revenue from Medicaid,Medicaid Charges,Net Revenue from Stand-Alone CHIP,Stand-Alone CHIP Charges
i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,i64,str,str,str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
10001,"""1763""","""9.86""","""5.23""","""14.48""","""3Q2013""","""07/01/2013""","""2Q2015""","""06/30/2015""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10005,"""218""","""17.59""","""8""","""27.18""","""3Q2013""","""07/01/2013""","""2Q2015""","""06/30/2015""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10006,"""""","""""","""""","""""","""3Q2013""","""07/01/2013""","""2Q2015""","""06/30/2015""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10007,"""57""","""11.37""","""0""","""23.43""","""3Q2013""","""07/01/2013""","""2Q2015""","""06/30/2015""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10008,"""2""","""""","""""","""""","""3Q2013""","""07/01/2013""","""2Q2015""","""06/30/2015""",,,,,,,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
,,,,,,,,,,,,,,,,"""CHEYENNE""",,"""73628""",,,,,,,,,,,,,,,,,,,…,"""""","""""","""18510""","""252438""","""""","""""","""""","""""","""""","""252438""","""11948232""","""11948232""","""12200670""","""""","""""","""""","""""","""""","""""","""""","""""","""472176""","""2661355""","""3133531""","""-257769""","""3391300""","""4030526""","""-639226""","""4206405""","""3567179""","""""","""3567179""","""1.243546""","""104749""","""206822""","""""",""""""
,,,,,,,,,,,,,,,,"""MIDLAND""",,"""79706""",,,,,,,,,,,,,,,,,,,…,"""""","""""","""""","""5368535""","""""","""1250000""","""""","""""","""1250000""","""6618535""","""-5052108""","""-5052108""","""1566427""","""39869""","""""","""""","""""","""""","""""","""""","""""","""10155605""","""6000000""","""16155605""","""11169810""","""4985795""","""7162180""","""-2176385""","""-21198""","""-2197583""","""""","""-2197583""","""0.432338""","""""","""""","""""",""""""
,,,,,,,,,,,,,,,,"""SHENANDOAH""",,"""51601""",,,,,,,,,,,,,,,,,,,…,"""""","""13109""","""701925""","""2449473""","""""","""""","""""","""""","""""","""2449473""","""20265074""","""21394870""","""23844343""","""""","""""","""""","""""","""""","""""","""""","""""","""8586949""","""35779989""","""44366938""","""21153627""","""23213311""","""23417613""","""-204302""","""1414344""","""1210042""","""85839""","""1124203""","""0.410366""","""""","""6757888""","""""",""""""
,,,,,,,,,,,,,,,,"""PEMBROKE PINES""",,"""33025""",,,,,,,,,,,,,,,,,,,…,"""""","""""","""690799""","""3749927""","""""","""""","""""","""""","""""","""3749927""","""37055668""","""37055668""","""40805595""","""""","""""","""""","""""","""""","""""","""""","""""","""3596621""","""""","""3596621""","""""","""3596621""","""31320941""","""-27724320""","""""","""-27724320""","""""","""-27724320""","""0.998702""","""""","""""","""""",""""""
