In [0]:
from datetime import date
import requests
from pyspark.sql import SparkSession

In [0]:
# Session used later to turn the extracted records into a DataFrame.
spark = SparkSession.builder.appName("NPI Data").getOrCreate()

# Stamped onto every extracted record as its "refreshed_at" value.
current_date = date.today()

base_url = "https://npiregistry.cms.hhs.gov/api/"

params = {
    "version": "2.1",  # API version
    "state": "CA",  # Example state, replace with desired state or other criteria
    "city": "Los Angeles",  # Example city, replace with desired city
    "limit": 200,  # Limit the number of results for demonstration purposes
}

# requests has no default timeout: without one, a stalled connection would
# block this cell forever. Fail with requests.exceptions.Timeout instead.
response = requests.get(base_url, params=params, timeout=30)

In [0]:
# Flatten the registry response into one record per provider.
#
# `detailed_results` is initialised before the status check so that the
# following cell can always test it, even when the request did not succeed.
detailed_results = []

if response.status_code == 200:
    npi_data = response.json()
    npi_list = list(npi_data.get("results", []))

    for result in npi_list:
        npi_number = result.get("number")
        basic_info = result.get("basic", {})

        # "NPI-1" records carry the provider's own name; every other record
        # is named after its authorized official. `.get` is used so that a
        # record without an enumeration type does not abort the whole run.
        if result.get("enumeration_type") == "NPI-1":
            fname = basic_info.get("first_name", "")
            lname = basic_info.get("last_name", "")
        else:
            fname = basic_info.get("authorized_official_first_name", "")
            lname = basic_info.get("authorized_official_last_name", "")

        # These fields and the append apply to BOTH branches above. They were
        # previously nested under the `else`, which silently discarded every
        # "NPI-1" record. Missing keys fall back to an empty string.
        position = basic_info.get("authorized_official_title_or_position", "")
        organisation = basic_info.get("organization_name", "")
        last_updated = basic_info.get("last_updated", "")

        detailed_results.append(
            {
                "npi_id": npi_number,
                "first_name": fname,
                "last_name": lname,
                "position": position,
                "organisation_name": organisation,
                "last_updated": last_updated,
                "refreshed_at": current_date,
            }
        )
else:
    # Surface the failure instead of leaving the cell silent.
    print(f"NPI registry request failed with status code {response.status_code}.")

In [0]:
# Persist the extracted records to the bronze layer, or report that there is
# nothing to write.
if not detailed_results:
    print("No detailed results found.")
else:
    df = spark.createDataFrame(detailed_results)
    display(df)

    # coalesce(1) is applied before the write; each run overwrites the
    # previous contents of the target path.
    (
        df.coalesce(1)
        .write.format("parquet")
        .mode("overwrite")
        .save("/mnt/bronze/npi_extract/")
    )
    # Alternative sink, kept for reference:
    # df.coalesce(1).write.format("delta").mode("overwrite").saveAsTable("npi_extract")