In [0]:
# import libraries
from datetime import date
import requests

In [0]:
# Run date, captured once so that every record built later in the notebook
# carries the same "refreshed_at" value.
current_date = date.today()

In [0]:

# Base URL for the NPI Registry API
base_url = "https://npiregistry.cms.hhs.gov/api/"

# Search parameters for the initial request that returns a list of NPIs.
params = {
    "version": "2.1",
    "state": "CA",
    "city": "Los Angeles",
    "limit": 20,
}

# Make the initial API request to get a list of NPIs.
# requests applies no timeout by default, so without one this cell would hang
# forever if the registry is slow or unreachable. The value is in seconds.
response = requests.get(base_url, params=params, timeout=30)
print(response.status_code)


In [0]:

def _build_provider_record(result, refreshed_at):
    """Flatten one NPI Registry result into a single row for the DataFrame.

    Args:
        result: One entry of the API response's "results" list (a dict).
        refreshed_at: Value stored in the row's "refreshed_at" field.

    Returns:
        A dict with the keys npi_id, first_name, last_name, position,
        organisation_name, last_updated and refreshed_at. Missing text
        fields default to an empty string.
    """
    basic_info = result.get("basic", {})

    # "NPI-1" records read the name from first_name/last_name; every other
    # enumeration type (or a missing one) falls back to the authorized
    # official's name. .get() avoids a KeyError when the field is absent.
    if result.get("enumeration_type") == "NPI-1":
        fname = basic_info.get("first_name", "")
        lname = basic_info.get("last_name", "")
    else:
        fname = basic_info.get("authorized_official_first_name", "")
        lname = basic_info.get("authorized_official_last_name", "")

    return {
        "npi_id": result.get("number"),
        "first_name": fname,
        "last_name": lname,
        "position": basic_info.get("authorized_official_title_or_position", ""),
        "organisation_name": basic_info.get("organization_name", ""),
        "last_updated": basic_info.get("last_updated", ""),
        "refreshed_at": refreshed_at,
    }


# Check if the initial search request was successful
if response.status_code == 200:
    npi_data = response.json()
    npi_list = [result["number"] for result in npi_data.get("results", [])]

    # One flat record per provider returned by the detail lookups
    detailed_results = []
    # (npi, status_code) for every detail lookup that did not return HTTP 200,
    # so skipped providers are reported instead of silently dropped
    failed_npis = []

    # A Session reuses the underlying connection across the per-NPI requests.
    with requests.Session() as session:
        for npi in npi_list:
            detail_params = {"version": "2.1", "number": npi}
            # Timeout (seconds) keeps a single slow lookup from hanging the cell.
            detail_response = session.get(base_url, params=detail_params, timeout=30)

            if detail_response.status_code != 200:
                failed_npis.append((npi, detail_response.status_code))
                continue

            detail_data = detail_response.json()
            # "results" may be missing or empty; treat both as no rows.
            for result in detail_data.get("results") or []:
                detailed_results.append(_build_provider_record(result, current_date))

    if failed_npis:
        print(f"Skipped {len(failed_npis)} NPI(s) whose detail request failed: {failed_npis}")

    # Create a DataFrame
    if detailed_results:
        df = spark.createDataFrame(detailed_results)
        display(df.limit(10))
    else:
        print("No detailed results found.")
else:
    print(f"Failed to fetch data: {response.status_code} - {response.text}")

In [0]:
# Define the storage account and the service principal used to access it
storage_account_name = "healthcarercmra"
client_id = "55cb5f89-1a5c-41b2-a286-947e13e78c78"
tenant_id = "e1dd8e8f-9203-44c7-b497-48a69721f03b"

# SECURITY: the client secret must not live in notebook source. It is read from
# a Databricks secret scope at run time instead.
# TODO(review): the scope and key names below are placeholders; replace them
# with the ones provisioned in this workspace. The secret that was previously
# hardcoded here has been exposed in source and should be rotated.
client_secret = dbutils.secrets.get(scope="healthcare-rcm", key="sp-client-secret")

# Set up the OAuth (client credentials) configuration for the service principal
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
}

# Apply the configuration to the current Spark session
for key, value in configs.items():
    spark.conf.set(key, value)

# Define the path to the bronze container (note: ends with a trailing slash)
bronze_path = f"abfss://bronze@{storage_account_name}.dfs.core.windows.net/"
print(bronze_path)

In [0]:
# List the contents of the bronze container. This is the dbutils form of
# `%fs ls`, driven by bronze_path so the listing follows the configured
# storage account instead of a second hardcoded copy of the URI.
display(dbutils.fs.ls(bronze_path))

In [0]:
# Target folder for the NPI data inside the bronze container.
# bronze_path already ends with "/", so strip it to avoid a double slash in the URI.
npi_codes_path = f"{bronze_path.rstrip('/')}/npi_codes"

# Save the DataFrame as a Delta table, replacing any previous load.
# The format is "delta" so that it matches the notebook's read-back step,
# which loads this path with format("delta").
# coalesce(1) reduces the DataFrame to a single partition before the write.
df.coalesce(1).write.mode("overwrite").format("delta").save(npi_codes_path)

In [0]:
# Read the data back from npi_codes_path as Delta and show the first 10 rows
# as a quick check of what is stored there.
display(
    spark.read.format("delta")
    .load(npi_codes_path)
    .limit(10)
)