In [0]:
import requests
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date
from pyspark.sql.types import StructType, StructField, StringType, DateType, BooleanType

In [0]:
# --- WHO ICD API configuration and authentication ---------------------------
# Credentials are read from a Databricks secret scope instead of being
# hard-coded in the notebook source.
# NOTE(review): the scope and key names below are placeholders - create them
# (or point these at an existing scope) before running. The client secret that
# was previously committed in this cell should be treated as leaked and rotated.
ICD_SECRET_SCOPE = 'icd-api'
client_id = dbutils.secrets.get(scope=ICD_SECRET_SCOPE, key='client-id')
client_secret = dbutils.secrets.get(scope=ICD_SECRET_SCOPE, key='client-secret')
base_url = 'https://id.who.int/icd/'
# NOTE: this rebinds the name `current_date` (imported from
# pyspark.sql.functions at the top of the notebook) to a plain datetime.date.
# extract_codes() reads this value to stamp inserted_date / updated_date.
current_date = datetime.now().date()

# OAuth2 client-credentials grant: exchange the client id/secret for a token.
auth_url = 'https://icdaccessmanagement.who.int/connect/token'
auth_response = requests.post(
    auth_url,
    data={
        'client_id': client_id,
        'client_secret': client_secret,
        'grant_type': 'client_credentials'
    },
    timeout=30,  # fail instead of hanging the notebook if the endpoint stalls
)

if auth_response.status_code != 200:
    raise Exception(f"Failed to obtain access token: {auth_response.status_code} - {auth_response.text}")

access_token = auth_response.json().get('access_token')
if not access_token:
    # A 200 response without a token would otherwise surface later as an
    # opaque authorization failure caused by a "Bearer None" header.
    raise Exception("Failed to obtain access token: response did not contain 'access_token'")



In [0]:
# HTTP headers attached to every ICD API request: the bearer token obtained
# during authentication, the API version and the response language.
headers = dict((
    ('Authorization', f'Bearer {access_token}'),
    ('API-Version', 'v2'),
    ('Accept-Language', 'en'),
))

def fetch_icd_codes(url, timeout=30):
    """Fetch one ICD API resource and return its decoded JSON body.

    Args:
        url: Absolute URL of the resource to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        Exception: If the server answers with any status other than 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    # The request carries a bearer token, so never send it over plaintext
    # HTTP. NOTE(review): the API may return entity links (e.g. in `child`)
    # with an http:// scheme - confirm against the WHO ICD API documentation.
    if url.startswith('http://'):
        url = 'https://' + url[len('http://'):]

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
    return response.json()

def extract_codes(url):
    """Walk the entity tree rooted at ``url`` and collect its leaf codes.

    Each entity is fetched with ``fetch_icd_codes``. An entity that has a
    ``child`` key is only descended into; an entity without one contributes a
    row when it carries both ``code`` and ``title``. Rows come back in
    depth-first order, following the order of each ``child`` list.

    Args:
        url: URL of the entity to start from.

    Returns:
        A list of dicts with the keys ``icd_code``, ``icd_code_type``,
        ``code_description``, ``inserted_date``, ``updated_date`` and
        ``is_current_flag``.
    """
    leaf_rows = []
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped (and fetched) in their original order.
    pending = [url]
    while pending:
        entity = fetch_icd_codes(pending.pop())

        if 'child' in entity:
            pending.extend(reversed(entity['child']))
            continue

        if not ('code' in entity and 'title' in entity):
            continue

        leaf_rows.append({
            'icd_code': entity['code'],
            'icd_code_type': 'ICD-10',
            'code_description': entity['title']['@value'],
            'inserted_date': current_date,
            'updated_date': current_date,
            'is_current_flag': True
        })
    return leaf_rows


In [0]:
# Show the DBFS mounts available in this workspace - presumably a sanity check
# that the /mnt/... location used by the final write step is mounted.
display(dbutils.fs.mounts())

In [0]:
# Crawl one block of the ICD release addressed by root_url and collect the
# leaf codes underneath it.
root_url = 'https://id.who.int/icd/release/10/2019/A00-A09'
icd_codes = extract_codes(root_url)

# The write below uses mode("overwrite"): an empty crawl result would silently
# replace the existing bronze data with nothing, so stop here instead.
if not icd_codes:
    raise ValueError(f"No ICD codes extracted from {root_url}; refusing to overwrite existing data")

# Explicit schema so column types do not depend on Spark's inference from the
# Python dicts returned by extract_codes().
schema = StructType([
    StructField("icd_code", StringType(), True),
    StructField("icd_code_type", StringType(), True),
    StructField("code_description", StringType(), True),
    StructField("inserted_date", DateType(), True),
    StructField("updated_date", DateType(), True),
    StructField("is_current_flag", BooleanType(), True)
])

# Create DataFrame
df = spark.createDataFrame(icd_codes, schema=schema)
df.display()  # optional: to preview

# Full refresh of the bronze ICD code dataset.
df.write.format("parquet").mode("overwrite").save("/mnt/rcmabhi/bronze/icd_codes/")
