In [7]:
import json
import pandas as pd

In [8]:
# Set file paths
# Relative path to the Patient example JSON; it is resolved against the
# working directory the notebook kernel runs in.
input_file = "data/patientExample.json"

In [9]:
# Load the JSON file and parse it into a Python object.
# The encoding is passed explicitly: JSON text is UTF-8 by specification, but
# open() otherwise falls back to the platform's locale encoding (for example
# cp1252 on Windows), which can mis-decode or reject non-ASCII characters.
with open(input_file, "r", encoding="utf-8") as f:
    data = json.load(f)

print(data)

{'resourceType': 'Patient', 'id': 'example', 'identifier': [{'use': 'usual', 'type': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v2-0203', 'code': 'MR'}]}, 'system': 'urn:oid:1.2.36.146.595.217.0.1', 'value': '12345', 'period': {'start': '2001-05-06'}, 'assigner': {'display': 'Acme Healthcare'}}], 'active': True, 'name': [{'use': 'official', 'family': 'Chalmers', 'given': ['Peter', 'James']}, {'use': 'usual', 'given': ['Jim']}, {'use': 'maiden', 'family': 'Windsor', 'given': ['Peter', 'James'], 'period': {'end': '2002'}}], 'telecom': [{'use': 'home'}, {'system': 'phone', 'value': '(03) 5555 6473', 'use': 'work', 'rank': 1}, {'system': 'phone', 'value': '(03) 3410 5613', 'use': 'mobile', 'rank': 2}, {'system': 'phone', 'value': '(03) 5555 8834', 'use': 'old', 'period': {'end': '2014'}}], 'gender': 'male', 'birthDate': '1974-12-25', '_birthDate': {'extension': [{'url': 'http://hl7.org/fhir/StructureDefinition/patient-birthTime', 'valueDateTime': '1974-12-25T14:35:45-05:

In [17]:
# Flatten the nested Patient resource in `data` into one flat record.
# A dictionary is used because it associates named keys with values, which
# mirrors a JSON object; a list would only hold values by position (index),
# not by name.
dictData = {
    # Top-level fields are copied as-is, with a default when the key is absent.
    "resourceType": data.get("resourceType", ""),
    "id": data.get("id", ""),
    "gender": data.get("gender", ""),
    "birthDate": data.get("birthDate", ""),
    "deceasedBoolean": data.get("deceasedBoolean", False),
    # Nested fields start empty and are filled in by the steps below.
    "first_name": "",
    "last_name": "",
    "identifier_value": "",
    "home_phone": "",
    "work_phone": "",
    "address_city": "",
    "address_state": ""
}

# Extract first_name and last_name from the 'name' list (use first 'official' name)
for name_entry in data.get("name", []):
    if name_entry.get("use") == "official":
        # All given names are joined with a single space, e.g. "Peter James".
        dictData["first_name"] = " ".join(name_entry.get("given", []))
        dictData["last_name"] = name_entry.get("family", "")
        break  # any later 'official' entries are ignored

# Extract identifier value from the 'identifier' list (use first entry)
if data.get("identifier"):
    dictData["identifier_value"] = data["identifier"][0].get("value", "")

# Extract home_phone and work_phone from the 'telecom' list.
# Only entries whose system is 'phone' are considered, for BOTH uses, so that a
# home e-mail or fax entry is never stored as a phone number. There is no
# break: if several entries match the same use, the last one wins.
phone_field_by_use = {"home": "home_phone", "work": "work_phone"}
telecom_list = data.get("telecom", [])
for telecom in telecom_list:
    if telecom.get("system") != "phone":
        continue
    target_field = phone_field_by_use.get(telecom.get("use"))
    if target_field is not None:
        dictData[target_field] = telecom.get("value", "")  # empty string if no value

# Extract city and state from the 'address' list (use first entry)
if data.get("address"):
    dictData["address_city"] = data["address"][0].get("city", "")
    dictData["address_state"] = data["address"][0].get("state", "")

In [19]:
# Build a one-row DataFrame from the flat record.
# Wrapping dictData in a list makes pandas treat it as a single row whose
# columns are the dictionary keys.
records = [dictData]
df = pd.DataFrame(records)

# Print the resulting table as plain text
print(df)

  resourceType       id gender   birthDate  deceasedBoolean   first_name  \
0      Patient  example   male  1974-12-25            False  Peter James   

  last_name identifier_value home_phone      work_phone   address_city  \
0  Chalmers            12345             (03) 5555 6473  PleasantVille   

  address_state  
0           Vic  


In [None]:
## Flowchart Description
The process of parsing `patientExample.json` into a DataFrame is outlined below:

1. **Start**
2. **Load JSON file → Store in 'data'**: Read the JSON file and store its contents in the `data` variable.
3. **Initialize dictData with default values**: Create a dictionary `dictData` with default values for all fields.
4. **Extract top-level fields (resourceType, id, gender, birthDate, deceasedBoolean)**: Populate `dictData` with top-level JSON fields.
5. **Loop through 'name' list**: Iterate over the `name` list in the JSON.
   - **If 'use' == 'official'**: 
     - Set `first_name` by joining the `given` names.
     - Set `last_name` from the `family` field.
     - Break the loop to use the first official name.
   - If not official, continue looping.
6. **Extract 'identifier[0].value' → Set identifier_value**: Extract the value from the first `identifier` entry.
7. **Loop through 'telecom' list**: Iterate over the `telecom` list.
   - **If 'use' == 'home'**: Set `home_phone` to the `value`.
   - **If 'use' == 'work' and system == 'phone'**: Set `work_phone` to the `value`.
   - If neither condition is met, proceed.
8. **Extract 'address[0].city' → Set address_city**: Extract the `city` from the first `address` entry.
9. **Extract 'address[0].state' → Set address_state**: Extract the `state` from the first `address` entry.
10. **Convert dictData to DataFrame**: Convert the `dictData` dictionary into a Pandas DataFrame.
11. **Display DataFrame**: Print the DataFrame to the output.
12. **End**

## Flowchart
![Flowchart Description](flowchart.png)