# convertParquetMPI

This notebook reads patient data from an uploaded parquet file (`mpi_incoming_filename`), converts it to FHIR, and writes the converted data to blob storage.

In [None]:
pip install phdi

In [None]:
# Name of the incoming parquet file; it is appended to the patient-data
# container URL to build `mpi_incoming_filename` in the setup cell below.
# NOTE(review): presumably overridden by whatever triggers this notebook - confirm.
filename = ""

In [None]:
from notebookutils import mssparkutils
from phdi.linkage.seed import convert_to_patient_fhir_resources
from datetime import date
import json

# Set up file client
# NOTE(review): the "$..." values look like placeholders substituted at deploy
# time - confirm against the deployment scripts.
storage_account = "$STORAGE_ACCOUNT"
source_data_bucket = "source-data"
patient_data_bucket = "patient-data"
storage_account_url = f"https://{storage_account}.blob.core.windows.net/"
# Full ABFSS path of the incoming parquet file inside the patient-data container.
mpi_incoming_filename = f"abfss://{patient_data_bucket}@{storage_account}.dfs.core.windows.net/{filename}"

# Set up for writing to blob storage
blob_relative_path = ""
blob_storage_linked_service = "$BLOB_STORAGE_LINKED_SERVICE"
blob_sas_token = mssparkutils.credentials.getConnectionStringOrCreds(blob_storage_linked_service)
wasb_path = 'wasbs://%s@%s.blob.core.windows.net/%s' % (source_data_bucket, storage_account, blob_relative_path)
# Register the SAS token so Spark can access the source-data container.
spark.conf.set('fs.azure.sas.%s.%s.blob.core.windows.net' % (source_data_bucket, storage_account), blob_sas_token)

# Try mounting the remote storage directory at the mount point
try:
    mssparkutils.fs.mount(
        wasb_path,
        "/",
        {"LinkedService": blob_storage_linked_service}
    )
except Exception as mount_error:
    # Mounting stays best-effort (a repeat run may find the path already
    # mounted), but the real error is reported so that authentication or path
    # problems are not silently mislabelled. Catching Exception rather than
    # using a bare except lets KeyboardInterrupt/SystemExit propagate.
    print(f"Already mounted (or mount failed): {mount_error}")


Read the MPI parquet data into a Spark dataframe. Iterate over each row of patient data in the dataframe and convert it to a FHIR bundle and its associated iris_id. Write each FHIR bundle and iris_id to blob storage as a JSON file (keys `bundle` and `external_person_id`).

In [None]:
# Convert data and write to blob storage
def convert_write_data(mpi_incoming_filename):
    """
    Convert every patient row of a parquet file to FHIR and write it to blob storage.

    Each row is passed (as a dict) to `convert_to_patient_fhir_resources`, which
    returns an `(iris_id, fhir_bundle)` pair. The bundle is tagged with
    `meta.source = "uri:iris"` and written, together with the id, as one JSON
    file per row under `fhir/` in the source-data container. Files are named
    `lac_extract_<date>_<n>.json`, where `<n>` is the 1-based row position.

    :param mpi_incoming_filename: Path of the parquet file to read with Spark.
    """
    df = spark.read.parquet(mpi_incoming_filename)
    curr_date = date.today()

    # Everything in the output path except the row index is loop-invariant.
    output_prefix = (
        f"abfss://{source_data_bucket}@{storage_account}.dfs.core.windows.net"
        f"/fhir/lac_extract_{curr_date}_"
    )

    # NOTE(review): collect() materializes every row on the driver; for very
    # large extracts consider iterating lazily instead.
    for file_idx, row in enumerate(df.collect(), start=1):
        iris_id, fhir_bundle = convert_to_patient_fhir_resources(row.asDict())
        fhir_bundle["meta"] = {"source": "uri:iris"}

        data = {
            'bundle': fhir_bundle,
            'external_person_id': iris_id
        }

        # The final argument (True) overwrites an existing file.
        # NOTE(review): names depend only on the date and the row index, so a
        # second input file processed on the same day replaces earlier output.
        mssparkutils.fs.put(f"{output_prefix}{file_idx}.json", json.dumps(data), True)


convert_write_data(mpi_incoming_filename)