### updateECRDatastorePersonID
This is the 1st step to update the ECR datastore after receiving new MPI data from LAC.

This notebook joins the ECR datastore (`ecr`) and Patient table from the Master Patient Index (`patient`) on `patient_id` to update the ECR datastore's `person_id` with the most up-to-date `person_id` in the `patient` table. 

In [None]:
pip install psycopg2-binary azure-identity

In [None]:
from azure.identity import DefaultAzureCredential
import psycopg2
from delta.tables import *
from pyspark.sql.functions import *

In [None]:
# --- ECR datastore location -------------------------------------------------
# NOTE(review): the "$..." values look like placeholders that are substituted
# when the notebook is deployed -- confirm against the deployment tooling.
storage_account_name = "$STORAGE_ACCOUNT"
ECR_DELTA_TABLE_FILE_PATH = f"abfss://delta-tables@{storage_account_name}.dfs.core.windows.net/ecr-datastore"

# --- Key Vault --------------------------------------------------------------
# Vault (and the linked service used to reach it) holding the MPI DB password.
vault_name = "$KEY_VAULT"
vault_linked_service = "$KEY_VAULT_LINKED_SERVICE"
credential = DefaultAzureCredential()

# --- MPI PostgreSQL database ------------------------------------------------
# Connection settings for the Master Patient Index and the table to read.
DB_NAME = "DibbsMpiDB"
DB_USER = "postgres"
DB_HOST = "phdidevmpi9d194c64.postgres.database.azure.com"
DB_PORT = "5432"
DB_TABLE = "patient"

# Look up the database password stored under "mpi-db-password" in the vault.
db_password = TokenLibrary.getSecret(
    vault_name,
    "mpi-db-password",
    vault_linked_service,
)

# Pull every (patient_id, person_id) pair from the MPI patient table.
#
# The connection is closed in a `finally` block and the cursor is managed by a
# `with` statement, so both are released even if the query or fetch raises.
# (psycopg2's `with conn:` only scopes a transaction -- it does NOT close the
# connection -- hence the explicit try/finally.)
conn = psycopg2.connect(
    dbname=DB_NAME,
    user=DB_USER,
    password=db_password,
    host=DB_HOST,
    port=DB_PORT
)
try:
    with conn.cursor() as cur:
        # DB_TABLE is a constant defined in this notebook, not user input, so
        # interpolating it into the statement is safe here.
        cur.execute(f"""
            SELECT patient_id, person_id
            FROM {DB_TABLE};
        """)

        # Materialise the whole result set as a list of 2-tuples.
        data = cur.fetchall()
finally:
    conn.close()


# Turn the rows fetched from the MPI into a Spark DataFrame whose two columns
# are named after the fields selected from the patient table.
columns = ['patient_id', 'person_id']
patient = spark.createDataFrame(data=data, schema=columns)

# Open the ECR datastore as a Delta table so it can be updated in place.
ecr = DeltaTable.forPath(spark, ECR_DELTA_TABLE_FILE_PATH)

# ECR rows are paired with MPI rows that share the same `patient_id`.
merge_condition = "ecr.patient_id = mpi_patient.patient_id"

# For each matched ECR row: take `person_id` from the MPI and stamp
# `person_id_date_added` with the current date formatted as yyyy-MM-dd.
# NOTE(review): this applies to every matched row, including rows whose
# person_id is unchanged -- confirm that re-stamping the date is intended.
matched_updates = {
    "person_id": "mpi_patient.person_id",
    "person_id_date_added": date_format(current_timestamp(), 'yyyy-MM-dd'),
}

# ECR rows without a match in the MPI are left untouched (no other clause).
(
    ecr.alias("ecr")
    .merge(patient.alias("mpi_patient"), merge_condition)
    .whenMatchedUpdate(set=matched_updates)
    .execute()
)

