In [None]:
pip install psycopg2-binary azure-keyvault

In [None]:
from azure.identity import ManagedIdentityCredential
from azure.core.credentials import AccessToken
from azure.keyvault.secrets import SecretClient
import psycopg2
import time

from delta.tables import *
from pyspark.sql.functions import *

In [None]:
# Path handed to DeltaTable.forPath when the eCR table is loaded.
ECR_DELTA_TABLE_FILE_PATH = "/delta-tables/ecr_datastore"


# Set up authentication
class spoof_token:
    """Stand-in credential whose tokens are obtained through ``mssparkutils``.

    An instance is swapped into a ``ManagedIdentityCredential`` below so that
    token requests made through that credential are answered by
    ``mssparkutils.credentials.getToken`` instead.
    """

    @staticmethod
    def get_token(*args, **kwargs):
        """Return an ``AccessToken`` for the "vault" audience.

        Every positional and keyword argument is accepted and ignored; the
        audience is always "vault".
        """
        vault_token = mssparkutils.credentials.getToken(audience="vault")
        # The real expiry is not known here, so ten minutes from now is used.
        assumed_expiry = int(time.time()) + 60 * 10
        return AccessToken(token=vault_token, expires_on=assumed_expiry)


credential = ManagedIdentityCredential()
# Monkey-patch the private `_credential` attribute so the credential hands
# out tokens from `spoof_token`.
credential._credential = spoof_token()

# Key Vault location and the name of the secret holding the DB password
KEY_VAULT_URL = "https://devvault9d194c64.vault.azure.net"
DB_PASS_SECRET_NAME = "mpi-password-test123"

# Client for the vault, authenticated with the patched credential
secret_client = SecretClient(
    vault_url=KEY_VAULT_URL,
    credential=credential,
)

# Look the secret up by name; its `.value` is read later as the DB password
db_pass_secret = secret_client.get_secret(DB_PASS_SECRET_NAME)

# Database connection parameters
DB_NAME = "DibbsMpiDB"
DB_USER = "postgres"
DB_HOST = "phdidevmpi9d194c64.postgres.database.azure.com"
DB_PORT = "5432"
DB_TABLE = "person"

# Get the secret value (password) from the previous step
db_password = db_pass_secret.value

# Connect to the database
conn = psycopg2.connect(
    dbname=DB_NAME,
    user=DB_USER,
    password=db_password,
    host=DB_HOST,
    port=DB_PORT
)

# Read every (person_id, external_person_id) pair from the person table.
# The try/finally and the cursor context manager guarantee that both the
# cursor and the connection are released even if the query or fetch raises.
try:
    with conn.cursor() as cur:
        # DB_TABLE is a constant defined above, not user input, so it is
        # interpolated directly into the statement.
        cur.execute(f"""
    SELECT person_id, external_person_id
    FROM {DB_TABLE};
""")

        # `data` is a list of 2-tuples, one per row returned by the query
        data = cur.fetchall()
finally:
    conn.close()


# Turn the rows fetched from the MPI into a two-column Spark DataFrame.
# NOTE(review): only column names are supplied as the schema, so column types
# come from the rows themselves — confirm what should happen if `data` is empty.
columns = ["person_id", "external_person_id"]
person = spark.createDataFrame(data=data, schema=columns)

# Open the existing eCR Delta table
ecr = DeltaTable.forPath(spark, ECR_DELTA_TABLE_FILE_PATH)

# For every eCR row whose `person_id` matches an MPI row, overwrite `iris_id`
# with that MPI row's `external_person_id`. Rows without a match are left
# alone because no not-matched clause is given.
merge_condition = "ecr.person_id = mpi_person.person_id"
iris_id_update = {"iris_id": col("mpi_person.external_person_id")}
(
    ecr.alias("ecr")
    .merge(person.alias("mpi_person"), merge_condition)
    .whenMatchedUpdate(set=iris_id_update)
    .execute()
)
