In [0]:
import os
import IPython
import uuid
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.types import *
import requests
import http.client
import json
from azure.identity import DefaultAzureCredential, ClientSecretCredential
import pandas as pd
from typing import Tuple
import re
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Spark column expression that produces a fresh random UUID string per row.
# uuid4() returns a different value on every call, so the UDF is flagged as
# non-deterministic: Spark assumes UDFs are deterministic by default and may
# otherwise eliminate or duplicate invocations during optimization.
newid = udf(lambda: str(uuid.uuid4()), StringType()).asNondeterministic()

In [0]:
# Engine bootstrap: read settings from the environment, authenticate against
# the API with a client-secret credential, fetch the engine description and
# register the engine's storage account key with Spark.
engineId = os.getenv("ENGINE_ID")
tenant_id = os.getenv("TENANT_ID")
client_id = os.getenv("CLIENT_ID")
domain = os.getenv("DOMAIN")
ygdra_api = os.getenv("API_URL")
keyvault = os.getenv("KEYVAULT_NAME")

# Fail fast with a readable message: an unset variable would otherwise only
# surface later as a TypeError on string concatenation or inside dbutils.
_required_settings = {
  "ENGINE_ID": engineId,
  "TENANT_ID": tenant_id,
  "CLIENT_ID": client_id,
  "DOMAIN": domain,
  "API_URL": ygdra_api,
  "KEYVAULT_NAME": keyvault,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
  raise EnvironmentError("Missing required environment variable(s): " + ", ".join(_missing_settings))

# Get client secret from vault
client_secret = dbutils.secrets.get(keyvault, "clientsecret")

# Create scope url
scope = "https://" + domain + "/" + client_id + "/.default"

# Get access token and build the headers reused by every API call below
credential = ClientSecretCredential(tenant_id, client_id, client_secret)
access_token = credential.get_token(scope)
headers = { 'Content-Type': 'application/json', 'Authorization': 'Bearer ' + access_token.token}

url = ygdra_api + "/api/engines/daemon/" + engineId

# NOTE(review): verify = False disables TLS certificate verification for this
# call; confirm this is still required for the target environment.
res = requests.get(url, headers = headers, verify = False)

# Surface HTTP errors (401/404/5xx) here instead of as a KeyError or JSON
# decoding error further down.
res.raise_for_status()

engine = res.json()

accountName = engine["storageName"] # from engine.storageName
accountKey = "dsLake-" + engine["storageName"] # secret name derived from engine.storageName

# Get the secret value
accountKeyValue = dbutils.secrets.get(keyvault, accountKey)

# Register the storage account key so Spark can access input and output paths
spark.conf.set("fs.azure.account.key." + accountName + ".dfs.core.windows.net", accountKeyValue)


In [0]:
def add_metric(payload):
  """POST a metric payload to the API's app-insights daemon metrics endpoint.

  The target URL is assembled from the notebook globals ygdra_api,
  engine["id"], dataSourceName, entityName and version (the last three are
  expected to be defined elsewhere in the notebook).

  Args:
    payload: request body, passed to requests.post unchanged as `data`.

  Returns:
    The requests response object; its status is not checked here.
  """
  endpoint = "".join([
    ygdra_api,
    "/api/appinsights/",
    engine["id"],
    "/daemon/metrics/",
    dataSourceName,
    "/",
    entityName,
    "/",
    version,
  ])
  # verify=False: TLS certificate verification is disabled for this call.
  return requests.post(endpoint, headers=headers, verify=False, data=payload)

def add_pandas_metric(payload):
  """POST a metric payload to the "/pandas" variant of the metrics endpoint.

  The target URL is assembled from the notebook globals ygdra_api,
  engine["id"], dataSourceName, entityName and version (the last three are
  expected to be defined elsewhere in the notebook), followed by "/pandas".

  Args:
    payload: request body, passed to requests.post unchanged as `data`.

  Returns:
    The requests response object; its status is not checked here.
  """
  endpoint = "".join([
    ygdra_api,
    "/api/appinsights/",
    engine["id"],
    "/daemon/metrics/",
    dataSourceName,
    "/",
    entityName,
    "/",
    version,
    "/pandas",
  ])
  # verify=False: TLS certificate verification is disabled for this call.
  return requests.post(endpoint, headers=headers, verify=False, data=payload)

def get_entity(entityName):
  """Fetch an entity from the API's data-factories daemon endpoint.

  Args:
    entityName: entity name appended to the endpoint URL as-is (no URL
      encoding is applied). Shadows any notebook-level `entityName`.

  Returns:
    The response body decoded from JSON. The HTTP status is not checked
    before decoding.
  """
  endpoint = "".join([
    ygdra_api,
    "/api/datafactories/",
    engine["id"],
    "/daemon/entities/",
    entityName,
  ])
  # verify=False: TLS certificate verification is disabled for this call.
  response = requests.get(endpoint, headers=headers, verify=False)
  return json.loads(response.text)


def get_path(container, path):
  """Build an abfss:// URI for `path` inside `container`.

  The storage account name is taken from the notebook-global
  engine["storageName"].

  Args:
    container: container name placed before the "@".
    path: path appended after the account host, unchanged.

  Returns:
    The URI as a string.
  """
  return "".join([
    "abfss://",
    container,
    "@",
    engine["storageName"],
    ".dfs.core.windows.net/",
    path,
  ])