In [0]:
import json
import requests
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import catalog, jobs, pipelines
import time

In [0]:
def switch_pipeline(apiToken, ApiUrl, PipelineID, action="stop", fullRefresh=False, cause="Triggered by API", timeout=60):
    """Start or stop a pipeline by POSTing to its REST endpoint.

    Args:
        apiToken: Token sent as ``Authorization: Bearer <apiToken>``.
        ApiUrl: Base URL; the pipeline ID and the action suffix are appended to it.
        PipelineID: Identifier of the pipeline to act on.
        action: ``"start"`` posts to ``<ApiUrl>/<PipelineID>/updates``;
            ``"stop"`` posts to ``<ApiUrl>/<PipelineID>/stop``.
        fullRefresh: Sent as ``full_refresh`` in the start payload; unused for stop.
        cause: Sent as ``cause`` in the start payload; unused for stop.
        timeout: Seconds passed to ``requests.post`` so an unresponsive endpoint
            cannot block the notebook forever.

    Returns:
        The ``requests`` response object, so callers can inspect the status code
        or body instead of relying on the printed output.

    Raises:
        ValueError: If ``action`` is neither ``"start"`` nor ``"stop"``.
    """
    # Resolve the endpoint and payload first so an invalid action fails
    # before any network traffic happens.
    if action == "start":
        endpoint = f"{ApiUrl}/{PipelineID}/updates"
        payload = {
            "full_refresh": fullRefresh,
            "cause": cause
        }
    elif action == "stop":
        endpoint = f"{ApiUrl}/{PipelineID}/stop"
        payload = {}
    else:
        raise ValueError("Invalid action. Must be 'start' or 'stop'.")

    # Both actions send the same headers.
    headers = {
        "Authorization": f"Bearer {apiToken}",
        "Content-Type": "application/json"
    }

    response = requests.post(endpoint, headers=headers, data=json.dumps(payload), timeout=timeout)
    print(f"{action.capitalize()} response status:", response.status_code)
    try:
        print("Response body:", response.json())
    except ValueError:
        # JSON decoding errors are ValueError subclasses; fall back to the raw text.
        print("No JSON response:", response.text)
    return response

In [0]:
def get_pipeline_status(apiToken, ApiUrl, pipelineID, timeout=60):
    """Retrieve the current state of a pipeline (e.g. IDLE, RUNNING, STOPPING).

    Args:
        apiToken: Token sent as ``Authorization: Bearer <apiToken>``.
        ApiUrl: Base URL; the pipeline ID is appended to it.
        pipelineID: Identifier of the pipeline to query.
        timeout: Seconds passed to ``requests.get`` so a status check against an
            unresponsive endpoint cannot block the notebook forever.

    Returns:
        The ``"state"`` field of the JSON body on HTTP 200 (``None`` if the field
        is absent), the string ``"NOT_FOUND"`` on HTTP 404, and ``None`` for any
        other status code (after printing that code).
    """
    headers = {
        "Authorization": f"Bearer {apiToken}",
        "Content-Type": "application/json"
    }
    status_url = f"{ApiUrl}/{pipelineID}"

    response = requests.get(status_url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.json().get("state")
    if response.status_code == 404:
        return "NOT_FOUND"

    print("Failed to retrieve pipeline status. Status code:", response.status_code)
    return None

In [0]:
def get_environment():
    """Return the cluster's ``x_Environment`` tag value.

    Reads the ``spark.databricks.clusterUsageTags.clusterAllTags`` Spark conf
    entry, parses it as a JSON list of ``{"key": ..., "value": ...}`` objects,
    and looks up ``x_Environment``.

    Returns:
        The tag value, or the string ``"Unknown"`` when the tag is missing or
        the tags cannot be read or parsed.
    """
    try:
        value = json.loads(spark.conf.get("spark.databricks.clusterUsageTags.clusterAllTags"))

        tags_dict = {tag["key"]: tag["value"] for tag in value}

        return tags_dict.get("x_Environment", "Unknown")
    except Exception as e:
        # Best-effort lookup: any failure (conf missing, bad JSON, unexpected
        # shape) is reported and mapped to the same "Unknown" string as a
        # missing tag, so the return type is always a single string.
        print(f"Error retrieving environment tags: {e}")
        return "Unknown"

In [0]:
def get_sub_environment():
    """Return the cluster's ``x_SubEnvironment`` tag value, or ``"Unknown"``.

    The tags come from the ``spark.databricks.clusterUsageTags.clusterAllTags``
    Spark conf entry, parsed as a JSON list of key/value objects. Any error
    while reading or parsing is printed and yields ``"Unknown"``.
    """
    try:
        raw_tags = spark.conf.get("spark.databricks.clusterUsageTags.clusterAllTags")

        # Flatten the list of {"key": ..., "value": ...} entries into one mapping.
        lookup = {}
        for entry in json.loads(raw_tags):
            lookup[entry["key"]] = entry["value"]

        return lookup.get("x_SubEnvironment", "Unknown")
    except Exception as err:
        print(f"Error retrieving environment tags: {err}")
        return "Unknown"

In [0]:
def get_jdbc_url(environment, subEnvironment, database):
    """Build the SQL Server JDBC URL for the given environment and database.

    The host name is ``<environment>-analytics-<sub>-01-sql.database.windows.net``
    where ``<sub>`` is ``subEnvironment``, except that ``'qa'`` is replaced by
    ``'qae'``. The port is fixed at 1433.
    """
    # The 'qa' sub-environment uses 'qae' in its host name.
    host_suffix = 'qae' if subEnvironment == 'qa' else subEnvironment
    host = f"{environment}-analytics-{host_suffix}-01-sql.database.windows.net"
    port = 1433

    return (
        f"jdbc:sqlserver://{host}:{port};"
        f"databaseName={database};"
        "encrypt=true;"
        "trustServerCertificate=false;"
        "hostNameInCertificate=*.database.windows.net;"
        "loginTimeout=30;"
    )

In [0]:
def get_connection_properties(username, password):
    """Return the JDBC connection properties dict for the SQL Server driver.

    The result has the keys ``user``, ``password`` and ``driver``; the driver
    is always ``com.microsoft.sqlserver.jdbc.SQLServerDriver``.
    """
    props = {"user": username}
    props["password"] = password
    props["driver"] = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    return props
    

In [0]:
def execute_sql_query(query, jdbc_url, connection_properties):
    """Run ``query`` over JDBC via ``spark.read.jdbc`` and return its result.

    The query is wrapped as ``(<query>) as tmp`` and passed as the ``table``
    argument, with ``jdbc_url`` as ``url`` and ``connection_properties`` as
    ``properties``.
    """
    # Wrap the query as an aliased subquery for the reader's table argument.
    subquery = "({}) as tmp".format(query)
    reader = spark.read
    return reader.jdbc(url=jdbc_url, table=subquery, properties=connection_properties)