In [0]:
%run ./data_utility_modules

In [0]:
import json

# Load the per-schema column definitions; a later cell indexes this mapping
# with the `schema` widget value to obtain the columns for the new tables.
# JSON is UTF-8 by specification, so pin the encoding instead of relying on
# the platform's default text encoding.
with open("schema_config.json", "r", encoding="utf-8") as config_file:
    schema_config = json.load(config_file)


In [0]:
# Read the notebook parameters from their widgets.
# Each value is returned by dbutils.widgets.get for the widget of the same name.
_get_widget = dbutils.widgets.get

source_folder = _get_widget("source_folder")
table_name = _get_widget("table_name")
schema = _get_widget("schema")
table_keys = _get_widget("table_keys")
job_name = _get_widget("job_name")

In [0]:
import json

# Register ETL metadata for the bronze and silver tables of this feed and
# display the resulting rows from the metadata table.

# Checkpoint values stored with each table's metadata.
# NOTE(review): presumably the starting watermark for each ETL -- confirm
# against SchemaManager.add_new_table_etl.
BRONZE_CHECKPOINT = '2020-01-01 00:00:00'
SILVER_CHECKPOINT = '2025-06-01 00:00:00'

# Initialize SchemaManager
schema_mgr = SchemaManager(spark)

# Resolve the column definitions for the schema named by the `schema` widget.
# Fail with an actionable message (still a KeyError) when the name is unknown.
if schema not in schema_config:
    raise KeyError(
        f"Schema '{schema}' is not defined in schema_config.json; "
        f"available schemas: {sorted(schema_config)}"
    )
schema_dict = schema_config[schema]

# Get the current catalog name
current_catalog = spark.catalog.currentCatalog()

# Fully qualified names of the two tables managed by this notebook.
bronze_table = f"{current_catalog}.ncp.{table_name}_bronze"
silver_table = f"{current_catalog}.ncp.{table_name}_silver"

# Define metadata updates (the silver entry records the bronze table as its source)
silver_metadata_updates = {
    "checkpoint": SILVER_CHECKPOINT,
    "source_table": bronze_table,
    "table_keys": table_keys,
}

bronze_metadata_updates = {"table_keys": table_keys, "checkpoint": BRONZE_CHECKPOINT}

# (fully qualified table name, metadata) pairs; also consumed by the table-creation cell.
tables = [
    (bronze_table, bronze_metadata_updates),
    (silver_table, silver_metadata_updates),
]

# Insert the metadata for both the bronze and the silver table
for full_table_name, metadata in tables:
    schema_mgr.add_new_table_etl(full_table_name, schema_dict, metadata)

    # Show the stored row(s). Filtering through a column expression instead of
    # interpolating the name into SQL text avoids quoting/injection problems
    # with the widget-derived table name. The table is looked up inside the
    # loop, i.e. after the insert above has run.
    metadata_rows = spark.table(f"{current_catalog}.ncp.metadata_table")
    display(metadata_rows.where(metadata_rows["table_name"] == full_table_name))

In [0]:
# Create the Delta tables listed in `tables` using the columns in `schema_dict`.

# The column list is identical for every table, so build it once.
column_definitions = ", ".join(
    f"{col} {dtype}" for col, dtype in schema_dict.items()
)

for full_table_name, metadata in tables:
    # Use a dedicated local name so the notebook-level `table_keys` widget
    # value is not overwritten by this loop.
    primary_keys = metadata.get("table_keys")
    table_properties = (
        f"TBLPROPERTIES (primaryKey='{primary_keys}')" if primary_keys else ""
    )

    # A plain CREATE TABLE raises when the table is already there, which makes
    # the notebook impossible to re-run; report and skip instead.
    if spark.catalog.tableExists(full_table_name):
        print(f"Table {full_table_name} already exists; skipping creation.")
        continue

    spark.sql(
        f"""
        CREATE TABLE {full_table_name} ({column_definitions})
        USING DELTA {table_properties}
        """
    )

    # Check if the table was created
    if spark.catalog.tableExists(full_table_name):
        print(f"Table {full_table_name} was successfully created.")
    else:
        print(f"Failed to create table {full_table_name}.")

In [0]:
from databricks_api import DatabricksAPI

# Read the API URL, API token and user name from the notebook context.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
DATABRICKS_INSTANCE = notebook_context.apiUrl().get()
DATABRICKS_TOKEN = notebook_context.apiToken().get()
current_user = notebook_context.userName().get()

# Define the volume path and parameters
volume_path = "abfss://analytics-data@mlanalyticsstore01.dfs.core.windows.net/nuvei-simplex-sftp-nuvei-user/NCP"
source_path = f"{volume_path}/{source_folder}"
bronze_table = f"{current_catalog}.ncp.{table_name}_bronze"
silver_table = f"{current_catalog}.ncp.{table_name}_silver"

# Initialize Databricks API client
db = DatabricksAPI(host=DATABRICKS_INSTANCE, token=DATABRICKS_TOKEN)


def _shared_task_settings():
    """Return a fresh dict of the retry/timeout settings used by both tasks."""
    return {
        "max_retries": 0,
        "min_retry_interval_millis": 900000,
        "retry_on_timeout": False,
        "disable_auto_optimization": True,
        "timeout_seconds": 0,
        "email_notifications": {},
        "webhook_notifications": {},
    }


# Task 1: bronze auto loader notebook, parameterized with source path and target table.
bronze_task = {
    "task_key": "bronze_auto_loader",
    "run_if": "ALL_SUCCESS",
    "notebook_task": {
        "notebook_path": "src/ncp_pipelines/generic_etls/bronze_auto_loader",
        "base_parameters": {
            "SOURCE_PATH": source_path,
            "TARGET_TABLE": bronze_table,
            "OPERATIONAL_VOLUME": f"/Volumes/{current_catalog}/default/operational/prod",
        },
        "source": "GIT",
    },
    # NOTE(review): this payload has no "job_clusters" entry defining
    # "fraud_feature" -- confirm the key resolves when the job is created.
    "job_cluster_key": "fraud_feature",
    **_shared_task_settings(),
}

# Task 2: silver batch ETL notebook, runs after the bronze task.
silver_task = {
    "task_key": "silver_batch_etl",
    "depends_on": [{"task_key": "bronze_auto_loader"}],
    "run_if": "ALL_SUCCESS",
    "notebook_task": {
        "notebook_path": "src/ncp_pipelines/generic_etls/silver_batch_etl",
        "base_parameters": {"TARGET_TABLE": silver_table},
        "source": "GIT",
    },
    **_shared_task_settings(),
}

# Define the workflow JSON
workflow_json = {
    "name": job_name,
    "email_notifications": {
        "on_failure": [current_user],
        "no_alert_for_skipped_runs": True,
    },
    "webhook_notifications": {},
    "timeout_seconds": 0,
    "max_concurrent_runs": 1,
    "tasks": [bronze_task, silver_task],
    "git_source": {
        "git_url": "https://dev.azure.com/nuvei/AI%20Analytics/_git/databricks_etls",
        "git_provider": "azureDevOpsServices",
        "git_branch": "main",
    },
    "tags": {"job_name": job_name, "team": "bpa", "type": "ncp_ingestion"},
    # "run_as": {"service_principal_name": "ef2a4258-8195-4d34-8e97-99fad4c1d1b5"},
}

# Create the workflow
# NOTE(review): a later cell in this notebook also creates a job named
# `job_name` through the Databricks SDK -- confirm both creations are intended.
result = db.jobs.create_job(**workflow_json)
print(result)
# job_id = result["job_id"]

# # Update the workflow
# workflow_json["run_as"] = {"service_principal_name": "ef2a4258-8195-4d34-8e97-99fad4c1d1b5"}
# update = db.jobs.update_job(job_id=job_id, new_settings=workflow_json)



In [0]:
from databricks.sdk.service.jobs import JobSettings as Job

# Assemble the job settings from named pieces, then convert them with
# JobSettings.from_dict.

# Bronze task: auto loader notebook running on the job cluster keyed by `table_name`.
bronze_loader_task = {
    "task_key": "bronze_auto_loader",
    "notebook_task": {
        "notebook_path": "src/ncp_pipelines/generic_etls/bronze_auto_loader",
        "base_parameters": {
            "SOURCE_PATH": source_path,
            "TARGET_TABLE": bronze_table,
            "OPERATIONAL_VOLUME": f"/Volumes/{current_catalog}/default/operational/prod",
        },
        "source": "GIT",
    },
    "job_cluster_key": table_name,
    "max_retries": 1,
    "min_retry_interval_millis": 900000,
}

# Silver task: batch ETL notebook that depends on the bronze task
# (no job_cluster_key is set for this task).
silver_etl_task = {
    "task_key": "silver_batch_etl",
    "depends_on": [{"task_key": "bronze_auto_loader"}],
    "notebook_task": {
        "notebook_path": "src/ncp_pipelines/generic_etls/silver_batch_etl",
        "base_parameters": {"TARGET_TABLE": silver_table},
        "source": "GIT",
    },
    "max_retries": 1,
    "min_retry_interval_millis": 900000,
    "disable_auto_optimization": False,
}

# Job cluster referenced by the bronze task through `job_cluster_key`.
ingestion_job_cluster = {
    "job_cluster_key": table_name,
    "new_cluster": {
        "spark_version": "16.4.x-scala2.13",
        "azure_attributes": {
            "first_on_demand": 1,
            "availability": "SPOT_WITH_FALLBACK_AZURE",
            "spot_bid_max_price": 100,
        },
        "node_type_id": "Standard_D8ds_v5",
        "spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
        "policy_id": "001CFA1F0598B866",
        "data_security_mode": "SINGLE_USER",
        "runtime_engine": "STANDARD",
        "kind": "CLASSIC_PREVIEW",
        "is_single_node": False,
        "autoscale": {"min_workers": 1, "max_workers": 4},
    },
}

ingestion_job_settings = {
    "name": job_name,
    "email_notifications": {
        "on_failure": [current_user],
        "no_alert_for_skipped_runs": True,
    },
    "tasks": [bronze_loader_task, silver_etl_task],
    "job_clusters": [ingestion_job_cluster],
    "git_source": {
        "git_url": "https://dev.azure.com/nuvei/AI%20Analytics/_git/databricks_etls",
        "git_provider": "azureDevOpsServices",
        "git_branch": "main",
    },
    "tags": {"job_name": job_name, "team": "bpa", "type": "ncp_ingestion"},
    "performance_target": "PERFORMANCE_OPTIMIZED",
}

fraud_features_ingestion = Job.from_dict(ingestion_job_settings)

from databricks.sdk import WorkspaceClient

# Create the job through the Databricks SDK workspace client. The settings
# object is expanded into keyword arguments, and the call is left as the last
# expression so the notebook displays its return value.
w = WorkspaceClient()
job_create_kwargs = fraud_features_ingestion.as_shallow_dict()
w.jobs.create(**job_create_kwargs)
