# Notebook to synchronize catalogs and schemas
---

### Parameters
* Source Workspace : URL of the source workspace (widget `source_workspace`)
* Destination Workspace : URL of the destination workspace (widget `destination_workspace`)

* A list of dictionaries (`cats_to_copy`), one per catalog, each keyed by: 
  * the name of the catalog to sync, with
  * the storage root (ADLS path used as the catalog's storage, since each catalog has its own storage account)
  * the schemas, listed by name, to sync

In [0]:
# Declare the two text widgets of the notebook; both start out empty.
dbutils.widgets.text(
    name="source_workspace",
    defaultValue="",
    label="Source Workspace",
)
dbutils.widgets.text(
    name="destination_workspace",
    defaultValue="",
    label="Destination Workspace",
)


In [0]:
# Read the current widget values (source first, then destination).
source_workspace, destination_workspace = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("source_workspace", "destination_workspace")
)

In [0]:
# Configuration.
# One single-key dict per catalog: the key is the catalog name, the value holds
# the target storage root and the names of the schemas to create in it.
cats_to_copy = [
    {
        "catadb360dev": {
            "storageroot": "abfss://fsms@dlg2metastoredevwestlp6m.dfs.core.windows.net/",
            "schemas": [
                {"schema": "schemaadb360dev"},
                {"schema": "anotherschema"},
            ],
        },
    },
    {
        "catadb360prd": {
            "storageroot": "abfss://fsms@dlg2metastoreprdwestlp6m.dfs.core.windows.net/",
            "schemas": [
                {"schema": "schemaadb360prd"},
            ],
        },
    },
]


In [0]:
# Storage root of the first configuration entry keyed by "catadb360dev".
storageroot_dev = next(
    entry["catadb360dev"]["storageroot"]
    for entry in cats_to_copy
    if "catadb360dev" in entry
)

In [0]:
# Derive the catalog names to work on from the configuration above:
# the first (and only) key of every entry in cats_to_copy.
catalogs_list = [list(entry)[0] for entry in cats_to_copy]

In [0]:
# Sanity-check the derived catalog list. Each catalog gets its own assert so
# the failure message names the catalog that is actually missing.
assert "catadb360dev" in catalogs_list, "catadb360dev is not in the catalogs_list"
assert "catadb360prd" in catalogs_list, "catadb360prd is not in the catalogs_list"
assert len(catalogs_list) == 2, f"catalogs_list is not 2 (got {len(catalogs_list)})"

In [0]:
# Service-principal credentials, read from the Key Vault-backed secret scope 'kvbacked'.
clientid = dbutils.secrets.get(scope='kvbacked', key='clientid')
clientsecret = dbutils.secrets.get(scope='kvbacked', key='clientsecret')
tenantid = dbutils.secrets.get(scope='kvbacked', key='tenantid')


In [0]:
# imports
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import catalog


In [0]:
# create connections to source and destination workspace
# One client per workspace; both authenticate with the same service principal
# and differ only in the host they talk to.
sourceWs = WorkspaceClient(
    host=source_workspace,
    azure_client_id=clientid,
    azure_client_secret=clientsecret,
    azure_tenant_id=tenantid,
)
destWs = WorkspaceClient(
    host=destination_workspace,
    azure_client_id=clientid,
    azure_client_secret=clientsecret,
    azure_tenant_id=tenantid,
)

In [0]:
# Look up the configured catalogs in both workspaces.
# Source side: expected to contain all configured catalogs or a subset of them.
source_cats = [
    cat_info
    for cat_info in sourceWs.catalogs.list()
    if cat_info.name in catalogs_list
]
# Destination side: expected to contain none of the configured catalogs.
dest_cats = [
    cat_info
    for cat_info in destWs.catalogs.list()
    if cat_info.name in catalogs_list
]

In [0]:
# Sanity-check the filtered lookups against the configured catalog list
# (the bounds follow len(catalogs_list) instead of a hard-coded 2).
# The source may hold all configured catalogs or only a subset of them,
# but never more matches than there are configured names.
assert len(source_cats) <= len(catalogs_list), (
    f"found {len(source_cats)} matching source catalogs, "
    f"more than the {len(catalogs_list)} configured"
)
# At least one configured catalog must still be missing in the destination;
# if all of them already exist there is nothing left to sync.
assert len(dest_cats) < len(catalogs_list), (
    "all configured catalogs already exist in the destination workspace"
)

In [0]:
# Find the catalogs to actually sync: configured source catalogs whose name is
# not yet present in the destination. The name set is built once up front
# instead of being rebuilt for every source catalog.
dest_cat_names = {dest_cat.name for dest_cat in dest_cats}
catalogs_to_sync = [cat for cat in source_cats if cat.name not in dest_cat_names]


In [0]:
# Sync the catalogs: create every missing catalog in the destination workspace
# and then create its configured schemas.
# The loop variable is deliberately not called `catalog`, so it does not rebind
# the `catalog` module imported from databricks.sdk.service.
for source_cat in catalogs_to_sync:
    # Catalogs backed by a connection or a share are not created here.
    if source_cat.connection_name or source_cat.share_name:
        print(f"Skipping {source_cat.name} as it is an external catalog or a shared catalog")
        continue

    print(f"working on catalog {source_cat.name}")
    catalog_name = source_cat.name
    # Only the comment is carried over; the source catalog's options and
    # properties are not propagated to the destination.
    catalog_comment = source_cat.comment

    # Create the catalog if it doesn't exist already (checked live against the destination).
    if catalog_name not in {cat.name for cat in destWs.catalogs.list()}:
        print("creating the catalog")
        storageroot = next(cat[catalog_name]["storageroot"] for cat in cats_to_copy if catalog_name in cat)
        destWs.catalogs.create(name=catalog_name, storage_root=storageroot, comment=catalog_comment)

    # Collect the configured schema names for this catalog.
    schemas = [schema["schema"] for cat in cats_to_copy if catalog_name in cat for schema in cat[catalog_name]["schemas"]]
    # Schemas already present in the destination catalog are skipped so a
    # configured name that exists there does not abort the whole run.
    existing_schemas = {dest_schema.name for dest_schema in destWs.schemas.list(catalog_name=catalog_name)}
    for schema in schemas:
        if schema in existing_schemas:
            print(f"\tschema {schema} already exists, skipping")
            continue
        print(f"\tcreating schema {schema}")
        destWs.schemas.create(name=schema, catalog_name=catalog_name)


print('Done!')
