# Marketplace News setup

## Overview

The Databricks SDK for Python is documented <a href="https://learn.microsoft.com/en-us/azure/databricks/dev-tools/sdk-python" target="_blank">here</a>.

## Step 1: Install the Databricks SDK

In [0]:
%pip install databricks-sdk --upgrade

In [0]:
# Restart the notebook's Python process.
# NOTE(review): presumably required so the freshly upgraded databricks-sdk is the version that gets imported — confirm.
dbutils.library.restartPython()

In [0]:
%pip show databricks-sdk | grep -oP '(?<=Version: )\S+'

## Step 2: Authentication
Python authentication as a service principal (OAuth M2M) is documented <a href="https://learn.microsoft.com/en-us/azure/databricks/dev-tools/auth/oauth-m2m#python" target="_blank">here</a>.

In [0]:
# Read the current workspace URL from the Spark configuration and show it.
workspace_url = spark.conf.get("spark.databricks.workspaceUrl")
display(workspace_url)

### Auto Authentication Setup via Secrets

In [0]:
# List the available secret scopes; the bare expression on the last line
# renders the result as the cell output.
scopes = dbutils.secrets.listScopes()
scopes

In [0]:
# List the secrets stored in the scope used by this notebook; the bare
# expression on the last line renders the result as the cell output.
secrets = dbutils.secrets.list("scp-ag83-eb-datanostrum-005")
secrets

In [0]:
# Notebook parameter holding the Databricks account ID (empty by default).
dbutils.widgets.text(name='account_id', defaultValue='')

import os

# Secret scope that stores the service principal's OAuth credentials.
SECRET_SCOPE = "scp-ag83-eb-datanostrum-005"

# Service-principal client ID and client secret are read from the secret
# scope so they never appear in the notebook source.
sp_cid = dbutils.secrets.get(scope=SECRET_SCOPE, key="scr-ag83-eb-datanostrum-admin-005-uuid")
sp_tkn = dbutils.secrets.get(scope=SECRET_SCOPE, key="scr-ag83-eb-datanostrum-admin-005-token")

# The workspace URL read from the Spark configuration may be a bare hostname.
# The SDK client expects a full URL as `host`, so add the scheme when it is
# missing; a value that already carries a scheme is kept unchanged.
ws_url = workspace_url if "://" in workspace_url else "https://" + workspace_url
ac_url = "https://accounts.azuredatabricks.net"
ac_id = dbutils.widgets.get('account_id')

In [0]:
# Echo the resolved configuration as a quick sanity check.
display("sp_cid: " + sp_cid)
# Never echo the client secret itself: only report whether one was retrieved,
# so the credential cannot end up in cell output or exported notebooks.
display("sp_tkn: " + ("<set>" if sp_tkn else "<missing>"))
display("ws_url: " + ws_url)
display("ac_url: " + ac_url)
display("ac_id: "  + ac_id)

## Step 3: Use the SDK - Marketplace API

### Workspace-level operations

In [0]:
# Input parameters
# Display name of the Marketplace listing this notebook targets.
LISTING_NAME = "BBC/Google/CNN/Reuters News listing"
# Catalog name passed as `catalog_name` when the installation is created.
TARGET_CATALOG = "marketplace_news"

In [0]:
from databricks.sdk import WorkspaceClient

# Workspace-level client, authenticated with the service principal's
# client ID and client secret against the current workspace URL.
w = WorkspaceClient(host=ws_url, client_id=sp_cid, client_secret=sp_tkn)

### Listings

In [0]:
# Materialize every consumer listing returned by the API and show how many
# there are.
cl_list = [*w.consumer_listings.list()]
display(len(cl_list))

In [0]:
import json

# Convert each listing to a plain dict exactly once (the conversion walks the
# whole nested object, so repeating it per field is wasted work).
listing_dicts = (cl.as_dict() for cl in w.consumer_listings.list())

# Keep all top-level fields, but store the nested `detail` and `summary`
# objects as JSON strings rather than nested structures.
cl_dicts = [
    {
        **listing,
        "detail": json.dumps(listing.get("detail"), ensure_ascii=False),
        "summary": json.dumps(listing.get("summary"), ensure_ascii=False),
    }
    for listing in listing_dicts
]
df = spark.createDataFrame(cl_dicts)
display(df)

In [0]:
from pyspark.sql.types import (
    StructType, StructField, StringType, ArrayType, DoubleType, LongType, MapType
)

# Explicit Spark schemas for the nested `detail` and `summary` dicts of a
# listing, so they can be loaded as struct columns with addressable fields
# (e.g. `summary.name`). Every field is declared nullable.

# Schema of the listing `detail` object.
# NOTE(review): the LongType date/`*_at` fields look like epoch timestamps — confirm the unit.
detail_schema = StructType([
    StructField("assets", ArrayType(StringType()), True),
    StructField("collection_date_end", LongType(), True),
    StructField("collection_date_start", LongType(), True),
    StructField("collection_granularity", StructType([
        StructField("interval", LongType(), True),
        StructField("unit", StringType(), True)
    ]), True),
    StructField("cost", StringType(), True),
    StructField("data_source", StringType(), True),
    StructField("description", StringType(), True),
    StructField("documentation_link", StringType(), True),
    StructField("embedded_notebook_file_infos", ArrayType(StructType([
        StructField("created_at", LongType(), True),
        StructField("display_name", StringType(), True),
        StructField("download_link", StringType(), True),
        StructField("file_parent", StructType([
            StructField("file_parent_type", StringType(), True),
            StructField("parent_id", StringType(), True)
        ]), True),
        StructField("id", StringType(), True),
        StructField("marketplace_file_type", StringType(), True),
        StructField("mime_type", StringType(), True),
        StructField("status", StringType(), True),
        StructField("status_message", StringType(), True),
        StructField("updated_at", LongType(), True)
    ])), True),
    StructField("file_ids", ArrayType(StringType()), True),
    StructField("geographical_coverage", StringType(), True),
    StructField("license", StringType(), True),
    StructField("pricing_model", StringType(), True),
    StructField("privacy_policy_link", StringType(), True),
    StructField("size", DoubleType(), True),
    StructField("support_link", StringType(), True),
    StructField("tags", ArrayType(StructType([
        StructField("tag_name", StringType(), True),
        StructField("tag_values", ArrayType(StringType()), True)
    ])), True),
    StructField("terms_of_service", StringType(), True),
    StructField("update_frequency", StructType([
        StructField("interval", LongType(), True),
        StructField("unit", StringType(), True)
    ]), True)
])

# Schema of the listing `summary` object; `summary.name` and
# `summary.share.name` are the fields used later to locate the listing.
summary_schema = StructType([
    StructField("categories", ArrayType(StringType()), True),
    StructField("created_at", LongType(), True),
    StructField("created_by", StringType(), True),
    StructField("created_by_id", LongType(), True),
    StructField("exchange_ids", ArrayType(StringType()), True),
    StructField("git_repo", StructType([
        StructField("git_repo_url", StringType(), True)
    ]), True),
    StructField("listingType", StringType(), True),
    StructField("name", StringType(), True),
    StructField("provider_id", StringType(), True),
    StructField("provider_region", StructType([
        StructField("cloud", StringType(), True),
        StructField("region", StringType(), True)
    ]), True),
    StructField("published_at", LongType(), True),
    StructField("published_by", StringType(), True),
    StructField("setting", StructType([
        StructField("visibility", StringType(), True)
    ]), True),
    StructField("share", StructType([
        StructField("name", StringType(), True),
        StructField("type", StringType(), True)
    ]), True),
    StructField("status", StringType(), True),
    StructField("subtitle", StringType(), True),
    StructField("updated_at", LongType(), True),
    StructField("updated_by", StringType(), True),
    StructField("updated_by_id", LongType(), True)
])

# Top-level row schema: the listing id plus the two nested structs above.
schema = StructType([
    StructField("id", StringType(), True),
    StructField("detail", detail_schema, True),
    StructField("summary", summary_schema, True)
])

# Convert each listing to a plain dict exactly once (the conversion walks the
# whole nested object, so repeating it per field is wasted work).
listing_dicts = (cl.as_dict() for cl in w.consumer_listings.list())

# Keep only the three fields described by `schema`; `detail` and `summary`
# stay nested so they load as struct columns.
cl_dicts = [
    {
        "id": listing.get("id"),
        "detail": listing.get("detail"),
        "summary": listing.get("summary"),
    }
    for listing in listing_dicts
]

df = spark.createDataFrame(cl_dicts, schema=schema)
display(df)

In [0]:
# Register the listings DataFrame as the temp view `listing` so it can be
# queried from %sql cells.
df.createOrReplaceTempView("listing")

In [0]:
%sql
-- Show the listings in the `listing` temp view whose summary name contains
-- the news-bundle substring.
select *
from listing
where contains(summary.name, 'BBC/Google/CNN/Reuters')


In [0]:
from pyspark.sql.functions import col

# Locate the target listing by a substring of its summary name and keep only
# the two values needed for the installation: the listing id and the name of
# its share (selected as column `name`).
result = df.filter(
    col("summary.name").contains("BBC/Google/CNN/Reuters")
).select("id", "summary.share.name")

row = result.first()
if row is None:
    # Fail here with a clear message instead of letting a None id surface
    # later as an opaque TypeError or a failed installation request.
    raise ValueError(
        "No Marketplace listing found whose name contains 'BBC/Google/CNN/Reuters'"
    )

LISTING_ID = row["id"]
LISTING_SHARE_NAME = row["name"]

# f-strings keep the output identical for string values while tolerating a
# null share name, which string concatenation would reject.
display(f"LISTING_ID: {LISTING_ID}")
display(f"LISTING_SHARE_NAME: {LISTING_SHARE_NAME}")

In [0]:
from databricks.sdk.service.marketplace import ConsumerTerms

# ConsumerTerms object, built according to the signature of the installed SDK.
consumer_terms = ConsumerTerms(version="2023-01")

# Arguments for installing the selected listing into the target catalog.
install_request = {
    "listing_id": LISTING_ID,
    "share_name": LISTING_SHARE_NAME,
    "catalog_name": TARGET_CATALOG,
    "accepted_consumer_terms": consumer_terms,
}
installation = w.consumer_installations.create(**install_request)

# Report the id of the new installation and the catalog it was installed into.
print(f"Installazione creata con ID: {installation.installation.id}")
print(f"Catalogo UC: {installation.installation.catalog_name}")

In [0]:
%sql
-- Let the `account users` group use and query the installed catalog.
-- The catalog name is hard-coded here and must match TARGET_CATALOG.
-- NOTE(review): USE SCHEMA and SELECT are granted at catalog level; this
-- presumably relies on privilege inheritance to cover the schemas and tables
-- inside the catalog — confirm.
GRANT USE CATALOG ON CATALOG marketplace_news TO `account users`;
GRANT USE SCHEMA ON CATALOG marketplace_news TO `account users`;
GRANT SELECT ON CATALOG marketplace_news TO `account users`;

In [0]:
# Example: deleting the installation (left commented out on purpose)

# w.consumer_installations.delete(
#     listing_id=LISTING_ID,
#     installation_id=installation.installation.id
# )