In [0]:
# This getting-started guide is based on PySpark; however, you can use the equivalent Scala version as well.
# You can import and run this notebook in Azure Databricks.
# See the getting-started guide:
#https://github.com/moderakh/azure-sdk-for-java/blob/users/moderakh/20210303-spark-getting-started-docs/sdk/cosmos/azure-cosmos-spark_3-0_2-12/docs/quick-start.md

In [0]:
# Connection settings: the Cosmos DB account endpoint and key, plus the database
# and container this notebook works against.
# NOTE: the endpoint and key below are placeholders -- replace them before running,
# and avoid committing a real account key.
cosmosEndpoint = "https://REPLACEME.documents.azure.com:443/"
cosmosMasterKey = "REPLACEME"
cosmosDatabaseName = "sampleDB"
cosmosContainerName = "sampleContainer"

# Options passed to the "cosmos.items" data source by the read and write cells.
cfg = dict(
    [
        ("spark.cosmos.accountEndpoint", cosmosEndpoint),
        ("spark.cosmos.accountKey", cosmosMasterKey),
        ("spark.cosmos.database", cosmosDatabaseName),
        ("spark.cosmos.container", cosmosContainerName),
    ]
)

In [0]:
# Register the Cosmos catalog implementation under the name "cosmosCatalog" and
# give it the account endpoint and key. Settings are applied in the listed order.
for _confKey, _confValue in (
    ("spark.sql.catalog.cosmosCatalog", "com.azure.cosmos.spark.CosmosCatalog"),
    ("spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint", cosmosEndpoint),
    ("spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey", cosmosMasterKey),
):
    spark.conf.set(_confKey, _confValue)

In [0]:
# Create the Cosmos DB database through the catalog API; the statement uses
# IF NOT EXISTS, so re-running this cell is safe when the database is already there.
spark.sql(f"CREATE DATABASE IF NOT EXISTS cosmosCatalog.{cosmosDatabaseName};")

In [0]:
# Create the Cosmos DB container through the catalog API (IF NOT EXISTS), with
# partition key path '/id' and manualThroughput set to '1100'.
spark.sql(
    f"CREATE TABLE IF NOT EXISTS cosmosCatalog.{cosmosDatabaseName}.{cosmosContainerName} "
    "using cosmos.items TBLPROPERTIES(partitionKeyPath = '/id', manualThroughput = '1100')"
)

In [0]:
# Build a two-row sample DataFrame (columns: id, name, age, isAlive) and append
# it to the container described by cfg.
(
    spark.createDataFrame(
        (
            ("cat-alive", "Schrodinger cat", 2, True),
            ("cat-dead", "Schrodinger cat", 2, False),
        )
    )
    .toDF("id", "name", "age", "isAlive")
    .write
    .format("cosmos.items")
    .options(**cfg)
    .mode("APPEND")
    .save()
)

In [0]:
# Read the container back with schema inference enabled, then show only the rows
# whose isAlive column equals True.
from pyspark.sql.functions import col

df = (
    spark.read
    .format("cosmos.items")
    .options(**cfg)
    .option("spark.cosmos.read.inferSchemaEnabled", "true")
    .load()
)

# `== True` is intentional here: it builds a Column expression for the filter.
df.filter(col("isAlive") == True).show()

In [0]:
# Load the container again with schema inference enabled and print the schema
# of the resulting DataFrame.
df = (
    spark.read
    .format("cosmos.items")
    .options(**cfg)
    .option("spark.cosmos.read.inferSchemaEnabled", "true")
    .load()
)

df.printSchema()