In [1]:
# Credentials handed to the connector through its appId / appKey / tenantId options.
# Replace each placeholder before running; avoid committing real secrets with the notebook.
appid, appkey, tenantid = "<appId>", "<appKey>", "<tenantId>"

# Storage target: the account endpoint (dfs.core.windows.net) and the container used by the demo.
storageAccountName = "<storageAccount>.dfs.core.windows.net"
container = "<demoContainerName>"

In [2]:
# Implicit write case
from pyspark.sql.types import *
from pyspark.sql import functions, Row
from decimal import Decimal
from datetime import datetime

# Write a CDM entity with Parquet data files, entity definition is derived from the dataframe schema
d = datetime.strptime("2015-03-31", '%Y-%m-%d')
ts = datetime.now()

# Decimals are built from string literals so each value is exactly the digits written here;
# Decimal(<float>) would capture the float's binary approximation instead
# (e.g. Decimal(18.8) is 18.800000000000000710...).
# Column order of each row follows the schema below.
data = [
  ["a", 1, True, 12.34, 6, d, ts, Decimal("1.4337879"), Decimal("999.00"), Decimal("18.8")],
  ["b", 1, True, 12.34, 6, d, ts, Decimal("1.4337879"), Decimal("999.00"), Decimal("18.8")]
]

# One nullable column per supported primitive type, including three decimal
# columns with different precision/scale.
schema = (StructType()
  .add(StructField("name", StringType(), True))
  .add(StructField("id", IntegerType(), True))
  .add(StructField("flag", BooleanType(), True))
  .add(StructField("salary", DoubleType(), True))
  .add(StructField("phone", LongType(), True))
  .add(StructField("dob", DateType(), True))
  .add(StructField("time", TimestampType(), True))
  .add(StructField("decimal1", DecimalType(15, 3), True))
  .add(StructField("decimal2", DecimalType(38, 7), True))
  .add(StructField("decimal3", DecimalType(5, 2), True))
)

df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)

# Implicit write: creates the CDM manifest and adds TestEntity to it, storing the data
# as gzip'd parquet partitions with both physical and logical entity definitions.
(df.write.format("com.microsoft.cdm")
  .options(
    storage=storageAccountName,
    container=container,
    manifest="/implicitTest/default.manifest.cdm.json",
    entity="TestEntity",
    format="parquet",
    compression="gzip",
    appId=appid,
    appKey=appkey,
    tenantId=tenantid)
  .save())

# Append the same dataframe content to TestEntity in the default CSV format
# (no "format" option is set). The entity name must match the one created by the
# Parquet write so that mode("append") targets that existing entity, which is also
# the entity read back afterwards.
(df.write.format("com.microsoft.cdm")
  .option("storage", storageAccountName)
  .option("container", container)
  .option("manifest", "/implicitTest/default.manifest.cdm.json")
  .option("entity", "TestEntity")
  .option("appId", appid)
  .option("appKey", appkey)
  .option("tenantId", tenantid)
  .mode("append")
  .save())

# Read TestEntity back from the implicitTest manifest and print its rows.
readDf = (spark.read.format("com.microsoft.cdm")
  .options(
    storage=storageAccountName,
    container=container,
    manifest="/implicitTest/default.manifest.cdm.json",
    entity="TestEntity",
    appId=appid,
    appKey=appkey,
    tenantId=tenantid)
  .load())

# show() on the dataframe prints every column, same as selecting "*" first.
readDf.show()

In [3]:
# Explicit write, creating an entity in a CDM folder based on a pre-defined model

# Case 1: Using an entity definition defined in the CDM Github repo

# Sample rows; values are listed in the same order as the schema fields below.
data = [
  ["1", "2", "3", 4],
  ["4", "5", "6", 8],
  ["7", "8", "9", 4],
  ["10", "11", "12", 8],
  ["13", "14", "15", 4]
]

# Four nullable columns: three string ids and a long version number.
schema = StructType([
  StructField("teamMembershipId", StringType(), True),
  StructField("systemUserId", StringType(), True),
  StructField("teamId", StringType(), True),
  StructField("versionNumber", LongType(), True),
])

# The rows are parallelized into a single slice before building the dataframe.
df = spark.createDataFrame(spark.sparkContext.parallelize(data, 1), schema)
          
# Explicit write of the TeamMembership entity, overwriting any existing data.
# The manifest path carries a leading slash, matching the path used to read the
# entity back and every other manifest option in this notebook.
(df.write.format("com.microsoft.cdm")
  .option("storage", storageAccountName)
  .option("container", container)
  .option("manifest", "/explicitTest/root.manifest.cdm.json")
  .option("entity", "TeamMembership")
  .option("entityDefinition", "core/applicationCommon/TeamMembership.cdm.json/TeamMembership")
  .option("useCdmGithubModelRoot", True)  # sets the model root to the CDM GitHub schema documents folder
  .option("useSubManifest", True)
  .option("appId", appid)
  .option("appKey", appkey)
  .option("tenantId", tenantid)
  .mode("overwrite")
  .save())

# Read the TeamMembership entity back from the explicitTest root manifest and print its rows.
readDf = (spark.read.format("com.microsoft.cdm")
  .options(
    storage=storageAccountName,
    container=container,
    manifest="/explicitTest/root.manifest.cdm.json",
    entity="TeamMembership",
    # sets the modelroot alias to the CDM GitHub schema documents folder
    # NOTE(review): the write step spells its flag "useCdmGithubModelRoot" — confirm
    # that "useCDMGithub" is an option name the connector's reader actually honours.
    useCDMGithub=True,
    appId=appid,
    appKey=appkey,
    tenantId=tenantid)
  .load())

# show() on the dataframe prints every column, same as selecting "*" first.
readDf.show()

In [4]:
# Explicit write, creating an entity in a CDM folder based on a pre-defined model 

# Case 2: Using an entity definition defined in a CDM model stored in ADLS

# UPLOAD CDM FILES FIRST
# To run this example, first create a /Models/Contacts folder in your demo container in ADLS Gen2,
# then upload the provided Contacts.manifest.cdm.json, Person.cdm.json and Entity.cdm.json files to that folder

birthdate = datetime.strptime("1991-03-31", '%Y-%m-%d')
now = datetime.now()

# Sample Person rows: identifier, createdTime, firstName, lastName, birthDate.
# Each row carries its own identifier value.
data2 = [
  [1,now,"Donna","Carreras",birthdate],
  [2,now,"Keith","Harris",birthdate],
  [3,now,"Carla","McGee",birthdate]
]

schema2 = (StructType()
  .add(StructField("identifier", IntegerType()))
  .add(StructField("createdTime", TimestampType()))
  .add(StructField("firstName", StringType()))
  .add(StructField("lastName", StringType()))
  .add(StructField("birthDate", DateType())))

# Create the dataframe that matches the CDM definition of the entity, Person
df2 = spark.createDataFrame(spark.sparkContext.parallelize(data2, 1), schema2)
# Explicit write of the Person entity, overwriting any existing data. The entity
# definition path is given relative to the "Models" model root in the definition container.
(df2.write.format("com.microsoft.cdm")
  .options(
    storage=storageAccountName,
    container=container,
    manifest="/Data/Contacts/root.manifest.cdm.json",
    entity="Person",
    entityDefinitionContainer=container,
    entityDefinitionModelRoot="Models",
    entityDefinition="/Contacts/Person.cdm.json/Person",
    appId=appid,
    appKey=appkey,
    tenantId=tenantid)
  .mode("overwrite")
  .save())

# Read the Person entity back from the Contacts root manifest and print its rows.
readDf2 = (spark.read.format("com.microsoft.cdm")
  .options(
    storage=storageAccountName,
    container=container,
    manifest="/Data/Contacts/root.manifest.cdm.json",
    entity="Person",
    entityDefinitionContainer=container,
    entityDefinitionModelRoot="Models",
    appId=appid,
    appKey=appkey,
    tenantId=tenantid)
  .load())

# show() on the dataframe prints every column, same as selecting "*" first.
readDf2.show()