# Delta Batch Operations - Insert and Update

Delta lets you read, write, and query data in data lakes efficiently.

* Let's try to UPSERT data into existing Databricks Delta tables.

In [2]:
%run ./Reference/Setup

In [3]:
# Source CSV holding the mini outdoor-products dataset.
miniDataInputPath = "/mnt/training/online_retail/outdoor-products/outdoor-products-mini.csv"

# Per-user working locations rooted at `userhome`.
# NOTE(review): `userhome` is not defined in this cell — presumably it is set
# by the ./Reference/Setup notebook; confirm.
deltaMiniDataPath = "".join([userhome, "/delta/customer-data-mini/"])
genericMiniDataPath = "".join([userhome, "/generic/customer-data-mini/"])

In [4]:
from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType, StringType
from pyspark.sql.functions import col

# Explicit schema for the outdoor-products CSV. Every column is declared
# nullable; InvoiceDate is read as a plain string.
inputSchema = (StructType()
  .add("InvoiceNo", IntegerType(), True)
  .add("StockCode", StringType(), True)
  .add("Description", StringType(), True)
  .add("Quantity", IntegerType(), True)
  .add("InvoiceDate", StringType(), True)
  .add("UnitPrice", DoubleType(), True)
  .add("CustomerID", IntegerType(), True)
  .add("Country", StringType(), True)
)

# Load the CSV with the schema above instead of inferring one; the "header"
# option is passed through to the reader exactly as before.
miniDataDF = spark.read.csv(miniDataInputPath, schema=inputSchema, header="true")

In [5]:
# DataFrame over every row and column of the demo_iot_data_delta table.
newDataDF = spark.table("demo_iot_data_delta")

In [6]:
%sql
-- Rows for device 485 on 2018-06-01 as they stand before the upsert.
SELECT *
FROM demo_iot_data_delta
WHERE date = '2018-06-01'
  AND deviceId = 485

action,time,date,deviceId
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485


Let's change `action` to `Open` for `date = '2018-06-01'` and `deviceId = 485`.

In [8]:
# Build the rows to upsert: take the existing readings, replace the action
# with the literal 'Open', de-duplicate, and keep only device 485 on
# 2018-06-01.
newDeviceId485DF = (
  newDataDF
    .selectExpr(" 'Open' as Action", "time", "date", "deviceId")
    .distinct()
    .where("date = '2018-06-01' ")
    .where("deviceId = 485")
)

# Number of distinct rows that will be offered to the merge.
actionCount = newDeviceId485DF.count()
print(actionCount)

In [9]:
# Recreate the staging table from scratch so this cell can be re-run:
# drop any previous copy, then save the upsert rows as a Delta table.
spark.sql("DROP TABLE IF EXISTS iot_data_delta_to_upsert")
newDeviceId485DF.write.saveAsTable("iot_data_delta_to_upsert", format="delta")

In [10]:
%sql
-- Show the staged rows that will be merged into demo_iot_data_delta.
SELECT *
FROM iot_data_delta_to_upsert

Action,time,date,deviceId
Close,1529091520,2018-06-01,485


In [11]:
%sql
--ANSWER
-- Upsert the staged rows into the target Delta table.
--
-- The merge key is (deviceId, date, time). Matching on deviceId alone would
-- rewrite the action of every row for that device on every date, not just the
-- readings present in the staging table. It would also make the merge
-- ambiguous as soon as the staging table holds more than one row for the
-- device.
MERGE INTO demo_iot_data_delta
USING iot_data_delta_to_upsert
ON demo_iot_data_delta.deviceId = iot_data_delta_to_upsert.deviceId
  AND demo_iot_data_delta.date = iot_data_delta_to_upsert.date
  AND demo_iot_data_delta.time = iot_data_delta_to_upsert.time
WHEN MATCHED THEN
  -- Existing reading: only the action changes.
  UPDATE SET
    demo_iot_data_delta.action = iot_data_delta_to_upsert.action
WHEN NOT MATCHED
  -- Reading not yet in the target: insert it as a new row.
  THEN INSERT (action, time, date, deviceId)
  VALUES (
    iot_data_delta_to_upsert.action,
    iot_data_delta_to_upsert.time,
    iot_data_delta_to_upsert.date,
    iot_data_delta_to_upsert.deviceId
  )

In [12]:
%sql
-- Same query as before the merge, to confirm the action values changed.
SELECT *
FROM demo_iot_data_delta
WHERE date = '2018-06-01'
  AND deviceId = 485

action,time,date,deviceId
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485
Open,1529091520,2018-06-01,485


In [13]:
# Total number of rows in the table after the merge. The result is kept in a
# variable and printed below: a bare expression that is not the last line of
# a cell is evaluated but its value is never displayed.
totalRows = spark.sql("SELECT * FROM demo_iot_data_delta").count()

# Rows for device 485 (on any date) whose action is still 'Close'.
count = spark.sql("SELECT count(*) as total FROM demo_iot_data_delta WHERE deviceId = 485 AND action = 'Close' ").collect()[0][0]

print("Total rows in demo_iot_data_delta: {}".format(totalRows))
print("Rows for deviceId 485 with action 'Close': {}".format(count))

In [14]:

%sql
-- Time travel: read the same rows from an earlier version of the table.
-- NOTE(review): version 5 is hard-coded; the right number depends on the
-- table's history in this workspace — confirm it with DESCRIBE HISTORY.
SELECT *
FROM demo_iot_data_delta VERSION AS OF 5
WHERE date = '2018-06-01'
  AND deviceId = 485

action,time,date,deviceId
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485
Close,1529091520,2018-06-01,485


In [15]:
%sql
-- Show table-level metadata (format, location, partition columns, file count, size).
DESCRIBE DETAIL demo_iot_data_delta

format,id,name,description,location,createdAt,lastModified,partitionColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion
delta,0d4eec58-a7e4-427f-9961-2cf91ec80a07,nagaraj_sengodan_hotmail_com_db.demo_iot_data_delta,,dbfs:/user/nagaraj.sengodan@hotmail.com/delta/iot-pipeline,2019-10-16T04:59:33.959+0000,2019-10-16T12:24:43.000+0000,List(date),188,732033,Map(),1,2
