# UPSERT USING MERGE

## SQL

In [0]:
# Source rows for the SQL upsert example, one tuple per row:
# (id, name, country, active).
data_sql_source = [
    (1, "Laura", "USA", True),
    (2, "Michael", "Spain", False),
]

# DDL-style schema string naming and typing the four columns of the rows above.
schema_sql_source = "id INTEGER, name STRING, country STRING, active BOOLEAN"

In [0]:
# Build the source DataFrame from the in-memory rows and the DDL schema string.
df_sql_source = spark.createDataFrame(
    data=data_sql_source,
    schema=schema_sql_source,
)

# Print the DataFrame contents to check the rows that were loaded.
df_sql_source.show()

+---+-------+-------+------+
| id|   name|country|active|
+---+-------+-------+------+
|  1|  Laura|    USA|  true|
|  2|Michael|  Spain| false|
+---+-------+-------+------+



In [0]:
# Register the DataFrame as the temporary view "sql_source" so that the
# %sql cells can reference it by name (it is the USING source of the MERGE).
df_sql_source.createOrReplaceTempView("sql_source")

In [0]:
%sql

-- Destination Delta table for the SQL upsert example. Its columns
-- (id, name, country, active) line up with those of the sql_source view.
CREATE OR REPLACE TABLE sql_destination (
  id INT,
  name STRING,
  country STRING,
  active BOOLEAN
)
USING DELTA
-- The path is a string literal, so it is single-quoted (standard SQL quoting).
LOCATION 'dbfs:/FileStore/tables/delta/upsert/sql/'

In [0]:
%sql

-- Seed the destination with a single row for id 1. Its country ('France')
-- differs from the source row with the same id, so the later MERGE has a
-- row to update. String literals are single-quoted (standard SQL quoting).
INSERT INTO sql_destination VALUES (1, 'Laura', 'France', true);

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql

-- Show the destination table contents after the initial insert.
SELECT * FROM sql_destination

id,name,country,active
1,Laura,France,True


In [0]:
%sql

-- Upsert sql_source into sql_destination, matching rows on id:
--   * a matched destination row gets its name/country/active overwritten
--     with the source values;
--   * a source row with no matching id is inserted as a new row.
MERGE INTO sql_destination AS d
USING sql_source AS s
  ON d.id = s.id
WHEN MATCHED THEN
  UPDATE SET
    d.name = s.name,
    d.country = s.country,
    d.active = s.active
WHEN NOT MATCHED THEN
  -- Inserted values are qualified with the source alias, consistent with
  -- the UPDATE clause, so it is explicit where each value comes from.
  INSERT (id, name, country, active)
  VALUES (s.id, s.name, s.country, s.active)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,1,0,1


In [0]:
%sql

-- Show the destination table contents after the MERGE.
SELECT * FROM sql_destination

id,name,country,active
2,Michael,Spain,False
1,Laura,USA,True


## Spark

In [0]:
# Source rows for the DataFrame-API upsert example, one tuple per row:
# (id, brand, model, available).
data_spark_source = [
    (1, "Audi", "TT", True),
    (2, "Porche", "911", False),
]

# DDL-style schema string naming and typing the four columns of the rows above.
schema_spark_source = "id INTEGER, brand STRING, model STRING, available BOOLEAN"

In [0]:
# Build the source DataFrame from the in-memory rows and the DDL schema string.
df_spark_source = spark.createDataFrame(
    data=data_spark_source,
    schema=schema_spark_source,
)

# Print the DataFrame contents to check the rows that were loaded.
df_spark_source.show()

+---+------+-----+---------+
| id| brand|model|available|
+---+------+-----+---------+
|  1|  Audi|   TT|     true|
|  2|Porche|  911|    false|
+---+------+-----+---------+



In [0]:
# Initial destination row: (id, brand, model, available). It shares id 1
# with a source row but has a different model and availability.
data_spark_destination = [
    (1, "Audi", "R8", False),
]

# DDL-style schema string; same column names and types as the source schema.
schema_spark_destination = "id INTEGER, brand STRING, model STRING, available BOOLEAN"

In [0]:
# Build the destination DataFrame from the in-memory row and the DDL schema string.
df_spark_destination = spark.createDataFrame(
    data=data_spark_destination,
    schema=schema_spark_destination,
)

# Print the DataFrame contents to check the row that was loaded.
df_spark_destination.show()

+---+-----+-----+---------+
| id|brand|model|available|
+---+-----+-----+---------+
|  1| Audi|   R8|    false|
+---+-----+-----+---------+



In [0]:
# Write the destination DataFrame in Delta format as the table
# "spark_destination_dt", using overwrite mode.
(
    df_spark_destination.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("spark_destination_dt")
)

In [0]:
# Import only the name this notebook uses rather than a wildcard, so the
# origin of DeltaTable is explicit and no unrelated names enter the namespace.
from delta.tables import DeltaTable

In [0]:
%sql

-- Show the Delta table contents before the merge.
SELECT * FROM spark_destination_dt

id,brand,model,available
1,Audi,R8,False


In [0]:
# Look up the existing table "spark_destination_dt" by name and keep a
# DeltaTable handle to it; the merge below is issued through this handle.
spark_destination_dt = DeltaTable.forName(spark,"spark_destination_dt")

In [0]:
# Upsert df_spark_source (alias "s") into the Delta table (alias "d"),
# matching rows on id:
#   - a matched row gets brand/model/available overwritten from the source;
#   - a source row with no match is inserted with all four columns.
(
    spark_destination_dt.alias("d")
    .merge(
        source=df_spark_source.alias("s"),
        condition="d.id == s.id",
    )
    .whenMatchedUpdate(
        set={
            "brand": "s.brand",
            "model": "s.model",
            "available": "s.available",
        }
    )
    .whenNotMatchedInsert(
        values={
            "id": "s.id",
            "brand": "s.brand",
            "model": "s.model",
            "available": "s.available",
        }
    )
    .execute()
)

In [0]:
%sql

-- Show the Delta table contents after the merge.
SELECT * FROM spark_destination_dt

id,brand,model,available
2,Porche,911,False
1,Audi,TT,True
