# SCHEMA EVOLUTION WITH MERGE SCHEMA COMMAND

In [0]:
from delta.tables import *

In [0]:
DeltaTable.createOrReplace(spark) \
    .tableName("employee") \
    .addColumn("id", "INT") \
    .addColumn("name", "STRING") \
    .addColumn("gender", "STRING") \
    .addColumn("salary", "INT") \
    .addColumn("dept", "STRING") \
    .location('dbfs:/FileStore/tables/delta/merge_schema') \
    .execute()

Out[2]: <delta.tables.DeltaTable at 0x7fec2c5c0220>

In [0]:
%sql

INSERT INTO employee VALUES (1, "Carl Mike", "m", 170000, "Support");
INSERT INTO employee VALUES (2, "Mikel Clark", "m", 254300, "IT");
INSERT INTO employee VALUES (3, "Bob Smith", "m", 220000, "IT");

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql

SELECT * FROM employee;

id,name,gender,salary,dept
1,Carl Mike,m,170000,Support
2,Mikel Clark,m,254300,IT
3,Bob Smith,m,220000,IT


## Schema Evolution

In [0]:
data_employee = [(4, "Mary Scala", "f", 230000, "Sales", True),
                 (5, "Susan Liam", "f", 150000, "Sales", False),
                 (6, "Xi Wuan", "f", 150000, "IT", True)]

schema_employee_v2 = "id INTEGER, name STRING, gender STRING, salary INTEGER, dept STRING, active BOOLEAN"

In [0]:
df_employee_v2 = spark.createDataFrame(data=data_employee,schema=schema_employee_v2)

df_employee_v2.show()

+---+----------+------+------+-----+------+
| id|      name|gender|salary| dept|active|
+---+----------+------+------+-----+------+
|  4|Mary Scala|     f|230000|Sales|  true|
|  5|Susan Liam|     f|150000|Sales| false|
|  6|   Xi Wuan|     f|150000|   IT|  true|
+---+----------+------+------+-----+------+



##Normal Write Fail

In [0]:
df_employee_v2.write.format("delta").mode("append").saveAsTable("employee")

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-4346387320893596>:1[0m
[0;32m----> 1[0m [43mdf_employee_v2[49m[38;5;241;43m.[39;49m[43mwrite[49m[38;5;241;43m.[39;49m[43mformat[49m[43m([49m[38;5;124;43m"[39;49m[38;5;124;43mdelta[39;49m[38;5;124;43m"[39;49m[43m)[49m[38;5;241;43m.[39;49m[43mmode[49m[43m([49m[38;5;124;43m"[39;49m[38;5;124;43mappend[39;49m[38;5;124;43m"[39;49m[43m)[49m[38;5;241;43m.[39;49m[43msaveAsTable[49m[43m([49m[38;5;124;43m"[39;49m[38;5;124;43memployee[39;49m[38;5;124;43m"[39;49m[43m)[49m

File [0;32m/databricks/spark/python/pyspark/instrumentation_utils.py:48[0m, in [0;36m_wrap_function.<locals>.wrapper[0;34m(*args, **kwargs)[0m
[1;32m     46[0m start [38;5;241m=[39m time[38;5;241m.[39mperf_counter()
[1;32m     47[0m [38;5;28;01mtry[39;00m:
[0;32m---> 48[0

## Merge Schema

In [0]:
df_employee_v2.write.option("mergeSchema", "true").format("delta").mode("append").saveAsTable("employee")

## Query Combined Schema

In [0]:
%sql

SELECT * FROM employee;

id,name,gender,salary,dept,active
4,Mary Scala,f,230000,Sales,True
5,Susan Liam,f,150000,Sales,False
6,Xi Wuan,f,150000,IT,True
1,Carl Mike,m,170000,Support,
2,Mikel Clark,m,254300,IT,
3,Bob Smith,m,220000,IT,


## New schema update

In [0]:
data_employee_v2 = [(4, "Mary Scala", "f", 230000, 60, True),
                 (5, "Susan Liam", "f", 150000, 40, False),
                 (6, "Xi Wuan", "f", 150000, 23, True)]

schema_employee_v3 = "id INTEGER, name STRING, gender STRING, salary INTEGER, age INTEGER, active BOOLEAN"

In [0]:
df_employee_v3 = spark.createDataFrame(data=data_employee_v2,schema=schema_employee_v3)

df_employee_v3.show()

+---+----------+------+------+---+------+
| id|      name|gender|salary|age|active|
+---+----------+------+------+---+------+
|  4|Mary Scala|     f|230000| 60|  true|
|  5|Susan Liam|     f|150000| 40| false|
|  6|   Xi Wuan|     f|150000| 23|  true|
+---+----------+------+------+---+------+



In [0]:
df_employee_v3.write.option("mergeSchema", "true").format("delta").mode("append").saveAsTable("employee")

In [0]:
%sql

SELECT * FROM employee;

id,name,gender,salary,dept,active,age
4,Mary Scala,f,230000,Sales,True,
5,Susan Liam,f,150000,Sales,False,
4,Mary Scala,f,230000,,True,60.0
5,Susan Liam,f,150000,,False,40.0
6,Xi Wuan,f,150000,,True,23.0
6,Xi Wuan,f,150000,IT,True,
1,Carl Mike,m,170000,Support,,
2,Mikel Clark,m,254300,IT,,
3,Bob Smith,m,220000,IT,,
