### Handling Schema Evolution

#### 1.0 Create Table

In [0]:
from pyspark.sql import SparkSession

# Build (or reuse) a Spark session with the Delta Lake SQL extension and the
# Delta catalog registered as spark_catalog.
spark = (
    SparkSession.builder
    .appName("SchemaEvolution")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

# Start from a fresh three-column Delta table on every run.
# CREATE OR REPLACE (instead of CREATE ... IF NOT EXISTS) resets both the rows
# and the schema, so re-running the notebook neither piles up duplicate seed
# rows nor inherits a job_title column added by an earlier run.
# NOTE: this discards whatever default.user_data held before this cell ran.
spark.sql("""
    CREATE OR REPLACE TABLE default.user_data (
        id INT,
        name STRING,
        age INT
    ) USING DELTA
""")

# Seed the table with the two initial records.
spark.sql("""
    INSERT INTO default.user_data VALUES 
    (1, 'Alice', 25), 
    (2, 'Bob', 30)
""")

# Read the table back to verify the starting state.
df = spark.sql("SELECT * FROM default.user_data")
df.show()


+---+-----+---+
| id| name|age|
+---+-----+---+
|  1|Alice| 25|
|  2|  Bob| 30|
+---+-----+---+



#### 1.1 Add new data 

In [0]:
# The incoming batch carries one column the table does not have yet: job_title.
new_columns = ["id", "name", "age", "job_title"]

# One tuple per record, with values in the same order as new_columns.
new_data = [
    (3, "Charlie", 35, "Engineer"),
]

# Build the DataFrame; column names are taken from new_columns.
df_new = spark.createDataFrame(new_data, schema=new_columns)



#### 1.2 Handling schema evolution 

In [0]:
# Append df_new to the Delta table with mergeSchema enabled, so the extra
# job_title column is added to the table instead of the write being rejected.
# NOTE(review): insertInto resolves columns by position, not by name — keep
# df_new's column order aligned with the table (new columns last).
(
    df_new.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .insertInto("default.user_data")
)

# Show the evolved table; rows written before the change have no job_title.
print("Table after schema evolution:")

spark.sql("select * FROM default.user_data").show()

Table after schema evolution:
+---+-------+---+---------+
| id|   name|age|job_title|
+---+-------+---+---------+
|  3|Charlie| 35| Engineer|
|  1|  Alice| 25|     null|
|  2|    Bob| 30|     null|
+---+-------+---+---------+

