In [5]:
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, IntegerType, StringType, DoubleType

# Build (or reuse) a Spark session with the Delta Lake SQL extension and
# catalog enabled.
#
# Delta Lake 3.x ships its Spark integration as the `delta-spark` artifact;
# `delta-core` was only published for the 2.x line, so the coordinate
# `io.delta:delta-core_2.12:3.1.0` cannot be resolved.
#
# NOTE: `spark.jars.packages` is read when the JVM is launched. If a session
# already exists in this kernel, `getOrCreate()` returns it and the package
# setting below has no effect -- restart the kernel before changing it.
spark = (
    SparkSession.builder
    .appName("DeltaLakeExample")
    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.1.0")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

# Column expressions reused by the derivations below.
order_date = col("OrderDate")
name_parts = split(col("CustomerName"), " ")

# Load the new sales CSV (first row is the header), derive the calendar and
# customer-name columns, then keep only the columns of interest in their
# final order.
df = (
    spark.read
    .options(header="true")
    .format("csv")
    .load("sales/sales.csv")
    # Year and month are extracted from the order date.
    .withColumn("Year", year(order_date))
    .withColumn("Month", month(order_date))
    # First and second space-separated tokens of the customer name.
    .withColumn("FirstName", name_parts.getItem(0))
    .withColumn("LastName", name_parts.getItem(1))
    .select(
        "SalesOrderNumber",
        "SalesOrderLineNumber",
        "OrderDate",
        "Year",
        "Month",
        "FirstName",
        "LastName",
        "EmailAddress",
        "Item",
        "Quantity",
        "UnitPrice",
        "TaxAmount",
    )
)



In [None]:
# This notebook is run locally, where the following table write does not work, so it is left commented out:
# table_name = "sales"
# df.write.format("delta").mode("append").saveAsTable(table_name)


In [6]:
# Preview the first 20 rows; long string values are truncated in the printed table.
df.show(n=20, truncate=True, vertical=False)

+----------------+--------------------+----------+----+-----+---------+---------+--------------------+--------------------+--------+---------+---------+
|SalesOrderNumber|SalesOrderLineNumber| OrderDate|Year|Month|FirstName| LastName|        EmailAddress|                Item|Quantity|UnitPrice|TaxAmount|
+----------------+--------------------+----------+----+-----+---------+---------+--------------------+--------------------+--------+---------+---------+
|         SO43701|                   1|2019-07-01|2019|    7|  Christy|      Zhu|christy12@adventu...|Mountain-100 Silv...|       1|  3399.99| 271.9992|
|         SO43704|                   1|2019-07-01|2019|    7|    Julio|     Ruiz|julio1@adventure-...|Mountain-100 Blac...|       1|  3374.99| 269.9992|
|         SO43705|                   1|2019-07-01|2019|    7|   Curtis|       Lu|curtis9@adventure...|Mountain-100 Silv...|       1|  3399.99| 271.9992|
|         SO43700|                   1|2019-07-01|2019|    7|    Ruben|   Prasad|r