## Delta Lake

#### Creating a Delta Lake table from a dataframe

In [None]:
# Read every CSV file under the sales folder into a single dataframe,
# using the first row of each file as the column names.
# NOTE(review): no schema or inferSchema option is passed, so the columns
# presumably load as strings - confirm before doing arithmetic on them.
df = (
    spark.read
    .format('csv')
    .option('header', True)
    .load('abfss://data@synapsedp203dl.dfs.core.windows.net/sales/*.csv')
)

In [None]:
# Render the loaded sales dataframe in the notebook output.
display(df)

In [None]:
# Reduce the sales data to one row per unique (Item, UnitPrice) pair.
# This dataframe is what gets written out as Delta tables further down.
ItemPrice = (
    df
    .select(df["Item"], df["UnitPrice"])
    .distinct()
)
display(ItemPrice)

###### **Create an external table**

In [None]:
# Storage location for the table's Delta files. Passing it as the "path"
# option writes the data there and registers it under the name
# ProductPrice_ex, replacing any existing contents ("overwrite").
delta_table_path = "abfss://data@synapsedp203dl.dfs.core.windows.net/delta/external/ProductPrice"

(
    ItemPrice.write
    .format("delta")
    .mode("overwrite")
    .option("path", delta_table_path)
    .saveAsTable("ProductPrice_ex")
)

###### **Create a managed table**

In [None]:
# Same write as the external table, but with no "path" option: the data is
# saved in Delta format under the table name ProductPrice_mn only.
(
    ItemPrice.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("ProductPrice_mn")
)

#### Update the Delta Table using a condition

In [None]:
# Preview the row(s) for the single product whose price is updated below.
# The filter refers to ItemPrice's own "Item" column (not the column object
# of the parent dataframe `df`), so the expression is resolved against the
# dataframe actually being filtered, matching the later time-travel cells.
df_item = ItemPrice.select("Item", "UnitPrice").where(ItemPrice['Item'] == 'Road-250 Red, 52')
display(df_item.distinct())

In [None]:
from delta.tables import *
from pyspark.sql.functions import *

# Create a deltaTable object bound to the Delta files at delta_table_path
deltaTable = DeltaTable.forPath(spark, delta_table_path)

# Update the table: for rows where Item is 'Road-250 Red, 52', set UnitPrice
# to 90% of its current value (a 10% price reduction for that one item)
deltaTable.update(
    condition = "Item == 'Road-250 Red, 52'",
    set = { "UnitPrice": "UnitPrice * 0.9" })

In [None]:
%%sql
-- Read the item back through the ProductPrice_ex table to check its price.
SELECT * FROM ProductPrice_ex WHERE Item = 'Road-250 Red, 52'

#### Time Travel (Querying a previous version of a table)

In [None]:
from delta.tables import *

# Location of the Delta files; this is the same path the ProductPrice_ex
# table was written to earlier in the notebook.
pathToTable = 'abfss://data@synapsedp203dl.dfs.core.windows.net/delta/external/ProductPrice'

# Bind a DeltaTable to that path and fetch its history as a dataframe.
deltaTable = DeltaTable.forPath(spark, pathToTable)
fullHistoryDF = deltaTable.history()

# Show the history so a version number or timestamp can be picked from it.
display(fullHistoryDF)

##### Using Version

In [None]:
# Time travel by version number: load the table as it was at version 0.
df_vs = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load(pathToTable)
)

In [None]:
# Show the item's row(s) as stored in the version-0 snapshot.
df_v0 = (
    df_vs
    .select("Item", "UnitPrice")
    .filter(df_vs["Item"] == "Road-250 Red, 52")
)
display(df_v0)

##### Using Timestamp

In [None]:
# Time travel by timestamp: load the table as it was at the given moment.
# NOTE(review): the timestamp is hard-coded; presumably it has to fall
# within this table's own commit history (see the history output above the
# "Using Version" section) - adjust it for your environment.
df_ts = (
    spark.read
    .format("delta")
    .option("timestampAsOf", '2024-02-02 09:41:28.13')
    .load(pathToTable)
)

In [None]:
# Show the item's row(s) as stored in the timestamp-based snapshot.
df_ts1 = (
    df_ts
    .select("Item", "UnitPrice")
    .filter(df_ts["Item"] == "Road-250 Red, 52")
)
display(df_ts1)

#### Create Delta Tables using SQL

In [None]:
%%sql

-- Create the ManagedSalesOrders Delta table if it does not already exist.
-- No LOCATION clause is given for this table.
-- NOTE(review): FLOAT is an approximate numeric type; confirm it is
-- acceptable for SalesTotal.
CREATE TABLE IF NOT EXISTS ManagedSalesOrders 
(
    Orderid INT,
    OrderDate TIMESTAMP,
    CustomerName STRING,
    SalesTotal FLOAT
)
USING DELTA

In [None]:
%%sql

-- Insert one sample order. Values are positional and follow the table's
-- column order: Orderid, OrderDate, CustomerName, SalesTotal.
-- Orderid is an INT column, so it is written as an integer literal rather
-- than a quoted string that would need an implicit string-to-int cast.
INSERT INTO ManagedSalesOrders

VALUES
(
    12345,
    date'2024-01-16',
    'John Doe',
    2757.84
);


In [None]:
%%sql

-- Show every row currently in ManagedSalesOrders.
SELECT * FROM ManagedSalesOrders

In [None]:
%%sql

-- Create the ExternalSalesOrders Delta table if it does not already exist,
-- with its data stored at the explicit LOCATION given below.
-- NOTE(review): FLOAT is an approximate numeric type; confirm it is
-- acceptable for SalesTotal.
CREATE TABLE IF NOT EXISTS ExternalSalesOrders 
(
    Orderid INT,
    OrderDate TIMESTAMP,
    CustomerName STRING,
    SalesTotal FLOAT
)
USING DELTA 
LOCATION 'abfss://data@synapsedp203dl.dfs.core.windows.net/delta/external/SalesOrders'

In [None]:
%%sql

-- Insert one sample order. Values are positional and follow the table's
-- column order: Orderid, OrderDate, CustomerName, SalesTotal.
-- Orderid is an INT column, so it is written as an integer literal rather
-- than a quoted string that would need an implicit string-to-int cast.
INSERT INTO ExternalSalesOrders

VALUES
(
    12345,
    date'2024-01-16',
    'John Doe',
    2757.84
);

In [None]:
%%sql

-- Show every row currently in ExternalSalesOrders.
SELECT * FROM ExternalSalesOrders