In [0]:
%run "/Workspace/DatabricksMasterClass/AzureSetup"

In [0]:
# Three sample rows: (id, name, marks).
mydata = [
    (1, 'aa', 30),
    (2, 'bb', 40),
    (3, 'cc', 50),
]

# DDL-style schema string describing the columns of the rows above.
myschema = "id INT, name STRING, marks INT"

# Build the demo DataFrame that the Delta write below persists.
df = spark.createDataFrame(mydata, schema=myschema)

# DELTA LAKE

In [0]:
# Persist the demo DataFrame as a Delta table at the `sales` folder of the
# `destination` container, replacing whatever is already stored there.
(
    df.write
    .format('delta')
    .mode('overwrite')
    .option('path', 'abfss://destination@datalakemercifulsamurai.dfs.core.windows.net/sales')
    .save()
)

## Managed vs External Delta Tables

**Database**

In [0]:
%sql
-- Create the demo database. IF NOT EXISTS keeps this cell re-runnable:
-- a plain CREATE DATABASE raises an error once salesDB already exists.
CREATE DATABASE IF NOT EXISTS salesDB;

**Managed Table**

In [0]:
%sql
-- Managed Delta table: no LOCATION clause is given, so the storage path is
-- left to the metastore. IF NOT EXISTS makes the cell safe to run again
-- (a plain CREATE TABLE fails when the table is already registered).
CREATE TABLE IF NOT EXISTS salesDB.mantable
(
  id INT,
  name STRING,
  marks INT
)
USING DELTA;

In [0]:
%sql
-- Append four demo rows (ids 1-4) to the managed table.
-- Note: re-running this cell appends the same rows again.
INSERT INTO salesDB.mantable VALUES
(1,'aa',30),
(2,'bb',40),
(3,'cc',50),
(4,'dd',60)

In [0]:
%sql
-- Show the current contents of the managed table.
SELECT * FROM salesDB.mantable;

**External Table**

In [0]:
%sql
-- List the catalogs visible to this session.
SHOW CATALOGS;

In [0]:
# List the entries at the root of the `destination` container.
dbutils.fs.ls('abfss://destination@datalakemercifulsamurai.dfs.core.windows.net/')

In [0]:
%sql
-- External Delta table: the data lives at the explicit LOCATION below rather
-- than in metastore-managed storage. IF NOT EXISTS makes the cell safe to
-- run again (a plain CREATE TABLE fails when the table is already registered).
CREATE TABLE IF NOT EXISTS salesDB.exttable
(
  id INT,
  name STRING,
  marks INT
)
USING DELTA
LOCATION 'abfss://destination@datalakemercifulsamurai.dfs.core.windows.net/salesDB/exttable';

In [0]:
%sql
-- Append four demo rows (ids 1-4) to the external table.
-- Note: re-running this cell appends the same rows again.
INSERT INTO salesDB.exttable VALUES
(1,'aa',30),
(2,'bb',40),
(3,'cc',50),
(4,'dd',60)

In [0]:
%sql

-- Refresh the cached entries for the external table.
-- NOTE(review): Delta tables normally pick up new commits without this; confirm it is needed.
REFRESH TABLE salesDB.exttable;

In [0]:
%sql
-- Show the current contents of the external table.
SELECT * FROM salesDB.exttable;

In [0]:
%sql
-- Append a second batch of rows (ids 5-8) to the external table.
INSERT INTO salesDB.exttable VALUES
(5,'aa',30),
(6,'bb',40),
(7,'cc',50),
(8,'dd',60)

## Delta Tables Functionalities

**DELETE**

In [0]:
%sql
-- Remove the row whose id is 8 ...
DELETE FROM salesDB.exttable
WHERE id = 8;

-- ... then display what is left in the table.
SELECT *
FROM salesDB.exttable;

**Time Travel**

In [0]:
%sql
-- Show the commit history of the external table (the versions usable for time travel).
DESCRIBE HISTORY salesDB.exttable;

In [0]:
%sql
-- Roll the table back to version 2.
-- NOTE(review): presumably version 2 is the state after the second INSERT and before the
-- DELETE, assuming the table was freshly created by this notebook; confirm the version
-- number against the DESCRIBE HISTORY output before running.
RESTORE TABLE salesDB.exttable TO VERSION AS OF 2;

In [0]:
%sql
-- Display the table contents after the RESTORE.
SELECT * FROM salesDB.exttable

**VACUUM**

In [0]:
%sql
-- DRY RUN only lists the files VACUUM would delete; nothing is removed.
-- The original cell was marked "DO NOT RUN" yet still executed under "Run All";
-- with DRY RUN it is safe to leave in the notebook.
-- Remove DRY RUN only when you really intend to purge unreferenced files,
-- which limits how far back time travel can go.
VACUUM salesDB.exttable DRY RUN;

**OPTIMIZE**

In [0]:
%sql
-- Run Delta's OPTIMIZE command on the external table (file compaction, per the Delta docs).
OPTIMIZE salesDB.exttable;

In [0]:
%sql
-- Display the table contents after OPTIMIZE.
SELECT * FROM salesDB.exttable;

In [0]:
%sql
-- Run OPTIMIZE again, this time Z-ordering the data by the id column.
OPTIMIZE salesDB.exttable ZORDER BY (id);

In [0]:
%sql
-- Display the table contents after the ZORDER optimize.
SELECT * FROM salesDB.exttable;

In [0]:
%sql
-- Point lookup on id, the column used in the ZORDER BY above.
SELECT * FROM salesDB.exttable WHERE id = 1;

## AUTO LOADER

**Streaming Dataframe**

In [0]:
# Auto Loader source: read parquet files from the `alsource` container as a
# stream, keeping the tracked schema under the `checkpoint` folder of the
# `aldestination` container.
df = (
    spark.readStream
    .format('cloudFiles')
    .option('cloudFiles.format', 'parquet')
    .option('cloudFiles.schemaLocation', 'abfss://aldestination@datalakemercifulsamurai.dfs.core.windows.net/checkpoint')
    .load('abfss://alsource@datalakemercifulsamurai.dfs.core.windows.net')
)

In [0]:
# Start the streaming write: every 5 seconds a micro-batch is written in Delta
# format to the `data` folder of the `aldestination` container, with progress
# tracked in the `checkpoint` folder of the same container.
(
    df.writeStream
    .format('delta')
    .option('checkpointLocation', 'abfss://aldestination@datalakemercifulsamurai.dfs.core.windows.net/checkpoint')
    .trigger(processingTime='5 seconds')
    .start('abfss://aldestination@datalakemercifulsamurai.dfs.core.windows.net/data')
)

In [0]:
# List the entries at the root of the `alsource` container (the Auto Loader input path).
dbutils.fs.ls('abfss://alsource@datalakemercifulsamurai.dfs.core.windows.net')