In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import *

In [0]:
# Declare the two notebook parameters (with their default ADLS locations) first,
# then read back whatever value each widget currently holds.
dbutils.widgets.text(
    "path_source",
    "abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/source",
)
dbutils.widgets.text(
    "path_target",
    "abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/target",
)

# `source` feeds the CSV read in the next cell; `target` is the output path parameter.
source = dbutils.widgets.get("path_source")
target = dbutils.widgets.get("path_target")

In [0]:
# Read the CSV file(s) under `source`: the first row supplies the column names
# and Spark infers the column types from the data.
csv_test = (
    spark.read
    .format("csv")
    .options(header="true", inferSchema="true")
    .load(source)
)

In [0]:
# Render the CSV DataFrame loaded above in the notebook's table viewer.
display(csv_test)

#### Creating the database and tables

In [0]:
%sql
-- Create the demo database. IF NOT EXISTS keeps this cell re-runnable:
-- a plain CREATE DATABASE fails once the database already exists.
CREATE DATABASE IF NOT EXISTS salesDB;

In [0]:
%sql
-- Seed the table with ten sample rows (marks are passed as string literals).
-- NOTE(review): this cell runs before the CREATE EXTERNAL TABLE cell further down,
-- so it presumably relies on the table already existing from an earlier session - confirm.
INSERT INTO salesDB.externaltable (id, name, marks) VALUES
    (1, 'Ali', '85'),
    (2, 'Fatima', '90'),
    (3, 'Omar', '78'),
    (4, 'Sara', '88'),
    (5, 'Yusuf', '92'),
    (6, 'Aisha', '80'),
    (7, 'Bilal', '76'),
    (8, 'Zainab', '89'),
    (9, 'Hassan', '95'),
    (10, 'Layla', '84');


In [0]:
%sql
-- Remove the table definition so it can be recreated below.
-- IF EXISTS keeps the cell re-runnable: a plain DROP TABLE fails when the table is absent.
DROP TABLE IF EXISTS salesdb.externaltable;

In [0]:
# Never hard-code the storage account key in the notebook, i.e. do NOT do this:
#   spark.conf.set("fs.azure.account.key.storageaccnetflixdr.dfs.core.windows.net", "ABCDE")
#
# Instead, create a secret scope and store the key with the Databricks CLI:
#   databricks secrets create-scope --scope my_secrets
#   databricks secrets put --scope my_secrets --key storage_key
# Supply your storage key there, then retrieve it at runtime as shown below.
#
# NOTE(review): earlier cells already read from this storage account; on a fresh
# cluster this cell may need to run before them - confirm.

# Fetch the key from the secret scope.
storage_key = dbutils.secrets.get(scope="my_secrets", key="storage_key")

# Register the key in the Spark session configuration for access to ADLS.
spark.conf.set(
    "fs.azure.account.key.storageaccnetflixdr.dfs.core.windows.net",
    storage_key,
)





In [0]:
%sql
-- External Delta table: the table definition is registered in salesdb while the
-- data lives at the explicit ADLS LOCATION below.
-- IF NOT EXISTS keeps the cell re-runnable: a plain CREATE fails once the table exists.
CREATE EXTERNAL TABLE IF NOT EXISTS salesdb.externaltable (
  id INTEGER,
  name STRING,
  marks STRING
)
USING DELTA
LOCATION 'abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/target';

In [0]:
%sql
-- Append three more sample rows to the external table.
INSERT INTO salesDB.externaltable (id, name, marks) VALUES
    (12, 'Ali Massa', '85'),
    (22, 'Laura', '90'),
    (32, 'Dani', '78');

In [0]:
%sql
-- Delete every row whose id equals 1.
DELETE FROM salesdb.externaltable WHERE id = 1;

In [0]:
%sql
-- Show the current table contents.
SELECT *
FROM salesdb.externaltable;

In [0]:
%sql
-- List the Delta change history of the table.
DESCRIBE HISTORY salesdb.externaltable;

In [0]:
%sql
-- Roll the table back to Delta version 1.
-- NOTE(review): the version number is hard-coded; check it against the
-- DESCRIBE HISTORY output before running.
RESTORE TABLE salesdb.externaltable TO VERSION AS OF 1;

In [0]:
%sql
-- Show the table contents after the restore.
SELECT * FROM salesdb.externaltable;

In [0]:
%sql
-- Run OPTIMIZE on the table (no Z-ordering in this cell).
OPTIMIZE salesdb.externaltable;

In [0]:
%sql
-- Run OPTIMIZE again, this time Z-ordering the data by the id column.
OPTIMIZE salesdb.externaltable
ZORDER BY (id);

In [0]:
%sql
-- Show the table contents after the OPTIMIZE runs.
SELECT *
FROM salesdb.externaltable;

In [0]:
%sql
-- Show the table contents.
-- NOTE(review): this query is identical to the one in the preceding cell.
SELECT *
FROM salesdb.externaltable;

#### Incremental loading with Auto Loader

In [0]:
# Auto Loader ("cloudFiles") streaming source: reads parquet files from the
# ALsource folder and keeps its schema-tracking data under the ALtarget path.
df_autoload = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "parquet")
    .option(
        "cloudFiles.schemaLocation",
        "abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/ALtarget",
    )
    .load("abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/ALsource")
)

In [0]:
# Start a streaming write of the Auto Loader DataFrame to a Delta location using
# the availableNow trigger.
# NOTE(review): the checkpoint location is the same path as the Delta output
# (and as the Auto Loader schema location) - consider a dedicated sub-directory
# such as `_checkpoints`; confirm first, since moving a checkpoint resets stream progress.
(
    df_autoload.writeStream
    .format("delta")
    .option(
        "checkpointLocation",
        "abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/ALtarget",
    )
    .trigger(availableNow=True)
    .start("abfss://dbmasterclass@storageaccnetflixdr.dfs.core.windows.net/ALtarget")
)