[![pythonista](img/pythonista.png)](https://www.pythonista.io)

# Delta.

In [None]:
# Load the sparksql_magic IPython extension (the SQL cells below start with %%sparksql).
%load_ext sparksql_magic

https://delta.io/

https://docs.delta.io/latest/quick-start.html#python

In [4]:
# Build the Spark session used by every SQL cell in this notebook.
import pyspark
# Explicit import instead of `from delta import *`, so the origin of the helper
# is visible and nothing else is pulled into the notebook namespace.
from delta import configure_spark_with_delta_pip

# The SQL extension and the catalog override are the two settings the Delta
# quick-start (linked above) prescribes for a Delta-enabled session.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# Let the Delta helper finish configuring the builder, then create the session
# (or reuse one that already exists in this kernel).
# sparksql_magic is already loaded by the notebook's first cell, so the
# duplicate `%load_ext sparksql_magic` that ended this cell was dropped.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

## `CREATE TABLE USING delta`.

In [5]:
%%sparksql
-- Create the demo table in Delta format; IF NOT EXISTS skips creation when it already exists.
CREATE TABLE IF NOT EXISTS personas (
    id     INT,
    nombre CHAR(20),
    activo BOOLEAN
)
USING delta;

## `INSERT` y `UPDATE`.

In [6]:
%%sparksql --cache
-- Insert the first row. `activo` is declared BOOLEAN, so a boolean literal is
-- used here instead of the string 'true', which relied on an implicit
-- string-to-boolean cast.
INSERT INTO personas
    VALUES (1001, 'Luis', TRUE);

cache dataframe with lazy load


In [7]:
%%sparksql --cache
-- Show every row of the table after the INSERT above.
SELECT * FROM personas;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,Luis,True


In [8]:
%%sparksql --cache
-- Rename the person whose id is 1001; the result reports num_affected_rows.
UPDATE personas
   SET nombre = 'JUAN'
 WHERE id = 1001;

cache dataframe with lazy load


0
num_affected_rows
1


In [9]:
%%sparksql --cache
-- Show every row again to confirm the UPDATE above changed `nombre`.
SELECT * FROM personas;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,JUAN,True


## Gestión de versiones.

In [10]:
%%sparksql --cache
-- Time travel: read the table as it was at version 1.
-- In the recorded run this is the state before the UPDATE (nombre = 'Luis').
SELECT * FROM personas VERSION AS OF 1;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,Luis,True


In [11]:
%%sparksql --cache
-- Time travel: read the table as it was at version 2.
-- In the recorded run this is the state after the UPDATE (nombre = 'JUAN').
SELECT * FROM personas VERSION AS OF 2;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,JUAN,True


In [12]:
%%sparksql --cache
-- Time travel: read the table as it was at version 0.
-- In the recorded run this version has the columns but no rows.
SELECT * FROM personas VERSION AS OF 0;

cache dataframe with lazy load


0,1,2
id,nombre,activo


## Restauración de una versión previa.

https://delta.io/blog/2022-10-03-rollback-delta-lake-restore/

In [13]:
%%sparksql
-- Show the current rows before restoring an earlier version.
SELECT * FROM personas;

0,1,2
id,nombre,activo
1001,JUAN,True


In [17]:
%%sparksql
-- Roll the table back to version 0; the result row reports restore metrics
-- such as table_size_after_restore and num_removed_files.
RESTORE TABLE personas TO VERSION AS OF 0;

0,1,2,3,4,5
table_size_after_restore,num_of_files_after_restore,num_removed_files,num_restored_files,removed_files_size,restored_files_size
0,0,1,0,941,0


In [18]:
%%sparksql
-- Show the current rows after the RESTORE above.
-- NOTE(review): the recorded output still lists the 1001/JUAN row although the
-- RESTORE output reports table_size_after_restore = 0. Possibly a stale result
-- served from DataFrames cached by the earlier `--cache` cells; verify on a
-- fresh kernel.
SELECT * FROM personas;

0,1,2
id,nombre,activo
1001,JUAN,True


In [16]:
%%sparksql --cache
-- Time travel: read version 2 of the table again.
-- NOTE(review): this cell's execution count (In [16]) is lower than that of
-- the RESTORE cell (In [17]), so the recorded output was produced before the
-- restore; re-run the notebook top to bottom to confirm the result.
SELECT * FROM personas VERSION AS OF 2;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,JUAN,True


In [19]:
%%sparksql --cache
-- Show the current rows once more at the end of the restore section.
-- NOTE(review): the recorded output still lists the 1001/JUAN row, which does
-- not match the RESTORE output (table_size_after_restore = 0); verify on a
-- fresh kernel.
SELECT * FROM personas;

cache dataframe with lazy load


0,1,2
id,nombre,activo
1001,JUAN,True


<p style="text-align: center"><a rel="license" href="http://creativecommons.org/licenses/by/4.0/"><img alt="Licencia Creative Commons" style="border-width:0" src="https://i.creativecommons.org/l/by/4.0/80x15.png" /></a><br />Esta obra está bajo una <a rel="license" href="http://creativecommons.org/licenses/by/4.0/">Licencia Creative Commons Atribución 4.0 Internacional</a>.</p>
<p style="text-align: center">&copy; José Luis Chiquete Valdivieso. 2023.</p>