In [10]:
import pyspark
from delta import *

# Build a local SparkSession with Delta Lake enabled.
#
# `spark.sql.extensions` must hold extension *class names*. The previous
# version set this key twice, so the second value (a Maven coordinate for the
# Elasticsearch connector) replaced the Delta extension. Maven coordinates
# belong in `spark.jars.packages` instead.
builder = (
    pyspark.sql.SparkSession.builder.appName("LocalDelta")
    .master("local")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# configure_spark_with_delta_pip() sets `spark.jars.packages` itself (the
# delta-core artifact matching the pip-installed delta-spark), so any
# additional connector JARs have to be passed through `extra_packages`
# rather than configured on the builder directly.
spark = configure_spark_with_delta_pip(
    builder,
    extra_packages=["org.elasticsearch:elasticsearch-spark-30_2.12:8.15.1"],
).getOrCreate()

In [33]:
def read_stream(path: str):
    """Open a streaming JSON reader over ``path``.

    The JSON under ``path`` is first loaded as a batch DataFrame purely to
    obtain its schema; that schema is then supplied explicitly to the
    streaming reader.

    Args:
        path: Location of the JSON input files.

    Returns:
        The streaming DataFrame produced by ``spark.readStream...load(path)``.
    """
    # Batch load is used only for its inferred schema.
    batch_df = spark.read.format("json").load(path)
    inferred_schema = batch_df.schema

    stream_reader = spark.readStream.format("json").schema(inferred_schema)
    return stream_reader.load(path)

def write_stream(df, path: str, checkpoint_path: str):
    """Start an append-mode Delta streaming write of ``df``.

    Args:
        df: Streaming DataFrame to write (anything exposing ``writeStream``).
        path: Target location, passed as the sink's ``path`` option.
        checkpoint_path: Passed as the ``checkpointLocation`` option.

    Returns:
        Whatever ``.start()`` returns, i.e. the handle of the started query.
    """
    writer = df.writeStream.format("delta").outputMode("append")
    writer = writer.option("path", path)
    writer = writer.option("checkpointLocation", checkpoint_path)

    # NOTE(review): trigger(once=True) is reported as deprecated in newer
    # Spark releases in favour of availableNow=True -- confirm before changing.
    one_shot = writer.trigger(once=True)
    return one_shot.start()

In [34]:
# Source tables to ingest; each name is used as the sub-directory under both
# the JSON extract location and the bronze Delta location.
tbls = [
    'address',
    'org_addr',
    'organisation'
]

for tbl in tbls:
    print(tbl)
    df = read_stream(f'southwind/extact_json/{tbl}/')
    # Keep the query handle itself. awaitTermination() without a timeout
    # returns None, so chaining it onto the assignment left `query` as None.
    query = write_stream(df, f'local_lake/bronze/{tbl}/', f'local_lake/bronze/{tbl}_checkpoint/')
    # Block until this table's one-shot run finishes before starting the next.
    query.awaitTermination()


address
org_addr
organisation


In [116]:
# Sanity check: batch-read the bronze `address` Delta table and print a sample.
(
    spark.read
    .format("delta")
    .load("local_lake/bronze/address/")
    .show()
)

+-------+------------+-----------+-----------------+--------------------+-----------+------------+-------+
|addr_id|change_token|change_type|             city|             country|hire_status|       state|zipcode|
+-------+------------+-----------+-----------------+--------------------+-----------+------------+-------+
|     33|           9|          I|    Lake Veronica|South Georgia and...|       true|    Oklahoma|  28881|
|     46|          12|          I|          Annside|Svalbard & Jan Ma...|       true|Pennsylvania|  47032|
|      9|           2|          I|         Adamland|Palestinian Terri...|       true|    Nebraska|  77455|
|     38|          10|          I| Port Danielville|             Denmark|       true|    Maryland|  22702|
|     12|           3|          I|Lake Jenniferbury|             Liberia|       true|      Oregon|  67883|
|     43|          11|          I|       Shawnmouth|         Isle of Man|       true|    New York|  06474|
|     56|          15|          I|   