In [1]:
from pyspark import SparkContext
from pyspark.sql import *
import os
import glob
from time import time
from delta import *

In [2]:
# Build a Delta-enabled SparkSession. The Delta SQL extension and the Delta
# catalog implementation are both registered on the builder before the
# session is created.
builder = (
    SparkSession.builder
    .appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# configure_spark_with_delta_pip wraps the builder before getOrCreate() is
# called (presumably to add the pip-installed Delta package to the session —
# see the delta-spark docs).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

**Read Stream from Bronze Zone**

In [3]:
# Open the bronze "generations" Delta table as a streaming source, starting
# from table changes made on or after 2022-10-02.
# NOTE(review): recursiveFileLookup looks like a file-source option; confirm
# whether the Delta streaming source honours it or silently ignores it.
all_generations_df = (
    spark.readStream
    .format("delta")
    .options(
        recursiveFileLookup=True,
        ignoreChanges=True,
        startingTimestamp="2022-10-02",
    )
    .load("bronze-zone/items/generations.delta")
)

# Show the inferred schema and confirm this is a streaming (not batch) DataFrame.
all_generations_df.printSchema()
print(f"Streaming DataFrame : {all_generations_df.isStreaming}")

root
 |-- abilities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- url: string (nullable = true)
 |-- id: long (nullable = true)
 |-- main_region: struct (nullable = true)
 |    |-- name: string (nullable = true)
 |    |-- url: string (nullable = true)
 |-- moves: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- names: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- language: struct (nullable = true)
 |    |    |    |-- name: string (nullable = true)
 |    |    |    |-- url: string (nullable = true)
 |    |    |-- name: string (nullable = true)
 |-- pokemon_species: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- url: string (

**Write Stream to Silver Zone**

In [4]:
# Continuously append the bronze generations stream to the silver Delta table.
#
# The checkpoint is kept in an underscore-prefixed sub-directory of the table
# instead of the table root: checkpoint files written directly into the root
# sit next to the data files and are not tracked by the Delta log, so VACUUM
# may delete them, whereas directories starting with "_" are skipped.
#
# WARNING: if this query already ran with the checkpoint at the table root,
# move the existing checkpoint files into `_checkpoints` before restarting;
# otherwise the stream starts over from `startingTimestamp` and re-appends rows.
generation_stream_query = (
    all_generations_df.writeStream
    .format("delta")
    .option("checkpointLocation", "silver-zone/items/generations.delta/_checkpoints")
    .outputMode("append")
    .queryName("generation_stream_query")
    .start("silver-zone/items/generations.delta")
)

In [6]:
# start() returns immediately, so on a fresh "Run All" the silver table may
# not exist yet (or may still be empty) when this cell runs. Block until the
# streaming query has processed everything currently available in bronze.
# NOTE: this may not return if bronze is being written to continuously.
generation_stream_query.processAllAvailable()

# Batch-read both zones; the silver table is displayed to verify the stream output.
df_bronze = spark.read.format("delta").load("bronze-zone/items/generations.delta")
df_silver = spark.read.format("delta").load("silver-zone/items/generations.delta")
df_silver.show()

+--------------------+---+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+--------------------+
|           abilities| id|         main_region|               moves|           name|               names|     pokemon_species|               types|      version_groups|
+--------------------+---+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+--------------------+
|[{stench, https:/...|  3|{hoenn, https://p...|[{fake-out, https...| generation-iii|[{{ja-Hrkt, https...|[{treecko, https:...|[{shadow, https:/...|[{ruby-sapphire, ...|
|[{pickpocket, htt...|  5|{unova, https://p...|[{hone-claws, htt...|   generation-v|[{{ja-Hrkt, https...|[{victini, https:...|                  []|[{black-white, ht...|
|[{stamina, https:...|  7|{alola, https://p...|[{breakneck-blitz...| generation-vii|[{{ja-Hrkt, https...|[{rowlet, https:/...|                  []|[{sun-mo

**Similarly, write streams to the Silver Zone for the rest of the entities**

In [None]:
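# NOTE: disabled templates for the remaining entities. Only the pokemons and
# stats blocks are distinct; the last three blocks repeat the pokemons block
# verbatim and still need to be pointed at their own bronze/silver tables.
# readstream_pokemon = (spark.readStream.format("delta")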
#                       .option("recursiveFileLookup", True)
#                       .option("ignoreChanges", True)
#                       .option("startingTimestamp", "2022-10-02")
#                       .load(f"bronze-zone/items/pokemons.delta"))
# readstream_pokemon.printSchema()
# print("Streaming DataFrame : " + str(readstream_pokemon.isStreaming))

# stream_query_pokemon = (readstream_pokemon.writeStream
#                     .format("delta")
#                     .option("checkpointLocation", f"silver-zone/items/pokemons.delta")
#                     .outputMode('append')
#                     .queryName("stream_query_pokemon")
#                     .start(f"silver-zone/items/pokemons.delta")
#                    )

# readstream_stats = (spark.readStream.format("delta")
#                       .option("recursiveFileLookup", True)
#                       .option("ignoreChanges", True)
#                       .option("startingTimestamp", "2022-10-02")
#                       .load(f"bronze-zone/items/stats.delta"))
# readstream_stats.printSchema()
# print("Streaming DataFrame : " + str(readstream_stats.isStreaming))

# stream_query_stats = (readstream_stats.writeStream
#                     .format("delta")
#                     .option("checkpointLocation", f"silver-zone/items/stats.delta")
#                     .outputMode('append')
#                     .queryName("stream_query_stats")
#                     .start(f"silver-zone/items/stats.delta")
#                    )

# readstream_pokemon = (spark.readStream.format("delta")
#                       .option("recursiveFileLookup", True)
#                       .option("ignoreChanges", True)
#                       .option("startingTimestamp", "2022-10-02")
#                       .load(f"bronze-zone/items/pokemons.delta"))
# readstream_pokemon.printSchema()
# print("Streaming DataFrame : " + str(readstream_pokemon.isStreaming))

# stream_query_pokemon = (readstream_pokemon.writeStream
#                     .format("delta")
#                     .option("checkpointLocation", f"silver-zone/items/pokemons.delta")
#                     .outputMode('append')
#                     .queryName("stream_query_pokemon")
#                     .start(f"silver-zone/items/pokemons.delta")
#                    )

# readstream_pokemon = (spark.readStream.format("delta")
#                       .option("recursiveFileLookup", True)
#                       .option("ignoreChanges", True)
#                       .option("startingTimestamp", "2022-10-02")
#                       .load(f"bronze-zone/items/pokemons.delta"))
# readstream_pokemon.printSchema()
# print("Streaming DataFrame : " + str(readstream_pokemon.isStreaming))

# stream_query_pokemon = (readstream_pokemon.writeStream
#                     .format("delta")
#                     .option("checkpointLocation", f"silver-zone/items/pokemons.delta")
#                     .outputMode('append')
#                     .queryName("stream_query_pokemon")
#                     .start(f"silver-zone/items/pokemons.delta")
#                    )

# readstream_pokemon = (spark.readStream.format("delta")
#                       .option("recursiveFileLookup", True)
#                       .option("ignoreChanges", True)
#                       .option("startingTimestamp", "2022-10-02")
#                       .load(f"bronze-zone/items/pokemons.delta"))
# readstream_pokemon.printSchema()
# print("Streaming DataFrame : " + str(readstream_pokemon.isStreaming))

# stream_query_pokemon = (readstream_pokemon.writeStream
#                     .format("delta")
#                     .option("checkpointLocation", f"silver-zone/items/pokemons.delta")
#                     .outputMode('append')
#                     .queryName("stream_query_pokemon")
#                     .start(f"silver-zone/items/pokemons.delta")
#                    )

