# Libraries

In [None]:
import sys, os
# Append the absolute path of the parent of the current working directory to
# the import search path (presumably where the `src` package imported below
# lives -- confirm against the repository layout).
sys.path.append(os.path.abspath('..'))

In [None]:
from pyspark.sql import functions as F, types as T, SparkSession
from pyspark.sql.utils import AnalysisException
from delta import DeltaTable, configure_spark_with_delta_pip
from src.path_controller import PathController

# Extracting

In [None]:
# Configure a session builder with the Delta Lake SQL extension and catalog.
builder = SparkSession.builder.appName("MyApp")
builder = builder.config(
    "spark.sql.extensions",
    "io.delta.sql.DeltaSparkSessionExtension",
)
builder = builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)

# Let the delta helper finish configuring the builder, then get or create the session.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Resolve the bronze-layer location of the raw breweries API response.
controller = PathController()
input_file_path = controller.get_path_for_bronze_layer(
    'openbrewerydb',
    'rest_api',
    'breweries',
    'response.json',
)

In [None]:
# Read the bronze JSON file in multi-line mode (records may span several lines).
bronze_breweries = spark.read.option("multiLine", True).json(input_file_path)

# Loading

In [None]:
# Upsert the bronze breweries into the silver Delta table, creating the table
# on the first run.
output_file_path = controller.get_path_for_silver_layer(
    'breweries_database', 'master_data_context', 'breweries'
)

# Only the table lookup is guarded. If the merge were inside the `try`, any
# AnalysisException it raised (e.g. a schema mismatch) would fall through to
# the overwrite branch and replace the existing silver table with the bronze
# snapshot, silently losing data.
try:
    silver_table = DeltaTable.forPath(spark, output_file_path)
except AnalysisException:
    # The lookup failed (expected when no Delta table exists at the target
    # path yet): write the bronze data as the initial table, partitioned by
    # state.
    (
        bronze_breweries
        .write.mode('overwrite').format('delta')
        .partitionBy('state')
        .save(output_file_path)
    )
else:
    # Table exists: refresh selected columns of rows matched on `id` and
    # insert rows that are not present yet. A failure here now propagates
    # instead of triggering an overwrite.
    (
        silver_table.alias("old")
        .merge(bronze_breweries.alias("new"), "old.id = new.id")
        .whenMatchedUpdate(set={
            "phone": F.col("new.phone"),
            "website_url": F.col("new.website_url"),
            "name": F.col("new.name")
        })
        .whenNotMatchedInsertAll()
        .execute()
    )