In [1]:
import ipaddress
import json
import os
from datetime import datetime, timedelta

from dateutils import relativedelta
from delta import DeltaTable, configure_spark_with_delta_pip
from pyspark.sql import SparkSession
from pyspark.sql.functions import (col, lit, max as colmax, min as colmin, split, concat, date_format,
                                   to_timestamp, to_date, regexp_extract, when, udf, size)
from pyspark.sql.types import StructType, StructField, IntegerType

In [3]:
# Build the session configuration one setting at a time, starting from the
# cluster master URL.
builder = SparkSession.builder.master("spark://spark-master:7077")

# Jars handed to Spark: the PostgreSQL JDBC driver (used by the JDBC writes)
# and Delta core.
builder = builder.config("spark.jars", "/jars/postgresql-42.5.0.jar,/jars/delta-core_2.12-1.0.0.jar")
builder = builder.config("spark.sql.warehouse.dir", "/mnt/warehouse")

# Register Delta's SQL extension and catalog implementation.
builder = builder.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
builder = builder.config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

# NOTE(review): configure_spark_with_delta_pip presumably adds the Delta
# package matching the installed delta-spark version; confirm it agrees with
# the delta-core 1.0.0 jar listed in spark.jars above.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [4]:
# Calendar year preceding the current one; the sales export filters
# year_partition on this value.
last_year = datetime.today().year - 1

In [8]:
path = "/mnt/g_layer/sales"

# Read the sales Delta table, keeping only the rows whose year_partition
# equals last year.
df_sells = (
    spark
    .read
    .format("delta")
    .load(path)
    .filter(col("year_partition") == last_year)
)

# Publish the slice to the datamart over JDBC.
# - mode("overwrite"): the default save mode ("errorifexists") raises as soon
#   as the target table exists, so the notebook could only ever run once.
#   Overwrite replaces the table, which also refreshes it when last_year
#   rolls over.
# - Credentials are read from the environment when set; the fallbacks keep the
#   original development values so existing setups continue to work.
(
    df_sells
    .write
    .format("jdbc")
    .mode("overwrite")
    .option("url", "jdbc:postgresql://postgres-datamart/datamart")
    .option("driver", "org.postgresql.Driver")
    .option("dbtable", "last_year_orders")
    .option("user", os.environ.get("DATAMART_USER", "docker"))
    .option("password", os.environ.get("DATAMART_PASSWORD", "docker"))
    .save()
)

                                                                                

In [10]:
path = "/mnt/g_layer/devices"

# Read the full devices Delta table. The frame gets its own name instead of
# reusing df_sells, which belongs to the sales export.
df_devices = (
    spark
    .read
    .format("delta")
    .load(path)
)

# Publish the table to the datamart over JDBC.
# - mode("overwrite"): the default save mode ("errorifexists") raises as soon
#   as the target table exists, so re-running the notebook would fail here.
# - Credentials are read from the environment when set; the fallbacks keep the
#   original development values so existing setups continue to work.
(
    df_devices
    .write
    .format("jdbc")
    .mode("overwrite")
    .option("url", "jdbc:postgresql://postgres-datamart/datamart")
    .option("driver", "org.postgresql.Driver")
    .option("dbtable", "order_devices")
    .option("user", os.environ.get("DATAMART_USER", "docker"))
    .option("password", os.environ.get("DATAMART_PASSWORD", "docker"))
    .save()
)

                                                                                

In [11]:
path = "/mnt/g_layer/products"

# Read the full products Delta table. The frame gets its own name instead of
# reusing df_sells, which belongs to the sales export.
df_products = (
    spark
    .read
    .format("delta")
    .load(path)
)

# Publish the table to the datamart over JDBC.
# - mode("overwrite"): the default save mode ("errorifexists") raises as soon
#   as the target table exists, so re-running the notebook would fail here.
# - Credentials are read from the environment when set; the fallbacks keep the
#   original development values so existing setups continue to work.
(
    df_products
    .write
    .format("jdbc")
    .mode("overwrite")
    .option("url", "jdbc:postgresql://postgres-datamart/datamart")
    .option("driver", "org.postgresql.Driver")
    .option("dbtable", "popular_products")
    .option("user", os.environ.get("DATAMART_USER", "docker"))
    .option("password", os.environ.get("DATAMART_PASSWORD", "docker"))
    .save()
)

                                                                                

In [12]:
# Shut down the Spark session now that all three exports have been written.
spark.stop()