In [0]:
from pyspark.sql import SparkSession
import os

# Build (or reuse) the Spark session for this notebook, with the Delta Lake
# SQL extension and the Delta catalog registered as `spark_catalog`.
# NOTE(review): the app name says "GoldLayer", but the visible code in this
# cell writes to a landing-zone bucket -- confirm the name is intended.
spark = (
    SparkSession.builder
    .appName("GoldLayer")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
    .getOrCreate()
)

# Configure the S3A filesystem connector on the session's Hadoop configuration.
# The credentials are read from the environment. Fail fast with a readable
# message when one is missing, instead of handing `None` to the JVM and
# getting an opaque Py4J/Java error back.
_s3a_credential_env = {
    "fs.s3a.access.key": "AWS_ACCESS_KEY",
    "fs.s3a.secret.key": "AWS_SECRET_KEY",
}
_missing_s3a_env = [
    _env_name
    for _env_name in _s3a_credential_env.values()
    if os.getenv(_env_name) is None
]
if _missing_s3a_env:
    raise RuntimeError(
        "Missing required environment variable(s) for S3 access: "
        + ", ".join(_missing_s3a_env)
    )

# Fetch the Hadoop configuration handle once and reuse it for every property.
_hadoop_conf = spark._jsc.hadoopConfiguration()
for _s3a_property, _env_name in _s3a_credential_env.items():
    _hadoop_conf.set(_s3a_property, os.environ[_env_name])
_hadoop_conf.set("fs.s3a.endpoint", "s3.amazonaws.com")
_hadoop_conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")

# Connection settings for the relational source database, all taken from the
# environment. Validate them up front: an unset variable would otherwise be
# formatted into the URL as the literal text "None" (or passed as a None
# property value) and only fail later, inside the JDBC read, with an
# unrelated-looking error.
_db_env_names = (
    "DB_RELACIONAL_HOST",
    "DB_RELACIONAL_PORT",
    "DB_RELACIONAL_NAME",
    "DB_RELACIONAL_USER",
    "DB_RELACIONAL_PASSWORD",
)
# Only "not set" counts as missing; an intentionally empty value is passed on.
_missing_db_env = [
    _env_name for _env_name in _db_env_names if os.getenv(_env_name) is None
]
if _missing_db_env:
    raise RuntimeError(
        "Missing required environment variable(s) for the source database: "
        + ", ".join(_missing_db_env)
    )

DB_HOST = os.environ["DB_RELACIONAL_HOST"]
DB_PORT = os.environ["DB_RELACIONAL_PORT"]
DB_NAME = os.environ["DB_RELACIONAL_NAME"]

# PostgreSQL JDBC URL plus the properties handed to spark.read.jdbc.
jdbc_url = f"jdbc:postgresql://{DB_HOST}:{DB_PORT}/{DB_NAME}"
connection_properties = {
    "user": os.environ["DB_RELACIONAL_USER"],
    "password": os.environ["DB_RELACIONAL_PASSWORD"],
    "driver": "org.postgresql.Driver",
}

# Name of the S3 bucket that receives the extracted data (used to build the
# s3a:// output paths).
landing_bucket = "engenharia-dados-satc-landing-zone-bucket"


def _read_jdbc_source(table_expr):
    """Return a DataFrame for *table_expr* read through the JDBC connection.

    *table_expr* is passed as the JDBC ``table`` argument, so it must be a
    table name or a parenthesised, aliased subquery.
    """
    return spark.read.jdbc(
        url=jdbc_url,
        table=table_expr,
        properties=connection_properties,
    )


# One DataFrame per source in the `dl` schema. Apart from the plain `dl.geo`
# table, each source is a database function called with an empty-string
# argument (its meaning is not visible here -- TODO confirm).
df_geo = _read_jdbc_source("(SELECT * FROM dl.geo) AS tmp_geo")
df_clientes = _read_jdbc_source(
    "(SELECT * FROM dl.extrairnovosclientes('')) AS tmp_clientes"
)
df_vendedores = _read_jdbc_source(
    "(SELECT * FROM dl.extrairnovosvendedores('')) AS tmp_vendedores"
)
df_pedidos = _read_jdbc_source(
    "(SELECT * FROM dl.extrairnovospedidos('')) AS tmp_pedidos"
)
df_dadospedidos = _read_jdbc_source(
    "(SELECT * FROM dl.extrair_dados_pedidos('')) AS tmp_dadospedidos"
)
df_pagamentos = _read_jdbc_source(
    "(SELECT * FROM dl.extrairpagamentos('')) AS tmp_pagamentos"
)
df_produtos = _read_jdbc_source(
    "(SELECT * FROM dl.extrair_dados_produtos('')) AS tmp_produtos"
)

def _write_landing_csv(frame, object_name):
    """Write *frame* as CSV with a header row to the landing bucket.

    The target is ``s3a://<landing_bucket>/<object_name>``; any existing
    output at that path is overwritten.
    """
    target_path = f"s3a://{landing_bucket}/{object_name}"
    frame.write.mode("overwrite").option("header", "true").csv(target_path)


# Persist every extracted DataFrame to the landing zone, in the same order
# they were read.
_write_landing_csv(df_geo, "geo.csv")
_write_landing_csv(df_clientes, "clientes.csv")
_write_landing_csv(df_vendedores, "vendedores.csv")
_write_landing_csv(df_pedidos, "pedidos.csv")
_write_landing_csv(df_dadospedidos, "dadospedidos.csv")
_write_landing_csv(df_pagamentos, "pagamentos.csv")
_write_landing_csv(df_produtos, "dadosprodutos.csv")