In [0]:
from pyspark.sql.functions import current_timestamp, lit

In [0]:
%sql
-- Make sure the bronze database exists before any table is saved into it.
CREATE DATABASE IF NOT EXISTS bronze;

In [0]:
# Base path (under /Volumes) from which the source CSV files are read.
path_bronze = "/Volumes/workspace/default/brazilian_ecommerce"


In [0]:
# Create the bronze Delta tables from the raw CSV files
def _ingest_csv_to_bronze(csv_name, table_name):
    """Load one CSV from ``path_bronze`` and overwrite a Delta table with it.

    The CSV is read with a header row and schema inference, and an
    ``ingestion_timestamp`` column (``current_timestamp()``) is appended
    before the write.

    Args:
        csv_name: File name of the CSV, relative to ``path_bronze``.
        table_name: Target table name passed to ``saveAsTable``
            (e.g. ``"bronze.ft_pedidos"``).

    Returns:
        The DataFrame that was written, so callers can keep a handle on it.
    """
    df = (
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(f"{path_bronze}/{csv_name}")
        .withColumn("ingestion_timestamp", current_timestamp())
    )
    # "overwrite" replaces the table contents on every run of this cell.
    df.write.mode("overwrite").format("delta").saveAsTable(table_name)
    return df


# One call per source file; the df_* names are kept so other cells can still use them.
df_consumidores = _ingest_csv_to_bronze("olist_customers_dataset.csv", "bronze.ft_consumidores")
df_geo = _ingest_csv_to_bronze("olist_geolocation_dataset.csv", "bronze.ft_geolocalizacao")
df_itens = _ingest_csv_to_bronze("olist_order_items_dataset.csv", "bronze.ft_itens_pedidos")
df_pagamentos = _ingest_csv_to_bronze("olist_order_payments_dataset.csv", "bronze.ft_pagamentos_pedidos")
df_avaliacoes = _ingest_csv_to_bronze("olist_order_reviews_dataset.csv", "bronze.ft_avaliacoes_pedidos")
df_pedidos = _ingest_csv_to_bronze("olist_orders_dataset.csv", "bronze.ft_pedidos")
df_produtos = _ingest_csv_to_bronze("olist_products_dataset.csv", "bronze.ft_produtos")
df_vendedores = _ingest_csv_to_bronze("olist_sellers_dataset.csv", "bronze.ft_vendedores")
df_traducao = _ingest_csv_to_bronze("product_category_name_translation.csv", "bronze.dm_categoria_produtos_traducao")


In [0]:
%sql
-- List the tables registered in the bronze database.
SHOW TABLES IN bronze;

database,tableName,isTemporary
bronze,dm_categoria_produtos_traducao,False
bronze,ft_avaliacoes_pedidos,False
bronze,ft_consumidores,False
bronze,ft_geolocalizacao,False
bronze,ft_itens_pedidos,False
bronze,ft_pagamentos_pedidos,False
bronze,ft_pedidos,False
bronze,ft_produtos,False
bronze,ft_vendedores,False


In [0]:
# Extrair cotação do Dólar
import requests
from datetime import datetime, timedelta
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import current_timestamp

# Reference window: the 60 days ending today.
data_fim = datetime.today()
data_inicio = data_fim - timedelta(days=60)

# Format both dates as MM-DD-YYYY, the form used in the endpoint query below.
data_inicio_formatada = data_inicio.strftime("%m-%d-%Y")
data_fim_formatada = data_fim.strftime("%m-%d-%Y")

print("Período consultado:", data_inicio_formatada, "até", data_fim_formatada)

# The triple-quoted block below is a bare string literal, so the HTTP request
# in it is NOT executed here. Per its own note, the extraction was done locally
# and the result was imported into the volume as a CSV.
''' Endpoint - Feito localmente e importado para o volume
url = (
    "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/"
    "CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)"
    f"?@dataInicial='{data_inicio_formatada}'"
    f"&@dataFinalCotacao='{data_fim_formatada}'"
    "&$select=dataHoraCotacao,cotacaoCompra&$format=json"
)

response = requests.get(url)
dados = response.json()["value"] '''

# Build the DataFrame from the exchange-rate CSV and stamp the ingestion time.
# NOTE(review): unlike the other bronze reads in this notebook, no inferSchema
# option is set here, so the CSV columns presumably load as strings - confirm
# this is intended before changing it.
df_dolar = (
    spark.read.option("header", True).csv(f"{path_bronze}/cotacao_dolar.csv")
        .withColumn("ingestion_timestamp", current_timestamp())
)

# Save to the bronze layer. A single overwrite is enough; the cell previously
# ran this identical write twice back to back.
df_dolar.write.mode("overwrite").format("delta").saveAsTable("bronze.cotacao_dolar")


Período consultado: 09-12-2025 até 11-11-2025
