In [0]:
import requests
import json
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp

In [0]:
# Session handle used by every cell below; getOrCreate() hands back the
# already-running session when one exists instead of building a second one.
spark = (
    SparkSession.builder
    .appName("IngestaoBronze")
    .getOrCreate()
)

In [0]:
# Notebook parameters: bounds of the period requested from the exchange-rate
# API further down. The defaults are month-first (MM-DD-YYYY), as the
# "12-31-2018" value shows.
for widget_name, default_value in (
    ("data_inicio", "01-01-2017"),
    ("data_fim", "12-31-2018"),
):
    dbutils.widgets.text(widget_name, default_value)

In [0]:
# Make sure both target databases exist before any table is written.
# The statement is idempotent thanks to IF NOT EXISTS.
for camada in ("bronze", "silver"):
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {camada}")

In [0]:
# Base directory holding the source CSV files; each file name from
# `tabelas_bronze` is appended to this path when the CSVs are loaded.
# NOTE(review): the /Volumes prefix suggests a Unity Catalog volume — confirm.
volume_path = "/Volumes/workspace/default/atividade2"

In [0]:
# Source CSV file name -> name of the table it is loaded into (inside the
# `bronze` database). A mapping keeps each file name unique by construction.
_ARQUIVO_PARA_TABELA = {
    "olist_customers_dataset.csv": "ft_consumidores",
    "olist_geolocation_dataset.csv": "ft_geolocalizacao",
    "olist_order_items_dataset.csv": "ft_itens_pedidos",
    "olist_order_payments_dataset.csv": "ft_pagamentos_pedidos",
    "olist_order_reviews_dataset.csv": "ft_avaliacoes_pedidos",
    "olist_orders_dataset.csv": "ft_pedidos",
    "olist_products_dataset.csv": "ft_produtos",
    "olist_sellers_dataset.csv": "ft_vendedores",
    "product_category_name_translation.csv": "dm_categoria_produtos_traducao",
}

# List of (csv_file, table_name) tuples, in declaration order, consumed by the
# ingestion loop.
tabelas_bronze = list(_ARQUIVO_PARA_TABELA.items())

In [0]:
# Load every source CSV from the volume and materialise it as a table in the
# `bronze` database, one table per file.
for csv_file, table_name in tabelas_bronze:
    source_path = f"{volume_path}/{csv_file}"

    # First row supplies the column names; column types are inferred by Spark.
    raw_df = (
        spark.read.format("csv")
        .options(header="true", inferSchema="true")
        .load(source_path)
    )

    # Stamp every row with the load time so downstream layers can tell when
    # the data was ingested.
    stamped_df = raw_df.withColumn("ingestion_timestamp", current_timestamp())

    # "overwrite" replaces the table contents on each run, so re-running the
    # notebook does not duplicate rows.
    stamped_df.write.mode("overwrite").saveAsTable(f"bronze.{table_name}")

    print(f"Tabela bronze.{table_name} criada com sucesso.")

# --- USD exchange-rate ingestion (BCB PTAX OData service) -------------------
# Period bounds come from the notebook widgets and are interpolated verbatim
# into the query string, so they must already be in the format the service
# expects (the widget defaults are month-first, MM-DD-YYYY).
data_inicio = dbutils.widgets.get("data_inicio")
data_fim = dbutils.widgets.get("data_fim")

url = f"https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?@dataInicial='{data_inicio}'&@dataFinalCotacao='{data_fim}'&$select=dataHoraCotacao,cotacaoCompra&$format=json"

# requests applies no timeout by default, so an unresponsive endpoint would
# block the notebook run indefinitely. With the bound below, a stalled call
# raises requests.exceptions.Timeout instead of hanging.
PTAX_TIMEOUT_SECONDS = 30
response = requests.get(url, timeout=PTAX_TIMEOUT_SECONDS)

if response.status_code == 200:
    data = response.json()
    # .get() treats a payload without the "value" key the same as an empty
    # result instead of raising KeyError.
    cotacoes = data.get('value')
    if cotacoes:
        # One row per returned quotation; columns are derived from the keys of
        # the returned records.
        df_api = spark.createDataFrame(cotacoes)

        # Same load-time stamp that is applied to the CSV-based bronze tables.
        df_api_with_timestamp = df_api.withColumn("ingestion_timestamp", current_timestamp())

        # "overwrite" replaces the table contents on each run.
        df_api_with_timestamp.write.mode("overwrite") \
            .saveAsTable("bronze.dm_cotacao_dolar")

        print("Tabela bronze.dm_cotacao_dolar criada com sucesso.")
    else:
        print("API não retornou dados para o período.")
else:
    # Non-200 answers are only reported; the table is left untouched.
    print(f"Falha ao buscar dados da API: {response.status_code}")