# In [0]:
import requests
from datetime import datetime, timedelta
from time import sleep
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType


# Obtain the Spark session through the builder's getOrCreate().
spark = SparkSession.builder.getOrCreate()

# Root URL of the OData service; endpoint paths are appended to it.
BASE = "https://olinda.bcb.gov.br/olinda/servico/Pix_DadosAbertos/versao/v1/odata"

# Explicit schema for the records loaded into Spark: five nullable string
# columns followed by one nullable integer column.
schema = StructType(
    [
        StructField(column, StringType(), True)
        for column in ("Data", "ISPB", "Nome", "NaturezaUsuario", "TipoChave")
    ]
    + [StructField("qtdChaves", IntegerType(), True)]
)
def fetch_chaves_pix(date_str, max_retries=3, timeout=30, retry_wait=5):
    """Download every ``ChavesPix`` record returned for one date.

    Requests the ``ChavesPix`` endpoint under ``BASE`` for ``date_str`` and
    keeps following ``@odata.nextLink`` until the service stops returning one.

    A failed page (network error, HTTP error status, or a body that is not
    valid JSON) is retried after ``retry_wait`` seconds. Once the same page
    has failed more than ``max_retries`` times in a row the download stops
    and whatever was collected so far is returned; no exception is raised.

    Args:
        date_str: Date to query, interpolated verbatim into the ``@Data``
            query parameter (the caller in this file passes ``YYYY-MM-DD``).
        max_retries: Number of retries allowed per page after its first
            failure.
        timeout: Timeout in seconds passed to ``requests.get``, so a stalled
            connection cannot block the run indefinitely.
        retry_wait: Seconds to sleep before retrying a failed page.

    Returns:
        A list with the items of the ``value`` array of every page that was
        fetched successfully (possibly empty).
    """
    all_data = []
    url = f"{BASE}/ChavesPix(Data=@Data)?@Data='{date_str}'&$format=json"
    failures = 0  # consecutive failures for the page currently being fetched

    while url:
        try:
            r = requests.get(url, timeout=timeout)
            r.raise_for_status()
            response_json = r.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors on requests versions
            # where they are not RequestException subclasses.
            print(f"Erro na requisição: {e}")
            failures += 1
            if failures > max_retries:
                break
            sleep(retry_wait)
            continue

        failures = 0
        all_data.extend(response_json.get('value', []))
        # A missing nextLink yields None, which ends the loop.
        url = response_json.get('@odata.nextLink')

    return all_data


# Inclusive collection window, and the span covered by each batch that is
# written to the table.
start_date = datetime(2023, 1, 1)
end_date = datetime(2024, 12, 31)
delta = timedelta(days=7)

current_date = start_date
while current_date <= end_date:
    # Last day of this batch, clamped so the final batch stops at end_date.
    block_end = min(current_date + delta - timedelta(days=1), end_date)
    print(f"Processando de {current_date.date()} até {block_end.date()}")

    # Gather the records of every day in the batch before touching Spark.
    bloco_dados = []
    dias_no_bloco = (block_end - current_date).days + 1
    for offset in range(dias_no_bloco):
        date_str = (current_date + timedelta(days=offset)).strftime("%Y-%m-%d")
        try:
            data = fetch_chaves_pix(date_str)
            if not data:
                print(f"Nenhum registro para {date_str}")
            else:
                bloco_dados.extend(data)
                print(f"{len(data)} registros coletados para {date_str}")
        except Exception as e:
            # A failing day is reported and skipped; the batch carries on.
            print(f"Erro em {date_str}: {e}")
        # Pause between consecutive daily requests.
        sleep(1)

    # Append the batch to the Delta table only when something was collected.
    if bloco_dados:
        df_spark = spark.createDataFrame(bloco_dados, schema=schema)
        writer = df_spark.write.format("delta").mode("append")
        writer.saveAsTable("estatisticas_pix.raw_data.chaves_pix")
        print(f"Bloco salvo com sucesso: {len(bloco_dados)} registros")

    # Free memory
    del bloco_dados
    sleep(2)

    # The next batch starts on the day after this one ended.
    current_date = block_end + timedelta(days=1)
