In [3]:
# ==========================
# Importações
# ==========================
# %pip install requests  # (se necessário)
import time
import requests
from typing import List, Dict

from pyspark.sql import Row
from pyspark.sql import functions as F
from pyspark.sql import types as T

# ==========================
# 1) Parameters
# ==========================
# Name of the Supabase table to export; it is interpolated into the REST endpoint below.
SUPABASE_TABLE = "tb_emociograma_tratativa"
# REST endpoint of the table (the project host is hard-coded here).
SUPABASE_URL = f"https://jewtbymqxxubjpwnjtux.supabase.co/rest/v1/{SUPABASE_TABLE}"

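# Same keys you have already been using.
# SECURITY NOTE(review): both values below are long-lived JWTs committed in plain text,
# and the payload of BEARER_TOKEN decodes to role "service_role" (in Supabase that role
# presumably bypasses row-level security - confirm). Rotate these keys and load them from
# a secret store (e.g. Key Vault or environment variables) instead of the notebook source.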
API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Impld3RieW1xeHh1Ympwd25qdHV4Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3MTU3NzQ1ODQsImV4cCI6MjAzMTM1MDU4NH0.bs8NXsld5F98WdGTqt_9U0d1HY3DSXT4us0Ur1Rs8HE"
BEARER_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Impld3RieW1xeHh1Ympwd25qdHV4Iiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcxNTc3NDU4NCwiZXhwIjoyMDMxMzUwNTg0fQ.qJW13vrpLiF_uIHpGxNCy0iGpr--WhUK8g-AfeS4xm8"

# Destination Lakehouse (adjust to your own workspace/lakehouse)
path_destino = "abfss://ws_sistemas@onelake.dfs.fabric.microsoft.com/lk_systemmax.Lakehouse/Tables/tb_emociograma_tratativa"

# (Optional) managed table name (saveAsTable). Leave empty to write only to the physical path.
tabela_destino = ""  # e.g. "bronze.tb_emociograma_tratativa"

PAGE_SIZE = 1000  # rows requested per page through the Range header
HTTP_TIMEOUT = 60  # seconds

# ==========================
# 2) Explicit schema (every column is a nullable string)
# ==========================
# The order of the names below is the column order of the resulting DataFrame.
schema = T.StructType(
    [
        T.StructField(column_name, T.StringType(), True)
        for column_name in (
            "id",
            "created_at",
            "id_lider",
            "avaliacao",
            "tratativa",
            "data_tratativa",
            "id_registro",
        )
    ]
)
# Column names in schema order, reused when projecting the fetched records.
cols = schema.fieldNames()

# ==========================
# 3) GET helper with retry/backoff
# ==========================
def http_get_with_retry(url: str, headers: Dict[str, str], timeout: int, max_retries: int = 5) -> requests.Response:
    """Issue a GET request, retrying only transient failures with exponential backoff.

    Transport-level errors raised by ``requests.get`` and the HTTP statuses
    429/500/502/503/504 are retried up to ``max_retries`` times, sleeping
    ``1.5 ** attempt`` seconds before each retry. Any other error status
    (for example 400, 401 or 404) is raised immediately, because repeating
    the same request cannot fix it.

    Args:
        url: Full URL to request.
        headers: HTTP headers sent with every attempt.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        The response of the first attempt that finished with a non-error status.

    Raises:
        requests.HTTPError: On a non-retryable error status, or on a retryable
            status once the retries are exhausted.
        requests.RequestException: When the request itself keeps failing
            after the retries are exhausted.
    """
    retryable_statuses = (429, 500, 502, 503, 504)
    backoff = 1.5
    attempt = 0
    while True:
        # Keep only the network call inside the try: the HTTPError raised by
        # raise_for_status() is itself a RequestException and must not be
        # mistaken for a transient transport failure.
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            attempt += 1
            if attempt > max_retries:
                raise
            time.sleep(backoff ** attempt)
            continue

        if resp.status_code in retryable_statuses and attempt < max_retries:
            attempt += 1
            time.sleep(backoff ** attempt)
            continue

        # Success, a non-retryable status, or retries exhausted: surface any
        # error status right away, otherwise hand the response back.
        resp.raise_for_status()
        return resp
# ==========================
# 4) Paginated fetch from Supabase
# ==========================
base_headers = {
    "apikey": API_KEY,
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Accept": "application/json",
}

# Range-based paging issues one independent query per page. Without an explicit
# ordering the database may return rows in a different order on each request,
# so pages could overlap or skip rows. Ordering by "id" keeps the pages stable.
paged_url = f"{SUPABASE_URL}?order=id.asc"

offset = 0
registros: List[Dict] = []

while True:
    # Ask for the inclusive row window [offset, offset + PAGE_SIZE - 1].
    headers = {**base_headers, "Range": f"{offset}-{offset + PAGE_SIZE - 1}"}
    resp = http_get_with_retry(paged_url, headers, timeout=HTTP_TIMEOUT)
    batch = resp.json()
    if not isinstance(batch, list):
        # A JSON object here is not a page of rows; extending the list with it
        # would silently append its keys, so fail loudly instead.
        raise ValueError(f"Unexpected response payload from Supabase: {type(batch).__name__}")
    if not batch:
        break
    registros.extend(batch)
    # A short page means the last rows of the table were just read.
    if len(batch) < PAGE_SIZE:
        break
    offset += PAGE_SIZE

print(f"Registros coletados do Supabase ({SUPABASE_TABLE}): {len(registros)}")

# ==========================
# 5) Build the DataFrame without type inference
# ==========================
if registros:
    def to_row(rec: dict) -> Row:
        """Project ``rec`` onto the schema columns, stringifying every non-null value."""
        values = {}
        for column in cols:
            raw = rec.get(column)
            # Missing keys and explicit nulls both stay None; everything else becomes text.
            values[column] = str(raw) if raw is not None else None
        return Row(**values)

    rows = list(map(to_row, registros))
    df = spark.createDataFrame(rows, schema)
else:
    # Nothing was fetched: still produce an empty DataFrame with the expected columns.
    df = spark.createDataFrame([], schema)

# Quick sanity checks
df.printSchema()
df.show(10, truncate=False)

# ==========================
# 6) Write as Delta
# ==========================
# Both targets use the same writer configuration: Delta format, full overwrite.
delta_writer = df.write.format("delta").mode("overwrite")

if not tabela_destino.strip():
    # No managed table configured: write straight to the physical path.
    delta_writer.save(path_destino)
    print(f"Delta gravado no caminho: {path_destino}")
else:
    # Managed table: drop any previous definition, then recreate it from df.
    spark.sql(f"DROP TABLE IF EXISTS {tabela_destino}")
    delta_writer.saveAsTable(tabela_destino)
    print(f"Tabela gerenciada gravada: {tabela_destino}")

print(f"Linhas salvas: {df.count()}")


StatementMeta(, bf8b7fe2-5755-417d-9ae6-7c79c30fd7bf, 5, Finished, Available, Finished)

Registros coletados do Supabase (tb_emociograma_tratativa): 13
root
 |-- id: string (nullable = true)
 |-- created_at: string (nullable = true)
 |-- id_lider: string (nullable = true)
 |-- avaliacao: string (nullable = true)
 |-- tratativa: string (nullable = true)
 |-- data_tratativa: string (nullable = true)
 |-- id_registro: string (nullable = true)

+---+--------------------------------+--------+-----------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------+-----------+
|id |created_at                      |id_lider|avaliacao                                                                                                              |tratativa                                                                                     