In [1]:
# ==========================
# Importações
# ==========================
# %pip install requests  # (ative se necessário no Fabric)
import time
import requests
from typing import List, Dict

from pyspark.sql import Row
from pyspark.sql import types as T

# ==========================
# 1) Parameters
# ==========================
# Source table name and the REST endpoint URL built from it.
SUPABASE_TABLE = "tb_inspecao_execucao"
SUPABASE_URL = f"https://jewtbymqxxubjpwnjtux.supabase.co/rest/v1/{SUPABASE_TABLE}"

# Credentials sent on every request: API_KEY goes in the "apikey" header and
# BEARER_TOKEN in the "Authorization: Bearer" header.
# SECURITY: both values are hard-coded in plain text. In production, load them
# from Key Vault / Fabric credentials instead of committing them to the notebook.
# NOTE(review): the JWT payloads appear to decode to the "anon" role (API_KEY)
# and the "service_role" role (BEARER_TOKEN); if so, the latter is a privileged
# key that should be rotated and removed from source -- confirm.
API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Impld3RieW1xeHh1Ympwd25qdHV4Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3MTU3NzQ1ODQsImV4cCI6MjAzMTM1MDU4NH0.bs8NXsld5F98WdGTqt_9U0d1HY3DSXT4us0Ur1Rs8HE"
BEARER_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Impld3RieW1xeHh1Ympwd25qdHV4Iiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcxNTc3NDU4NCwiZXhwIjoyMDMxMzUwNTg0fQ.qJW13vrpLiF_uIHpGxNCy0iGpr--WhUK8g-AfeS4xm8"

# Destination Lakehouse path (adjust to your own workspace/lakehouse).
path_destino = "abfss://ws_sistemas@onelake.dfs.fabric.microsoft.com/lk_systemmax.Lakehouse/Tables/tb_inspecao_execucao"

# (Optional) managed catalog table. Leave empty to write only to the physical path.
tabela_destino = ""  # e.g. "bronze.tb_inspecao_execucao"

# Collection settings
PAGE_SIZE = 1000  # rows requested per page through the Range header
HTTP_TIMEOUT = 60  # seconds, passed as the per-request timeout

# ==========================
# 2) Explicit schema (every column is a nullable string)
# ==========================
# Column names in destination order; each becomes a nullable StringType field.
column_names = (
    "id",
    "created_at",
    "idcadastro",
    "uuidexecucao",
    "titulocadastro",
    "colaborador",
    "datahora",
    "contratouser",
    "local",
    "participantes",
    "status",
)
schema = T.StructType(
    [T.StructField(column_name, T.StringType(), True) for column_name in column_names]
)
# Plain list of the column names, used later to project each API record.
cols = list(column_names)

# ==========================
# 3) GET helper with retry/backoff
# ==========================
def http_get_with_retry(url: str, headers: Dict[str, str], timeout: int, max_retries: int = 5) -> requests.Response:
    """Issue a GET request, retrying only transient failures with backoff.

    Exceptions raised by ``requests.get`` itself and the HTTP statuses
    429/500/502/503/504 are retried up to ``max_retries`` times, sleeping
    ``1.5 ** attempt`` seconds before each retry. Any other error status
    (for example 401, 403 or 404) is raised immediately: repeating the same
    request cannot fix it, so retrying would only delay the failure.

    Args:
        url: Full URL to request.
        headers: HTTP headers sent with every attempt.
        timeout: Per-request timeout in seconds, forwarded to ``requests.get``.
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        The first response whose status passes ``raise_for_status()``.

    Raises:
        requests.HTTPError: On a non-retryable error status, or on a
            retryable status once the retry budget is exhausted.
        requests.RequestException: When ``requests.get`` keeps failing after
            ``max_retries`` retries (the last exception is re-raised).
    """
    retryable_statuses = (429, 500, 502, 503, 504)
    backoff = 1.5
    attempt = 0
    while True:
        # Only the network call lives inside the try block, so the HTTPError
        # raised by raise_for_status() below is never swallowed and retried.
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            attempt += 1
            if attempt > max_retries:
                raise
            time.sleep(backoff ** attempt)
            continue

        if resp.status_code in retryable_statuses and attempt < max_retries:
            attempt += 1
            time.sleep(backoff ** attempt)
            continue

        # Either a success, a permanent client/server error, or a transient
        # error with no retries left: surface the status to the caller.
        resp.raise_for_status()
        return resp

# ==========================
# 4) Paginated collection from Supabase
# ==========================
base_headers = {
    "apikey": API_KEY,
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Accept": "application/json",
}

# Range-based paging is only reliable over a fixed row order: without an
# explicit ORDER BY the database may return rows in a different order on each
# request, so pages could overlap or skip records. Sort by "id" to pin it.
# NOTE(review): assumes "id" is unique (primary key) -- confirm.
paged_url = f"{SUPABASE_URL}?order=id.asc"

offset = 0
registros: List[Dict] = []

while True:
    # The Range header is inclusive on both ends: offset .. offset + PAGE_SIZE - 1.
    headers = {**base_headers, "Range": f"{offset}-{offset + PAGE_SIZE - 1}"}
    resp = http_get_with_retry(paged_url, headers, timeout=HTTP_TIMEOUT)
    batch = resp.json()
    # A page must be a JSON array of row objects; anything else (e.g. an error
    # object) would silently corrupt `registros` through extend().
    if not isinstance(batch, list):
        raise ValueError(
            f"Unexpected response payload from {SUPABASE_TABLE}: "
            f"expected a JSON array, got {type(batch).__name__}"
        )
    if not batch:
        break
    registros.extend(batch)
    # A short page means the last rows were just read.
    # NOTE(review): if the server caps responses below PAGE_SIZE this stops
    # after the first page -- verify the project's max-rows setting.
    if len(batch) < PAGE_SIZE:
        break
    offset += PAGE_SIZE

print(f"Registros coletados do Supabase ({SUPABASE_TABLE}): {len(registros)}")

# ==========================
# 5) Build the DataFrame without type inference
# ==========================
def _to_text(value):
    """Return ``None`` unchanged and any other value converted with ``str()``."""
    return None if value is None else str(value)


# Project every record onto the schema columns; keys missing from a record
# become None. An empty `registros` yields an empty list, which produces an
# empty DataFrame with the explicit schema.
# NOTE(review): nested values (dict/list) are stored as their Python str()
# form, not as JSON text -- confirm that is what downstream consumers expect.
rows = [
    Row(**{name: _to_text(rec.get(name)) for name in cols})
    for rec in registros
]
df = spark.createDataFrame(rows, schema)

df.printSchema()
df.show(10, truncate=False)

# ==========================
# 6) Write to Delta
# ==========================
# Both destinations share the same writer configuration: Delta format,
# replacing whatever is already stored at the target.
delta_writer = df.write.format("delta").mode("overwrite")

if not tabela_destino.strip():
    # No catalog table configured: write straight to the physical path.
    delta_writer.save(path_destino)
    print(f"Delta gravado no caminho: {path_destino}")
else:
    # NOTE(review): dropping the table before writing is not atomic -- if the
    # write fails afterwards the table is gone. Consider relying on overwrite.
    spark.sql(f"DROP TABLE IF EXISTS {tabela_destino}")
    delta_writer.saveAsTable(tabela_destino)
    print(f"Tabela gerenciada gravada: {tabela_destino}")

# Counts the rows of the in-memory DataFrame that was just written.
print(f"Linhas salvas: {df.count()}")


StatementMeta(, 148f8cde-facd-4510-8f28-fb5edc332782, 3, Finished, Available, Finished)

Registros coletados do Supabase (tb_inspecao_execucao): 437
root
 |-- id: string (nullable = true)
 |-- created_at: string (nullable = true)
 |-- idcadastro: string (nullable = true)
 |-- uuidexecucao: string (nullable = true)
 |-- titulocadastro: string (nullable = true)
 |-- colaborador: string (nullable = true)
 |-- datahora: string (nullable = true)
 |-- contratouser: string (nullable = true)
 |-- local: string (nullable = true)
 |-- participantes: string (nullable = true)
 |-- status: string (nullable = true)

+---+--------------------------------+----------+------------------------------------+---------------------------------------------------------------+------------------------------------+-----------------------+------------+---------------------+---------------------------------------------------------------------------------------------------------------+---------+
|id |created_at                      |idcadastro|uuidexecucao                        |titulocadastro          