In [1]:
# ==========================
# Importações
# ==========================
# %pip install requests  # (se necessário; no Fabric geralmente já existe)
import json
import os
import time
from typing import Dict, List

import requests
from pyspark.sql import Row
from pyspark.sql import functions as F
from pyspark.sql import types as T

# ==========================
# 1) Parameters
# ==========================
SUPABASE_TABLE = "tb_emociometro"
SUPABASE_URL = f"https://jewtbymqxxubjpwnjtux.supabase.co/rest/v1/{SUPABASE_TABLE}"

# Credentials are read from the environment instead of being embedded in the
# notebook. The JWTs that used to be hardcoded here (an `anon` API key and a
# `service_role` bearer token) must be treated as leaked and rotated.
# NOTE(review): environment variables are the only standard-library option;
# if the workspace offers a secret store, load the values from it and expose
# them under these names -- confirm the preferred mechanism with the team.
API_KEY = os.environ.get("SUPABASE_API_KEY", "").strip()
BEARER_TOKEN = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "").strip()
if not (API_KEY and BEARER_TOKEN):
    # Surface the misconfiguration up front; requests sent with empty
    # credentials are expected to be rejected by the API.
    print(
        "AVISO: defina as variáveis de ambiente SUPABASE_API_KEY e "
        "SUPABASE_SERVICE_ROLE_KEY antes de executar a coleta."
    )

# Physical Lakehouse path (adjust to your workspace/lakehouse)
path_destino = "abfss://ws_sistemas@onelake.dfs.fabric.microsoft.com/lk_systemmax.Lakehouse/Tables/tb_emociometro"

# (Optional) managed table name if saveAsTable is preferred (leave blank to skip it)
tabela_destino = ""  # e.g.: "bronze.tb_emociometro"

# Page size and HTTP timeout
PAGE_SIZE = 1000
HTTP_TIMEOUT = 60  # seconds

# ==========================
# 2) Explicit schema (every column is a nullable string)
# ==========================
# Column names in the order they appear in the target table.
cols = [
    "id",
    "created_at",
    "colaborador",
    "matricula_colaborador",
    "estado_emocional",
    "tratativa_lideranca",
    "contrato",
    "data",
    "status",
    "tratativa_sesmt",
    "tratativa_medicina",
    "avaliacao_lideranca",
    "avaliacao_sesmt",
    "avaliacao_medicina",
    "lider_imediato",
]
# One nullable StringType field per column, so Spark never infers types.
schema = T.StructType(
    [T.StructField(nome, T.StringType(), True) for nome in cols]
)

# ==========================
# 3) HTTP request helper with retry/backoff
# ==========================
def http_get_with_retry(url: str, headers: Dict[str, str], timeout: int, max_retries: int = 5) -> requests.Response:
    """Issue a GET request, retrying transient failures with exponential backoff.

    A request is retried when the transport raises ``requests.RequestException``
    or when the server answers 429/500/502/503/504. Any other error status
    (for example 401, 403 or 404) is raised immediately instead of being
    retried, because repeating the same request cannot fix it.

    Args:
        url: Full URL to request.
        headers: HTTP headers sent with every attempt.
        timeout: Timeout in seconds passed to ``requests.get``.
        max_retries: Maximum number of retries after the first attempt.

    Returns:
        The response of the first attempt that did not fail.

    Raises:
        requests.HTTPError: On a non-retryable error status, or on a
            retryable status once the retries are exhausted.
        requests.RequestException: When the transport keeps failing after
            the retries are exhausted.
    """
    retryable_status = (429, 500, 502, 503, 504)
    backoff = 1.5
    attempt = 0
    while True:
        # Only the transport call is guarded: keeping raise_for_status()
        # outside the try prevents HTTP status errors from being swallowed
        # and retried by the except clause.
        try:
            resp = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException:
            attempt += 1
            if attempt > max_retries:
                raise
            time.sleep(backoff ** attempt)
            continue

        if resp.status_code in retryable_status and attempt < max_retries:
            attempt += 1
            time.sleep(backoff ** attempt)
            continue

        # Raises for any remaining 4xx/5xx; returns untouched otherwise.
        resp.raise_for_status()
        return resp

# ==========================
# 4) Paginated fetch from Supabase
# ==========================
# Headers shared by every page request.
base_headers = {
    "apikey": API_KEY,
    "Authorization": f"Bearer {BEARER_TOKEN}",
    "Accept": "application/json",
    # "Prefer": "count=exact"  # optional
}

registros: List[Dict] = []
offset = 0

# NOTE(review): no explicit ordering is sent with the request, so page
# boundaries depend on the server returning rows in the same order on every
# call -- consider adding an `order` query parameter; confirm the right key.
while True:
    # Range-based pagination: inclusive window [offset, offset+PAGE_SIZE-1]
    headers = dict(base_headers, Range=f"{offset}-{offset + PAGE_SIZE - 1}")
    resp = http_get_with_retry(SUPABASE_URL, headers, timeout=HTTP_TIMEOUT)
    batch = resp.json()
    if batch:
        registros.extend(batch)
    # An empty or short page means there is nothing left to fetch.
    if not batch or len(batch) < PAGE_SIZE:
        break
    offset += PAGE_SIZE

print(f"Registros coletados do Supabase: {len(registros)}")

# ==========================
# 5) Build the DataFrame without schema inference
# ==========================
if registros:
    def to_row(rec: dict) -> Row:
        """Project one record onto `cols`, stringifying every non-null value."""
        fixed = {}
        for c in cols:
            valor = rec.get(c)
            # Missing keys and explicit nulls both stay as None.
            fixed[c] = valor if valor is None else str(valor)
        return Row(**fixed)

    rows = list(map(to_row, registros))
    df = spark.createDataFrame(rows, schema)
else:
    # Nothing was fetched: create an empty DataFrame that still has the schema.
    df = spark.createDataFrame([], schema)

# (Sanity check)
df.printSchema()
df.show(10, truncate=False)

# ==========================
# 6) Write to Delta
# ==========================
if not tabela_destino.strip():
    # No managed table configured: write to the physical path (path_destino)
    (
        df.write
        .format("delta")
        .mode("overwrite")
        .save(path_destino)
    )
    print(f"Delta gravado no caminho: {path_destino}")
else:
    # Managed table (Fabric catalog)
    # NOTE(review): the table is dropped before the overwrite, so a failed
    # write leaves no table behind -- confirm this is intended.
    spark.sql(f"DROP TABLE IF EXISTS {tabela_destino}")
    (
        df.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(tabela_destino)
    )
    print(f"Tabela gerenciada gravada: {tabela_destino}")

# Row count of the DataFrame that was just written.
print(f"Linhas salvas: {df.count()}")


StatementMeta(, a33723f0-cc52-4cb9-8362-5ffffcdfdd51, 3, Finished, Available, Finished)

Registros coletados do Supabase: 1827
root
 |-- id: string (nullable = true)
 |-- created_at: string (nullable = true)
 |-- colaborador: string (nullable = true)
 |-- matricula_colaborador: string (nullable = true)
 |-- estado_emocional: string (nullable = true)
 |-- tratativa_lideranca: string (nullable = true)
 |-- contrato: string (nullable = true)
 |-- data: string (nullable = true)
 |-- status: string (nullable = true)
 |-- tratativa_sesmt: string (nullable = true)
 |-- tratativa_medicina: string (nullable = true)
 |-- avaliacao_lideranca: string (nullable = true)
 |-- avaliacao_sesmt: string (nullable = true)
 |-- avaliacao_medicina: string (nullable = true)
 |-- lider_imediato: string (nullable = true)

+---+-----------------------------+------------------------------+---------------------+----------------------+-------------------+--------+----------+---------+---------------+-------------------------------------------------------------------------------------------------------