In [1]:
import math
import os
import re

import pandas as pd
from sqlalchemy import create_engine, text

In [2]:
# Database URL. Prefer the ESTUFAS_DB_URL environment variable so credentials
# do not have to live in the notebook. The literal below is kept only as a
# backward-compatible fallback for the local setup.
# NOTE(review): the fallback still embeds a password; remove it (and rotate the
# password) once every environment exports ESTUFAS_DB_URL.
CONN_STR = os.environ.get(
    "ESTUFAS_DB_URL",
    "postgresql+psycopg2://estufas_user:estufas_pass_123@localhost:5432/estufas_kibala",
)
engine = create_engine(CONN_STR)

In [3]:
# ---------- helper ----------
def cultura_key(s: str) -> str:
    """Build a normalized matching key from a crop ("cultura") name.

    The value is converted with ``str()`` and trimmed. Trailing "(...)"
    groups and trailing periods are then removed repeatedly until neither is
    left, so "Tomate (x)." and "Tomate. (x)" both reduce to "Tomate".
    Finally, runs of whitespace are collapsed to a single space and the
    result is trimmed again.

    Args:
        s: Raw crop name. Non-string values are stringified first.

    Returns:
        The normalized key, with no leading or trailing whitespace.
    """
    key = str(s).strip()
    while True:
        # Same two suffix rules as before, applied until the text is stable.
        # A single pass misses "(...)" followed by a period, because the
        # parenthesis rule is anchored at the end of the string.
        peeled = re.sub(r"\s*\(.*\)\s*$", "", key)   # drop trailing "(...)"
        peeled = re.sub(r"\.\s*$", "", peeled)       # drop trailing period
        if peeled == key:
            break
        key = peeled
    # Removing "." from "Tomate ." leaves a trailing space; strip it so the
    # key compares equal to "Tomate".
    return re.sub(r"\s+", " ", key).strip()

In [4]:
# ---------- 1) extract ----------
# Read harvest rows from the bronze table. The query keeps only rows where
# year, week, block and crop are all present (crop must also be non-blank
# after trimming), casts the block to an integer and trims the crop name.
sql_bronze = """
SELECT
  ano,
  semana,
  bloco::int AS bloco_id,
  TRIM(cultura) AS cultura,
  caixas_normal,
  peso_normal_kg,
  caixas_refugo,
  peso_refugo_kg
FROM bronze.colheitas_bronze
WHERE ano IS NOT NULL
  AND semana IS NOT NULL
  AND bloco IS NOT NULL
  AND cultura IS NOT NULL
  AND TRIM(cultura) <> '';
"""
df = pd.read_sql(sql=sql_bronze, con=engine)

bronze_row_count = len(df)
print("BRONZE colheita rows:", bronze_row_count)

BRONZE colheita rows: 5598


In [5]:
# ---------- 2) types / keys ----------
# Coerce the three key columns to nullable integers; values that cannot be
# parsed as numbers become missing instead of raising.
for key_col in ("ano", "semana", "bloco_id"):
    df[key_col] = pd.to_numeric(df[key_col], errors="coerce").astype("Int64")

# Year-week label: the year, a hyphen, then the week number left-padded with
# zeros to two characters (e.g. "2025-01").
year_txt = df["ano"].astype(str)
week_txt = df["semana"].astype(str).str.zfill(2)
df["ano_semana"] = year_txt + "-" + week_txt

# Normalized crop name used to look up the dimension id.
df["cultura_key"] = df["cultura"].apply(cultura_key)

In [6]:
# ---------- 3) map cultura_id ----------
# Load the crop dimension and derive the same normalized key used on the
# bronze rows, so both sides can be matched by key.
dim = pd.read_sql("SELECT cultura_id, cultura_nome FROM silver.dim_cultura", engine)
dim["cultura_key"] = dim["cultura_nome"].apply(cultura_key)

# Different dimension names can normalize to the same key. dict(zip(...))
# keeps only the last row per key, and the query has no ORDER BY, so which
# cultura_id wins would depend on row order. Report such keys explicitly
# instead of resolving them silently.
ids_per_key = dim.groupby("cultura_key")["cultura_id"].nunique()
ambiguous_keys = ids_per_key[ids_per_key > 1].index
if len(ambiguous_keys) > 0:
    print("⚠️ Chaves de cultura ambíguas em silver.dim_cultura (mais de um cultura_id):")
    print(
        dim.loc[dim["cultura_key"].isin(ambiguous_keys), ["cultura_key", "cultura_id", "cultura_nome"]]
           .sort_values(["cultura_key", "cultura_id"])
           .to_string(index=False)
    )

# key -> cultura_id lookup (last row wins on duplicate keys, as before).
map_cult = dict(zip(dim["cultura_key"], dim["cultura_id"]))
df["cultura_id"] = df["cultura_key"].map(map_cult)

# Mapping coverage, plus a sample of the raw names that found no match.
print("Mapeadas:", df["cultura_id"].notna().sum(), "| Não mapeadas:", df["cultura_id"].isna().sum())
if df["cultura_id"].isna().any():
    print("\nCulturas não mapeadas (amostra):")
    print(df.loc[df["cultura_id"].isna(), "cultura"].drop_duplicates().head(50).to_string(index=False))

Mapeadas: 5598 | Não mapeadas: 0


In [8]:
# Columns that define the fact grain, and the measures summed within it.
grain_cols = ["ano_semana", "ano", "semana", "bloco_id", "cultura_id"]
metric_cols = ["peso_normal_kg", "caixas_normal", "peso_refugo_kg", "caixas_refugo"]

# Measures -> numeric; unparseable or missing values count as 0.
for metric in metric_cols:
    df[metric] = pd.to_numeric(df[metric], errors="coerce").fillna(0)

# Keep only rows whose crop was mapped to a dimension id.
has_cultura = df["cultura_id"].notna()
df = df.loc[has_cultura].copy()
df["cultura_id"] = df["cultura_id"].astype("Int64")

# Aggregate to the fact grain: one row per year-week / block / crop.
fact = df.groupby(grain_cols, as_index=False).agg(
    {metric: "sum" for metric in metric_cols}
)

print("FACT (agrupada) linhas:", len(fact))
fact

FACT (agrupada) linhas: 2785


Unnamed: 0,ano_semana,ano,semana,bloco_id,cultura_id,peso_normal_kg,caixas_normal,peso_refugo_kg,caixas_refugo
0,2025-01,2025,1,1,11,56.0,2.0,0.0,0.0
1,2025-01,2025,1,1,12,504.0,28.0,0.0,0.0
2,2025-01,2025,1,4,26,4945.0,0.0,0.0,0.0
3,2025-01,2025,1,5,21,16.0,2.0,0.0,0.0
4,2025-01,2025,1,5,40,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
2780,2025-47,2025,47,21,45,7136.0,539.0,0.0,0.0
2781,2025-47,2025,47,21,46,6400.0,530.0,0.0,0.0
2782,2025-47,2025,47,21,49,2642.0,363.0,0.0,0.0
2783,2025-47,2025,47,21,58,67918.0,3281.0,0.0,0.0


In [9]:
# Upsert into the silver fact table. Rows are keyed by
# (ano_semana, bloco_id, cultura_id); on a key conflict the existing row's
# year, week and measures are overwritten with the incoming values.
# The ON CONFLICT target requires a matching unique constraint or index on
# the table.
sql_upsert = text("""
INSERT INTO silver.fact_colheita_linha (
  ano_semana, ano, semana, bloco_id, cultura_id,
  peso_normal_kg, caixas_normal, peso_refugo_kg, caixas_refugo
)
VALUES (
  :ano_semana, :ano, :semana, :bloco_id, :cultura_id,
  :peso_normal_kg, :caixas_normal, :peso_refugo_kg, :caixas_refugo
)
ON CONFLICT (ano_semana, bloco_id, cultura_id)
DO UPDATE SET
  ano = EXCLUDED.ano,
  semana = EXCLUDED.semana,
  peso_normal_kg = EXCLUDED.peso_normal_kg,
  caixas_normal = EXCLUDED.caixas_normal,
  peso_refugo_kg = EXCLUDED.peso_refugo_kg,
  caixas_refugo = EXCLUDED.caixas_refugo;
""")

records = fact.to_dict("records")

# Defensive cleanup: replace NaN/NaT/NA and +/-inf with None so they are
# bound as NULL rather than as non-finite numbers.
for r in records:
    for k, v in list(r.items()):
        if (isinstance(v, float) and not math.isfinite(v)) or pd.isna(v):
            r[k] = None

if records:
    # engine.begin() commits on success and rolls back if the block raises.
    with engine.begin() as conn:
        conn.execute(sql_upsert, records)
    print("✅ silver.fact_colheita_linha carregada. Linhas:", len(records))
else:
    # An empty parameter list is handled as "no parameters", which would make
    # the statement fail on its unbound :placeholders; skip the call instead.
    print("⚠️ Nenhuma linha para carregar em silver.fact_colheita_linha.")


✅ silver.fact_colheita_linha carregada. Linhas: 2785
