from pathlib import Path
from datetime import datetime, timezone
import pandas as pd
import sqlite3

# Project layout: the project root is two levels above this file; inputs are
# read from <root>/data and every artefact is written under <root>/output.
ROOT = Path(__file__).resolve().parents[1]
DATA = ROOT.joinpath("data")
OUT = ROOT.joinpath("output")
# Create the output directory up front so the SQLite file can be created in it.
OUT.mkdir(exist_ok=True, parents=True)

# Single SQLite database file and the module-wide connection to it.
DB = OUT.joinpath("ut1.db")
con = sqlite3.connect(DB)

def ingest_csv(
    path: Path,
    expected_cols: list,
    raw_table: str,
    *,
    batch_id: str = "demo",
    conn: "sqlite3.Connection | None" = None,
) -> pd.DataFrame:
    """Read a CSV, tag it with ingestion metadata and append it to a raw table.

    Every column is read as text (``dtype=str``). Any name in
    ``expected_cols`` that the file lacks is added as an all-``None`` column.
    Three metadata columns are then added: ``_source_file`` (the file name),
    ``_ingest_ts`` (current UTC time, ISO-8601) and ``_batch_id``.

    Args:
        path: Location of the CSV file.
        expected_cols: Column names that must be present in the result.
        raw_table: Name of the SQLite table the rows are appended to.
        batch_id: Value stored in ``_batch_id``; defaults to ``"demo"``.
        conn: SQLite connection to write to. When ``None``, the
            module-level ``con`` is used.

    Returns:
        The ingested DataFrame. If the file is missing or completely empty
        (zero bytes), a warning is printed, nothing is written to the
        database, and an empty DataFrame with the expected and metadata
        columns is returned.
    """
    meta_cols = ["_source_file", "_ingest_ts", "_batch_id"]

    def _empty_frame() -> pd.DataFrame:
        # list(...) so a tuple of column names is accepted as well.
        return pd.DataFrame(columns=list(expected_cols) + meta_cols)

    if not path.exists():
        print(f"⚠️ No se encontró {path.name}")
        return _empty_frame()

    try:
        df = pd.read_csv(path, dtype=str)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row to parse; treat it the same way
        # as a missing file instead of aborting the whole run.
        print(f"⚠️ {path.name} está vacío")
        return _empty_frame()

    # Guarantee the expected columns exist, filling absent ones with nulls.
    for col in expected_cols:
        if col not in df.columns:
            df[col] = None

    # Ingestion metadata.
    df["_source_file"] = path.name
    df["_ingest_ts"] = datetime.now(timezone.utc).isoformat()
    df["_batch_id"] = batch_id

    # Persist to SQLite; the global connection is only looked up when the
    # caller did not supply one.
    target = con if conn is None else conn
    df.to_sql(raw_table, target, if_exists="append", index=False)
    print(f"Ingested {len(df)} rows into {raw_table}")
    return df

# Ingest the three source CSVs (sales, customers, products) into their
# respective raw_* tables, keeping each resulting DataFrame for later use.
raw_ventas = ingest_csv(
    path=DATA / "ventas.csv",
    expected_cols=[
        "fecha",
        "id_cliente",
        "id_producto",
        "unidades",
        "precio_unitario",
    ],
    raw_table="raw_ventas",
)

raw_clientes = ingest_csv(
    path=DATA / "clientes.csv",
    expected_cols=["id_cliente", "nombre", "email"],
    raw_table="raw_clientes",
)

raw_productos = ingest_csv(
    path=DATA / "productos.csv",
    expected_cols=["id_producto", "nombre", "categoria", "precio_base"],
    raw_table="raw_productos",
)

