In [10]:
#!/usr/bin/env python3
import os
import sys
import pickle
import boto3
import cv2
import insightface
import tempfile
import shutil
from io import BytesIO
from tqdm import tqdm
from sklearn.preprocessing import normalize
import environ

# ─────────────────────────────────────────────────────────────
# 1) Localizar y leer .env junto a manage.py
# ─────────────────────────────────────────────────────────────
def find_project_env():
    """Locate the ``.env`` file that sits next to the project's ``manage.py``.

    The search starts at the current working directory and climbs one parent
    at a time until a directory containing ``manage.py`` is found.

    Returns:
        The path of ``.env`` inside that directory, or ``None`` when either
        no ancestor contains ``manage.py`` or the ``.env`` file is missing.
    """
    current = os.getcwd()
    while not os.path.isfile(os.path.join(current, "manage.py")):
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself: stop.
            return None
        current = parent

    candidate = os.path.join(current, ".env")
    if os.path.isfile(candidate):
        return candidate
    return None

# Resolve the project's .env file; without it none of the settings below
# can be read, so abort immediately.
dotenv_path = find_project_env()
if not dotenv_path:
    # sys.exit(<str>) writes the message to stderr and exits with status 1,
    # keeping error text out of stdout while preserving the exit code.
    sys.exit("❌  No se encontró .env junto a manage.py")

env = environ.Env()
env.read_env(dotenv_path)

# Read every required setting up front so a missing one stops the script
# before the S3 client and the face model are created further down.
try:
    AWS_ACCESS_KEY_ID     = env("AWS_ACCESS_KEY_ID")
    AWS_SECRET_ACCESS_KEY = env("AWS_SECRET_ACCESS_KEY")
    AWS_REGION            = env("AWS_REGION")
    S3_BUCKET             = env("S3_BUCKET")
    S3_KEY                = env("S3_KEY")
except Exception as e:
    # NOTE(review): kept broad on purpose; narrow it to the specific
    # "missing variable" exception of the environ package once confirmed.
    sys.exit(f"❌  Falta variable en .env: {e}")

# ─────────────────────────────────────────────────────────────
# 2) S3 client and ArcFace model
# ─────────────────────────────────────────────────────────────
# Build an explicit boto3 session from the credentials and region loaded
# into the AWS_* module constants, then derive the S3 client from it.
session = boto3.Session(
    aws_access_key_id     = AWS_ACCESS_KEY_ID,
    aws_secret_access_key = AWS_SECRET_ACCESS_KEY,
    region_name           = AWS_REGION,
)
s3 = session.client("s3")

# Only the "detection" and "recognition" modules are requested from
# FaceAnalysis; `model` is the module-level analyzer used by load_embeddings().
model = insightface.app.FaceAnalysis(allowed_modules=["detection", "recognition"])
# NOTE(review): ctx_id=0 presumably selects compute device 0 — confirm
# against the insightface documentation for the installed version.
model.prepare(ctx_id=0)

# ─────────────────────────────────────────────────────────────
# 3) Utilidades
# ─────────────────────────────────────────────────────────────
def load_embeddings(folder: str):
    """Extract face embeddings from every image found under ``folder``.

    The folder is searched recursively, because ``download_faces_to``
    recreates any sub-directories present in the S3 keys. Files are
    processed in sorted order so the result is reproducible between runs.

    Args:
        folder: Directory containing ``.jpg``/``.jpeg``/``.png`` images.

    Returns:
        A ``(embeddings, labels)`` tuple. ``embeddings`` is the output of
        ``sklearn.preprocessing.normalize`` applied to the collected
        embeddings; ``labels`` is a parallel list with one entry per detected
        face. A label is the image path relative to ``folder`` without its
        extension, using ``/`` as separator (for an image directly inside
        ``folder`` this is just the file name without extension).

    Raises:
        OSError: If ``folder`` (or a sub-directory) cannot be listed.
        RuntimeError: If no embedding could be extracted from any image.
    """
    image_exts = (".jpg", ".jpeg", ".png")

    def _propagate(err):
        # os.walk ignores listing errors by default; re-raise them so a
        # missing or unreadable folder fails loudly instead of looking empty.
        raise err

    rel_paths = []
    for root, _dirs, names in os.walk(folder, onerror=_propagate):
        for name in names:
            if name.lower().endswith(image_exts):
                rel_paths.append(os.path.relpath(os.path.join(root, name), folder))
    rel_paths.sort()

    embs, labels = [], []
    for rel in tqdm(rel_paths, desc="🔎 Extrayendo embeddings", unit="img"):
        img = cv2.imread(os.path.join(folder, rel))
        if img is None:
            # Unreadable or corrupt image: skip it rather than abort the run.
            continue
        label = os.path.splitext(rel)[0].replace(os.sep, "/")
        # An image may contain several faces; each one yields its own
        # embedding and shares the label of the source image.
        for face in model.get(img):
            if face.embedding is not None:
                embs.append(face.embedding)
                labels.append(label)
    if not embs:
        raise RuntimeError("No se obtuvo ningún embedding.")
    return normalize(embs), labels

def download_faces_to(tmp_dir: str):
    """Download every image stored under ``faces/`` in the bucket to ``tmp_dir``.

    Objects are listed with the ``list_objects_v2`` paginator and filtered by
    extension (``.jpg``, ``.jpeg``, ``.png``, case-insensitive). Each object
    is saved under ``tmp_dir`` at its key with the first path segment (the
    ``faces`` prefix) removed, creating sub-directories as needed. A tqdm bar
    reports progress.

    Keys whose resulting path would fall outside ``tmp_dir`` (for example
    ``faces//x.jpg`` or ``faces/../x.jpg``) are skipped with a warning, so
    nothing is written outside the scratch directory.

    Args:
        tmp_dir: Existing local directory that receives the downloaded files.
    """
    image_exts = (".jpg", ".jpeg", ".png")

    keys = []
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=S3_BUCKET, Prefix="faces/"):
        # Pages without matches carry no "Contents" entry at all.
        for obj in page.get("Contents", []):
            key = obj["Key"]
            if key.lower().endswith(image_exts):
                keys.append(key)

    base = os.path.abspath(tmp_dir)
    for key in tqdm(keys, desc="⏬ Descargando faces", unit="img"):
        rel = key.split("/", 1)[-1]       # drop the leading "faces/" segment
        local = os.path.abspath(os.path.join(base, rel))
        if not local.startswith(base + os.sep):
            # tqdm.write prints without breaking the progress bar.
            tqdm.write(f"⚠  Clave omitida (ruta fuera de {tmp_dir}): {key}")
            continue
        os.makedirs(os.path.dirname(local), exist_ok=True)
        s3.download_file(S3_BUCKET, key, local)

# ─────────────────────────────────────────────────────────────
# 4) main()
# ─────────────────────────────────────────────────────────────
def main():
    """Run the pipeline: download faces, embed them, upload the pickle.

    A scratch directory is created for the downloaded images and removed
    when the function finishes, whether it succeeds or fails. The uploaded
    object is a pickled dict with the keys ``"embeddings"`` and ``"labels"``,
    stored at ``S3_KEY`` in ``S3_BUCKET``.
    """
    workdir = tempfile.mkdtemp(prefix="faces_")
    try:
        download_faces_to(workdir)
        embeddings, labels = load_embeddings(workdir)

        # Serialize in memory and stream the bytes straight to S3.
        payload = pickle.dumps({"embeddings": embeddings, "labels": labels})
        s3.upload_fileobj(BytesIO(payload), Bucket=S3_BUCKET, Key=S3_KEY)
        print(f"✔  Embeddings subidos a s3://{S3_BUCKET}/{S3_KEY}")
    finally:
        # Best-effort cleanup: never mask the original error.
        shutil.rmtree(workdir, ignore_errors=True)


if __name__ == "__main__":
    main()


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\juane/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\juane/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)


⏬ Descargando faces:   1%|          | 51/4531 [00:11<17:06,  4.37img/s]


KeyboardInterrupt: 

In [3]:
# Sanity check: report which ONNX Runtime build is installed.
import onnxruntime
print(f"✅ ONNX Runtime version: {onnxruntime.__version__}")


✅ ONNX Runtime version: 1.15.1
