In [8]:
#!/usr/bin/env python3
import os
import sys
import pickle
import mimetypes
import boto3
import cv2
import insightface
from io import BytesIO
from sklearn.preprocessing import normalize
from tqdm import tqdm
import environ

# Helper that locates the .env file sitting next to manage.py
def find_project_env(start=None):
    """
    Walk up the directory tree until a folder containing 'manage.py' is found
    and return the path to the '.env' file in that same folder.

    Args:
        start: Directory where the upward search begins. When omitted (or
            None) the current working directory is used, which is the
            original behaviour.

    Returns:
        The path to '.env' located beside the first 'manage.py' found, or
        None when that folder has no '.env' file or when the filesystem
        root is reached without finding 'manage.py'.
    """
    # A relative start would make os.path.dirname() collapse to '' and end
    # the walk early, so an explicit start is always made absolute.
    path = os.getcwd() if start is None else os.path.abspath(start)
    while True:
        if os.path.isfile(os.path.join(path, 'manage.py')):
            # The search stops at the first folder holding manage.py, even
            # if that folder turns out to have no .env file.
            env_file = os.path.join(path, '.env')
            return env_file if os.path.isfile(env_file) else None
        parent = os.path.dirname(path)
        if parent == path:
            # dirname() of the root is the root itself: nothing left to scan.
            return None
        path = parent

# 1) Locate the .env file in the project root (the folder holding manage.py).
#    Without it the script cannot be configured, so it exits with status 1.
dotenv_path = find_project_env()
if not dotenv_path:
    print("ERROR: no se encontró .env junto a manage.py")
    sys.exit(1)

# 2) Read environment variables using django-environ
env = environ.Env()
env.read_env(dotenv_path)

# All five settings are required: any failure while looking one up is
# reported and the script exits with status 1.
try:
    AWS_ACCESS_KEY_ID     = env('AWS_ACCESS_KEY_ID')
    AWS_SECRET_ACCESS_KEY = env('AWS_SECRET_ACCESS_KEY')
    AWS_REGION            = env('AWS_REGION')
    S3_BUCKET             = env('S3_BUCKET')
    S3_KEY                = env('S3_KEY')
except Exception as e:
    print(f"❌ Falta variable en .env: {e}")
    sys.exit(1)

# 3) Initialise the S3 client from an explicit-credentials boto3 session
session = boto3.Session(
    aws_access_key_id     = AWS_ACCESS_KEY_ID,
    aws_secret_access_key = AWS_SECRET_ACCESS_KEY,
    region_name           = AWS_REGION
)
s3 = session.client('s3')

# 4) Initialise the ArcFace model; only the detection and recognition
#    modules are requested.
model = insightface.app.FaceAnalysis(allowed_modules=['detection', 'recognition'])
# NOTE(review): ctx_id=0 presumably selects the first GPU in insightface,
# with a fallback to CPU when none is available; confirm the intended device.
model.prepare(ctx_id=0)

def load_embeddings(folder):
    """
    Extract one face embedding per detected face for every readable image
    directly inside `folder`.

    Entries that OpenCV cannot read as an image are skipped. Each embedding
    is labelled with the file name (extension removed) of the image it came
    from, so an image with several faces contributes several rows that
    share one label.

    Args:
        folder: Path of the directory whose entries are scanned.

    Returns:
        A tuple `(embeddings, labels)`: the collected embeddings after being
        passed through `sklearn.preprocessing.normalize` with its default
        settings, and the list of labels in the same order.

    Raises:
        RuntimeError: If no embedding could be extracted from the folder.
    """
    vectors = []
    names = []
    progress = tqdm(os.listdir(folder), desc="Extrayendo embeddings")
    for filename in progress:
        image = cv2.imread(os.path.join(folder, filename))
        if image is None:
            # Not a decodable image (or not a file at all): ignore it.
            continue
        stem = os.path.splitext(filename)[0]
        for detected in model.get(image):
            vector = detected.embedding
            if vector is None:
                continue
            vectors.append(vector)
            names.append(stem)
    if not vectors:
        raise RuntimeError("No se extrajo ningún embedding de la carpeta.")
    return normalize(vectors), names

# 5) Main execution logic
def main():
    """
    Build the embeddings for a folder of face images and upload them to S3.

    The folder is the first command-line argument that names an existing
    directory, falling back to 'faces'. If the chosen folder does not exist
    the script prints an error and exits with status 1. The embeddings and
    labels are pickled as a dict with keys 'embeddings' and 'labels' and
    uploaded to the bucket/key given by S3_BUCKET and S3_KEY.
    """
    # First argument that is an existing directory, otherwise the default.
    target_dir = next(
        (arg for arg in sys.argv[1:] if os.path.isdir(arg)),
        'faces',
    )

    if not os.path.isdir(target_dir):
        print(f"ERROR: carpeta no encontrada '{target_dir}'")
        sys.exit(1)

    # Extract and normalise the embeddings.
    vectors, names = load_embeddings(target_dir)

    # Serialise to an in-memory buffer (positioned at its start) and upload.
    payload = BytesIO(pickle.dumps({'embeddings': vectors, 'labels': names}))
    s3.upload_fileobj(payload, Bucket=S3_BUCKET, Key=S3_KEY)
    print(f"✔ Embeddings subidos a s3://{S3_BUCKET}/{S3_KEY}")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\juane/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: C:\Users\juane/.insightface\models\buffalo_l\genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\juane/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)


Extrayendo embeddings: 100%|██████████| 4531/4531 [09:48<00:00,  7.70it/s]


✔ Embeddings subidos a s3://hackathon-facesiupb/embeddings/embeddings.pkl


In [3]:
# Sanity check: confirm ONNX Runtime is importable and print its version.
import onnxruntime
print("✅ ONNX Runtime version:", onnxruntime.__version__)


✅ ONNX Runtime version: 1.15.1
