In [None]:
# === Crear estructura de carpetas: GT, VIBE, MP ===
# Uso: aquí se organizarán los .npy (T,14,3) de GT, VIBE y MediaPipe que luego se unirán en un solo JSON.
import os

BASE_DIR = "/content"        # change this if you want another root
# One sub-folder per source: ground truth, VIBE and MediaPipe.
GT_DIR, VIBE_DIR, MP_DIR = (
    os.path.join(BASE_DIR, sub) for sub in ("GT_npy", "VIBE_npy", "MP_npy")
)

for d in (GT_DIR, VIBE_DIR, MP_DIR):
    os.makedirs(d, exist_ok=True)
    # Drop an empty ".keep" marker so file managers do not show the folder as empty.
    with open(os.path.join(d, ".keep"), "w"):
        pass

print("Listo:")
print(" GT_DIR  ->", GT_DIR)
print(" VIBE_DIR->", VIBE_DIR)
print(" MP_DIR  ->", MP_DIR)

Listo:
 GT_DIR  -> /content/GT_npy
 VIBE_DIR-> /content/VIBE_npy
 MP_DIR  -> /content/MP_npy


In [None]:
# === Construir JSON (GT + VIBE + MP) a partir de carpetas con .npy (T,14,3) ===
# Une los .npy de GT, VIBE y MediaPipe (ya reducidos a 14 articulaciones) en un solo archivo JSON:
#   /content/dataset_3models.json, con todas las secuencias y todos los frames.
import os, json, numpy as np
from google.colab import files

# ----------------- CONFIG -----------------
# Input folders; each GT file "<base>.npy" is paired with "VIBE_<base>.npy" and "MP_<base>.npy".
GT_DIR   = "/content/GT_npy"      # e.g. "/content/GT_npy" containing "courtyard_backpack_00.npy", ...
VIBE_DIR = "/content/VIBE_npy"    # e.g. "/content/VIBE_npy" containing "VIBE_courtyard_backpack_00.npy", ...
MP_DIR   = "/content/MP_npy"      # e.g. "/content/MP_npy" containing "MP_courtyard_backpack_00.npy", ...
VIBE_PREFIX = "VIBE_"
MP_PREFIX   = "MP_"

# Path of the unified JSON written at the end of this cell.
OUTPUT_JSON = "/content/dataset_3models.json"

# Common 14-joint skeleton (standard order 0..13 used throughout the thesis)
# Names and connections (K=14) — FIXED ORDER 0..13
# (names are Spanish; the _D/_I suffixes presumably mean right/left — confirm)
KEYPOINT_NAMES = [
    "Hombro_D","Hombro_I","Codo_D","Codo_I","Muñeca_D","Muñeca_I",
    "Cadera_D","Cadera_I","Rodilla_D","Rodilla_I","Tobillo_D","Tobillo_I",
    "Cuello","Cabeza"
]
# Skeleton connections over the 14 joints, as (index, index) pairs into KEYPOINT_NAMES:
# GT/VIBE connections
CONN_GT_VIBE = [(0,6),(1,7),(0,2),(2,4),(1,3),(3,5),(6,7),(6,8),(8,10),(7,9),(9,11),(12,0),(12,1),(12,13)]
# MediaPipe connections
CONN_MP = [(0,1),(1,6),(0,7),(0,2),(2,4),(1,3),(3,5),(6,7),(6,8),(8,10),(7,9),(9,11),(12,13)]

# Temporal policy: use only the first T = min(T_GT, T_VIBE, T_MP) frames so all models stay aligned
TEMPORAL_ALIGN = "truncate_min_T"

# ----------------- UTILIDADES -----------------
def _is_npy14x3(a):
    return isinstance(a, np.ndarray) and a.ndim == 3 and a.shape[1] == 14 and a.shape[2] == 3

def _load_npy(path):
    """Load a .npy file and return it as float32.

    Raises:
        ValueError: if the loaded object is not an array of shape (T, 14, 3).
    """
    arr = np.load(path)
    if _is_npy14x3(arr):
        # copy=False: no copy is made when the array is already float32.
        return arr.astype(np.float32, copy=False)
    raise ValueError(f"Forma inesperada en {path}: {getattr(arr,'shape',None)} (se espera (T,14,3))")

def _k3d_3xK(frame_14x3):
    """Convierte frame (14,3) a k3d 3xK con None en lugar de NaN/Inf."""
    # frame_14x3: (14,3) -> X[14], Y[14], Z[14]
    X = frame_14x3[:,0].tolist()
    Y = frame_14x3[:,1].tolist()
    Z = frame_14x3[:,2].tolist()
    # Reemplazar no finitos por None para JSON
    X = [float(x) if np.isfinite(x) else None for x in X]
    Y = [float(y) if np.isfinite(y) else None for y in Y]
    Z = [float(z) if np.isfinite(z) else None for z in Z]
    return [X, Y, Z]

def _base_from_gt(fname):
    # "courtyard_backpack_00.npy" -> "courtyard_backpack_00"
    return os.path.splitext(os.path.basename(fname))[0]

def _expected_vibe_name(base):
    """File name expected in the VIBE folder for sequence `base`: VIBE_PREFIX + base + ".npy"."""
    return "".join((VIBE_PREFIX, base, ".npy"))

def _expected_mp_name(base):
    """File name expected in the MediaPipe folder for sequence `base`: MP_PREFIX + base + ".npy"."""
    return "".join((MP_PREFIX, base, ".npy"))

# ----------------- FILE COLLECTION -----------------
# Every .npy in GT_DIR (extension matched case-insensitively), in sorted order.
gt_files = [f for f in os.listdir(GT_DIR) if f.lower().endswith(".npy")]
gt_files.sort()
if len(gt_files) == 0:
    raise FileNotFoundError(f"No se encontraron .npy en {GT_DIR}")

pairs = []    # (base, gt_path, vibe_path, mp_path) for sequences that have all three files
missing = []  # (model_tag, base, expected_path) for every VIBE/MP file that was not found

for gt_fname in gt_files:
    base = _base_from_gt(gt_fname)         # video_id
    vibe_fname = _expected_vibe_name(base)
    mp_fname   = _expected_mp_name(base)

    gt_path   = os.path.join(GT_DIR, gt_fname)
    vibe_path = os.path.join(VIBE_DIR, vibe_fname)
    mp_path   = os.path.join(MP_DIR, mp_fname)

    # Check VIBE first, then MP, so `missing` keeps that order per sequence.
    absent = [
        (tag, base, path)
        for tag, path in (("VIBE", vibe_path), ("MP", mp_path))
        if not os.path.isfile(path)
    ]
    missing.extend(absent)
    ok = not absent
    if ok:
        pairs.append((base, gt_path, vibe_path, mp_path))

if missing:
    print("ADVERTENCIA: faltan archivos emparejados (se omitirán esas secuencias):")
    for m in missing:
        print(" -", m)
if not pairs:
    raise RuntimeError("No hay pares completos GT/VIBE/MP.")

# ----------------- BUILD JSON -----------------
# Root structure of the JSON that the post-processing and the metrics will consume.
# "sequences" starts empty and receives one entry per GT/VIBE/MP triple below.
root = {
    "keypoint_names": KEYPOINT_NAMES,
    "connections": {
        "gt_vibe": CONN_GT_VIBE,
        "mediapipe": CONN_MP
    },
    "models_present": ["gt", "vibe", "mp"],
    "meta": {"temporal_align": TEMPORAL_ALIGN, "k3d_layout": "3xK"},
    "sequences": []
}

for base, gt_path, vibe_path, mp_path in pairs:
    # Load the three arrays in the order GT, VIBE, MP; each is (T_model,14,3).
    gt, vb, mp = (_load_npy(p) for p in (gt_path, vibe_path, mp_path))

    # truncate_min_T: keep only the frames that all three models have.
    T = int(min(len(gt), len(vb), len(mp)))
    if T <= 0:
        print(f"Saltado (T=0) -> {base}")
        continue

    # For simplicity a single person (id=1) is assumed per frame; each frame
    # stores that person's gt, vibe and mp joints in k3d (3xK) layout.
    frames = [
        {
            "f": f,
            "persons": [
                {
                    "id": 1,
                    "models": {
                        "gt":   {"k3d": _k3d_3xK(gt[f])},
                        "vibe": {"k3d": _k3d_3xK(vb[f])},
                        "mp":   {"k3d": _k3d_3xK(mp[f])}
                    }
                }
            ]
        }
        for f in range(T)
    ]

    seq = {
        "video_id": base,
        "frames": frames
    }
    root["sequences"].append(seq)
    print(f"OK: {base}  (frames: {T})")

# ----------------- SAVE AND DOWNLOAD -----------------
# ensure_ascii=False keeps accented joint names (e.g. "Muñeca_D") as UTF-8 text.
with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(root, f, ensure_ascii=False, separators=(",", ":" ))  # compact (no spaces after separators)

print("JSON guardado en:", OUTPUT_JSON)
# Colab helper imported at the top of this cell; triggers a browser download of the file.
files.download(OUTPUT_JSON)


OK: courtyard_backpack_00  (frames: 1262)
OK: courtyard_bodyScannerMotions_00  (frames: 1257)
OK: courtyard_box_00  (frames: 1002)
OK: courtyard_jumpBench_01  (frames: 619)
OK: courtyard_laceShoe_00  (frames: 931)
OK: courtyard_relaxOnBench_00  (frames: 558)
OK: courtyard_relaxOnBench_01  (frames: 959)
OK: flat_guitar_01  (frames: 472)
OK: outdoors_climbing_00  (frames: 1228)
OK: outdoors_climbing_01  (frames: 1062)
OK: outdoors_climbing_02  (frames: 376)
OK: outdoors_crosscountry_00  (frames: 142)
OK: outdoors_fencing_01  (frames: 942)
OK: outdoors_freestyle_00  (frames: 497)
OK: outdoors_freestyle_01  (frames: 273)
OK: outdoors_golf_00  (frames: 341)
OK: outdoors_parcours_00  (frames: 1657)
OK: outdoors_parcours_01  (frames: 1319)
OK: outdoors_slalom_00  (frames: 326)
OK: outdoors_slalom_01  (frames: 333)
JSON guardado en: /content/dataset_3models.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# === Inspección rápida del JSON de secuencias ===
# Uso: verificar que dataset_3models.json se creó bien (modelos presentes, número de secuencias y de frames).
import json, os, os.path as osp

JSON_PATH = "/content/dataset_3models.json"  # adjust if needed

with open(JSON_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# General summary (missing keys fall back to empty containers)
kp, models, conns, seqs = (
    data.get("keypoint_names", []),
    data.get("models_present", []),
    data.get("connections", {}),
    data.get("sequences", []),
)

print(f"OK: {osp.basename(JSON_PATH)}")
print(f"- keypoints (K): {len(kp)} -> {kp}")
print(f"- modelos: {models}")
n_conn_gt_vibe = len(conns.get("gt_vibe", []))
n_conn_mp = len(conns.get("mediapipe", []))
print(f"- conexiones: gt_vibe={n_conn_gt_vibe}, mediapipe={n_conn_mp}")
print(f"- secuencias: {len(seqs)}\n")

# List every sequence with its number of frames
print("Listado de secuencias (idx) video_id -> frames")
for i, s in enumerate(seqs):
    print(f"{i:3d}) {s.get('video_id', '<sin_id>')} -> {len(s.get('frames', []))}")

# Optional: case-insensitive substring search over the video ids
QUERY = "fencing"   # change it, or set "" to skip the search
if QUERY:
    print(f"\nCoincidencias con '{QUERY}':")
    needle = QUERY.lower()
    for s in seqs:
        vid = s.get("video_id","")
        if needle in vid.lower():
            print(" -", vid)

# Optional: peek at frame 0 of one sequence, chosen by index, to check the
# internal structure (persons, models, ...)
SEQ_IDX_TO_PEEK = None  # set a number, e.g. 0
if isinstance(SEQ_IDX_TO_PEEK, int):
    s = seqs[SEQ_IDX_TO_PEEK]
    vid = s.get("video_id","")
    nframes = len(s.get("frames", []))
    print(f"\nDetalle rápido de '{vid}': frames={nframes}")
    if nframes:
        persons = s["frames"][0].get("persons", [])
        print("- frame 0: persons:", [p.get("id") for p in persons])
        if persons:
            print("- modelos en frame 0:", list(persons[0].get("models", {}).keys()))


OK: dataset_3models.json
- keypoints (K): 14 -> ['Hombro_D', 'Hombro_I', 'Codo_D', 'Codo_I', 'Muñeca_D', 'Muñeca_I', 'Cadera_D', 'Cadera_I', 'Rodilla_D', 'Rodilla_I', 'Tobillo_D', 'Tobillo_I', 'Cuello', 'Cabeza']
- modelos: ['gt', 'vibe', 'mp']
- conexiones: gt_vibe=14, mediapipe=13
- secuencias: 20

Listado de secuencias (idx) video_id -> frames
  0) courtyard_backpack_00 -> 1262
  1) courtyard_bodyScannerMotions_00 -> 1257
  2) courtyard_box_00 -> 1002
  3) courtyard_jumpBench_01 -> 619
  4) courtyard_laceShoe_00 -> 931
  5) courtyard_relaxOnBench_00 -> 558
  6) courtyard_relaxOnBench_01 -> 959
  7) flat_guitar_01 -> 472
  8) outdoors_climbing_00 -> 1228
  9) outdoors_climbing_01 -> 1062
 10) outdoors_climbing_02 -> 376
 11) outdoors_crosscountry_00 -> 142
 12) outdoors_fencing_01 -> 942
 13) outdoors_freestyle_00 -> 497
 14) outdoors_freestyle_01 -> 273
 15) outdoors_golf_00 -> 341
 16) outdoors_parcours_00 -> 1657
 17) outdoors_parcours_01 -> 1319
 18) outdoors_slalom_00 -> 326
 19

In [None]:
# === Diagnóstico de emparejamiento GT / VIBE / MP ===
# Uso: comprobar que para cada GT existe el .npy correspondiente de VIBE y de MP con la forma correcta (T,14,3)
# antes de intentar construir el JSON unificado.
import os, numpy as np, os.path as osp

# Adjust these paths and prefixes if the folder layout or file naming changes:
GT_DIR   = "/content/GT_npy"
VIBE_DIR = "/content/VIBE_npy"
MP_DIR   = "/content/MP_npy"
VIBE_PREFIX = "VIBE_"
MP_PREFIX   = "MP_"

def is_npy14x3(a):
    """Return True when `a` is a NumPy array of shape (T, 14, 3), for any T."""
    if not isinstance(a, np.ndarray) or a.ndim != 3:
        return False
    return a.shape[1] == 14 and a.shape[2] == 3

gt_files = sorted(f for f in os.listdir(GT_DIR) if f.lower().endswith(".npy"))
print(f"GT npy encontrados: {len(gt_files)}")
print(f"VIBE npy: {sum(1 for f in os.listdir(VIBE_DIR) if f.lower().endswith('.npy'))}")
print(f"MP npy: {sum(1 for f in os.listdir(MP_DIR) if f.lower().endswith('.npy'))}\n")


def _report_estimate(tag, folder, fname, base):
    """Print the diagnostic lines for one estimated-pose file (tag is "VIBE" or "MP").

    If `fname` exists in `folder` its shape is printed, plus an error line when it
    is not (T,14,3). Otherwise the file is reported missing and up to five .npy
    files whose name contains `base` (case-insensitive) are suggested.
    """
    path = osp.join(folder, fname)
    if osp.isfile(path):
        try:
            arr = np.load(path)
            print(f"  {tag} ok:", fname, "| shape:", arr.shape)
            if not is_npy14x3(arr):
                print(f"   -> ERROR: {tag} no es (T,14,3)")
        except Exception as e:
            print(f"  {tag}: ERROR al cargar ->", e)
        return

    print(f"  {tag} faltante:", fname)
    # Flexible search, in case the exact prefix was not used
    needle = base.lower()
    candidates = [x for x in os.listdir(folder)
                  if x.lower().endswith(".npy") and needle in x.lower()]
    if candidates:
        print(f"   * Sugerencias {tag}:", candidates[:5])


missing = []  # reset here; nothing in this cell appends to it
for fgt in gt_files:
    base = osp.splitext(osp.basename(fgt))[0]  # expected video_id
    pgt = osp.join(GT_DIR, fgt)

    # Sequence header
    print(f"[{base}]")

    # GT shape
    try:
        a_gt = np.load(pgt)
        print("  GT:", a_gt.shape if hasattr(a_gt, "shape") else type(a_gt))
        if not is_npy14x3(a_gt):
            print("   -> ERROR: GT no es (T,14,3)")
    except Exception as e:
        print("  GT: ERROR al cargar ->", e)

    # VIBE, then MP
    _report_estimate("VIBE", VIBE_DIR, VIBE_PREFIX + base + ".npy", base)
    _report_estimate("MP", MP_DIR, MP_PREFIX + base + ".npy", base)
    print()


GT npy encontrados: 20
VIBE npy: 20
MP npy: 20

[courtyard_backpack_00]
  GT: (1262, 14, 3)
  VIBE ok: VIBE_courtyard_backpack_00.npy | shape: (1262, 14, 3)
  MP ok: MP_courtyard_backpack_00.npy | shape: (1262, 14, 3)

[courtyard_bodyScannerMotions_00]
  GT: (1257, 14, 3)
  VIBE ok: VIBE_courtyard_bodyScannerMotions_00.npy | shape: (1257, 14, 3)
  MP ok: MP_courtyard_bodyScannerMotions_00.npy | shape: (1257, 14, 3)

[courtyard_box_00]
  GT: (1041, 14, 3)
  VIBE ok: VIBE_courtyard_box_00.npy | shape: (1002, 14, 3)
  MP ok: MP_courtyard_box_00.npy | shape: (1041, 14, 3)

[courtyard_jumpBench_01]
  GT: (619, 14, 3)
  VIBE ok: VIBE_courtyard_jumpBench_01.npy | shape: (619, 14, 3)
  MP ok: MP_courtyard_jumpBench_01.npy | shape: (619, 14, 3)

[courtyard_laceShoe_00]
  GT: (931, 14, 3)
  VIBE ok: VIBE_courtyard_laceShoe_00.npy | shape: (931, 14, 3)
  MP ok: MP_courtyard_laceShoe_00.npy | shape: (931, 14, 3)

[courtyard_relaxOnBench_00]
  GT: (558, 14, 3)
  VIBE ok: VIBE_courtyard_relaxOnBench

In [None]:
# === Fix JSON: swap caderas 6<->7 SOLO en MediaPipe + actualizar conexiones MP ===
# Corregir el orden de las caderas (índices 6 y 7) SOLO para el modelo MediaPipe,
# manteniendo GT y VIBE intactos, y generar un JSON corregido para el postprocesamiento.
import json

# Input JSON written by the build cell, and the corrected output JSON that the metrics will use
SRC_JSON = "/content/dataset_3models.json"                  # your original file (kept untouched)
DST_JSON = "/content/dataset_3models_fix.json"              # corrected output

SWAP_A, SWAP_B = 6, 7  # hips

with open(SRC_JSON, "r", encoding="utf-8") as f:
    data = json.load(f)

# Both indices must exist in the skeleton declared by the JSON.
K = len(data.get("keypoint_names", []))
assert max(SWAP_A, SWAP_B) < K, f"K={K} no incluye índices {SWAP_A},{SWAP_B}"

# --- 1) Swap in the data (mp.k3d) ---
frames_touched = 0  # counts person entries whose mp.k3d was swapped
for seq in data.get("sequences", []):
    for fr in seq.get("frames", []):
        for person in fr.get("persons", []):
            mp = person.get("models", {}).get("mp")
            if mp and "k3d" in mp:
                k3d = mp["k3d"]  # 3xK (lists holding floats or None)
                # Exchange the two joints in each of the X, Y and Z rows, in place.
                for r in (0, 1, 2):
                    row = k3d[r]
                    row[SWAP_A], row[SWAP_B] = row[SWAP_B], row[SWAP_A]
                frames_touched += 1

# --- 2) Actualizar conexiones de MediaPipe (mapear 6<->7) ---
def map_idx(i):
    """Map SWAP_A to SWAP_B and SWAP_B to SWAP_A; any other index is returned unchanged."""
    if i == SWAP_A:
        return SWAP_B
    if i == SWAP_B:
        return SWAP_A
    return i

mp_conns = data.get("connections", {}).get("mediapipe")
if not isinstance(mp_conns, list):
    raise ValueError("No se encontró connections.mediapipe en el JSON.")

# Remap both endpoints of every edge, keeping the first occurrence of each
# ordered (a, b) pair and the original edge order.
mp_conns_new = []
seen = set()
for edge in mp_conns:
    a, b = edge
    pair = (map_idx(a), map_idx(b))
    if pair in seen:
        continue
    seen.add(pair)
    mp_conns_new.append(list(pair))

data["connections"]["mediapipe"] = mp_conns_new

# --- 3) Post-processing metadata ---
# Record in 'meta' that this JSON already has the MediaPipe hip correction applied
data.setdefault("meta", {}).setdefault("postprocess", []).append(
    {"action":"swap_mp_indices_and_mp_connections","swap":[SWAP_A,SWAP_B]}
)

# --- 4) Save / download ---
with open(DST_JSON, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, separators=(",",":"))

print(f"Listo: {DST_JSON} | frames (mp) afectados: {frames_touched}")
# The download stays best-effort (e.g. when not running on Colab), but the
# reason is reported instead of being swallowed silently; the file is already
# on disk at this point either way.
try:
    from google.colab import files
    files.download(DST_JSON)
except Exception as exc:
    print(f"Aviso: descarga automática omitida ({type(exc).__name__}: {exc}); el archivo queda en {DST_JSON}")


In [None]:
# Second fix: the hip order somehow got broken again in the new code.     9/11/25
# Copies the JSON and, ONLY for MediaPipe, swaps the coordinates of joints 6↔7 in every frame/person.
import json, numpy as np
from google.colab import files

# Input/output paths are relative to the current working directory.
# NOTE(review): JSON_IN has the same file name that the previous cell writes under
# /content with the MediaPipe hips already swapped. If it is that same file, the
# swap in this cell restores the original hip order, while connections.mediapipe
# and meta.postprocess are left as the previous cell wrote them — confirm this is intended.
JSON_IN  = "dataset_3models_fix.json"
JSON_OUT = "dataset_3models_fix__mp_swap_hips.json"
MODEL_KEYS = {"mediapipe", "mp"}   # model keys treated as MediaPipe; adjust if you use another key
J6, J7 = 6, 7  # joint indices whose coordinates are exchanged

with open(JSON_IN, "r", encoding="utf-8") as f:
    data = json.load(f)

# "sequences" may be a list or a dict keyed by sequence; iterate over its values either way.
seqs = data.get("sequences", [])
seq_iter = list(seqs.values()) if isinstance(seqs, dict) else seqs

def swap_k3d(k3d, idx_a=None, idx_b=None):
    """Return a copy of `k3d` with the coordinates of two joints exchanged.

    Both layouts are supported: (3,K), i.e. [X[K], Y[K], Z[K]], and (K,3),
    i.e. one [x, y, z] row per joint. When both could apply (K == 3) the
    (3,K) interpretation wins, as before.

    Missing coordinates stored as None are preserved as None. The previous
    implementation went through np.asarray(..., dtype=float), which turned
    every None into NaN; json.dump then wrote the non-standard token NaN
    instead of null.

    Args:
        k3d: nested list (or any object exposing .tolist()) holding numbers or None.
        idx_a, idx_b: joint indices to exchange; default to the module-level
            J6 and J7 when omitted.

    Returns:
        A new nested list with the two joints swapped and numeric values as
        Python floats, or `k3d` itself, unchanged, when it is not a rectangular
        2-D structure or is too small to contain both indices.
    """
    if idx_a is None:
        idx_a = J6
    if idx_b is None:
        idx_b = J7
    hi = max(idx_a, idx_b)

    grid = k3d.tolist() if hasattr(k3d, "tolist") else k3d
    # Only rectangular 2-D data is handled; anything else is returned untouched.
    if not isinstance(grid, (list, tuple)) or len(grid) == 0:
        return k3d
    if not all(isinstance(row, (list, tuple)) for row in grid):
        return k3d
    width = len(grid[0])
    if any(len(row) != width for row in grid):
        return k3d
    if any(isinstance(v, (list, tuple)) for row in grid for v in row):
        return k3d

    def _num(v):
        # Keep the "missing" marker; normalise every other value to float.
        return None if v is None else float(v)

    n_rows = len(grid)
    # (3,K): swap two columns in each coordinate row.
    if n_rows == 3 and width > hi:
        out = [[_num(v) for v in row] for row in grid]
        for row in out:
            row[idx_a], row[idx_b] = row[idx_b], row[idx_a]
        return out
    # (K,3): swap two whole joint rows.
    if width == 3 and n_rows > hi:
        out = [[_num(v) for v in row] for row in grid]
        out[idx_a], out[idx_b] = out[idx_b], out[idx_a]
        return out
    return k3d

# Walk every sequence -> frame -> person and replace the k3d of each
# MediaPipe-like model entry with its hip-swapped version.
for seq in seq_iter:
    for fr in seq.get("frames", []):
        for p in fr.get("persons", []):
            models = p.get("models", {})
            for mk in list(models.keys()):
                entry = models[mk]
                if mk not in MODEL_KEYS or not isinstance(entry, dict) or "k3d" not in entry:
                    continue
                entry["k3d"] = swap_k3d(entry["k3d"])

# Write the modified JSON (default separators, non-ASCII characters kept as UTF-8 text).
with open(JSON_OUT, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False)

print("OK ->", JSON_OUT)
# Colab helper imported at the top of this cell; triggers a browser download of the file.
files.download(JSON_OUT)


OK -> dataset_3models_fix__mp_swap_hips.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>