In [2]:
import os, cv2, csv, numpy as np
from glob import glob

# Inputs: RAW_DIR is where the original images are looked up, MASK_DIR is
# where the cleaned masks are globbed from.
RAW_DIR = "/workspaces/fruit-detector-pricer/ProyectoFinal/Dataset_Filtrado"
MASK_DIR = "/workspaces/fruit-detector-pricer/ProyectoFinal/Morfologia/masks_clean"

# Outputs: colour crops and their grayscale counterparts.
OUT_RGB = "/workspaces/fruit-detector-pricer/ProyectoFinal/recorte/crops"
OUT_GRAY = "/workspaces/fruit-detector-pricer/ProyectoFinal/recorte/crops_gray"

# Make sure both output roots exist before anything is written.
for _out_dir in (OUT_RGB, OUT_GRAY):
    os.makedirs(_out_dir, exist_ok=True)

# Helper: find the original image that shares a mask's base name (any of the
# supported extensions).
def find_raw_path(split, clase, stem, raw_dir=None):
    """Return the path of the raw image named ``stem``, or None if not found.

    The file is looked up in ``<raw_dir>/<split>/<clase>`` by appending each
    supported extension to ``stem`` in a fixed order; the first path that
    exists is returned.

    Args:
        split: Sub-folder of the dataset root (the caller passes "train" or
            "test").
        clase: Class sub-folder inside the split.
        stem: File name without extension.
        raw_dir: Dataset root to search. Defaults to the module-level
            ``RAW_DIR`` so existing three-argument calls behave as before.

    Returns:
        The first existing candidate path, or None when none exists.
    """
    if raw_dir is None:
        raw_dir = RAW_DIR
    folder = os.path.join(raw_dir, split, clase)
    # Both lower- and upper-case spellings are tried because the lookup is a
    # plain existence check on the exact file name.
    for ext in (".jpg", ".jpeg", ".png", ".JPG", ".PNG", ".JPEG"):
        candidate = os.path.join(folder, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None

# NOTE(review): `kernel` is not used anywhere in this cell; it is kept in case
# other cells rely on it.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))

# Folder and CSV file where one metadata row per saved crop is logged.
meta_path = "/workspaces/fruit-detector-pricer/ProyectoFinal/recorte/results"
os.makedirs(meta_path, exist_ok=True)
csv_path  = os.path.join(meta_path, "crop_meta.csv")
# The header is written only when the log does not exist yet; otherwise rows
# are appended to the existing file.
write_header = not os.path.exists(csv_path)

# Crop parameters (loop-invariant, so defined once).
margin = 4                                      # extra pixels around the bounding box
gray_size = (128, 128)                          # (width, height) of the grayscale output
final_size = f"{gray_size[0]}x{gray_size[1]}"   # logged as "128x128"
mask_patterns = ("*.png","*.jpg","*.jpeg","*.PNG","*.JPG","*.JPEG")

total_ok = 0
skipped = 0
# The log is opened once for the whole run instead of once per crop.
with open(csv_path, "a", newline="") as f:
    log = csv.writer(f)
    if write_header:
        log.writerow(["split","clase","img_id","x","y","w","h","added_margin","final_size"])

    for split in ("train","test"):
        # Collect every cleaned mask of this split. A set removes duplicates
        # (patterns differing only in case can match the same file on
        # case-insensitive filesystems); sorting makes the order reproducible.
        found = set()
        for pattern in mask_patterns:
            found.update(glob(os.path.join(MASK_DIR, split, "**", pattern), recursive=True))
        masks = sorted(found)

        for mpath in masks:
            clase = os.path.basename(os.path.dirname(mpath))
            stem  = os.path.splitext(os.path.basename(mpath))[0]  # e.g. "IMG_001"
            imgp  = find_raw_path(split, clase, stem)
            if imgp is None:
                skipped += 1
                continue

            # Read the mask as grayscale and force it to binary (foreground = 255).
            m = cv2.imread(mpath, cv2.IMREAD_GRAYSCALE)
            if m is None:
                skipped += 1
                continue
            m = (m > 0).astype(np.uint8) * 255

            # Keep only the largest connected component. Connectivity is passed
            # by keyword: per the documented Python signature the second
            # positional slot is the optional `labels` output array (8 is also
            # the default connectivity).
            num, labels, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8)
            if num < 2:
                # Label 0 is the background, so there is no foreground at all.
                skipped += 1
                continue
            largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
            m = np.where(labels == largest, 255, 0).astype(np.uint8)

            # Read the original image; imread returns None instead of raising
            # when the file cannot be decoded.
            img = cv2.imread(imgp, cv2.IMREAD_COLOR)
            if img is None:
                skipped += 1
                continue
            # Mask coordinates are only valid on an image of the same size.
            if img.shape[:2] != m.shape:
                skipped += 1
                continue

            # Tight bounding box of the component (end indices exclusive) ...
            ys, xs = np.nonzero(m)
            y0, y1 = int(ys.min()), int(ys.max()) + 1
            x0, x1 = int(xs.min()), int(xs.max()) + 1
            # ... grown by the margin and clamped to the image bounds, so the
            # object is not cut flush with its edge.
            y0 = max(0, y0 - margin)
            x0 = max(0, x0 - margin)
            y1 = min(m.shape[0], y1 + margin)
            x1 = min(m.shape[1], x1 + margin)

            # Crop image and mask, then paint everything outside the mask white.
            crop_img = img[y0:y1, x0:x1].copy()
            crop_m   = m[y0:y1, x0:x1]
            crop_img[crop_m == 0] = 255

            # Grayscale version resized to a fixed size.
            gray = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, gray_size, interpolation=cv2.INTER_AREA)

            # Outputs mirror the split/class tree of the masks.
            dst_rgb = os.path.join(OUT_RGB, split, clase, stem + "_crop.png")
            dst_g   = os.path.join(OUT_GRAY, split, clase, stem + "_128.png")
            os.makedirs(os.path.dirname(dst_rgb), exist_ok=True)
            os.makedirs(os.path.dirname(dst_g), exist_ok=True)

            # imwrite reports failure through its return value; only log and
            # count crops whose two files were actually written.
            saved_rgb = cv2.imwrite(dst_rgb, crop_img)
            saved_gray = cv2.imwrite(dst_g, gray)  # (save as .npy instead if [0,1] floats are needed later)
            if not (saved_rgb and saved_gray):
                skipped += 1
                continue

            log.writerow([split, clase, stem, x0, y0, x1-x0, y1-y0, margin, final_size])
            total_ok += 1

print(f"Listo: {total_ok} recortes guardados en:\n- {OUT_RGB}\n- {OUT_GRAY}\nLog: {csv_path}")
if skipped:
    print(f"Omitidos: {skipped} (sin imagen original, archivo ilegible, máscara vacía, tamaño distinto o error al guardar)")

Listo: 5339 recortes guardados en:
- /workspaces/fruit-detector-pricer/ProyectoFinal/recorte/crops
- /workspaces/fruit-detector-pricer/ProyectoFinal/recorte/crops_gray
Log: /workspaces/fruit-detector-pricer/ProyectoFinal/recorte/results/crop_meta.csv
