# Tabla de clasificación de defectos (sigmoid + (opcional) brightness/gamma)

Este notebook recorre las imágenes de un **lote** (3/4/5/6) en **orden natural** (`p3_1`, `p3_2`, …) y, **a medida que procesa**, imprime:

- nombre de la imagen
- clase predicha (`defect`)
- `confidence`

Pipeline configurable:

1. (Opcional) reducción de resolución (`downscale_pil`, factor configurable)
2. (Opcional) recorte automático (`potato_pixels_rgb_img`)
3. (Opcional) `apply_brightness_and_gamma` **antes o después** del sigmoid
4. `apply_sigmoid`
5. `potato_defect_classification`


In [1]:
from __future__ import annotations

from pathlib import Path
import os
import sys
from typing import Any, Iterable

import numpy as np
from PIL import Image

def find_project_root(start: Path | None = None, marker_dir: str = "data") -> Path:
    """Return the closest ancestor directory that contains ``marker_dir/``.

    The search begins at ``start`` (resolved to an absolute path), or at the
    current working directory when ``start`` is ``None``, and walks upward
    through every parent directory.

    Raises:
        FileNotFoundError: if no directory on the way up contains ``marker_dir``.
    """
    origin = Path(start).resolve() if start is not None else Path.cwd()
    candidates = (origin, *origin.parents)
    # is_dir() is already False for a missing path, so it covers both checks.
    root = next((folder for folder in candidates if (folder / marker_dir).is_dir()), None)
    if root is None:
        raise FileNotFoundError(f"No he encontrado '{marker_dir}/' subiendo desde {origin}")
    return root

# Locate the repository root and put it on sys.path so that the `src` package
# can be imported no matter which sub-folder the notebook is launched from.
PROJECT_ROOT = find_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

try:
    from src.raw_image_treatment import (
        apply_sigmoid,
        apply_brightness_and_gamma,
        potato_defect_classification,
        potato_pixels_rgb_img,
    )
except Exception as e:
    # Re-raise with a hint for the user, explicitly chaining the original
    # exception so the real cause stays visible in the traceback.
    raise ImportError(
        "No he podido importar desde src/raw_image_treatment.py.\n"
        "Asegúrate de ejecutar este notebook dentro del repo y que existe esa ruta.\n"
        f"Error original: {e!r}"
    ) from e

# Report the resolved root and whether the Roboflow key is present
# (only a boolean is printed, never the key itself).
print("PROJECT_ROOT:", PROJECT_ROOT)
print("ROBOFLOW_API_KEY set:", bool(os.environ.get("ROBOFLOW_API_KEY")))


PROJECT_ROOT: c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml
ROBOFLOW_API_KEY set: True




In [2]:
# ------------------------------------------------------------
# Rutas y helpers
# ------------------------------------------------------------
import re
import pandas as pd
from IPython.display import display

# Image source folders, all resolved relative to the repository root.
# These are the three sources accepted by `list_images`.
DIR_DEFINITIVE = PROJECT_ROOT / "data/input/raw/raw_images/definitive"
DIR_TEST = PROJECT_ROOT / "data/input/raw/raw_images/test_1"
DIR_CROPPED_DEF = PROJECT_ROOT / "data/input/processed/cropped_def"

def natural_sort_key(p: Path) -> list[int | str]:
    """Build a sort key that orders file names naturally (``p3_2`` before ``p3_10``).

    The file name is split on runs of decimal digits. Digit runs become
    integers and everything else is lower-cased, so the comparison is numeric
    where it matters and case-insensitive elsewhere.

    Args:
        p: Path whose ``name`` component is used to build the key.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting with a
        (possibly empty) string, so keys of different files stay comparable.
    """
    parts = re.split(r"(\d+)", p.name)
    # With a capturing group, re.split puts the matched digit runs at the odd
    # indices. Relying on that position instead of str.isdigit() avoids
    # calling int() on characters such as "²" that isdigit() accepts but
    # neither `\d` nor int() understand.
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]

def list_images(source: str, pattern: str) -> list[Path]:
    """Return the paths in a source folder that match ``pattern``, naturally sorted.

    Args:
        source: One of ``'definitive'``, ``'test_1'`` or ``'cropped_def'``.
        pattern: Glob pattern applied inside the selected folder.

    Raises:
        ValueError: if ``source`` is not one of the known names.
        FileNotFoundError: if the folder for ``source`` does not exist.
    """
    known_sources = (
        ("definitive", DIR_DEFINITIVE),
        ("test_1", DIR_TEST),
        ("cropped_def", DIR_CROPPED_DEF),
    )
    folder = next((directory for name, directory in known_sources if source == name), None)
    if folder is None:
        raise ValueError("source debe ser: 'definitive', 'test_1' o 'cropped_def'")

    if not folder.exists():
        raise FileNotFoundError(f"No existe la carpeta fuente: {folder}")

    return sorted(folder.glob(pattern), key=natural_sort_key)

def downscale_pil(img: Image.Image, factor: float) -> Image.Image:
    """Shrink ``img`` by dividing both dimensions by ``factor``.

    A factor of 1.0 or less leaves the image untouched and returns the very
    same object. Each resulting side is rounded and never drops below 1 pixel.
    Resampling is bilinear.
    """
    scale = float(factor)
    if scale <= 1.0:
        return img
    width, height = img.size
    target_size = tuple(max(1, int(round(side / scale))) for side in (width, height))
    return img.resize(target_size, resample=Image.BILINEAR)

def display_table(rows: list[dict[str, Any]]):
    """Render ``rows`` as a DataFrame in the notebook and return that DataFrame.

    When a ``confidence`` column is present it is converted to numeric,
    with values that cannot be parsed turned into NaN.
    """
    table = pd.DataFrame(rows)
    has_confidence = "confidence" in table.columns
    if has_confidence:
        numeric_confidence = pd.to_numeric(table["confidence"], errors="coerce")
        table["confidence"] = numeric_confidence
    display(table)
    return table

# Sanity check: show each source folder and whether it exists on disk.
print("definitive exists:", DIR_DEFINITIVE.exists(), "|", DIR_DEFINITIVE)
print("cropped_def exists:", DIR_CROPPED_DEF.exists(), "|", DIR_CROPPED_DEF)
print("test_1 exists:", DIR_TEST.exists(), "|", DIR_TEST)


definitive exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\definitive
cropped_def exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\processed\cropped_def
test_1 exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\test_1


In [16]:
import inspect

def _apply_sigmoid_compat(img: Image.Image, *, k: float, mid: float, normalize: bool) -> Image.Image:
    """Call ``apply_sigmoid`` whether or not it supports the ``normalize`` keyword.

    The signature of ``apply_sigmoid`` is inspected once. ``normalize`` is
    forwarded only when the function declares a parameter with that name, so
    older versions without it keep working.

    Only the introspection is guarded. Errors raised by ``apply_sigmoid``
    itself propagate immediately instead of being swallowed and triggering a
    second (or third) run of the transform on the same image.

    Args:
        img: Image handed to ``apply_sigmoid`` unchanged.
        k: Forwarded as ``k`` after conversion to ``float``.
        mid: Forwarded as ``mid`` after conversion to ``float``.
        normalize: Forwarded as ``normalize`` (as ``bool``) when supported.

    Returns:
        Whatever ``apply_sigmoid`` returns.
    """
    k = float(k)
    mid = float(mid)
    normalize = bool(normalize)

    try:
        accepts_normalize = "normalize" in inspect.signature(apply_sigmoid).parameters
    except (TypeError, ValueError):
        # inspect.signature() cannot describe this callable: probe by calling,
        # first with `normalize` and, if the keyword is rejected, without it.
        try:
            return apply_sigmoid(img, k=k, mid=mid, normalize=normalize)
        except TypeError:
            return apply_sigmoid(img, k=k, mid=mid)

    if accepts_normalize:
        return apply_sigmoid(img, k=k, mid=mid, normalize=normalize)
    # Older version: no `normalize` parameter.
    return apply_sigmoid(img, k=k, mid=mid)


def run_defect_table_sigmoid(
    *,
    source: str,
    pattern: str = "*.png",
    max_images: int | None = None,
    # Cut (only meaningful for 'definitive' and 'test_1'; 'cropped_def' images are already crops)
    do_cut: bool = True,
    cut_margin: int = 35,
    cut_min_conf: float = 0.01,
    # Resolution reduction factor. 1.0 = unchanged, 2.0 = half, 4.0 = quarter...
    downscale_factor: float = 1.0,
    # Sigmoid
    sigmoid_k: float = 6.0,
    sigmoid_mid: float = 0.5,
    sigmoid_normalize: bool = True,
    # (Optional) brightness/gamma before or after the sigmoid
    bg_mode: str = "none",  # 'none' | 'before' | 'after'
    brightness: float = 1.0,
    gamma: float = 1.0,
    # Classifier
    confidence_threshold: float = 0.25,
    verbose: bool = True,
):
    """Classify every matching image of a source, in natural order, and collect the results.

    For each image the pipeline is: load as RGB, optional downscale, optional
    automatic crop (skipped for ``source == 'cropped_def'``), optional
    brightness/gamma before the sigmoid, sigmoid, optional brightness/gamma
    after the sigmoid, and finally defect classification.

    A failure on one image does not stop the run: the exception is recorded
    in that image's row and processing continues with the next file.

    Args:
        source: Image source name, forwarded to ``list_images``.
        pattern: Glob pattern, forwarded to ``list_images``.
        max_images: If not ``None``, only the first ``max_images`` paths are processed.
        do_cut: Whether to crop with ``potato_pixels_rgb_img``.
        cut_margin: Passed as ``margin`` to ``potato_pixels_rgb_img``.
        cut_min_conf: Passed as ``min_conf`` to ``potato_pixels_rgb_img``.
        downscale_factor: Passed to ``downscale_pil``.
        sigmoid_k: Sigmoid ``k`` parameter.
        sigmoid_mid: Sigmoid ``mid`` parameter.
        sigmoid_normalize: Sigmoid ``normalize`` flag (used when supported).
        bg_mode: ``'none'``, ``'before'`` or ``'after'`` (case-insensitive,
            surrounding whitespace ignored).
        brightness: Passed to ``apply_brightness_and_gamma``.
        gamma: Passed to ``apply_brightness_and_gamma``.
        confidence_threshold: Passed to ``potato_defect_classification``.
        verbose: Print a summary line and one line per processed image.

    Returns:
        A list with one dict per image, with keys ``i``, ``file``, ``path``,
        ``status`` (``'ok'`` or ``'error'``), ``defect``, ``confidence`` and
        ``error``.

    Raises:
        ValueError: if ``bg_mode`` is not one of the accepted values.
        Exceptions raised by ``list_images`` also propagate.
    """

    bg_mode = str(bg_mode).lower().strip()
    if bg_mode not in {"none", "before", "after"}:
        raise ValueError("bg_mode debe ser 'none', 'before' o 'after'")

    paths = list_images(source=source, pattern=pattern)
    if max_images is not None:
        paths = paths[: int(max_images)]
    total = len(paths)

    rows: list[dict[str, Any]] = []

    if verbose:
        print(f"Encontradas {total} imágenes en {source=} con {pattern=}")
        print(
            f"Pipeline: cut={do_cut}, bg_mode={bg_mode}, "
            f"sigmoid(k={sigmoid_k}, mid={sigmoid_mid}, norm={sigmoid_normalize}), "
            f"bg(brightness={brightness}, gamma={gamma})"
        )

    for i, p in enumerate(paths, start=1):
        row = {
            "i": i,
            "file": p.name,
            "path": str(p),
            "status": "pending",
            "defect": None,
            "confidence": None,
            "error": None,
        }

        try:
            # 1) Load as an RGB PIL image. The context manager guarantees the
            #    underlying file is closed even if decoding fails; convert()
            #    returns an independent image that outlives the `with` block.
            with Image.open(p) as opened:
                img = opened.convert("RGB")

            # 2) Optional downscale
            img = downscale_pil(img, downscale_factor)

            # 3) Optional cut (automatic crop); skipped for already-cropped sources
            if do_cut and source != "cropped_def":
                img, _ = potato_pixels_rgb_img(img, margin=int(cut_margin), min_conf=float(cut_min_conf))

            # 4) Optional brightness/gamma BEFORE the sigmoid
            if bg_mode == "before":
                img = apply_brightness_and_gamma(img, brightness=float(brightness), gamma=float(gamma))

            # 5) Sigmoid (via the compatibility wrapper)
            img = _apply_sigmoid_compat(img, k=float(sigmoid_k), mid=float(sigmoid_mid), normalize=bool(sigmoid_normalize))

            # 6) Optional brightness/gamma AFTER the sigmoid
            if bg_mode == "after":
                img = apply_brightness_and_gamma(img, brightness=float(brightness), gamma=float(gamma))

            # 7) Classification; the third returned element is not used here
            defect, conf, _vis = potato_defect_classification(img, confidence_threshold=float(confidence_threshold))

            row.update({"status": "ok", "defect": defect, "confidence": float(conf)})

            if verbose:
                print(f"[{i:>3}/{total}] {p.name} -> {defect} (conf={float(conf):.3f})")

        except Exception as e:
            # Deliberate best-effort: record the failure and keep going so one
            # bad image does not abort the whole batch.
            row.update({"status": "error", "defect": None, "confidence": None, "error": repr(e)})
            if verbose:
                print(f"[{i:>3}/{total}] {p.name} -> ERROR: {e!r}")

        rows.append(row)

    return rows


In [31]:
# ==============================
# PARAMETERS (edit only here)
# ==============================

# --- Lot selection (3, 4, 5 or 6) ---
LOT = 3  # <-- change to 3 / 4 / 5 / 6

# --- Image source ---
SOURCE = "definitive"  # 'definitive', 'cropped_def', or 'test_1'

# Default glob pattern derived from the lot (can be overridden)
# Typical examples:
#   - "p3_*.png"
#   - "p3_*_cropped.png"
PATTERN = f"p{int(LOT)}_*.png"

# --- Optional limit (None = all images) ---
MAX_IMAGES = None  # e.g. 30

# --- Cut (automatic crop) ---
DO_CUT = False
CUT_MARGIN = 35
CUT_MIN_CONF = 0.01

# --- Downscale (1.0 = none, 2.0 = half, 4.0 = quarter...) ---
DOWNSCALE_FACTOR = 1.0

# --- Sigmoid (NEW) ---
SIGMOID_K = 6.0
SIGMOID_MID = 0.5
SIGMOID_NORMALIZE = True

# --- (Optional) Brightness/Gamma before or after the sigmoid ---
#   - "none": do not apply
#   - "before": apply before the sigmoid
#   - "after": apply after the sigmoid
BG_MODE = "before"
BRIGHTNESS = 1.0
GAMMA = 1.1

# --- Classifier ---
CONFIDENCE_THRESHOLD = 0.25


In [17]:
# ------------------------------------------------------------
# Run the pipeline with the module-level parameters
# ------------------------------------------------------------
# NOTE(review): despite its name, `df` receives the list of per-image row
# dicts returned by run_defect_table_sigmoid, not a DataFrame; pass it to
# display_table(...) to obtain one.
df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    bg_mode=BG_MODE,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)


Encontradas 30 imágenes en source='definitive' con pattern='p3_*.png'
Pipeline: cut=False, bg_mode=none, sigmoid(k=6.0, mid=0.5, norm=True), bg(brightness=1.0, gamma=1.1)
[  1/30] p3_1.png -> Potato (conf=0.871)
[  2/30] p3_2.png -> Potato (conf=0.880)
[  3/30] p3_3.png -> Potato (conf=0.640)
[  4/30] p3_4.png -> Unable to classify (conf=0.000)
[  5/30] p3_5.png -> Unable to classify (conf=0.000)
[  6/30] p3_6.png -> Potato (conf=0.873)
[  7/30] p3_7.png -> Potato (conf=0.771)
[  8/30] p3_8.png -> Unable to classify (conf=0.000)
[  9/30] p3_9.png -> Potato (conf=0.746)
[ 10/30] p3_10.png -> Potato (conf=0.800)
[ 11/30] p3_11.png -> Diseased-fungal potato (conf=0.456)
[ 12/30] p3_12.png -> Potato (conf=0.763)
[ 13/30] p3_13.png -> Potato (conf=0.784)
[ 14/30] p3_14.png -> Potato (conf=0.824)
[ 15/30] p3_15.png -> Unable to classify (conf=0.000)
[ 16/30] p3_16.png -> Potato (conf=0.812)
[ 17/30] p3_17.png -> Potato (conf=0.892)
[ 18/30] p3_18.png -> Potato (conf=0.630)
[ 19/30] p3_19.png

In [21]:
# ------------------------------------------------------------
# Run the pipeline with the module-level parameters
# ------------------------------------------------------------
# NOTE(review): despite its name, `df` receives the list of per-image row
# dicts returned by run_defect_table_sigmoid, not a DataFrame; pass it to
# display_table(...) to obtain one.
df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    bg_mode=BG_MODE,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)

Encontradas 30 imágenes en source='definitive' con pattern='p3_*.png'
Pipeline: cut=False, bg_mode=before, sigmoid(k=6.0, mid=0.5, norm=True), bg(brightness=1.0, gamma=1.1)
[  1/30] p3_1.png -> Potato (conf=0.889)
[  2/30] p3_2.png -> Potato (conf=0.882)
[  3/30] p3_3.png -> Potato (conf=0.777)
[  4/30] p3_4.png -> Potato (conf=0.485)
[  5/30] p3_5.png -> Potato (conf=0.720)
[  6/30] p3_6.png -> Potato (conf=0.879)
[  7/30] p3_7.png -> Potato (conf=0.767)
[  8/30] p3_8.png -> Potato (conf=0.672)
[  9/30] p3_9.png -> Potato (conf=0.763)
[ 10/30] p3_10.png -> Potato (conf=0.840)
[ 11/30] p3_11.png -> Potato (conf=0.610)
[ 12/30] p3_12.png -> Potato (conf=0.782)
[ 13/30] p3_13.png -> Potato (conf=0.793)
[ 14/30] p3_14.png -> Potato (conf=0.844)
[ 15/30] p3_15.png -> Sprouted potato (conf=0.442)
[ 16/30] p3_16.png -> Potato (conf=0.888)
[ 17/30] p3_17.png -> Potato (conf=0.857)
[ 18/30] p3_18.png -> Potato (conf=0.732)
[ 19/30] p3_19.png -> Unable to classify (conf=0.000)
[ 20/30] p3_20.pn

In [23]:
# ------------------------------------------------------------
# Run the pipeline with the module-level parameters
# ------------------------------------------------------------
# NOTE(review): despite its name, `df` receives the list of per-image row
# dicts returned by run_defect_table_sigmoid, not a DataFrame; pass it to
# display_table(...) to obtain one.
df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    bg_mode=BG_MODE,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)

Encontradas 72 imágenes en source='definitive' con pattern='p4_*.png'
Pipeline: cut=False, bg_mode=before, sigmoid(k=6.0, mid=0.5, norm=True), bg(brightness=1.0, gamma=1.1)
[  1/72] p4_1.png -> Potato (conf=0.866)
[  2/72] p4_2.png -> Potato (conf=0.902)
[  3/72] p4_3.png -> Potato (conf=0.821)
[  4/72] p4_4.png -> Potato (conf=0.878)
[  5/72] p4_5.png -> Potato (conf=0.532)
[  6/72] p4_6.png -> Potato (conf=0.698)
[  7/72] p4_7.png -> Potato (conf=0.798)
[  8/72] p4_8.png -> Potato (conf=0.843)
[  9/72] p4_9.png -> Potato (conf=0.906)
[ 10/72] p4_10.png -> Potato (conf=0.807)
[ 11/72] p4_11.png -> Potato (conf=0.859)
[ 12/72] p4_12.png -> Potato (conf=0.884)
[ 13/72] p4_13.png -> Potato (conf=0.591)
[ 14/72] p4_14.png -> Potato (conf=0.905)
[ 15/72] p4_15.png -> Potato (conf=0.514)
[ 16/72] p4_16.png -> Potato (conf=0.779)
[ 17/72] p4_17.png -> Potato (conf=0.614)
[ 18/72] p4_18.png -> Diseased-fungal potato (conf=0.560)
[ 19/72] p4_19.png -> Potato (conf=0.615)
[ 20/72] p4_20.png -> 

In [26]:
# ------------------------------------------------------------
# Run the pipeline with the module-level parameters
# ------------------------------------------------------------
# NOTE(review): despite its name, `df` receives the list of per-image row
# dicts returned by run_defect_table_sigmoid, not a DataFrame; pass it to
# display_table(...) to obtain one.
df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    bg_mode=BG_MODE,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)

Encontradas 34 imágenes en source='definitive' con pattern='p5_*.png'
Pipeline: cut=False, bg_mode=before, sigmoid(k=6.0, mid=0.5, norm=True), bg(brightness=1.0, gamma=1.1)
[  1/34] p5_1.png -> Sprouted potato (conf=0.686)
[  2/34] p5_2.png -> Diseased-fungal potato (conf=0.437)
[  3/34] p5_3.png -> Sprouted potato (conf=0.533)
[  4/34] p5_4.png -> Sprouted potato (conf=0.794)
[  5/34] p5_5.png -> Sprouted potato (conf=0.738)
[  6/34] p5_6.png -> Diseased-fungal potato (conf=0.413)
[  7/34] p5_7.png -> Unable to classify (conf=0.000)
[  8/34] p5_8.png -> Sprouted potato (conf=0.592)
[  9/34] p5_9.png -> Potato (conf=0.641)
[ 10/34] p5_10.png -> Sprouted potato (conf=0.646)
[ 11/34] p5_11.png -> Sprouted potato (conf=0.852)
[ 12/34] p5_12.png -> Diseased-fungal potato (conf=0.492)
[ 13/34] p5_13.png -> Sprouted potato (conf=0.523)
[ 14/34] p5_14.png -> Sprouted potato (conf=0.643)
[ 15/34] p5_15.png -> Diseased-fungal potato (conf=0.531)
[ 16/34] p5_16.png -> Diseased-fungal potato (con

In [28]:
# ------------------------------------------------------------
# Run the pipeline with the module-level parameters
# ------------------------------------------------------------
# NOTE(review): despite its name, `df` receives the list of per-image row
# dicts returned by run_defect_table_sigmoid, not a DataFrame; pass it to
# display_table(...) to obtain one.
df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    bg_mode=BG_MODE,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)

Encontradas 19 imágenes en source='definitive' con pattern='p6_*.png'
Pipeline: cut=False, bg_mode=before, sigmoid(k=6.0, mid=0.5, norm=True), bg(brightness=1.0, gamma=1.1)
[  1/19] p6_1.png -> Diseased-fungal potato (conf=0.889)
[  2/19] p6_2.png -> Diseased-fungal potato (conf=0.911)
[  3/19] p6_3.png -> Diseased-fungal potato (conf=0.785)
[  4/19] p6_4.png -> Unable to classify (conf=0.000)
[  5/19] p6_5.png -> Diseased-fungal potato (conf=0.918)
[  6/19] p6_6.png -> Diseased-fungal potato (conf=0.605)
[  7/19] p6_7.png -> Diseased-fungal potato (conf=0.826)
[  8/19] p6_8.png -> Damaged potato (conf=0.676)
[  9/19] p6_9.png -> Diseased-fungal potato (conf=0.913)
[ 10/19] p6_10.png -> Potato (conf=0.595)
[ 11/19] p6_11.png -> Diseased-fungal potato (conf=0.900)
[ 12/19] p6_12.png -> Diseased-fungal potato (conf=0.912)
[ 13/19] p6_13.png -> Diseased-fungal potato (conf=0.742)
[ 14/19] p6_14.png -> Diseased-fungal potato (conf=0.601)
[ 15/19] p6_15.png -> Diseased-fungal potato (conf=0