# Tabla de clasificación de defectos (sigmoid + clasificación)

Este notebook genera una **tabla** (DataFrame) con la clasificación de defectos por imagen, usando:

- `apply_sigmoid`: aplica una curva *sigmoid* sobre la luminancia antes de clasificar.
- `potato_defect_classification`: devuelve `(defect, confidence, vis_img)`.

Está pensado para recorrer las patatas **en orden natural** (`p3_1, p3_2, ..., p3_30`, etc.) y, **a medida que las procesa**, imprime para cada imagen:

- nombre de la imagen
- clase predicha
- confidence


In [1]:
from __future__ import annotations

from pathlib import Path
import os
import sys
from typing import Any, Iterable

import numpy as np


def find_project_root(start: Path | None = None, marker_dir: str = "data") -> Path:
    """Locate the repository root by walking upwards from ``start``.

    The root is the first directory (``start`` itself, then each of its
    parents) that contains a sub-directory named ``marker_dir``.

    Args:
        start: Directory to begin the search from. Defaults to the current
            working directory; an explicit value is resolved first.
        marker_dir: Name of the directory whose presence marks the root.

    Returns:
        The first ancestor (or ``start`` itself) containing ``marker_dir/``.

    Raises:
        FileNotFoundError: If no candidate contains ``marker_dir/``.
    """
    origin = Path.cwd() if start is None else Path(start).resolve()
    for candidate in (origin, *origin.parents):
        # is_dir() is already False for paths that do not exist.
        if (candidate / marker_dir).is_dir():
            return candidate
    raise FileNotFoundError(f"No he encontrado '{marker_dir}/' subiendo desde {origin}")


# Resolve the repo root and make it importable so `src.*` can be found
# regardless of the directory the notebook is launched from.
PROJECT_ROOT = find_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

try:
    from PIL import Image
    import pandas as pd

    # Project functions (expected to live in `src/raw_image_treatment.py`)
    from src.raw_image_treatment import (
        apply_sigmoid,
        potato_defect_classification,
        potato_pixels_rgb_img,
    )
except Exception as e:
    # Re-raise with a friendlier hint; `from e` keeps the original failure
    # attached as the explicit cause in the traceback.
    raise ImportError(
        "No he podido importar dependencias o funciones del repo.\n"
        "Asegúrate de ejecutar este notebook dentro del repo y que existe src/raw_image_treatment.py.\n"
        f"Error original: {e!r}"
    ) from e

print("PROJECT_ROOT:", PROJECT_ROOT)
# Only report whether the key is present; never print its value.
print("ROBOFLOW_API_KEY set:", bool(os.environ.get("ROBOFLOW_API_KEY")))


PROJECT_ROOT: c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml
ROBOFLOW_API_KEY set: True




In [2]:
# ------------------------------------------------------------
# Rutas y helpers
# ------------------------------------------------------------
import re
from IPython.display import display



# Primary image folders, all relative to the repository root.
DIR_DEFINITIVE = PROJECT_ROOT / "data" / "input" / "raw" / "raw_images" / "definitive"
DIR_TEST = PROJECT_ROOT / "data" / "input" / "raw" / "raw_images" / "test_1"
DIR_CROPPED_DEF = PROJECT_ROOT / "data" / "input" / "processed" / "cropped_def"

# If your repo uses a different layout, add alternative base folders here:
ALT_DIRS = [
    PROJECT_ROOT / "data" / "raw",
    PROJECT_ROOT / "data" / "input" / "raw",
]


def natural_sort_key(p: Path):
    """Return a natural-order sort key for ``p`` (``p3_2`` sorts before ``p3_10``).

    The file name is split into alternating text and digit runs; digit runs
    are compared as integers and text runs case-insensitively.

    Args:
        p: Path whose ``name`` is used to build the key.

    Returns:
        A list alternating lower-cased text chunks and ints, starting with a
        (possibly empty) text chunk.
    """
    # Splitting on a capturing group yields text, digits, text, digits, ...
    # so every odd index is a `\d+` match. Relying on the position instead of
    # `str.isdigit()` avoids calling int() on characters such as "²", which
    # isdigit() accepts but `\d` does not match and int() cannot parse.
    chunks = re.split(r"(\d+)", p.name)
    return [
        int(chunk) if idx % 2 else chunk.lower()
        for idx, chunk in enumerate(chunks)
    ]


def get_image_paths(
    *,
    source: str,
    pattern: str = "*.png",
    max_images: int | None = None,
) -> list[Path]:
    """Return the image paths for ``source`` matching ``pattern``, naturally sorted.

    Args:
        source: One of ``'definitive'``, ``'cropped_def'`` or ``'test_1'``.
        pattern: Glob pattern applied inside the selected folder.
        max_images: If given, keep only the first ``max_images`` paths
            (after sorting); ``None`` keeps all of them.

    Returns:
        The matching paths in natural order; empty when nothing matches.

    Raises:
        ValueError: If ``source`` is not one of the supported names.
    """
    primary_dirs = {
        "definitive": DIR_DEFINITIVE,
        "cropped_def": DIR_CROPPED_DEF,
        "test_1": DIR_TEST,
    }
    if source not in primary_dirs:
        raise ValueError("source debe ser 'definitive', 'cropped_def', o 'test_1'")

    base = primary_dirs[source]

    # Fallback: if the primary folder is missing, look for
    # `<alt>/raw_images/<source>` under the alternative bases. This only
    # applies to raw sources: for 'cropped_def' the old fallback resolved to
    # the raw_images root itself, silently substituting uncropped images.
    if source != "cropped_def" and not base.exists():
        for alt in ALT_DIRS:
            candidate = alt / "raw_images" / source
            if candidate.exists():
                base = candidate
                break

    paths = sorted(base.glob(pattern), key=natural_sort_key)

    if max_images is not None:
        paths = paths[: int(max_images)]

    return paths


def downscale_pil(img: Image.Image, factor: float) -> Image.Image:
    """Shrink ``img`` by ``factor`` along both axes.

    ``factor=2`` halves width and height. A ``factor`` of ``None`` or any
    value ``<= 1`` returns the input image untouched. Each output side is at
    least one pixel.
    """
    scale = None if factor is None else float(factor)
    if scale is None or scale <= 1.0:
        return img

    width, height = img.size
    target_size = (
        max(1, int(round(width / scale))),
        max(1, int(round(height / scale))),
    )
    return img.resize(target_size)


def display_table(rows: list[dict[str, Any]]):
    """Build a DataFrame from ``rows``, display it, and return it.

    When a ``confidence`` column is present it is coerced to numeric; values
    that cannot be parsed become NaN.
    """
    table = pd.DataFrame(rows)
    has_confidence = "confidence" in table.columns
    if has_confidence:
        numeric_confidence = pd.to_numeric(table["confidence"], errors="coerce")
        table["confidence"] = numeric_confidence
    display(table)
    return table


# Sanity check: report whether each primary image folder is present.
print(f"definitive exists: {DIR_DEFINITIVE.exists()} {DIR_DEFINITIVE}")
print(f"cropped_def exists: {DIR_CROPPED_DEF.exists()} {DIR_CROPPED_DEF}")
print(f"test_1 exists: {DIR_TEST.exists()} {DIR_TEST}")


definitive exists: True c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\definitive
cropped_def exists: True c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\processed\cropped_def
test_1 exists: True c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\test_1


In [3]:
def run_defect_table_sigmoid(
    *,
    source: str,
    pattern: str,
    max_images: int | None,
    # Cut (optional; useful when working with large images)
    do_cut: bool = True,
    cut_margin: int = 35,
    cut_min_conf: float = 0.01,
    downscale_factor: float = 1.0,
    # Sigmoid
    sigmoid_k: float = 6.0,
    sigmoid_mid: float = 0.5,
    sigmoid_normalize: bool = True,
    # Classifier
    confidence_threshold: float = 0.40,
    # Logging
    verbose: bool = True,
) -> pd.DataFrame:
    """Run sigmoid + defect classification over a set of images.

    For every image selected by ``source``/``pattern`` the pipeline is:
    open -> optional downscale -> optional cut -> ``apply_sigmoid`` ->
    ``potato_defect_classification``. A failure on one image is recorded in
    its row (``status='error'``) and does not stop the batch.

    Args:
        source: Image source name passed to ``get_image_paths``.
        pattern: Glob pattern passed to ``get_image_paths``.
        max_images: Maximum number of images to process, or ``None`` for all.
        do_cut: Whether to call ``potato_pixels_rgb_img`` before the sigmoid.
            Always skipped when ``source == 'cropped_def'``.
        cut_margin: ``margin`` forwarded to ``potato_pixels_rgb_img``.
        cut_min_conf: ``min_conf`` forwarded to ``potato_pixels_rgb_img``.
        downscale_factor: Factor forwarded to ``downscale_pil``.
        sigmoid_k: ``k`` forwarded to ``apply_sigmoid``.
        sigmoid_mid: ``mid`` forwarded to ``apply_sigmoid``.
        sigmoid_normalize: ``normalize`` forwarded to ``apply_sigmoid``.
        confidence_threshold: Threshold forwarded to
            ``potato_defect_classification``.
        verbose: If true, print one progress line per image.

    Returns:
        One row per image holding the run parameters plus ``status``,
        ``defect``, ``confidence`` and, for failed images, ``error``.

    Raises:
        FileNotFoundError: If no image matches ``pattern`` in ``source``.
    """
    paths = get_image_paths(source=source, pattern=pattern, max_images=max_images)
    if not paths:
        raise FileNotFoundError(f"No he encontrado imágenes con pattern={pattern!r} en source={source!r}")

    total = len(paths)
    # The 'cropped_def' source is never cut again, whatever `do_cut` says.
    should_cut = do_cut and source != "cropped_def"

    rows: list[dict[str, Any]] = []

    for i, p in enumerate(paths, start=1):
        row: dict[str, Any] = {
            "idx": i,
            "image": p.name,
            "path": str(p),
            "source": source,
            "pattern": pattern,
            "do_cut": do_cut,
            "cut_margin": cut_margin,
            "downscale_factor": downscale_factor,
            "sigmoid_k": sigmoid_k,
            "sigmoid_mid": sigmoid_mid,
            "sigmoid_normalize": sigmoid_normalize,
            "confidence_threshold": confidence_threshold,
        }

        # Broad catch on purpose: this is the per-image batch boundary, and
        # one bad image must not abort the whole run.
        try:
            # 1) Load (kept as a PIL image). The context manager closes the
            #    underlying file once this image has been fully processed,
            #    so a long batch does not accumulate open handles.
            with Image.open(p) as opened:
                # 2) Optional downscale
                img = downscale_pil(opened, downscale_factor)

                # 3) Optional cut (automatic crop of the potato)
                if should_cut:
                    img = potato_pixels_rgb_img(img, margin=int(cut_margin), min_conf=float(cut_min_conf))

                # 4) Sigmoid
                img = apply_sigmoid(img, k=float(sigmoid_k), mid=float(sigmoid_mid), normalize=bool(sigmoid_normalize))

                # 5) Classification (the visualisation image is not used here)
                defect, conf, _vis = potato_defect_classification(img, confidence_threshold=float(confidence_threshold))

            row.update(
                {
                    "status": "ok",
                    "defect": defect,
                    "confidence": float(conf),
                }
            )

            if verbose:
                print(f"[{i:>3}/{total}] {p.name} -> {defect} (conf={conf:.3f})")

        except Exception as e:
            row.update(
                {
                    "status": "error",
                    "defect": None,
                    "confidence": None,
                    "error": repr(e),
                }
            )
            if verbose:
                print(f"[{i:>3}/{total}] {p.name} -> ERROR: {e!r}")

        rows.append(row)

    df = pd.DataFrame(rows)
    if "confidence" in df.columns:
        # Error rows store None; coerce so the column is numeric with NaN.
        df["confidence"] = pd.to_numeric(df["confidence"], errors="coerce")
    return df


## Celda plantilla (copia/pega)

Cambia solo los parámetros de la celda de abajo (lote 3/4/5/6 + parámetros de sigmoid).

In [4]:
# ==============================
# PARAMETERS (edit only here)
# ==============================

# --- Lot selection (3, 4, 5 or 6) ---
LOT = 3  # <-- change to 4 / 5 / 6

# --- Image source ---
SOURCE = "definitive"  # 'definitive', 'cropped_def', or 'test_1'

# Default pattern derived from the lot (you can override it)
# Typical examples:
#   - "p3_*.png"
#   - "p3_*_cropped.png"
PATTERN = f"p{int(LOT)}_*.png"

MAX_IMAGES = None  # None for all images, or an int (e.g. 30)

# --- Cut (optional; if you already use *_cropped.png, set DO_CUT=False or SOURCE='cropped_def') ---
DO_CUT = False
CUT_MARGIN = 35
CUT_MIN_CONF = 0.01

# --- Downscale (optional) ---
# 1 = unchanged, 2 = half resolution, 4 = quarter, etc.
DOWNSCALE_FACTOR = 1.0

# --- Sigmoid (the block of parameters meant to be tuned) ---
SIGMOID_K = 6.0
SIGMOID_MID = 0.5
SIGMOID_NORMALIZE = True

# --- Classifier ---
# NOTE: this overrides the 0.40 default of run_defect_table_sigmoid.
CONFIDENCE_THRESHOLD = 0.01  # threshold for accepting a prediction in potato_defect_classification

# ==============================
# EXECUTION
# ==============================
# Reject unsupported lots before running the batch.
if int(LOT) not in (3, 4, 5, 6):
    raise ValueError("LOT debe ser 3, 4, 5 o 6")

df = run_defect_table_sigmoid(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    sigmoid_k=SIGMOID_K,
    sigmoid_mid=SIGMOID_MID,
    sigmoid_normalize=SIGMOID_NORMALIZE,
    confidence_threshold=CONFIDENCE_THRESHOLD,
    verbose=True,
)

display(df)


[  1/30] p3_1.png -> Potato (conf=0.871)
[  2/30] p3_2.png -> Potato (conf=0.880)
[  3/30] p3_3.png -> Potato (conf=0.640)
[  4/30] p3_4.png -> Unable to classify (conf=0.000)
[  5/30] p3_5.png -> Unable to classify (conf=0.000)
[  6/30] p3_6.png -> Potato (conf=0.873)
[  7/30] p3_7.png -> Potato (conf=0.771)
[  8/30] p3_8.png -> Unable to classify (conf=0.000)
[  9/30] p3_9.png -> Potato (conf=0.746)
[ 10/30] p3_10.png -> Potato (conf=0.801)
[ 11/30] p3_11.png -> Diseased-fungal potato (conf=0.456)
[ 12/30] p3_12.png -> Potato (conf=0.763)
[ 13/30] p3_13.png -> Potato (conf=0.784)
[ 14/30] p3_14.png -> Potato (conf=0.824)
[ 15/30] p3_15.png -> Unable to classify (conf=0.000)
[ 16/30] p3_16.png -> Potato (conf=0.812)
[ 17/30] p3_17.png -> Potato (conf=0.892)
[ 18/30] p3_18.png -> Potato (conf=0.631)
[ 19/30] p3_19.png -> Unable to classify (conf=0.000)
[ 20/30] p3_20.png -> Damaged potato (conf=0.578)
[ 21/30] p3_21.png -> Potato (conf=0.826)
[ 22/30] p3_22.png -> Potato (conf=0.882)
[

Unnamed: 0,idx,image,path,source,pattern,do_cut,cut_margin,downscale_factor,sigmoid_k,sigmoid_mid,sigmoid_normalize,confidence_threshold,status,defect,confidence
0,1,p3_1.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.870933
1,2,p3_2.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.880069
2,3,p3_3.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.639801
3,4,p3_4.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Unable to classify,0.0
4,5,p3_5.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Unable to classify,0.0
5,6,p3_6.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.87327
6,7,p3_7.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.770853
7,8,p3_8.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Unable to classify,0.0
8,9,p3_9.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.745835
9,10,p3_10.png,c:\Users\david\Desktop\Uni\potato-dry-matter-o...,definitive,p3_*.png,False,35,1.0,6.0,0.5,True,0.01,ok,Potato,0.800741


## (Opcional) Resumen rápido por clase

In [5]:
# Best-effort summary: count rows per (status, defect) pair, most frequent
# first. Any failure is reported instead of raised, since this cell is optional.
try:
    resumen = (
        df.groupby(by=["status", "defect"], dropna=False)
        .size()
        .reset_index(name="count")
        .sort_values(by="count", ascending=False)
    )
    display(resumen)
except Exception as exc:
    print("No se pudo crear el resumen:", exc)


Unnamed: 0,status,defect,count
2,ok,Potato,21
4,ok,Unable to classify,5
1,ok,Diseased-fungal potato,2
0,ok,Damaged potato,1
3,ok,Sprouted potato,1
