# Tabla de clasificación de defectos (brightness/gamma + recorte opcional)

Este notebook genera una **tabla** (DataFrame) con la clasificación de defectos por imagen, usando:

- `potato_pixels_rgb_img` (opcional): recorte automático de la patata; se activa con `do_cut` y se omite siempre que `source='cropped_def'`, porque esas imágenes ya están recortadas.
- `apply_brightness_and_gamma` (opcional): ajuste de brillo y gamma antes de clasificar.
- `potato_defect_classification`: devuelve `(defect, confidence, vis_img)`.

**NUEVO:** puedes **reducir la resolución** de entrada con `DOWNSCALE_FACTOR = x` (por ejemplo `2` ⇒ mitad de ancho/alto).


In [19]:
from __future__ import annotations

from pathlib import Path
import os
import sys
from typing import Any

import numpy as np


def find_project_root(start: Path | None = None, marker_dir: str = "data") -> Path:
    """Busca el root del repo subiendo carpetas hasta encontrar `marker_dir/`."""
    start = Path.cwd() if start is None else Path(start).resolve()
    for p in [start, *start.parents]:
        if (p / marker_dir).exists() and (p / marker_dir).is_dir():
            return p
    raise FileNotFoundError(f"No he encontrado '{marker_dir}/' subiendo desde {start}")


# Make the repository importable: locate its root and put it first on sys.path
# so that `src.*` resolves regardless of the notebook's working directory.
PROJECT_ROOT = find_project_root()
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

try:
    from src.raw_image_treatment import (
        apply_brightness_and_gamma,
        potato_defect_classification,
        potato_pixels_rgb_img,
    )
except Exception as e:
    # Re-raise with a hint for the user; `from e` records the original failure
    # as the explicit cause so its traceback is shown alongside this message.
    raise ImportError(
        "No he podido importar desde src/raw_image_treatment.py.\n"
        "Asegúrate de ejecutar este notebook dentro del repo y que existe esa ruta.\n"
        f"Error original: {e!r}"
    ) from e

print("PROJECT_ROOT:", PROJECT_ROOT)
# Only report whether the key is present; never print the key itself.
print("ROBOFLOW_API_KEY set:", bool(os.environ.get("ROBOFLOW_API_KEY")))


PROJECT_ROOT: c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml
ROBOFLOW_API_KEY set: True


In [32]:
# ------------------------------------------------------------
# Rutas y helpers
# ------------------------------------------------------------
from typing import Iterable
import re

# Input image folders, all expressed relative to the repository root.
# list_images() picks one of them according to its `source` argument.
DIR_DEFINITIVE = PROJECT_ROOT / "data/input/raw/raw_images/definitive"  # source="definitive"
DIR_TEST = PROJECT_ROOT / "data/input/raw/raw_images/test_1"  # source="test_1"
DIR_CROPPED_DEF = PROJECT_ROOT / "data/input/processed/cropped_def"  # source="cropped_def"


def natural_sort_key(p: Path) -> list:
    """Build a sort key that orders file names "naturally" (p3_2 before p3_10).

    The file name is split into alternating text and digit runs; digit runs are
    compared as integers and text runs case-insensitively.

    Args:
        p: Path whose final component (`p.name`) is used to build the key.

    Returns:
        A list alternating lower-cased strings and ints, always starting with a
        string (possibly empty), so keys of different files compare without
        mixing types at the same position.
    """
    # With a capturing group, re.split alternates: even indices hold the text
    # between matches, odd indices hold the `\d+` matches themselves.
    parts = re.split(r"(\d+)", p.name)
    # Decide by position instead of str.isdigit(): isdigit() is also true for
    # characters such as "²" that `\d` does not match and int() cannot parse.
    return [
        int(part) if idx % 2 else part.lower()
        for idx, part in enumerate(parts)
    ]


def list_images(
    *,
    source: str,
    pattern: str = "*.png",
    max_images: int | None = None,
    shuffle: bool = False,
    seed: int = 0,
) -> list[Path]:
    if source not in {"definitive", "cropped_def", "test_1"}:
        raise ValueError("source debe ser 'definitive', 'cropped_def', o 'test_1'")

    base = DIR_DEFINITIVE if source == "definitive" else (DIR_CROPPED_DEF if source == "cropped_def" else DIR_TEST)
    paths = sorted(base.glob(pattern), key=natural_sort_key)

    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(paths)

    if max_images is not None:
        paths = paths[: int(max_images)]

    return paths


def display_table(rows: list[dict[str, Any]]):
    """Show the classification rows as a table and return what was shown.

    Configuration-only columns are removed from a copy of each row (the input
    dicts are left untouched) so the rendered table focuses on the results.

    Args:
        rows: One dict per image, as built by `run_defect_table`.

    Returns:
        A pandas DataFrame when pandas can be imported; otherwise the list of
        cleaned row dicts (which are printed one per line).
    """

    def _clean_row(r: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of `r` without the columns that are noise on screen."""
        cleaned = dict(r)
        # Always hidden: pure run configuration.
        for k in ("source", "do_cut", "cut_min_conf", "confidence_threshold", "apply_bg"):
            cleaned.pop(k, None)
        # Hidden only when they carry a "nothing was applied" value.
        if cleaned.get("cut_margin") in (None, 0):
            cleaned.pop("cut_margin", None)
        if cleaned.get("downscale_factor") in (None, 1):
            cleaned.pop("downscale_factor", None)
        for k in ("brightness", "gamma"):
            if cleaned.get(k) is None:
                cleaned.pop(k, None)
        return cleaned

    rows2 = [_clean_row(r) for r in rows]

    # Only a missing pandas triggers the plain-text fallback; any other error
    # is a real problem and must not be silently turned into a list result.
    try:
        import pandas as pd
    except ImportError:
        for r in rows2:
            print(r)
        return rows2

    df = pd.DataFrame(rows2)
    if "confidence" in df.columns:
        # Rows without a prediction hold None; coerce so the column is numeric.
        df["confidence"] = pd.to_numeric(df["confidence"], errors="coerce")

    # `display` is provided by IPython/Jupyter; in plain Python fall back to
    # print so the DataFrame is still shown and still returned.
    try:
        show = display  # noqa: F821
    except NameError:
        show = print
    show(df)
    return df


# Sanity check: report, for each input folder, whether it exists and where it points.
for _label, _folder in (
    ("definitive exists:", DIR_DEFINITIVE),
    ("cropped_def exists:", DIR_CROPPED_DEF),
    ("test_1 exists:", DIR_TEST),
):
    print(_label, _folder.exists(), "|", _folder)

definitive exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\definitive
cropped_def exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\processed\cropped_def
test_1 exists: True | c:\Users\david\Desktop\Uni\potato-dry-matter-optics-ml\data\input\raw\raw_images\test_1


In [33]:
def run_defect_table(
    *,
    source: str,
    pattern: str = "*.png",
    max_images: int | None = None,
    shuffle: bool = False,
    seed: int = 0,
    # Cut (only meaningful for 'definitive' and 'test_1'; 'cropped_def' images are already crops)
    do_cut: bool = True,
    cut_margin: int = 35,
    cut_min_conf: float = 0.01,
    # Resolution reduction factor: 1.0 = unchanged, 2.0 = half width/height, 4.0 = quarter...
    downscale_factor: float = 1.0,
    # Brightness/Gamma
    apply_bg: bool = False,
    brightness: float | None = None,
    gamma: float | None = None,
    # Classifier
    confidence_threshold: float = 0.5,
):
    """Classify every selected image and return the results as a table.

    For each image the steps are, in order: optional downscale, optional crop
    with `potato_pixels_rgb_img`, optional `apply_brightness_and_gamma`, then
    `potato_defect_classification`. A failure on one image is recorded in that
    image's row (status "error") and does not stop the batch.

    Args:
        source, pattern, max_images, shuffle, seed: Forwarded to `list_images`
            to choose which files are processed.
        do_cut: Crop before classifying. Ignored when `source` is
            'cropped_def'.
        cut_margin, cut_min_conf: Passed to `potato_pixels_rgb_img` as
            `margin` and `min_conf`.
        downscale_factor: Divide width and height by this factor before any
            other step. Must be >= 1; smaller values yield an "error" row for
            every image.
        apply_bg: Apply `apply_brightness_and_gamma` before classifying.
        brightness, gamma: Passed through to `apply_brightness_and_gamma`.
        confidence_threshold: Passed to `potato_defect_classification`.

    Returns:
        Whatever `display_table` returns for the collected rows (a DataFrame
        when pandas is available). Each row has the run parameters plus
        `defect`, `confidence`, `status` ("ok", "no_potato_detected" or
        "error") and, for errors, `error` with the exception repr.
    """
    from PIL import Image

    paths = list_images(
        source=source,
        pattern=pattern,
        max_images=max_images,
        shuffle=shuffle,
        seed=seed,
    )
    total = len(paths)

    # Loop-invariant: use the filter from Image.Resampling when this Pillow
    # version has it, otherwise the module-level constant.
    resample = Image.Resampling.LANCZOS if hasattr(Image, "Resampling") else Image.LANCZOS

    def _classify(img_path: Path) -> dict[str, Any]:
        """Run the pipeline on one image and return its result columns."""
        img_in: Any = img_path  # stays a Path until a step replaces it with an image

        # Downscale first so the cut and the classifier both see the smaller image.
        factor = float(downscale_factor)
        if factor < 1.0:
            raise ValueError("downscale_factor debe ser >= 1 (p.ej., 2 reduce a la mitad)")
        if factor > 1.0:
            # The context manager releases the file handle even if decoding
            # fails; convert() returns an independent in-memory image.
            with Image.open(str(img_path)) as opened:
                pil = opened.convert("RGB")
            new_w = max(1, int(pil.size[0] / factor))
            new_h = max(1, int(pil.size[1] / factor))
            img_in = pil.resize((new_w, new_h), resample=resample)

        # Cut
        if do_cut and source != "cropped_def":
            cropped_img, _vis = potato_pixels_rgb_img(img_in, margin=cut_margin, min_conf=cut_min_conf)
            if cropped_img is None:
                return {"defect": None, "confidence": None, "status": "no_potato_detected"}
            img_in = cropped_img

        # Brightness/Gamma
        if apply_bg:
            img_in = apply_brightness_and_gamma(img_in, brightness=brightness, gamma=gamma)

        defect, conf, _vis_img = potato_defect_classification(
            img_in, confidence_threshold=confidence_threshold
        )
        return {"defect": defect, "confidence": float(conf), "status": "ok"}

    rows: list[dict[str, Any]] = []
    for i, img_path in enumerate(paths, start=1):
        row: dict[str, Any] = {
            "image": img_path.name,
            "source": source,
            "do_cut": bool(do_cut),
            "cut_margin": int(cut_margin),
            "cut_min_conf": float(cut_min_conf),
            "downscale_factor": float(downscale_factor),
            "apply_bg": bool(apply_bg),
            "brightness": brightness,
            "gamma": gamma,
            "confidence_threshold": float(confidence_threshold),
        }

        try:
            row.update(_classify(img_path))
        except Exception as e:
            # Best-effort batch: keep going and record the failure in the table.
            row.update({"defect": None, "confidence": None, "status": "error", "error": repr(e)})

        rows.append(row)

        # Progress is reported for every image, including those where no
        # potato was detected.
        if i % 10 == 0:
            print(f"Procesadas {i}/{total}")

    return display_table(rows)


## Celda plantilla (copia/pega)

Entre celdas cambia solo los parámetros del bloque inicial.

In [22]:
# ==============================
# PARAMETERS (edit only this block)
# ==============================
SOURCE = "definitive"   # 'definitive', 'cropped_def', or 'test_1'
PATTERN = "*.png"       # e.g. "p3_*.png" or "*_cropped.png"
MAX_IMAGES = 10         # None for all images
SHUFFLE = False
SEED = 0

# Cut (only applied when SOURCE != 'cropped_def')
DO_CUT = True
CUT_MARGIN = 35
CUT_MIN_CONF = 0.01

# Downscale the input before the cut/classification
# 1 = unchanged, 2 = half resolution, 4 = quarter, etc.
DOWNSCALE_FACTOR = 1.0

# Brightness/Gamma (optional)
APPLY_BG = True
BRIGHTNESS = 3   # e.g. 1.1
GAMMA = 0.8        # e.g. 1.2

# Classifier
CONFIDENCE_THRESHOLD = 0.01

# NOTE: every parameter cell rebinds `df`, so later cells that read `df`
# see the result of whichever parameter cell was executed last.
df = run_defect_table(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    shuffle=SHUFFLE,
    seed=SEED,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    apply_bg=APPLY_BG,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
)


Procesadas 10/10


Unnamed: 0,image,cut_margin,brightness,gamma,defect,confidence,status
0,p3_1.png,35,3,0.8,Defected potato,0.438737,ok
1,p3_2.png,35,3,0.8,Diseased-fungal potato,0.461319,ok
2,p3_3.png,35,3,0.8,Defected potato,0.593238,ok
3,p3_4.png,35,3,0.8,Potato,0.421964,ok
4,p3_5.png,35,3,0.8,Diseased-fungal potato,0.689147,ok
5,p3_6.png,35,3,0.8,Diseased-fungal potato,0.754915,ok
6,p3_7.png,35,3,0.8,Diseased-fungal potato,0.663033,ok
7,p3_8.png,35,3,0.8,Diseased-fungal potato,0.497637,ok
8,p3_9.png,35,3,0.8,Diseased-fungal potato,0.579517,ok
9,p3_10.png,35,3,0.8,Diseased-fungal potato,0.464561,ok


In [None]:
# ==============================
# PARAMETERS (edit only this block)
# ==============================
SOURCE = "definitive"   # 'definitive', 'cropped_def', or 'test_1'
PATTERN = "p4_*.png"       # e.g. "p3_*.png" or "*_cropped.png"
MAX_IMAGES = 10         # None for all images
SHUFFLE = False
SEED = 0

# Cut (only applied when SOURCE != 'cropped_def'); disabled in this run via DO_CUT,
# so CUT_MARGIN / CUT_MIN_CONF are passed along but not used.
DO_CUT = False
CUT_MARGIN = 35
CUT_MIN_CONF = 0.01

# Downscale the input before the cut/classification
# 1 = unchanged, 2 = half resolution, 4 = quarter, etc.
DOWNSCALE_FACTOR = 4

# Brightness/Gamma (optional)
APPLY_BG = True
BRIGHTNESS = 2.5   # e.g. 1.1
GAMMA = 0.8        # e.g. 1.2

# Classifier
CONFIDENCE_THRESHOLD = 0.01

# NOTE: every parameter cell rebinds `df`, so later cells that read `df`
# see the result of whichever parameter cell was executed last.
df = run_defect_table(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    shuffle=SHUFFLE,
    seed=SEED,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    apply_bg=APPLY_BG,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
)


Procesadas 10/10


Unnamed: 0,image,cut_margin,downscale_factor,brightness,gamma,defect,confidence,status
0,p4_1.png,35,4.0,2.5,0.8,Defected potato,0.777139,ok
1,p4_2.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.745871,ok
2,p4_3.png,35,4.0,2.5,0.8,Defected potato,0.635858,ok
3,p4_4.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.78075,ok
4,p4_5.png,35,4.0,2.5,0.8,Defected potato,0.758231,ok
5,p4_6.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.888975,ok
6,p4_7.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.823537,ok
7,p4_8.png,35,4.0,2.5,0.8,Defected potato,0.718602,ok
8,p4_9.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.881555,ok
9,p4_10.png,35,4.0,2.5,0.8,Defected potato,0.564673,ok


In [38]:
# ==============================
# PARAMETERS (edit only this block)
# ==============================
SOURCE = "definitive"   # 'definitive', 'cropped_def', or 'test_1'
PATTERN = "p6_*.png"       # e.g. "p3_*.png" or "*_cropped.png"
MAX_IMAGES = 10         # None for all images
SHUFFLE = False
SEED = 0

# Cut (only applied when SOURCE != 'cropped_def'); disabled in this run via DO_CUT,
# so CUT_MARGIN / CUT_MIN_CONF are passed along but not used.
DO_CUT = False
CUT_MARGIN = 35
CUT_MIN_CONF = 0.01

# Downscale the input before the cut/classification
# 1 = unchanged, 2 = half resolution, 4 = quarter, etc.
DOWNSCALE_FACTOR = 4

# Brightness/Gamma (optional)
APPLY_BG = True
BRIGHTNESS = 2.5   # e.g. 1.1
GAMMA = 0.8        # e.g. 1.2

# Classifier
CONFIDENCE_THRESHOLD = 0.01

# NOTE: every parameter cell rebinds `df`, so later cells that read `df`
# see the result of whichever parameter cell was executed last.
df = run_defect_table(
    source=SOURCE,
    pattern=PATTERN,
    max_images=MAX_IMAGES,
    shuffle=SHUFFLE,
    seed=SEED,
    do_cut=DO_CUT,
    cut_margin=CUT_MARGIN,
    cut_min_conf=CUT_MIN_CONF,
    downscale_factor=DOWNSCALE_FACTOR,
    apply_bg=APPLY_BG,
    brightness=BRIGHTNESS,
    gamma=GAMMA,
    confidence_threshold=CONFIDENCE_THRESHOLD,
)


Procesadas 10/10


Unnamed: 0,image,cut_margin,downscale_factor,brightness,gamma,defect,confidence,status
0,p6_1.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.892073,ok
1,p6_2.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.873776,ok
2,p6_3.png,35,4.0,2.5,0.8,Damaged potato,0.767064,ok
3,p6_4.png,35,4.0,2.5,0.8,Damaged potato,0.713883,ok
4,p6_5.png,35,4.0,2.5,0.8,Damaged potato,0.438795,ok
5,p6_6.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.556692,ok
6,p6_7.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.702086,ok
7,p6_8.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.5582,ok
8,p6_9.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.646028,ok
9,p6_10.png,35,4.0,2.5,0.8,Diseased-fungal potato,0.793847,ok


## (Opcional) Resumen rápido por clase

In [24]:
# Optional summary: number of images per (status, defect) pair in the current `df`.
# Every reason for skipping the summary is reported instead of being silently
# swallowed, so an empty output is never ambiguous.
try:
    import pandas as pd
except ImportError:
    print("Resumen omitido: pandas no está disponible.")
else:
    if not isinstance(df, pd.DataFrame):
        # display_table returns a plain list of dicts when pandas was missing at that time.
        print("Resumen omitido: `df` no es un DataFrame.")
    elif not {"status", "defect"}.issubset(df.columns):
        # E.g. an empty table produced when no image matched the pattern.
        print("Resumen omitido: faltan las columnas 'status' y/o 'defect'.")
    else:
        resumen = (
            # dropna=False keeps rows whose defect is missing (errors / no potato detected).
            df.groupby(["status", "defect"], dropna=False)
            .size()
            .reset_index(name="count")
            .sort_values("count", ascending=False)
        )
        display(resumen)


Unnamed: 0,status,defect,count
1,ok,Potato,9
0,ok,Damaged potato,1
