# Tarea 2
## Inicio y preparación del proyecto

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from cv2 import imread
from skimage.measure import regionprops, label
from skimage.morphology import remove_small_objects
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.utils import resample

## Configuración

In [None]:
# Evaluation image and the CSV file with its annotated bounding boxes.
TEST_IMG = "MPs_test.jpg"
TEST_BB_CSV = "MPs_test_bbs.csv"

# Short label codes -> full class names used by the classifier.
CSV_TO_CLASS = dict(
    TAR="fragmentos_negros",
    PEL="pellets_esfericos",
    FRA="films_translucidos",
)

# Training set: one (image path, class name) pair per class, listed in the
# same order as the entries of CSV_TO_CLASS.
TRAIN = list(zip(
    ("TAR.png", "PEL.png", "FRA.png"),
    CSV_TO_CLASS.values(),
))

## Funciones auxiliares

In [None]:
def preprocess(img, show_debug=False):
    """Segment the objects of a BGR image into a binary mask.

    The image is converted to grayscale and blurred; a global inverted Otsu
    threshold and a local inverted mean-adaptive threshold are then OR-ed
    together, closed morphologically and cleaned of small components.

    Args:
        img: Image in OpenCV BGR channel order.
        show_debug: When true, plot the input image, its grayscale version
            and the resulting mask side by side.

    Returns:
        A ``uint8`` array holding 255 on detected pixels and 0 elsewhere.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)

    # Global threshold: Otsu picks the cut-off, the output is inverted.
    inverted_otsu = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    otsu_mask = cv2.threshold(smoothed, 0, 255, inverted_otsu)[1]

    # Local threshold: gentle reinforcement for low-contrast areas.
    local_mask = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        blockSize=51,
        C=6,
    )

    # Union of both masks, so a pixel found by either threshold is kept.
    combined = cv2.bitwise_or(otsu_mask, local_mask)

    # Close small gaps, then discard tiny connected components.
    closed = cv2.morphologyEx(
        combined, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8), iterations=2
    )
    cleaned = remove_small_objects(closed > 0, 50)
    final_mask = cleaned.astype(np.uint8) * 255

    if show_debug:
        # (title, image, colormap) for each of the three panels.
        panels = (
            ("Imagen original", cv2.cvtColor(img, cv2.COLOR_BGR2RGB), None),
            ("Escala de grises", grayscale, "gray"),
            ("M√°scara binaria (Otsu + Adaptativo)", final_mask, "gray"),
        )
        plt.figure(figsize=(15, 5))
        for position, (title, picture, colormap) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.imshow(picture, cmap=colormap)
            plt.title(title)
            plt.axis("off")
        plt.tight_layout()
        plt.show()

    return final_mask

def extract_features(img, mask):
    """Compute shape, colour and texture descriptors per segmented region.

    Connected components of ``mask`` with an area below 100 pixels are
    ignored. Each remaining region yields one row with 13 values, in this
    order: area, circularity, bounding-box aspect ratio (width / height),
    extent, solidity, mean H, mean S, mean V, std H, std S, std V,
    grayscale variance and grayscale contrast (max - min).

    Args:
        img: Image in OpenCV BGR channel order.
        mask: Segmentation mask with the same height/width as ``img``;
            non-zero pixels are treated as foreground.

    Returns:
        A float array of shape ``(n_regions, 13)``. When no region
        qualifies the shape is ``(0, 13)``, so ``len(result) == 0`` still
        signals "nothing found" while the array stays 2-D.
    """
    n_features = 13

    labelled = label(mask)
    # Colour conversions are per-pixel operations, so they are done once on
    # the whole image instead of once per region crop.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    feats = []

    for p in regionprops(labelled):
        if p.area < 100:  # skip small objects
            continue

        y0, x0, y1, x1 = p.bbox
        window = (slice(y0, y1), slice(x0, x1))
        # Pixels of the bounding box that belong to this region only.
        region_mask = labelled[window] == p.label

        # Geometry
        area = p.area
        perim = p.perimeter if p.perimeter > 0 else 1  # avoid division by zero
        circularity = 4 * np.pi * area / (perim ** 2)
        aspect = (x1 - x0) / (y1 - y0) if (y1 - y0) > 0 else 0
        extent = p.extent
        solidity = getattr(p, "solidity", 0)

        # Colour (HSV)
        # NOTE(review): hue is an angular quantity; its plain mean/std can be
        # misleading for hues near the wrap-around point - confirm acceptable.
        vals = hsv[window][region_mask]
        h_mean, s_mean, v_mean = vals.mean(axis=0)
        h_std, s_std, v_std = vals.std(axis=0)

        # Texture
        gray_vals = gray[window][region_mask]
        var_intensity = np.var(gray_vals)
        contrast = float(gray_vals.max() - gray_vals.min())

        feats.append([
            area, circularity, aspect, extent, solidity,
            h_mean, s_mean, v_mean, h_std, s_std, v_std,
            var_intensity, contrast,
        ])

    # reshape keeps the result 2-D even when no region was kept.
    return np.array(feats, dtype=float).reshape(-1, n_features)


## Preparación

In [None]:
# ---------------------------------------------------------
# Preparación
# ---------------------------------------------------------
def prepare_training():
    """Build a class-balanced training set from the images listed in TRAIN.

    Every training image is segmented (with the debug plots enabled) and
    each detected region becomes one sample labelled with the class of its
    image. All classes are then undersampled to the size of the smallest
    one.

    Returns:
        Tuple ``(X, y)``: the stacked feature matrix and the matching array
        of class names, with the same number of samples for every class.

    Raises:
        FileNotFoundError: If a training image cannot be read.
        ValueError: If no region is detected in any training image.
    """
    feature_blocks, sample_labels = [], []
    for path, label_name in TRAIN:
        print(f"Procesando {path} ({label_name})...")
        img = imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(
                f"No se pudo leer la imagen de entrenamiento: {path}"
            )
        # Show the segmentation plots for every training image.
        mask = preprocess(img, show_debug=True)
        feats = extract_features(img, mask)
        if len(feats) > 0:
            feature_blocks.append(feats)
            sample_labels += [label_name] * len(feats)

    if not feature_blocks:
        raise ValueError(
            "No se detectaron objetos en las imagenes de entrenamiento"
        )
    X = np.vstack(feature_blocks)
    y = np.array(sample_labels)

    # Balance the classes by undersampling each one to the smallest count.
    classes, counts = np.unique(y, return_counts=True)
    min_n = counts.min()
    Xb, yb = [], []
    for c in classes:
        selected = y == c
        # replace=False draws distinct samples; the default (with
        # replacement) would duplicate some samples and drop others.
        Xr, yr = resample(
            X[selected], y[selected],
            replace=False, n_samples=min_n, random_state=42,
        )
        Xb.append(Xr)
        yb.append(yr)
    X = np.vstack(Xb)
    y = np.concatenate(yb)
    print("Balance de clases:", dict(zip(classes, [min_n]*len(classes))))
    return X, y

def prepare_test():
    """Build the evaluation set from the annotated test image.

    Reads the bounding boxes from TEST_BB_CSV (columns ``label``,
    ``x_min``, ``y_min``, ``x_max``, ``y_max``), crops each box out of
    TEST_IMG, segments the crop and averages the feature rows of all
    regions found in it. A box that yields no region (or lies entirely
    outside the image) gets an all-zero feature vector.

    Returns:
        Tuple ``(X, y_true, df)``: one feature row per bounding box, the
        ground-truth class names, and the CSV data frame extended with a
        ``gt_class`` column.

    Raises:
        ValueError: If the CSV contains a label missing from CSV_TO_CLASS.
        FileNotFoundError: If the test image cannot be read.
    """
    n_features = 13

    df = pd.read_csv(TEST_BB_CSV)
    df["gt_class"] = df["label"].map(CSV_TO_CLASS)
    # Unmapped labels become NaN and would only fail later, inside the
    # metric functions, with an unrelated-looking error.
    unknown = df.loc[df["gt_class"].isna(), "label"].unique()
    if len(unknown) > 0:
        raise ValueError(
            f"Etiquetas desconocidas en {TEST_BB_CSV}: {list(unknown)}"
        )

    img = imread(TEST_IMG)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError(
            f"No se pudo leer la imagen de prueba: {TEST_IMG}"
        )
    height, width = img.shape[:2]

    feats = []
    for r in df.itertuples(index=False):
        x0, y0, x1, y1 = map(int, (r.x_min, r.y_min, r.x_max, r.y_max))
        # Clamp to the image: negative indices would otherwise wrap around.
        x0, x1 = max(x0, 0), min(x1, width)
        y0, y1 = max(y0, 0), min(y1, height)

        f = np.empty((0, n_features))
        if x1 > x0 and y1 > y0:  # an empty crop cannot be segmented
            crop = img[y0:y1, x0:x1]
            f = extract_features(crop, preprocess(crop))
        if len(f) == 0:
            f = np.zeros((1, n_features))
        feats.append(f.mean(axis=0))
    return np.array(feats), df["gt_class"].values, df

## Main

In [None]:
def main():
    """Train the classifier, evaluate it on the test image and save results.

    Fits a random forest on the balanced training set, predicts the class
    of every test bounding box, applies a brightness-based correction to
    the predictions, prints and plots the confusion matrix, and writes the
    predictions to ``predicciones_test.csv``.
    """
    print("\n=== Entrenamiento ===")
    X_train, y_train = prepare_training()
    forest_params = dict(
        n_estimators=1200,
        max_depth=18,
        min_samples_leaf=1,
        class_weight="balanced",
        random_state=42,
    )
    clf = RandomForestClassifier(**forest_params)
    clf.fit(X_train, y_train)

    print("\n=== Evaluaci√≥n ===")
    X_test, y_true, df = prepare_test()
    y_pred = clf.predict(X_test)

    # Brightness-based correction on feature column 7 (mean V): very dark
    # samples are forced to the black-fragment class, and bright samples
    # predicted as black fragments are relabelled as films. Both masks are
    # computed from the raw predictions and cannot overlap.
    brightness = X_test[:, 7]
    too_dark = brightness < 85
    bright_but_black = (brightness > 155) & (y_pred == "fragmentos_negros")
    y_pred[too_dark] = "fragmentos_negros"
    y_pred[bright_but_black] = "films_translucidos"

    # Results
    class_order = ["fragmentos_negros", "pellets_esfericos", "films_translucidos"]
    matrix = confusion_matrix(y_true, y_pred, labels=class_order)
    print("\nMatriz de confusi√≥n:")
    print(class_order)
    print(matrix)

    print("\nReporte:")
    report = classification_report(y_true, y_pred, labels=class_order, digits=3)
    print(report)
    accuracy = accuracy_score(y_true, y_pred)
    print(f"\n‚úÖ Precisi√≥n global: {accuracy*100:.2f}%")

    plt.figure(figsize=(6, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        xticklabels=class_order,
        yticklabels=class_order,
        cmap="Blues",
    )
    plt.xlabel("Predicho")
    plt.ylabel("Verdadero")
    plt.title(f"Matriz de confusi√≥n (Accuracy {accuracy*100:.1f}%)")
    plt.show()

    df["pred_class"] = y_pred
    df.to_csv("predicciones_test.csv", index=False)
    print("‚úÖ Archivo 'predicciones_test.csv' guardado con √©xito.")

# Entry point: run the pipeline only when this module is the main program.
if __name__ == "__main__":
    main()