In [2]:
# Activar el entorno de conda
import os
from pathlib import Path
import h5py
import numpy as np
import cv2
from collections import defaultdict

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential


In [3]:
print("TF:", tf.__version__)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print("GPUs visibles:", tf.config.list_logical_devices('GPU'))
else:
    print("No hay GPU visible: se usará CPU")
# Opcional para ver en qué dispositivo corre cada op:
# tf.debugging.set_log_device_placement(True)


TF: 2.10.0
No hay GPU visible: se usará CPU


In [None]:
# Ruta del dataset 
DATA_DIR = Path("../DataSet/Tumores")

# Clases en CJDATA: 1=Meningioma, 2=Glioma, 3=Adenoma_hipofisario
label_to_name = {
    0: "Meningioma",
    1: "Glioma",
    2: "Adenoma_hipofisario",
}
class_names = [label_to_name[i] for i in sorted(label_to_name.keys())]

# Imagen y entrenamiento
img_size = (128, 128)   # (H, W) para TensorFlow; OpenCV usa (W, H) en resize
batch = 32
AUTOTUNE = tf.data.AUTOTUNE


In [5]:
def preprocess_one(image, mask):
    # OpenCV: resize(tam=(W,H)) — como 128 es cuadrado, no afecta, pero se deja explícito.
    image_resized = cv2.resize(image, (img_size[1], img_size[0]))
    mask_resized  = cv2.resize(mask,  (img_size[1], img_size[0])) 

    image_norm = cv2.normalize(
        image_resized, None,
        alpha=0, beta=255,
        norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F
    ).astype(np.uint8)

    tumor_image = image_norm * mask_resized
    return tumor_image  # (H,W) uint8


In [6]:
if not DATA_DIR.exists():
    raise FileNotFoundError(f"No se encontró la ruta del dataset: {DATA_DIR.resolve()}")

mat_files = sorted(DATA_DIR.glob("*.mat"))
if not mat_files:
    raise FileNotFoundError(f"No hay archivos .mat en {DATA_DIR.resolve()}")

data_by_tumor = defaultdict(list)
labels_by_tumor = defaultdict(list)

for f in mat_files:
    try:
        with h5py.File(str(f), "r") as h5:
            label = int(h5["cjdata"]["label"][0][0]) - 1
            if label not in label_to_name:
                continue

            image = np.array(h5["cjdata"]["image"])
            mask  = np.array(h5["cjdata"]["tumorMask"])

            if image.ndim != 2 or mask.ndim != 2:
                raise ValueError(f"{f.name}: image/mask deben ser 2D; got image.ndim={image.ndim}, mask.ndim={mask.ndim}")

            tumor_img = preprocess_one(image, mask)

            data_by_tumor[label].append(tumor_img)
            labels_by_tumor[label].append(label)
    except Exception as e:
        print(f"[AVISO] Error procesando {f.name}: {e}")

# Unir todas las clases en X, y
Xs, ys = [], []
for lab in sorted(label_to_name.keys()):
    imgs = data_by_tumor.get(lab, [])
    labs = labels_by_tumor.get(lab, [])
    if imgs:
        Xs.append(np.stack(imgs, axis=0))        # (Ni, H, W)
        ys.append(np.array(labs, dtype=np.int32))
    else:
        print(f"[AVISO] Clase {lab} ({label_to_name[lab]}) sin muestras.")

if not Xs:
    raise RuntimeError("No se pudo construir el dataset (Xs vacío).")

X = np.concatenate(Xs, axis=0)                  # (N, H, W)
y = np.concatenate(ys, axis=0)                  # (N,)
X = X[..., np.newaxis].astype(np.uint8)         # (N, H, W, 1)

print("Dataset total:", X.shape, "Etiquetas:", y.shape)
print("Distribución de clases:", {label_to_name[i]: int((y==i).sum()) for i in np.unique(y)})


Dataset total: (3064, 128, 128, 1) Etiquetas: (3064,)
Distribución de clases: {'Meningioma': 708, 'Glioma': 1426, 'Adenoma_hipofisario': 930}


In [7]:
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.50, stratify=y_tmp, random_state=42
)

num_classes = len(np.unique(y))
print(f"Tamaños -> train: {len(y_train)} | val: {len(y_val)} | test: {len(y_test)}")
print("Clases:", [label_to_name[i] for i in sorted(np.unique(y))])


Tamaños -> train: 2144 | val: 460 | test: 460
Clases: ['Meningioma', 'Glioma', 'Adenoma_hipofisario']


In [8]:
def to_rgb_and_scale(image, label):
    image = tf.image.convert_image_dtype(image, tf.float32)  # [0,1]
    image = tf.image.grayscale_to_rgb(image)                 # (H,W,1)->(H,W,3)
    return image, label

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.10),
])

def make_tfds(X, y, shuffle=False, augment=False):
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(y), seed=42, reshuffle_each_iteration=True)
    ds = ds.map(to_rgb_and_scale, num_parallel_calls=AUTOTUNE)
    if augment:
        ds = ds.map(lambda img, lbl: (data_augmentation(img, training=True), lbl),
                    num_parallel_calls=AUTOTUNE)
    return ds.batch(batch).cache().prefetch(AUTOTUNE)

train_ds = make_tfds(X_train, y_train, shuffle=True,  augment=True)
val_ds   = make_tfds(X_val,   y_val,   shuffle=False, augment=False)
test_ds  = make_tfds(X_test,  y_test,  shuffle=False, augment=False)


# Modelo

In [None]:
model = Sequential([
    layers.Input(shape=img_size + (3,)),  # ya normalizado a [0,1] y en RGB

    layers.Conv2D(32, 3, padding="same", kernel_initializer="he_uniform"),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.MaxPooling2D(),

    layers.Conv2D(64, 3, padding="same", kernel_initializer="he_uniform"),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.MaxPooling2D(),

    layers.Conv2D(128, 3, padding="same", kernel_initializer="he_uniform"),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.MaxPooling2D(),

    layers.GlobalAveragePooling2D(),
    layers.BatchNormalization(),

    layers.Dense(256, kernel_initializer="he_uniform"),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.30),

    layers.Dense(128, kernel_initializer="he_uniform"),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.20),

    layers.Dense(num_classes, activation="softmax"),
])

lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.9, staircase=False
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer,
            loss=keras.losses.SparseCategoricalCrossentropy(),
            metrics=["accuracy"])

model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 128, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 128, 128, 32)     128       
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 128, 128, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 64, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 64, 64)        18496     
                                                                 
 batch_normalization_1 (Batc  (None, 64, 64, 64)      

# Entrenamiento

In [10]:
# Entrena el modelo registrando métricas en history para análisis posterior
history = model.fit(train_ds, validation_data=val_ds, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


# Evaluación objetiva en el conjunto de test

In [11]:
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test — loss: {test_loss:.4f}  acc: {test_acc:.4f}")

Test — loss: 4.7389  acc: 0.5109


# Métricas

In [12]:
# y_true: concatena las etiquetas reales de cada batch del test
y_true = np.concatenate([y.numpy() for _, y in test_ds], axis=0)

# y_pred: para cada imagen, argmax de las probabilidades softmax -> clase predicha
y_pred = np.argmax(model.predict(test_ds), axis=1)

# Genera la matriz de confusión (filas: clase real, columnas: predicción)
cm = tf.math.confusion_matrix(y_true, y_pred, num_classes=len(class_names))
print("Confusion matrix:\n", cm.numpy())

Confusion matrix:
 [[ 18  88   1]
 [  1 212   1]
 [  0 134   5]]


In [13]:
# %%
# out_base = Path("preprocessed_data_por_tumor")
# for lab, name in label_to_name.items():
#     imgs = np.array(data_by_tumor.get(lab, []), dtype=np.uint8)
#     labs = np.array(labels_by_tumor.get(lab, []), dtype=np.int32)
#     if imgs.size:
#         (out_base / name).mkdir(parents=True, exist_ok=True)
#         np.savez_compressed(out_base / name / f"{name}_preprocessed.npz",
#                             images=imgs, labels=labs)
#         print(f"Guardado -> {name}: {len(imgs)} imágenes")
