In [None]:
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import logging

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, 
                                     BatchNormalization, GlobalAveragePooling2D)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import datetime


2025-11-18 20:01:50.201673: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the HAM10000 metadata CSV and preview its first rows.
metadata_csv = "../data/raw/HAM10000_metadata.csv"
df = pd.read_csv(metadata_csv)
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [3]:
# Number of rows per diagnosis code (`dx`), most frequent first.
df["dx"].value_counts()

dx
nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: count, dtype: int64

### Clasificación de lesiones de piel por tipo (`dx`)

En conjuntos de datos dermatológicos como HAM10000, las lesiones se clasifican en siete tipos según el campo `dx`. A continuación se indica cuáles se consideran benignas y cuáles malignas:

#### Lesiones benignas
- **`nv` (Nevus melanocítico)**: Lunares comunes, generalmente benignos.
- **`bkl` (Queratosis benigna)**: Incluye lentigos solares y queratosis seborreicas.
- **`df` (Dermatofibroma)**: Tumores benignos de la piel, firmes y pequeños.
- **`vasc` (Lesiones vasculares)**: Incluye angiomas y otras lesiones vasculares, consideradas benignas.

#### Lesiones malignas
- **`mel` (Melanoma)**: Cáncer de piel agresivo y potencialmente mortal.
- **`bcc` (Carcinoma basocelular)**: Cáncer de piel de crecimiento lento pero invasivo.
- **`akiec` (Queratosis actínica / carcinoma intraepidérmico)**: Lesión precancerosa o carcinoma in situ.

Esta clasificación permite agrupar las clases en dos categorías para tareas de clasificación binaria: benignas vs malignas.


In [4]:
data_dir = "../data/processed/train"

# Training-time augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,
    # rotation_range is expressed in DEGREES; the previous value of 0.2
    # produced a practically invisible rotation.
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    brightness_range=[0.8,1.2],
    shear_range=0.2,
    horizontal_flip=True,  # accounts for mirrored / symmetric lesions
    validation_split=0.15,  # internal train/validation split
)

# Validation images must NOT be augmented, otherwise validation metrics are
# computed on randomly distorted inputs. Only the rescaling is kept.
# validation_split has to match the training generator's value so that both
# generators partition the files in data_dir the same way.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.15,
)

# Training generator (augmented, shuffled with a fixed seed for reproducibility)
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training',
    shuffle=True,
    seed=42
)

# Validation generator (rescaled only; fixed order so predictions line up
# with val_generator.classes)
val_generator = val_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

# Class balance of each subset (validation first, then training)
print(pd.Series(val_generator.classes).value_counts())
print(pd.Series(train_generator.classes).value_counts())

Found 7662 images belonging to 2 classes.
Found 1351 images belonging to 2 classes.
0    1088
1     263
Name: count, dtype: int64
0    6166
1    1496
Name: count, dtype: int64


In [5]:
# Integer class index of every training image.
labels = train_generator.classes
present_classes = np.unique(labels)

# "balanced" weighting: n_samples / (n_classes * count(class)), so the
# minority class receives the larger weight.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=labels
)

# Keras expects {class_index: weight}. Key each weight by its actual class
# label (not by its position in the array) and use plain Python types.
class_weights = {
    int(clase): float(peso)
    for clase, peso in zip(present_classes, balanced_weights)
}
print(class_weights)


{0: 0.6213104119364256, 1: 2.560828877005348}


## Modelo

In [6]:
def clasificador_binario(input_shape=(224,224,3), lr=1e-3):
    """Build and compile a small CNN for binary image classification.

    Architecture: four Conv2D(3x3, ReLU, 'same') -> BatchNormalization ->
    MaxPooling2D(2x2) blocks with 32, 64, 128 and 256 filters, followed by
    global average pooling, a 128-unit ReLU dense layer with 50% dropout, and
    a single sigmoid output unit. Conv and hidden dense kernels use L2(1e-4)
    regularization.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` named ``'cnn_mejorada'`` using binary
        cross-entropy loss and the accuracy metric.
    """
    entrada = Input(shape=input_shape, name='entrada_imagen')

    # Convolutional feature extractor: filters double at each block while the
    # spatial resolution is halved by the pooling layer.
    x = entrada
    for n_filtros in (32, 64, 128, 256):
        x = Conv2D(
            n_filtros,
            (3,3),
            activation='relu',
            padding='same',
            kernel_regularizer=l2(1e-4),
        )(x)
        x = BatchNormalization()(x)
        x = MaxPooling2D((2,2))(x)

    # Global pooling instead of Flatten keeps the parameter count low.
    x = GlobalAveragePooling2D()(x)

    # Classification head.
    x = Dense(128, activation='relu', kernel_regularizer=l2(1e-4))(x)
    x = Dropout(0.5)(x)
    salida = Dense(1, activation='sigmoid', name='salida_binaria')(x)

    modelo = Model(inputs=entrada, outputs=salida, name='cnn_mejorada')
    optimizador = Adam(learning_rate=lr)
    modelo.compile(
        optimizer=optimizador,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return modelo



In [7]:
import tensorflow as tf

# Hide every GPU from TensorFlow so that everything below runs on the CPU.
# NOTE(review): presumably needed because the local GPU is not supported by
# this TensorFlow build (see the compute-capability warning in the output);
# confirm before removing.
tf.config.set_visible_devices(devices=[], device_type='GPU')


W0000 00:00:1763492512.567253   19421 gpu_device.cc:2431] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 5.2. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


In [8]:
# Only let ERROR-level messages through TensorFlow's Python logger.
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.ERROR)

# Fresh model with the default input shape and learning rate.
modelo = clasificador_binario()

# Train for 20 epochs, weighting the loss per class to offset the imbalance
# between the two classes.
history = modelo.fit(
    x=train_generator,
    epochs=20,
    validation_data=val_generator,
    class_weight=class_weights,
)


Epoch 1/20


2025-11-18 20:01:57.754143: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f3bfc013e90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-11-18 20:01:57.754162: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Host, Default Version
2025-11-18 20:01:57.858241: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1763492519.358151   19616 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2025-11-18 20:01:59.361120: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 2667125736 exceeds 10% of free system memory.


[1m  1/240[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m29:39[0m 7s/step - accuracy: 0.3125 - loss: 0.9401

2025-11-18 20:02:01.305186: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 2667125736 exceeds 10% of free system memory.


[1m  2/240[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:33[0m 2s/step - accuracy: 0.3828 - loss: 0.9566 

2025-11-18 20:02:03.209385: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 2667125736 exceeds 10% of free system memory.


[1m  3/240[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:27[0m 2s/step - accuracy: 0.4080 - loss: 0.9628

2025-11-18 20:02:05.084674: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 2667125736 exceeds 10% of free system memory.


[1m  4/240[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:19[0m 2s/step - accuracy: 0.4310 - loss: 0.9381

2025-11-18 20:02:06.897282: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 2667125736 exceeds 10% of free system memory.


[1m125/240[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m3:35[0m 2s/step - accuracy: 0.5906 - loss: 0.7665

KeyboardInterrupt: 

In [None]:
# Current timestamp (month_day_hHour_Minute) used to make the filename distinct
timestamp = datetime.datetime.now().strftime("%m_%d_h%H_%M")

# Directory where the model is stored; created if it does not exist yet
save_dir = "../models/classifier"
os.makedirs(save_dir, exist_ok=True)

# Build the target path from save_dir so the directory that is created and
# the directory that is written to can never diverge.
model_path = os.path.join(save_dir, f"new_model_{timestamp}.keras")
modelo.save(model_path)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
modelo.summary()