In [16]:
import kagglehub
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import os
from tensorflow.keras.callbacks import EarlyStopping

In [11]:
# Full path to the directory that holds the 8 cancer class folders.
# The location is machine-specific, so it can be overridden by setting the
# MULTI_CANCER_DIR environment variable; when that variable is unset the
# original local kagglehub cache path below is used unchanged.
path = os.environ.get(
    'MULTI_CANCER_DIR',
    r'C:\Users\inter\.cache\kagglehub\datasets\obulisainaren\multi-cancer\versions\3\Multi Cancer\Multi Cancer',
)

In [12]:
# Generator setup: pixel values are multiplied by 1/255 and a 0.2 fraction of
# the images is reserved for the validation subset.
datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)

# Options shared by both subsets, kept in one place so the training and
# validation loaders always use the same image size, batch size and labels.
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Training subset loader
train_generator = datagen.flow_from_directory(path, subset='training', **flow_options)

# Validation subset loader
val_generator = datagen.flow_from_directory(path, subset='validation', **flow_options)

# Class bookkeeping: names come from the generator's class_indices mapping,
# in that mapping's own key order.
class_names = [name for name in train_generator.class_indices]
num_classes = len(class_names)

print(f"\nÉxito: Se han detectado {num_classes} clases.")
print(f"Nombres de las clases: {class_names}")

Found 104002 images belonging to 8 classes.
Found 26000 images belonging to 8 classes.

Éxito: Se han detectado 8 clases.
Nombres de las clases: ['ALL', 'Brain Cancer', 'Breast Cancer', 'Cervical Cancer', 'Kidney Cancer', 'Lung and Colon Cancer', 'Lymphoma', 'Oral Cancer']


In [13]:
# Small CNN classifier: two convolution/pooling stages followed by a dense head.
model_cnn = Sequential([
    # Declare the input explicitly rather than passing input_shape to the first
    # layer; Keras recommends an Input object as the first Sequential entry.
    # (224, 224, 3) matches the target_size used by the data generators.
    tf.keras.Input(shape=(224, 224, 3)),

    # Convolutional block 1
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),

    # Convolutional block 2
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),

    # Classification head: flatten the feature maps into a single vector
    Flatten(),

    # Dense interpretation layer
    Dense(64, activation='relu'),

    # Output layer: one softmax unit per class detected by the generator
    Dense(num_classes, activation='softmax')
])

model_cnn.summary()

In [18]:
# Early stopping to limit overfitting: watches the validation loss with a
# patience of 3 epochs and asks Keras to restore the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=3)

In [None]:
# Training configuration: Adam optimizer, categorical cross-entropy loss
# (labels come from the generators in 'categorical' mode), accuracy metric.
model_cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("\n--- Entrenando la CNN con el Dataset de Cáncer ---")

# Fit for up to 5 epochs, validating on the held-out subset after each one;
# the early-stopping callback may end the run sooner.
fit_options = dict(
    epochs=5,
    validation_data=val_generator,
    callbacks=[early_stop],
)
history_cnn = model_cnn.fit(train_generator, **fit_options)


--- Entrenando la CNN con el Dataset de Cáncer ---
Epoch 1/5
3251/3251 ━━━━━━━━━━━━━━━━━━━━ 1841s 565ms/step - accuracy: 0.9690 - loss: 0.0994 - val_accuracy: 0.9866 - val_loss: 0.0466
Epoch 2/5
3251/3251 ━━━━━━━━━━━━━━━━━━━━ 1607s 494ms/step - accuracy: 0.9895 - loss: 0.0347 - val_accuracy: 0.9661 - val_loss: 0.1150
Epoch 3/5
3251/3251 ━━━━━━━━━━━━━━━━━━━━ 1672s 514ms/step - accuracy: 0.9925 - loss: 0.0265 - val_accuracy: 0.9423 - val_loss: 0.2026
Epoch 4/5
 880/3251 ━━━━━━━━━━━━━━━━━━━━ 19:41 498ms/step - accuracy: 0.9952 - loss: 0.0133