intentaremos un modelo de redes neuronales con multiclases, donde detectaremos l tipo de lesion, tanto venigna como maligna 

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Suprimir todos los warnings
import warnings
warnings.filterwarnings('ignore')

# Verificar la GPU disponible
print("GPUs disponibles:", len(tf.config.experimental.list_physical_devices('GPU')))

GPUs disponibles: 1


In [2]:

# Cargar el dataset
df = pd.read_csv('/Users/luiseduardogarciablanco/Desktop/nueva data cancer/prueba_data_18_19_20/metadata_18_19_20_sin_NaN_code.csv', index_col=0)

# Definir las columnas target y las columnas de entrada (features)
target_columns = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']
input_columns = [col for col in df.columns if col not in target_columns]

# Normalizar las características de entrada
scaler = StandardScaler()
df[input_columns] = scaler.fit_transform(df[input_columns])

# Dividir en entrenamiento y validación
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [3]:


# Crear generadores de datos con Data Augmentation solo para imágenes positivas
datagen = ImageDataGenerator(
    rescale=1./255,  # Normalizar imágenes
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

def custom_image_generator(df, datagen, batch_size, target_columns, image_dir, augment_only_positive=True):
    while True:
        batch_df = df.sample(n=batch_size)
        images = []
        labels = []
        
        for i, row in batch_df.iterrows():
            img_path = f"{image_dir}/{i}.jpg"  # Asumiendo que el índice es el nombre de la imagen
            img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
            img_array = tf.keras.preprocessing.image.img_to_array(img)
            images.append(img_array)
            labels.append(row[target_columns].values)
        
        images = np.array(images)
        labels = np.array(labels)
        
        if augment_only_positive:
            # Crear un generador para aplicar augmentación solo a imágenes positivas
            augmented_images = []
            for i in range(len(images)):
                if np.any(labels[i] == 1):  # Si la imagen tiene alguna etiqueta positiva
                    augmented_image = datagen.flow(images[i:i+1], batch_size=1).next()
                    augmented_images.append(augmented_image[0])
                else:
                    augmented_images.append(images[i])
            
            augmented_images = np.array(augmented_images)
        else:
            # Aplicar augmentación a todas las imágenes
            augmented_images = datagen.flow(images, batch_size=batch_size, shuffle=False).next()
        
        yield augmented_images, labels

batch_size = 32
image_dir = '/Users/luiseduardogarciablanco/Desktop/nueva data cancer/prueba_data_18_19_20/imagenes'  
train_generator = custom_image_generator(train_df, datagen, batch_size, target_columns, image_dir, augment_only_positive=True)
val_generator = custom_image_generator(val_df, datagen, batch_size, target_columns, image_dir, augment_only_positive=False)

In [4]:


# Cargar el modelo preentrenado
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)  # Añadir Dropout
predictions = Dense(len(target_columns), activation='sigmoid')(x)  # Usar 'sigmoid' para multi-label

model = Model(inputs=base_model.input, outputs=predictions)

# Congelar las capas del modelo base
for layer in base_model.layers:
    layer.trainable = False

# Compilar el modelo
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Entrenar el modelo
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_df) // batch_size,
    validation_data=val_generator,
    validation_steps=len(val_df) // batch_size,
    epochs=25
)


2024-08-23 16:16:13.364864: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-08-23 16:16:13.365545: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-08-23 16:16:13.365559: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-08-23 16:16:13.366112: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-08-23 16:16:13.366596: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/25


2024-08-23 16:16:14.946525: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2024-08-23 16:21:12.990117: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25

KeyboardInterrupt: 

In [5]:

# Evaluar el modelo
val_images, val_labels = next(custom_image_generator(val_df, datagen, batch_size, target_columns, image_dir, augment_only_positive=False))
predictions = model.predict(val_images)

# Convertir las predicciones a etiquetas de clase
pred_labels = (predictions > 0.5).astype(int)  # Umbral de 0.5 para clasificación multi-label
true_labels = val_labels

# Generar el informe de clasificación
report = classification_report(true_labels, pred_labels, target_names=target_columns, zero_division=0)
print("Classification Report:")
print(report)



2024-08-23 18:23:12.890372: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Classification Report:
              precision    recall  f1-score   support

         MEL       0.00      0.00      0.00         3
          NV       0.35      1.00      0.51         9
         BCC       0.00      0.00      0.00         3
          AK       0.00      0.00      0.00         0
         BKL       0.00      0.00      0.00         3
          DF       0.00      0.00      0.00         0
        VASC       0.00      0.00      0.00         0
         SCC       0.00      0.00      0.00         0

   micro avg       0.33      0.50      0.40        18
   macro avg       0.04      0.12      0.06        18
weighted avg       0.17      0.50      0.26        18
 samples avg       0.28      0.28      0.28        18



tras obtener resulatados totalmente inutiles, procedemos a probar otras maneras de analizar los datos y cambio de variables