In [None]:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.utils import class_weight
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import resample
from sklearn.metrics import classification_report, confusion_matrix

In [41]:
image_dir = "Data/images"

data = pd.read_csv("Data/metadata.csv")
data_uni = data.drop_duplicates()
print(data_uni.size)
data_uni = data_uni[
    data_uni["benign_malignant"].isin(["benign", "malignant"])
]


data_uni["filename"] = data_uni["isic_id"] + ".jpg"

data_uni["label"] = data_uni["benign_malignant"]

data_uni["label_index"] = data_uni["label"].astype("category").cat.codes
label_names = data_uni["label"].astype("category").cat.categories.tolist()

print(f"Diagnosis {label_names}")


2288216
Diagnosis ['benign', 'malignant']


  data = pd.read_csv("Data/metadata.csv")


In [42]:

data_uni["filepath"] = data_uni["filename"].apply(lambda fname: os.path.join(image_dir, fname))
data_uni = data_uni[data_uni["filepath"].apply(os.path.exists)]



In [4]:

model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()



I0000 00:00:1749811037.092921     587 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9711 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6


In [58]:
df_benign = data_uni[data_uni['label_index'] == 0]
df_malignant = data_uni[data_uni['label_index'] == 1]

In [61]:
data_uni["label_str"] = data_uni["label_index"].map({0: "benign", 1: "malignant"})

df_malignant_upsampled = resample(
    df_malignant,
    replace=True,
    n_samples=len(df_benign),
    random_state=42
)

balanced_df = pd.concat([df_benign, df_malignant_upsampled])

balanced_df = balanced_df.sample(frac=1).reset_index(drop=True)

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    validation_split=0.2  # 20% para validación
)

train_generator_bal = train_datagen.flow_from_dataframe(
    dataframe=balanced_df,
    x_col='filepath',
    y_col='label_str',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_generator_bal = train_datagen.flow_from_dataframe(
    dataframe=balanced_df,
    x_col='filepath',
    y_col='label_str',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

Found 102476 validated image filenames belonging to 2 classes.
Found 25618 validated image filenames belonging to 2 classes.


In [23]:
model.fit(
    train_generator_bal,
    validation_data=val_generator_bal,
    epochs=10,
    # class_weight=class_weights_manual,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(patience=3)
    ]
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m3203/3203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3409s[0m 1s/step - accuracy: 0.7441 - loss: 0.5088 - val_accuracy: 0.7549 - val_loss: 0.5020 - learning_rate: 1.0000e-05
Epoch 2/10
[1m3203/3203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3325s[0m 1s/step - accuracy: 0.7547 - loss: 0.5017 - val_accuracy: 0.7597 - val_loss: 0.4917 - learning_rate: 1.0000e-05
Epoch 3/10
[1m3203/3203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3330s[0m 1s/step - accuracy: 0.7582 - loss: 0.4897 - val_accuracy: 0.7619 - val_loss: 0.4857 - learning_rate: 1.0000e-05
Epoch 4/10
[1m3203/3203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3361s[0m 1s/step - accuracy: 0.7583 - loss: 0.4848 - val_accuracy: 0.7635 - val_loss: 0.4791 - learning_rate: 1.0000e-05
Epoch 5/10
[1m3203/3203[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3250s[0m 1s/step - accuracy: 0.7624 - loss: 0.4790 - val_accuracy: 0.7688 - val_loss: 0.4714 - learning_rate: 1.0000e-05
Epoch 6/10
[1m3203/3203[0m 

<keras.src.callbacks.history.History at 0x7f901c237b20>

In [26]:
model.save("modelo_skin_issues_2_(Desbalanceado).h5")



In [None]:
# Obtener predicciones
pred_probs = model.predict(val_generator_bal)
pred_labels = (pred_probs > 0.5).astype(int).flatten()  # Convertir a etiquetas 0 o 1

true_labels = val_generator_bal.classes[:len(pred_labels)]

print(classification_report(true_labels, pred_labels, target_names=['benign', 'malignant']))


  self._warn_if_super_not_called()


[1m801/801[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 827ms/step
              precision    recall  f1-score   support

      benign       0.81      0.72      0.76     12921
   malignant       0.75      0.83      0.79     12697

    accuracy                           0.78     25618
   macro avg       0.78      0.78      0.77     25618
weighted avg       0.78      0.78      0.77     25618



In [36]:
cm = confusion_matrix(true_labels, pred_labels)
print("Matriz de Confusión:")
print(cm)

Matriz de Confusión:
[[ 9321  3600]
 [ 2151 10546]]
