In [4]:
import medmnist
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import layers, models

TAMANO_IMAGEN = 64  # side length requested from medmnist for every split


def dataset_a_arrays(dataset):
    """Convert an iterable of (image, label) samples into model-ready arrays.

    Each sample is indexed as ``sample[0]`` (the image) and ``sample[1][0]``
    (the first entry of its label). The dataset is traversed only once.

    Args:
        dataset: Iterable of samples as described above.

    Returns:
        Tuple ``(x, y)``: ``x`` is the stacked images divided by 255 with a
        trailing axis of length 1 appended; ``y`` is the array of labels.
    """
    imagenes, etiquetas = [], []
    for muestra in dataset:
        imagenes.append(muestra[0])
        etiquetas.append(muestra[1][0])
    # Division by 255 rescales to [0, 1], assuming 8-bit pixel values.
    x = np.array(imagenes) / 255
    return np.expand_dims(x, axis=-1), np.array(etiquetas)


dataTrain = medmnist.BreastMNIST(split="train", download=True, size=TAMANO_IMAGEN)
dataVal   = medmnist.BreastMNIST(split="val",   download=True, size=TAMANO_IMAGEN)
dataTest  = medmnist.BreastMNIST(split="test",  download=True, size=TAMANO_IMAGEN)

x_train, y_train = dataset_a_arrays(dataTrain)
x_val, y_val = dataset_a_arrays(dataVal)
x_test, y_test = dataset_a_arrays(dataTest)

# Fail early if the arrays do not have the single-channel layout the CNN is built for.
assert all(
    x.shape[1:] == (TAMANO_IMAGEN, TAMANO_IMAGEN, 1) and len(x) == len(y)
    for x, y in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
), "Forma inesperada en los datos de entrada"

In [1]:
# Function reused from the other notebook (.ipynb).
def modeloCustomizable(
    input_shape=(224, 224, 1),
    num_filtros=[10, 20],
    tamano_kernel=(5, 5),
    capas_densas=[50],
    dropout_rate=[0.5, 0],
    dropout_rate_final=0.5,
):
    """Build and compile a configurable CNN with a single sigmoid output.

    Layer order: Input, then for every convolutional block a ReLU ``Conv2D``,
    a 2x2 ``MaxPooling2D`` and (only if its rate is > 0) a ``Dropout``; then
    ``Flatten``, one ReLU ``Dense`` layer per entry of ``capas_densas``, an
    optional ``Dropout`` and a final ``Dense(1, activation='sigmoid')``.

    Args:
        input_shape: Shape passed to ``layers.Input``.
        num_filtros: Number of filters of each convolutional block.
        tamano_kernel: Kernel size shared by all convolutional layers.
        capas_densas: Number of units of each hidden dense layer.
        dropout_rate: Dropout rate after each convolutional block; a value
            of 0 (or less) skips the dropout layer for that block. Must have
            the same length as ``num_filtros``.
        dropout_rate_final: Dropout rate applied once, after the last hidden
            dense layer; skipped when not > 0.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If ``num_filtros`` and ``dropout_rate`` differ in length.
    """
    n_bloques = len(num_filtros)
    if n_bloques != len(dropout_rate):
        raise ValueError("""
            El largo de num_filtros y el de dropout_rate representan la cantidad de 
            capas convolucionales, por lo que estas listas deben tener el mismo largo.
        """)

    # Feature extractor: one conv/pool(/dropout) group per configured block.
    pila = [layers.Input(shape=input_shape)]
    for filtros, tasa in zip(num_filtros, dropout_rate):
        pila.append(layers.Conv2D(filtros, tamano_kernel, activation='relu'))
        pila.append(layers.MaxPooling2D(pool_size=(2, 2)))
        if tasa > 0:
            pila.append(layers.Dropout(tasa))

    # Classifier head.
    pila.append(layers.Flatten())
    for unidades in capas_densas:
        pila.append(layers.Dense(unidades, activation='relu'))
    if dropout_rate_final > 0:
        pila.append(layers.Dropout(dropout_rate_final))
    pila.append(layers.Dense(1, activation='sigmoid'))

    model = models.Sequential(pila)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [11]:
from tensorflow.keras.callbacks import TensorBoard

def bootstrap_sample(X, y, rng=None):
    """Draw a bootstrap resample (same size, with replacement) of a dataset.

    The same row indices are applied to ``X`` and ``y`` so that every sampled
    input stays paired with its label.

    Args:
        X: Inputs; must support ``len`` and integer-array indexing
            (e.g. a NumPy array).
        y: Labels, with the same length and indexing support as ``X``.
        rng: Optional random source exposing ``choice`` (such as
            ``np.random.default_rng(seed)`` or ``np.random.RandomState``).
            When omitted, NumPy's global random state is used, so
            ``np.random.seed`` still controls the draw.

    Returns:
        Tuple ``(X_boot, y_boot)`` with ``len(X)`` rows each.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths.
    """
    n = len(X)
    if len(y) != n:
        # A longer y would otherwise be sampled only from its first n rows.
        raise ValueError(
            f"X e y deben tener el mismo largo (se recibió {n} y {len(y)})."
        )
    fuente = np.random if rng is None else rng
    idx = fuente.choice(n, size=n, replace=True)
    return X[idx], y[idx]

modelos = []
n_modelos = 5  # ensemble size; change to any number of members
SEMILLA_BASE = 42  # member i is trained with seed SEMILLA_BASE + i

for i in range(n_modelos):
    print(f"Entrenando modelo {i+1}/{n_modelos}")
    # Seed Python, NumPy and TensorFlow before anything random happens for
    # this member, so weight initialisation, the bootstrap draw and batch
    # shuffling repeat across runs. A different seed per member keeps the
    # resamples distinct. (Some GPU kernels may still be non-deterministic.)
    tf.keras.utils.set_random_seed(SEMILLA_BASE + i)
    model = modeloCustomizable(
        input_shape=(64, 64, 1),
        num_filtros=[30, 30],
        tamano_kernel=(9, 9),
        capas_densas=[100]*3,
        dropout_rate=[0, 0],
        dropout_rate_final=0
    )
    # Each member is fitted on its own resample of the training split.
    X_boot, y_boot = bootstrap_sample(x_train, y_train)
    history = model.fit(
        X_boot, y_boot,
        validation_data=(x_val, y_val),
        epochs=100,
        batch_size=32,
        callbacks=[TensorBoard(log_dir=f'logs/bagging_{i}')],
        verbose=0
    )
    print(f"Último val_accuracy del modelo: {history.history['val_accuracy'][-1]:.4f}")
    modelos.append(model)

Entrenando modelo 1/5
Último val_accuracy del modelo: 0.7821
Entrenando modelo 2/5
Último val_accuracy del modelo: 0.8462
Entrenando modelo 3/5
Último val_accuracy del modelo: 0.8590
Entrenando modelo 4/5
Último val_accuracy del modelo: 0.8718
Entrenando modelo 5/5
Último val_accuracy del modelo: 0.8333


In [12]:
# One column of sigmoid outputs per trained model. The width comes from
# `modelos` itself, so a changed `n_modelos` without retraining cannot leave
# all-zero columns (which would drag the mean down) or overflow the matrix.
preds = np.column_stack([
    np.asarray(modelo.predict(x_test), dtype=np.float64).ravel()
    for modelo in modelos
])
assert preds.shape == (len(x_test), len(modelos)), "Forma inesperada en las predicciones"

# Soft voting: average the predicted probabilities, then threshold at 0.5.
y_pred_bagging = (preds.mean(axis=1) > 0.5).astype(int)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


In [13]:
# Test accuracy of the ensemble: fraction of labels matched by the vote.
# The shape check guards against silent broadcasting of mismatched arrays.
assert y_test.shape == y_pred_bagging.shape, "y_test y y_pred_bagging deben tener la misma forma"
np.mean(y_test == y_pred_bagging)

np.float64(0.8589743589743589)