In [None]:
# 1. Mount Google Drive and Install Dependencies
# Colab-only step: the data directories used further down are absolute paths
# under /content/drive, so the Drive must be mounted before anything is read.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): the packages are installed without version pins, so a rerun
# may pull different releases than the ones that produced the saved outputs.
!pip install mlflow tensorflow wandb

# 2. Import Libraries
# NOTE(review): glob, plt, ImageDataGenerator, mlflow and mlflow.tensorflow are
# not referenced by the cells visible here - confirm before removing them.
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
import mlflow
import mlflow.tensorflow
import wandb
# Log in to Weights & Biases once, before any run is started with wandb.init.
wandb.login()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[34m[1mwandb[0m: Currently logged in as: [33mjd-lopez160[0m ([33mjd-lopez160-pontificia-universidad-javeriana[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# 6. Define Data Directories
# One Google Drive folder per class; load_data_in_batches reads the arrays
# stored in each of them.
dir_covid, dir_pneumonia, dir_normal = (
    "/content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Covid/est_y_norm_Procesadas/",
    "/content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Pneumonia/est_y_norm_Procesadas/",
    "/content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Normal/est_y_norm_Procesadas/",
)

# 7. Load Data in Batches
def load_data_in_batches(directory, label, batch_size=100, repeat=True):
    """Yield shuffled ``(images, labels)`` batches read from ``.npy`` files.

    Each file is loaded with ``np.load``, expanded to three channels when it
    is single-channel, and resized to 299x299 with ``tf.image.resize``.

    This is a generator function: nothing (including the validation below)
    runs until the first item is requested.

    Args:
        directory: Folder holding one ``.npy`` array per image.
        label: Integer class id attached to every image of this folder.
        batch_size: Maximum number of images per batch. The last batch of a
            pass is smaller when the file count is not a multiple of it.
        repeat: When True (default, the historical behaviour) the generator
            never ends; it reshuffles the file list and starts a new pass
            after the last file. When False it stops after one pass.

    Yields:
        ``(batch_data, batch_labels)``: a float32 array of shape
        ``(n, 299, 299, 3)`` and an int32 array of shape ``(n,)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if the directory
            contains no ``.npy`` file (the endless loop would otherwise spin
            forever without ever yielding).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size debe ser >= 1, se recibió {batch_size}")

    print(f"Cargando datos de {directory} con etiqueta {label}")
    # Only .npy entries are loadable images; sorting makes the shuffled order
    # reproducible when NumPy's global seed is fixed (os.listdir order is
    # arbitrary).
    files = sorted(f for f in os.listdir(directory) if f.lower().endswith('.npy'))
    print(f"Total de archivos en {directory}: {len(files)}")
    if not files:
        raise ValueError(f"No se encontraron archivos .npy en {directory}")

    image_print_limit = 2  # Number of images whose shape is logged
    batch_print_limit = 1  # Number of batches whose shape is logged

    image_print_count = 0
    batch_print_count = 0

    while True:
        np.random.shuffle(files)
        for start in range(0, len(files), batch_size):
            batch_files = files[start:start + batch_size]
            batch_images = []
            for name in batch_files:
                img = np.load(os.path.join(directory, name))
                if img.ndim == 2:
                    # Grayscale (H, W) -> (H, W, 3)
                    img = np.stack([img] * 3, axis=-1)
                elif img.ndim == 3 and img.shape[-1] == 1:
                    # Grayscale stored as (H, W, 1) -> (H, W, 3)
                    img = np.repeat(img, 3, axis=-1)
                # tf.image.resize returns a float32 tensor; convert it back so
                # the batch is a plain NumPy array.
                img = np.asarray(tf.image.resize(img, (299, 299)), dtype=np.float32)
                batch_images.append(img)
                if image_print_count < image_print_limit:
                    print(f"Dimensiones de la imagen {name} después de redimensionar: {img.shape}")
                    image_print_count += 1
            batch_data = np.stack(batch_images, axis=0)
            # int32 matches the label dtype declared by the tf.data consumer.
            batch_labels = np.full(len(batch_files), label, dtype=np.int32)
            if batch_print_count < batch_print_limit:
                print(f"Lote de datos dimensiones: {batch_data.shape}, Lote de etiquetas dimensiones: {batch_labels.shape}")
                batch_print_count += 1
            yield batch_data, batch_labels
        if not repeat:
            return

# 9. Prepare Data for Scenarios
def prepare_data_for_scenario(scenario, covid_dir, pneumonia_dir, normal_dir, batch_size=100):
    """Build a finite, class-mixed ``tf.data.Dataset`` for one sampling scenario.

    Labels are 0 (covid), 1 (pneumonia) and 2 (normal). The number of images
    drawn from each class per pass over the dataset depends on ``scenario``:

    * ``'undersampling'``: every class contributes as many images as the
      smallest class has files.
    * ``'oversampling'``: every class contributes as many images as the
      largest class has files (smaller classes are read more than once).
    * anything else (e.g. ``'original'``): every class contributes each of
      its files once.

    Args:
        scenario: Name of the sampling strategy, see above.
        covid_dir: Folder with the ``.npy`` images of class 0.
        pneumonia_dir: Folder with the ``.npy`` images of class 1.
        normal_dir: Folder with the ``.npy`` images of class 2.
        batch_size: Number of images per batch of the returned dataset.

    Returns:
        A dataset yielding ``(images, one_hot_labels)`` with shapes
        ``(n, 299, 299, 3)`` and ``(n, 3)``, ``n <= batch_size``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or any class folder
            has no ``.npy`` file.
    """
    num_classes = 3
    class_dirs = [(covid_dir, 0), (pneumonia_dir, 1), (normal_dir, 2)]

    if batch_size < 1:
        raise ValueError(f"batch_size debe ser >= 1, se recibió {batch_size}")

    # Count the images per class up front; an empty class would make the
    # endless batch generator spin without ever producing data.
    file_counts = []
    for directory, _ in class_dirs:
        n_files = sum(1 for f in os.listdir(directory) if f.lower().endswith('.npy'))
        if n_files == 0:
            raise ValueError(f"No se encontraron archivos .npy en {directory}")
        file_counts.append(n_files)

    if scenario == 'undersampling':
        images_per_class = [min(file_counts)] * num_classes
    elif scenario == 'oversampling':
        images_per_class = [max(file_counts)] * num_classes
    else:
        images_per_class = list(file_counts)

    def class_stream(directory, label, n_images):
        """Yield batches of one class until exactly ``n_images`` were emitted."""
        remaining = n_images
        # load_data_in_batches never stops by itself, so the stream is cut
        # here; the last batch is trimmed to hit the target exactly.
        for data, labels in load_data_in_batches(directory, label, batch_size):
            data = np.asarray(data[:remaining], dtype=np.float32)
            labels = np.asarray(labels[:remaining], dtype=np.int32)
            remaining -= len(labels)
            yield data, labels
            if remaining <= 0:
                return

    def epoch_batches():
        """Round-robin over the class streams until all of them are exhausted."""
        # Fresh generators on every call: tf.data invokes this function each
        # time the dataset is iterated (i.e. once per epoch).
        streams = [
            class_stream(directory, label, n_images)
            for (directory, label), n_images in zip(class_dirs, images_per_class)
        ]
        while streams:
            for stream in list(streams):
                try:
                    yield next(stream)
                except StopIteration:
                    streams.remove(stream)

    dataset = tf.data.Dataset.from_generator(
        epoch_batches,
        output_signature=(
            tf.TensorSpec(shape=(None, 299, 299, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None,), dtype=tf.int32),
        ),
    )

    # The generator already emits batches, but each one holds a single class.
    # Split them into images, mix the classes, and batch exactly once (batching
    # the batches again is what produced the 5-D input the model rejected).
    # The buffer holds num_classes * batch_size images of about 1.07 MB each.
    dataset = (
        dataset.unbatch()
        .shuffle(num_classes * batch_size)
        .batch(batch_size)
        .map(lambda x, y: (x, tf.one_hot(y, num_classes)))
        .prefetch(tf.data.AUTOTUNE)
    )

    # Print dataset shape (for only one batch)
    for data, labels in dataset.take(1):
        print(f"Dimensiones del dataset: {data.shape}, Dimensiones de las etiquetas: {labels.shape}")

    return dataset


# 8. Build Model (compilation happens in run_experiment)
def build_model():
    """Assemble the uncompiled CNN shared by all experiments.

    The network takes 299x299x3 inputs and stacks five 3x3 ReLU convolutions
    (64, 128, 256, 512 and 512 filters), each followed by 2x2 max pooling.
    The result is flattened and fed to two 4096-unit ReLU dense layers and a
    3-way softmax output.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    conv_filters = (64, 128, 256, 512, 512)

    stack = [layers.Input(shape=(299, 299, 3))]
    for n_filters in conv_filters:
        stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.append(layers.Flatten())
    for _ in range(2):
        stack.append(layers.Dense(4096, activation='relu'))
    stack.append(layers.Dense(3, activation='softmax'))

    return models.Sequential(stack)

In [None]:
def run_experiment(train_dataset, val_dataset, learning_rates, optimizers, epochs=20, scenario="original"):
    """Train one fresh model per (learning rate, optimizer) pair and log it to wandb.

    Args:
        train_dataset: Dataset passed to ``model.fit`` for training.
        val_dataset: Dataset passed to ``model.fit`` as ``validation_data``.
        learning_rates: Iterable of learning rates to try.
        optimizers: Iterable of optimizer classes, each instantiated as
            ``optimizer_class(learning_rate=lr)``.
        epochs: Number of epochs per experiment; must be at least 1.
        scenario: Label stored in the wandb config and printed in the logs.

    Returns:
        A list with one dict per experiment holding the optimizer name, the
        learning rate and the validation accuracy, precision, recall and loss
        of the last epoch.

    Raises:
        ValueError: If ``epochs`` is smaller than 1 (there would be no
            history to report).
    """
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    results = []
    print(f"Starting experiments for scenario: {scenario}")
    for lr in learning_rates:
        for optimizer_class in optimizers:
            print(f"\nInitializing experiment with Learning Rate: {lr}, Optimizer: {optimizer_class.__name__}")
            wandb.init(project="cnn-proyecto-1-ml",
                       config={
                           "learning_rate": lr,
                           "epochs": epochs,
                           "optimizer": optimizer_class.__name__,
                           "scenario": scenario
                       })

            # Reported to wandb on exit; only switched to 0 when the
            # experiment ran to completion.
            exit_code = 1
            try:
                # Release the previous experiment's model before building a
                # new one so memory does not accumulate across the grid.
                tf.keras.backend.clear_session()

                model = build_model()
                optimizer = optimizer_class(learning_rate=lr)
                # Explicit metric names: auto-generated ones get a numeric
                # suffix (precision_1, ...) from the second model on, which
                # would break the history lookups below.
                model.compile(optimizer=optimizer,
                              loss='categorical_crossentropy',
                              metrics=['accuracy',
                                       tf.keras.metrics.Precision(name='precision'),
                                       tf.keras.metrics.Recall(name='recall')])

                # One fit call per epoch so every epoch is logged to wandb as
                # soon as it finishes.
                for epoch in range(epochs):
                    print(f"Starting epoch {epoch+1}/{epochs}")
                    history = model.fit(train_dataset, validation_data=val_dataset, epochs=1, verbose=1)
                    wandb.log({
                        'epoch': epoch + 1,
                        'train_loss': history.history['loss'][0],
                        'train_accuracy': history.history['accuracy'][0],
                        'train_precision': history.history['precision'][0],
                        'train_recall': history.history['recall'][0],
                        'val_loss': history.history['val_loss'][0],
                        'val_accuracy': history.history['val_accuracy'][0],
                        'val_precision': history.history['val_precision'][0],
                        'val_recall': history.history['val_recall'][0],
                    })
                    print(f"Completed epoch {epoch+1}/{epochs}. Validation Accuracy: {history.history['val_accuracy'][0]}")

                # `history` is the one returned by the last epoch's fit call.
                results.append({
                    'optimizer': optimizer_class.__name__,
                    'learning_rate': lr,
                    'accuracy': history.history['val_accuracy'][-1],
                    'precision': history.history['val_precision'][-1],
                    'recall': history.history['val_recall'][-1],
                    'loss': history.history['val_loss'][-1]
                })
                print(f"Experiment completed with Learning Rate: {lr}, Optimizer: {optimizer_class.__name__}\n")
                exit_code = 0
            finally:
                # Always close the run, even when training raised, so the next
                # wandb.init starts from a clean state.
                wandb.finish(exit_code=exit_code)
    return results




In [None]:
# 11. Execute Experiments
# Grid: every scenario x learning rate x optimizer class, 20 epochs each.
scenarios = ['undersampling', 'oversampling', 'original']
learning_rates = [1e-3, 1e-4, 1e-5]
optimizers = [
    tf.keras.optimizers.Adam,
    tf.keras.optimizers.SGD,
    tf.keras.optimizers.RMSprop,
]
epochs = 20

for scenario in scenarios:
    print(f"Ejecutando escenario: {scenario}")
    # NOTE(review): the validation dataset is built from the same directories
    # and the same scenario as the training dataset, so the reported
    # validation metrics are computed on training images. A held-out split
    # is needed before these numbers can be trusted.
    train_dataset = prepare_data_for_scenario(scenario, dir_covid, dir_pneumonia, dir_normal)
    val_dataset = prepare_data_for_scenario(scenario, dir_covid, dir_pneumonia, dir_normal)
    results = run_experiment(
        train_dataset,
        val_dataset,
        learning_rates,
        optimizers,
        epochs,
        scenario=scenario,
    )
    print(f'Resultados para el escenario: {scenario}')
    for result in results:
        print(result)

Ejecutando escenario: undersampling
Cargando datos de /content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Covid/est_y_norm_Procesadas/ con etiqueta 0
Total de archivos en /content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Covid/est_y_norm_Procesadas/: 3616
Dimensiones de la imagen COVID-620.npy después de redimensionar: (299, 299, 3)
Dimensiones de la imagen COVID-512.npy después de redimensionar: (299, 299, 3)
Lote de datos dimensiones: (32, 299, 299, 3), Lote de etiquetas dimensiones: (32,)
Dimensiones del dataset: (32, 32, 299, 299, 3), Dimensiones de las etiquetas: (32, 32, 3)
Cargando datos de /content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Covid/est_y_norm_Procesadas/ con etiqueta 0
Total de archivos en /content/drive/MyDrive/Proyecto 1/Imagenes Proyecto 1/Procesadas/Covid/est_y_norm_Procesadas/: 3616
Dimensiones de la imagen COVID-3595.npy después de redimensionar: (299, 299, 3)
Dimensiones de la imagen COVID-2410.npy después de redime

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Starting epoch 1/20


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(None, None, 299, 299, 3), dtype=float32). Expected shape (None, 299, 299, 3), but input has incompatible shape (None, None, 299, 299, 3)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, None, 299, 299, 3), dtype=float32)
  • training=True
  • mask=None