## Inicialización: dataset + dependencias


In [1]:
!pip install mlflow -q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.8.3 requires cubinlinker, which is not installed.
cudf 24.8.3 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.8.3 requires ptxcompiler, which is not installed.
cuml 24.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.8.3 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.8.3 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.6.0 which is incompatible.
dataproc-jupyter-plugin 0.1.79 requires pydantic~=1.10.0, but you have pydantic 2.9.2 which is incompatible.
kfp 2.5.0 requires google-cloud-storage<3,>=2.2.1, but you have google-cloud-storage 1.44.0 which is incompatible.[0m[31m
[0m

In [2]:
!pip install gdown

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
Successfully installed gdown-5.2.0


In [3]:
import tensorflow as tf

from tensorflow.keras.callbacks import TensorBoard

from tensorboard.plugins.hparams import api as hp

import mlflow

from tensorflow.keras.applications import ResNet50

from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

from tensorflow.keras import Sequential

from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import Adam

from functools import partial

import matplotlib.pyplot as plt

import albumentations as A

import numpy as np

  check_for_updates()


In [4]:
!gdown --id 1--m8IGrdaNgf1ocUXBqJjnHBo3ZaxN0E

  pid, fd = os.forkpty()


Downloading...
From (original): https://drive.google.com/uc?id=1--m8IGrdaNgf1ocUXBqJjnHBo3ZaxN0E
From (redirected): https://drive.google.com/uc?id=1--m8IGrdaNgf1ocUXBqJjnHBo3ZaxN0E&confirm=t&uuid=cbea2d69-21af-4a7c-b356-5adc9d266b31
To: /kaggle/working/dataset.zip
100%|█████████████████████████████████████████| 240M/240M [00:01<00:00, 212MB/s]


In [5]:
#Unzip quietly
!unzip /kaggle/working/dataset.zip -d /kaggle/working/dataset && rm /kaggle/working/dataset.zip

Archive:  /kaggle/working/dataset.zip
   creating: /kaggle/working/dataset/American coot/
   creating: /kaggle/working/dataset/Cheops/
   creating: /kaggle/working/dataset/Moorish arch/
   creating: /kaggle/working/dataset/Pharaoh/
   creating: /kaggle/working/dataset/alcoholic/
   creating: /kaggle/working/dataset/book bag/
   creating: /kaggle/working/dataset/chime/
   creating: /kaggle/working/dataset/cirque/
   creating: /kaggle/working/dataset/great grandparent/
   creating: /kaggle/working/dataset/hill myna/
   creating: /kaggle/working/dataset/kitchenware/
   creating: /kaggle/working/dataset/larva/
   creating: /kaggle/working/dataset/masterpiece/
   creating: /kaggle/working/dataset/mousse/
   creating: /kaggle/working/dataset/plastic wrap/
   creating: /kaggle/working/dataset/shield/
   creating: /kaggle/working/dataset/stately home/
   creating: /kaggle/working/dataset/sweet William/
   creating: /kaggle/working/dataset/toy box/
   creating: /kaggle/worki

Basado en: https://albumentations.ai/docs/examples/tensorflow-example/

## Modelo

In [6]:
# Delete the `logs` directory (where this notebook writes its TensorBoard data)
# so summaries from earlier executions do not mix with the new ones.
!rm -rf logs

In [7]:
from tensorflow.keras.preprocessing import image_dataset_from_directory



# Root folder holding one sub-directory per class.
data_dir = "/kaggle/working/dataset"

# A single call with subset='both' yields the training and validation datasets
# together; the shared seed keeps the 80/20 split consistent between them.
train_ds, val_ds = image_dataset_from_directory(
    data_dir,
    # How labels are derived: integer ids inferred from the folder names.
    labels='inferred',
    label_mode='int',
    class_names=None,
    # How images are decoded and resized.
    color_mode='rgb',
    image_size=(500, 500),
    interpolation='bilinear',
    # How samples are batched, shuffled and split.
    batch_size=32,
    shuffle=True,
    seed=37,
    validation_split=0.2,
    subset='both',
)

Found 4000 files belonging to 20 classes.
Using 3200 files for training.
Using 800 files for validation.


In [8]:
# Hyperparameter search space. Each domain is trimmed to keep the sweep short;
# the wider grid considered earlier is noted next to each entry.
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([256]))  # wider grid: [64, 128, 256]
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.0, 0.5]))  # wider grid: [0.0, 0.2, 0.5]
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([1e-3]))  # wider grid: [1e-3, 1e-4]

METRIC_ACCURACY = 'accuracy'

# Register the experiment layout for the TensorBoard HParams dashboard.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_LEARNING_RATE],
        metrics=[
            # Kept for any code that writes a plain `accuracy` scalar by hand.
            hp.Metric(METRIC_ACCURACY, display_name='Accuracy'),
            # The Keras `TensorBoard` callback writes per-epoch scalars as
            # `epoch_<metric>` inside the `train` / `validation` sub-runs of each
            # session directory; these entries are the ones the dashboard can
            # actually resolve for trials logged through that callback.
            hp.Metric('epoch_accuracy', group='validation', display_name='Accuracy (val.)'),
            hp.Metric('epoch_accuracy', group='train', display_name='Accuracy (train)'),
        ],
    )

In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def _build_classifier(hparams, num_classes, input_shape):
    """Build a ResNet50-based classifier whose backbone layers are frozen.

    Args:
        hparams: Mapping indexed by the `HP_NUM_UNITS` and `HP_DROPOUT` keys.
        num_classes: Width of the final softmax layer.
        input_shape: `(height, width, channels)` passed to `ResNet50`.

    Returns:
        An uncompiled `Model` going from the backbone input to class probabilities.
    """
    # NOTE(review): images reach the backbone exactly as the dataset yields them.
    # Keras documents `resnet50.preprocess_input` as the expected preprocessing
    # for the ImageNet weights - confirm whether it should be applied upstream.
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)

    # Only the new head is trained; every backbone layer keeps its ImageNet weights.
    for layer in backbone.layers:
        layer.trainable = False

    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(
        hparams[HP_NUM_UNITS],
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    )(features)
    features = Dropout(hparams[HP_DROPOUT])(features)
    predictions = Dense(num_classes, activation='softmax')(features)

    return Model(inputs=backbone.input, outputs=predictions)


def train_model(hparams, callbacks=None, epochs=2, num_classes=20, input_shape=(500, 500, 3)):
    """Train one hyperparameter configuration inside its own MLflow run.

    The datasets are read from the module-level `train_ds` and `val_ds`.
    Besides logging to MLflow, the function writes `best_weights.keras` and
    `final_model.weights.h5` to the current working directory.

    Args:
        hparams: Mapping indexed by `HP_NUM_UNITS`, `HP_DROPOUT` and
            `HP_LEARNING_RATE`.
        callbacks: Optional list of extra Keras callbacks (e.g. TensorBoard
            loggers). The list itself is not modified; `None` means no extras.
        epochs: Maximum number of training epochs.
        num_classes: Number of target classes (width of the softmax layer).
        input_shape: Input image shape; must match what the datasets yield.

    Returns:
        The `History` object returned by `model.fit`.
    """
    with mlflow.start_run():
        # Log the configuration up front so a run that fails mid-training can
        # still be identified in the MLflow UI.
        mlflow.log_params({
            "num_units": hparams[HP_NUM_UNITS],
            "dropout": hparams[HP_DROPOUT],
            "learning_rate": hparams[HP_LEARNING_RATE],
            "epochs": epochs,
        })

        model = _build_classifier(hparams, num_classes, input_shape)
        model.compile(
            optimizer=Adam(learning_rate=hparams[HP_LEARNING_RATE]),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        # With patience values of 5 and 3, these two callbacks can only take
        # effect when `epochs` is larger than the default of 2.
        early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3, min_lr=1e-6)

        # Use ".keras" file extension for model checkpoint in the new Keras format.
        # NOTE(review): the checkpoint tracks val_loss while the callbacks above
        # track val_accuracy; since the checkpoint is reloaded after training,
        # it is the val_loss-best epoch that ends up in the saved model.
        checkpoint_filepath = "best_weights.keras"
        model_checkpoint = ModelCheckpoint(
            checkpoint_filepath, save_best_only=True, monitor='val_loss', mode='min'
        )

        history = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=epochs,
            callbacks=list(callbacks or []) + [early_stopping, reduce_lr, model_checkpoint],
        )

        # "accuracy" is the best *training* accuracy (kept for continuity with
        # earlier runs); the validation metrics are the ones to compare trials on.
        mlflow.log_metric("accuracy", max(history.history['accuracy']))
        mlflow.log_metric("val_accuracy", max(history.history['val_accuracy']))
        mlflow.log_metric("val_loss", min(history.history['val_loss']))

        # Reload the checkpointed weights, then export them in ".weights.h5" format.
        model.load_weights(checkpoint_filepath)
        final_weights_path = "final_model.weights.h5"
        model.save_weights(final_weights_path)
        mlflow.log_artifact(final_weights_path)

        # Log the full model to MLflow.
        mlflow.keras.log_model(model, "model")

        return history


In [10]:
# Exhaustive grid search: one training run (and one TensorBoard session
# directory) per combination of the declared hyperparameter domains.
session_num = 1

for num_units in HP_NUM_UNITS.domain.values:
    for dropout in HP_DROPOUT.domain.values:
        for learning_rate in HP_LEARNING_RATE.domain.values:
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout,
                HP_LEARNING_RATE: learning_rate,
            }

            # Every trial builds a fresh ResNet50; release the Keras global
            # state left by the previous trial so memory does not accumulate
            # over the sweep.
            tf.keras.backend.clear_session()

            # Specify a unique log directory for each trial
            log_dir = f"logs/hparam_tuning/session_{session_num}"
            tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
            hparams_callback = hp.KerasCallback(log_dir, hparams)

            print(f"--- Starting trial: {session_num}")
            print({h.name: hparams[h] for h in hparams})

            # Per-trial logging callbacks are handed to train_model, which adds
            # them to its own training callbacks.
            train_model(hparams, callbacks=[tensorboard_callback, hparams_callback])
            session_num += 1


--- Starting trial: 1
{'num_units': 256, 'dropout': 0.0, 'learning_rate': 0.001}
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Epoch 1/2


I0000 00:00:1730728684.815974      87 service.cc:145] XLA service 0x7dfcfc004b50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730728684.816045      87 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1730728693.992133      87 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 314ms/step - accuracy: 0.6132 - loss: 4.8197 - val_accuracy: 0.8313 - val_loss: 1.9669 - learning_rate: 0.0010
Epoch 2/2
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 277ms/step - accuracy: 0.8912 - loss: 1.5508 - val_accuracy: 0.8675 - val_loss: 1.1439 - learning_rate: 0.0010




--- Starting trial: 2
{'num_units': 256, 'dropout': 0.5, 'learning_rate': 0.001}
Epoch 1/2
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 309ms/step - accuracy: 0.4344 - loss: 5.4866 - val_accuracy: 0.8537 - val_loss: 2.2307 - learning_rate: 0.0010
Epoch 2/2
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 278ms/step - accuracy: 0.8239 - loss: 2.0784 - val_accuracy: 0.8587 - val_loss: 1.4454 - learning_rate: 0.0010


