In [1]:
# GPU runtime settings, exported through the process environment.
# This cell runs before the cell that imports TensorFlow.
import os

os.environ.update({
    'TF_GPU_ALLOCATOR': 'cuda_malloc_async',   # GPU allocator selection
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',       # GPU memory-growth switch
    'TF_CPP_MIN_LOG_LEVEL': '2',               # TensorFlow native log verbosity
})

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import MobileNetV2, EfficientNetB0
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

import mlflow
import dagshub

import json
import gc
import warnings
# Install a catch-all "ignore" filter so no Python warning is shown from here on.
# NOTE(review): this also hides deprecation and runtime warnings; consider
# narrowing the filter to specific categories or modules.
warnings.filterwarnings(action='ignore')

# Version banner for the two core libraries, one per line.
print(
    f"TensorFlow: {tf.__version__}",
    f"Optuna: {optuna.__version__}",
    sep="\n",
)

  from .autonotebook import tqdm as notebook_tqdm


TensorFlow: 2.20.0
Optuna: 4.6.0


In [4]:
# Detect the visible GPUs and turn on memory growth for each of them.
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs disponibles: {len(gpus)}")

if not gpus:
    # No accelerator found: nothing to configure.
    print("No se detectó GPU, usando CPU")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(device=gpu, enable=True)
    except RuntimeError as err:
        # Report the failure and keep the notebook running.
        print(f"Error al configurar GPU: {err}")
    else:
        # Reached only when every device was configured without error.
        print(f"GPU configurada correctamente: {gpus[0].name}")

GPUs disponibles: 1
GPU configurada correctamente: /physical_device:GPU:0


In [5]:
# Experiment tracking: initialise the DagsHub repository with its MLflow
# integration enabled, then select the MLflow experiment for this notebook.
dagshub.init(
    repo_name='Tarea_6',
    repo_owner='404brainnotfound-ai',
    mlflow=True,
)
mlflow.set_experiment(experiment_name="Plant_Pathology_2020")

# Show the tracking URI that MLflow currently reports.
print(f"URI de seguimiento MLflow: {mlflow.get_tracking_uri()}")

2025/11/27 12:33:59 INFO mlflow.tracking.fluent: Experiment with name 'Plant_Pathology_2020' does not exist. Creating a new experiment.


URI de seguimiento MLflow: https://dagshub.com/404brainnotfound-ai/Tarea_6.mlflow


In [6]:
# Dataset paths, all built relative to the dataset's base directory.
DIRECTORIO_BASE = "plant-pathology-2020-fgvc7"
DIRECTORIO_IMAGENES = os.path.join(DIRECTORIO_BASE, "images")
ARCHIVO_TRAIN = os.path.join(DIRECTORIO_BASE, "train.csv")
ARCHIVO_TEST = os.path.join(DIRECTORIO_BASE, "test.csv")

# Training parameters
TAMAÑO_IMAGEN = (224, 224)
TAMAÑO_LOTE = 16
SEMILLA = 42
NUM_CLASES = 4

# Seed every random number generator for reproducibility.
# `set_random_seed` seeds Python's `random`, NumPy and TensorFlow in one call.
# The previous code seeded only TensorFlow and NumPy, so anything drawing from
# Python's `random` module still varied from run to run.
tf.keras.utils.set_random_seed(SEMILLA)

# Echo the key settings so they are recorded in the notebook output.
print("Configuración:")
print(f"  Tamaño de imagen: {TAMAÑO_IMAGEN}")
print(f"  Tamaño de lote: {TAMAÑO_LOTE}")
print(f"  Número de clases: {NUM_CLASES}")

Configuración:
  Tamaño de imagen: (224, 224)
  Tamaño de lote: 16
  Número de clases: 4


In [7]:
# Load the training CSV, append the '.jpg' extension to every image id,
# drop rows whose image id repeats, and renumber the index from zero.
df = (
    pd.read_csv(ARCHIVO_TRAIN)
    .assign(image_id=lambda tabla: tabla['image_id'] + '.jpg')
    .drop_duplicates(subset='image_id')
    .reset_index(drop=True)
)

# Quick summary: row count followed by a preview of the first rows.
print(f"Total de imágenes: {len(df)}")
print("\nPrimeras filas:")
print(df.head())

Total de imágenes: 1821

Primeras filas:
      image_id  healthy  multiple_diseases  rust  scab
0  Train_0.jpg        0                  0     0     1
1  Train_1.jpg        0                  1     0     0
2  Train_2.jpg        1                  0     0     0
3  Train_3.jpg        0                  0     1     0
4  Train_4.jpg        1                  0     0     0
