In [2]:
# 📌 Paso 1: Instalar y configurar Kaggle
!pip install -q kaggle

import os
os.makedirs('/root/.kaggle', exist_ok=True)

from google.colab import files
files.upload()  # ⬆️ Sube aquí tu archivo kaggle.json

!mv kaggle.json /root/.kaggle/kaggle.json
!chmod 600 /root/.kaggle/kaggle.json

# 📦 Paso 2: Descargar y descomprimir el dataset
!kaggle datasets download -d arjuntejaswi/plant-village
!unzip -q plant-village.zip -d plant_village

# 📂 Step 3: Basic exploration
from pathlib import Path
import os

# Extracted dataset root; each entry found here is treated as one class name.
data_dir = Path("plant_village/PlantVillage")
clases = os.listdir(data_dir)

# Report how many classes were found and show the first few names.
print(
    f"Número de clases: {len(clases)}",
    f"Ejemplo de clases: {clases[:5]}",
    sep="\n",
)

# 📸 Step 4: Image preprocessing
import shutil

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Keep only the classes whose folder name starts with "Tomato"
clases_tomate = list(filter(lambda nombre: nombre.startswith("Tomato"), clases))
print(f"Clases seleccionadas: {clases_tomate}")

# Build a separate directory tree that contains only the tomato class folders
base_dir = Path("dataset_tomate")
base_dir.mkdir(exist_ok=True)

for clase in clases_tomate:
    # dirs_exist_ok=True allows the copy to be repeated over an existing destination
    shutil.copytree(data_dir / clase, base_dir / clase, dirs_exist_ok=True)

# 📁 Training / validation split using ImageDataGenerator
# NOTE: only the `training` and `validation` subsets are created here; no
# separate test subset is produced by this cell.
img_size = (224, 224)
batch_size = 32
seed = 42  # makes the shuffling order of the training batches repeatable

# Generator that multiplies pixel values by 1/255 and reserves 20% of the
# images for the validation subset
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Training set: shuffled, with a fixed seed for reproducibility
train_gen = datagen.flow_from_directory(
    base_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=seed
)

# Validation set: NOT shuffled, so the batch order stays aligned with
# val_gen.classes / val_gen.filenames when predictions are compared with labels
# (confusion matrix, classification report, ...).
val_gen = datagen.flow_from_directory(
    base_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Keep the class-name -> index mapping for later reuse
class_indices = train_gen.class_indices
print(f"Índice de clases: {class_indices}")

# ✅ Ready to train CNN models with Keras and TensorFlow


Saving kaggle.json to kaggle (1).json
Dataset URL: https://www.kaggle.com/datasets/arjuntejaswi/plant-village
License(s): unknown
Downloading plant-village.zip to /content
 87% 285M/329M [00:00<00:00, 431MB/s]
100% 329M/329M [00:00<00:00, 457MB/s]
Número de clases: 15
Ejemplo de clases: ['Tomato_healthy', 'Potato___healthy', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato_Leaf_Mold']
Clases seleccionadas: ['Tomato_healthy', 'Tomato__Tomato_YellowLeaf__Curl_Virus', 'Tomato_Spider_mites_Two_spotted_spider_mite', 'Tomato_Leaf_Mold', 'Tomato__Tomato_mosaic_virus', 'Tomato_Early_blight', 'Tomato__Target_Spot', 'Tomato_Late_blight', 'Tomato_Bacterial_spot', 'Tomato_Septoria_leaf_spot']
Found 12813 images belonging to 10 classes.
Found 3198 images belonging to 10 classes.
Índice de clases: {'Tomato_Bacterial_spot': 0, 'Tomato_Early_blight': 1, 'Tomato_Late_blight': 2, 'Tomato_Leaf_Mold': 3, 'Tomato_Septoria_leaf_spot': 4, 'Tomato_Spider_mites_Tw

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive
from google.colab import drive

drive.mount(mountpoint='/content/drive')