In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("enalis/tomatoes-dataset")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/enalis/tomatoes-dataset?dataset_version_number=4...


100%|██████████| 93.7M/93.7M [00:02<00:00, 38.9MB/s]

Extracting files...





Path to dataset files: /Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4


In [3]:
import os

val_path = r"/Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4/content/ieee-mbl-cls/val"

# Listar los archivos o carpetas dentro del directorio
contenido = os.listdir(val_path)

print("Contenido de la carpeta 'val':")
for item in contenido:
    print(item)

Contenido de la carpeta 'val':
Unripe
Old
Damaged
Ripe


In [4]:
import os

train_dir = os.path.join(path, 'train')
val_dir = os.path.join(path, 'validation')

# o si no están divididas en train/validation, puedes usar una sola carpeta y dividir tú mismo con ImageDataGenerator con validation_split

In [5]:
print(train_dir)
print(val_dir)

/Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4/train
/Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4/validation


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Parámetros
ancho, alto = 150, 150
batch_size = 32
epochs = 10

# Rutas a los directorios
train_dir = val_dir = '/Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4/content/ieee-mbl-cls/train'
val_dir = val_dir = '/Users/pablo/.cache/kagglehub/datasets/enalis/tomatoes-dataset/versions/4/content/ieee-mbl-cls/val'

# Aumento de datos para mejorar el entrenamiento
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Carga de imágenes desde carpetas
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(alto, ancho),
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(alto, ancho),
    batch_size=batch_size,
    class_mode='categorical'
)

# Definición del modelo CNN
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(alto, ancho, 3)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(4, activation='softmax')  # 3 clases: verde, pinton, maduro
])

# Compilación del modelo
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Entrenamiento
model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator
)

# Guardar el modelo entrenado
model.save('modelo_tomates.h5')

Found 6500 images belonging to 4 classes.
Found 724 images belonging to 4 classes.
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 108ms/step - accuracy: 0.6289 - loss: 0.8904 - val_accuracy: 0.7735 - val_loss: 0.5608
Epoch 2/10
[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 107ms/step - accuracy: 0.8021 - loss: 0.4845 - val_accuracy: 0.8605 - val_loss: 0.3717
Epoch 3/10
[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 110ms/step - accuracy: 0.8466 - loss: 0.3783 - val_accuracy: 0.8011 - val_loss: 0.5693
Epoch 4/10
[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 110ms/step - accuracy: 0.8537 - loss: 0.3788 - val_accuracy: 0.8798 - val_loss: 0.3080
Epoch 5/10
[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 111ms/step - accuracy: 0.8833 - loss: 0.3086 - val_accuracy: 0.8536 - val_loss: 0.3572
Epoch 6/10
[1m204/204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 111ms/step - accuracy: 0.8687 - loss: 0.3378 - val_accuracy: 0.8840 - val_loss: 0.2997
Epoch 7/10
[1m204/20



In [11]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np

# Cargar el modelo entrenado
model = load_model('modelo_tomates.h5')

# Lista de clases en el mismo orden que se usó en el entrenamiento
clases = ['verde', 'pinton', 'maduro']

# Ruta a la imagen que quieres clasificar
img_path = 'images/tomate-pinton-1.jpg'  # por ejemplo: 'test_tomate.jpg'

# Preprocesar la imagen
img = image.load_img(img_path, target_size=(150, 150))
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)  # Convertir a batch

# Realizar la predicción
prediccion = model.predict(img_array)
indice = np.argmax(prediccion)
confianza = np.max(prediccion)

# Mostrar resultado
print(f"La imagen es un tomate {clases[indice]} con una confianza del {confianza:.2%}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
La imagen es un tomate pinton con una confianza del 80.65%
