In [1]:
# Configuration for local training
import numpy as np
import pandas as pd
import os

# Sanity-check the dataset layout before anything else runs.
dataset_path = './dataset'
if not os.path.exists(dataset_path):
    print("‚ùå Dataset no encontrado. Aseg√∫rate de extraer el ZIP en ./dataset")
else:
    print("‚úÖ Dataset encontrado")
    # Report only the first directory (in os.walk order) that holds any files.
    first_populated = next(
        ((folder, files) for folder, _, files in os.walk(dataset_path) if files),
        None,
    )
    if first_populated is not None:
        folder, files = first_populated
        print(f"üìÅ {folder}: {len(files)} archivos")


‚úÖ Dataset encontrado
üìÅ ./dataset\test\apple: 10 archivos


In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
import tensorflow as tf
from tf_keras.preprocessing.image import load_img, img_to_array
# Print the installed TensorFlow version so the environment used for this run is visible in the output.
print(f"TensorFlow version: {tf.__version__}")



TensorFlow version: 2.20.0
TensorFlow version: 2.20.0


In [3]:
# The 15 class labels the classifier is restricted to. Rows whose label is
# not in this list are dropped when the DataFrames are built.
# NOTE(review): 'jalepeno' presumably mirrors the dataset's folder name - confirm before "fixing" the spelling.
FRUITS_ONLY = [
    'apple',
    'banana',
    'bell pepper',
    'chilli pepper',
    'grapes',
    'jalepeno',
    'kiwi',
    'lemon',
    'mango',
    'orange',
    'paprika',
    'pear',
    'pineapple',
    'pomegranate',
    'watermelon',
]

# Show the selected classes as a numbered, title-cased list.
print(f"üçé Entrenaremos solo con {len(FRUITS_ONLY)} frutas:")
for position, name in enumerate(FRUITS_ONLY, start=1):
    print(f"  {position}. {name.title()}")


üçé Entrenaremos solo con 15 frutas:
  1. Apple
  2. Banana
  3. Bell Pepper
  4. Chilli Pepper
  5. Grapes
  6. Jalepeno
  7. Kiwi
  8. Lemon
  9. Mango
  10. Orange
  11. Paprika
  12. Pear
  13. Pineapple
  14. Pomegranate
  15. Watermelon


In [4]:
# Local paths: collect every .jpg file found recursively under the training split.
train_dir = Path('./dataset') / 'train'
train_filepaths = [image_path for image_path in train_dir.rglob('*.jpg')]
print(f"‚úÖ Total im√°genes de entrenamiento: {len(train_filepaths)}")


‚úÖ Total im√°genes de entrenamiento: 2855


In [5]:
# Collect every .jpg file found recursively under the test split.
test_dir = Path('./dataset') / 'test'
test_filepaths = [image_path for image_path in test_dir.rglob('*.jpg')]
print(f"‚úÖ Total im√°genes de prueba: {len(test_filepaths)}")


‚úÖ Total im√°genes de prueba: 340


In [6]:
# Collect every .jpg file found recursively under the validation split.
val_dir = Path('./dataset') / 'validation'
val_filepaths = [image_path for image_path in val_dir.rglob('*.jpg')]
print(f"‚úÖ Total im√°genes de validaci√≥n: {len(val_filepaths)}")


‚úÖ Total im√°genes de validaci√≥n: 340


In [7]:
def image_processing(filepath, labels_to_keep=None, random_state=None):
    """Build a shuffled DataFrame of image paths and their class labels.

    The label of each image is the name of the directory that directly
    contains it (e.g. ``dataset/train/apple/img.jpg`` -> ``apple``). The
    directory name is taken with ``pathlib`` so the result does not depend
    on the platform's path separator.

    Args:
        filepath: Iterable of image paths (``str`` or ``pathlib.Path``).
        labels_to_keep: Labels to retain. Defaults to the module-level
            ``FRUITS_ONLY`` list when omitted.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced. ``None`` keeps the shuffle unseeded.

    Returns:
        pandas.DataFrame with the columns ``Filepath`` (str) and ``Label``,
        restricted to ``labels_to_keep``, rows shuffled and re-indexed from 0.
    """
    if labels_to_keep is None:
        labels_to_keep = FRUITS_ONLY

    # Materialise once so generators/iterators can be traversed twice.
    entries = list(filepath)

    df = pd.DataFrame({
        'Filepath': [str(entry) for entry in entries],
        # Parent directory name == class label, regardless of OS separator.
        'Label': [Path(entry).parent.name for entry in entries],
    })

    # Keep only the requested classes.
    df = df[df['Label'].isin(labels_to_keep)]

    # Shuffle the rows and rebuild a clean 0..n-1 index.
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    print(f"‚úÖ Total im√°genes filtradas: {len(df)}")

    return df


In [8]:
# Build the label/filepath DataFrame for each split, in train -> test -> validation order.
train_df, test_df, val_df = (
    image_processing(split_paths)
    for split_paths in (train_filepaths, test_filepaths, val_filepaths)
)

‚úÖ Total im√°genes filtradas: 1135
‚úÖ Total im√°genes filtradas: 137
‚úÖ Total im√°genes filtradas: 137


In [9]:
# Summarise the training split: number of images and the distinct class labels.
train_labels = train_df.Label.unique()

print('-- Training set --\n')
print(f'Number of pictures: {len(train_df)}\n')
print(f'Number of different labels: {len(train_labels)}\n')
print(f'Labels: {train_labels}')

-- Training set --

Number of pictures: 1135

Number of different labels: 15

Labels: ['chilli pepper' 'apple' 'bell pepper' 'mango' 'pomegranate' 'pineapple'
 'jalepeno' 'kiwi' 'paprika' 'grapes' 'banana' 'watermelon' 'pear'
 'orange' 'lemon']


In [9]:
# Preview the first five rows (Filepath, Label) of the training DataFrame.
train_df.head(5)

Unnamed: 0,Filepath,Label
0,../input/fruit-and-vegetable-image-recognition...,grapes
1,../input/fruit-and-vegetable-image-recognition...,pomegranate
2,../input/fruit-and-vegetable-image-recognition...,banana
3,../input/fruit-and-vegetable-image-recognition...,carrot
4,../input/fruit-and-vegetable-image-recognition...,garlic


In [None]:
# Create a DataFrame with one row (the first occurrence) per class label.
df_unique = train_df.copy().drop_duplicates(subset=["Label"]).reset_index()

# Size the grid from the number of classes actually present. A fixed 6x6 grid
# (36 axes) would index past the end of df_unique once the labels are filtered
# down to fewer classes.
n_classes = len(df_unique)
n_cols = 6
n_rows = max(1, -(-n_classes // n_cols))  # ceiling division, at least one row

# Display one sample picture per class.
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(8, 7),
                        squeeze=False,
                        subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(axes.flat):
    if i >= n_classes:
        # Hide the unused cells of the last row instead of indexing out of range.
        ax.set_visible(False)
        continue
    ax.imshow(plt.imread(df_unique.Filepath[i]))
    ax.set_title(df_unique.Label[i], fontsize = 12)
plt.tight_layout(pad=0.5)
plt.show()

In [10]:
import tf_keras as keras

# Both generators apply the same MobileNetV2 input preprocessing to every image.
mobilenet_preprocess = keras.applications.mobilenet_v2.preprocess_input
ImageDataGenerator = keras.preprocessing.image.ImageDataGenerator

train_generator = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)
test_generator = ImageDataGenerator(preprocessing_function=mobilenet_preprocess)


In [11]:
# Augmentation must be configured on the ImageDataGenerator itself.
# flow_from_dataframe() accepts these settings through **kwargs but ignores
# them, so passing them there means the training images are never augmented.
train_augmenter = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=keras.applications.mobilenet_v2.preprocess_input,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Batches of augmented 224x224 RGB training images with one-hot labels.
train_images = train_augmenter.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0
)

Found 1135 validated image filenames belonging to 15 classes.


In [12]:
# Validation batches: 224x224 RGB images with one-hot labels.
# The augmentation keyword arguments previously passed here were ignored by
# flow_from_dataframe(), so they are dropped. Validation data is read through
# the plain preprocessing-only generator so it is never augmented.
val_images = test_generator.flow_from_dataframe(
    dataframe=val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0
)

Found 137 validated image filenames belonging to 15 classes.


In [13]:
# Test batches are read in DataFrame order (no shuffling) so predictions can
# be matched back to the rows of test_df.
test_flow_options = {
    'dataframe': test_df,
    'x_col': 'Filepath',
    'y_col': 'Label',
    'target_size': (224, 224),
    'color_mode': 'rgb',
    'class_mode': 'categorical',
    'batch_size': 32,
    'shuffle': False,
}
test_images = test_generator.flow_from_dataframe(**test_flow_options)

Found 137 validated image filenames belonging to 15 classes.


In [14]:
import tf_keras as keras

# MobileNetV2 backbone with ImageNet weights: no classification top,
# global-average-pooled output, 224x224 RGB input.
backbone_options = {
    'input_shape': (224, 224, 3),
    'include_top': False,
    'weights': 'imagenet',
    'pooling': 'avg',
}
pretrained_model = keras.applications.MobileNetV2(**backbone_options)

# Freeze the backbone so only layers added on top of it are trained.
pretrained_model.trainable = False
print("‚úÖ Modelo MobileNetV2 cargado correctamente")





‚úÖ Modelo MobileNetV2 cargado correctamente
‚úÖ Modelo MobileNetV2 cargado correctamente


In [15]:
import tf_keras as keras

# Classification head stacked on the frozen backbone's pooled output.
inputs = pretrained_model.input

x = keras.layers.Dense(128, activation='relu')(pretrained_model.output)
x = keras.layers.Dense(128, activation='relu')(x)

# Size the softmax layer from the classes the training iterator actually
# found, instead of a hard-coded 15, so the output width always matches the
# one-hot labels it produces.
num_classes = len(train_images.class_indices)
outputs = keras.layers.Dense(num_classes, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("üöÄ Iniciando entrenamiento con 15 frutas, 10 √©pocas...")
history = model.fit(
    train_images,
    validation_data=val_images,
    # No batch_size here: the iterators already yield batches (size set in
    # flow_from_dataframe), and fit() should not be given one for generator input.
    epochs=10,  # raised from 5 to 10 for better accuracy
    callbacks=[
        # Stop when val_loss has not improved for 3 epochs and roll back to
        # the best weights seen.
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        )
    ]
)
print("‚úÖ Entrenamiento completado")



üöÄ Iniciando entrenamiento con 15 frutas, 10 √©pocas...
üöÄ Iniciando entrenamiento con 15 frutas, 10 √©pocas...
Epoch 1/10
Epoch 1/10




Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
‚úÖ Entrenamiento completado
‚úÖ Entrenamiento completado


In [None]:
# Persist the fruits-only model to an .h5 file in the working directory.
model_filename = 'FV_Fruits_Only.h5'
model.save(model_filename)
print("‚úÖ Modelo guardado como 'FV_Fruits_Only.h5'")

fruit_count = len(FRUITS_ONLY)
print(f"üìä Modelo entrenado con {fruit_count} frutas")


  saving_api.save_model(


‚úÖ Modelo guardado como 'FV_Fruits_Only.h5'
üìä Modelo entrenado con 15 frutas


: 