In [12]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img,img_to_array

In [13]:
base_path = Path('/kaggle/input/fruit-and-vegetable-image-recognition')

train_dir = base_path / 'train'
train_filepaths = list(train_dir.glob('**/*.jpg'))

val_dir = base_path / 'validation'
val_filepaths = list(val_dir.glob('**/*.jpg'))

test_dir = base_path / 'test'
test_filepaths = list(test_dir.glob('**/*.jpg'))

In [14]:
def image_processing(filepath):
    labels = [str(filepath[i]).split("/")[-2] \
              for i in range(len(filepath))]

    filepath = pd.Series(filepath, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    df = pd.concat([filepath, labels], axis=1)
    df = df.sample(frac=1).reset_index(drop = True)
    
    return df

In [15]:
train_df = image_processing(train_filepaths)
test_df = image_processing(test_filepaths)
val_df = image_processing(val_filepaths)

print('-- Training set --\n')
print(f'Number of pictures: {train_df.shape[0]}\n')
print(f'Number of different labels: {len(train_df.Label.unique())}\n')
print(f'Labels: {train_df.Label.unique()}')

-- Training set --

Number of pictures: 2780

Number of different labels: 36

Labels: ['pear' 'kiwi' 'paprika' 'cabbage' 'cauliflower' 'tomato' 'bell pepper'
 'grapes' 'capsicum' 'chilli pepper' 'pineapple' 'spinach' 'cucumber'
 'beetroot' 'mango' 'soy beans' 'raddish' 'orange' 'banana' 'sweetcorn'
 'eggplant' 'lettuce' 'peas' 'lemon' 'watermelon' 'sweetpotato' 'corn'
 'onion' 'apple' 'jalepeno' 'potato' 'turnip' 'carrot' 'ginger'
 'pomegranate' 'garlic']


In [16]:
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

In [17]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

Found 2780 validated image filenames belonging to 36 classes.


In [18]:
val_images = train_generator.flow_from_dataframe(
    dataframe=val_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=0,
    rotation_range=30,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

Found 334 validated image filenames belonging to 36 classes.


In [19]:
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

Found 334 validated image filenames belonging to 36 classes.


In [20]:
pretrained_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg'
)
pretrained_model.trainable = False

In [21]:
inputs = pretrained_model.input

x = tf.keras.layers.Dense(128, activation='relu')(pretrained_model.output)
x = tf.keras.layers.Dense(128, activation='relu')(x)

outputs = tf.keras.layers.Dense(36, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_images,
    validation_data=val_images,
    batch_size = 32,
    epochs=10,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True
        )
    ]
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 894ms/step - accuracy: 0.3446 - loss: 2.5560 - val_accuracy: 0.8533 - val_loss: 0.5112
Epoch 2/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 804ms/step - accuracy: 0.8198 - loss: 0.5801 - val_accuracy: 0.9102 - val_loss: 0.3171
Epoch 3/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 793ms/step - accuracy: 0.8925 - loss: 0.3389 - val_accuracy: 0.9102 - val_loss: 0.2653
Epoch 4/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 793ms/step - accuracy: 0.9351 - loss: 0.2210 - val_accuracy: 0.9222 - val_loss: 0.2342
Epoch 5/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 809ms/step - accuracy: 0.9513 - loss: 0.1620 - val_accuracy: 0.9581 - val_loss: 0.1737
Epoch 6/10
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 782ms/step - accuracy: 0.9721 - loss: 0.1077 - val_accuracy: 0.9371 - val_loss: 0.1845
Epoch 7/10
[1m87/87[

In [22]:
from IPython.display import FileLink
model.save('/kaggle/working/model_buah_sayur.h5')
FileLink('model_buah_sayur.h5')