In [None]:
import os, zipfile
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import preprocess_input
import json
import tensorflow as tf
import shutil

In [None]:
# Download the fruit and vegetable datasets from Kaggle.
# Each command fetches one dataset identified by its "owner/dataset" slug (-d).
# NOTE(review): the Kaggle CLI presumably needs API credentials configured
# beforehand — confirm before running on a fresh machine.
!kaggle datasets download -d keycia/fruit-nutrionix
!kaggle datasets download -d keycia/vegetables-nutrionix

In [2]:
# Extract the downloaded zip archives.
# Each pair is (archive path, directory the archive is unpacked into).
for archive_path, extract_to in (
    ('./fruit-nutrionix.zip', './'),
    ('./vegetables-nutrionix.zip', './dataset'),
):
    with zipfile.ZipFile(archive_path) as zipref:
        zipref.extractall(extract_to)

# Output root for the train/val/test split, and the two extracted source folders.
data_dir = "./data"
fruit = "./fruit_dataset"
vegetable = "./dataset"

In [3]:
def split_dataset(original_dir, base_dir):
    """Split a folder-per-class image dataset into train/val/test folders.

    Every sub-directory of ``original_dir`` is treated as one class. Its
    entries are divided 60% / 20% / 20% and *moved* (not copied) into
    ``base_dir/<split>/<class_name>/`` where ``<split>`` is ``train``,
    ``val`` or ``test``.

    Args:
        original_dir: Directory that contains one sub-directory per class.
        base_dir: Output root; the split/class sub-directories are created
            when they do not exist yet.

    Notes:
        * Entries of ``original_dir`` that are not directories are skipped
          instead of being treated as classes.
        * Directory listings are sorted because ``os.listdir`` returns
          entries in arbitrary order; sorting keeps the seeded split
          reproducible.
        * A class folder with fewer than three entries cannot populate all
          three splits, so whatever it holds is moved to ``train``. An empty
          class folder is therefore a no-op, which makes re-running the
          function safe after the files have already been moved.
    """
    split_names = ('train', 'val', 'test')

    # Only real directories are classes; stray files are ignored.
    class_names = sorted(
        entry for entry in os.listdir(original_dir)
        if os.path.isdir(os.path.join(original_dir, entry))
    )

    for class_name in class_names:
        class_dir = os.path.join(original_dir, class_name)

        # Create the per-class sub-folder inside every split directory.
        target_dirs = {
            split: os.path.join(base_dir, split, class_name)
            for split in split_names
        }
        for target_dir in target_dirs.values():
            os.makedirs(target_dir, exist_ok=True)

        files = sorted(os.listdir(class_dir))

        if len(files) < 3:
            # Too few samples for a three-way split.
            assignment = {'train': files, 'val': [], 'test': []}
        else:
            # First hold out 20% for test, then 25% of the remaining 80%
            # for validation (0.25 x 0.8 = 0.2 of the whole class).
            train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)
            train_files, val_files = train_test_split(train_files, test_size=0.25, random_state=42)
            assignment = {'train': train_files, 'val': val_files, 'test': test_files}

        # shutil.move also works when source and destination are on
        # different filesystems, where os.rename would fail.
        for split, split_files in assignment.items():
            for file in split_files:
                shutil.move(os.path.join(class_dir, file),
                            os.path.join(target_dirs[split], file))

In [5]:
# Split both source datasets into train, validation and test folders
# under the shared output directory.
for source_dir in (fruit, vegetable):
    split_dataset(source_dir, data_dir)

In [6]:
# Locations of the three split folders consumed by the data generators.
train_data_dir, validation_data_dir, test_data_dir = (
    "./data/train",
    "./data/val",
    "./data/test",
)

In [None]:
# Image augmentation.
# Only the training images are randomly transformed; validation and test
# images just go through the Xception preprocessing function.
_augmentation = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **_augmentation)
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by all three directory iterators; only the batch size differs.
_flow_kwargs = dict(target_size=(300, 300), class_mode='categorical')

train_generator = train_datagen.flow_from_directory(
    train_data_dir, batch_size=128, **_flow_kwargs)

validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir, batch_size=32, **_flow_kwargs)

test_generator = test_datagen.flow_from_directory(
    test_data_dir, batch_size=32, **_flow_kwargs)

In [None]:
# Derive the number of classes from the training generator's label mapping.
class_labels = train_generator.class_indices.keys()
num_class = len(class_labels)
print("num_class: ", num_class)
# Show the class labels as the cell output.
class_labels

In [9]:
# Store the class labels as a JSON list.
label_names = list(train_generator.class_indices.keys())
with open('metadata.json', 'w') as fp:
    json.dump(label_names, fp)

In [None]:
# Xception backbone with ImageNet weights and without its classification head.
base_model_xception = tf.keras.applications.Xception(
    include_top=False,
    weights="imagenet",
    input_shape=(300, 300, 3),
)

# Freeze every backbone layer; the layers added below keep their default
# trainable state.
for layer in base_model_xception.layers:
    layer.trainable = False

# Classification head stacked on top of the frozen backbone.
sequential_model = tf.keras.models.Sequential()
sequential_model.add(base_model_xception)
sequential_model.add(tf.keras.layers.Flatten())
sequential_model.add(tf.keras.layers.Dense(1024, activation="relu"))
sequential_model.add(tf.keras.layers.Dense(num_class, activation="softmax"))

sequential_model.summary()

In [11]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the generators yield 'categorical' labels), accuracy as the metric.
sequential_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs, validating on the validation generator after each
# epoch; the returned object is kept in `history`.
history = sequential_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    verbose=1,
)

In [None]:
# Evaluate the trained model on the test generator; the returned value is
# displayed as the cell output.
sequential_model.evaluate(test_generator)

In [None]:
# Save the trained model to a .h5 file, at a path relative to the current
# working directory.
sequential_model.save("./fruit_and_vegetable_clasification_colab.h5")

In [None]:
# Copy the saved model file to Google Drive.
# The source uses the same relative path the model is saved to, so the copy
# does not depend on the working directory being /content.
drive_dir = "/content/drive/MyDrive"
if not os.path.isdir(drive_dir):
    # Fail with an actionable message instead of an opaque copy error.
    raise FileNotFoundError(
        f"{drive_dir} does not exist - mount Google Drive before copying the model."
    )
shutil.copy2("./fruit_and_vegetable_clasification_colab.h5", "/content/drive/MyDrive/fruit_and_vegetable_clasification_colab.h5")