# Import Libraries

In [None]:
import numpy as np
import gdown
import os
import shutil
import seaborn as sns
import zipfile
import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

# Download dataset

In [None]:
file_id = "1e45f1GmiKKtHkbZPg8WkyjemBVWgjfOE"

# Google Drive download URL built from the shared file id
url = 'https://drive.google.com/uc?id=' + file_id

# Local path where the downloaded archive is stored
output = 'dataset.zip'

# Download the archive only when it is not already on disk, so re-running the
# notebook does not fetch the whole file again.
if not os.path.exists(output):
    gdown.download(url, output, quiet=False)

# Extract the archive into the datasets/ directory
# (zipfile is already imported in the import cell at the top of the notebook)
with zipfile.ZipFile(output, 'r') as zip_ref:
    zip_ref.extractall('datasets/')

Downloading...
From (original): https://drive.google.com/uc?id=1e45f1GmiKKtHkbZPg8WkyjemBVWgjfOE
From (redirected): https://drive.google.com/uc?id=1e45f1GmiKKtHkbZPg8WkyjemBVWgjfOE&confirm=t&uuid=9e1cd2b9-90db-4017-bb12-e92912455ed7
To: /content/dataset.zip
100%|██████████| 653M/653M [00:35<00:00, 18.2MB/s]


# Preprocessing

## Load and Clean Dataset

In [None]:
def check_and_remove_invalid_images(root_dir):
    """Delete every file under ``root_dir`` that cannot be opened and verified.

    The directory tree is walked recursively. Each file is opened with
    ``Image.open`` and checked with ``verify()``; any exception marks the file
    as invalid and is printed. Once the scan is complete, the invalid files
    are removed from disk and each removal is printed.

    Args:
        root_dir: Directory whose files (including those in sub-directories)
            are checked.

    Returns:
        None.
    """
    def _is_broken(path):
        # Any failure while opening/verifying counts as "invalid image".
        try:
            with Image.open(path) as handle:
                handle.verify()
        except Exception as err:
            print(f"Error processing image: {path} - {err}")
            return True
        return False

    # Lazily enumerate every file path in walk order.
    candidates = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(root_dir)
        for name in names
    )
    broken = [path for path in candidates if _is_broken(path)]

    # Deletion happens only after the whole tree has been scanned.
    for path in broken:
        os.remove(path)
        print(f"Invalid image removed: {path}")

In [None]:
def get_file_paths_and_labels(root_dir):
    """Collect every file under ``root_dir`` together with its class label.

    Invalid images are first removed from disk by
    ``check_and_remove_invalid_images``. The label of a file is the base name
    of the directory that directly contains it.

    Directories and files are visited in sorted order so the returned lists
    are identical from run to run and machine to machine; ``os.walk`` itself
    gives no ordering guarantee, which would otherwise make a seeded
    train/validation split non-reproducible.

    Args:
        root_dir: Root directory of the dataset.

    Returns:
        A ``(file_paths, labels)`` tuple of two parallel lists.
    """
    # Side effect: deletes unreadable files before anything is collected.
    check_and_remove_invalid_images(root_dir)

    file_paths = []
    labels = []
    for subdir, dirs, files in os.walk(root_dir):
        # Sorting in place fixes the order in which os.walk descends.
        dirs.sort()
        # NOTE: files sitting directly in root_dir get basename(root_dir) as
        # label, which is '' when root_dir ends with a path separator.
        label = os.path.basename(subdir)
        for file in sorted(files):
            file_paths.append(os.path.join(subdir, file))
            labels.append(label)
    return file_paths, labels

In [None]:
# Source, training and validation directories
root_dir = '/content/datasets/'
train_dir = '/content/final-datasets/train'
val_dir = '/content/final-datasets/valid'

# Create the training and validation directories if they do not exist yet
for split_dir in (train_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

# Collect every file path together with its label
file_paths, labels = get_file_paths_and_labels(root_dir)

Error processing image: /content/datasets/daun kemangi/daun-kemangi_154.jpg - cannot identify image file '/content/datasets/daun kemangi/daun-kemangi_154.jpg'
Error processing image: /content/datasets/daun kemangi/daun-kemangi_160.jpg - cannot identify image file '/content/datasets/daun kemangi/daun-kemangi_160.jpg'
Error processing image: /content/datasets/daun kemangi/daun-kemangi_180.jpg - cannot identify image file '/content/datasets/daun kemangi/daun-kemangi_180.jpg'
Error processing image: /content/datasets/bawang putih/bawang-putih_152.jpg - cannot identify image file '/content/datasets/bawang putih/bawang-putih_152.jpg'
Invalid image removed: /content/datasets/daun kemangi/daun-kemangi_154.jpg
Invalid image removed: /content/datasets/daun kemangi/daun-kemangi_160.jpg
Invalid image removed: /content/datasets/daun kemangi/daun-kemangi_180.jpg
Invalid image removed: /content/datasets/bawang putih/bawang-putih_152.jpg


## Split Dataset

In [None]:
def split_and_move_files(file_paths, labels, train_dir, val_dir, test_size=0.2, random_state=234):
    """Split files into train/validation sets and copy them into class folders.

    The split is stratified on ``labels``. Despite the function name, files
    are copied with ``shutil.copy``; the originals stay in place.

    Args:
        file_paths: Paths of the files to split.
        labels: Label of each file, parallel to ``file_paths``; also used as
            the name of the per-class sub-directory.
        train_dir: Directory that receives the training files.
        val_dir: Directory that receives the validation files.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split``; defaults to the
            value that was previously hard-coded.

    Returns:
        None.
    """
    def _copy_into(base_dir, paths, path_labels):
        # Copy each file into <base_dir>/<label>/, creating the folder on demand.
        for file_path, label in zip(paths, path_labels):
            label_dir = os.path.join(base_dir, label)
            os.makedirs(label_dir, exist_ok=True)
            shutil.copy(file_path, label_dir)

    X_train, X_val, y_train, y_val = train_test_split(
        file_paths,
        labels,
        test_size=test_size,
        stratify=labels,
        random_state=random_state,
    )

    _copy_into(train_dir, X_train, y_train)
    _copy_into(val_dir, X_val, y_val)

In [None]:
# Split the collected files and copy them into the train and validation directories
split_and_move_files(file_paths, labels, train_dir, val_dir, test_size=0.2)

## Data Generator and Augmentation

In [None]:
# Image size and batch size shared by both generators
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 32

# Training generator: VGG16 preprocessing plus random augmentation.
# The ImageNet VGG16 weights expect inputs prepared by vgg16.preprocess_input,
# so it is used here in place of a plain 1/255 rescale.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation generator: same preprocessing, no augmentation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Build the directory-backed generators for training and validation
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

Found 5049 images belonging to 31 classes.
Found 1263 images belonging to 31 classes.


# Modeling

In [None]:
# Load the VGG16 convolutional base with ImageNet weights, without the top classifier
vgg16_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(256, 256, 3),
)
# Freeze the base so its weights are not updated during training
vgg16_model.trainable = False
vgg16_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 256, 256, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 256, 256, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 128, 128, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 128, 128, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 128, 128, 128)    

In [None]:
# Classification head stacked on top of the frozen VGG16 base.
# The output width follows the number of classes found by the training
# generator instead of a hard-coded 31.
model = Sequential([
    vgg16_model,
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(64, activation='relu'),
    Dense(train_generator.num_classes, activation='softmax')
])

optimizer = 'rmsprop'
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# The generators already yield batches, so batch_size is not passed to fit().
# The returned History is kept so the training curves can be inspected later.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    verbose=2,
)

Epoch 1/20




158/158 - 125s - loss: 3.6964 - accuracy: 0.0396 - val_loss: 3.4334 - val_accuracy: 0.0443 - 125s/epoch - 792ms/step
Epoch 2/20
158/158 - 113s - loss: 3.3543 - accuracy: 0.0709 - val_loss: 2.8618 - val_accuracy: 0.1726 - 113s/epoch - 718ms/step
Epoch 3/20
158/158 - 114s - loss: 2.5295 - accuracy: 0.2490 - val_loss: 1.9399 - val_accuracy: 0.4133 - 114s/epoch - 724ms/step
Epoch 4/20
158/158 - 115s - loss: 2.0340 - accuracy: 0.3795 - val_loss: 1.5713 - val_accuracy: 0.5146 - 115s/epoch - 729ms/step
Epoch 5/20
158/158 - 115s - loss: 1.8046 - accuracy: 0.4413 - val_loss: 1.3735 - val_accuracy: 0.5891 - 115s/epoch - 727ms/step
Epoch 6/20
158/158 - 115s - loss: 1.6490 - accuracy: 0.4862 - val_loss: 1.2086 - val_accuracy: 0.6334 - 115s/epoch - 729ms/step
Epoch 7/20
158/158 - 112s - loss: 1.5362 - accuracy: 0.5270 - val_loss: 1.0792 - val_accuracy: 0.6785 - 112s/epoch - 706ms/step
Epoch 8/20
158/158 - 114s - loss: 1.5095 - accuracy: 0.5302 - val_loss: 1.0730 - val_accuracy: 0.6619 - 114s/epoch 

<keras.src.callbacks.History at 0x7e020e73a260>