**Import Library**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization,GlobalAveragePooling2D
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow import keras
from tensorflow.keras.optimizers import Adam


from keras.models import load_model
from keras.preprocessing.image import load_img, img_to_array
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.applications.resnet import ResNet101
from tensorflow.keras.applications import ResNet50
from keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import EfficientNetV2L

from PIL import Image
import shutil

import random
import os
import warnings

# **1. LOADING DATASET FROM GDRIVE**

**Mounting Google Drive in Google Colab**

In [None]:
# Mount Google Drive at /content/drive so later cells can read the dataset
# stored under /content/drive/MyDrive. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

ValueError: mount failed

# **2. INSPECT DATASET**

**Listing and Sorting Folders in a Dataset Directory**

In [None]:
# Root folder of the raw dataset on the mounted Drive.
path = '/content/drive/MyDrive/DATASET'
# Top-level entries, sorted alphabetically; later cells treat each one as a plant folder.
folders = sorted(os.listdir(path))
print(folders)

# NOTE(review): `catagories` is a misspelling of "categories"; it is left as-is
# because cells outside this view may reference the name.
catagories = folders

**Collecting Disease Count Data for Plants**

In [None]:
# Disease subfolder names, one list per plant (aligned with `folders`)
subfolders = [os.listdir(os.path.join(path, plant_name)) for plant_name in folders]

# One record per (plant, disease) pair holding the number of entries in that folder
data = []
for plant_name, disease_names in zip(folders, subfolders):
    for disease_name in disease_names:
        entry_count = len(os.listdir(os.path.join(path, plant_name, disease_name)))
        data.append({'Plant': plant_name, 'Disease': disease_name, 'Count': entry_count})


df = pd.DataFrame(data)
# Show every row instead of pandas' truncated view
pd.set_option('display.max_rows', None)

print(df)

The disease classes are clearly imbalanced: some classes have very few images, while others have far more than the rest.

**Visualizing Disease Case Counts for Each Plant Using Bar Charts**

In [None]:
# Rebuild the frame from the collected per-class records
df = pd.DataFrame(data)

# One bar chart per plant, in order of first appearance
plants = df['Plant'].unique()

for plant in plants:
    # Keep only the rows that belong to this plant
    plant_data = df.loc[df['Plant'] == plant]

    # Draw the chart on an explicit figure/axes pair
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(plant_data['Disease'], plant_data['Count'], color='green')
    ax.set_title(f'Jumlah Kasus Penyakit untuk {plant}')
    ax.set_xlabel('Penyakit')
    ax.set_ylabel('Jumlah Kasus')
    plt.xticks(rotation=45, ha='right')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

In [None]:
# Treemap of the raw dataset: outer tiles are plants, inner tiles are diseases,
# tile area is proportional to the 'Count' column.
fig = px.treemap(df, path=['Plant', 'Disease'], values='Count', title='Plant and Disease Distribution')
fig.show()

**Calculating Disease Counts and Percentages with DataFrame Aggregation**

In [None]:
# Total image count per disease name (summed across plants)
disease_counts = df.groupby('Disease')['Count'].sum()

# Share of each disease in the whole dataset, in percent
disease_percentages = (disease_counts / disease_counts.sum()) * 100

# Combine both series and order from the largest class to the smallest
disease_balance = (
    pd.DataFrame({'Count': disease_counts, 'Percentage': disease_percentages})
    .sort_values('Count', ascending=False)
)

disease_balance

**Descriptive Statistics of the Dataset**

In [None]:
# Summary statistics of the numeric 'Count' column (per-class image counts).
df.describe()

# **3. DATA CLEANING**

**Clean and Organize Images by Limiting and Copying by Category**

In [None]:
def reduce_and_copy_images(source_dir, target_dir, max_images, seed=None):
    """
    Copy a plant/disease image tree into ``target_dir``, capping every class.

    For each ``source_dir/<plant>/<disease>`` folder, at most ``max_images``
    files are copied to ``target_dir/<plant>/<disease>``. When a class holds
    more files than the cap, a random subset is chosen.

    Args:
        source_dir: Root folder laid out as ``<plant>/<disease>/<files>``.
        target_dir: Destination root; created if missing.
        max_images: Maximum number of files to keep per disease folder.
        seed: Optional seed. When given, the random selection is reproducible
            across runs; when None, the global ``random`` state is used.
    """
    # A dedicated generator keeps the selection reproducible without
    # touching the global random state.
    rng = random.Random(seed) if seed is not None else random

    os.makedirs(target_dir, exist_ok=True)

    # Sorted traversal so a given seed always yields the same selection.
    for plant in sorted(os.listdir(source_dir)):
        plant_path = os.path.join(source_dir, plant)

        # Only plant folders are processed; stray files are ignored.
        if not os.path.isdir(plant_path):
            continue

        for disease in sorted(os.listdir(plant_path)):
            disease_path = os.path.join(plant_path, disease)

            if not os.path.isdir(disease_path):
                print(f"'{disease_path}' is not a folder.")
                continue

            # Regular files only: shutil.copy would raise on a nested directory.
            files = sorted(
                name for name in os.listdir(disease_path)
                if os.path.isfile(os.path.join(disease_path, name))
            )

            target_disease_path = os.path.join(target_dir, plant, disease)
            os.makedirs(target_disease_path, exist_ok=True)

            # Down-sample only the classes that exceed the cap.
            if len(files) > max_images:
                print(f"Reducing '{plant}/{disease}' from {len(files)} to {max_images} images.")
                selected_files = rng.sample(files, max_images)
            else:
                selected_files = files

            for file in selected_files:
                shutil.copy(os.path.join(disease_path, file),
                            os.path.join(target_disease_path, file))
            print(f"Copied {len(selected_files)} images to '{target_disease_path}'")

# Example usage: copy the Drive dataset to local Colab storage, keeping at most
# `max_images` files per disease class.
source_directory = '/content/drive/MyDrive/DATASET'
target_directory = '/content/PlantDisease_Dataset'

max_images = 160

reduce_and_copy_images(source_directory, target_directory, max_images)

**Visualization of Plant and Disease Distribution Using Treemap**

In [None]:
reduced_path = '/content/PlantDisease_Dataset'

# Plant folders of the reduced copy, alphabetically
folders = sorted(os.listdir(reduced_path))

# Disease subfolder names, one list per plant (aligned with `folders`)
subfolders = [os.listdir(os.path.join(reduced_path, plant_name)) for plant_name in folders]

# One record per (plant, disease) pair with the number of entries in its folder
data = [
    {
        'Plant': plant_name,
        'Disease': disease_name,
        'Count': len(os.listdir(os.path.join(reduced_path, plant_name, disease_name))),
    }
    for plant_name, disease_names in zip(folders, subfolders)
    for disease_name in disease_names
]

# Tabular view of the reduced dataset
dt = pd.DataFrame(data)

fig = px.treemap(dt, path=['Plant', 'Disease'], values='Count', title='Plant and Disease Distribution')
fig.show()

**Descriptive Statistics of Disease Data with DataFrame Describe Function**

In [None]:
# Summary statistics of the per-class counts after the reduction step.
dt.describe()

**Renaming Image Files in a Plant Disease Dataset with a Consistent Format**

In [None]:
#rename image file with format Plant_Disease_ImageNumber.jpg and make all same format file
def rename_images(dataset_path):
    """
    Rename every image to ``<plant>_<disease>_<n>.jpg`` inside its class folder.

    Walks ``dataset_path/<plant>/<disease>/`` and renumbers the image files of
    each disease folder starting from 1 (in sorted filename order). Only the
    name changes; the file contents are not converted.

    The rename is done in two passes (original -> temporary -> final) so that
    a file which already carries one of the target names is never overwritten,
    e.g. when the cell is run a second time.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)
        # Only plant folders are processed
        if not os.path.isdir(plant_path):
            continue

        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue

            # Sorted so the numbering is deterministic across runs
            image_files = sorted(
                f for f in os.listdir(disease_path) if f.lower().endswith(image_exts)
            )

            # Pass 1: park every image under a temporary name so that all
            # final names are guaranteed to be free.
            staged = []
            for i, image_file in enumerate(image_files):
                temp_name = f"__renaming_{i}__{image_file}"
                os.rename(os.path.join(disease_path, image_file),
                          os.path.join(disease_path, temp_name))
                staged.append((image_file, temp_name))

            # Pass 2: move each parked file to its final numbered name.
            for i, (image_file, temp_name) in enumerate(staged):
                new_name = f"{plant}_{disease}_{i+1}.jpg"
                os.rename(os.path.join(disease_path, temp_name),
                          os.path.join(disease_path, new_name))
                print(f"Renamed '{image_file}' to '{new_name}'")

# Root of the reduced local copy produced by the reduction step
dataset_path = '/content/PlantDisease_Dataset'

# Rename the images in place (prints one line per renamed file)
rename_images(dataset_path)

**Renaming Subfolders in Plant Disease Dataset to a Consistent Format**

In [None]:
#rename subfolder with format Plant_Disease
def rename_folders(dataset_path):
    """
    Prefix every disease folder with its plant name.

    Each ``dataset_path/<plant>/<disease>`` directory is renamed in place to
    ``dataset_path/<plant>/<plant>_<disease>``. Non-directory entries at either
    level are left untouched.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/...``.
    """
    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)
        # Skip anything at the top level that is not a plant folder
        if not os.path.isdir(plant_path):
            continue

        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)
            # Skip loose files inside the plant folder
            if not os.path.isdir(disease_path):
                continue

            new_name = f"{plant}_{disease}"
            os.rename(disease_path, os.path.join(plant_path, new_name))
            print(f"Renamed '{disease}' to '{new_name}'")

# Root of the reduced local copy
dataset_path = '/content/PlantDisease_Dataset'

# Rename the disease folders in place.
# NOTE(review): running this cell twice prefixes the plant name twice.
rename_folders(dataset_path)

**Splitting Plant Disease Dataset into Training and Testing Sets with a Specified Ratio**

In [None]:
import os
import shutil
import random

def _copy_files(file_names, source_dir, target_dir):
    """Copy each named file from source_dir to target_dir, reporting copy errors instead of raising."""
    for file in file_names:
        try:
            shutil.copy2(os.path.join(source_dir, file), os.path.join(target_dir, file))
        except OSError as e:
            # shutil.Error is a subclass of OSError, so this also covers
            # SameFileError as well as permission / disk errors.
            print(f"Error copying {file}: {e}")


def split_dataset(dataset_path, training_path, testing_path, split_ratio=0.8, seed=None):
    """
    Split a plant/disease image tree into training and testing copies.

    For every ``dataset_path/<plant>/<disease>`` folder the image files are
    shuffled, the first ``split_ratio`` share is copied to
    ``training_path/<plant>/<disease>`` and the remainder to
    ``testing_path/<plant>/<disease>``. The source files are left in place.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
        training_path: Destination root for the training share.
        testing_path: Destination root for the testing share.
        split_ratio: Fraction (0..1) of each class that goes to training.
        seed: Optional seed. When given, the shuffle (and therefore the
            split) is reproducible; when None, the global ``random`` state
            is used.

    Raises:
        ValueError: If ``split_ratio`` is outside the range [0, 1].
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    rng = random.Random(seed) if seed is not None else random

    os.makedirs(training_path, exist_ok=True)
    os.makedirs(testing_path, exist_ok=True)

    # Sorted traversal so a given seed always produces the same split.
    for plant in sorted(os.listdir(dataset_path)):
        plant_path = os.path.join(dataset_path, plant)
        if not os.path.isdir(plant_path):
            continue

        for disease in sorted(os.listdir(plant_path)):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue

            image_files = sorted(
                f for f in os.listdir(disease_path) if f.lower().endswith(image_exts)
            )
            rng.shuffle(image_files)  # Shuffle before cutting so the split is random

            split_index = int(len(image_files) * split_ratio)
            training_files = image_files[:split_index]
            testing_files = image_files[split_index:]

            train_dir = os.path.join(training_path, plant, disease)
            test_dir = os.path.join(testing_path, plant, disease)
            os.makedirs(train_dir, exist_ok=True)
            os.makedirs(test_dir, exist_ok=True)

            _copy_files(training_files, disease_path, train_dir)
            _copy_files(testing_files, disease_path, test_dir)

            print(f"Split '{plant}/{disease}' into {len(training_files)} training files and {len(testing_files)} testing images")



# Define the dataset paths - CHANGE THESE TO YOUR ACTUAL PATHS!!
# Source is the reduced/renamed copy; train and test are written as siblings
# under /content/Plant_Disease_Dataset.
dataset_path = '/content/PlantDisease_Dataset' #CHANGE PATHS!
training_path = '/content/Plant_Disease_Dataset/train' #CHANGE PATHS!
testing_path = '/content/Plant_Disease_Dataset/test'   #CHANGE PATHS!


# Fraction of every class copied to the training folder; the rest goes to test.
split_ratio = 0.8
split_dataset(dataset_path, training_path, testing_path, split_ratio=split_ratio)

**Visualizing Plant and Disease Distribution Using a Treemap**

In [None]:
fixed_path = '/content/Plant_Disease_Dataset/train'
# Plant folders of the training split, alphabetically
folders = sorted(os.listdir(fixed_path))

# Disease subfolder names, one list per plant (aligned with `folders`)
subfolders = [os.listdir(os.path.join(fixed_path, plant_name)) for plant_name in folders]

# One record per (plant, disease) pair with the number of entries in its folder
data = []
for plant_name, disease_names in zip(folders, subfolders):
    plant_dir = os.path.join(fixed_path, plant_name)
    data.extend(
        {
            'Plant': plant_name,
            'Disease': disease_name,
            'Count': len(os.listdir(os.path.join(plant_dir, disease_name))),
        }
        for disease_name in disease_names
    )

# Tabular view of the training split
dt = pd.DataFrame(data)

fig = px.treemap(dt, path=['Plant', 'Disease'], values='Count', title='Plant and Disease Distribution')
fig.show()

**Listing and Counting Images in Plant and Disease Subfolders**

In [None]:
# Disease subfolder names per plant in the training split (aligned with `folders`)
subfolders = [os.listdir(os.path.join(fixed_path, plant_name)) for plant_name in folders]

# One record per (plant, disease) pair with the number of entries in its folder
data = []
for plant_name, disease_names in zip(folders, subfolders):
    for disease_name in disease_names:
        class_dir = os.path.join(fixed_path, plant_name, disease_name)
        data.append({'Plant': plant_name, 'Disease': disease_name, 'Count': len(os.listdir(class_dir))})


df = pd.DataFrame(data)
# Show every row instead of pandas' truncated view
pd.set_option('display.max_rows', None)

print(df)

**Resizing Images to 224x224 Pixels and Converting to RGB Format**

In [None]:
#resize image to 224x224 pixel and convert to RGB format
def resize_images(dataset_path, target_size=(224, 224)):
    """
    Convert every image under ``dataset_path`` to RGB and resize it in place.

    The tree is walked recursively. Each file with an image extension is
    opened, converted to RGB, resized to ``target_size`` and written back to
    the same path as JPEG data. Failures are printed and the walk continues.

    Args:
        dataset_path: Root folder to walk.
        target_size: ``(width, height)`` passed to ``Image.resize``.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    for root, _, files in os.walk(dataset_path):
        for name in files:
            if not name.lower().endswith(image_suffixes):
                continue

            file_path = os.path.join(root, name)
            try:
                with Image.open(file_path) as source:
                    # Always written as JPEG data, whatever the file extension is
                    rgb = source.convert('RGB')
                    resized = rgb.resize(target_size)
                    resized.save(file_path, 'JPEG')
                    print(f"Resized '{file_path}' to {target_size}")
            except Exception as e:
                print(f"Error processing '{file_path}': {e}")


# Only the training split is resized here; the images are overwritten in place.
dataset_path = '/content/Plant_Disease_Dataset/train'  #CHANGE PATHS!

# Resize the images
resize_images(dataset_path)

**Describing the Dataset: Counting Plants, Diseases, and Images**

In [None]:
#describe the dataset
def describe_dataset(dataset_path):
    """
    Print how many plants, diseases and images the dataset contains.

    Only directories are counted as plants (top level) and diseases (second
    level), so stray files such as notes or hidden files do not inflate the
    totals. Images are the files with an image extension inside each disease
    folder.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    num_plants = 0
    num_diseases = 0
    num_images = 0

    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)
        if not os.path.isdir(plant_path):
            continue
        num_plants += 1

        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue
            num_diseases += 1
            num_images += sum(
                1 for f in os.listdir(disease_path) if f.lower().endswith(image_exts)
            )

    print(f"Dataset contains {num_plants} plants, {num_diseases} diseases, and {num_images} images.")

# Summarise the training split
dataset_path = '/content/Plant_Disease_Dataset/train' #CHANGE PATHS!

# Describe the dataset
describe_dataset(dataset_path)

**Importing Libraries for Image Processing and Deep Learning with TensorFlow**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as implt
import seaborn as sns
sns.set_style("whitegrid")

**Visualizing the Number of Training Images per Category**

In [None]:
# Top-level entries of the training directory (one folder per plant)
category_names = os.listdir(training_path)
nb_categories = len(category_names)
train_images = []

for category in category_names:
    folder = os.path.join(training_path, category)
    # The images sit one level down, in per-disease subfolders, so count files
    # recursively instead of counting the subfolders themselves.
    train_images.append(sum(len(files) for _, _, files in os.walk(folder)))

sns.barplot( x=train_images, y=category_names).set_title("Number Of Training Images Per Category");

**Visualizing the Number of Testing Images per Category**

In [None]:
# Number of testing images per category (plant folder)
val_images = []
for category in category_names:
    folder = os.path.join(testing_path, category)
    # Count files recursively: the images sit inside per-disease subfolders.
    val_images.append(sum(len(files) for _, _, files in os.walk(folder)))

# Plot the testing counts (the original plotted the training counts here by mistake)
sns.barplot(x=val_images, y=category_names).set_title("Number Of Testing Images Per Category");

**Loading and Displaying Training Images with ImageDataGenerator**

In [None]:
# Define the paths to the training images
training_path = '/content/Plant_Disease_Dataset/train'  #CHANGE PATHS!

# Read the images from the training path; pixel values are rescaled to [0, 1]
# and 20% of the files are reserved for the 'validation' subset.
train_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_data_gen.flow_from_directory(training_path, target_size=(224, 224), class_mode='categorical', subset='training')

# Plot a 3x3 grid showing the first image of nine consecutive batches
plt.figure(figsize=(12, 12))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    # next() pulls exactly one batch. The sample is not bound to the name
    # `image`, which would shadow the `image` module imported earlier.
    X_batch, Y_batch = next(train_generator)
    plt.imshow(X_batch[0])
plt.tight_layout()
plt.show()


# **4. AUGMENTATION**

**Augmenting Images in Plant Disease Dataset with Data Augmentation Techniques**

In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img
import matplotlib.pyplot as plt

# Dataset locations. Absolute paths are used so this cell agrees with the other
# cells, which read '/content/Plant_Disease_Dataset/...' regardless of the
# current working directory.
dataset_dir = '/content/Plant_Disease_Dataset/train'
output_dir = '/content/Plant_Disease_Dataset/augmented'

# Number of augmented copies written for every original image
AUGMENTATIONS_PER_IMAGE = 5

# Create the output root if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# Random geometric augmentations applied to every generated image
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Walk <plant>/<disease>/<image> and mirror the structure under output_dir
for plant in os.listdir(dataset_dir):
    plant_path = os.path.join(dataset_dir, plant)
    if os.path.isdir(plant_path):
        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)
            # Skip loose files that sit next to the disease folders
            if not os.path.isdir(disease_path):
                continue
            output_path = os.path.join(output_dir, plant, disease)

            # Create the output folder for this disease if needed
            os.makedirs(output_path, exist_ok=True)

            # Augment every image file of this disease folder
            for img_file in os.listdir(disease_path):
                img_path = os.path.join(disease_path, img_file)
                try:
                    # Load the image as a batch of one: shape (1, H, W, C)
                    img = load_img(img_path)
                    x = img_to_array(img)
                    x = x.reshape((1,) + x.shape)

                    # Each next() call generates one augmented image and saves
                    # it to output_path. Looping exactly AUGMENTATIONS_PER_IMAGE
                    # times writes 5 files (the previous `i > 5` check wrote 6).
                    flow = datagen.flow(x, batch_size=1, save_to_dir=output_path, save_prefix='aug', save_format='jpeg')
                    for _ in range(AUGMENTATIONS_PER_IMAGE):
                        next(flow)
                except Exception as e:
                    print(f"Error processing {img_file}: {e}")

print("Proses augmentasi selesai!")


**Renaming Image Files in Plant Disease Dataset to Standard Format**

In [None]:
#rename image file with format Plant_Disease_ImageNumber.jpg and make all same format file
def rename_images(dataset_path):
    """
    Rename every image to ``<disease>_<n>.jpg`` inside its class folder.

    Walks ``dataset_path/<plant>/<disease>/`` and renumbers the image files of
    each disease folder starting from 1 (in sorted filename order). Only the
    name changes; the file contents are not converted.

    NOTE: this redefines the ``rename_images`` from the data-cleaning section
    with a different name pattern (no plant prefix).

    The rename is done in two passes (original -> temporary -> final) so that
    a file which already carries one of the target names is never overwritten,
    e.g. when the cell is run a second time.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)
        # Only plant folders are processed
        if not os.path.isdir(plant_path):
            continue

        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue

            # Sorted so the numbering is deterministic across runs
            image_files = sorted(
                f for f in os.listdir(disease_path) if f.lower().endswith(image_exts)
            )

            # Pass 1: park every image under a temporary name so that all
            # final names are guaranteed to be free.
            staged = []
            for i, image_file in enumerate(image_files):
                temp_name = f"__renaming_{i}__{image_file}"
                os.rename(os.path.join(disease_path, image_file),
                          os.path.join(disease_path, temp_name))
                staged.append((image_file, temp_name))

            # Pass 2: move each parked file to its final numbered name.
            for i, (image_file, temp_name) in enumerate(staged):
                new_name = f"{disease}_{i+1}.jpg"
                os.rename(os.path.join(disease_path, temp_name),
                          os.path.join(disease_path, new_name))
                print(f"Renamed '{image_file}' to '{new_name}'")

# Root of the augmented images written by the augmentation cell
dataset_path = '/content/Plant_Disease_Dataset/augmented'

# Rename the images
rename_images(dataset_path)

**Counting Augmented Images in Plant Disease Dataset**

In [None]:
augmented_path = '/content/Plant_Disease_Dataset/augmented'
# NOTE(review): `folders` is not recomputed here; it comes from an earlier cell
# (the plant folders of the training split).
subfolders = [os.listdir(os.path.join(augmented_path, plant_name)) for plant_name in folders]

# One record per (plant, disease) pair with the number of entries in its folder
data = []
for plant_name, disease_names in zip(folders, subfolders):
    for disease_name in disease_names:
        class_dir = os.path.join(augmented_path, plant_name, disease_name)
        data.append({'Plant': plant_name, 'Disease': disease_name, 'Count': len(os.listdir(class_dir))})


df = pd.DataFrame(data)
# Show every row instead of pandas' truncated view
pd.set_option('display.max_rows', None)

print(df)

**Display Sample Image for Each Disease in Plants**

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

def display_one_image_per_disease(dataset_path):
    """
    Show one sample image for every disease of every plant in a single grid.

    For each ``dataset_path/<plant>/<disease>`` folder the first file (in
    sorted order) is displayed, titled with the plant, the disease and the
    image size. The grid has 4 columns and as many rows as needed.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
    """
    # Collect (plant, disease, sample file path) for every non-empty class
    images_info = []
    for plant in sorted(os.listdir(dataset_path)):
        plant_path = os.path.join(dataset_path, plant)
        if not os.path.isdir(plant_path):
            continue
        for disease in sorted(os.listdir(plant_path)):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue
            # Sorted so the same sample is picked on every run
            files = sorted(os.listdir(disease_path))
            if files:
                images_info.append((plant, disease, os.path.join(disease_path, files[0])))

    # Nothing to draw: a grid with zero rows cannot be created
    num_images = len(images_info)
    if num_images == 0:
        print(f"No images found under '{dataset_path}'.")
        return

    # Grid size: 4 images per row, rounded up
    num_columns = 4
    num_rows = (num_images + num_columns - 1) // num_columns

    # squeeze=False always returns a 2-D array of axes, so the single-row case
    # needs no special handling before flattening.
    fig, axes = plt.subplots(num_rows, num_columns, figsize=(16, 5 * num_rows), squeeze=False)
    fig.suptitle('Sampel Gambar dari Setiap Penyakit', fontsize=18, y=1.02)
    axes = axes.flatten()

    # Hide the frame of every cell, including the unused trailing cells
    for ax in axes:
        ax.axis('off')

    for ax, (plant, disease, file_path) in zip(axes, images_info):
        try:
            # Open the image and read its size for the title
            with Image.open(file_path) as img:
                width, height = img.size
                ax.imshow(img)
                ax.set_title(f"{plant} - {disease}\n{width}x{height}", fontsize=12, pad=15)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

    # Leave room at the top so the figure title stays readable
    plt.tight_layout(rect=[0, 0, 1, 0.95], pad=3.0)
    plt.show()

# Run the function on the training split.
# NOTE(review): `reduced_path` is reused here for the train folder, not the reduced dataset.
reduced_path = '/content/Plant_Disease_Dataset/train'
display_one_image_per_disease(reduced_path)


**Display One Sample Image per Disease for Each Plant in a Grid**

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

def display_one_image_per_disease(dataset_path):
    """
    Show one sample image for every disease of every plant in a single grid.

    For each ``dataset_path/<plant>/<disease>`` folder the first file (in
    sorted order) is displayed, titled with the plant, the disease and the
    image size. The grid has 4 columns and as many rows as needed.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/<images>``.
    """
    # Collect (plant, disease, sample file path) for every non-empty class
    images_info = []
    for plant in sorted(os.listdir(dataset_path)):
        plant_path = os.path.join(dataset_path, plant)
        if not os.path.isdir(plant_path):
            continue
        for disease in sorted(os.listdir(plant_path)):
            disease_path = os.path.join(plant_path, disease)
            if not os.path.isdir(disease_path):
                continue
            # Sorted so the same sample is picked on every run
            files = sorted(os.listdir(disease_path))
            if files:
                images_info.append((plant, disease, os.path.join(disease_path, files[0])))

    # Nothing to draw: a grid with zero rows cannot be created
    num_images = len(images_info)
    if num_images == 0:
        print(f"No images found under '{dataset_path}'.")
        return

    # Grid size: 4 images per row, rounded up
    num_columns = 4
    num_rows = (num_images + num_columns - 1) // num_columns

    # squeeze=False always returns a 2-D array of axes, so the single-row case
    # needs no special handling before flattening.
    fig, axes = plt.subplots(num_rows, num_columns, figsize=(16, 5 * num_rows), squeeze=False)
    fig.suptitle('Sampel Gambar dari Setiap Penyakit', fontsize=18, y=1.02)
    axes = axes.flatten()

    # Hide the frame of every cell, including the unused trailing cells
    for ax in axes:
        ax.axis('off')

    for ax, (plant, disease, file_path) in zip(axes, images_info):
        try:
            # Open the image and read its size for the title
            with Image.open(file_path) as img:
                width, height = img.size
                ax.imshow(img)
                ax.set_title(f"{plant} - {disease}\n{width}x{height}", fontsize=12, pad=15)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

    # Leave room at the top so the figure title stays readable
    plt.tight_layout(rect=[0, 0, 1, 0.95], pad=3.0)
    plt.show()

# Run the function on the augmented images.
# NOTE(review): `reduced_path` is reused here for the augmented folder.
reduced_path = '/content/Plant_Disease_Dataset/augmented'
display_one_image_per_disease(reduced_path)


In [None]:
import os

# Main dataset path (the augmented images, which are later used for training)
DATASET_PATH = '/content/Plant_Disease_Dataset/augmented'  # Replace with your dataset path

def rename_subfolders(dataset_path):
    """
    Flatten ``<plant>/<disease>`` folders into ``<plant>_<disease>`` folders.

    Every disease directory is moved up to ``dataset_path`` under the name
    ``<plant>_<disease>``. Afterwards the plant folder is removed if it is
    empty; if it cannot be removed, the reason is printed and the folder stays.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/...``.
    """
    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)

        # Only folders are processed
        if not os.path.isdir(plant_path):
            continue

        for disease in os.listdir(plant_path):
            disease_path = os.path.join(plant_path, disease)

            # Loose files inside the plant folder are left where they are
            if not os.path.isdir(disease_path):
                continue

            # New location: one level up, named Plant_Disease
            new_folder_path = os.path.join(dataset_path, f"{plant}_{disease}")
            os.rename(disease_path, new_folder_path)
            print(f"Renamed: {disease_path} -> {new_folder_path}")

        # Remove the plant folder once it is empty
        try:
            os.rmdir(plant_path)
            print(f"Removed empty folder: {plant_path}")
        except OSError as e:
            print(f"Could not remove folder: {plant_path}, {e}")

# Run the function: flatten the augmented dataset into one folder per class
rename_subfolders(DATASET_PATH)


In [None]:
import os

# Main dataset path (the held-out test split)
DATASET_PATH = '/content/Plant_Disease_Dataset/test'  # Replace with your dataset path

def rename_subfolders(dataset_path):
    """
    Flatten ``<plant>/<disease>`` folders into ``<plant>_<disease>`` folders.

    Every disease directory is moved up to ``dataset_path`` under the name
    ``<plant>_<disease>``. Afterwards the plant folder is removed if it is
    empty; if it cannot be removed, the reason is printed and the folder stays.

    NOTE: this is a second definition of the same function from the previous cell.

    Args:
        dataset_path: Root folder laid out as ``<plant>/<disease>/...``.
    """
    for plant in os.listdir(dataset_path):
        plant_path = os.path.join(dataset_path, plant)

        # Only folders are processed
        if not os.path.isdir(plant_path):
            continue

        # Disease directories of this plant; loose files are skipped
        disease_dirs = [
            entry for entry in os.listdir(plant_path)
            if os.path.isdir(os.path.join(plant_path, entry))
        ]

        for disease in disease_dirs:
            disease_path = os.path.join(plant_path, disease)
            # New location: one level up, named Plant_Disease
            new_folder_name = f"{plant}_{disease}"
            new_folder_path = os.path.join(dataset_path, new_folder_name)

            os.rename(disease_path, new_folder_path)
            print(f"Renamed: {disease_path} -> {new_folder_path}")

        # Remove the plant folder once it is empty
        try:
            os.rmdir(plant_path)
            print(f"Removed empty folder: {plant_path}")
        except OSError as e:
            print(f"Could not remove folder: {plant_path}, {e}")

# Run the function: flatten the test split the same way as the augmented data
rename_subfolders(DATASET_PATH)



# **5. MODELING**

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, Callback
from tensorflow.keras.optimizers import Adam

**Define Dataset Paths and Configuration Parameters for Image Processing**

In [None]:
# --- Define Parameters ---
# Training images (augmented, flattened to one folder per class)
TRAIN_PATH = "/content/Plant_Disease_Dataset/augmented"
# Held-out test images (flattened the same way)
TEST_PATH = "/content/Plant_Disease_Dataset/test"
# Image size fed to the generators and the network input
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
# Upper bound on epochs; the accuracy callback can stop training earlier
EPOCHS = 170
# Initial learning rate for Adam
LEARNING_RATE = 1e-3
# Compared against the 'accuracy' value logged at the end of each epoch
TARGET_ACCURACY = 0.98  # 98% Target Accuracy
MODEL_SAVE_PATH = "Train_23_nov_3.keras"  # Ensure .keras extension

**CNN Model Training for Plant Disease Classification with Augmented Data**

In [None]:
# Automatically Extract Class Indices from Dataset Structure
def get_class_indices(base_path):
    """
    Build the ``{class_name: index}`` mapping for a class-per-folder dataset.

    Only the immediate subdirectories of ``base_path`` are treated as classes,
    indexed in sorted order, so that the mapping lines up with how
    ``flow_from_directory`` infers classes. Nested directories and plain files
    are ignored (the previous ``os.walk`` version listed nested directories
    as extra classes).

    Args:
        base_path: Dataset root containing one subdirectory per class.

    Returns:
        dict mapping each class folder name to its integer index.
    """
    labels = sorted(
        entry for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    )
    return {label: idx for idx, label in enumerate(labels)}

# Check Class Mapping: print the label -> index dictionary derived from TRAIN_PATH
class_indices = get_class_indices(TRAIN_PATH)
print(f"Class Indices Mapping: {class_indices}")

In [None]:
# --- Define CNN Model ---
def create_cnn_model(input_shape, num_classes):
    """
    Build the (uncompiled) CNN classifier.

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten, Dense(256, ReLU), Dropout(0.5) and a
    softmax output layer with ``num_classes`` units.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of output classes.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = Sequential()

    # First convolution stage carries the input shape
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))

    # Remaining convolution stages only differ in the filter count
    for filters in (64, 128):
        model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))

    # Classification head
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [None]:
# --- Early Stopping by Accuracy Callback ---
class EarlyStoppingByAccuracy(Callback):
    """
    Stop training and save the model once the logged accuracy reaches a target.

    At the end of every epoch the 'accuracy' entry of ``logs`` is compared with
    ``target_accuracy``; when it is at least the target, the model is saved to
    ``model_save_path`` and training is stopped.

    NOTE(review): 'accuracy' is the training metric, not 'val_accuracy'.
    """

    def __init__(self, target_accuracy, model_save_path):
        super().__init__()
        self.target_accuracy = target_accuracy
        self.model_save_path = model_save_path

    def on_epoch_end(self, epoch, logs=None):
        # A missing logs dict or a missing metric means there is nothing to check
        accuracy = (logs or {}).get('accuracy')
        if accuracy is None:
            return
        if not (accuracy >= self.target_accuracy):
            return

        print(f"\nTarget accuracy {self.target_accuracy * 100:.2f}% reached at epoch {epoch + 1}.")
        print("Stopping training...")
        self.model.save(self.model_save_path)
        print(f"Model saved to {self.model_save_path}.")
        self.model.stop_training = True

In [None]:
# --- Training and Validation ---
def train_model(train_path, test_path, img_size, batch_size, epochs, learning_rate, target_accuracy, model_save_path):
    """Build, train and evaluate the CNN on directory-structured image data.

    Images under ``train_path`` are split 80/20 into training and validation
    subsets; images under ``test_path`` are used only for the final
    evaluation. The only preprocessing is multiplying pixel values by 1/255.

    Both the ModelCheckpoint (best ``val_loss``) and the
    EarlyStoppingByAccuracy callback are given ``model_save_path``, so they
    write to the same file. The returned model holds the weights from the end
    of training; it is not reloaded from that file.

    Args:
        train_path: Directory with one subdirectory per class (train + validation).
        test_path: Directory with one subdirectory per class (test).
        img_size: Target image size; its first two entries also define the
            model input height and width.
        batch_size: Batch size for all three generators.
        epochs: Maximum number of training epochs.
        learning_rate: Learning rate for the Adam optimizer.
        target_accuracy: Threshold forwarded to EarlyStoppingByAccuracy.
        model_save_path: File path used by the checkpoint and early-stop callbacks.

    Returns:
        Tuple ``(model, history, class_indices)`` where ``class_indices`` is
        the label -> index mapping of the training generator.
    """
    # Arguments shared by all three directory iterators.
    flow_kwargs = dict(
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    # Data Generators
    train_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)
    test_datagen = ImageDataGenerator(rescale=1.0 / 255)

    train_generator = train_datagen.flow_from_directory(train_path, subset='training', **flow_kwargs)
    validation_generator = train_datagen.flow_from_directory(train_path, subset='validation', **flow_kwargs)
    # Fixed order for the test set.
    test_generator = test_datagen.flow_from_directory(test_path, shuffle=False, **flow_kwargs)

    # Create and compile the model (three colour channels).
    height, width = img_size[0], img_size[1]
    model = create_cnn_model((height, width, 3), train_generator.num_classes)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Callbacks, in the order they are handed to fit().
    callbacks = [
        ModelCheckpoint(model_save_path, monitor='val_loss', save_best_only=True, verbose=1),
        EarlyStoppingByAccuracy(target_accuracy=target_accuracy, model_save_path=model_save_path),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1),
    ]

    # Train Model
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=callbacks,
    )

    # Evaluate Model: evaluate() yields [loss, accuracy] for this compile config.
    test_loss, accuracy = model.evaluate(test_generator)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    return model, history, train_generator.class_indices

In [None]:
# --- Train and Save Model ---
# Keyword arguments make the pairing between config constants and parameters explicit.
model, history, class_indices = train_model(
    train_path=TRAIN_PATH,
    test_path=TEST_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    target_accuracy=TARGET_ACCURACY,
    model_save_path=MODEL_SAVE_PATH,
)

In [None]:
# --- Plot Training History ---
def plot_training_history(history):
    """Draw training vs. validation curves for accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a dict containing the
            per-epoch lists ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss`` (as returned by ``model.fit`` with validation data).
    """
    # (training key, validation key, panel caption) for each subplot, left to right.
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )

    plt.figure(figsize=(12, 5))
    for position, (train_key, val_key, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label='Train')
        plt.plot(history.history[val_key], label='Validation')
        plt.title(caption)
        plt.xlabel('Epochs')
        plt.ylabel(caption)
        plt.legend()

    plt.tight_layout()
    plt.show()

plot_training_history(history)

In [None]:
import numpy as np
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.models import load_model

# Manually defined class-index mapping (label -> output index of the model).
# NOTE(review): this rebinds `class_indices`; it is presumably meant to mirror
# the mapping produced during training - confirm against
# train_generator.class_indices.
class_indices = {
    # Banana
    'Banana_Banana_cordana': 0,
    'Banana_Banana_healthy': 1,
    'Banana_Banana_pestalotiopsis': 2,
    'Banana_Banana_sigatoka': 3,
    # Bean
    'Bean_Bean_angular_leaf_spot': 4,
    'Bean_Bean_bean_rust': 5,
    'Bean_Bean_healthy': 6,
    # Cassava
    'Cassava_Cassava_cassava_bacterial_blight_cbb': 7,
    'Cassava_Cassava_cassava_brown_streak_disease_cbsd': 8,
    'Cassava_Cassava_cassava_green_mottle_cgm': 9,
    'Cassava_Cassava_cassava_mosaic_disease_cmd': 10,
    'Cassava_Cassava_healthy': 11,
    # Cauli Flower
    'Cauli Flower_Cauli Flower_bacterial_spot_rot': 12,
    'Cauli Flower_Cauli Flower_downy_mildew': 13,
    'Cauli Flower_Cauli Flower_no_disease': 14,
    # Corn
    'Corn_Corn_blight': 15,
    'Corn_Corn_common_rust': 16,
    'Corn_Corn_gray_leaf_spot': 17,
    'Corn_Corn_healthy': 18,
    # Cucumber
    'Cucumber_Cucumber_anthracnose': 19,
    'Cucumber_Cucumber_bacterial_wilt': 20,
    'Cucumber_Cucumber_belly_rot': 21,
    'Cucumber_Cucumber_downy_mildew': 22,
    'Cucumber_Cucumber_fresh_cucumber': 23,
    'Cucumber_Cucumber_fresh_leaf': 24,
    'Cucumber_Cucumber_gummy_stem_blight': 25,
    'Cucumber_Cucumber_pythium_fruit_rot': 26,
    # Mango
    'Mango_Mango_anthracnose': 27,
    'Mango_Mango_bacterial_canker': 28,
    'Mango_Mango_cutting_weevil': 29,
    'Mango_Mango_die_back': 30,
    'Mango_Mango_gall_midge': 31,
    'Mango_Mango_healthy': 32,
    'Mango_Mango_powdery_mildew': 33,
    'Mango_Mango_sooty_mould': 34,
    # Potato
    'Potato_Potato_alternaria_solani': 35,
    'Potato_Potato_healthy': 36,
    'Potato_Potato_insect': 37,
    'Potato_Potato_phytopthora_infestans': 38,
    'Potato_Potato_virus': 39,
    # Pumpkin
    'Pumpkin_Pumpkin_Bacterial Leaf Spot': 40,
    'Pumpkin_Pumpkin_Downy Mildew': 41,
    'Pumpkin_Pumpkin_Healthy Leaf': 42,
    'Pumpkin_Pumpkin_Mosaic Disease': 43,
    'Pumpkin_Pumpkin_Powdery_Mildew': 44,
}

# Image prediction function
def predict_image(model_path, image_path, img_size, class_indices):
    """Classify a single image file with a saved model.

    The model is loaded from ``model_path`` on every call. The image is read
    at ``img_size``, divided by 255 and wrapped in a batch of one before
    being passed to ``model.predict``.

    Args:
        model_path: Path of the saved model to load.
        image_path: Path of the image to classify.
        img_size: Target size handed to ``load_img``.
        class_indices: Mapping of class label -> output index.

    Returns:
        Tuple ``(plant_name, disease_name, confidence)``. The label is split
        at its first underscore; if it has none, the whole label is the plant
        name and the disease is "Tidak diketahui". An index missing from
        ``class_indices`` yields the label "Unknown". ``confidence`` is the
        predicted probability of the chosen class multiplied by 100.
    """
    # Load the model
    model = load_model(model_path)

    # Load and preprocess the image: normalise, then add the batch dimension
    pixels = img_to_array(load_img(image_path, target_size=img_size)) / 255.0
    batch = np.expand_dims(pixels, axis=0)

    # Predict and pick the highest-scoring class of the single batch item
    predictions = model.predict(batch)
    predicted_class = np.argmax(predictions, axis=1)[0]

    # Invert the mapping so the index can be turned back into a label
    index_to_label = dict(zip(class_indices.values(), class_indices.keys()))
    full_label = index_to_label.get(predicted_class, "Unknown")

    # Separate the plant name from the disease name at the first underscore
    nama_tanaman, separator, penyakit_tanaman = full_label.partition('_')
    if not separator:
        penyakit_tanaman = "Tidak diketahui"

    confidence_score = predictions[0][predicted_class] * 100

    return nama_tanaman, penyakit_tanaman, confidence_score

# Parameter definitions
model_path = 'Train_23_nov_3.keras'  # Replace with the path to your model
image_path = '/content/PlantDisease_Dataset/Potato/Potato_alternaria_solani/Potato_alternaria_solani_1.jpg'  # Replace with the path to your image
img_size = (224, 224)  # Model input size - presumably must match the size used for training; TODO confirm against IMG_SIZE

# Run the prediction
nama_tanaman, penyakit_tanaman, confidence_score = predict_image(model_path, image_path, img_size, class_indices)

# Show the result: plant name, disease name and confidence in percent
print(f"Nama Tanaman: {nama_tanaman}")
print(f"Penyakit Tanaman: {penyakit_tanaman}")
print(f"Hasil Prediksi: {confidence_score:.2f}%")


In [None]:
# Colab only: download the saved model file from the runtime to the local machine.
from google.colab import files
files.download('Train_23_nov_3.keras')