# Project: Portfolio - Final Project

# In [11]:
import os
import math
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from PIL import ImageFile
import random
import shutil
from pathlib import Path
from PIL import Image
import re

# Source dataset location and the destination root of each split
data_dir = Path('/content/corn-or-maize-leaf-disease-dataset')
train_dir = Path('/content/train')
val_dir = Path('/content/val')
test_dir = Path('/content/test')

# Create the destination roots up front; already-existing ones are left as-is
for dir_path in (train_dir, val_dir, test_dir):
    dir_path.mkdir(parents=True, exist_ok=True)

def _copy_verified_images(files, dest_dir):
    """Copy every readable image in *files* into *dest_dir* under a sanitized name.

    Files that PIL cannot open or verify are reported on stdout and skipped.
    """
    for file in files:
        try:
            # The context manager closes the handle that Image.open keeps open,
            # which would otherwise leak one descriptor per image.
            with Image.open(file) as img:
                img.verify()  # raises if the image data is corrupt
            # Anything other than letters, digits, "_" and "." becomes "_"
            new_file_name = re.sub(r'[^a-zA-Z0-9_\.]', '_', file.name)
            shutil.copy(file, dest_dir / new_file_name)
        except (IOError, SyntaxError) as e:
            print(f"Error copying file {file}: {e}")


def split_data(data_dir, train_dir, val_dir, test_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, seed=None):
    """Split a folder-per-label image dataset into train/val/test folders.

    Each sub-directory of ``data_dir`` is treated as one label. Its entries are
    shuffled and copied into ``<split_dir>/<label>``, where spaces in the label
    name are replaced by underscores.

    Args:
        data_dir: ``Path`` of the dataset root (one sub-directory per label).
        train_dir: ``Path`` of the training split root.
        val_dir: ``Path`` of the validation split root.
        test_dir: ``Path`` of the test split root.
        train_ratio: Fraction of each label's files placed in the training split.
        val_ratio: Fraction of each label's files placed in the validation split.
        test_ratio: Accepted for symmetry only; the test split always receives
            whatever remains after the training and validation slices.
        seed: Optional seed for a reproducible shuffle. ``None`` (the default)
            uses the global ``random`` state.

    Note:
        Destination folders are not cleared. Calling this again on the same
        destinations with a different shuffle can place the same image in more
        than one split.
    """
    rng = random if seed is None else random.Random(seed)
    # Guards against float error in the ratio sum: 0.7 + 0.2 is
    # 0.8999999999999999, so a bare int() would turn 10 * 0.9 into 8.
    eps = 1e-9

    # Sorted so that a fixed seed yields the same split on every filesystem
    for label_dir in sorted(data_dir.iterdir()):
        if not label_dir.is_dir():
            continue

        files = sorted(label_dir.glob('*'))
        rng.shuffle(files)

        total = len(files)
        train_end = int(total * train_ratio + eps)
        val_end = int(total * (train_ratio + val_ratio) + eps)

        label_name = label_dir.name.replace(" ", "_")
        splits = (
            (train_dir, files[:train_end]),
            (val_dir, files[train_end:val_end]),
            (test_dir, files[val_end:]),
        )

        for split_root, split_files in splits:
            dest_dir = split_root / label_name
            os.makedirs(dest_dir, exist_ok=True)
            _copy_verified_images(split_files, dest_dir)

# Populate the train/val/test folders from the source dataset
split_data(data_dir, train_dir, val_dir, test_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1)

# Image dimensions fed to the network and the generator batch size
target_size = (150, 150)
batch_size = 32

# Count every file below the validation folder (all label sub-folders included)
validation_images = sum(len(files) for _, _, files in os.walk(val_dir))

if validation_images == 0:
    print("Folder validation tidak memiliki gambar.")
    # exit() is an interactive helper injected by the site module and is not
    # meant for programs; raising SystemExit stops with the same exit status.
    raise SystemExit

# Training pipeline: scale pixels to [0, 1] and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation pipeline: scaling only, no augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options shared by both directory iterators
_flow_options = {
    'target_size': target_size,
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

# Iterator over the training folder (one sub-folder per class)
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)

# Iterator over the validation folder; order is kept fixed (no shuffling)
validation_generator = test_datagen.flow_from_directory(
    val_dir,
    shuffle=False,
    **_flow_options,
)

# Number of batches needed to cover every validation sample once
validation_steps = math.ceil(
    validation_generator.samples / validation_generator.batch_size
)

# Build the CNN: four conv/pool stages followed by a dense classifier head.
# The input shape follows target_size and the output width follows the number
# of classes found in the training folder, so neither can drift out of sync
# with the generators.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(*target_size, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(train_generator.num_classes, activation='softmax'),  # one unit per class
])

# Compile the model for multi-class classification with one-hot labels
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)

# Train the model. steps_per_epoch is rounded up, like validation_steps, so the
# final partial batch of training images is not dropped every epoch.
history = model.fit(
    train_generator,
    steps_per_epoch=math.ceil(train_generator.samples / train_generator.batch_size),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=validation_steps
)




# Output:
# Found 4069 images belonging to 5 classes.
# Found 2058 images belonging to 5 classes.
# Epoch 1/10
# Epoch 2/10
# Epoch 3/10
# Epoch 4/10
# Epoch 5/10
# Epoch 6/10
# Epoch 7/10
# Epoch 8/10
# Epoch 9/10
# Epoch 10/10
