<a href="https://colab.research.google.com/github/Ayasa18/ProgresBelajar/blob/main/Submission_Akhir_KlasifikasiGambar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Proyek Klasifikasi Gambar: Rice Image Dataset
- **Nama:** Aditiya Saputra
- **Email:** AditiyaS1811@gmail.com
- **ID Dicoding:** aditiya18

## Import Semua Packages/Library yang Digunakan

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from PIL import Image
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback

## Data Preparation

### Data Loading

In [3]:
!pip install kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d muratkokludataset/rice-image-dataset/

mkdir: cannot create directory ‘/root/.kaggle’: File exists
cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/muratkokludataset/rice-image-dataset/versions/
License(s): CC0-1.0
Downloading rice-image-dataset.zip to /content
 99% 217M/219M [00:11<00:00, 24.3MB/s]
100% 219M/219M [00:11<00:00, 20.1MB/s]


#### Read Dataset

In [6]:
# Unpack the downloaded archive into the local 'rice-image-dataset' folder.
with zipfile.ZipFile(file='rice-image-dataset.zip', mode='r') as zip_ref:
    zip_ref.extractall(path='rice-image-dataset')

def print_images_resolution(directory):
    """
    Print, for every sub-directory, its entry count and the distinct image sizes found.

    Entries of `directory` that are not directories are ignored. Every entry of a
    sub-directory is counted as an image; entries that cannot be opened are
    reported on stdout and otherwise skipped. A grand total is printed at the end.

    Args:
        directory (str): Path of the dataset folder.
    """
    grand_total = 0

    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if os.path.isdir(entry_path):
            names = os.listdir(entry_path)
            count = len(names)
            print(f"{entry}: {count} images")
            grand_total += count

            # Sizes are collected per sub-directory, so start from an empty set each time.
            sizes = set()
            for name in names:
                candidate = os.path.join(entry_path, name)
                try:
                    with Image.open(candidate) as picture:
                        sizes.add(picture.size)
                except Exception as e:
                    print(f"Error opening image {name}: {e}")

            print("Unique resolutions:")
            for dims in sizes:
                print(f"- {dims}")
            print("---------------")

    print(f"\nTotal images: {grand_total}")

# Extracted dataset root; its sub-directories are scanned and summarised below.
dataset_folder = "/content/rice-image-dataset/Rice_Image_Dataset"
print_images_resolution(directory=dataset_folder)

Karacadag: 15000 images
Unique resolutions:
- (250, 250)
---------------
Basmati: 15000 images
Unique resolutions:
- (250, 250)
---------------
Ipsala: 15000 images
Unique resolutions:
- (250, 250)
---------------
Jasmine: 15000 images
Unique resolutions:
- (250, 250)
---------------
Arborio: 15000 images
Unique resolutions:
- (250, 250)
---------------

Total images: 75000


### Data Preprocessing

#### Split Dataset

In [7]:

# Root directory of the extracted dataset
dataset_dir = "/content/rice-image-dataset/Rice_Image_Dataset"

# Class folders that get split
classes = ["Karacadag", "Basmati", "Ipsala", "Jasmine", "Arborio"]

# Fraction of each class assigned to the train set (the remainder goes to test)
train_ratio = 0.8

for class_name in classes:
    # Folder holding the not-yet-split files of this class
    class_dir = os.path.join(dataset_dir, class_name)

    # Keep regular files only, and sort them: os.listdir() returns entries in
    # arbitrary order, so without sorting the seeded split is not reproducible.
    files = []
    if os.path.isdir(class_dir):
        files = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )

    # Files are moved (not copied) below, so on a re-run the class folder is
    # already empty. Skip it instead of letting train_test_split raise on zero samples.
    if not files:
        print(f"{class_name}: tidak ada file untuk dibagi, dilewati.")
        continue

    # Split the file names into train (80%) and test (20%)
    train_files, test_files = train_test_split(files, train_size=train_ratio, random_state=42)

    # Create <dataset_dir>/<split>/<class> and move each file into its split
    for folder_name, file_list in [("train", train_files), ("test", test_files)]:
        folder_path = os.path.join(dataset_dir, folder_name, class_name)
        os.makedirs(folder_path, exist_ok=True)

        for file in file_list:
            src = os.path.join(class_dir, file)
            dst = os.path.join(folder_path, file)
            shutil.move(src, dst)

print("Dataset berhasil dibagi menjadi 80% train dan 20% test.")


Dataset berhasil dibagi menjadi 80% train dan 20% test.


#### Normalisasi dan Augmentasi Data

In [8]:
# Size every image is resized to before being fed to the network
img_width, img_height = 256, 256

# Number of images per batch
batch_size = 64

# Only pixel rescaling to [0, 1] is applied; no augmentation transforms are configured.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Generators reading the class sub-folders under <dataset_dir>/train and <dataset_dir>/test
train_generator = train_datagen.flow_from_directory(
    directory=os.path.join(dataset_dir, 'train'),
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    directory=os.path.join(dataset_dir, 'test'),
    target_size=(img_height, img_width),  # Keras expects (height, width)
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False  # keep a fixed file order for evaluation
)

print(f"Train samples: {train_generator.samples}")
print(f"Test samples: {test_generator.samples}")


Found 60000 images belonging to 5 classes.
Found 15000 images belonging to 5 classes.
Train samples: 60000
Test samples: 15000


## Modelling

In [11]:
# Number of output units, derived from the class list used for the split
# instead of repeating the list literal here.
num_classes = len(classes)

# Three Conv2D + MaxPooling2D stages followed by a dense classifier head
model = Sequential([
    # Image tensors are laid out as (height, width, channels)
    tf.keras.Input(shape=(img_height, img_width, 3)),

    # First convolutional stage
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Second convolutional stage
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Third convolutional stage
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Flatten the feature maps into a 1D vector
    Flatten(),

    # Fully connected layer with dropout
    Dense(256, activation='relu'),
    Dropout(0.5),

    # Softmax output, one unit per class
    Dense(num_classes, activation='softmax'),
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Show the model architecture
model.summary()

In [None]:
class CustomStopCallback(Callback):
    """Stop training once both training and validation accuracy exceed a threshold.

    Args:
        threshold (float): Value that both `accuracy` and `val_accuracy`
            must exceed for training to be stopped. Defaults to 0.97.
    """

    def __init__(self, threshold=0.97):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when both exceed the threshold.

        Args:
            epoch (int): Index of the epoch that just finished (unused).
            logs (dict | None): Metrics of the finished epoch; read for the keys
                'accuracy' and 'val_accuracy'.
        """
        # `logs` defaults to None; fall back to an empty dict so .get() is safe.
        logs = logs or {}
        train_acc = logs.get('accuracy')
        val_acc = logs.get('val_accuracy')

        # A missing metric would make the comparison below raise TypeError;
        # in that case there is nothing to decide this epoch.
        if train_acc is None or val_acc is None:
            return

        if train_acc > self.threshold and val_acc > self.threshold:
            print(f"\nAkurasi sudah mencapai {self.threshold * 100:.2f}%! Menghentikan pelatihan.")
            self.model.stop_training = True

# Stop when val_accuracy has not improved by at least 0.01 for 5 epochs,
# then restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.01,
    patience=5,
    restore_best_weights=True,
)

# Write the model to 'best_model.keras' whenever val_accuracy improves.
model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
)

# Stop as soon as both accuracy and val_accuracy exceed 0.95.
custom_stop_callback = CustomStopCallback(threshold=0.95)

# Train for at most 50 epochs, validating on the test generator.
training_callbacks = [early_stopping, model_checkpoint, custom_stop_callback]
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=50,
    callbacks=training_callbacks,
)

Epoch 1/50
[1m428/938[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m2:08[0m 253ms/step - accuracy: 0.9665 - loss: 0.0991

## Evaluasi dan Visualisasi

## Konversi Model

## Inference (Optional)