# PREPROCESSING, SPLIT, AUGMENTATION

LIBRARY

In [None]:
import os                         # Untuk interaksi dengan os
import matplotlib.pyplot as plt   # Untuk visualisasi data
import matplotlib.image as mpimg  # Membaca dan memanipulasi gambar
import tensorflow as tf           # Library untuk Machine Learning
import random                     # untuk menghasilkan angka acak
import shutil                     # Untuk interaksi dengan file dan dir 

SPLIT DATASET

In [None]:
# Output folders that receive the training / validation / test splits.
train_dir = "/content/train_dir"
val_dir = "/content/val_dir"
test_dir = "/content/test_dir"

# Source folder with the raw cat skin disease images that get split.
cat_disease_dir = "/content/CAT SKIN DISEASE"

In [None]:
# Create the three split output directories.
# exist_ok=True keeps this cell re-runnable: a plain os.makedirs() raises
# FileExistsError as soon as the directory is already there (e.g. on a second
# run of the notebook in the same session).
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# Dataset split ratios, Training : Validation : Test = 70 : 15 : 15.
# NOTE: the split code visible in this file takes the test share as whatever
# remains after the train and validation counts, so test_ratio is not read there.
train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

In [None]:
# Split every class folder of the source dataset into train / val / test copies.

# `cat_disease_classes` is not assigned anywhere earlier in this file, so it is
# derived here: every sub-directory of the source folder is treated as one class.
# Sorted so the processing order does not depend on os.listdir() ordering.
cat_disease_classes = sorted(
    entry
    for entry in os.listdir(cat_disease_dir)
    if os.path.isdir(os.path.join(cat_disease_dir, entry))
)

# File extensions accepted as images; matched case-insensitively below so that
# files such as "photo.JPG" are not silently left out of the split.
image_extensions = ('.png', '.jpg', '.jpeg')

for class_name in cat_disease_classes:
    # The source folder variable is `cat_disease_dir` (the previous name
    # `cat_disease` was never defined and raised NameError).
    class_dir = os.path.join(cat_disease_dir, class_name)
    class_images = [
        img for img in os.listdir(class_dir)
        if img.lower().endswith(image_extensions)
    ]
    num_images = len(class_images)

    # One sub-directory per class inside each of the train, val and test folders.
    train_class_dir = os.path.join(train_dir, class_name)
    val_class_dir = os.path.join(val_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    for split_class_dir in (train_class_dir, val_class_dir, test_class_dir):
        os.makedirs(split_class_dir, exist_ok=True)

    # Number of images per split; the test split takes the remainder so that
    # the int() truncation above never drops an image.
    num_train = int(num_images * train_ratio)
    num_val = int(num_images * val_ratio)
    num_test = num_images - num_train - num_val

    # Randomise which images end up in which split.
    # NOTE: the shuffle is unseeded and files are copied without clearing the
    # destination, so re-running this cell onto non-empty output folders can
    # leave the same image in more than one split.
    random.shuffle(class_images)

    # Copy (the originals are kept in place) each slice to its destination.
    split_assignments = (
        (train_class_dir, class_images[:num_train]),
        (val_class_dir, class_images[num_train:num_train + num_val]),
        (test_class_dir, class_images[num_train + num_val:]),
    )
    for destination_dir, image_names in split_assignments:
        for image_name in image_names:
            shutil.copy(os.path.join(class_dir, image_name), destination_dir)

In [None]:
# Print, for every split, how many files each class folder contains.
print("\nJumlah Dataset Setelah Pembagian:")
for dataset_dir in [train_dir, val_dir, test_dir]:
    # Pick the heading that matches the split currently being reported.
    heading = (
        "Training Dataset:" if dataset_dir == train_dir
        else "Validation Dataset:" if dataset_dir == val_dir
        else "Testing Dataset:"
    )
    print(heading)
    for class_name in cat_disease_classes:
        file_count = len(os.listdir(os.path.join(dataset_dir, class_name)))
        print(f"  {class_name}: {file_count} gambar")

PREPROCESSING

In [None]:
# Loader settings shared by the training and validation datasets.
# label_mode='categorical' is used because there are more than two classes.
dataset_loader_options = dict(
    image_size=(120, 120),
    batch_size=32,
    label_mode='categorical',
)

# Training dataset, read from the training split folder.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir, **dataset_loader_options)

# Validation dataset, read from the validation split folder.
val_dataset = tf.keras.utils.image_dataset_from_directory(
    val_dir, **dataset_loader_options)

# Input pipeline tuning parameters.
SHUFFLE_BUFFER_SIZE = 100
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE


def _cache_shuffle_prefetch(dataset):
    """Return `dataset` with caching, shuffling and prefetching applied, in that order."""
    cached = dataset.cache()
    shuffled = cached.shuffle(SHUFFLE_BUFFER_SIZE)
    return shuffled.prefetch(PREFETCH_BUFFER_SIZE)


train_dataset_fin = _cache_shuffle_prefetch(train_dataset)
val_dataset_fin = _cache_shuffle_prefetch(val_dataset)

AUGMENTASI GAMBAR

In [None]:
# Image augmentation.

# Augmentation pipeline, built on first use and then shared by every
# augment_image() call instead of constructing new layers per image.
_augmentation_pipeline = None


def _build_augmentation_pipeline():
    """Create the random flip / rotation / zoom / width / height pipeline."""
    # Prefer the stable location of the random preprocessing layers; the
    # `experimental.preprocessing` namespace is a deprecated alias and is only
    # used as a fallback for TensorFlow versions that lack the stable names.
    layers = tf.keras.layers
    if not hasattr(layers, "RandomFlip"):
        layers = layers.experimental.preprocessing

    return tf.keras.Sequential([
        tf.keras.Input(shape=(120, 120, 3)),
        layers.RandomFlip("horizontal_and_vertical"),
        layers.RandomRotation(0.2),
        layers.RandomZoom(0.2),
        layers.RandomWidth(0.2),
        layers.RandomHeight(0.2),
    ])


def augment_image(image):
    """Return a randomly augmented version of `image`.

    Args:
        image: Image data passed straight to the Keras augmentation pipeline.
            The pipeline is declared with a (120, 120, 3) per-image input shape.
            NOTE(review): whether callers pass a single image or a batch is
            not visible here; confirm against the call sites.

    Returns:
        The output of the augmentation pipeline for `image`.
    """
    global _augmentation_pipeline
    if _augmentation_pipeline is None:
        _augmentation_pipeline = _build_augmentation_pipeline()

    # training=True is passed explicitly: the random layers only transform
    # their input in training mode, and this function always wants augmentation.
    return _augmentation_pipeline(image, training=True)

In [None]:
# Show a few randomly picked images after augmentation.
def display_augmented_images(images, num_samples=5):
    """Plot `num_samples` augmented images side by side in one row.

    Each panel shows an image drawn at random (with replacement) from
    `images` and passed through `augment_image`.

    Args:
        images: Non-empty sequence of images to sample from.
            NOTE(review): plt.imshow expects float data in [0, 1] or integer
            data in [0, 255]; the value range of `images` is not visible
            here, so confirm it at the call site.
        num_samples: Number of panels to draw.
    """
    plt.figure(figsize=(15, 5))
    for panel_index in range(1, num_samples + 1):
        picked_image = random.choice(images)
        augmented_image = augment_image(picked_image)
        plt.subplot(1, num_samples, panel_index)
        plt.axis('off')
        plt.imshow(augmented_image)
    plt.show()