<a href="https://colab.research.google.com/github/FairuzAthallah88/Machine-Learning-Models/blob/main/CNN_Xplorin_MobileNetV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MODEL CNN XPLORIN MobileNetV3 (1 Fase Training)

- Arsitektur	: MobileNetV3
- IMG HEIGHT & IMG WIDTH : $224 \times 224$
- BATCH SIZE	: 32
- LEARNING RATE : LR_MAX $0.00005$ ; LR_MIN $0.0000001$
- EPOCH : 100
- DATASET : https://drive.google.com/drive/folders/1g4rL66pXysnmAFzhcyOV6Fl9K3O_cPmG


1. Instalasi, Imports, dan Konfigurasi

In [None]:
# --- Imports Library ---
import joblib
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import random
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import MobileNetV3Large
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers.schedules import CosineDecay
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive

# Mount Google Drive so the dataset and the saved assets are reachable.
drive.mount('/content/drive')

# --- Locations on Drive ---
DATASET_MENTAH_DIR = '/content/drive/MyDrive/DATASET_CNN_XPLORIN/Xplorin'
ASSET_SAVE_DIR = '/content/drive/MyDrive/DATASET_CNN_XPLORIN/MODEL_ASSETS'
os.makedirs(ASSET_SAVE_DIR, exist_ok=True)

# Checkpoint file for the MobileNetV3Large model.
MODEL_SAVE_PATH = os.path.join(ASSET_SAVE_DIR, 'best_mobilenet_v3_large.h5')

# Pre-split data lives in <raw dataset>/dataset_split/{training,validation}.
BASE_SPLIT_DIR = os.path.join(DATASET_MENTAH_DIR, 'dataset_split')
TRAINING_DIR, VALIDATION_DIR = (
    os.path.join(BASE_SPLIT_DIR, subset) for subset in ('training', 'validation')
)

# --- Hyperparameters ---
IMG_HEIGHT = IMG_WIDTH = 224
BATCH_SIZE = 32
TOTAL_EPOCHS = 100

# Learning-rate bounds: (maximum, minimum).
LR_MAX, LR_MIN = 5e-5, 1e-7

# Callback patience values, in epochs.
# NOTE(review): PATIENCE_REDUCE_LR is not referenced by any callback visible
# in this notebook — confirm whether it is still needed.
PATIENCE_EARLY_STOP, PATIENCE_REDUCE_LR = 20, 10

print(
    f"Target Penyimpanan Model: {MODEL_SAVE_PATH}",
    "Konfigurasi MobileNetV3Large Selesai.",
    sep="\n",
)

2. Visualisasi Sampel Dataset Mentah

In [None]:
# --- Preview of raw dataset samples ---
# Every sub-folder of the raw dataset is treated as a category, except the
# generated split folder and Jupyter's checkpoint folder.
_NON_CATEGORY_DIRS = ('dataset_split', '.ipynb_checkpoints')
all_categories = [
    entry for entry in os.listdir(DATASET_MENTAH_DIR)
    if os.path.isdir(os.path.join(DATASET_MENTAH_DIR, entry))
    and entry not in _NON_CATEGORY_DIRS
]

if all_categories:
    # Show at most 9 randomly chosen categories on a 3x3 grid.
    num_to_display = min(9, len(all_categories))
    categories_to_display = random.sample(all_categories, num_to_display)

    plt.figure(figsize=(12, 12))
    plt.suptitle("Sampel Gambar Dataset Mentah (Sebelum Preprocessing)", fontsize=16)

    # One grid slot per category: a random image, or a "(Kosong)" placeholder
    # when the folder holds no .jpg/.jpeg/.png file.
    for slot, category in enumerate(categories_to_display, start=1):
        category_path = os.path.join(DATASET_MENTAH_DIR, category)
        image_files = [
            name for name in os.listdir(category_path)
            if name.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

        plt.subplot(3, 3, slot)
        if not image_files:
            plt.title(f"{category} (Kosong)", fontsize=12)
            plt.axis('off')
            continue

        image_path = os.path.join(category_path, random.choice(image_files))
        plt.imshow(Image.open(image_path))
        plt.title(category, fontsize=12)
        plt.axis('off')

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
    print(f" Visualisasi Sampel dari {num_to_display} kategori selesai.")
else:
    print("Gagal menemukan folder kategori di DATASET_MENTAH_DIR. Periksa kembali path.")

3. Data Augmentation dan Generator (Preprocessing Data)

In [None]:
# --- Data augmentation & generators for MobileNetV3 ---
# Training images are augmented on the fly; validation images only pass
# through the model's preprocessing function.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by both directory iterators.
flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR, shuffle=True, **flow_options
)
# shuffle=False keeps the validation order fixed, which the later report cell
# relies on when it compares predictions with `validation_generator.classes`.
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR, shuffle=False, **flow_options
)

# Class metadata derived from the training folder structure.
NUM_CLASSES = train_generator.num_classes
CLASS_NAMES = list(train_generator.class_indices)

print(f"\nData Generators V3 Siap. Total Kelas: {NUM_CLASSES}")

4. Arsitektur Model MobileNetV3Large

In [None]:
# --- MobileNetV3Large model architecture ---
print("--- DEFINISI ARSITEKTUR MODEL (MobileNetV3Large) ---")

# Backbone: ImageNet-pretrained MobileNetV3Large without its classifier top.
base_model = MobileNetV3Large(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    include_top=False,
    weights='imagenet',
)

# Leave the whole backbone trainable (nothing is frozen).
base_model.trainable = True
print(f"Jumlah Layer Base Model MobileNetV3Large: {len(base_model.layers)}")

# Classification head: global average pooling followed by two
# Dense(relu) + Dropout(0.6) stages of 512 and 256 units.
x = GlobalAveragePooling2D()(base_model.output)
for units in (512, 256):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.6)(x)

# Softmax output with one unit per class, then the assembled model.
predictions = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

print("\nArsitektur Model MobileNetV3Large Siap.")
model.summary()

5. Proses Training Terpadu (Hyperparameter Tuning: Penyesuaian LR & Regularisasi)



In [None]:
# --- Unified training run ---
# Fail early with a clear message instead of building a zero-length schedule.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError(
        "Generator training/validation kosong: periksa TRAINING_DIR dan VALIDATION_DIR."
    )

# Use the generators' own batch counts so the final, partial batch is included.
# `samples // BATCH_SIZE` dropped that remainder every epoch and evaluated
# to 0 steps whenever a split held fewer images than one batch.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)
total_steps = TOTAL_EPOCHS * steps_per_epoch

# Cosine decay from LR_MAX down to LR_MIN over the whole run
# (alpha is the final learning rate expressed as a fraction of the initial one).
lr_schedule = CosineDecay(
    initial_learning_rate=LR_MAX,
    decay_steps=total_steps,
    alpha=LR_MIN/LR_MAX
)

# Callbacks.
# NOTE(review): early stopping tracks val_loss while the checkpoint keeps the
# epoch with the best val_accuracy, so the restored in-memory weights and the
# saved file may come from different epochs — confirm this is intended.
callbacks_list_final = [
    EarlyStopping(monitor='val_loss', patience=PATIENCE_EARLY_STOP, restore_best_weights=True),
    ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True),
]

# Compile with Adam driven by the cosine schedule.
model.compile(optimizer=Adam(learning_rate=lr_schedule),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("\n--- MEMULAI TRAINING MOBILE NET V3 ---")
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=TOTAL_EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list_final,
)

print("\n✅ Training V3 Selesai.")

# Persist the per-epoch metrics so the plots can be redrawn without retraining.
# Saving is best-effort: a failure is reported but does not discard the model.
HISTORY_SAVE_PATH = os.path.join(ASSET_SAVE_DIR, 'training_history_v3_large.pkl')
try:
    joblib.dump(history.history, HISTORY_SAVE_PATH)
    print(f"✅ Riwayat training disimpan ke: {HISTORY_SAVE_PATH}")
except Exception as e:
    print(f"❌ Gagal menyimpan riwayat: {e}")

6. Visualisasi History & Evaluasi Model

In [None]:
# --- History visualisation & model evaluation ---
HISTORY_PATH = os.path.join(ASSET_SAVE_DIR, 'training_history_v3_large.pkl')
MODEL_PATH = os.path.join(ASSET_SAVE_DIR, 'best_mobilenet_v3_large.h5')

# 1. PLOT THE TRAINING HISTORY
# Plotting is best-effort: any failure is reported as a warning so the
# evaluation that follows can still run.
try:
    print(f" Memuat history dari: {HISTORY_PATH}")
    # joblib.load unpickles the file: only load history files written by this
    # notebook, never files from an untrusted source.
    history_data = joblib.load(HISTORY_PATH)

    hist_acc = history_data['accuracy']
    hist_val_acc = history_data['val_accuracy']
    hist_loss = history_data['loss']
    hist_val_loss = history_data['val_loss']
    epochs = range(1, len(hist_acc) + 1)
    random_guess = 1 / NUM_CLASSES  # accuracy of uniform random guessing
    plt.figure(figsize=(18, 6))

    # (subplot position, metric label, training series, validation series)
    panels = [
        (1, 'Loss', hist_loss, hist_val_loss),
        (2, 'Accuracy', hist_acc, hist_val_acc),
    ]
    for position, metric, train_values, val_values in panels:
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_values, 'b', label=f'Training {metric}')
        plt.plot(epochs, val_values, 'r', label=f'Validation {metric}')
        if metric == 'Accuracy':
            # Reference line: anything below it is worse than guessing.
            plt.axhline(y=random_guess, color='green', linestyle='--', label=f'Random ({random_guess*100:.1f}%)')
        plt.title(f'{metric}: MobileNetV3Large')
        plt.xlabel('Epoch')
        plt.ylabel(metric)
        plt.legend()
        plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

except Exception as e:
    print(f"⚠️ Warning Grafik: {e} (Mungkin file history belum ada)")

# 2. LOAD BEST WEIGHTS & EVALUATE
print(f"\n Memuat bobot terbaik dari: {MODEL_PATH}")
try:
    model.load_weights(MODEL_PATH)
    print("✅ Bobot berhasil dimuat!")

    # Re-compile so loss and accuracy are available to evaluate().
    print("Compile model untuk evaluasi...")
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Evaluate only when the generator cell has been run in this session.
    if 'validation_generator' in globals():
        print("\n--- MENJALANKAN EVALUASI PADA VALIDATION SET ---")
        validation_generator.reset()
        # len(generator) counts every batch, including the final partial one;
        # `samples // BATCH_SIZE` left those trailing images out of the metrics.
        results = model.evaluate(validation_generator, steps=len(validation_generator))
        print("\n==============================================")
        print(f" HASIL AKHIR MOBILENET V3 LARGE")
        print(f" Loss Akhir:     {results[0]:.4f}")
        print(f" Accuracy Akhir: {results[1]*100:.2f}%")
    else:
        print("⚠️ Generator tidak ditemukan.")

except Exception as e:
    print(f"❌ Error Evaluasi: {e}")

7. Classification Report (Precision, Recall, F1-Score) dan Confusion Matrix

In [None]:
# --- Classification report & confusion matrix ---
try:
    # Restart the iterator so predictions line up with `.classes`
    # (the validation generator is created with shuffle=False).
    validation_generator.reset()
    true_labels = validation_generator.classes
    class_names = list(validation_generator.class_indices.keys())

    print("--- MEMULAI PREDIKSI ---")
    # len(generator) is the exact number of batches. The previous
    # `samples // batch_size + 1` asked for one batch too many whenever the
    # sample count was an exact multiple of the batch size.
    predictions = model.predict(
        validation_generator,
        steps=len(validation_generator),
        verbose=1
    )

    # Defensive trim: never keep more predictions than there are labels.
    predictions = predictions[:len(true_labels)]
    predicted_labels = np.argmax(predictions, axis=1)

    # 1. Confusion matrix
    print("\n==============================================")
    print("             CONFUSION MATRIX")
    print("==============================================")
    print(confusion_matrix(true_labels, predicted_labels))

    # 2. Classification report (precision, recall, F1 per class)
    print("\n==============================================")
    print("           CLASSIFICATION REPORT")
    print("==============================================")
    print(classification_report(true_labels, predicted_labels, target_names=class_names, digits=4))
    print("==============================================")

except NameError:
    print("❌ Error: Variabel 'model' atau 'validation_generator' tidak ditemukan.")
except Exception as e:
    print(f"❌ Terjadi kesalahan: {e}")

# OPSIONAL!!
---
UNTUK SPLIT DATASET DAN COMPARE



In [None]:
import os
import shutil
import random
from tqdm import tqdm
from collections import defaultdict

# Root of the raw dataset on Google Drive; the split cell below treats each
# sub-folder of this directory as one category.
DATASET_MENTAH_DIR = '/content/drive/MyDrive/DATASET_CNN_XPLORIN/Xplorin'


In [None]:
# --- SPLIT THE RAW DATASET INTO TRAIN & VAL ---

# File names are compared in lowercase, so the lowercase entries are the ones
# that matter; the uppercase entries are kept for compatibility.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.JPG', '.JPEG', '.PNG')
# Share of each class copied to training; the remainder goes to validation
# (80:20). Previously this name was used without ever being defined.
SPLIT_RATIO = 0.8

print("\n--- Otomasi Pembagian Dataset Dimulai ---")
# Per-class counters: images found, copies into each split, failed copies.
split_results = defaultdict(lambda: {'mentah': 0, 'train_sukses': 0, 'val_sukses': 0, 'gagal_copy': 0})


def _copy_class_images(image_paths, dest_root, class_name, split_label, success_key):
    """Copy one class's images into ``dest_root/class_name``.

    Updates ``split_results[class_name]``: ``success_key`` is incremented for
    each copied file and ``'gagal_copy'`` for each failure. A failed copy is
    reported and skipped so the rest of the split continues.

    Returns the number of files copied successfully.
    """
    copied = 0
    for img_path in tqdm(image_paths, desc=f"{split_label} {class_name}"):
        dest_path = os.path.join(dest_root, class_name, os.path.basename(img_path))
        try:
            shutil.copy(img_path, dest_path)
        except OSError as e:
            split_results[class_name]['gagal_copy'] += 1
            print(f"\n❌ GAGAL COPY file {os.path.basename(img_path)} ke {split_label}: {e}")
        else:
            split_results[class_name][success_key] += 1
            copied += 1
    return copied


try:
    # 1. Discover the category folders BEFORE touching the existing split, so
    #    a wrong DATASET_MENTAH_DIR cannot wipe a previously good split.
    all_categories = [d for d in os.listdir(DATASET_MENTAH_DIR) if os.path.isdir(os.path.join(DATASET_MENTAH_DIR, d)) and d not in ['dataset_split', '.ipynb_checkpoints']]

    if not all_categories:
        print("⚠️ Gagal: Tidak ada folder kategori yang ditemukan. Periksa DATASET_MENTAH_DIR.")
    else:
        # 2. Remove the old split and recreate the empty folder structure.
        if os.path.exists(BASE_SPLIT_DIR):
            print("Membersihkan folder 'dataset_split' yang lama...")
            shutil.rmtree(BASE_SPLIT_DIR)

        os.makedirs(TRAINING_DIR, exist_ok=True)
        os.makedirs(VALIDATION_DIR, exist_ok=True)

        total_files_moved = 0

        # 3. Split every category independently.
        for class_name in all_categories:
            class_path = os.path.join(DATASET_MENTAH_DIR, class_name)

            os.makedirs(os.path.join(TRAINING_DIR, class_name), exist_ok=True)
            os.makedirs(os.path.join(VALIDATION_DIR, class_name), exist_ok=True)

            # Case-insensitive match, consistent with the comparison cell.
            images = [os.path.join(class_path, f) for f in os.listdir(class_path) if f.lower().endswith(IMAGE_EXTENSIONS)]
            split_results[class_name]['mentah'] = len(images)

            if not images:
                continue

            random.shuffle(images)

            # First SPLIT_RATIO share goes to training, the rest to validation.
            train_split_point = int(len(images) * SPLIT_RATIO)
            train_images = images[:train_split_point]
            val_images = images[train_split_point:]

            total_files_moved += _copy_class_images(train_images, TRAINING_DIR, class_name, "TRAIN", 'train_sukses')
            total_files_moved += _copy_class_images(val_images, VALIDATION_DIR, class_name, "VAL", 'val_sukses')

        # --- 4. Final report and diagnostics ---
        print("\n" + "="*80)
        if total_files_moved > 0:
            print(f"✅ Pembagian Dataset Selesai. Total {total_files_moved} file berhasil diproses.")
        else:
            print("⚠️ Pembagian Gagal: Tidak ada file gambar yang berhasil disalin.")

        print("\n### DIAGNOSTIK AKURASI SPLIT ###")
        print(f"{'Kelas':<25} | {'Total Mentah':<12} | {'Train Sukses':<12} | {'Val Sukses':<10} | {'Gagal Copy':<10}")
        print("-" * 80)

        for class_name, res in sorted(split_results.items()):
            # Every raw image must be accounted for as train, val or failed.
            total_split = res['train_sukses'] + res['val_sukses'] + res['gagal_copy']

            if res['mentah'] > 0:
                if total_split != res['mentah']:
                    print(f"⚠️ {class_name:<23} | {res['mentah']:<12} | {res['train_sukses']:<12} | {res['val_sukses']:<10} | {res['gagal_copy']:<10} | SELISIH: {res['mentah'] - total_split}")
                else:
                    print(f"✅ {class_name:<23} | {res['mentah']:<12} | {res['train_sukses']:<12} | {res['val_sukses']:<10} | {res['gagal_copy']:<10}")

        print("-" * 80)

except Exception as e:
    print(f"Terjadi error fatal saat pembagian data: {e}")

In [None]:
# --- HITUNG TOTAL DATASET MENTAH & DATASET SPLIT (UNTUK COMPARE DATASET MENTAH DAN SPLIT) ---

import os
from collections import defaultdict
from tabulate import tabulate

# --- MAIN PATH CONFIGURATION ---
DATASET_MENTAH_DIR = "/content/drive/MyDrive/DATASET_CNN_XPLORIN/Xplorin"
# The split lives INSIDE the raw dataset folder, matching the training
# configuration and the split cell; pointing one level higher made this cell
# look in a directory the split cell never writes to.
BASE_SPLIT_DIR = os.path.join(DATASET_MENTAH_DIR, "dataset_split")
TRAINING_DIR = os.path.join(BASE_SPLIT_DIR, "training")
VALIDATION_DIR = os.path.join(BASE_SPLIT_DIR, "validation")

# File names are lowercased before matching, so the lowercase entries suffice;
# the uppercase ones are kept for compatibility.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.JPG', '.JPEG', '.PNG')

def count_images_in_directory(dataset_path, excluded_dirs=('dataset_split', '.ipynb_checkpoints')):
    """Count image files per class folder directly under ``dataset_path``.

    Only immediate sub-folders are treated as classes, and only files sitting
    directly inside them are counted — the same files the split cell copies.
    Folders named in ``excluded_dirs`` are skipped, so counting the raw
    dataset does not also count the train/validation copies stored in its
    ``dataset_split`` sub-folder.

    Args:
        dataset_path: Directory whose sub-folders are the classes.
        excluded_dirs: Sub-folder names that are not classes.

    Returns:
        A ``defaultdict(int)`` mapping class name to image count (classes
        without images are absent), or ``None`` if ``dataset_path`` is not an
        existing directory.
    """
    if not os.path.isdir(dataset_path):
        return None

    class_counts = defaultdict(int)

    with os.scandir(dataset_path) as entries:
        for entry in entries:
            if not entry.is_dir() or entry.name in excluded_dirs:
                continue
            for filename in os.listdir(entry.path):
                if filename.lower().endswith(IMAGE_EXTENSIONS):
                    class_counts[entry.name] += 1

    return class_counts

def compare_split_integrity():
    """Compare per-class image counts of the raw dataset with its train/val split.

    Counts images with ``count_images_in_directory`` for DATASET_MENTAH_DIR,
    TRAINING_DIR and VALIDATION_DIR, then prints one table row per class with
    the raw count, the train and validation counts, their sum, the difference
    (raw minus split) and a status, followed by a TOTAL row.

    Prints an error and returns early when the raw directory or either split
    directory cannot be counted. Returns ``None``.
    """

    print("--- MEMULAI INTEGRITAS & VERIFIKASI SPLIT ---")

    # 1. Count the raw dataset.
    raw_counts = count_images_in_directory(DATASET_MENTAH_DIR)

    if raw_counts is None:
        print(f"❌ ERROR: Direktori Mentah ({DATASET_MENTAH_DIR}) tidak ditemukan. Periksa path sumber Anda.")
        return

    # 2. Count the training and validation splits.
    train_counts = count_images_in_directory(TRAINING_DIR)
    val_counts = count_images_in_directory(VALIDATION_DIR)

    if train_counts is None or val_counts is None:
        print(f"❌ ERROR: Direktori Split (Train/Val) tidak ditemukan di {BASE_SPLIT_DIR}. Jalankan skrip split data terlebih dahulu!")
        return

    # Union of class names, so a class missing from any side still gets a row.
    all_classes = sorted(set(raw_counts.keys()) | set(train_counts.keys()) | set(val_counts.keys()))

    # --- 3. Build the comparison table ---
    table_data = []
    global_raw_total = 0
    global_split_total = 0

    for class_name in all_classes:
        raw = raw_counts.get(class_name, 0)
        train = train_counts.get(class_name, 0)
        val = val_counts.get(class_name, 0)

        split_total = train + val
        difference = raw - split_total

        global_raw_total += raw
        global_split_total += split_total

        # Positive difference: images missing from the split.
        # Negative difference: the split holds more images than the raw set.
        if difference == 0:
            status = "✅ OK"
        elif difference > 0:
            status = "⚠️ SELISIH"
        else:
            status = "❌ OVERFLOW"

        table_data.append([
            class_name,
            raw,
            train,
            val,
            split_total,
            difference,
            status
        ])

    # Separator and grand-total rows.
    table_data.append(["---", "---", "---", "---", "---", "---", "---"])
    table_data.append([
        "TOTAL",
        global_raw_total,
        sum(train_counts.values()),
        sum(val_counts.values()),
        global_split_total,
        global_raw_total - global_split_total,
        "❌ FAIL" if global_raw_total - global_split_total != 0 else "✅ OK"
    ])

    # Render the table.
    print("\n" + "="*100)
    print("### 📊 HASIL INTEGRITAS SPLIT DATASET ###")
    print(tabulate(table_data,
                   headers=["Kelas", "Total Mentah", "Train Count", "Val Count", "Split Total", "Selisih", "Status"],
                   tablefmt="fancy_grid",
                   numalign="center"))
    print("="*100)

if __name__ == "__main__":
    try:
        compare_split_integrity()
    except NameError as e:
        # A NameError means a name the comparison relies on (for example
        # `tabulate` or a path constant) is not defined in this session.
        # Report the actual name instead of always blaming `tabulate`; a
        # library that is not installed fails at its import line instead.
        print(f"❌ ERROR: {e}. Pastikan sel import dan konfigurasi sudah dijalankan (untuk 'tabulate': !pip install tabulate).")
    except Exception as e:
        print(f"Terjadi error saat menjalankan komparasi: {e}")