# Genetic Algorithm Hyperparameter Tuning for Batik Classification

This notebook implements a custom Genetic Algorithm to tune the hyperparameters of the batik classification model.

In [None]:
# Standard library
import os
import random
import shutil
from collections import Counter, defaultdict
from sklearn.model_selection import train_test_split

# Third-party packages
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import compute_class_weight

# TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras
import tensorflowjs as tfjs
from keras.layers import TFSMLayer
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (
    Conv2D,
    MaxPooling2D,
    GlobalAveragePooling2D,
    Dense,
    Dropout
)
from tensorflow.keras.applications import InceptionV3, MobileNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import (
    ImageDataGenerator,
    load_img,
    img_to_array
)
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
    Callback
)

# Suppress TensorFlow and Python warnings
# NOTE(review): TensorFlow has already been imported earlier in this cell;
# TF_CPP_MIN_LOG_LEVEL is normally expected to be set before that import to
# silence TensorFlow's native logs — confirm it still has the intended effect.
import warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings('ignore')

In [None]:
# Report whether TensorFlow can see a GPU; training falls back to CPU otherwise.
gpus = tf.config.list_physical_devices('GPU')

if not gpus:
    print("⚠️ TensorFlow TIDAK mendeteksi GPU. Model berjalan di CPU.")
else:
    print("✅ TensorFlow mendeteksi GPU (CUDA):")
    for gpu in gpus:
        print(f"  - {gpu.name}")

In [None]:
# Locations of the pre-split dataset: one sub-folder per split under 'dataset_split'.
train_dir, val_dir, test_dir = (
    os.path.join('dataset_split', split_name)
    for split_name in ('train', 'val', 'test')
)

# Count the number of files per class inside a dataset split directory
def count_images_per_class(directory):
    """Return a ``{class_name: file_count}`` mapping for *directory*.

    Every immediate sub-directory of *directory* is treated as one class.
    Only regular files inside a class folder are counted, so nested folders
    (for example a notebook checkpoint directory) do not inflate the total.
    Entries of *directory* that are not directories are ignored.

    Raises:
        OSError: if *directory* (or a class folder) cannot be listed.
    """
    counts = {}
    for cls in os.listdir(directory):
        class_path = os.path.join(directory, cls)
        if not os.path.isdir(class_path):
            continue
        # scandir exposes the entry type without building a path per file.
        with os.scandir(class_path) as entries:
            counts[cls] = sum(1 for entry in entries if entry.is_file())
    return counts
    
# Per-class counts for each split
train_counts = count_images_per_class(train_dir)
val_counts = count_images_per_class(val_dir)
test_counts = count_images_per_class(test_dir)
total_gambar = sum(
    sum(split_counts.values())
    for split_counts in (train_counts, val_counts, test_counts)
)

# One row per class, one column per split; a class missing from a split
# shows up as 0 instead of NaN.
df = (
    pd.DataFrame({
        'Train': train_counts,
        'Validation': val_counts,
        'Test': test_counts
    })
    .fillna(0)
    .astype(int)
)

print(f"Total gambar : {total_gambar}")
# Show the distribution as a table
print("\nSebaran Data per Kelas:")
print(df)

In [None]:
# Global configuration shared by the data pipeline and the model builder.
IMG_SIZE = (224, 224)  # (height, width) images are resized to; also the model input size
BATCH_SIZE = 4         # images per batch for the train/val/test iterators
SEED = 42              # seed passed to the directory iterators
NUM_CLASSES = 60       # width of the softmax output layer
# NOTE(review): NUM_CLASSES presumably has to equal the number of class
# sub-folders found in the dataset — confirm against the dataset.

In [None]:
# Training pipeline: rescale pixels and apply random augmentation.
train_datagen = ImageDataGenerator(
    # Scale pixel values from 0-255 down to 0-1
    rescale=1.0 / 255,
    # Random rotation of up to 20 degrees
    rotation_range=20,
    # Random zoom of up to 20%
    zoom_range=0.2,
    # Random horizontal / vertical shifts of up to 20% of the image size
    width_shift_range=0.2,
    height_shift_range=0.2,
    # Random shear distortion
    shear_range=0.15,
    # Random left-right mirroring
    horizontal_flip=True,
    # Random brightness variation
    brightness_range=[0.8, 1.2],
    # Fill pixels exposed by a transform with the nearest valid pixel
    fill_mode='nearest'
)

# Validation and test pipelines only rescale; no augmentation is applied.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Directory iterators for the three splits. All of them share IMG_SIZE,
# BATCH_SIZE and SEED so the configuration lives in one place.
train_generator = train_datagen.flow_from_directory(
    train_dir,                    # folder with the training images
    target_size=IMG_SIZE,         # resize every image to IMG_SIZE
    batch_size=BATCH_SIZE,        # images per batch
    class_mode='categorical',     # one-hot labels (multi-class classification)
    shuffle=True,                 # reshuffle the training data
    seed=SEED                     # seed for the shuffling
)

val_generator = val_datagen.flow_from_directory(
    val_dir,                      # folder with the validation images
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,                # fixed order keeps evaluation consistent
    seed=SEED
)

test_generator = test_datagen.flow_from_directory(
    test_dir,                     # folder with the test images
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,                # fixed order so predictions line up with labels
    seed=SEED
)

# Fail fast on a class-count mismatch: the model's output layer is sized by
# NUM_CLASSES, and a mismatch would otherwise only surface as a shape error
# once training starts.
if train_generator.num_classes != NUM_CLASSES:
    raise ValueError(
        f"NUM_CLASSES is {NUM_CLASSES} but {train_generator.num_classes} "
        f"class folders were found in '{train_dir}'"
    )

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, MaxPooling2D, Dropout
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from sklearn.utils.class_weight import compute_class_weight
import os
import time
import random
import copy
from typing import List, Dict, Any, Tuple

# Callback that prints a one-line progress log per epoch
class TuningCallback(tf.keras.callbacks.Callback):
    """Print every logged metric at the end of each epoch.

    Args:
        tuner_name: Label placed at the start of every printed line.
    """

    def __init__(self, tuner_name):
        super().__init__()
        self.tuner_name = tuner_name

    def on_epoch_end(self, epoch, logs=None):
        """Print the metrics in *logs* for the (0-based) *epoch* just finished."""
        # `logs` defaults to None; treat that as "no metrics" rather than
        # failing on None.items().
        logs = logs or {}
        metrics_str = " | ".join(f"{k}: {v:.4f}" for k, v in logs.items())
        print(f"✨ {self.tuner_name} - Epoch {epoch + 1} — {metrics_str}")

# =========== GENETIC ALGORITHM IMPLEMENTATION ===========
class GeneticAlgorithm:
    """Genetic-algorithm search over a small, discrete hyperparameter space.

    An individual is a dict with one value per key of ``SEARCH_SPACE``. Its
    fitness is the best validation accuracy reached by a short training run
    of the model built from it. Each generation keeps the single best
    individual (elitism) and fills the rest of the population with children
    produced by tournament selection, uniform crossover and mutation.

    Attributes:
        best_individual: Best hyperparameter dict seen so far (None before
            any generation has been evaluated).
        best_fitness: Fitness of ``best_individual``.
        history: One dict per evaluated generation with the keys
            'generation', 'best_fitness', 'avg_fitness', 'best_individual'.
    """

    # Allowed values per hyperparameter, shared by create_individual() and
    # mutate(). Insertion order fixes the order in which random draws happen.
    SEARCH_SPACE = {
        'learning_rate': [1e-4, 3e-4, 1e-3, 3e-3, 1e-2],
        'optimizer': ['adam', 'rmsprop', 'sgd'],
        'dense_units': [64, 128, 256, 384, 512],
        'dropout_rate': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        'add_conv_layer': [True, False],
        'conv_filters': [32, 64, 96, 128],
    }

    # Source of randomness; the global `random` module unless a seed is given.
    _rng = random

    def __init__(self, population_size=10, generations=20, mutation_rate=0.1,
                 crossover_rate=0.8, seed=None):
        """Configure the search.

        Args:
            population_size: Number of individuals per generation.
            generations: Number of generations to evaluate.
            mutation_rate: Per-gene probability of re-drawing a value.
            crossover_rate: Probability that a parent pair is recombined.
            seed: Optional seed for a private random generator. When None
                (the default) the global ``random`` module state is used.
        """
        self.population_size = population_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.best_individual = None
        self.best_fitness = 0
        self.history = []
        if seed is not None:
            self._rng = random.Random(seed)

    def create_individual(self) -> Dict[str, Any]:
        """Create a random individual (hyperparameter set)."""
        return {
            key: self._rng.choice(choices)
            for key, choices in self.SEARCH_SPACE.items()
        }

    def create_population(self) -> List[Dict[str, Any]]:
        """Create the initial population of ``population_size`` individuals."""
        return [self.create_individual() for _ in range(self.population_size)]

    def build_model_from_individual(self, individual: Dict[str, Any]) -> "tf.keras.Model":
        """Build and compile a classifier from an individual's hyperparameters.

        The model is a frozen ImageNet MobileNetV2 base followed by an
        optional Conv2D + MaxPooling2D pair, global average pooling, one
        Dense layer, optional Dropout and a softmax output with NUM_CLASSES
        units. 'conv_filters' is only used when 'add_conv_layer' is true.
        Any optimizer name other than 'adam' or 'rmsprop' selects SGD with
        momentum 0.9.
        """
        # Pre-trained MobileNetV2 without its classification head
        base_model = MobileNetV2(
            input_shape=(*IMG_SIZE, 3),
            include_top=False,
            weights='imagenet'
        )

        # Freeze every layer of the base model
        base_model.trainable = False

        model = Sequential()
        model.add(base_model)

        # Optional extra convolutional layer
        if individual['add_conv_layer']:
            model.add(Conv2D(individual['conv_filters'], (3, 3), activation='relu', padding='same'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

        # Collapse the feature map to one vector per image
        model.add(GlobalAveragePooling2D())

        # Dense layer
        model.add(Dense(individual['dense_units'], activation='relu'))

        # Dropout for regularisation (skipped when the rate is 0)
        if individual['dropout_rate'] > 0:
            model.add(Dropout(individual['dropout_rate']))

        # Output layer
        model.add(Dense(NUM_CLASSES, activation='softmax'))

        # Compile with the individual's optimizer and learning rate
        if individual['optimizer'] == 'adam':
            optimizer = Adam(learning_rate=individual['learning_rate'])
        elif individual['optimizer'] == 'rmsprop':
            optimizer = RMSprop(learning_rate=individual['learning_rate'])
        else:
            optimizer = SGD(learning_rate=individual['learning_rate'], momentum=0.9)

        model.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        return model

    def evaluate_individual(self, individual: Dict[str, Any]) -> float:
        """Train a model for *individual* and return its best validation accuracy.

        Training runs for at most 10 epochs on ``train_generator`` with early
        stopping on validation loss. Evaluation is best-effort: any exception
        is printed and the individual scores 0.0, so one bad configuration
        does not abort the whole search.
        """
        try:
            # Many models are built in a loop; reset Keras' global state so
            # leftovers from earlier evaluations do not accumulate.
            tf.keras.backend.clear_session()

            model = self.build_model_from_individual(individual)

            early_stopping = EarlyStopping(
                monitor='val_loss',
                patience=3,
                restore_best_weights=True,
                verbose=0
            )

            history = model.fit(
                train_generator,
                validation_data=val_generator,
                epochs=10,  # Shorter training for faster evaluation
                callbacks=[early_stopping],
                verbose=0
            )

            # Best validation accuracy over all epochs that ran
            best_val_acc = max(history.history['val_accuracy'])
            return best_val_acc

        except Exception as e:
            print(f"Error evaluating individual: {e}")
            return 0.0

    def select_parents(self, population: List[Dict[str, Any]], fitness_scores: List[float]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Select two parents by tournament selection.

        Each parent is the fittest of up to 3 individuals sampled without
        replacement. The tournament shrinks to the population size when the
        population has fewer than 3 members. The returned dicts are the
        population's own objects, not copies.

        Raises:
            ValueError: if *population* is empty.
        """
        if not population:
            raise ValueError("population must not be empty")

        tournament_size = min(3, len(population))
        candidate_indices = range(len(population))

        def run_tournament() -> int:
            contenders = self._rng.sample(candidate_indices, tournament_size)
            return max(contenders, key=lambda idx: fitness_scores[idx])

        parent1_idx = run_tournament()
        parent2_idx = run_tournament()

        return population[parent1_idx], population[parent2_idx]

    def crossover(self, parent1: Dict[str, Any], parent2: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Recombine two parents into two children.

        With probability ``1 - crossover_rate`` the children are plain copies
        of the parents. Otherwise every gene is swapped between the children
        independently with probability 0.5 (uniform crossover). The children
        are always new dicts, so changing them never alters the parents.
        """
        child1 = copy.deepcopy(parent1)
        child2 = copy.deepcopy(parent2)

        if self._rng.random() > self.crossover_rate:
            return child1, child2

        # Uniform crossover: each gene is swapped independently
        for key in parent1:
            if self._rng.random() < 0.5:
                child1[key], child2[key] = child2[key], child1[key]

        return child1, child2

    def mutate(self, individual: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *individual* with some genes re-drawn at random.

        Each gene is re-drawn from ``SEARCH_SPACE`` with probability
        ``mutation_rate``; the new value may equal the old one. Keys that are
        not part of the search space are left unchanged.
        """
        mutated = copy.deepcopy(individual)

        for key in mutated:
            if self._rng.random() < self.mutation_rate:
                choices = self.SEARCH_SPACE.get(key)
                if choices is not None:
                    mutated[key] = self._rng.choice(choices)

        return mutated

    def _breed_next_generation(self, population: List[Dict[str, Any]], fitness_scores: List[float], elite_idx: int) -> List[Dict[str, Any]]:
        """Build the next population: the elite plus selected, crossed and mutated children."""
        # Elitism: the best individual survives unchanged
        next_population = [copy.deepcopy(population[elite_idx])]

        while len(next_population) < self.population_size:
            parent1, parent2 = self.select_parents(population, fitness_scores)
            child1, child2 = self.crossover(parent1, parent2)
            next_population.extend([self.mutate(child1), self.mutate(child2)])

        # Children come in pairs, so trim a possible surplus individual
        return next_population[:self.population_size]

    def evolve(self) -> Dict[str, Any]:
        """Run the genetic algorithm and return the best individual found.

        Every generation is evaluated, recorded in ``history`` and, except
        for the last one, used to breed the next population. The best
        individual of the first evaluated generation is always recorded, even
        if every evaluation failed and scored 0.0, so the result is only None
        when ``generations`` is 0.

        Raises:
            ValueError: if ``population_size`` is smaller than 1.
        """
        if self.population_size < 1:
            raise ValueError("population_size must be at least 1")

        print(f"🚀 Memulai Genetic Algorithm dengan {self.population_size} individu dan {self.generations} generasi")

        population = self.create_population()

        for generation in range(self.generations):
            print(f"\n🔄 Generasi {generation + 1}/{self.generations}")

            # Evaluate all individuals
            fitness_scores = []
            for i, individual in enumerate(population):
                print(f"  Evaluating individual {i+1}/{len(population)}...")
                fitness = self.evaluate_individual(individual)
                fitness_scores.append(fitness)
                print(f"    Fitness: {fitness:.4f}")

            # Update the best individual seen so far
            best_idx = int(np.argmax(fitness_scores))
            generation_best = fitness_scores[best_idx]
            if self.best_individual is None or generation_best > self.best_fitness:
                self.best_fitness = generation_best
                self.best_individual = copy.deepcopy(population[best_idx])
                print(f"  🎉 New best fitness: {self.best_fitness:.4f}")

            # Record history
            self.history.append({
                'generation': generation + 1,
                'best_fitness': generation_best,
                'avg_fitness': np.mean(fitness_scores),
                'best_individual': copy.deepcopy(population[best_idx])
            })

            # A population bred after the last generation would never be
            # evaluated, so only breed when another generation follows.
            if generation < self.generations - 1:
                population = self._breed_next_generation(population, fitness_scores, best_idx)

            print(f"  Best fitness this generation: {generation_best:.4f}")
            print(f"  Average fitness: {np.mean(fitness_scores):.4f}")

        print(f"\n🏆 Genetic Algorithm selesai!")
        print(f"Best fitness achieved: {self.best_fitness:.4f}")
        print(f"Best individual: {self.best_individual}")

        return self.best_individual

In [None]:
# =========== RUN GENETIC ALGORITHM ===========

# Create the directory for tuning results (nothing in this cell writes to it)
project_dir = "hyperparameter_tuning_genetic_results"
os.makedirs(project_dir, exist_ok=True)

# Initialize genetic algorithm
ga = GeneticAlgorithm(
    population_size=8,  # Smaller population for faster execution
    generations=10,     # Fewer generations for demonstration
    mutation_rate=0.1,
    crossover_rate=0.8
)

# Run genetic algorithm
best_hyperparameters = ga.evolve()

# evolve() can come back empty (for example when no individual ever scored
# above 0); stop with a clear message instead of failing on None.items().
if best_hyperparameters is None:
    raise RuntimeError(
        "Genetic algorithm returned no best individual; check the "
        "'Error evaluating individual' messages above."
    )

print("\n==== HYPERPARAMETER TERBAIK ====")
for key, value in best_hyperparameters.items():
    print(f"{key}: {value}")

In [None]:
# Build and train the final model with the best hyperparameters
print("\n==== LATIH MODEL FINAL DENGAN HYPERPARAMETER TERBAIK ====")

# Build model with best hyperparameters
best_model = ga.build_model_from_individual(best_hyperparameters)

# Balanced class weights computed from the training labels
present_classes = np.unique(train_generator.classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_generator.classes
)
# Key every weight by its actual class index. enumerate() would number the
# weights 0..k-1 and mislabel them whenever a class index is absent from the
# training labels.
class_weights_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, class_weights)
}

# Callbacks for final training
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
    min_lr=1e-6
)

# Train final model; the checkpoint keeps the epoch with the best val_accuracy
history = best_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=[
        early_stopping,
        ModelCheckpoint('best_genetic_model.keras', save_best_only=True, monitor='val_accuracy'),
        reduce_lr
    ],
    class_weight=class_weights_dict,
    verbose=1
)

# Save the model as it stands after training
best_model.save('final_genetic_model.keras')
print("Model final telah disimpan sebagai 'final_genetic_model.keras'")

# Evaluate model on test set, using a fresh, unshuffled iterator
print("\n==== EVALUASI MODEL PADA TEST SET ====")
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    seed=SEED
)

test_loss, test_accuracy = best_model.evaluate(test_generator)
print(f"Test accuracy: {test_accuracy:.4f}")
print(f"Test loss: {test_loss:.4f}")

In [None]:
# Plot the GA search progress next to the final model's training curves
plt.figure(figsize=(15, 5))

# Per-generation series collected from the GA history
generations = []
best_fitness = []
avg_fitness = []
for record in ga.history:
    generations.append(record['generation'])
    best_fitness.append(record['best_fitness'])
    avg_fitness.append(record['avg_fitness'])

# Panel 1: fitness per generation
fitness_axis = plt.subplot(1, 3, 1)
fitness_axis.plot(generations, best_fitness, 'b-', label='Best Fitness', linewidth=2)
fitness_axis.plot(generations, avg_fitness, 'r--', label='Average Fitness', linewidth=2)
fitness_axis.set_xlabel('Generation')
fitness_axis.set_ylabel('Fitness (Validation Accuracy)')
fitness_axis.set_title('Genetic Algorithm Evolution')
fitness_axis.legend()
fitness_axis.grid(True)

# Panels 2 and 3: train vs. validation curves of the final training run
curve_panels = (
    (2, 'accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
    (3, 'loss', 'Model Loss', 'Loss', 'upper right'),
)
for position, metric, panel_title, y_label, legend_loc in curve_panels:
    curve_axis = plt.subplot(1, 3, position)
    curve_axis.plot(history.history[metric])
    curve_axis.plot(history.history['val_' + metric])
    curve_axis.set_title(panel_title)
    curve_axis.set_ylabel(y_label)
    curve_axis.set_xlabel('Epoch')
    curve_axis.legend(['Train', 'Validation'], loc=legend_loc)

plt.tight_layout()
plt.savefig('genetic_algorithm_results.png')
plt.show()

print("Genetic algorithm results telah disimpan sebagai 'genetic_algorithm_results.png'")

In [None]:
# Text summary of the GA run: configuration, result and per-generation progress
print("\n==== ANALISIS HASIL GENETIC ALGORITHM ====")
run_summary = (
    f"Total generations: {len(ga.history)}",
    f"Population size: {ga.population_size}",
    f"Mutation rate: {ga.mutation_rate}",
    f"Crossover rate: {ga.crossover_rate}",
    f"Best fitness achieved: {ga.best_fitness:.4f}",
)
print("\n".join(run_summary))

print("\nEvolution of best fitness:")
for record in ga.history:
    print(f"Generation {record['generation']}: {record['best_fitness']:.4f}")

# Static notes about the method
method_notes = (
    "\nComparison with other methods:",
    "Genetic Algorithm advantages:",
    "- Can find good solutions with fewer evaluations",
    "- Maintains diversity in search space",
    "- Can escape local optima through mutation",
    "- More efficient than GridSearch for large search spaces",
)
print("\n".join(method_notes))