# ==============================
# 1. SETUP
# ==============================

In [4]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import mediapipe as mp
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
import albumentations as A
import json
import random
from collections import Counter
import glob

In [5]:
# Report the versions of the two core libraries so a run can be reproduced.
for _label, _version in (
    ("TensorFlow version:", tf.__version__),
    ("OpenCV version:", cv2.__version__),
):
    print(_label, _version)

TensorFlow version: 2.12.0
OpenCV version: 4.12.0


# ==============================
# 2. KONFIGURASI DAN PARAMETER
# ==============================

In [6]:
# Dataset configuration: one sub-folder of DATASET_PATH per class name.
DATASET_PATH = 'hand_gesture_dataset'
CLASS_NAMES = ['maju', 'kanan', 'kiri', 'stop']
NUM_CLASSES = len(CLASS_NAMES)

# Image size and training hyperparameters.
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 50
LEARNING_RATE = 0.001

# Create the output directories (relative to the current working directory).
# NOTE(review): other code in this file writes to absolute '/content/...'
# paths; confirm that the working directory makes the two coincide.
for _subdir in ('processed_data', 'models', 'results'):
    os.makedirs(f'content/{_subdir}', exist_ok=True)

# ==============================
# 3. PREPROCESSING DENGAN MEDIAPIPE
# ==============================

In [2]:
class HandPreprocessor:
    """Detect a hand with MediaPipe Hands and crop the image around it.

    One detector is created in ``__init__`` and reused for every image.
    Call :meth:`close` (or use the instance as a context manager) to release
    it when finished.
    """

    def __init__(self):
        self.mp_hands = mp.solutions.hands
        self.mp_drawing = mp.solutions.drawing_utils
        self.hands = self.mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        )

    def close(self):
        """Release the underlying MediaPipe detector."""
        self.hands.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def _padded_bbox(x_coords, y_coords, width, height, padding_ratio=0.2):
        """Return a padded, image-clamped box around the given pixel points.

        Args:
            x_coords: x positions of the points, in pixels.
            y_coords: y positions of the points, in pixels.
            width: image width in pixels (upper clamp for x).
            height: image height in pixels (upper clamp for y).
            padding_ratio: fraction of the box width/height added on each side.

        Returns:
            ``(x_min, y_min, x_max, y_max)`` as ints, or ``None`` when the
            clamped box has no area (for example when every point lies
            outside the image or all points share one coordinate).
        """
        x_min, x_max = int(min(x_coords)), int(max(x_coords))
        y_min, y_max = int(min(y_coords)), int(max(y_coords))

        padding_x = int((x_max - x_min) * padding_ratio)
        padding_y = int((y_max - y_min) * padding_ratio)

        x_min = max(0, x_min - padding_x)
        x_max = min(width, x_max + padding_x)
        y_min = max(0, y_min - padding_y)
        y_max = min(height, y_max + padding_y)

        if x_max <= x_min or y_max <= y_min:
            return None
        return x_min, y_min, x_max, y_max

    def detect_and_crop_hand(self, image_path, output_size=(224, 224), padding_ratio=0.2):
        """Detect a hand in an image file and return the resized crop.

        Args:
            image_path: path of the image to read with ``cv2.imread``.
            output_size: size passed to ``cv2.resize`` for the returned image.
            padding_ratio: fraction of the landmark box added on each side
                before cropping (0.2 reproduces the previous fixed padding).

        Returns:
            ``(image, bbox)``. ``image`` is the resized crop, in the channel
            order produced by ``cv2.imread``. ``bbox`` is
            ``(x_min, y_min, x_max, y_max)`` in pixels of the source image.
            When no hand is detected, or the landmark box has no area inside
            the image, the whole image is resized instead and ``bbox`` is
            ``None``. When the file cannot be read or processing raises,
            ``(None, None)`` is returned.
        """
        try:
            # Read the image
            image = cv2.imread(image_path)
            if image is None:
                return None, None

            # The detector is fed an RGB copy; the crop is taken from `image`.
            rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = self.hands.process(rgb_image)

            bbox = None
            if results.multi_hand_landmarks:
                # Use the first detected hand; landmark coordinates are
                # scaled by the image width/height to get pixel positions.
                hand_landmarks = results.multi_hand_landmarks[0]
                h, w = image.shape[:2]
                bbox = self._padded_bbox(
                    [landmark.x * w for landmark in hand_landmarks.landmark],
                    [landmark.y * h for landmark in hand_landmarks.landmark],
                    w,
                    h,
                    padding_ratio,
                )

            if bbox is None:
                # No usable hand region: fall back to the full image rather
                # than letting cv2.resize fail on an empty crop.
                return cv2.resize(image, output_size), None

            x_min, y_min, x_max, y_max = bbox
            cropped_hand = cv2.resize(image[y_min:y_max, x_min:x_max], output_size)
            return cropped_hand, bbox

        except Exception as e:
            # Best-effort boundary: report the file and let the caller record
            # it as failed instead of aborting the whole dataset pass.
            print(f"Error processing {image_path}: {str(e)}")
            return None, None

def preprocess_dataset():
    """Run every dataset image through ``HandPreprocessor``.

    Looks for one sub-folder per entry of ``CLASS_NAMES`` under
    ``DATASET_PATH``, collects the jpg/jpeg/png/bmp files in it, and keeps
    the image returned by ``detect_and_crop_hand`` together with the class
    index. Paths for which no image came back are counted as failed and
    listed in ``/content/failed_images.txt``.

    Returns:
        ``(images, labels)`` as NumPy arrays; both are empty when nothing
        could be processed.
    """
    preprocessor = HandPreprocessor()

    processed_data = []
    labels = []
    failed_images = []

    print("Memulai preprocessing dataset...")

    try:
        for class_idx, class_name in enumerate(CLASS_NAMES):
            class_path = os.path.join(DATASET_PATH, class_name)
            if not os.path.exists(class_path):
                print(f"Warning: Path {class_path} tidak ditemukan!")
                continue

            print(f"Processing class: {class_name}")

            # Collect all image files. A set removes the duplicates produced
            # when the lower- and upper-case patterns match the same file
            # (glob matching is case-insensitive on some platforms), and
            # sorting makes the sample order reproducible across runs.
            matches = set()
            for ext in ['*.jpg', '*.jpeg', '*.png', '*.bmp']:
                matches.update(glob.glob(os.path.join(class_path, ext)))
                matches.update(glob.glob(os.path.join(class_path, ext.upper())))
            image_files = sorted(matches)

            print(f"Found {len(image_files)} images in {class_name}")

            for img_path in image_files:
                cropped_image, _bbox = preprocessor.detect_and_crop_hand(img_path)

                if cropped_image is not None:
                    processed_data.append(cropped_image)
                    labels.append(class_idx)
                else:
                    failed_images.append(img_path)
    finally:
        # Release the MediaPipe detector even if processing is interrupted.
        preprocessor.hands.close()

    print(f"Preprocessing selesai!")
    print(f"Total gambar berhasil diproses: {len(processed_data)}")
    print(f"Total gambar gagal: {len(failed_images)}")

    # Save the failed paths for review. The log is auxiliary, so a write
    # problem is reported instead of discarding the processed data.
    if failed_images:
        failed_log = '/content/failed_images.txt'
        try:
            os.makedirs(os.path.dirname(failed_log), exist_ok=True)
            with open(failed_log, 'w', encoding='utf-8') as f:
                f.writelines(f"{img}\n" for img in failed_images)
        except OSError as e:
            print(f"Warning: could not write {failed_log}: {e}")

    return np.array(processed_data), np.array(labels)

In [7]:
print("\n1. PREPROCESSING DATA DENGAN MEDIAPIPE")
print("-"*40)
X, y = preprocess_dataset()

if len(X) == 0:
    print("Error: No data loaded. Please check your dataset path!")
    # `exit()` is the interactive-shell helper and reports success; raise
    # SystemExit with a non-zero status so the failure is visible to callers.
    raise SystemExit(1)

# Normalise pixel values to [0, 1].
# NOTE(review): this happens before the augmentation cell further down, so
# augment_data receives float images rather than the raw 0-255 data.
X = X.astype(np.float32) / 255.0


1. PREPROCESSING DATA DENGAN MEDIAPIPE
----------------------------------------
Memulai preprocessing dataset...
Processing class: maju
Found 600 images in maju
Processing class: kanan
Found 600 images in kanan
Processing class: kiri
Found 600 images in kiri
Processing class: stop
Found 600 images in stop
Preprocessing selesai!
Total gambar berhasil diproses: 2400
Total gambar gagal: 0


# ==============================
# 4. DATA AUGMENTASI
# ==============================

In [8]:
def create_augmentation_pipeline(horizontal_flip_p=0.5):
    """Build the Albumentations pipeline used to augment training images.

    Args:
        horizontal_flip_p: probability of mirroring the image horizontally.
            Defaults to 0.5, the value that used to be hard-coded.
            NOTE(review): the class list in this file contains 'kanan' and
            'kiri'; if those gestures are mirror images of each other, a
            horizontal flip turns one into the other while the label stays
            the same. Pass 0.0 in that case - confirm against the dataset.

    Returns:
        An ``A.Compose`` that is always applied (``p=1.0``); each individual
        transform fires with its own probability.
    """
    # NOTE(review): the captured output of the augmentation cell echoes the
    # GaussNoise(var_limit=...) and CoarseDropout(max_holes=...) lines, which
    # looks like a warning from the installed Albumentations version about
    # these arguments - verify that they are still honoured.
    return A.Compose([
        A.HorizontalFlip(p=horizontal_flip_p),
        A.Rotate(limit=15, p=0.7),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
        A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
        A.MotionBlur(blur_limit=3, p=0.3),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=15, val_shift_limit=10, p=0.5),
        A.CoarseDropout(max_holes=8, max_height=8, max_width=8, p=0.3),
    ], p=1.0)

def augment_data(X, y, augment_factor=2):
    """Append ``augment_factor`` augmented copies of every image to the data.

    The originals are kept and come first in the result, followed by the
    augmented copies in sample order.

    The magnitudes configured in ``create_augmentation_pipeline`` (noise
    variance, saturation/value shifts) are written for 0-255 pixel data, and
    how they act on [0, 1] float images depends on the Albumentations
    version. Float input in [0, 1] is therefore converted to uint8 for the
    transform and scaled back to the input dtype afterwards, so the result
    keeps the scale of ``X``. Other input is passed to the transform as is.

    Args:
        X: images, shape ``(n, height, width, channels)``.
        y: labels, one per image.
        augment_factor: number of augmented copies per image; 0 returns the
            data unchanged.

    Returns:
        ``(images, labels)`` as NumPy arrays with
        ``n * (1 + augment_factor)`` entries.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Start from the untouched originals.
    augmented_X = list(X)
    augmented_y = list(y)

    print("Melakukan augmentasi data...")

    if len(X) > 0 and augment_factor > 0:
        # Built only when there is something to augment.
        transform = create_augmentation_pipeline()

        unit_float = bool(
            np.issubdtype(X.dtype, np.floating)
            and float(X.min()) >= 0.0
            and float(X.max()) <= 1.0
        )

        for image, label in zip(X, y):
            if unit_float:
                # Convert once per image; rint restores the exact 8-bit
                # value when the floats came from uint8 / 255.
                source = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)
            else:
                source = image

            for _ in range(augment_factor):
                augmented_image = transform(image=source)['image']
                if unit_float:
                    augmented_image = augmented_image.astype(X.dtype) / 255.0

                augmented_X.append(augmented_image)
                augmented_y.append(label)

    print(f"Data original: {len(X)}")
    print(f"Data setelah augmentasi: {len(augmented_X)}")

    return np.array(augmented_X), np.array(augmented_y)

In [9]:
print("\n2. SPLITTING DATA", "-"*40, sep="\n")

# First hold out 20% of the data as the test set, stratified by label.
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Then take 25% of the remainder for validation (20% of the whole set).
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.25,
    random_state=42,
    stratify=y_temp,
)

print(
    f"Train set: {len(X_train)} samples",
    f"Validation set: {len(X_val)} samples",
    f"Test set: {len(X_test)} samples",
    sep="\n",
)


2. SPLITTING DATA
----------------------------------------
Train set: 1440 samples
Validation set: 480 samples
Test set: 480 samples


# ==============================
# 5. MOBILENETV2 SSD MODEL
# ==============================

In [10]:
def create_mobilenetv2_model(input_shape=(224, 224, 3), num_classes=5, rescale_inputs=True):
    """Build a MobileNetV2 classifier with a multi-scale ("SSD-inspired") head.

    Globally pooled features from three depths of the backbone are each
    projected by a dense layer, concatenated, and fed to a small dense
    classifier ending in a softmax.

    Args:
        input_shape: shape of one input image, ``(height, width, channels)``.
        num_classes: size of the softmax output. The default is 5; callers
            in this file pass ``NUM_CLASSES`` explicitly.
        rescale_inputs: when True (default) the model maps its input from
            [0, 1] to [-1, 1] before the backbone. The ImageNet MobileNetV2
            weights are documented to expect inputs in [-1, 1], while this
            file feeds images divided by 255. Pass False to feed the
            backbone unchanged, as before.
            NOTE(review): the images come from ``cv2.imread`` (BGR order)
            whereas the pretrained weights presumably expect RGB - confirm
            whether the channel order should be swapped in the data
            pipeline.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = layers.Input(shape=input_shape, name='image_input')
    if rescale_inputs:
        # x * 2 - 1 maps [0, 1] onto [-1, 1].
        backbone_input = layers.Rescaling(
            scale=2.0, offset=-1.0, name='input_rescaling'
        )(inputs)
    else:
        backbone_input = inputs

    # MobileNetV2 backbone, built on top of the (optionally rescaled) input so
    # that its intermediate layer outputs are part of the same graph.
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=input_shape,
        input_tensor=backbone_input,
        alpha=1.0,
        include_top=False,
        weights='imagenet'
    )

    # Freeze everything except the last 20 layers of the backbone.
    for layer in base_model.layers[:-20]:
        layer.trainable = False

    x = base_model.output

    # Multi-scale feature extraction.
    # Feature map 1: final backbone output (7x7 for a 224x224 input).
    feat1 = layers.GlobalAveragePooling2D(name='feat1_gap')(x)
    feat1 = layers.Dense(512, activation='relu', name='feat1_dense')(feat1)

    # Feature map 2: an earlier layer (14x14 for a 224x224 input).
    x_prev = base_model.get_layer('block_13_expand_relu').output
    feat2 = layers.GlobalAveragePooling2D(name='feat2_gap')(x_prev)
    feat2 = layers.Dense(256, activation='relu', name='feat2_dense')(feat2)

    # Feature map 3: an even earlier layer (28x28 for a 224x224 input).
    x_early = base_model.get_layer('block_6_expand_relu').output
    feat3 = layers.GlobalAveragePooling2D(name='feat3_gap')(x_early)
    feat3 = layers.Dense(128, activation='relu', name='feat3_dense')(feat3)

    # Combine all features.
    combined_features = layers.Concatenate(name='combined_features')([feat1, feat2, feat3])

    # Classification head.
    x = layers.Dropout(0.5, name='dropout1')(combined_features)
    x = layers.Dense(512, activation='relu', name='classifier_dense1')(x)
    x = layers.Dropout(0.3, name='dropout2')(x)
    x = layers.Dense(256, activation='relu', name='classifier_dense2')(x)

    # Output layer.
    predictions = layers.Dense(num_classes, activation='softmax', name='predictions')(x)

    model = tf.keras.Model(inputs=inputs, outputs=predictions)

    return model

In [11]:
# Only the training split is passed to augment_data.
print("\n3. DATA AUGMENTATION", "-"*40, sep="\n")
X_train_aug, y_train_aug = augment_data(X=X_train, y=y_train, augment_factor=2)


3. DATA AUGMENTATION
----------------------------------------
Melakukan augmentasi data...


  A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
  A.CoarseDropout(max_holes=8, max_height=8, max_width=8, p=0.3),


Data original: 1440
Data setelah augmentasi: 4320


# ==============================
# 6. TRAINING PIPELINE
# ==============================

In [15]:
def train_model(X_train, y_train, X_val, y_val, *, batch_size=None, epochs=None,
                learning_rate=None,
                checkpoint_path='/content/models/best_hand_model.h5'):
    """Build, compile and fit the MobileNetV2 classifier.

    Args:
        X_train, y_train: training images and integer labels.
        X_val, y_val: validation images and integer labels.
        batch_size: overrides ``BATCH_SIZE`` when given.
        epochs: overrides ``EPOCHS`` when given.
        learning_rate: overrides ``LEARNING_RATE`` when given.
        checkpoint_path: where the model with the best ``val_accuracy`` is
            written during training.

    Returns:
        ``(model, history)``: the trained model and the object returned by
        ``model.fit``.
    """
    # Fall back to the module-level settings at call time, so later changes
    # to those constants are picked up.
    batch_size = BATCH_SIZE if batch_size is None else batch_size
    epochs = EPOCHS if epochs is None else epochs
    learning_rate = LEARNING_RATE if learning_rate is None else learning_rate

    # Create model
    model = create_mobilenetv2_model(num_classes=NUM_CLASSES)

    # Labels are integer class ids, hence the sparse loss.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    print("Model Summary:")
    model.summary()

    # Make sure the checkpoint directory exists before training starts.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    callbacks = [
        # Keep only the best model seen so far.
        tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path,
            monitor='val_accuracy',
            save_best_only=True,
            verbose=1
        ),
        # Halve the learning rate after 5 epochs without val_loss improvement.
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-7,
            verbose=1
        ),
        # Stop after 10 epochs without val_accuracy improvement and restore
        # the best weights.
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=10,
            restore_best_weights=True,
            verbose=1
        )
    ]

    # Training
    history = model.fit(
        X_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
        verbose=1
    )

    return model, history

def plot_training_history(history, save_path='/content/results/training_history.png'):
    """Plot accuracy, loss and (if recorded) learning-rate curves.

    Args:
        history: object with a ``history`` dict holding the per-epoch lists
            'accuracy', 'val_accuracy', 'loss' and 'val_loss', as returned by
            ``model.fit``.
        save_path: file the figure is written to; its directory is created
            when missing.
    """
    logs = history.history
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Accuracy and loss share the same layout: train curve plus val curve.
    for ax, key, name in ((axes[0, 0], 'accuracy', 'Accuracy'),
                          (axes[0, 1], 'loss', 'Loss')):
        ax.plot(logs[key], label=f'Train {name}')
        ax.plot(logs[f'val_{key}'], label=f'Val {name}')
        ax.set_title(f'Model {name}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(name)
        ax.legend()
        ax.grid(True)

    # Learning rate, if it was logged. The key is 'lr' in the Keras version
    # this file was run with; 'learning_rate' is accepted as well since newer
    # Keras releases appear to use that name - TODO confirm.
    lr_key = next((k for k in ('lr', 'learning_rate') if k in logs), None)
    if lr_key is not None:
        axes[1, 0].plot(logs[lr_key], label='Learning Rate')
        axes[1, 0].set_title('Learning Rate')
        axes[1, 0].set_xlabel('Epoch')
        axes[1, 0].set_ylabel('LR')
        axes[1, 0].set_yscale('log')
        axes[1, 0].legend()
        axes[1, 0].grid(True)
    else:
        axes[1, 0].axis('off')

    # The fourth panel is never used; hide it instead of drawing empty axes.
    axes[1, 1].axis('off')

    plt.tight_layout()

    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()
    # Free the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# ==============================
# 7. EVALUASI DAN METRIK
# ==============================

In [16]:
def evaluate_model(model, X_test, y_test):
    """Evaluate the model on the test set and store the results.

    Prints overall and per-class precision/recall/F1 and the classification
    report, shows the confusion matrix and saves it to
    ``/content/results/confusion_matrix.png``, and writes the metrics to
    ``/content/results/metrics.json``.

    Args:
        model: object whose ``predict`` returns per-class probabilities.
        X_test: test images.
        y_test: integer class ids, indexing ``CLASS_NAMES``.

    Returns:
        ``(y_pred, y_pred_proba, metrics)``: predicted class ids, the raw
        probabilities, and the metrics dict that was written to disk.
    """
    results_dir = '/content/results'
    os.makedirs(results_dir, exist_ok=True)

    y_test = np.asarray(y_test).ravel()

    # Predictions
    y_pred_proba = model.predict(X_test)
    y_pred = np.argmax(y_pred_proba, axis=1)

    # Always score every known class. Without an explicit label list the
    # per-class arrays only cover labels that occur in y_test/y_pred, so
    # indexing them by class position breaks as soon as one class is absent.
    # zero_division=0 scores such a class as 0.
    score_kwargs = {'labels': list(range(len(CLASS_NAMES))), 'zero_division': 0}

    # Accuracy
    accuracy = np.mean(y_pred == y_test)

    # Precision, recall and F1-score per class
    precision = precision_score(y_test, y_pred, average=None, **score_kwargs)
    recall = recall_score(y_test, y_pred, average=None, **score_kwargs)
    f1 = f1_score(y_test, y_pred, average=None, **score_kwargs)

    # Overall (macro-averaged) metrics
    precision_macro = precision_score(y_test, y_pred, average='macro', **score_kwargs)
    recall_macro = recall_score(y_test, y_pred, average='macro', **score_kwargs)
    f1_macro = f1_score(y_test, y_pred, average='macro', **score_kwargs)

    print("="*50)
    print("HASIL EVALUASI MODEL")
    print("="*50)
    print(f"Overall Accuracy: {accuracy:.4f}")
    print(f"Macro Precision: {precision_macro:.4f}")
    print(f"Macro Recall: {recall_macro:.4f}")
    print(f"Macro F1-Score: {f1_macro:.4f}")
    print()

    print("Per-Class Metrics:")
    print("-"*40)
    for i, class_name in enumerate(CLASS_NAMES):
        print(f"{class_name:10} - Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")

    # Classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=CLASS_NAMES, **score_kwargs))

    # Confusion matrix (rows: true label, columns: predicted label)
    cm = confusion_matrix(y_test, y_pred, labels=score_kwargs['labels'])

    fig = plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=CLASS_NAMES,
                yticklabels=CLASS_NAMES)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig('/content/results/confusion_matrix.png', dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

    # Save the metrics; NumPy scalars are cast to float for JSON.
    metrics = {
        'accuracy': float(accuracy),
        'precision_macro': float(precision_macro),
        'recall_macro': float(recall_macro),
        'f1_macro': float(f1_macro),
        'per_class_metrics': {
            class_name: {
                'precision': float(precision[i]),
                'recall': float(recall[i]),
                'f1_score': float(f1[i])
            }
            for i, class_name in enumerate(CLASS_NAMES)
        }
    }

    with open('/content/results/metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)

    return y_pred, y_pred_proba, metrics


# ==============================
# 8. TESTING DENGAN 10 SAMPLE TERBAIK
# ==============================

In [17]:
def get_best_predictions(model, X_test, y_test, top_n=10, samples_per_class=2, class_names=None):
    """Pick the most confident correct predictions of every class.

    Args:
        model: object whose ``predict(X_test)`` returns an array of shape
            ``(n_samples, n_classes)`` with per-class scores.
        X_test: test inputs, passed to ``model.predict`` unchanged.
        y_test: integer class ids (array or list), one per sample.
        top_n: currently unused; kept so existing calls keep working.
        samples_per_class: maximum number of samples returned per class.
        class_names: class names in class-id order; defaults to the
            module-level ``CLASS_NAMES``.

    Returns:
        Dict mapping class name to ``{'indices': ..., 'confidences': ...}``,
        both NumPy arrays ordered from most to least confident. Classes
        without any correct prediction are reported and left out.
    """
    if class_names is None:
        class_names = CLASS_NAMES

    # Accept lists and column vectors as well as flat arrays.
    y_test = np.asarray(y_test).ravel()

    y_pred_proba = np.asarray(model.predict(X_test))
    y_pred = np.argmax(y_pred_proba, axis=1)

    best_samples = {}

    for class_idx, class_name in enumerate(class_names):
        # Samples of this class that were also predicted as this class.
        correct_indices = np.flatnonzero((y_test == class_idx) & (y_pred == class_idx))

        if len(correct_indices) == 0:
            print(f"Warning: No correct predictions for class {class_name}")
            continue

        # Confidence the model assigned to the (correct) class.
        confidences = y_pred_proba[correct_indices, class_idx]

        # Highest confidence first, then keep the requested number.
        order = np.argsort(confidences)[::-1][:samples_per_class]

        best_samples[class_name] = {
            'indices': correct_indices[order],
            'confidences': confidences[order]
        }

    return best_samples

def visualize_best_predictions(X_test, y_test, best_samples):
    """Show and save a grid with the selected samples of every class.

    One row per class in ``best_samples`` and one column per sample; the
    figure is saved to ``/content/results/best_predictions.png``.

    Args:
        X_test: test images; non-uint8 images are treated as [0, 1] data and
            scaled to 0-255 for display.
        y_test: unused; kept for interface compatibility.
        best_samples: dict as returned by ``get_best_predictions``, mapping
            class name to ``{'indices': ..., 'confidences': ...}``.
    """
    if not best_samples:
        print("Warning: No samples to visualize")
        return

    # Size the grid from the data so that any number of samples per class
    # fits and classes that were left out do not produce empty rows.
    n_rows = len(best_samples)
    n_cols = max(1, max(len(samples['indices']) for samples in best_samples.values()))

    # squeeze=False keeps `axes` two-dimensional even for one row or column.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 3 * n_rows), squeeze=False)

    # Hide every axis up front; cells without a sample then stay blank.
    for ax in axes.ravel():
        ax.axis('off')

    for row, (class_name, samples) in enumerate(best_samples.items()):
        for col, (idx, conf) in enumerate(zip(samples['indices'], samples['confidences'])):
            ax = axes[row, col]

            # Show the image
            img = X_test[idx]
            if img.dtype != np.uint8:
                # Round and clip instead of truncating/wrapping on the cast.
                img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)

            # Images are stored in OpenCV's BGR order; matplotlib wants RGB.
            ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax.set_title(f'{class_name}\nConfidence: {conf:.4f}')

    plt.tight_layout()
    os.makedirs('/content/results', exist_ok=True)
    plt.savefig('/content/results/best_predictions.png', dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

# ==============================
# 9. MAIN EXECUTION PIPELINE
# ==============================

In [None]:
def main():
    """Run the whole pipeline: preprocess, split, augment, train, evaluate.

    Augmentation is applied to the raw 8-bit images and pixel values are
    scaled to [0, 1] afterwards. The magnitudes configured in the
    augmentation pipeline are written for 0-255 data, so normalising first
    (as this function used to do) makes their effect depend on how the
    installed Albumentations version treats float images.
    """

    print("="*60)
    print("HAND DETECTION WITH MOBILENETV2 SSD")
    print("="*60)

    # Step 1: Preprocessing data
    print("\n1. PREPROCESSING DATA DENGAN MEDIAPIPE")
    print("-"*40)
    X, y = preprocess_dataset()

    if len(X) == 0:
        print("Error: No data loaded. Please check your dataset path!")
        return

    # Step 2: Split data (still raw pixel values): 60% train, 20% validation,
    # 20% test, stratified by label.
    print("\n2. SPLITTING DATA")
    print("-"*40)
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp
    )

    print(f"Train set: {len(X_train)} samples")
    print(f"Validation set: {len(X_val)} samples")
    print(f"Test set: {len(X_test)} samples")

    # Step 3: Data augmentation (training split only, on raw images)
    print("\n3. DATA AUGMENTATION")
    print("-"*40)
    X_train_aug, y_train_aug = augment_data(X_train, y_train, augment_factor=2)

    # Normalise all splits to [0, 1] after augmentation.
    X_train_aug = X_train_aug.astype(np.float32) / 255.0
    X_val = X_val.astype(np.float32) / 255.0
    X_test = X_test.astype(np.float32) / 255.0

    # Step 4: Training
    print("\n4. TRAINING MODEL")
    print("-"*40)
    model, history = train_model(X_train_aug, y_train_aug, X_val, y_val)

    # Step 5: Plot training history
    print("\n5. PLOTTING TRAINING HISTORY")
    print("-"*40)
    plot_training_history(history)

    # Step 6: Evaluate the model
    print("\n6. EVALUASI MODEL")
    print("-"*40)
    y_pred, y_pred_proba, metrics = evaluate_model(model, X_test, y_test)

    # Step 7: Get best predictions
    print("\n7. ANALISIS 10 SAMPLE TERBAIK")
    print("-"*40)
    best_samples = get_best_predictions(model, X_test, y_test)
    visualize_best_predictions(X_test, y_test, best_samples)

    # Step 8: Save final model
    print("\n8. MENYIMPAN MODEL FINAL")
    print("-"*40)
    model.save('/content/models/final_hand_detection_model.h5')

    # Step 9: Summary
    # NOTE(review): the list below mentions realtime_hand_detection.py, which
    # nothing in this file creates - confirm it is produced elsewhere.
    print("\n9. RINGKASAN HASIL")
    print("="*40)
    print(f"✓ Dataset berhasil diproses: {len(X)} gambar")
    print(f"✓ Model accuracy: {metrics['accuracy']:.4f}")
    print(f"✓ Precision (macro): {metrics['precision_macro']:.4f}")
    print(f"✓ Recall (macro): {metrics['recall_macro']:.4f}")
    print(f"✓ F1-Score (macro): {metrics['f1_macro']:.4f}")
    print("\nFile yang tersedia untuk download:")
    print("- /content/models/final_hand_detection_model.h5 (Model terlatih)")
    print("- /content/realtime_hand_detection.py (Kode real-time)")
    print("- /content/results/ (Hasil evaluasi)")

    print("\n" + "="*60)
    print("PIPELINE SELESAI!")
    print("="*60)

# Run the main pipeline when this file is executed as a script.
if __name__ == "__main__":
    main()

# ==============================
# BONUS: UTILITY FUNCTIONS
# ==============================

# def download_results():
#     """Download hasil training"""
#     from google.colab import files
    
#     # Download model
#     files.download('/content/models/final_hand_detection_model.h5')
    
#     # Download real-time code
#     files.download('/content/realtime_hand_detection.py')
    
#     # Download metrics
#     files.download('/content/results/metrics.json')
    
#     print("Semua file penting telah didownload!")

# Uncomment line di bawah untuk auto-download
# download_results()