# Boxing Pose Classifier Training
# Notebook untuk training model klasifikasi pose boxing (Jab, Hook, Uppercut)

## 1. Import Library yang Dibutuhkan


In [None]:
import json
import os
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

## 2. Fungsi untuk Load Data JSON

In [None]:
def load_json_data(input_path, is_folder=False):
    """
    Load pose samples from one JSON file or from every JSON file in a folder.

    Args:
        input_path: Path to a JSON file, or to a folder when ``is_folder``
            is True.
        is_folder: True to read every ``*.json`` file located directly
            inside ``input_path`` (sub-folders are not searched).

    Returns:
        The loaded samples, expected to look like
        ``[{'name': 'pose_name', 'landmarks': [...]}]``. In folder mode the
        result is a single list made by concatenating the files in sorted
        filename order. In single-file mode the parsed top-level JSON value
        is returned unchanged.

    Raises:
        ValueError: In folder mode, if a file's top-level JSON value is not
            a list.
    """
    if not is_folder:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        print(f"Loaded {len(data)} samples dari file")
        return data

    # os.listdir() returns entries in arbitrary order; sort so the sample
    # order (and anything seeded downstream) is reproducible across machines.
    json_files = sorted(
        name
        for name in os.listdir(input_path)
        if name.endswith('.json')
        and os.path.isfile(os.path.join(input_path, name))
    )
    print(f"Ditemukan {len(json_files)} file JSON dalam folder")

    data = []
    for json_file in json_files:
        with open(os.path.join(input_path, json_file), 'r', encoding='utf-8') as f:
            file_data = json.load(f)

        # list.extend() on a dict would silently add its keys (and on a
        # string its characters), corrupting the sample list; fail loudly.
        if not isinstance(file_data, list):
            raise ValueError(
                f"{json_file}: expected a JSON list of samples, "
                f"got {type(file_data).__name__}"
            )

        data.extend(file_data)
        print(f"Loaded {len(file_data)} samples dari {json_file}")

    return data


## 3. Preprocessing Data

In [None]:
def preprocess_data(data, expected_landmarks=6):
    """
    Turn raw samples into a feature matrix and a label vector.

    Each valid sample contributes one row holding the ``x``, ``y``, ``z``
    values of its landmarks, flattened in landmark order. A sample is
    counted as invalid and skipped when it lacks a required key, has the
    wrong number of landmarks, or is structurally malformed (for example
    the sample or a landmark is not a mapping, or ``landmarks`` is null).

    Args:
        data: Iterable of samples, each expected to be a dict with a
            ``'name'`` and a ``'landmarks'`` list of ``{'x', 'y', 'z'}`` dicts.
        expected_landmarks: Required number of landmarks per sample
            (default: 6).

    Returns:
        features: numpy array with one row of ``expected_landmarks * 3``
            coordinates per valid sample.
        labels: numpy array with the pose name of each valid sample.
    """
    features = []
    labels = []
    invalid_samples = 0

    for sample in data:
        try:
            pose_name = sample['name']
            landmarks = sample['landmarks']

            # Reject samples with the wrong number of landmarks.
            if len(landmarks) != expected_landmarks:
                invalid_samples += 1
                continue

            # Flatten to [x0, y0, z0, x1, y1, z1, ...]. The row is built
            # completely before being stored, so a failure part-way through
            # never leaves a partial row behind.
            landmark_coords = [
                landmark[axis]
                for landmark in landmarks
                for axis in ('x', 'y', 'z')
            ]
        except KeyError as e:
            print(f"Missing key in sample: {e}")
            invalid_samples += 1
            continue
        except TypeError as e:
            # Non-dict sample/landmark or landmarks=None: skip this sample
            # instead of aborting the whole preprocessing run.
            print(f"Malformed sample: {e}")
            invalid_samples += 1
            continue

        features.append(landmark_coords)
        labels.append(pose_name)

    print(f"Valid samples: {len(features)}")
    print(f"Invalid samples: {invalid_samples}")

    return np.array(features), np.array(labels)

## 4. Exploratory Data Analysis

In [None]:
def analyze_data(labels):
    """
    Print and plot the number of samples per pose class.

    Shows one figure with a bar chart (left) and a pie chart (right) of
    the class counts.

    Args:
        labels: Iterable of pose labels, one per sample.

    Returns:
        A ``Counter`` mapping each label to its number of occurrences.
    """
    distribution = Counter(labels)

    # Text summary of the class distribution.
    print("Distribusi Kelas:")
    for pose_name in distribution:
        print(f"  {pose_name}: {distribution[pose_name]} samples")

    pose_names = list(distribution)
    sample_counts = [distribution[pose_name] for pose_name in pose_names]
    palette = ['red', 'blue', 'green']

    plt.figure(figsize=(10, 5))

    # Left panel: absolute counts per pose.
    plt.subplot(1, 2, 1)
    plt.bar(pose_names, sample_counts, color=palette)
    plt.title('Distribusi Data per Pose')
    plt.xlabel('Pose')
    plt.ylabel('Jumlah Samples')

    # Right panel: share of each pose in the whole dataset.
    plt.subplot(1, 2, 2)
    plt.pie(sample_counts, labels=pose_names, autopct='%1.1f%%', colors=palette)
    plt.title('Proporsi Data per Pose')

    plt.tight_layout()
    plt.show()

    return distribution

## 5. Training dengan K-Fold Cross Validation

In [None]:
def train_with_kfold(X, y, k=5):
    """
    Train and evaluate an MLP classifier with stratified K-fold cross validation.

    The data is split with a shuffled ``StratifiedKFold`` (fixed
    ``random_state=42``). A fresh ``MLPClassifier`` is fitted on the
    training part of every fold and scored on the held-out part.

    Args:
        X: Feature array, indexable by an array of row indices.
        y: Label array aligned with ``X``.
        k: Number of folds (default: 5).

    Returns:
        fold_results: List with one dict per fold holding ``'fold'``
            (1-based index), ``'accuracy'``, ``'report'`` (the
            ``classification_report`` dict), ``'y_test'`` and ``'y_pred'``.
        mean_accuracy: Mean accuracy over the folds.
        std_accuracy: Standard deviation of the fold accuracies
            (``np.std`` default, i.e. population standard deviation).
    """
    print(f"Memulai {k}-Fold Cross Validation...")
    print(f"Feature shape: {X.shape}")
    print(f"Number of labels: {len(y)}")

    # The class list is the same for every fold, so compute it once.
    class_names = sorted(set(y))

    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    fold_accuracies = []
    fold_results = []

    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), 1):
        print(f"\nTraining Fold {fold}...")

        # Split data
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Train a new model for this fold
        model = MLPClassifier(alpha=1, max_iter=1000, random_state=42)
        model.fit(X_train, y_train)

        # Evaluate on the held-out split
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        # Pass labels explicitly: without it the report infers the classes
        # from this fold's y_test/y_pred, and a fold that lacks a rare class
        # no longer matches target_names, which makes sklearn raise.
        report = classification_report(
            y_test,
            y_pred,
            labels=class_names,
            target_names=class_names,
            output_dict=True,
        )

        fold_accuracies.append(accuracy)
        fold_results.append({
            'fold': fold,
            'accuracy': accuracy,
            'report': report,
            'y_test': y_test,
            'y_pred': y_pred
        })

        print(f"Fold {fold} - Accuracy: {accuracy:.4f}")

    mean_accuracy = np.mean(fold_accuracies)
    std_accuracy = np.std(fold_accuracies)

    print("\n=== HASIL K-FOLD CROSS VALIDATION ===")
    print(f"Mean Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")

    return fold_results, mean_accuracy, std_accuracy