# Notebook Pelatihan Utama - Pengenalan Ucapan Disartria (KAGGLE VERSION)
**Version:** 20260115_2101
**Tujuan:** Analisis Perbandingan **Lightweight CNN-STFT** (Diusulkan) vs **Model Transfer Learning**.
**Platform:** Kaggle Kernels (GPU T4 x2).
**Strategy:** Subject-Independent Split (Verified).

## 📋 Panduan Setup Kaggle
1. **Add Data**: Upload folder `backend` Anda sebagai Dataset (misalnya beri nama `thesis-backend`).
2. **Add Data**: Cari dataset `UASpeech` dan `TORGO` (atau upload zip-nya sebagai dataset privat jika Anda memiliki salinannya).
3. **Internet**: Aktifkan Internet di menu Settings (kanan) jika perlu download via `gdown`.

In [None]:
# 1. Setup Environment & Path (Kaggle Symlink Fix)
import os
import sys
import glob

print("🚀 Memulai Setup Kaggle Environment...")

# A. Locate the first 'config.py' anywhere under the Kaggle input mount.
#    The directory that contains it is treated as the project backend.
config_path = None
for root, dirs, files in os.walk('/kaggle/input'):
    if 'config.py' in files:
        config_path = os.path.join(root, 'config.py')
        break

if config_path:
    print(f"✅ Ditemukan Config di: {config_path}")
    source_dir = os.path.dirname(config_path)

    # B. Expose the backend directory as /kaggle/working/src so that
    #    'from src import config' works regardless of how the uploaded
    #    dataset was laid out (nested or flattened).
    target_link = '/kaggle/working/src'

    # islink() is tested first: os.path.exists() follows symlinks and
    # returns False for a dangling link, which would leave the stale link in
    # place and make os.symlink() below fail with FileExistsError.
    if os.path.islink(target_link):
        os.unlink(target_link)
    elif os.path.isdir(target_link):
        import shutil
        shutil.rmtree(target_link)
    elif os.path.exists(target_link):
        # A plain file squatting on the name; rmtree cannot remove it.
        os.remove(target_link)

    os.symlink(source_dir, target_link)
    print(f"🔗 Symlink dibuat: {target_link} -> {source_dir}")

    # C. Make the working directory importable so 'src' resolves.
    if '/kaggle/working' not in sys.path:
        sys.path.append('/kaggle/working')
else:
    # Best-effort: report and continue; the import cell will fail loudly.
    print("❌ FATAL: File 'config.py' tidak ditemukan di Input manapun!")
    print("   Pastikan Anda sudah 'Add Data' folder backend.")

# D. Output locations (Kaggle only persists /kaggle/working).
OUTPUT_ROOT = '/kaggle/working'
LOCAL_DATA_ROOT = '/kaggle/working/data'
os.makedirs(LOCAL_DATA_ROOT, exist_ok=True)

print("Environment Siap!")

In [None]:
# 2. Install Dependencies
!pip install -q tensorflow-io
!pip install -q pandas matplotlib seaborn scikit-learn librosa

In [None]:
# 3. Import Modul Proyek
try:
    from src import config, data_loader, models, trainer
except ImportError as e:
    # Report only; later cells will fail if the backend is really missing.
    print(f"❌ Gagal import modul: {e}")
    print("Pastikan 'backend' terdeteksi dengan benar.")
else:
    print("✅ Modul berhasil diimport: config, data_loader, models, trainer")

    # Redirect the project's model/output folders into the Kaggle working
    # directory and make sure both exist.
    config.MODELS_DIR = os.path.join(OUTPUT_ROOT, 'models')
    config.OUTPUTS_DIR = os.path.join(OUTPUT_ROOT, 'outputs')
    for out_dir in (config.MODELS_DIR, config.OUTPUTS_DIR):
        os.makedirs(out_dir, exist_ok=True)
    print(f"📂 Output Directory set to: {config.OUTPUTS_DIR}")

In [None]:
# 4. Persiapan Data (Kaggle Auto-Detect or Gdown)
import shutil
import subprocess
import gdown

# Google Drive file IDs: fallback download source, passed to
# setup_dataset_kaggle() and used only when the dataset is not found
# under /kaggle/input.
UASPEECH_ID = '1L17F0SAkRk3rEjHDUyToLUvNp99sNMvE'
TORGO_ID = '1YU7aCqa4qyn75XRdFPAWEqVv_1Qpl9cG'

def setup_dataset_kaggle(name, file_id, extract_path):
    """Return the root folder of dataset *name*, downloading it if necessary.

    Lookup order:
      1. A directory under /kaggle/input whose basename equals ``name`` or
         ``<name>_smalldataset`` (case-insensitive comparison).
      2. A previously extracted copy inside ``extract_path`` (either
         ``<name>`` or ``<name>_smalldataset``).
      3. Download ``<name>.zip`` from Google Drive via gdown and unzip it
         into ``extract_path``.

    Args:
        name: Dataset folder name, e.g. 'UASpeech' or 'TORGO'.
        file_id: Google Drive file id of the zip archive (fallback only).
        extract_path: Directory that receives the zip and its contents.

    Returns:
        Path of the dataset folder. If extraction produced neither expected
        folder name, ``extract_path/<name>`` is still returned (after a
        warning) to keep the original contract.

    Raises:
        RuntimeError: the download did not produce the zip file.
        subprocess.CalledProcessError: ``unzip`` failed.
    """
    print(f"\n--- Setup Dataset: {name} ---")

    # Folder names accepted as the dataset root (compared lower-cased).
    accepted_names = {name.lower(), f"{name}_smalldataset".lower()}

    # 1. Look under /kaggle/input in case the user attached the dataset.
    # NOTE(review): the glob pattern itself is case-sensitive, so a folder
    # spelled e.g. 'uaspeech' is not matched even though the basename
    # comparison below is case-insensitive.
    candidates = glob.glob(f'/kaggle/input/**/*{name}*', recursive=True)
    for p in candidates:
        # Skip zip/metadata files; only directories can be a dataset root.
        if os.path.isdir(p) and os.path.basename(p).lower() in accepted_names:
            print(f"✅ Ditemukan Dataset di Input: {p}")
            return p

    print(f"⚠️ {name} tidak ditemukan di ke Kaggle Input. Mencoba download via Gdown...")

    local_zip_path = os.path.join(extract_path, f"{name}.zip")
    target_extract = os.path.join(extract_path, name)
    # The archive sometimes unpacks as '<name>_smalldataset'; both names are
    # checked before AND after the download so a cached copy is reused.
    extract_candidates = [
        target_extract,
        os.path.join(extract_path, f"{name}_smalldataset"),
    ]

    # 2. Reuse an earlier extraction instead of downloading again.
    for existing in extract_candidates:
        if os.path.exists(existing):
            print(f"✅ Dataset sudah ada di Working Dir: {existing}")
            return existing

    # 3. Download and extract. Imported here so the function only needs
    # gdown when the fallback path is actually taken.
    import gdown

    url = f'https://drive.google.com/uc?id={file_id}'
    gdown.download(url, local_zip_path, quiet=False)
    if not os.path.exists(local_zip_path):
        raise RuntimeError(
            f"Download of {name} failed (Google Drive id: {file_id}); "
            f"no file at {local_zip_path}"
        )

    print(f"Mengekstrak {name}...")
    subprocess.check_call(['unzip', '-o', '-q', local_zip_path, '-d', extract_path])
    print(f"✅ {name} Selesai diekstrak.")

    for extracted in extract_candidates:
        if os.path.exists(extracted):
            return extracted

    print(f"⚠️ Extracted archive for {name} did not contain an expected folder: {extract_candidates}")
    return target_extract

# Run the dataset setup: each call returns the dataset root folder,
# found under /kaggle/input or downloaded into LOCAL_DATA_ROOT.
uaspeech_path = setup_dataset_kaggle('UASpeech', UASPEECH_ID, LOCAL_DATA_ROOT)
torgo_path = setup_dataset_kaggle('TORGO', TORGO_ID, LOCAL_DATA_ROOT)

# ---------------------------------------------------------
# LOADING DATA
# ---------------------------------------------------------
print("\nMemuat Path File...")

# Collect audio file paths with their labels and speaker ids.
# The three returned sequences are indexed in parallel by the cells below.
uaspeech_files, uaspeech_labels, uaspeech_speakers = data_loader.get_file_paths(uaspeech_path, 'UASpeech')
torgo_files, torgo_labels, torgo_speakers = data_loader.get_file_paths(torgo_path, 'TORGO')

# --- GENERATE DATASET STATS FOR DASHBOARD ---
import json
print("Generating Dataset Statistics...")

def get_stats(name, files, labels, speakers):
    """Build the dashboard statistics record for one dataset.

    Args:
        name: Display name stored under the "name" key.
        files: Sequence of sample paths; only its length is used.
        labels: Per-sample labels, parallel to ``speakers``.
        speakers: Per-sample speaker ids, parallel to ``labels``.

    Returns:
        Dict with "name", "stats" (formatted sample count, class count,
        "avgLen" placeholder) and "summaryData": one entry per distinct
        label, in order of first appearance, with the number of distinct
        speakers of that class and a nominal 80/20 train/test count.
        The 80/20 figures are an estimate computed here, not the split
        actually used for training.
    """
    # NOTE(review): a class is named "Dysarthric" only when its label == 1;
    # if labels are strings every class is reported as "Control" - confirm
    # against data_loader.get_file_paths.
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1

    # Distinct speakers per class. The original reported the dataset-wide
    # speaker count for every class.
    speakers_by_label = {}
    for label, speaker in zip(labels, speakers):
        speakers_by_label.setdefault(label, set()).add(speaker)

    summary = []
    for label, total in counts.items():
        train_count = int(total * 0.8)
        summary.append({
            "category": "Dysarthric" if label == 1 else "Control",
            "speakers": len(speakers_by_label.get(label, ())),
            "totalRaw": total,
            "trainRaw": train_count,
            "testRaw": total - train_count
        })

    return {
        "name": name,
        "stats": {
            "samples": f"{len(files):,}",
            "classes": str(len(counts)),
            "avgLen": "N/A"  # average duration is skipped: too expensive here
        },
        "summaryData": summary
    }

# One statistics record per dataset, keyed by the lower-case dataset id,
# written to OUTPUTS_DIR/dataset_stats.json.
stats_sources = (
    ("uaspeech", ('UASpeech', uaspeech_files, uaspeech_labels, uaspeech_speakers)),
    ("torgo", ('TORGO', torgo_files, torgo_labels, torgo_speakers)),
)
stats_export = {key: get_stats(*args) for key, args in stats_sources}

stats_json_path = os.path.join(config.OUTPUTS_DIR, "dataset_stats.json")
with open(stats_json_path, 'w') as f:
    json.dump(stats_export, f, indent=4)
print("✅ dataset_stats.json saved.")

# --- GENERATE REAL EDA SAMPLES (Audio + Signals) ---
# Copies up to five random clips per class and dataset into
# OUTPUTS_DIR/samples and writes, for each clip, a coarse amplitude envelope
# and a low-resolution mel spectrogram to OUTPUTS_DIR/eda_samples.json.
print("Generating EDA Samples (Waveform & Spectrogram data)...")
import random
import shutil
import librosa
import numpy as np

samples_out_dir = os.path.join(config.OUTPUTS_DIR, "samples")
os.makedirs(samples_out_dir, exist_ok=True)

eda_export = {}

# Iterate over both datasets
for ds_name, (ds_files, ds_labels, ds_speakers) in [
    ('uaspeech', (uaspeech_files, uaspeech_labels, uaspeech_speakers)),
    ('torgo', (torgo_files, torgo_labels, torgo_speakers)),
]:
    eda_export[ds_name] = {'dysarthric': [], 'control': []}

    # Binary split: label 1 is treated as dysarthric, anything else as
    # control (same rule as get_stats).
    # NOTE(review): if labels are not 0/1 integers, adjust this rule.
    indices_dys = [i for i, x in enumerate(ds_labels) if x == 1]
    indices_ctrl = [i for i, x in enumerate(ds_labels) if x != 1]

    # Pick up to 5 random clips from each class (unseeded: differs per run)
    picks_dys = random.sample(indices_dys, min(5, len(indices_dys)))
    picks_ctrl = random.sample(indices_ctrl, min(5, len(indices_ctrl)))

    for category, picks in [('dysarthric', picks_dys), ('control', picks_ctrl)]:
        for idx in picks:
            src = ds_files[idx]
            fname = f"{ds_name}_{os.path.basename(src)}"  # prefix avoids cross-dataset name collisions
            dst = os.path.join(samples_out_dir, fname)

            try:
                # The copy is inside the try so one unreadable file skips
                # that sample instead of aborting the whole export.
                shutil.copy(src, dst)

                y, sr = librosa.load(src, sr=16000)
                duration = len(y) / sr

                # 1. Waveform envelope: at most 80 bars, each the peak
                #    absolute amplitude of one window.
                hop_len = max(1, len(y) // 80)
                waveform = [float(np.max(np.abs(y[i:i+hop_len]))) for i in range(0, len(y), hop_len)][:80]
                # Scale to 0-100 for CSS bar heights. A silent clip has a
                # peak of 0; fall back to 1 so it maps to all-zero bars
                # rather than raising ZeroDivisionError.
                max_val = max(waveform, default=0.0) or 1
                waveform = [int((v / max_val) * 100) for v in waveform]

                # 2. Mel spectrogram, low resolution for JSON
                #    (40 bands, about 60 time steps).
                n_mels = 40
                hop_spec = len(y) // 60
                if hop_spec < 512: hop_spec = 512  # minimum hop

                S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_spec)
                S_db = librosa.power_to_db(S, ref=np.max)

                # Normalise to 0-1. A perfectly flat spectrogram would give
                # 0/0 = NaN, which json.dump writes as the invalid token
                # NaN; emit zeros in that case.
                min_db, max_db = S_db.min(), S_db.max()
                db_range = max_db - min_db
                if db_range > 0:
                    S_norm = (S_db - min_db) / db_range
                else:
                    S_norm = np.zeros_like(S_db)

                # Keep at most 60 time steps
                if S_norm.shape[1] > 60: S_norm = S_norm[:, :60]

                spectrogram = S_norm.tolist()  # list of lists (bands x frames)

                eda_export[ds_name][category].append({
                    "id": os.path.splitext(fname)[0],
                    "name": fname,
                    "duration": f"{duration:.1f}s",
                    "durationSec": duration,
                    "type": category,
                    "severity": "Unknown",
                    "waveform": waveform,
                    "spectrogram": spectrogram,
                    "url": f"/static/samples/{fname}"
                })
            except Exception as e:
                # Best-effort: report and continue with the next sample.
                print(f"⚠️ Error processing {fname}: {e}")

with open(os.path.join(config.OUTPUTS_DIR, "eda_samples.json"), 'w') as f:
    json.dump(eda_export, f)
print("✅ eda_samples.json saved (Audio & Visuals).")

print("Data terload. Siap training.")

In [None]:
# 5. ANALISIS MODEL & PERBANDINGAN STRUKTUR (WAJIB PAPER 2)
# Bagian ini dipisahkan agar analisa FLOPs, Parameter, dan Memory terlihat jelas sebelum Training dimulai.
import io
import pandas as pd
import tensorflow as tf
import os

# Builds every model listed in config.MODELS once, records its parameter
# count, FLOPs and memory estimates, and exports them to
# OUTPUTS_DIR/model_efficiency.json.
import json  # this cell does not otherwise import json

print("\n--- 2. Membangun dan Meringkas Semua Arsitektur Model ---")
summary_list = []

# Input shapes used for the analysis only:
#  - MFCC input is given 3 channels (the transfer-learning models are fed
#    an RGB-like tensor),
#  - STFT input has N_FFT // 2 + 1 frequency bins and a single channel.
input_shape_mfcc = (config.N_MFCC, config.MFCC_MAX_LEN, 3)
n_stft_bins = (config.N_FFT // 2) + 1
input_shape_stft = (n_stft_bins, config.MFCC_MAX_LEN, 1)

for model_key, model_display_name in config.MODELS.items():
    print(f"Menganalisis arsitektur untuk: {model_display_name}...")

    # Only 'cnn_stft' consumes the STFT input; every other key uses MFCC.
    current_input_shape = input_shape_stft if model_key == 'cnn_stft' else input_shape_mfcc

    tf.keras.backend.clear_session()

    # Defaults so a failed build still yields a complete row. Previously
    # total_params was left undefined (NameError) or carried over from the
    # previous model when the build failed.
    total_params = 0
    flops = 0
    peak_mem_32bit = 0
    disk_size_32bit = 0
    architecture_summary = "Error building model"
    try:
        model = models.get_model(model_key, current_input_shape, num_classes=2)

        total_params = model.count_params()
        flops = trainer.get_flops(model)
        peak_mem_32bit, disk_size_32bit = trainer.get_model_memory_usage(model)
    except Exception as e:
        print(f"⚠️ Gagal build/metric {model_display_name}: {e}")
        # Discard partially computed metrics, as the original did.
        flops = 0; peak_mem_32bit = 0; disk_size_32bit = 0
    else:
        # Capture model.summary() as text instead of printing it now.
        stream = io.StringIO()
        model.summary(print_fn=lambda x: stream.write(x + '\n'))
        architecture_summary = stream.getvalue()
        stream.close()

    summary_list.append({
        "Model": model_display_name,
        "Total Parameter": total_params,
        "FLOPs": flops,
        "Ukuran di Disk (32-bit)": disk_size_32bit,
        # 8-bit estimates are the 32-bit figures divided by 4.
        "Estimasi Ukuran 8-bit": disk_size_32bit / 4,
        "Estimasi Memori Aktivasi 8-bit": peak_mem_32bit / 4,
        "Architecture Summary": architecture_summary
    })

# --- SAVE EFFICIENCY METRICS (JSON) ---
# Written once after the loop; the original rebuilt and rewrote the same
# file on every iteration.
# NOTE(review): "size" and "activation" are both divided by 1024 but labelled
# MB and KB respectively, while the summary table formats the same values as
# bytes - confirm the units returned by trainer.get_model_memory_usage.
efficiency_export = {}
for item in summary_list:
    efficiency_export[item['Model']] = {
        "params": str(item['Total Parameter']),
        "flops": str(item['FLOPs']),
        "size": f"{item['Estimasi Ukuran 8-bit'] / 1024:.2f} MB",
        "activation": f"{item['Estimasi Memori Aktivasi 8-bit'] / 1024:.2f} KB"
    }

with open(os.path.join(config.OUTPUTS_DIR, "model_efficiency.json"), 'w') as f:
    json.dump(efficiency_export, f, indent=4)
print("✅ model_efficiency.json saved.")

# Print the summary-table banner and select the columns shown in it.
table_rule = "=" * 80
print("\n" + table_rule)
print("--- 3. Tabel Ringkasan Metrik untuk Edge Device ---")
print(table_rule)

columns_to_show = [
    "Model",
    "Total Parameter",
    "FLOPs",
    "Estimasi Ukuran 8-bit",
    "Estimasi Memori Aktivasi 8-bit",
]
param_summary_df = pd.DataFrame(summary_list)[columns_to_show]

def format_flops_str(f):
    """Format a raw FLOP count as 'x.xx GFLOPs' or 'x.xx MFLOPs'.

    Returns "N/A" for None, 0 and NaN. NaN is what pandas stores when a
    numeric column contains None, so it must not be printed as
    'nan MFLOPs'. Values of 1e9 and above are shown in GFLOPs.
    """
    # f != f is True only for NaN.
    if f is None or f == 0 or f != f:
        return "N/A"
    if f >= 1e9:
        return f'{f / 1e9:.2f} GFLOPs'
    return f'{f / 1e6:.2f} MFLOPs'
def format_bytes_str(b):
    """Format a size as 'x.xx MB' or 'x.xx KB' using decimal units.

    The value is divided by 1e6 (MB) or 1e3 (KB), i.e. it is interpreted as
    a byte count. Returns "N/A" for None, 0 and NaN. NaN is what pandas
    stores when a numeric column contains None. Values of 1e6 and above are
    shown in MB.
    """
    # b != b is True only for NaN.
    if b is None or b == 0 or b != b:
        return "N/A"
    if b >= 1e6:
        return f'{b / 1e6:.2f} MB'
    return f'{b / 1e3:.2f} KB'

# Turn each numeric column into its display string, in table order.
column_formatters = {
    'Total Parameter': '{:,}'.format,
    'FLOPs': format_flops_str,
    'Estimasi Ukuran 8-bit': format_bytes_str,
    'Estimasi Memori Aktivasi 8-bit': format_bytes_str,
}
for column_name, formatter in column_formatters.items():
    param_summary_df[column_name] = param_summary_df[column_name].map(formatter)

print(param_summary_df.to_string(index=False))

# Per-model architecture details, as captured from model.summary().
detail_rule = "=" * 65
print("\n\n" + detail_rule)
print("--- 4. Rincian Arsitektur per Model ---")
print(detail_rule)
for model_data in summary_list:
    print(f"\n>>> {model_data['Model']}:")
    print(model_data['Architecture Summary'])

In [None]:
# 6. Loop Pelatihan (Sekarang Fokus Training Saja)
from sklearn.model_selection import GroupShuffleSplit
import numpy as np

# Imports shared by every run are done once here instead of inside the loops.
import json
import time

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

datasets = {
    'UASpeech': (uaspeech_files, uaspeech_labels, uaspeech_speakers),
    'TORGO': (torgo_files, torgo_labels, torgo_speakers)
}

# Results of every (model, dataset) run in this execution of the cell.
# Initialised explicitly so re-running the cell starts from an empty list;
# the previous 'not in locals()' check kept appending to stale results.
benchmark_results = []

for dataset_name, (data_files, data_labels, data_speakers) in datasets.items():
    print(f"\n{'#'*60}")
    print(f"MEMPROSES TRAINING DATASET: {dataset_name}")
    print(f"{'#'*60}\n")

    if len(data_files) == 0: continue

    # Map each distinct label to a class index (sorted label order).
    unique_classes = sorted(set(data_labels))
    class_mapping = {label: idx for idx, label in enumerate(unique_classes)}

    # NumPy arrays for easy indexing by the splitter.
    X = np.array(data_files)
    y = np.array(data_labels)
    # Speaker ids are not used by the random split below.
    groups = np.array(data_speakers)

    # 1. SPLIT (Paper 2 style: stratified random split, 80/10/10).
    # NOTE(review): the notebook header describes a subject-independent
    # split, but this splits individual files at random, so the same
    # speaker can appear in train, validation and test. Confirm which
    # protocol is intended before reporting results.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # The held-out 20% is halved: 10% validation, 10% test overall.
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
    )

    print(f"--- Data Distribution ({dataset_name}) [Paper 2 Style - Random Split] ---")
    print(f"[Train] Samples: {len(X_train)}")
    print(f"[Val  ] Samples: {len(X_val)}")
    print(f"[Test ] Samples: {len(X_test)}")

    for model_key, model_display_name in config.MODELS.items():
        print(f"\n--- Training Pipeline: {model_display_name} @ {dataset_name} ---")

        # Pipeline per model: datasets -> build -> train -> evaluate.
        # Any failure is reported and the loop moves on to the next model.
        try:
            # Only 'cnn_stft' consumes STFT features; the rest use MFCC.
            feature_type = 'stft' if model_key == 'cnn_stft' else 'mfcc'

            train_ds = data_loader.create_tf_dataset(X_train, y_train, class_mapping, is_training=True, feature_type=feature_type)
            val_ds = data_loader.create_tf_dataset(X_val, y_val, class_mapping, is_training=False, feature_type=feature_type)
            test_ds = data_loader.create_tf_dataset(X_test, y_test, class_mapping, is_training=False, feature_type=feature_type)

            # Read the per-sample input shape from the first batch
            # (the leading batch dimension is dropped).
            input_shape = None
            for feature, label in train_ds.take(1):
                input_shape = feature.shape[1:]
                break

            tf.keras.backend.clear_session()
            model = models.get_model(model_key, input_shape, num_classes=len(unique_classes))

            # Training
            run_name = f"{model_key}_{dataset_name}"
            history, time_taken = trainer.train_model(model, train_ds, val_ds, model_name=run_name)
            print(f"-> Training Done ({time_taken:.2f}s)")

            # Evaluation & benchmark metrics
            print(f"-> Evaluating {run_name}...")

            # 1. Inference time: wall-clock time of one predict() pass over
            #    the test set, divided by the number of samples.
            start_eval = time.time()
            y_pred_probs = model.predict(test_ds)
            end_eval = time.time()

            # Count samples and collect true labels by iterating the dataset.
            # Assumes test_ds yields batches in the same order on every
            # pass (i.e. is not shuffled) - TODO confirm in data_loader.
            num_samples = 0
            y_true = []
            for features, labels in test_ds:
                num_samples += features.shape[0]
                y_true.extend(labels.numpy())

            inference_time_ms = ((end_eval - start_eval) / num_samples) * 1000

            # 2. Classification report (JSON)
            y_pred = np.argmax(y_pred_probs, axis=1)
            report_dict = classification_report(y_true, y_pred, target_names=unique_classes, output_dict=True)

            report_path = os.path.join(config.OUTPUTS_DIR, f"{run_name}_report.json")
            with open(report_path, 'w') as f:
                json.dump(report_dict, f, indent=4)
            print(f"-> Report saved: {report_path}")

            # 3. Append to the benchmark summary
            benchmark_entry = {
                "model": model_key,
                "dataset": dataset_name,
                "accuracy": report_dict['accuracy'],
                "inference_time_ms": inference_time_ms,
                "training_time_sec": time_taken,
                "run_name": run_name
            }
            benchmark_results.append(benchmark_entry)

            # 4. EXTENDED EVALUATION (best-effort: a failure here is
            #    reported but does not discard the results above).
            try:
                from sklearn.metrics import confusion_matrix, roc_curve, precision_recall_curve, auc
                import matplotlib.pyplot as plt
                import seaborn as sns
                import pandas as pd

                # A. Save model architecture
                arch_path = os.path.join(config.OUTPUTS_DIR, f"{run_name}_arch.txt")
                with open(arch_path, 'w') as f:
                    model.summary(print_fn=lambda x: f.write(x + '\n'))
                print(f"-> Architecture saved: {arch_path}")

                # B. Save full predictions (for error analysis).
                # The test files of the current dataset are X_test. The
                # original referenced undefined names here, so this whole
                # section always failed with NameError.
                curr_test_files = X_test

                pred_df = pd.DataFrame({
                    # File names are only attached when counts line up.
                    'file': [os.path.basename(f) for f in curr_test_files] if len(curr_test_files) == len(y_true) else ['Unknown']*len(y_true),
                    'true_label': y_true,
                    'pred_label': y_pred,
                    # Column 1 is taken as the dysarthric-class probability.
                    'prob_dysarthric': y_pred_probs[:, 1],
                    'is_correct': (np.array(y_true) == np.array(y_pred))
                })
                pred_csv_path = os.path.join(config.OUTPUTS_DIR, f"{run_name}_predictions.csv")
                pred_df.to_csv(pred_csv_path, index=False)
                print(f"-> Prediction Log saved: {pred_csv_path}")

                # C. Static plots (PNG)
                # 1. Confusion matrix
                plt.figure(figsize=(6, 5))
                cm = confusion_matrix(y_true, y_pred)
                sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=unique_classes, yticklabels=unique_classes)
                plt.title(f'Confusion Matrix - {model_display_name}')
                plt.ylabel('True Label')
                plt.xlabel('Predicted Label')
                plt.tight_layout()
                plt.savefig(os.path.join(config.OUTPUTS_DIR, f"{run_name}_cm.png"))
                plt.close()

                # 2. ROC curve
                fpr, tpr, _ = roc_curve(y_true, y_pred_probs[:, 1])
                roc_auc = auc(fpr, tpr)
                plt.figure(figsize=(6, 5))
                plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
                plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
                plt.xlim([0.0, 1.0])
                plt.ylim([0.0, 1.05])
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title(f'ROC - {model_display_name}')
                plt.legend(loc="lower right")
                plt.savefig(os.path.join(config.OUTPUTS_DIR, f"{run_name}_roc.png"))
                plt.close()

                # JSON export for the dashboard: curves are down-sampled to
                # 50 evenly spaced points and cast to plain floats.
                cm_list = cm.tolist()
                indices = np.linspace(0, len(fpr)-1, 50).astype(int)
                roc_data = [{"x": float(fpr[i]), "y": float(tpr[i])} for i in indices]

                precision, recall, _ = precision_recall_curve(y_true, y_pred_probs[:, 1])
                indices_pr = np.linspace(0, len(precision)-1, 50).astype(int)
                pr_data = [{"x": float(recall[i]), "y": float(precision[i])} for i in indices_pr]

                eval_export = {
                    "cm": cm_list,
                    "roc": roc_data,
                    "pr": pr_data,
                    "auroc": float(roc_auc)
                }

                eval_path = os.path.join(config.OUTPUTS_DIR, f"{run_name}_eval.json")
                with open(eval_path, 'w') as f:
                    json.dump(eval_export, f)
                print(f"-> Extended Eval saved: {eval_path}")
            except Exception as e:
                print(f"⚠️ Failed to generate extended eval: {e}")

            # Rewrite the cumulative summary after every run so partial
            # results survive an interrupted session.
            summary_path = os.path.join(config.OUTPUTS_DIR, "benchmark_summary.json")
            with open(summary_path, 'w') as f:
                json.dump(benchmark_results, f, indent=4)

            # Standard evaluation printout
            trainer.evaluate_model(model, test_ds, unique_classes, model_name=run_name)
        except Exception as e:
            print(f"ERROR Training {model_display_name}: {e}")

In [None]:
# 7. Visualisasi TensorBoard
logs_base_dir = os.path.join(config.OUTPUTS_DIR, 'logs')
%tensorboard --logdir "{logs_base_dir}"