# WaveletDiff Optuna Hyperparameter Optimization

**Clean Interface**: All logic is in `src/torch_gpu_waveletDiff/optuna_config/runner.py`  
**Workflow**: Configure → Setup → Create Study → Optimize → Analyze → Export

Modern Features:
- 🎯 Multi-objective optimization (loss, speed, stability)
- 🧠 TPESampler for intelligent search
- ✂️ HyperbandPruner for early stopping
- 💾 Persistent SQLite storage (survives Colab restarts)
- 📊 Optuna Dashboard for visualization
- 🔄 Git-pullable updates (no notebook rewrites needed!)

In [None]:
# @title üìã Cell 1: Configuration

# === STUDY SETTINGS ===
STUDY_NAME = "waveletdiff_multiobjective_v1"  # @param {type:"string"}
N_TRIALS = 50  # @param {type:"integer"}
TIMEOUT_HOURS = None  # @param {type:"number"}

# === TRIAL SETTINGS ===
STEPS_PER_TRIAL = 2000  # @param {type:"integer"}
EVAL_INTERVAL = 100  # @param {type:"integer"}
COMPILE_MODE = None  # @param ["default", "reduce-overhead", "max-autotune", "None"]
# ^ None = No compilation (faster for short trials)

# === OPTIMIZATION MODE ===
USE_MULTI_OBJECTIVE = True  # @param {type:"boolean"}
# ^ If False, uses weighted scalarization (single objective)

# Multi-Objective Weights (only used if USE_MULTI_OBJECTIVE=False)
WEIGHT_LOSS = 1.0  # @param {type:"number"}
WEIGHT_SPEED = 0.0  # @param {type:"number"}
WEIGHT_STABILITY = 0.1  # @param {type:"number"}

# === PRUNER SETTINGS ===
ENABLE_PRUNING = True  # @param {type:"boolean"}
PRUNER_TYPE = "hyperband"  # @param ["hyperband", "median", "none"]
PRUNER_MIN_RESOURCE = 500  # @param {type:"integer"}
PRUNER_REDUCTION_FACTOR = 3  # @param {type:"integer"}

# === SAMPLER SETTINGS ===
SAMPLER_TYPE = "tpe"  # @param ["tpe", "random"]
N_STARTUP_TRIALS = 10  # @param {type:"integer"}

# === DASHBOARD ===
ENABLE_DASHBOARD = True  # @param {type:"boolean"}
DASHBOARD_PORT = 8080  # @param {type:"integer"}
NGROK_AUTH_TOKEN = ""  # @param {type:"string"}
# ^ Get token from: https://dashboard.ngrok.com/get-started/your-authtoken

# === HYPERPARAMETER TUNING TOGGLES ===
# One form checkbox per hyperparameter: True marks it as tuned, False as fixed.
TUNE_LEARNING_RATE = True  # @param {type:"boolean"}
TUNE_MAX_LR = True  # @param {type:"boolean"}
TUNE_WEIGHT_DECAY = True  # @param {type:"boolean"}
TUNE_EMBED_DIM = True  # @param {type:"boolean"}
TUNE_NUM_HEADS = False  # @param {type:"boolean"}
TUNE_NUM_LAYERS = True  # @param {type:"boolean"}
TUNE_DROPOUT = True  # @param {type:"boolean"}
TUNE_BATCH_SIZE = True  # @param {type:"boolean"}
TUNE_PCT_START = False  # @param {type:"boolean"}
TUNE_GRAD_CLIP_NORM = False  # @param {type:"boolean"}
TUNE_TIME_EMBED_DIM = False  # @param {type:"boolean"}

# Gather the checkboxes into a single mapping keyed by hyperparameter name;
# this mapping is what gets handed to the runner as `tune_flags`.
TUNE_FLAGS = dict(
    learning_rate=TUNE_LEARNING_RATE,
    max_lr=TUNE_MAX_LR,
    weight_decay=TUNE_WEIGHT_DECAY,
    embed_dim=TUNE_EMBED_DIM,
    num_heads=TUNE_NUM_HEADS,
    num_layers=TUNE_NUM_LAYERS,
    dropout=TUNE_DROPOUT,
    batch_size=TUNE_BATCH_SIZE,
    pct_start=TUNE_PCT_START,
    grad_clip_norm=TUNE_GRAD_CLIP_NORM,
    time_embed_dim=TUNE_TIME_EMBED_DIM,
)

# === DEFAULT HYPERPARAMETERS ===
# Baseline value for each hyperparameter, keyed by the same names as TUNE_FLAGS.
# Passed to the runner as `default_hyperparams`; presumably these are the values
# used when a parameter's tuning toggle is off (confirm against runner.py).
DEFAULT_HYPERPARAMS = dict(
    learning_rate=2e-4,
    max_lr=1e-3,
    weight_decay=1e-5,
    embed_dim=256,
    num_heads=8,
    num_layers=8,
    dropout=0.1,
    batch_size=512,
    pct_start=0.3,
    grad_clip_norm=1.0,
    time_embed_dim=128,
)

# === DATASET CONFIGURATION ===
DATASET_NAME = "stocks"  # @param {type:"string"}
SEQ_LEN = 24  # @param {type:"integer"}
WAVELET_TYPE = "db2"  # @param {type:"string"}
WAVELET_LEVELS = "auto"  # not exposed as a form field
DATA_PATH = "src/copied_waveletDiff/data/stocks/stock_data.csv"  # @param {type:"string"}

# === PATHS ===
# Source repository and the local directory it is cloned into.
REPO_URL = "https://github.com/MilesHoffman/waveletDiff_synth_data.git"
REPO_DIR = "/content/waveletDiff_synth_data"

# Study database and checkpoints both live under the Drive base folder.
DRIVE_BASE_PATH = "/content/drive/MyDrive/personal_drive/trading"  # @param {type:"string"}
OPTUNA_DB_PATH = DRIVE_BASE_PATH + "/optuna/waveletdiff/optuna_studies/waveletdiff.db"
CHECKPOINT_DIR = DRIVE_BASE_PATH + "/optuna/waveletdiff/optuna_checkpoints/temp"

# Print summary: a short recap of the configuration chosen in this cell.
print("=" * 60)
print("CONFIGURATION SUMMARY")
print("=" * 60)
# Split hyperparameter names by their toggle: True -> tuned, False -> fixed.
tuned_params = [name for name, enabled in TUNE_FLAGS.items() if enabled]
fixed_params = [name for name, enabled in TUNE_FLAGS.items() if not enabled]
print(f"Tuning {len(tuned_params)} parameters: {', '.join(tuned_params)}")
print(f"Fixed {len(fixed_params)} parameters")
print(f"Mode: {'Multi-Objective' if USE_MULTI_OBJECTIVE else 'Single-Objective'}")
# Number of trials times the training steps each trial runs.
print(f"Trials: {N_TRIALS} × {STEPS_PER_TRIAL} steps")
print("=" * 60)

In [None]:
# @title üîß Cell 2: Environment Setup
import os
import sys
import subprocess
import importlib

print("üöÄ Starting bootstrap setup...")

# 1. Bootstrap Repository
if not os.path.exists(REPO_DIR):
    print(f"Cloning {REPO_URL} into {REPO_DIR}...")
    subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)
else:
    print(f"Repo exists at {REPO_DIR}, pulling latest changes...")
    subprocess.run(["git", "-C", REPO_DIR, "pull"], check=True)

# 2. Add repo to path FIRST setup
if REPO_DIR not in sys.path:
    sys.path.insert(0, REPO_DIR)

# 3. Import runner and finish environment setup (mounts drive, installs deps)
try:
    from src.torch_gpu_waveletDiff.optuna_config import runner
    status = runner.setup_environment(
        repo_url=REPO_URL,
        repo_dir=REPO_DIR,
        drive_base_path=DRIVE_BASE_PATH,
        optuna_db_path=OPTUNA_DB_PATH,
        checkpoint_dir=CHECKPOINT_DIR
    )
    print("‚úÖ Setup complete!")
except ImportError as e:
    print(f"‚ùå Critical error during bootstrap: {e}")
    print("Consider restarting the runtime if you just installed new packages.")

In [None]:
# @title ⚡ Cell 3: Initialize Fabric & Config
import importlib
import sys

# Ensure caches are invalidated so it sees the newly installed 'lightning'
importlib.invalidate_caches()

try:
    import lightning
except ImportError:
    print("⚠️ 'lightning' still not found. Attempting a final force install...")
    import subprocess
    subprocess.run([sys.executable, "-m", "pip", "install", "lightning"], check=True)
    importlib.invalidate_caches()

from src.torch_gpu_waveletDiff.train import trainer

# Single source of truth for the precision settings, so the status line printed
# at the end of this cell always reports what was actually passed to Fabric.
PRECISION = "bf16-mixed"
MATMUL_PRECISION = "high"
fabric = trainer.setup_fabric(precision=PRECISION, matmul_precision=MATMUL_PRECISION)

# Base configuration handed to runner.run_optimization as `base_config`.
# Dataset, wavelet and batch-size entries come from the Cell 1 settings; the
# remaining entries are fixed in this cell.
BASE_CONFIG = {
    'dataset': {'name': DATASET_NAME, 'seq_len': SEQ_LEN},
    'training': {'batch_size': DEFAULT_HYPERPARAMS['batch_size'], 'epochs': 1},
    'data': {'data_dir': f'src/copied_waveletDiff/data/{DATASET_NAME}', 'normalize_data': False},
    'wavelet': {'type': WAVELET_TYPE, 'levels': WAVELET_LEVELS},
    'model': {'prediction_target': 'noise'},
    'attention': {'use_cross_level_attention': True},
    'noise': {'schedule': 'exponential'},
    'sampling': {'ddim_eta': 0.0, 'ddim_steps': None},
    'energy': {'weight': 0.0},
    'optimizer': {'scheduler_type': 'onecycle'},
}

print(f"✅ Fabric: {fabric.device} | Precision: {PRECISION}")

In [None]:
# @title üìä Cell 4: Create Optuna Study
storage_url = f"sqlite:///{OPTUNA_DB_PATH}"

study = runner.create_study(
    study_name=STUDY_NAME,
    storage_url=storage_url,
    sampler_type=SAMPLER_TYPE,
    n_startup_trials=N_STARTUP_TRIALS,
    pruner_type=PRUNER_TYPE,
    pruner_min_resource=PRUNER_MIN_RESOURCE,
    pruner_reduction_factor=PRUNER_REDUCTION_FACTOR,
    enable_pruning=ENABLE_PRUNING,
    use_multi_objective=USE_MULTI_OBJECTIVE,
    weight_config={'loss': WEIGHT_LOSS, 'speed': WEIGHT_SPEED, 'stability': WEIGHT_STABILITY}
)

In [None]:
# @title üöÄ Cell 5: Run Optimization
summary = runner.run_optimization(
    study=study,
    fabric=fabric,
    base_config=BASE_CONFIG,
    repo_dir=REPO_DIR,
    data_path=DATA_PATH,
    tune_flags=TUNE_FLAGS,
    default_hyperparams=DEFAULT_HYPERPARAMS,
    checkpoint_dir=CHECKPOINT_DIR,
    trial_steps=STEPS_PER_TRIAL,
    eval_interval=EVAL_INTERVAL,
    compile_mode=COMPILE_MODE,
    n_trials=N_TRIALS,
    timeout_hours=TIMEOUT_HOURS,
    use_multi_objective=USE_MULTI_OBJECTIVE,
    enable_dashboard=ENABLE_DASHBOARD,
    dashboard_port=DASHBOARD_PORT,
    ngrok_token=NGROK_AUTH_TOKEN,
    storage_url=storage_url
)

In [None]:
# @title üìà Cell 6: Analyze Results
analysis = runner.analyze_results(
    study_name=STUDY_NAME,
    storage_url=storage_url,
    use_multi_objective=USE_MULTI_OBJECTIVE
)

In [None]:
# @title üíæ Cell 7: Export Best Configs
exported_files = runner.export_best_configs(
    study_name=STUDY_NAME,
    storage_url=storage_url,
    checkpoint_dir=CHECKPOINT_DIR,
    use_multi_objective=USE_MULTI_OBJECTIVE
)