In [None]:
# nb-sd-samplers-cfg.ipynb
# SD Scheduler/Sampler/CFG Systematic Comparison
# Stage 1 - Cross-Family Inference Fundamentals

# %% [1] Shared Cache Bootstrap
import json
import os
import pathlib
import sys
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import torch

# Shared cache configuration (copy this cell into every notebook)
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Cache-related environment variables, all rooted under AI_CACHE_ROOT.
cache_env = {
    "HF_HOME": f"{AI_CACHE_ROOT}/hf",
    "TRANSFORMERS_CACHE": f"{AI_CACHE_ROOT}/hf/transformers",
    "HF_DATASETS_CACHE": f"{AI_CACHE_ROOT}/hf/datasets",
    "HUGGINGFACE_HUB_CACHE": f"{AI_CACHE_ROOT}/hf/hub",
    "TORCH_HOME": f"{AI_CACHE_ROOT}/torch",
}

# Export the variables, then make sure each target directory exists.
os.environ.update(cache_env)
for cache_path in cache_env.values():
    pathlib.Path(cache_path).mkdir(parents=True, exist_ok=True)

print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

In [None]:
# =============================================================================
# [2] Fixed Test Parameters & Seeds (Reproducibility)
# =============================================================================

# Test configuration
TEST_CONFIG = {
    "prompt": "a serene mountain lake at sunset, oil painting style, detailed reflections",
    "negative_prompt": "blurry, low quality, distorted, oversaturated",
    "width": 512,
    "height": 512,
    "seeds": [42, 1337, 2024],  # Multiple seeds for robustness
    "smoke_mode": os.getenv("SMOKE_MODE", "false").lower() == "true",
}

# Output directory used by the experiment, grid and export cells below.
# It was referenced throughout the notebook but never defined, so a fresh
# kernel failed with NameError. The path is relative to the working directory.
# It is kept out of TEST_CONFIG so that dict stays JSON-serializable.
output_dir = pathlib.Path("outputs/scheduler_comparison")
output_dir.mkdir(parents=True, exist_ok=True)

# Scheduler matrix for comparison
SCHEDULER_CONFIGS = {
    "DPMSolverMultistep": {"steps": [15, 25, 50], "optimal_cfg": 7.5},
    "EulerDiscrete": {"steps": [15, 25, 50], "optimal_cfg": 7.5},
    "EulerAncestral": {"steps": [15, 25, 50], "optimal_cfg": 7.5},
    "DDIM": {"steps": [20, 30, 50], "optimal_cfg": 7.5},
    "UniPCMultistep": {"steps": [15, 25, 50], "optimal_cfg": 7.5},
    "DPMSolverSinglestep": {"steps": [15, 25, 50], "optimal_cfg": 7.5},
}

# CFG scale test range (a single value in smoke mode)
CFG_SCALES = [3.5, 5.0, 7.5, 10.0, 12.0] if not TEST_CONFIG["smoke_mode"] else [7.5]

print(f"[Config] Prompt: '{TEST_CONFIG['prompt'][:50]}...'")
print(
    f"[Config] Seeds: {TEST_CONFIG['seeds']}, Smoke mode: {TEST_CONFIG['smoke_mode']}"
)
print(f"[Config] Output dir: {output_dir}")

In [None]:
# =============================================================================
# [3] SD1.5 Pipeline Setup with Memory Optimization
# =============================================================================

from diffusers import (
    StableDiffusionPipeline,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler,
    EulerAncestralDiscreteScheduler,
    DDIMScheduler,
    UniPCMultistepScheduler,
    DPMSolverSinglestepScheduler,
)


def create_sd15_pipeline(model_id="runwayml/stable-diffusion-v1-5"):
    """Create a memory-optimized SD1.5 pipeline.

    Loads the fp16 safetensors variant of the checkpoint, then enables
    attention slicing and model CPU offload, and tries to enable xFormers
    attention on a best-effort basis.

    Args:
        model_id: Model identifier forwarded to
            ``StableDiffusionPipeline.from_pretrained``. Defaults to the
            identifier this notebook has always used.

    Returns:
        The configured ``StableDiffusionPipeline`` instance.
    """
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )

    # Memory optimizations for 8-12GB VRAM
    pipe.enable_attention_slicing()
    pipe.enable_model_cpu_offload()

    # xFormers is optional. Enabling it can fail for reasons other than a
    # missing package (for example no usable CUDA device or an incompatible
    # build), so any failure falls back to the attention slicing set above
    # instead of aborting the pipeline setup.
    try:
        pipe.enable_xformers_memory_efficient_attention()
        print("[Optimization] xFormers enabled")
    except Exception as exc:
        print("[Optimization] xFormers not available, using attention slicing")
        print(f"[Optimization]   reason: {type(exc).__name__}: {exc}")

    return pipe


# Load pipeline
print("[Loading] SD1.5 pipeline with memory optimizations...")
sd15_pipe = create_sd15_pipeline()

In [None]:
# =============================================================================
# [4] Scheduler Factory & Benchmark Function
# =============================================================================


def get_scheduler(scheduler_name, pipe):
    """Instantiate the scheduler registered under ``scheduler_name``.

    The new scheduler is built from the config of the scheduler currently
    attached to ``pipe``.

    Raises:
        ValueError: if ``scheduler_name`` is not a known scheduler name.
    """
    scheduler_cls = {
        "DPMSolverMultistep": DPMSolverMultistepScheduler,
        "EulerDiscrete": EulerDiscreteScheduler,
        "EulerAncestral": EulerAncestralDiscreteScheduler,
        "DDIM": DDIMScheduler,
        "UniPCMultistep": UniPCMultistepScheduler,
        "DPMSolverSinglestep": DPMSolverSinglestepScheduler,
    }.get(scheduler_name)

    # Guard clause: reject names that are missing from the table above.
    if scheduler_cls is None:
        raise ValueError(f"Unknown scheduler: {scheduler_name}")

    current_config = pipe.scheduler.config
    return scheduler_cls.from_config(current_config)


def benchmark_generation(pipe, scheduler_name, steps, cfg_scale, seed):
    """Run one timed generation with the given scheduler and settings.

    Replaces ``pipe.scheduler`` with the scheduler named ``scheduler_name``
    (this change persists on ``pipe`` after the call), then generates one
    image using the prompt, negative prompt and size from ``TEST_CONFIG``.

    Args:
        pipe: Pipeline object to call; its ``scheduler`` attribute is replaced.
        scheduler_name: Name understood by ``get_scheduler``.
        steps: Value passed as ``num_inference_steps``.
        cfg_scale: Value passed as ``guidance_scale``.
        seed: Seed for the per-call random generator.

    Returns:
        dict with keys ``image`` (first generated image), ``time`` (elapsed
        seconds for the pipeline call), ``scheduler``, ``steps``,
        ``cfg_scale`` and ``seed``.

    Raises:
        ValueError: propagated from ``get_scheduler`` for an unknown name.
    """
    # Set scheduler
    pipe.scheduler = get_scheduler(scheduler_name, pipe)

    # Use a dedicated generator so each call is reproducible from `seed`
    # without reseeding torch's global RNG as a side effect.
    generator = torch.Generator(device="cpu").manual_seed(seed)

    # perf_counter is monotonic, so it suits interval timing better than
    # wall-clock time. No torch.autocast wrapper: the notebook loads the
    # pipeline in float16 already, and a hard-coded "cuda" autocast context
    # would only add overhead to the measured time.
    start_time = time.perf_counter()
    result = pipe(
        prompt=TEST_CONFIG["prompt"],
        negative_prompt=TEST_CONFIG["negative_prompt"],
        width=TEST_CONFIG["width"],
        height=TEST_CONFIG["height"],
        num_inference_steps=steps,
        guidance_scale=cfg_scale,
        generator=generator,
    )
    generation_time = time.perf_counter() - start_time

    return {
        "image": result.images[0],
        "time": generation_time,
        "scheduler": scheduler_name,
        "steps": steps,
        "cfg_scale": cfg_scale,
        "seed": seed,
    }

In [None]:
# =============================================================================
# [5] SD1.5 Scheduler Comparison Experiment
# =============================================================================

print("\n[Experiment] Starting SD1.5 scheduler comparison...")
results = []

# Smoke mode runs only the first configured seed. Selecting by position keeps
# this working if the seed list is edited; the old filter hard-coded 42.
scheduler_seeds = (
    TEST_CONFIG["seeds"][:1] if TEST_CONFIG["smoke_mode"] else TEST_CONFIG["seeds"]
)

for scheduler_name, config in SCHEDULER_CONFIGS.items():
    print(f"\n  Testing {scheduler_name}...")

    # Smoke mode runs only the cheapest step count of each scheduler. A fixed
    # `steps > 15` cutoff silently skipped schedulers whose smallest step
    # count is above 15 (DDIM starts at 20), so they were never exercised.
    if TEST_CONFIG["smoke_mode"]:
        steps_to_run = [min(config["steps"])]
    else:
        steps_to_run = config["steps"]

    for steps in steps_to_run:
        for seed in scheduler_seeds:
            # Best-effort: one failed combination must not abort the sweep.
            try:
                result = benchmark_generation(
                    sd15_pipe, scheduler_name, steps, config["optimal_cfg"], seed
                )
                results.append(result)

                # Save individual image
                filename = f"sd15_{scheduler_name}_{steps}steps_cfg{config['optimal_cfg']}_seed{seed}.png"
                result["image"].save(output_dir / filename)

                print(f"    ✓ {steps} steps, seed {seed}: {result['time']:.2f}s")

            except Exception as e:
                print(f"    ✗ Failed {scheduler_name} {steps} steps: {e}")

print(f"\n[Results] Generated {len(results)} images for SD1.5 scheduler comparison")

In [None]:
# =============================================================================
# [6] CFG Scale Impact Analysis
# =============================================================================

print("\n[Experiment] CFG scale impact analysis...")
cfg_results = []

# Fixed reference setup for the CFG sweep. These values are hard-coded,
# not derived from the scheduler comparison results.
best_scheduler = "DPMSolverMultistep"
optimal_steps = 25

# Smoke mode runs only the first configured seed. Selecting by position keeps
# this working if the seed list is edited; the old filter hard-coded 42.
cfg_seeds = (
    TEST_CONFIG["seeds"][:1] if TEST_CONFIG["smoke_mode"] else TEST_CONFIG["seeds"]
)

for cfg_scale in CFG_SCALES:
    for seed in cfg_seeds:
        # Best-effort: one failed combination must not abort the sweep.
        try:
            result = benchmark_generation(
                sd15_pipe, best_scheduler, optimal_steps, cfg_scale, seed
            )
            cfg_results.append(result)

            # Save CFG comparison image
            filename = f"cfg_comparison_{best_scheduler}_{optimal_steps}steps_cfg{cfg_scale}_seed{seed}.png"
            result["image"].save(output_dir / filename)

            print(f"  ✓ CFG {cfg_scale}, seed {seed}: {result['time']:.2f}s")

        except Exception as e:
            print(f"  ✗ Failed CFG {cfg_scale}: {e}")

In [None]:
# =============================================================================
# [7] Performance Analysis & Visualization
# =============================================================================


def analyze_results(results_list, analysis_type):
    """Summarize generation timings overall and, optionally, per group.

    Args:
        results_list: Sequence of result dicts. Each needs a ``"time"`` entry,
            plus ``"scheduler"`` or ``"cfg_scale"`` for the matching grouping.
        analysis_type: ``"scheduler"`` adds a ``"by_scheduler"`` breakdown and
            ``"cfg"`` adds a ``"by_cfg"`` breakdown. Any other value yields
            only the overall statistics.

    Returns:
        An empty dict when ``results_list`` is empty. Otherwise a dict with
        ``total_images``, ``avg_time``, ``min_time``, ``max_time`` and
        ``analysis_type``, plus the optional breakdown.
    """
    if not results_list:
        return {}

    all_times = [entry["time"] for entry in results_list]
    summary = {
        "total_images": len(results_list),
        "avg_time": np.mean(all_times),
        "min_time": min(all_times),
        "max_time": max(all_times),
        "analysis_type": analysis_type,
    }

    if analysis_type == "scheduler":
        # Collect timings per scheduler name, in first-seen order.
        times_by_scheduler = {}
        for entry in results_list:
            times_by_scheduler.setdefault(entry["scheduler"], []).append(entry["time"])

        per_scheduler = {}
        for name, times in times_by_scheduler.items():
            per_scheduler[name] = {
                "avg_time": np.mean(times),
                "min_time": min(times),
                "max_time": max(times),
                "samples": len(times),
            }
        summary["by_scheduler"] = per_scheduler

    elif analysis_type == "cfg":
        # Collect timings per CFG scale, in first-seen order.
        times_by_cfg = {}
        for entry in results_list:
            times_by_cfg.setdefault(entry["cfg_scale"], []).append(entry["time"])

        per_cfg = {}
        for cfg, times in times_by_cfg.items():
            per_cfg[cfg] = {"avg_time": np.mean(times), "samples": len(times)}
        summary["by_cfg"] = per_cfg

    return summary


# Analyze results
scheduler_analysis = analyze_results(results, "scheduler")
cfg_analysis = analyze_results(cfg_results, "cfg")

print("\n[Analysis] Performance Summary:")
# analyze_results returns {} when an experiment produced no images (for
# example when every generation failed). Guard the lookups so this cell
# reports that case instead of raising KeyError.
for label, summary in (
    ("Scheduler test", scheduler_analysis),
    ("CFG test", cfg_analysis),
):
    if summary:
        print(
            f"  {label}: {summary['total_images']} images, avg {summary['avg_time']:.2f}s"
        )
    else:
        print(f"  {label}: no images generated, nothing to analyze")

In [None]:
# =============================================================================
# [8] Create Comparison Grid (Memory Efficient)
# =============================================================================


def create_comparison_grid(results_subset, title, max_images=9):
    """Render up to ``max_images`` results as an image grid and save it.

    The grid has at most three columns. Each tile shows the image with its
    scheduler, step count, CFG scale and generation time as the title. The
    figure is written to ``output_dir`` as a PNG, shown, and then closed.

    Args:
        results_subset: Sequence of result dicts with ``image``,
            ``scheduler``, ``steps``, ``cfg_scale`` and ``time`` entries.
        title: Figure title, also used to derive the output file name.
        max_images: Maximum number of results to draw.

    Returns:
        The grid file name (not the full path), or ``None`` when there is
        nothing to draw.
    """
    # Limit number of images to prevent memory issues
    subset = results_subset[:max_images]

    # Check the truncated list, so max_images=0 returns None instead of
    # hitting a zero-column division below.
    if len(subset) == 0:
        return None

    # Calculate grid size
    cols = min(3, len(subset))
    rows = (len(subset) + cols - 1) // cols

    # squeeze=False always returns a 2-D array of Axes, so a single flatten()
    # covers the 1x1, single-row and multi-row layouts alike.
    fig, axes = plt.subplots(
        rows, cols, figsize=(cols * 4, rows * 4), squeeze=False
    )
    axes = axes.flatten()

    for ax, result in zip(axes, subset):
        ax.imshow(result["image"])
        ax.set_title(
            f"{result['scheduler']}\n{result['steps']} steps, CFG {result['cfg_scale']}\n{result['time']:.2f}s",
            fontsize=10,
        )

    # Turn off axis decorations on every tile, including unused ones.
    for ax in axes:
        ax.axis("off")

    fig.suptitle(title, fontsize=14, fontweight="bold")
    fig.tight_layout()

    # Save grid
    grid_filename = f"{title.lower().replace(' ', '_')}_grid.png"
    try:
        fig.savefig(output_dir / grid_filename, dpi=150, bbox_inches="tight")
        plt.show()
    finally:
        # Always release the figure; otherwise every call leaves one open
        # and its memory is held for the rest of the kernel session.
        plt.close(fig)

    return grid_filename


# Create comparison grids (limit to prevent memory issues).
# create_comparison_grid returns None for an empty list and truncates to
# max_images itself, so both names are always (re)assigned here. Guarding the
# calls left them undefined, or stale from an earlier run, when an experiment
# produced no images.
scheduler_grid = create_comparison_grid(
    results, "SD1.5 Scheduler Comparison", max_images=9
)

cfg_grid = create_comparison_grid(
    cfg_results, "CFG Scale Impact Analysis", max_images=6
)

In [None]:
# =============================================================================
# [9] Smoke Test (CI Compatible)
# =============================================================================


def smoke_test(max_seconds=60):
    """Minimal smoke test for CI/CD.

    Runs one 10-step DPMSolverMultistep generation (CFG 7.5, seed 42) through
    ``benchmark_generation`` on ``sd15_pipe``. It checks that the image has
    the size configured in ``TEST_CONFIG`` and that the measured time is
    positive and below ``max_seconds``.

    Args:
        max_seconds: Upper bound (exclusive) on the generation time.

    Returns:
        ``True`` when every check passes. ``False`` when generation raises or
        a check fails; the failure is printed, not re-raised.
    """
    print("\n[Smoke Test] Quick validation...")

    try:
        # Single quick generation
        smoke_result = benchmark_generation(
            sd15_pipe, "DPMSolverMultistep", 10, 7.5, 42
        )

        # Compare against the configured size, not a literal (512, 512), so
        # the check stays valid when TEST_CONFIG is edited.
        expected_size = (TEST_CONFIG["width"], TEST_CONFIG["height"])
        assert smoke_result["image"].size == expected_size, "Image size incorrect"
        assert smoke_result["time"] > 0, "Generation time invalid"
        assert smoke_result["time"] < max_seconds, "Generation took too long"

        print(f"  ✓ Smoke test passed: {smoke_result['time']:.2f}s")
        return True

    except Exception as e:
        print(f"  ✗ Smoke test failed: {e}")
        return False


# Run smoke test
smoke_passed = smoke_test()

In [None]:
# =============================================================================
# [10] Export Results & Metrics JSON
# =============================================================================

# Step counts for the three exported recipes. Every recipe uses the same
# scheduler and CFG value.
recipe_steps = {"fast_quality": 15, "balanced": 25, "high_quality": 50}

# Assemble the export payload. PIL images are left out so the structure
# contains only values that json can write.
export_data = dict(
    experiment_info=dict(
        timestamp=datetime.now().isoformat(),
        config=TEST_CONFIG,
        scheduler_configs=SCHEDULER_CONFIGS,
        cfg_scales=CFG_SCALES,
        smoke_mode=TEST_CONFIG["smoke_mode"],
    ),
    performance_analysis=dict(
        scheduler_analysis=scheduler_analysis,
        cfg_analysis=cfg_analysis,
    ),
    results_summary=dict(
        total_scheduler_tests=len(results),
        total_cfg_tests=len(cfg_results),
        smoke_test_passed=smoke_passed,
    ),
    recommendations={
        recipe: {"scheduler": "DPMSolverMultistep", "steps": step_count, "cfg": 7.5}
        for recipe, step_count in recipe_steps.items()
    },
)

# Write the metrics next to the generated images.
metrics_file = output_dir / "metrics.json"
with metrics_file.open("w", encoding="utf-8") as f:
    json.dump(export_data, f, indent=2, ensure_ascii=False)

total_images = len(results) + len(cfg_results)
print(f"\n[Export] Results saved to {output_dir}")
print(f"  - Metrics: {metrics_file}")
print(f"  - Images: {total_images} files")

In [None]:
# =============================================================================
# [11] Summary & Parameter Recipes
# =============================================================================

banner = "=" * 60
print("\n" + banner)
print("📊 SCHEDULER/CFG COMPARISON SUMMARY")
print(banner)

smoke_mark = "✓" if smoke_passed else "✗"

# Each entry is (heading, lines printed under it). The report is printed
# one line at a time, in this order.
summary_sections = [
    (
        "\n✅ **Completed:**",
        [
            f"   • {len(results)} scheduler comparison images",
            f"   • {len(cfg_results)} CFG scale test images",
            "   • Performance analysis & timing benchmarks",
            f"   • {smoke_mark} Smoke test validation",
        ],
    ),
    (
        "\n🎯 **Key Concepts Learned:**",
        [
            "   • Scheduler trade-offs: Speed vs Quality vs Consistency",
            "   • CFG Scale impact: 3.5-5.0 (subtle) → 7.5 (balanced) → 10+ (strong but potential artifacts)",
            "   • Memory optimization: attention_slicing + cpu_offload for 8-12GB VRAM",
            "   • Reproducibility: Fixed seeds + config export for consistent results",
        ],
    ),
    (
        "\n⚠️  **Common Pitfalls:**",
        [
            "   • Too high CFG (>12) → oversaturated, artifact-prone results",
            "   • Wrong scheduler for steps count → poor convergence",
            "   • Missing memory optimizations → OOM on lower VRAM",
            "   • Not saving configs → irreproducible experiments",
        ],
    ),
    (
        "\n🚀 **Next Steps:**",
        [
            "   • Apply these recipes to ControlNet conditioning",
            "   • Test same parameters on SDXL (different optimal ranges)",
            "   • Use optimal configs for LoRA training in Stage 3",
            "   • Integrate into batch pipeline configs",
        ],
    ),
    (
        "\n📁 **Git Integration:**",
        [
            "   Branch: feature/s1/nb-sd-samplers-cfg",
            "   Files: notebooks/t2i/10_sd/nb-sd-samplers-cfg.ipynb",
            "   Output: outputs/scheduler_comparison/ (not committed)",
        ],
    ),
]

for heading, section_lines in summary_sections:
    print(heading, *section_lines, sep="\n")

# Drop the pipeline reference, then ask torch to release cached CUDA memory.
del sd15_pipe
torch.cuda.empty_cache()
print("\n🧹 Pipeline cleanup completed - VRAM freed")