# 🌊 Diffusion Sudoku Training (Iterative Denoising)

Train a Sudoku solver using the **Diffusion Depth Controller** with iterative denoising.

**Key Features:**
- Iterative denoising of routing weights
- Learned noise schedules (linear, cosine, sqrt)
- Adaptive LayerNorm (adaLN) conditioning from DiT
- Smooth, temporally coherent routing evolution

**Reference:** [Scalable Diffusion Models with Transformers (DiT)](https://arxiv.org/abs/2212.09748)


In [None]:
# Clone repository and install dependencies
# Fetch the PoT sources from GitHub into ./PoT.
!git clone https://github.com/Eran-BA/PoT.git
# Switch into the checkout so that the relative paths used by later cells
# (`experiments/...`, `src.pot...`) resolve from the repository root.
%cd PoT
# Install the packages listed in the repository's requirements.txt (-q: quiet output).
!pip install -q -r requirements.txt


In [None]:
# Check GPU and verify Diffusion controller
#
# Environment sanity check: report the PyTorch/CUDA setup, confirm that the
# diffusion controller can be imported from the cloned repository, and run a
# single controller step on random input as a smoke test.
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only query device 0 when CUDA is present; these calls raise otherwise.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# The import itself is the availability check: it raises ImportError if the
# repository layout or its dependencies are not set up correctly.
from src.pot.core import DiffusionDepthController, create_controller
print("\nDiffusion controller available: ✓")

# Quick test
controller = create_controller("diffusion", d_model=256, n_heads=8, noise_schedule="cosine")
print(f"Controller params: {sum(p.numel() for p in controller.parameters()):,}")

# Test denoising step
# Last dimension matches d_model=256 above; 81 presumably corresponds to the
# 9x9 Sudoku cells and 2 to the batch size -- confirm against controller.step.
X = torch.randn(2, 81, 256)
alpha, state, aux = controller.step(X)
print(f"Initial sigma (noise): {aux['sigma']:.4f}")


## 🚀 Train with Diffusion Controller


In [None]:
# Run Diffusion training with all arguments
#
# Launches experiments/sudoku_poh_benchmark.py with `--controller diffusion`,
# spelling out every argument explicitly on the command line.
# Output is written under experiments/results/diffusion_sudoku (`--output`);
# the results cell of this notebook reads `hybrid_results.json` from that directory.
# NOTE(review): `--wandb` / `--project sudoku-diffusion` presumably enable
# Weights & Biases logging and therefore need a W&B login -- confirm against
# the script's argument parser.
!python experiments/sudoku_poh_benchmark.py \
    --download \
    --model hybrid \
    --controller diffusion \
    --d-model 512 \
    --d-ff 2048 \
    --n-heads 8 \
    --H-layers 2 \
    --L-layers 2 \
    --H-cycles 2 \
    --L-cycles 8 \
    --halt-max-steps 4 \
    --halt-exploration-prob 0.1 \
    --max-depth 32 \
    --hrm-grad-style \
    --epochs 500 \
    --batch-size 128 \
    --lr 3e-4 \
    --weight-decay 0.01 \
    --beta1 0.9 \
    --beta2 0.95 \
    --warmup-steps 500 \
    --lr-min-ratio 0.1 \
    --grad-clip 1.0 \
    --dropout 0.0 \
    --subsample 10000 \
    --num-aug 100 \
    --eval-interval 50 \
    --wandb \
    --project sudoku-diffusion \
    --seed 42 \
    --output experiments/results/diffusion_sudoku


## 📈 Results


In [None]:
# Plot results
#
# Reads the JSON summary written by the training run, prints the headline
# numbers, and draws train/test grid accuracy against the epoch index.
import json
import matplotlib.pyplot as plt

results_path = 'experiments/results/diffusion_sudoku/hybrid_results.json'
with open(results_path, 'r') as results_file:
    results = json.load(results_file)

# Headline numbers stored at the top level of the results file.
print(f"Best Grid Accuracy: {results['best_grid_acc']:.2f}%")
print(f"Parameters: {results['parameters']:,}")


def _column(records, key):
    """Return the value stored under `key` in every record, in order."""
    return [record[key] for record in records]


# One list per plotted series, all taken from the per-epoch history records.
history = results['history']
epochs = _column(history, 'epoch')
train_acc = _column(history, 'train_grid_acc')
test_acc = _column(history, 'test_grid_acc')

plt.figure(figsize=(10, 5))
for series, series_label in ((train_acc, 'Train'), (test_acc, 'Test')):
    plt.plot(epochs, series, label=series_label)
plt.title('Diffusion Controller - Sudoku Training')
plt.xlabel('Epoch')
plt.ylabel('Grid Accuracy (%)')
plt.grid(True)
plt.legend()
plt.show()


## 🔬 Visualize Noise Schedule


In [None]:
# Visualize sigma (noise level) across depth steps
#
# Builds a standalone controller with a cosine schedule and plots its
# `sigma_schedule` values against the depth-step index, then prints the
# first, middle and last values.
import matplotlib.pyplot as plt  # imported here so the cell runs without the results cell

from src.pot.core import DiffusionDepthController

controller = DiffusionDepthController(d_model=256, n_heads=8, max_depth=32, noise_schedule="cosine")
# detach() is a no-op for a plain buffer but is required before .numpy() if the
# schedule is a learnable parameter (requires_grad=True).
sigmas = controller.sigma_schedule.detach().cpu().numpy()

plt.figure(figsize=(10, 4))
plt.plot(range(len(sigmas)), sigmas, 'b-', linewidth=2)
plt.xlabel('Depth Step')
plt.ylabel('Sigma (Noise Level)')
plt.title('Cosine Noise Schedule - Denoising Process')
plt.grid(True)
plt.show()

# Derive the reported steps from the schedule length instead of hard-coding
# 16 and 31, so the summary stays valid if max_depth is changed above.
# With a 32-entry schedule this prints steps 0, 16 and 31 as before.
mid_step = len(sigmas) // 2
last_step = len(sigmas) - 1
print(f"Step 0 (start):  σ = {sigmas[0]:.4f} (noisy)")
print(f"Step {mid_step} (mid):   σ = {sigmas[mid_step]:.4f}")
print(f"Step {last_step} (end):   σ = {sigmas[last_step]:.4f} (clean)")
