# Multi-Resolution Pyramids: Direct Level Access

**Demonstrate memory-efficient pyramid access for progressive detail loading.**

**Pyramid levels (10980×10980 input; sizes are uncompressed, assuming 8-byte float64 pixels):**
- Level 0: 10980×10980 @ 10m (920MB)
- Level 1: 5490×5490 @ 20m (230MB)  
- Level 2: 2745×2745 @ 40m (58MB)
- Level 3: 1372×1372 @ 80m (14MB) — **64× smaller**

**Learn:** Load specific resolutions, compare sizes, choose optimal levels

## 1. Setup

In [None]:
import os
import time

import dask.array as da
import matplotlib.pyplot as plt
import numpy as np
import s3fs
import zarr

## 2. S3 credentials (K8s secret or env vars)

In [None]:
# Resolve S3 credentials: keep whatever is already exported in the environment,
# otherwise make a best-effort attempt to read them from a Kubernetes secret.
import base64
import shutil
import subprocess
from pathlib import Path

# Find kubectl: prefer the one on PATH, then fall back to common install locations
kubectl_locations = [
    "/opt/homebrew/bin/kubectl",  # Homebrew Apple Silicon
    "/usr/local/bin/kubectl",  # Homebrew Intel / Linux
    "/usr/bin/kubectl",  # System (Linux)
    str(Path.home() / ".local/bin/kubectl"),  # User install (Linux)
]
kubectl = shutil.which("kubectl") or next(
    (k for k in kubectl_locations if Path(k).is_file()), "kubectl"
)

# Auto-detect kubeconfig (relative to notebook location or environment).
# An unset KUBECONFIG is skipped entirely: Path("") resolves to the current
# directory, which always exists and would otherwise shadow ~/.kube/config.
kubeconfig_env = os.getenv("KUBECONFIG")
kubeconfig_paths = [
    Path.cwd().parent / ".work/kubeconfig",  # Relative: ../.work/kubeconfig from notebooks/
    *([Path(kubeconfig_env)] if kubeconfig_env else []),  # Environment variable
    Path.home() / ".kube/config",  # Default kubectl location
]
# is_file() rather than exists(): a directory is never a usable kubeconfig
kubeconfig = next((str(p) for p in kubeconfig_paths if p.is_file()), None)

# Try to fetch from Kubernetes when at least one credential is missing
credential_keys = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"]
if kubeconfig and not all(os.getenv(k) for k in credential_keys):
    try:
        for key in credential_keys:
            result = subprocess.run(
                [
                    kubectl,
                    "get",
                    "secret",
                    "geozarr-s3-credentials",
                    "-n",
                    "devseed",
                    "-o",
                    f"jsonpath={{.data.{key}}}",
                ],
                # Extend the current environment instead of replacing it, so the
                # child process still sees PATH, HOME, proxy settings, etc.
                env={**os.environ, "KUBECONFIG": kubeconfig},
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode == 0 and result.stdout:
                # Secret values under .data are base64-encoded
                os.environ[key] = base64.b64decode(result.stdout).decode()
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Best effort only: report the problem and let the check below decide
        print(f"⚠️ Could not read credentials from Kubernetes: {exc}")

if not os.getenv("AWS_ENDPOINT_URL"):
    os.environ["AWS_ENDPOINT_URL"] = "https://s3.de.io.cloud.ovh.net"

# Verify
if os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY"):
    print(f"✅ AWS configured: {os.getenv('AWS_ENDPOINT_URL')}")
else:
    print("❌ Missing AWS credentials! Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY")

## 3. Dataset path + S3 filesystem

In [None]:
# Zarr store on S3 that holds the converted Sentinel-2 L2A scene
s3_base = "s3://esa-zarr-sentinel-explorer-fra/tests-output/sentinel-2-l2a/S2B_MSIL2A_20250921T100029_N0511_R122_T33TUG_20250921T135752.zarr"

# Pyramid levels available in this dataset (eopf-geozarr generates 0-3 for 10980×10980 input),
# ordered from full resolution down to the coarsest overview
LEVELS = [0, 1, 2, 3]

# Authenticated S3 filesystem pointed at the configured endpoint
endpoint_url = os.getenv("AWS_ENDPOINT_URL")
fs = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_url})

# Report what will be tested; the dataset name is the last path component
dataset_name = s3_base.split("/")[-1]
print(f"✓ Dataset: {dataset_name}")
print(f"✓ Levels to test: {LEVELS}")
print("✓ Expected dimensions: [10980, 5490, 2745, 1372] pixels")

## 4. Load all levels (0-3) with timing

In [None]:
# Open the red band (B04) at every pyramid level and record its metadata.
# Results are keyed by level number; each entry holds the shape, chunking,
# uncompressed size in MB, the time taken to open the array, and the dask array.
level_data = {}

for level in LEVELS:
    print(f"\nLoading level {level}...")

    # Load red band; S3Map takes "bucket/key", so strip the URL scheme
    band_path = f"{s3_base.removeprefix('s3://')}/measurements/reflectance/r10m/{level}/b04"
    store = s3fs.S3Map(root=band_path, s3=fs)

    # Time the open calls. Nothing is computed here: .compute() is only called
    # in the visualization step, so this measures opening the array, not the
    # transfer of pixel data.
    start = time.perf_counter()
    z_array = zarr.open(store, mode="r")
    da_array = da.from_zarr(store)
    elapsed = time.perf_counter() - start

    # Get metadata
    shape = z_array.shape
    chunk_size = z_array.chunks
    # Uncompressed size from the array's actual dtype rather than an assumed
    # 8-byte float64; int64 accumulation avoids overflow on large shapes.
    nbytes = int(np.prod(shape, dtype=np.int64)) * z_array.dtype.itemsize
    size_mb = nbytes / 1024**2

    level_data[level] = {
        "shape": shape,
        "chunks": chunk_size,
        "size_mb": size_mb,
        "load_time_ms": elapsed * 1000,
        "data": da_array,
    }

    print(f"  Shape: {shape}")
    print(f"  Chunks: {chunk_size}")
    print(f"  Size: {size_mb:.1f}MB")
    print(f"  Load time: {elapsed * 1000:.1f}ms")

print(f"\n✓ Loaded {len(LEVELS)} pyramid levels")

## 5. Size comparison (920MB → 14MB)

In [None]:
# Collect per-level sizes and "rows×cols" labels, ordered by level number
levels = sorted(level_data)
sizes_mb = [level_data[lvl]["size_mb"] for lvl in levels]
shapes = []
for lvl in levels:
    level_shape = level_data[lvl]["shape"]
    shapes.append(f"{level_shape[0]}×{level_shape[1]}")

# Bar chart: one bar per pyramid level, coloured from dark (largest) to light
positions = range(len(levels))
tick_labels = [f"Level {lvl}\n{s}" for lvl, s in zip(levels, shapes)]

fig, ax = plt.subplots(figsize=(10, 6))
colors = ["darkred", "red", "orange", "gold"]
bars = ax.bar(positions, sizes_mb, color=colors[: len(levels)])

ax.set_title("GeoZarr Pyramid Size Reduction (Red Band, 10m)", fontsize=12, fontweight="bold")
ax.set_xlabel("Pyramid Level", fontsize=11)
ax.set_ylabel("Data Size (MB, uncompressed)", fontsize=11)
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels)
ax.grid(axis="y", alpha=0.3)

# Annotate every bar with its size, centred just above the bar top
for bar, size in zip(bars, sizes_mb):
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height(),
        f"{size:.1f}MB",
        ha="center",
        va="bottom",
        fontsize=10,
        fontweight="bold",
    )

plt.tight_layout()
plt.show()

# Summarise how much smaller the coarsest level is than the full-resolution one
largest_mb = sizes_mb[0]
smallest_mb = sizes_mb[-1]
reduction = (1 - smallest_mb / largest_mb) * 100
ratio = largest_mb / smallest_mb
print(f"\n📊 Size reduction: {reduction:.1f}% (level 0 → level {levels[-1]})")
print(f"   Ratio: {ratio:.0f}x smaller")
print(f"   Storage overhead: {(sum(sizes_mb) / sizes_mb[0] - 1) * 100:.0f}% for all pyramid levels")

## 6. Visual comparison (detail vs file size)

In [None]:
# Render every pyramid level side by side with a 2–98 percentile contrast stretch.
# The 2×2 grid matches the four entries in LEVELS.
fig, axes = plt.subplots(2, 2, figsize=(14, 14))
axes = axes.flatten()

for idx, level in enumerate(LEVELS):
    ax = axes[idx]
    data = level_data[level]["data"].compute()  # Load data from S3

    # Normalize for visualization (handle nodata)
    data_norm = np.nan_to_num(data, nan=0)
    valid_data = data_norm[np.isfinite(data_norm) & (data_norm > 0)]

    # Fall back to the unstretched data when there are no valid pixels or the
    # percentile range is empty (constant-valued band); dividing by a zero
    # range would otherwise fill the image with NaN/inf.
    data_stretched = data_norm
    if valid_data.size > 0:
        p2, p98 = np.percentile(valid_data, [2, 98])
        if p98 > p2:
            data_stretched = np.clip((data_norm - p2) / (p98 - p2), 0, 1)

    # Display
    ax.imshow(data_stretched, cmap="RdYlGn", aspect="auto")

    shape = level_data[level]["shape"]
    size = level_data[level]["size_mb"]
    resolution = 10 * (2**level)  # Resolution in meters
    ax.set_title(
        f"Level {level}: {shape[0]}×{shape[1]} pixels @ {resolution}m\n{size:.1f}MB uncompressed",
        fontsize=11,
        fontweight="bold",
    )
    ax.axis("off")

plt.suptitle(
    "Multi-Resolution Pyramid Visualization (Red Band, B04)", fontsize=14, fontweight="bold", y=0.98
)
plt.tight_layout()
plt.show()

# Summarise from level_data directly so this cell does not depend on names
# defined by the size-comparison cell.
total_mb = sum(entry["size_mb"] for entry in level_data.values())
print("✓ Visual comparison complete")
print(f"✓ Loaded {total_mb:.1f}MB total across {len(level_data)} levels")

## 7. Use case decision guide

In [None]:
# Use case decision matrix: (level description, typical use)
use_cases = [
    ("L0: 10980×10980 @ 10m", "Scientific analysis, exports, pixel-accurate work"),
    ("L1: 5490×5490 @ 20m", "Regional mapping, high-zoom web maps"),
    ("L2: 2745×2745 @ 40m", "Quick previews, medium-zoom, mobile"),
    ("L3: 1372×1372 @ 80m", "Thumbnails, low-zoom, continental views"),
]

print("\n📖 Level Selection Guide:\n")
# `label` rather than `level`, so the numeric `level` loop variable used by
# the earlier cells is not overwritten with a string.
for label, use in use_cases:
    print(f"{label}: {use}")

# Performance insights from measurements
if level_data:
    # Derive the finest/coarsest levels from what was actually loaded instead
    # of hard-coding keys 0 and 3, which would raise KeyError if LEVELS changes.
    finest, coarsest = min(level_data), max(level_data)
    finest_mb = level_data[finest]["size_mb"]
    ratio = finest_mb / level_data[coarsest]["size_mb"]
    overhead = (sum(entry["size_mb"] for entry in level_data.values()) / finest_mb - 1) * 100

    print("\n💡 Key Facts:")
    print(f"  • L{coarsest} is {ratio:.0f}× smaller than L{finest}")
    print(f"  • Total storage: {overhead:.0f}% overhead for all levels")
    print(f"  • Web maps: Auto-select level by zoom (L{coarsest}→L{finest} on demand)")
    print("  • Tile speedup: 3-5× (see 02_pyramid_performance.ipynb)")

## Summary

**Measured:** 4 pyramid levels (0-3) from S3, 64× size reduction (920MB → 14MB), ~33% total storage overhead

**Key insight:** Each level holds ¼ as many pixels as the previous one, so the whole pyramid costs only about a third more than level 0 alone (geometric series: 1 + ¼ + 1/16 + 1/64 ≈ 1.33)

**Pyramid generation:**
```python
# eopf-geozarr: create_geozarr_dataset(spatial_chunk=4096, min_dimension=256)
# While dimension ≥ 256: downsample 2×, write to /0, /1, /2, /3
```

**Production value:** 
- TiTiler auto-selects level by zoom
- Progressive loading: level 3 (fast) → level 0 (detailed)
- 3-5× tile speedup (see `02_pyramid_performance.ipynb`)

**Resources:** [GeoZarr Spec](https://geozarr.github.io) | [TiTiler-EOPF](https://github.com/developmentseed/titiler-eopf) | [STAC API](https://api.explorer.eopf.copernicus.eu/stac)