# SatCLIP Resolution Tests: Multi-Scale Evaluation

Comprehensive testing of L=10 vs L=40 across multiple spatial scales and boundary types.

## Test Categories

### Paper Benchmarks (Replication)
1. **Air Temperature** - Regression, global, smooth spatial variation
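2. **Elevation** - Regression, global, varies at all scales (evaluated here with a synthetic proxy, not real elevation data)
3. **Population Density** - Regression, global, clustered around cities (evaluated here with a synthetic distance-to-major-city proxy)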
4. **Countries** - Classification, ~200 classes, sharp political boundaries
5. **Biomes** - Classification, 14 classes, fuzzy ecological boundaries
6. **Ecoregions** - Classification, 846 classes, fine-grained

### Multi-Scale Boundary Tests
7. **States/Provinces** - ~4000 classes, medium-scale boundaries
8. **Checkerboard at Multiple Scales** - Controlled synthetic test

For GPU acceleration in Colab, select `Runtime -> Change runtime type -> T4 GPU`.

---
## 1. Setup

In [None]:
# Clone SatCLIP repository (only needed in Colab)
import os
# NOTE(review): the presence of the COLAB_GPU environment variable is used as
# the "running in Colab" signal - confirm Colab still sets it.
if 'COLAB_GPU' in os.environ:
    # IPython shell escapes: remove sample_data, .config and any previous
    # `satclip` checkout (errors silenced via 2>/dev/null), then clone afresh.
    !rm -rf sample_data .config satclip 2>/dev/null
    !git clone https://github.com/1hamzaiqbal/satclip.git

In [None]:
# Install dependencies
!pip install lightning rasterio torchgeo huggingface_hub geopandas shapely --quiet

In [None]:
import sys
import os
import zipfile
from urllib import request
import io

# Make the SatCLIP sources importable for both local and Colab execution:
# locally the package is looked up next to the parent of the working
# directory, in Colab inside the freshly cloned repository.
if 'COLAB_GPU' not in os.environ:
    sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'satclip'))
else:
    sys.path.append('./satclip/satclip')

import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point
from huggingface_hub import hf_hub_download
from load import get_satclip
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Prefer CUDA when PyTorch can see a GPU; otherwise run on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

In [None]:
# Load both models
# Each checkpoint is fetched from the Hugging Face Hub first, then handed to
# get_satclip; both encoders are switched to eval mode for inference.
print("Loading L=10 model...")
ckpt_l10 = hf_hub_download("microsoft/SatCLIP-ViT16-L10", "satclip-vit16-l10.ckpt")
model_l10 = get_satclip(ckpt_l10, device=device)
model_l10.eval()

print("Loading L=40 model...")
ckpt_l40 = hf_hub_download("microsoft/SatCLIP-ViT16-L40", "satclip-vit16-l40.ckpt")
model_l40 = get_satclip(ckpt_l40, device=device)
model_l40.eval()
print("Both models loaded!")

In [None]:
# ============================================
# HELPER FUNCTIONS (same setup as paper)
# ============================================

def get_embeddings(model, coords):
    """Return the model's embeddings for a set of coordinates.

    Args:
        model: Callable encoder; it is invoked with a float64 tensor that has
            been moved to the module-level ``device``.
        coords: Array-like of numeric coordinate rows. Lists, tuples and NumPy
            arrays (including object-dtype arrays holding numbers) are accepted.
            NOTE(review): callers in this notebook pass (lon, lat) pairs of
            shape (n, 2) - confirm that order against the SatCLIP encoder.

    Returns:
        numpy.ndarray holding the model output, copied back to the CPU.
    """
    # Convert to float64 *before* creating the tensor. torch.tensor() infers
    # float32 for plain Python floats, so a later .double() cannot restore the
    # lost precision, and it rejects object-dtype arrays altogether.
    coords_array = np.ascontiguousarray(coords, dtype=np.float64)
    coords_tensor = torch.tensor(coords_array)
    with torch.no_grad():  # inference only, no autograd bookkeeping
        emb = model(coords_tensor.to(device)).cpu().numpy()
    return emb

def evaluate_classification(emb_l10, emb_l40, y, task_name, use_mlp=True):
    """Train and score one classifier per embedding set on the same split.

    Args:
        emb_l10: Feature matrix produced by the L=10 model.
        emb_l40: Feature matrix produced by the L=40 model (same row order).
        y: Class labels aligned with the rows of both feature matrices.
        task_name: Task label; accepted for call-site readability, not used.
        use_mlp: If True fit an MLP with hidden layers (128, 64) and early
            stopping, otherwise a logistic regression.

    Returns:
        Tuple ``(acc_l10, acc_l40)`` with the held-out accuracy (30% test
        split, fixed random_state=42) of each model's classifier.
    """
    # A single call shuffles once and applies the same indices to every
    # array, so both feature sets share identical train/test rows.
    splits = train_test_split(emb_l10, emb_l40, y, test_size=0.3, random_state=42)
    train_l10, test_l10, train_l40, test_l40, y_train, y_test = splits

    def build_classifier():
        # Fresh, identically configured estimator for each embedding set.
        if not use_mlp:
            return LogisticRegression(max_iter=1000, random_state=42, n_jobs=-1)
        return MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=500,
                             random_state=42, early_stopping=True)

    accuracies = []
    for features_train, features_test in ((train_l10, test_l10), (train_l40, test_l40)):
        fitted = build_classifier().fit(features_train, y_train)
        accuracies.append(accuracy_score(y_test, fitted.predict(features_test)))

    acc_l10, acc_l40 = accuracies
    return acc_l10, acc_l40

def evaluate_regression(emb_l10, emb_l40, y, task_name, use_mlp=True):
    """Train and score one regressor per embedding set on the same split.

    Args:
        emb_l10: Feature matrix produced by the L=10 model.
        emb_l40: Feature matrix produced by the L=40 model (same row order).
        y: Continuous targets aligned with the rows of both feature matrices.
        task_name: Task label; accepted for call-site readability, not used.
        use_mlp: If True fit an MLP with three hidden layers of 64 units and
            early stopping, otherwise a ridge regression (alpha=1.0).

    Returns:
        Tuple ``(mse_l10, mse_l40, r2_l10, r2_l40)`` measured on the held-out
        half of the data (50/50 split, fixed random_state=42).
    """
    # One call -> one shuffle -> identical train/test rows for both models.
    splits = train_test_split(emb_l10, emb_l40, y, test_size=0.5, random_state=42)
    train_l10, test_l10, train_l40, test_l40, y_train, y_test = splits

    def build_regressor():
        # Fresh, identically configured estimator for each embedding set.
        if not use_mlp:
            return Ridge(alpha=1.0)
        return MLPRegressor(hidden_layer_sizes=(64, 64, 64), max_iter=3000,
                            random_state=42, early_stopping=True)

    scores = {}
    for tag, features_train, features_test in (
        ("l10", train_l10, test_l10),
        ("l40", train_l40, test_l40),
    ):
        predictions = build_regressor().fit(features_train, y_train).predict(features_test)
        scores[tag] = (
            mean_squared_error(y_test, predictions),
            r2_score(y_test, predictions),
        )

    (mse_l10, r2_l10), (mse_l40, r2_l40) = scores["l10"], scores["l40"]
    return mse_l10, mse_l40, r2_l10, r2_l40

# Store all results
all_results = []

---
## 2. Download All Datasets

In [None]:
# ============================================
# DATASET 1: Air Temperature (from paper)
# ============================================
print("Downloading Air Temperature dataset...")
url = 'https://springernature.figshare.com/ndownloader/files/12609182'
# Use the response as a context manager so the HTTP connection is released
# even if reading or decoding fails.
with request.urlopen(url) as url_open:
    temp_csv_text = url_open.read().decode('utf-8')
temp_data = np.array(pd.read_csv(io.StringIO(temp_csv_text)))
# The first two columns are used as coordinates.
# NOTE(review): assumed to be in the (lon, lat) order the encoder expects - confirm.
temp_coords = temp_data[:, :2]
# Column 4 is the regression target, scaled by its maximum value.
temp_y = temp_data[:, 4] / temp_data[:, 4].max()  # Normalize
print(f"  ✓ {len(temp_coords)} observations")

In [None]:
# ============================================
# DATASET 2: Elevation proxy (synthetic)
# No elevation data is downloaded here: the target is generated below from
# latitude, longitude and Gaussian noise at uniformly random coordinates.
# ============================================
print("\nCreating Elevation proxy dataset...")
print("  (Using latitude as elevation proxy - higher lat = colder = often higher elevation correlation)")
# Note: For a proper test, you'd download actual elevation data
# Here we create a proxy that correlates with elevation patterns
# The order of the random draws below (lons, lats, noise) determines the
# generated values for the fixed seed, so it must not be rearranged.
np.random.seed(42)
n_elev = 5000
elev_lons = np.random.uniform(-180, 180, n_elev)
elev_lats = np.random.uniform(-60, 70, n_elev)
# Elevation proxy: |latitude| scaled by 70, plus a sinusoid in longitude
# (two periods around the globe) and Gaussian noise with std 0.1
elev_y = np.abs(elev_lats) / 70 + 0.3 * np.sin(np.radians(elev_lons) * 2) + np.random.normal(0, 0.1, n_elev)
# Min-max scale the target to [0, 1]
elev_y = (elev_y - elev_y.min()) / (elev_y.max() - elev_y.min())
# Coordinates are stored as (lon, lat) columns
elev_coords = np.stack([elev_lons, elev_lats], axis=1)
print(f"  ✓ {len(elev_coords)} samples")

In [None]:
# ============================================
# DATASET 3: Countries (Natural Earth 110m)
# ============================================
print("\nDownloading Countries dataset (Natural Earth)...")
try:
    countries_url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
    # IPython shell escape; {countries_url} is filled in from the Python variable.
    !wget -q {countries_url} -O countries.zip
    # Unpack the archive and read the country polygons from the shapefile.
    with zipfile.ZipFile('countries.zip', 'r') as z:
        z.extractall('countries_data')
    countries_gdf = gpd.read_file('countries_data/ne_110m_admin_0_countries.shp')
    print(f"  ✓ {len(countries_gdf)} countries loaded")
except Exception as e:
    # Best-effort: report the problem and leave countries_gdf as None so the
    # sampling and test cells can skip this dataset.
    print(f"  Error: {e}")
    countries_gdf = None

In [None]:
# ============================================
# DATASET 4: States/Provinces (Natural Earth 10m)
# ============================================
print("\nDownloading States/Provinces dataset (Natural Earth)...")
try:
    states_url = "https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_1_states_provinces.zip"
    # IPython shell escape; {states_url} is filled in from the Python variable.
    !wget -q {states_url} -O states.zip
    with zipfile.ZipFile('states.zip', 'r') as z:
        z.extractall('states_data')
    # Use the first .shp file found in the extracted directory.
    # NOTE(review): assumes the archive contains exactly one shapefile - confirm.
    shp_files = [f for f in os.listdir('states_data') if f.endswith('.shp')]
    states_gdf = gpd.read_file(os.path.join('states_data', shp_files[0]))
    print(f"  ✓ {len(states_gdf)} states/provinces loaded")
except Exception as e:
    # Best-effort: report the problem and leave states_gdf as None so the
    # sampling and test cells can skip this dataset.
    print(f"  Error: {e}")
    states_gdf = None

In [None]:
# ============================================
# DATASET 5: Ecoregions (RESOLVE 2017)
# ============================================
print("\nDownloading Ecoregions dataset (RESOLVE 2017)...")
try:
    # IPython shell escape: download the archive with wget.
    !wget -q https://storage.googleapis.com/teow2016/Ecoregions2017.zip -O ecoregions.zip
    with zipfile.ZipFile('ecoregions.zip', 'r') as z:
        z.extractall('ecoregions_data')
    # Use the first .shp file found in the extracted directory.
    # NOTE(review): assumes the archive contains exactly one shapefile - confirm.
    shp_files = [f for f in os.listdir('ecoregions_data') if f.endswith('.shp')]
    ecoregions_gdf = gpd.read_file(os.path.join('ecoregions_data', shp_files[0]))
    print(f"  ✓ {len(ecoregions_gdf)} ecoregions loaded")
    # BIOME_NUM is the column later used as the biome-level label.
    print(f"  Biomes: {ecoregions_gdf['BIOME_NUM'].nunique()} unique")
except Exception as e:
    # Best-effort: report the problem and leave ecoregions_gdf as None so the
    # sampling and test cells can skip this dataset.
    print(f"  Error: {e}")
    ecoregions_gdf = None

In [None]:
# ============================================
# Sample points from geographic datasets
# ============================================
def sample_points_from_geodataframe(gdf, label_col, n_samples=5000, seed=42):
    """Sample random points inside the polygons of ``gdf`` and fetch their labels.

    Candidate points are drawn uniformly in x/y over the bounding box of
    ``gdf`` (10x oversampled, since candidates outside every polygon are
    discarded), spatially joined against the polygons, and the first
    ``n_samples`` matches are kept.

    Args:
        gdf: GeoDataFrame with a 'geometry' column and the ``label_col`` column.
        label_col: Name of the column whose value labels each sampled point.
        n_samples: Maximum number of labelled points to return.
        seed: Seed passed to ``np.random.seed`` (this reseeds NumPy's global RNG).

    Returns:
        Tuple ``(coords, labels)``: ``coords`` is a float array of shape (k, 2)
        with (x, y) per row and ``labels`` the matching ``label_col`` values,
        where k <= n_samples. With no matches ``coords`` has shape (0, 2).
    """
    np.random.seed(seed)
    minx, miny, maxx, maxy = gdf.total_bounds

    # Oversample to account for ocean/invalid points
    n_candidates = n_samples * 10
    lons = np.random.uniform(minx, maxx, n_candidates)
    lats = np.random.uniform(miny, maxy, n_candidates)

    # Vectorised point construction instead of one Python-level Point() per row.
    points = gpd.GeoDataFrame(
        geometry=gpd.points_from_xy(lons, lats),
        crs=gdf.crs
    )

    joined = gpd.sjoin(points, gdf[[label_col, 'geometry']], how='inner', predicate='within')
    # A point can match several polygons; keep one row per point.
    joined = joined.drop_duplicates(subset='geometry').head(n_samples)

    # column_stack keeps the (k, 2) shape even when nothing matched.
    coords = np.column_stack((
        joined.geometry.x.to_numpy(dtype=float),
        joined.geometry.y.to_numpy(dtype=float),
    ))
    labels = joined[label_col].values

    return coords, labels

print("Sampling points from geographic datasets...")

# Countries: skipped (left as None) when the download failed
if countries_gdf is None:
    country_coords, country_labels = None, None
else:
    country_coords, country_labels = sample_points_from_geodataframe(
        countries_gdf, 'ADMIN', n_samples=5000
    )
    print(f"  Countries: {len(country_coords)} points, {len(np.unique(country_labels))} unique")

# States/Provinces
if states_gdf is None:
    state_coords, state_labels = None, None
else:
    # Pick the label column: 'name', then 'NAME', else the second column
    if 'name' in states_gdf.columns:
        name_col = 'name'
    elif 'NAME' in states_gdf.columns:
        name_col = 'NAME'
    else:
        name_col = states_gdf.columns[1]
    state_coords, state_labels = sample_points_from_geodataframe(
        states_gdf, name_col, n_samples=8000
    )
    print(f"  States: {len(state_coords)} points, {len(np.unique(state_labels))} unique")

# Ecoregions are sampled twice, once per label granularity
if ecoregions_gdf is None:
    biome_coords, biome_labels = None, None
    eco_coords, eco_labels = None, None
else:
    # Coarse labels: biome number
    biome_coords, biome_labels = sample_points_from_geodataframe(
        ecoregions_gdf, 'BIOME_NUM', n_samples=5000
    )
    print(f"  Biomes: {len(biome_coords)} points, {len(np.unique(biome_labels))} unique")

    # Fine labels: ecoregion name
    eco_coords, eco_labels = sample_points_from_geodataframe(
        ecoregions_gdf, 'ECO_NAME', n_samples=8000
    )
    print(f"  Ecoregions: {len(eco_coords)} points, {len(np.unique(eco_labels))} unique")

---
## 3. Paper Benchmark Tests

In [None]:
# Section banner for the paper benchmark tests
print("\n".join([
    "=" * 70,
    "PAPER BENCHMARK TESTS",
    "(Using MLP as in paper: hidden_layers=(64,64,64) for regression, (128,64) for classification)",
    "=" * 70,
]))

In [None]:
# ============================================
# TEST 1: Air Temperature (Regression)
# Paper reports: MSE 0.25±0.02 for ViT16-L40
# ============================================
print("\n1. AIR TEMPERATURE (Regression)")
print("-" * 50)

# Embed the same coordinates with each model (L=10 first, then L=40)
temp_emb_l10, temp_emb_l40 = (
    get_embeddings(encoder, temp_coords) for encoder in (model_l10, model_l40)
)

mse_l10, mse_l40, r2_l10, r2_l40 = evaluate_regression(
    temp_emb_l10, temp_emb_l40, temp_y, "Air Temperature"
)

print(f"  L=10: MSE={mse_l10:.6f}, R²={r2_l10:.4f}")
print(f"  L=40: MSE={mse_l40:.6f}, R²={r2_l40:.4f}")
# Lower MSE wins; ties go to L=40
print(f"  Winner: {'L=10' if mse_l10 < mse_l40 else 'L=40'}")

# Record R² as the comparable score for the summary table
all_results.append(dict(
    task='Air Temperature', type='regression', scale='global-smooth',
    n_samples=len(temp_coords), n_classes='continuous',
    l10_score=r2_l10, l40_score=r2_l40, metric='R²',
))

In [None]:
# ============================================
# TEST 2: Elevation Proxy (Regression)
# ============================================
print("\n2. ELEVATION PROXY (Regression)")
print("-" * 50)

# Embed the same coordinates with each model (L=10 first, then L=40)
elev_emb_l10, elev_emb_l40 = (
    get_embeddings(encoder, elev_coords) for encoder in (model_l10, model_l40)
)

mse_l10, mse_l40, r2_l10, r2_l40 = evaluate_regression(
    elev_emb_l10, elev_emb_l40, elev_y, "Elevation"
)

print(f"  L=10: MSE={mse_l10:.6f}, R²={r2_l10:.4f}")
print(f"  L=40: MSE={mse_l40:.6f}, R²={r2_l40:.4f}")
# Lower MSE wins; ties go to L=40
print(f"  Winner: {'L=10' if mse_l10 < mse_l40 else 'L=40'}")

# Record R² as the comparable score for the summary table
all_results.append(dict(
    task='Elevation (proxy)', type='regression', scale='global-varied',
    n_samples=len(elev_coords), n_classes='continuous',
    l10_score=r2_l10, l40_score=r2_l40, metric='R²',
))

In [None]:
# ============================================
# TEST 3: Countries (Classification)
# Paper reports: ~96% for ViT16
# ============================================
if country_coords is not None:
    print("\n3. COUNTRIES (Classification)")
    print("-" * 50)

    # Map country names to integer class ids
    le = LabelEncoder().fit(country_labels)
    country_y = le.transform(country_labels)
    n_countries = len(le.classes_)

    # Embed the same coordinates with each model (L=10 first, then L=40)
    country_emb_l10, country_emb_l40 = (
        get_embeddings(encoder, country_coords) for encoder in (model_l10, model_l40)
    )

    acc_l10, acc_l40 = evaluate_classification(
        country_emb_l10, country_emb_l40, country_y, "Countries"
    )

    print(f"  Classes: {n_countries}")
    print(f"  L=10: Accuracy={acc_l10:.2%}")
    print(f"  L=40: Accuracy={acc_l40:.2%}")
    # Higher accuracy wins; ties go to L=40
    print(f"  Winner: {'L=10' if acc_l10 > acc_l40 else 'L=40'}")

    all_results.append(dict(
        task='Countries', type='classification', scale='~1000km boundaries',
        n_samples=len(country_coords), n_classes=n_countries,
        l10_score=acc_l10, l40_score=acc_l40, metric='Accuracy',
    ))

In [None]:
# ============================================
# TEST 4: Biomes (Classification - 14 classes)
# Paper reports: 94.27% for ViT16-L40
# ============================================
if biome_coords is not None:
    print("\n4. BIOMES (Classification - 14 classes)")
    print("-" * 50)

    # Labels are cast to strings before being mapped to integer class ids
    biome_label_strings = biome_labels.astype(str)
    le = LabelEncoder().fit(biome_label_strings)
    biome_y = le.transform(biome_label_strings)
    n_biomes = len(le.classes_)

    # Embed the same coordinates with each model (L=10 first, then L=40)
    biome_emb_l10, biome_emb_l40 = (
        get_embeddings(encoder, biome_coords) for encoder in (model_l10, model_l40)
    )

    acc_l10, acc_l40 = evaluate_classification(
        biome_emb_l10, biome_emb_l40, biome_y, "Biomes"
    )

    print(f"  Classes: {n_biomes}")
    print(f"  L=10: Accuracy={acc_l10:.2%}")
    print(f"  L=40: Accuracy={acc_l40:.2%}")
    print(f"  Paper reports ~94% for L=40")
    # Higher accuracy wins; ties go to L=40
    print(f"  Winner: {'L=10' if acc_l10 > acc_l40 else 'L=40'}")

    all_results.append(dict(
        task='Biomes', type='classification', scale='~5000km regions (fuzzy)',
        n_samples=len(biome_coords), n_classes=n_biomes,
        l10_score=acc_l10, l40_score=acc_l40, metric='Accuracy',
    ))

In [None]:
# ============================================
# TEST 5: Ecoregions (Classification - 846 classes)
# Paper reports: 91.61% for ViT16-L40
# ============================================
if eco_coords is not None:
    print("\n5. ECOREGIONS (Classification - fine-grained)")
    print("-" * 50)

    # Labels are cast to strings before being mapped to integer class ids
    eco_label_strings = eco_labels.astype(str)
    le = LabelEncoder().fit(eco_label_strings)
    eco_y = le.transform(eco_label_strings)
    n_ecos = len(le.classes_)

    # Embed the same coordinates with each model (L=10 first, then L=40)
    eco_emb_l10, eco_emb_l40 = (
        get_embeddings(encoder, eco_coords) for encoder in (model_l10, model_l40)
    )

    acc_l10, acc_l40 = evaluate_classification(
        eco_emb_l10, eco_emb_l40, eco_y, "Ecoregions"
    )

    print(f"  Classes: {n_ecos}")
    print(f"  L=10: Accuracy={acc_l10:.2%}")
    print(f"  L=40: Accuracy={acc_l40:.2%}")
    print(f"  Paper reports ~92% for L=40")
    # Higher accuracy wins; ties go to L=40
    print(f"  Winner: {'L=10' if acc_l10 > acc_l40 else 'L=40'}")

    all_results.append(dict(
        task='Ecoregions', type='classification', scale='~500km regions',
        n_samples=len(eco_coords), n_classes=n_ecos,
        l10_score=acc_l10, l40_score=acc_l40, metric='Accuracy',
    ))

In [None]:
# ============================================
# TEST 6: States/Provinces (Classification)
# Medium-scale political boundaries
# ============================================
if state_coords is not None:
    print("\n6. STATES/PROVINCES (Classification)")
    print("-" * 50)

    # Labels are cast to strings before being mapped to integer class ids
    state_label_strings = state_labels.astype(str)
    le = LabelEncoder().fit(state_label_strings)
    state_y = le.transform(state_label_strings)
    n_states = len(le.classes_)

    # Embed the same coordinates with each model (L=10 first, then L=40)
    state_emb_l10, state_emb_l40 = (
        get_embeddings(encoder, state_coords) for encoder in (model_l10, model_l40)
    )

    acc_l10, acc_l40 = evaluate_classification(
        state_emb_l10, state_emb_l40, state_y, "States"
    )

    print(f"  Classes: {n_states}")
    print(f"  L=10: Accuracy={acc_l10:.2%}")
    print(f"  L=40: Accuracy={acc_l40:.2%}")
    # Higher accuracy wins; ties go to L=40
    print(f"  Winner: {'L=10' if acc_l10 > acc_l40 else 'L=40'}")

    all_results.append(dict(
        task='States/Provinces', type='classification', scale='~300km boundaries',
        n_samples=len(state_coords), n_classes=n_states,
        l10_score=acc_l10, l40_score=acc_l40, metric='Accuracy',
    ))

In [None]:
# ============================================
# TEST 7: Population Density (Regression)
# Paper reports: MSE ~0.48 for ViT16
# Using synthetic proxy based on known urban coordinates
# ============================================
print("\n7. POPULATION DENSITY PROXY (Regression)")
print("-" * 50)

# Create population density proxy dataset
# Higher values near major cities, lower elsewhere
# The order of the random draws (lons, lats, noise) fixes the generated
# values for the seed, so it must not be rearranged.
np.random.seed(42)
n_pop = 5000

# Major city coordinates (lon, lat) with high population
major_cities = [
    (-74.0, 40.7),    # New York
    (-122.4, 37.8),   # San Francisco
    (-87.6, 41.9),    # Chicago
    (139.7, 35.7),    # Tokyo
    (-0.1, 51.5),     # London
    (116.4, 39.9),    # Beijing
    (72.9, 19.1),     # Mumbai
    (-46.6, -23.5),   # Sao Paulo
    (31.2, 30.0),     # Cairo
    (2.3, 48.9),      # Paris
]

pop_lons = np.random.uniform(-170, 170, n_pop)
pop_lats = np.random.uniform(-50, 65, n_pop)
pop_coords = np.stack([pop_lons, pop_lats], axis=1)

# Calculate population density proxy (inverse distance to nearest city)
def distance_deg(lon1, lat1, lon2, lat2):
    """Return the planar Euclidean distance in degrees between two points.

    Works element-wise on scalars or broadcastable arrays. Longitude
    wrap-around and the shrinking of longitude degrees with latitude are
    not taken into account.
    """
    return np.sqrt((lon1-lon2)**2 + (lat1-lat2)**2)

# Broadcast the (n_pop, 1) sample columns against the (n_cities,) city rows to
# get every sample-to-city distance at once, then keep the nearest city.
city_lons, city_lats = np.array(major_cities).T
min_dist = distance_deg(
    pop_lons[:, None], pop_lats[:, None], city_lons, city_lats
).min(axis=1)
pop_density = 1.0 / (1.0 + min_dist/10)  # Decay with distance

# Add noise, clamp to [0, 1], then min-max rescale
pop_density += np.random.normal(0, 0.05, n_pop)
pop_density = np.clip(pop_density, 0, 1)
pop_density = (pop_density - pop_density.min()) / (pop_density.max() - pop_density.min())

print(f"  Samples: {len(pop_coords)}")

pop_emb_l10 = get_embeddings(model_l10, pop_coords)
pop_emb_l40 = get_embeddings(model_l40, pop_coords)

mse_l10, mse_l40, r2_l10, r2_l40 = evaluate_regression(
    pop_emb_l10, pop_emb_l40, pop_density, "Population Density"
)

print(f"  L=10: MSE={mse_l10:.6f}, R²={r2_l10:.4f}")
print(f"  L=40: MSE={mse_l40:.6f}, R²={r2_l40:.4f}")
print(f"  Paper reports MSE ~0.48 for L=40")
# Lower MSE wins; ties go to L=40
print(f"  Winner: {'L=10' if mse_l10 < mse_l40 else 'L=40'}")

# Record R² as the comparable score for the summary table
all_results.append({
    'task': 'Population Density (proxy)', 'type': 'regression', 'scale': 'clustered-urban',
    'n_samples': len(pop_coords), 'n_classes': 'continuous',
    'l10_score': r2_l10, 'l40_score': r2_l40, 'metric': 'R²'
})

---
## 4. Multi-Scale Checkerboard Tests

In [None]:
print("\n" + "="*70)
print("MULTI-SCALE CHECKERBOARD TESTS")
print("(Controlled synthetic test at exact scales)")
print("="*70)

def create_checkerboard(cell_size_deg, n_samples=6000, seed=42):
    """Create a two-class checkerboard classification dataset.

    Points are drawn uniformly with longitude in [-180, 180) and latitude in
    [-60, 60); each point is labelled by the parity of the grid cell of side
    ``cell_size_deg`` degrees that contains it.

    Args:
        cell_size_deg: Side length of one checkerboard cell, in degrees.
        n_samples: Number of points to generate.
        seed: Seed passed to ``np.random.seed`` (reseeds NumPy's global RNG).

    Returns:
        Tuple ``(coords, labels)``: ``coords`` has shape (n_samples, 2) with
        (lon, lat) columns; ``labels`` is an integer array of 0/1 values.
    """
    np.random.seed(seed)
    lons = np.random.uniform(-180, 180, n_samples)
    lats = np.random.uniform(-60, 60, n_samples)

    # floor, not truncation: casting straight to int rounds toward zero, which
    # maps both (-cell, 0) and (0, cell) to index 0 and so merges the cells on
    # either side of lon=0 / lat=0 into one double-width cell.
    cell_x = np.floor(lons / cell_size_deg).astype(int)
    cell_y = np.floor(lats / cell_size_deg).astype(int)
    labels = (cell_x + cell_y) % 2

    return np.stack([lons, lats], axis=1), labels

# Sweep the checkerboard from coarse to fine cells
cell_sizes = [90, 45, 20, 10, 5, 2, 1, 0.5, 0.2, 0.1]
# Rough conversion using 111 km per degree
approx_km = [111 * size for size in cell_sizes]

print(f"\n{'Cell Size':>10} | {'≈ km':>8} | {'L=10 Acc':>10} | {'L=40 Acc':>10} | {'Winner':>10}")
print("-" * 60)

checkerboard_results = []

for cell_size, km in zip(cell_sizes, approx_km):
    coords, labels = create_checkerboard(cell_size)

    emb_l10 = get_embeddings(model_l10, coords)
    emb_l40 = get_embeddings(model_l40, coords)

    # Use MLP for consistency with paper
    acc_l10, acc_l40 = evaluate_classification(emb_l10, emb_l40, labels, f"Checker_{cell_size}")

    # Neither model above 55% -> chance level; otherwise a model wins only
    # with a lead of more than one percentage point.
    if max(acc_l10, acc_l40) < 0.55:
        winner = "RANDOM"
    elif acc_l40 > acc_l10 + 0.01:
        winner = "L=40"
    elif acc_l10 > acc_l40 + 0.01:
        winner = "L=10"
    else:
        winner = "~Same"

    print(f"{cell_size:>8.1f}° | {km:>7.0f} | {acc_l10:>10.2%} | {acc_l40:>10.2%} | {winner:>10}")

    checkerboard_results.append(dict(
        cell_size_deg=cell_size,
        cell_size_km=km,
        l10_acc=acc_l10,
        l40_acc=acc_l40,
    ))

    all_results.append(dict(
        task=f'Checkerboard {km:.0f}km', type='classification', scale=f'{km:.0f}km grid',
        n_samples=len(coords), n_classes=2,
        l10_score=acc_l10, l40_score=acc_l40, metric='Accuracy',
    ))

In [None]:
# Plot checkerboard results
fig, ax = plt.subplots(figsize=(12, 6))

# Pull the three plotted series out of the per-scale result dicts
kms, l10_accs, l40_accs = (
    [entry[key] for entry in checkerboard_results]
    for key in ('cell_size_km', 'l10_acc', 'l40_acc')
)

ax.semilogx(kms, l10_accs, 'o-', label='L=10', linewidth=2, markersize=10)
ax.semilogx(kms, l40_accs, 's-', label='L=40', linewidth=2, markersize=10)
ax.axhline(y=0.5, color='red', linestyle='--', alpha=0.7, label='Random (50%)')
ax.axhline(y=0.9, color='green', linestyle='--', alpha=0.5, label='90% threshold')

ax.set_xlabel('Checkerboard Cell Size (km)', fontsize=12)
ax.set_ylabel('Classification Accuracy', fontsize=12)
ax.set_title('Multi-Scale Checkerboard Test: L=10 vs L=40\n(Effective Resolution = where accuracy drops to random)', fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
ax.set_ylim(0.4, 1.05)

plt.tight_layout()
plt.show()

# Find effective resolution: the first (largest) cell size, in sweep order,
# at which a model's accuracy falls below 60%. Nothing is printed for a model
# that never drops below that level.
first_l10_drop = next((entry for entry in checkerboard_results if entry['l10_acc'] < 0.6), None)
if first_l10_drop is not None:
    print(f"\nL=10 effective resolution: ~{first_l10_drop['cell_size_km']:.0f} km")

first_l40_drop = next((entry for entry in checkerboard_results if entry['l40_acc'] < 0.6), None)
if first_l40_drop is not None:
    print(f"L=40 effective resolution: ~{first_l40_drop['cell_size_km']:.0f} km")

---
## 5. Boundary Sharpness Comparison

Compare performance on:
- **Sharp boundaries**: Countries, States (political borders)
- **Fuzzy boundaries**: Biomes, Ecoregions (ecological gradients)

In [None]:
print("\n" + "="*70)
print("BOUNDARY SHARPNESS ANALYSIS")
print("="*70)

# Group results by boundary type
sharp_boundaries = ['Countries', 'States/Provinces']
fuzzy_boundaries = ['Biomes', 'Ecoregions']

# Same report for each group: heading, rule, then one line per matching task
for heading, task_group in (
    ("\nSHARP BOUNDARIES (Political):", sharp_boundaries),
    ("\nFUZZY BOUNDARIES (Ecological):", fuzzy_boundaries),
):
    print(heading)
    print("-" * 50)
    for r in all_results:
        if r['task'] not in task_group:
            continue
        # A model wins only with a lead of more than one percentage point
        diff = r['l10_score'] - r['l40_score']
        if diff > 0.01:
            winner = 'L=10'
        elif diff < -0.01:
            winner = 'L=40'
        else:
            winner = '~Same'
        print(f"  {r['task']:20s}: L=10={r['l10_score']:.1%}, L=40={r['l40_score']:.1%} -> {winner}")

---
## 6. Summary Table

In [None]:
print("\n" + "="*80)
print("COMPLETE RESULTS SUMMARY")
print("="*80)

# Convert to DataFrame for nice display
results_df = pd.DataFrame(all_results)

# Add winner column
def get_winner(row):
    """Name the better model for one result row.

    Args:
        row: Mapping with 'l10_score', 'l40_score' and 'metric' entries.

    Returns:
        'L=10' or 'L=40' when the metric is 'R²' or 'Accuracy' and that model
        leads by more than 0.01; '~Same' in every other case.
    """
    diff = row['l10_score'] - row['l40_score']
    higher_is_better = row['metric'] in ('R²', 'Accuracy')
    if higher_is_better and diff > 0.01:
        return 'L=10'
    if higher_is_better and diff < -0.01:
        return 'L=40'
    return '~Same'

results_df['winner'] = results_df.apply(get_winner, axis=1)

# Display
print(f"\n{'Task':<25} | {'Type':<15} | {'Scale':<20} | {'L=10':>8} | {'L=40':>8} | {'Winner':>8}")
print("-" * 95)

for _, row in results_df.iterrows():
    if 'Checkerboard' not in row['task']:  # Skip checkerboard for summary
        # Accuracies are shown as percentages, R² values as plain decimals.
        # Both are right-aligned to width 8 so the score columns line up with
        # the 'L=10' / 'L=40' headers above.
        score_fmt = '>8.2%' if row['metric'] == 'Accuracy' else '>8.4f'
        print(f"{row['task']:<25} | {row['type']:<15} | {row['scale']:<20} | {row['l10_score']:{score_fmt}} | {row['l40_score']:{score_fmt}} | {row['winner']:>8}")

In [None]:
# ============================================
# FINAL VISUALIZATION: L=10 vs L=40 across all tasks
# ============================================
def _plot_model_comparison(ax, results, *, scale, ylabel, title, value_fmt,
                           annotate_nonpositive):
    """Draw paired L=10 / L=40 bars for ``results`` on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        results: Result dicts with 'task', 'l10_score' and 'l40_score' keys.
        scale: Factor applied to every score before plotting (100 turns
            fractional accuracies into percentages).
        ylabel: Y-axis label.
        title: Axes title.
        value_fmt: ``str.format`` template for the number written above a bar.
        annotate_nonpositive: Whether bars of height <= 0 also get a label.

    Returns:
        List of all plotted bar heights (L=10 values followed by L=40 values).
    """
    tasks = [r['task'] for r in results]
    l10_scores = [r['l10_score'] * scale for r in results]
    l40_scores = [r['l40_score'] * scale for r in results]

    x = np.arange(len(tasks))
    width = 0.35

    bar_groups = (
        ax.bar(x - width/2, l10_scores, width, label='L=10', color='steelblue'),
        ax.bar(x + width/2, l40_scores, width, label='L=40', color='coral'),
    )

    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(title, fontsize=14)
    ax.set_xticks(x)
    ax.set_xticklabels(tasks, rotation=45, ha='right')

    # Add value labels
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            if height > 0 or annotate_nonpositive:
                ax.annotate(value_fmt.format(height),
                            xy=(bar.get_x() + bar.get_width()/2, height),
                            xytext=(0, 3), textcoords="offset points",
                            ha='center', va='bottom', fontsize=8)

    return l10_scores + l40_scores


fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Filter non-checkerboard results for main comparison
main_results = [r for r in all_results if 'Checkerboard' not in r['task']]

# Left plot: Classification tasks
class_results = [r for r in main_results if r['type'] == 'classification']
if class_results:
    _plot_model_comparison(
        axes[0], class_results, scale=100,
        ylabel='Accuracy (%)', title='Classification Tasks: L=10 vs L=40',
        value_fmt='{:.1f}%', annotate_nonpositive=True,
    )
    axes[0].set_ylim(0, 100)
    # The tasks plotted here are multi-class, so 50% is only a visual
    # reference and not their chance level.
    axes[0].axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% reference')
    # Build the legend last so that the reference line is included in it.
    axes[0].legend()

# Right plot: Regression tasks
reg_results = [r for r in main_results if r['type'] == 'regression']
if reg_results:
    plotted_r2 = _plot_model_comparison(
        axes[1], reg_results, scale=1,
        ylabel='R² Score', title='Regression Tasks: L=10 vs L=40',
        value_fmt='{:.3f}', annotate_nonpositive=False,
    )
    # R² can be negative; extend the lower limit so such bars stay visible
    # instead of being clipped at zero.
    axes[1].set_ylim(min(0, min(plotted_r2)), 1)
    axes[1].legend()

plt.tight_layout()
plt.savefig('satclip_resolution_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
print("\nSaved: satclip_resolution_comparison.png")

In [None]:
# Final statistics
print("\n" + "="*80)
print("OVERALL STATISTICS")
print("="*80)

# Win counts cover every row of results_df, checkerboard scales included.
l10_wins = (results_df['winner'] == 'L=10').sum()
l40_wins = (results_df['winner'] == 'L=40').sum()
ties = (results_df['winner'] == '~Same').sum()

print(f"\nL=10 wins: {l10_wins}")
print(f"L=40 wins: {l40_wins}")
print(f"Ties: {ties}")

# Checkerboard effective resolution
checker_df = results_df[results_df['task'].str.contains('Checkerboard')]
if len(checker_df) > 0:
    print("\nCheckerboard Effective Resolution:")
    for _, row in checker_df.iterrows():
        # A model "works" at a scale when it is above 55% accuracy.
        l10_ok = row['l10_score'] > 0.55
        l40_ok = row['l40_score'] > 0.55
        if l10_ok and l40_ok:
            print(f"  Both models work at {row['scale']}")
        elif l10_ok or l40_ok:
            # Scales where exactly one model still beats chance are the
            # interesting ones, so report them explicitly.
            print(f"  Only {'L=10' if l10_ok else 'L=40'} works at {row['scale']}")
        else:
            print(f"  Both models fail at {row['scale']}")
            # Rows follow the coarse-to-fine sweep order; stop at the first
            # scale neither model resolves.
            break

print("\n" + "="*80)
print("KEY FINDINGS")
print("="*80)
print("""
1. Compare L=10 vs L=40 performance across different spatial scales
2. Identify the effective resolution limit of each model
3. Test whether boundary sharpness (political vs ecological) affects results
4. Validate against paper's reported benchmarks
""")