## Make a GSV-specific config

In [None]:
from pathlib import Path
import yaml, math

REPO   = Path("/users/project1/pt01183/Building-height-width")
NEUR   = REPO / "external" / "neurvps"
BASE   = NEUR / "logs" / "tmm17" / "config.yaml"        # downloaded from HF
CFG_GSV= NEUR / "logs" / "tmm17" / "config_gsv.yaml"

# your GSV camera numbers (from earlier): fov=100°, width=640
fov_deg = 100.0
W = 640
f_px = (W/2.0) / math.tan(math.radians(fov_deg/2.0))   # ≈ 268.51 px

with open(BASE, "r") as f:
    cfg = yaml.safe_load(f)

# Ensure expected structure exists
cfg.setdefault("data", {})
cfg["data"]["focal"] = float(f_px)

# IMPORTANT: the TMM17 checkpoint expects NO conic/DCN branch:
cfg.setdefault("model", {})
cfg["model"]["conic_6x"] = False   # <- critical
# (leave the rest of model settings as-is; do not add new keys)

with open(CFG_GSV, "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

print("Wrote:", CFG_GSV)
print("focal(px):", f_px)


## Gather your 5 RGBs (already 640×640) into a small input folder

In [None]:
from pathlib import Path
import shutil, glob

REPO = Path("/users/project1/pt01183/Building-height-width")
NEURVPS = REPO / "external" / "neurvps"

# <-- your actual RGB source folder
RAW_DIR = REPO / "Gdańsk, Poland" / "save_rgb" / "imgs"

KEEP_IDS = ["6_196","2_190","7_4","8_139","9_196"]

# We'll just copy to a small working folder for NeurVPS (no resize needed)
SQ_DIR  = NEURVPS / "data" / "my5_sq"
SQ_DIR.mkdir(parents=True, exist_ok=True)

def copy_by_id(src_dir: Path, sid: str, dst_dir: Path) -> Path | None:
    # try common extensions; if filenames have suffixes, also try glob with sid.*
    candidates = []
    for ext in (".jpg",".jpeg",".png",".JPG",".PNG"):
        p = src_dir / f"{sid}{ext}"
        if p.exists():
            candidates.append(p)
    if not candidates:
        # fallback: any file that starts with id (e.g., sid_*.jpg)
        candidates = [Path(p) for p in glob.glob(str(src_dir / f"{sid}.*"))]
    if not candidates:
        return None
    src = sorted(candidates, key=lambda x: x.suffix.lower())[0]
    dst = dst_dir / (sid + src.suffix.lower())
    shutil.copy2(src, dst)
    return dst

copied = []
for sid in KEEP_IDS:
    outp = copy_by_id(RAW_DIR, sid, SQ_DIR)
    if outp:
        copied.append(outp.name)
    else:
        print(f"[WARN] Could not find an image for ID {sid} in {RAW_DIR}")

print(f"Copied {len(copied)} images to {SQ_DIR}:")
for name in copied:
    print(" -", name)

## Paths and device

In [34]:
from pathlib import Path
import sys, os
import importlib, types
import yaml
import torch
import torch.nn as nn
import numpy as np
from skimage import io
from tqdm import tqdm
import json
import math

# 1. Ensure NeurVPS is on path FIRST
REPO_ROOT = "/users/project1/pt01183/Building-height-width"
NEURVPS_ROOT = Path(REPO_ROOT) / "external" / "neurvps"

def ensure_neurvps_on_path(root: str):
    root = os.path.abspath(root)
    candidates = [root, os.path.join(root, "src")]
    for c in candidates:
        if os.path.isdir(os.path.join(c, "neurvps")):
            if c not in sys.path:
                sys.path.insert(0, c)
            print(f"[setup] added to sys.path: {c}")
            return c
    raise RuntimeError(f"neurvps not found under {root}")

sys_path = ensure_neurvps_on_path(str(NEURVPS_ROOT))
print(f"[setup] neurvps directory: {sys_path}")

[setup] added to sys.path: /users/project1/pt01183/Building-height-width/external/neurvps
[setup] neurvps directory: /users/project1/pt01183/Building-height-width/external/neurvps


In [35]:
# 2. Patch neurvps.config AFTER it's imported by model loading
# We'll do this by creating a hook that patches it when needed

def patch_neurvps_config():
    """Patch the C object in neurvps.config to add io.num_vpts"""
    try:
        # Import the config module
        from neurvps.config import C, M
        from neurvps.box import Box
        
        print(f"[patch] C type: {type(C).__name__}")
        
        # Check if C already has io.num_vpts
        try:
            val = C.io.num_vpts
            print(f"[patch] C.io.num_vpts already exists: {val}")
            return C, M
        except (AttributeError, KeyError):
            print("[patch] C.io.num_vpts not found, adding it")
        
        # Add io.num_vpts to C
        if isinstance(C, Box):
            # Use Box methods
            if "io" not in C:
                C["io"] = Box()
            C["io"]["num_vpts"] = 3
        else:
            # Use attribute access
            if not hasattr(C, "io"):
                C.io = Box() if isinstance(C, Box) else types.SimpleNamespace()
            if hasattr(C.io, "__setitem__"):
                C.io["num_vpts"] = 3
            else:
                C.io.num_vpts = 3
        
        # Verify it worked
        val = C.io.num_vpts
        print(f"[patch] Successfully set C.io.num_vpts = {val}")
        
        return C, M
        
    except Exception as e:
        print(f"[patch] Error: {e}")
        import traceback
        traceback.print_exc()
        raise

In [36]:
# 3. CONFIGURATION
BASE_CFG = NEURVPS_ROOT / "logs" / "tmm17" / "config.yaml"
CFG_GSV = NEURVPS_ROOT / "logs" / "tmm17" / "config_gsv.yaml"
CKPT = NEURVPS_ROOT / "logs" / "tmm17" / "checkpoint_state_anet.pth"

# Camera intrinsics
fov_deg = 100.0
W = 640
f_px = (W / 2.0) / math.tan(math.radians(fov_deg / 2.0))

# Create config for GSV inference
if BASE_CFG.exists():
    with open(BASE_CFG, "r") as f:
        cfg = yaml.safe_load(f) or {}
else:
    cfg = {}

cfg.setdefault("data", {})
cfg["data"]["focal"] = float(f_px)
cfg.setdefault("model", {})
cfg["model"]["conic_6x"] = False

with open(CFG_GSV, "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

print(f"[config] focal={f_px:.2f}px, conic_6x=False")
print(f"[config] saved to {CFG_GSV}")

[config] focal=268.51px, conic_6x=False
[config] saved to /users/project1/pt01183/Building-height-width/external/neurvps/logs/tmm17/config_gsv.yaml


In [50]:
# 4. MODEL LOADING
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

def _make_torchvision_backbone(which="resnet50", pretrained=False, out_layer="layer3"):
    try:
        from torchvision import models
    except Exception:
        return None
    
    if which == "resnet50":
        m = models.resnet50(
            weights=models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
        )
    elif which == "resnet34":
        m = models.resnet34(
            weights=models.ResNet34_Weights.IMAGENET1K_V1 if pretrained else None
        )
    else:
        raise ValueError(f"Unsupported backbone: {which}")
    
    layers = [m.conv1, m.bn1, m.relu, m.maxpool, m.layer1, m.layer2, m.layer3]
    if out_layer == "layer4":
        layers.append(m.layer4)
    
    core = nn.Sequential(*layers)
    
    class Wrap(nn.Module):
        def __init__(self, core):
            super().__init__()
            self.core = core
        def forward(self, x):
            return (self.core(x),)
    
    return Wrap(core)

class TinyBackbone(nn.Module):
    def __init__(self, in_ch=3, feat_ch=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_ch, 32, 3, stride=2, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(64, feat_ch, 3, stride=2, padding=1), nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return (self.net(x),)

def load_model_robust(cfg_path: str, ckpt_path: str, device: str):
    print("[model] loading configuration and model...")
    
    # Import VanishingNet first
    try:
        from neurvps.models.vanishing_net import VanishingNet
        print("[model] successfully imported VanishingNet")
    except ImportError as e:
        print(f"[model] ERROR: could not import VanishingNet: {e}")
        raise
    
    # NOW patch the config after neurvps modules are imported
    print("[model] patching neurvps.config...")
    cfg_C, cfg_M = patch_neurvps_config()

    # 3) >>> INSERT THIS MONKEY-PATCH RIGHT HERE <<<
    import numpy as _np
    from neurvps.models import vanishing_net as _vn

    if not hasattr(_vn, "_safe_patched"):
        _orig_orth = _vn.orth
        _orig_sample_sphere = _vn.sample_sphere

        def _safe_orth(v):
            v = _np.asarray(v, dtype=_np.float32).reshape(3)  # ensure shape (3,)
            return _orig_orth(v)

        def _safe_sample_sphere(v, st, ed):
            v = _np.asarray(v, dtype=_np.float32).reshape(3)  # ensure shape (3,)
            return _orig_sample_sphere(v, st, ed)

        _vn.orth = _safe_orth
        _vn.sample_sphere = _safe_sample_sphere
        _vn._safe_patched = True
        print("[patch] Wrapped neurvps.models.vanishing_net.orth/sample_sphere for shape safety")
    # 3) >>> END INSERT <<<
    
    # Load config from YAML
    print(f"[model] reading config from {cfg_path}")
    with open(cfg_path, "r") as f:
        cfg_dict = yaml.safe_load(f) or {}
    
    data_cfg = cfg_dict.get("data", {})
    model_cfg = cfg_dict.get("model", {})
    
    # Sync with neurvps.config
    if cfg_M is not None:
        for k, v in model_cfg.items():
            if isinstance(v, (int, float, bool, str, list, tuple)):
                try:
                    setattr(cfg_M, k, v)
                except Exception:
                    pass
    
    # Set defaults
    defaults = {
        "fc_channel": 1024,
        "multires": [0.01, 0.05, 0.2, 0.8],
        "smp_pos": 1,
        "smp_neg": 1,
        "smp_rnd": 8,
        "smp_multiplier": 2.0,
        "output_stride": 32,
        "upsample_scale": 4,
    }
    
    if cfg_M is not None:
        for k, v in defaults.items():
            if not hasattr(cfg_M, k):
                setattr(cfg_M, k, v)
        output_stride = getattr(cfg_M, "output_stride", 32)
        upsample_scale = getattr(cfg_M, "upsample_scale", 4)
    else:
        output_stride = defaults["output_stride"]
        upsample_scale = defaults["upsample_scale"]
    
    print(f"[model] output_stride={output_stride}, upsample_scale={upsample_scale}")
    
    # Create backbone
    bb = _make_torchvision_backbone(which="resnet50", pretrained=False, out_layer="layer3")
    if bb is None:
        print("[model] torchvision unavailable; using TinyBackbone")
        bb = TinyBackbone()
    else:
        print("[model] using ResNet50 backbone")
    
    # Create model
    print(f"[model] creating VanishingNet...")
    model = VanishingNet(bb, output_stride, upsample_scale).to(device).eval()
    print(f"[model] VanishingNet created successfully")
    
    # Load checkpoint
    print(f"[model] loading checkpoint from {ckpt_path}")
    if not os.path.exists(ckpt_path):
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")
    
    raw = torch.load(ckpt_path, map_location=device)
    print(f"[model] checkpoint loaded | type={type(raw).__name__}")
    
    # Extract state_dict
    if isinstance(raw, dict):
        for key in ("state_dict", "model_state_dict", "model"):
            if key in raw and isinstance(raw[key], dict):
                state = raw[key]
                print(f"[model] extracted state_dict from key '{key}'")
                break
        else:
            state = raw
            print(f"[model] using checkpoint dict directly as state")
    else:
        state = raw
    
    print(f"[model] state_dict has {len(state)} keys")
    
    # Remap state keys
    mk = list(model.state_dict().keys())
    remapped = {}
    for k, v in state.items():
        kk = k
        for pref in ("module.", "model."):
            if kk.startswith(pref):
                kk = kk[len(pref):]
                break
        
        has_anet = any(kk.startswith("anet.") for kk in mk)
        has_backbone = any(kk.startswith("backbone.") for kk in mk)
        has_anet_bb = any(kk.startswith("anet.backbone.") for kk in mk)
        
        if has_backbone and not has_anet_bb and kk.startswith("anet.backbone."):
            kk = kk[len("anet."):]
        if has_anet and not kk.startswith("anet."):
            if kk.startswith("backbone.") or any(kk.startswith(p) for p in ("fc", "bn", "conv", "score")):
                kk = "anet." + kk
        if not has_anet and kk.startswith("anet."):
            kk = kk[len("anet."):]
        
        remapped[kk] = v
    
    pruned = {k: v for k, v in remapped.items() if k in mk}
    inc = model.load_state_dict(pruned, strict=False)
    
    missing = len(getattr(inc, "missing_keys", []))
    unexpected = len(getattr(inc, "unexpected_keys", []))
    print(f"[model] loaded | missing={missing} unexpected={unexpected}")
    
    focal = float(data_cfg.get("focal", 1.0))
    return model, focal

# Load model
model, focal = load_model_robust(str(CFG_GSV), str(CKPT), DEVICE)
print(f"✓ Model ready | focal={focal:.2f}px | device={DEVICE}")


[model] loading configuration and model...
[model] successfully imported VanishingNet
[model] patching neurvps.config...
[patch] C type: Box
[patch] C.io.num_vpts already exists: 3
[patch] Wrapped neurvps.models.vanishing_net.orth/sample_sphere for shape safety
[model] reading config from /users/project1/pt01183/Building-height-width/external/neurvps/logs/tmm17/config_gsv.yaml
[model] output_stride=4, upsample_scale=1
[model] using ResNet50 backbone
[model] creating VanishingNet...
[model] VanishingNet created successfully
[model] loading checkpoint from /users/project1/pt01183/Building-height-width/external/neurvps/logs/tmm17/checkpoint_state_anet.pth
[model] checkpoint loaded | type=dict
[model] using checkpoint dict directly as state
[model] state_dict has 413 keys
[model] loaded | missing=215 unexpected=0
✓ Model ready | focal=268.51px | device=cuda:0


In [51]:
# 5. INFERENCE UTILITIES
def fib_sphere(n: int) -> np.ndarray:
    ga = (1.0 + 5.0**0.5) / 2.0
    frac = 2.0 - ga
    z = np.linspace(1 - 1.0/n, -1 + 1.0/n, n)
    theta = 2*np.pi * ((np.arange(n)*frac) % 1.0)
    r = np.sqrt(np.maximum(0.0, 1 - z*z))
    x = r*np.cos(theta)
    y = r*np.sin(theta)
    v = np.stack([x, y, z], axis=1).astype(np.float32)
    v /= np.linalg.norm(v, axis=1, keepdims=True)
    return v

def to_pixel_local(v: np.ndarray, focal: float, width: int, height: int) -> np.ndarray:
    vx, vy, vz = float(v[0]), float(v[1]), float(v[2])
    W, H = float(width), float(height)
    cx, cy = W/2.0, H/2.0
    eps = 1e-6
    if -eps < vz < eps:
        vz = eps if vz >= 0 else -eps
    x = focal * vx / vz + cx
    y = focal * vy / vz + cy
    return np.array([x, y], dtype=np.float32)

def infer_one(
    model, focal, device, img_path,
    k_top=3, n_candidates=1024,   # n_candidates is ignored now; keep for signature compatibility
    imagenet_norm=True, verbose=False
):
    try:
        from neurvps.config import C as cfg_C  # read num_vpts seen by NeurVPS

        img = io.imread(img_path)
        if img.ndim == 2:
            img = np.stack([img]*3, axis=-1)

        H, W = img.shape[:2]

        # to tensor [1,3,H,W]
        x = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
        if imagenet_norm:
            mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
            std  = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
            x = (x - mean) / std
        x = x.unsqueeze(0).to(device)

        # ---- CRITICAL: pass exactly C.io.num_vpts seed directions ----
        num_v = getattr(getattr(cfg_C, "io", None), "num_vpts", 3)

        if num_v == 3:
            # Use orthogonal unit axes as seeds (network will refine internally)
            vpts = torch.tensor(
                [[1.0, 0.0, 0.0],
                 [0.0, 1.0, 0.0],
                 [0.0, 0.0, 1.0]],
                dtype=torch.float32, device=device
            )
        else:
            # Evenly spread seeds if different count is configured
            vpts_np = fib_sphere(num_v)                # (num_v, 3)
            vpts = torch.from_numpy(vpts_np).to(device)
        # --------------------------------------------------------------

        if verbose:
            print(f"  input shape: {x.shape}, seeds: {tuple(vpts.shape)} on {vpts.device}")

        with torch.no_grad():
            # NeurVPS expects both the image and the seed VPs
            out = model({"image": x, "vpts": vpts})

        # Handle output (many forks expose 'scores' or 'output')
        if isinstance(out, dict):
            scores = out.get("scores", out.get("output", None))
        else:
            scores = out
        if scores is None:
            raise RuntimeError("Model returned None for scores")

        if not torch.is_tensor(scores):
            scores = torch.as_tensor(scores)
        scores = scores.detach().cpu().float()

        # If model returns scores per multires level [N, L], average over levels
        s = scores.mean(dim=1) if scores.ndim == 2 else scores

        if verbose:
            smin = float(s.min()) if s.numel() else float("nan")
            smax = float(s.max()) if s.numel() else float("nan")
            print(f"  scores: shape={tuple(s.shape)} range=({smin:.4f}, {smax:.4f})")

        # Top-k over candidates internally sampled by the model (indices map to seeds’ refinements)
        k = min(k_top, s.numel())
        top_idx = torch.topk(s, k=k).indices.numpy()

        # We don’t have direct 3D directions back here; project seed directions as coarse VP pixels
        # (If your fork returns refined 3D VPs, adapt here to use them.)
        # For now, project the seeds themselves—good enough to verify pipeline is running.
        # If you prefer, you can map 'top_idx' modulo 'num_v' to pick a seed per best score.
        result = {}
        for i in range(k):
            seed_j = int(top_idx[i] % max(1, num_v))
            v3 = vpts[seed_j].detach().cpu().numpy()
            px = to_pixel_local(v3, focal=focal, width=W, height=H)
            result[f"vp{i+1}"] = {"x": float(px[0]), "y": float(px[1]), "score": float(s[int(top_idx[i])])}

        return result, True

    except Exception as e:
        if verbose:
            import traceback
            print(f"  [error] {e}")
            traceback.print_exc()
        return {"error": str(e)}, False

def run_inference_pipeline(model, focal, device, input_dir, output_dir,
                           k_top=3, n_candidates=1024):
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    image_files = []
    for ext in ("*.jpg", "*.jpeg", "*.png", "*.JPG", "*.PNG"):
        image_files.extend(input_path.glob(ext))
    
    print(f"\n[inference] Found {len(image_files)} images in {input_dir}")
    
    if len(image_files) == 0:
        print("[inference] No images found!")
        return {}
    
    results = {}
    success_count = 0
    
    for idx, img_path in enumerate(tqdm(image_files, desc="Processing")):
        verbose = (idx == 0)
        
        result, success = infer_one(
            model, focal, device, str(img_path),
            k_top=k_top, n_candidates=n_candidates,
            imagenet_norm=True, verbose=verbose
        )
        
        if success:
            success_count += 1
        
        out_file = output_path / f"{img_path.stem}.json"
        with open(out_file, "w") as f:
            json.dump(result, f, indent=2)
        
        results[img_path.name] = result
    
    print(f"\n[done] {success_count}/{len(image_files)} successful")
    print(f"[done] Results saved to: {output_dir}")
    
    return results

In [52]:
# 6. RUN INFERENCE
if __name__ == "__main__":
    IN_DIR = NEURVPS_ROOT / "data" / "my5_sq"
    OUT_DIR = NEURVPS_ROOT / "logs" / "tmm17" / "my5_vpts"
    
    print("\n" + "="*70)
    print("NeurVPS Vanishing Point Detection")
    print("="*70)
    print(f"Input:  {IN_DIR}")
    print(f"Output: {OUT_DIR}")
    print(f"Device: {DEVICE}")
    print(f"Focal:  {focal:.2f}px")
    print("="*70 + "\n")
    
    results = run_inference_pipeline(
        model, focal, DEVICE,
        str(IN_DIR), str(OUT_DIR),
        k_top=3, n_candidates=1024
    )
    
    print("\n" + "="*70)
    print("Inference Summary")
    print("="*70)
    for fname, result in results.items():
        if "error" in result:
            print(f"✗ {fname}: {result['error']}")
        else:
            vpts = [f"{k}: ({v['x']:.1f}, {v['y']:.1f})" for k, v in result.items()]
            print(f"✓ {fname}")
            for vpt in vpts:
                print(f"    {vpt}")


NeurVPS Vanishing Point Detection
Input:  /users/project1/pt01183/Building-height-width/external/neurvps/data/my5_sq
Output: /users/project1/pt01183/Building-height-width/external/neurvps/logs/tmm17/my5_vpts
Device: cuda:0
Focal:  268.51px


[inference] Found 5 images in /users/project1/pt01183/Building-height-width/external/neurvps/data/my5_sq


Processing:   0%|                                         | 0/5 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/tmp/ipykernel_269767/1537417751.py", line 69, in infer_one
    out = model({"image": x, "vpts": vpts})
  File "/mnt/host_scratch/envs/new_geospatial_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/mnt/host_scratch/envs/new_geospatial_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
  File "/users/project1/pt01183/Building-height-width/external/neurvps/neurvps/models/vanishing_net.py", line 53, in forward
    add_sample(sample_sphere(vgt, st, ed))
  File "/tmp/ipykernel_269767/2981991302.py", line 75, in _safe_sample_sphere
    v = _np.asarray(v, dtype=_np.float32).reshape(3)  # ensure shape (3,)
ValueError: cannot reshape array of size 1 into shape (3,)
Processing: 100%|██████████████████

  input shape: torch.Size([1, 3, 640, 640]), seeds: (3, 3) on cuda:0
  [error] cannot reshape array of size 1 into shape (3,)

[done] 0/5 successful
[done] Results saved to: /users/project1/pt01183/Building-height-width/external/neurvps/logs/tmm17/my5_vpts

Inference Summary
✗ 8_139.jpg: cannot reshape array of size 1 into shape (3,)
✗ 6_196.jpg: cannot reshape array of size 1 into shape (3,)
✗ 2_190.jpg: cannot reshape array of size 1 into shape (3,)
✗ 9_196.jpg: cannot reshape array of size 1 into shape (3,)
✗ 7_4.jpg: cannot reshape array of size 1 into shape (3,)



