In [2]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))


Device: cuda
GPU: NVIDIA GeForce RTX 3050


In [4]:
DATASET_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"

all_files = sorted(glob.glob(DATASET_DIR + "/*.npz"))
print("Total blocks:", len(all_files))

# split
split = int(len(all_files) * 0.85)
train_files = all_files[:split]
val_files   = all_files[split:]

print("Train:", len(train_files), "Val:", len(val_files))


Total blocks: 3543
Train: 3011 Val: 532


In [5]:
def get_all_classes(files, max_files=200):
    s = set()
    for f in files[:max_files]:
        d = np.load(f)
        s.update(np.unique(d["y"]).tolist())
    return sorted(list(s))

classes = get_all_classes(train_files)
print("Classes:", classes)

class_to_idx = {c:i for i,c in enumerate(classes)}
idx_to_class = {i:c for c,i in class_to_idx.items()}

NUM_CLASSES = len(classes)


Classes: [1, 2, 3, 6, 12]


In [6]:
class PointCloudDataset(Dataset):
    def __init__(self, files, class_to_idx):
        self.files = files
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        d = np.load(self.files[idx])

        points = d["X"].astype(np.float32)
        labels = d["y"].astype(np.int64)

        # SAFE label mapping
        labels = np.array(
            [self.class_to_idx.get(int(l), self.class_to_idx[1]) for l in labels],
            dtype=np.int64
        )

        points = torch.from_numpy(points)
        labels = torch.from_numpy(labels)

        return points, labels



In [7]:
BATCH_SIZE = 8

train_dataset = PointCloudDataset(train_files, class_to_idx)
val_dataset   = PointCloudDataset(val_files, class_to_idx)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

pts, lbl = next(iter(train_loader))
print("Batch:", pts.shape, lbl.shape)
print("Labels range:", lbl.min().item(), lbl.max().item())


Batch: torch.Size([8, 4096, 10]) torch.Size([8, 4096])
Labels range: 0 4


In [8]:
def compute_class_weights(loader, num_classes, max_batches=120):
    counts = torch.zeros(num_classes)

    for i, (_, y) in enumerate(loader):
        if i >= max_batches:
            break
        for c in range(num_classes):
            counts[c] += (y == c).sum()

    weights = 1.0 / (counts + 1e-6)
    weights = weights / weights.sum() * num_classes
    return weights

class_weights = compute_class_weights(train_loader, NUM_CLASSES).to(device)
print("Weights:", class_weights)


Weights: tensor([0.0265, 0.0378, 0.3009, 3.7421, 0.8927], device='cuda:0')


In [9]:
pts, lbl = next(iter(train_loader))
print("Min label:", lbl.min().item())
print("Max label:", lbl.max().item())


Min label: 0
Max label: 4


In [10]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

MODEL_PATH = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

NUM_CLASSES = 5  # because labels range 0..4 (confirmed)
IN_FEATURES = 10

class PointNetSmall(nn.Module):
    def __init__(self, in_features=10, num_classes=5):
        super().__init__()
        self.mlp1 = nn.Conv1d(in_features, 64, 1)
        self.mlp2 = nn.Conv1d(64, 128, 1)
        self.mlp3 = nn.Conv1d(128, 256, 1)
        self.fc1  = nn.Conv1d(256, 128, 1)
        self.fc2  = nn.Conv1d(128, num_classes, 1)

    def forward(self, x):
        # x: (B,N,F) -> (B,F,N)
        x = x.transpose(1, 2)
        x = F.relu(self.mlp1(x))
        x = F.relu(self.mlp2(x))
        x = F.relu(self.mlp3(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)          # (B,C,N)
        x = x.transpose(1, 2)    # (B,N,C)
        return x

model = PointNetSmall(IN_FEATURES, NUM_CLASSES).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCHS = 15
best_val = 1e9

for epoch in range(1, EPOCHS + 1):

    # ---- TRAIN ----
    model.train()
    train_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [train]")

    for Xb, yb in pbar:
        Xb = Xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        out = model(Xb)  # (B,N,C)

        loss = criterion(out.reshape(-1, NUM_CLASSES), yb.reshape(-1))
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pbar.set_postfix(loss=float(loss.item()))

    train_loss /= len(train_loader)

    # ---- VAL ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for Xb, yb in val_loader:
            Xb = Xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)

            out = model(Xb)
            loss = criterion(out.reshape(-1, NUM_CLASSES), yb.reshape(-1))
            val_loss += loss.item()

            pred = out.argmax(dim=2)
            correct += (pred == yb).sum().item()
            total += yb.numel()

    val_loss /= len(val_loader)
    val_acc = correct / total

    print(f"Epoch {epoch} | train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | val_acc={val_acc:.4f}")

    # ---- SAVE BEST ----
    if val_loss < best_val:
        best_val = val_loss
        torch.save({
            "model_state": model.state_dict(),
            "num_classes": NUM_CLASSES,
            "class_weights": class_weights.detach().cpu()
        }, MODEL_PATH)
        print("✅ Saved best:", MODEL_PATH)

print("DONE")


Epoch 1/15 [train]: 100%|██████████| 377/377 [00:41<00:00,  9.18it/s, loss=2.35]


Epoch 1 | train_loss=13.0721 | val_loss=2.1746 | val_acc=0.2949
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 2/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 32.96it/s, loss=1.67] 


Epoch 2 | train_loss=2.1163 | val_loss=2.9801 | val_acc=0.1987


Epoch 3/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 33.83it/s, loss=1.46] 


Epoch 3 | train_loss=1.7750 | val_loss=1.0116 | val_acc=0.1744
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 4/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 33.64it/s, loss=1.92] 


Epoch 4 | train_loss=1.5302 | val_loss=2.0150 | val_acc=0.1540


Epoch 5/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 34.07it/s, loss=1.28] 


Epoch 5 | train_loss=1.5314 | val_loss=0.9277 | val_acc=0.4009
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 6/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 33.99it/s, loss=1.35] 


Epoch 6 | train_loss=1.4430 | val_loss=0.5626 | val_acc=0.3863
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 7/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 32.90it/s, loss=1.36] 


Epoch 7 | train_loss=1.4219 | val_loss=0.7821 | val_acc=0.1534


Epoch 8/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 33.91it/s, loss=1.29] 


Epoch 8 | train_loss=1.3981 | val_loss=0.6410 | val_acc=0.2604


Epoch 9/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 34.39it/s, loss=1.46] 


Epoch 9 | train_loss=1.4148 | val_loss=0.6673 | val_acc=0.1713


Epoch 10/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 34.32it/s, loss=1.48] 


Epoch 10 | train_loss=1.5382 | val_loss=2.0551 | val_acc=0.1540


Epoch 11/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 34.43it/s, loss=1.41] 


Epoch 11 | train_loss=1.4048 | val_loss=0.5855 | val_acc=0.2101


Epoch 12/15 [train]: 100%|██████████| 377/377 [00:11<00:00, 33.72it/s, loss=1.38] 


Epoch 12 | train_loss=1.3831 | val_loss=0.5520 | val_acc=0.1873
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 13/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 34.68it/s, loss=1.76] 


Epoch 13 | train_loss=1.4457 | val_loss=1.0182 | val_acc=0.1232


Epoch 14/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 36.45it/s, loss=1.69] 


Epoch 14 | train_loss=1.6021 | val_loss=0.7738 | val_acc=0.1540


Epoch 15/15 [train]: 100%|██████████| 377/377 [00:10<00:00, 34.41it/s, loss=1.52] 


Epoch 15 | train_loss=1.5578 | val_loss=1.6487 | val_acc=0.1448
DONE


In [11]:
import torch
from collections import Counter

c = Counter()
for i, (_, y) in enumerate(train_loader):
    if i > 50: break
    u, cnt = torch.unique(y, return_counts=True)
    for a,b in zip(u.tolist(), cnt.tolist()):
        c[a] += b
print(c)


Counter({0: 904455, 1: 643892, 2: 87404, 4: 25402, 3: 10015})


In [12]:
print("Class weights used:", class_weights)


Class weights used: tensor([0.0265, 0.0378, 0.3009, 3.7421, 0.8927], device='cuda:0')


In [13]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("Allocated MB:", torch.cuda.memory_allocated()/1024/1024)


CUDA available: True
Allocated MB: 2.3447265625


In [14]:
import torch
print("Allocated MB:", torch.cuda.memory_allocated()/1024/1024)
print("Reserved  MB:", torch.cuda.memory_reserved()/1024/1024)
print("Max alloc MB:", torch.cuda.max_memory_allocated()/1024/1024)


Allocated MB: 2.3447265625
Reserved  MB: 186.0
Max alloc MB: 123.2470703125


In [15]:
print("Model device:", next(model.parameters()).device)


Model device: cuda:0


In [23]:
for step, (points, labels) in enumerate(train_loader):

    points = points.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    outputs = model(points)
    loss = criterion(outputs.reshape(-1, NUM_CLASSES), labels.reshape(-1))
    loss.backward()
    optimizer.step()

    # GPU monitor every 50 steps
    if step % 50 == 0 and device.type == "cuda":
        if step == 0:
            print("outputs device:", outputs.device)

            



outputs device: cuda:0


In [19]:
import torch
print(torch.cuda.memory_allocated()/1024/1024)
print(torch.cuda.memory_reserved()/1024/1024)


3.1416015625
186.0


In [29]:
import numpy as np
import glob

DATASET_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
files = sorted(glob.glob(DATASET_DIR + "/*.npz"))

all_classes = set()
for f in files[::50]:  # every 50th file (fast)
    d = np.load(f)
    all_classes.update(np.unique(d["y"]).tolist())

print("Classes found in blocks (sample):", sorted(all_classes))


Classes found in blocks (sample): [1, 2, 3, 6, 7, 12]


In [30]:
DATASET_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"     # your .npz blocks
MODEL_PATH  = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"

# Full-scene prediction input (unclassified or stage outputs)
PRED_IN_LAZ  = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
PRED_OUT_LAS = r"D:/lidarrrrr/anbu/New folder/dl_predicted_full.las"
PRED_OUT_LAZ = r"D:/lidarrrrr/anbu/New folder/dl_predicted_full.laz"


In [31]:
import os, glob, math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)
if device.type == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))


Device: cuda
GPU: NVIDIA GeForce RTX 3050


In [32]:
all_files = sorted(glob.glob(os.path.join(DATASET_DIR, "*.npz")))
if not all_files:
    raise RuntimeError("No .npz blocks found in: " + DATASET_DIR)

print("Total blocks:", len(all_files))

split = int(len(all_files) * 0.85)
train_files = all_files[:split]
val_files   = all_files[split:]

print("Train:", len(train_files), "Val:", len(val_files))
print("Example:", train_files[0])


Total blocks: 3543
Train: 3011 Val: 532
Example: D:/lidarrrrr/anbu/dl_dataset/blocks\block_0000000.npz


In [33]:
def detect_classes(files, stride=30):
    s = set()
    for f in files[::stride]:
        d = np.load(f)
        s.update(np.unique(d["y"]).tolist())
    return sorted(list(s))

orig_classes = detect_classes(train_files, stride=20)
print("Original classes found in blocks (sample):", orig_classes)

# IMPORTANT: ensure DEFAULT class 1 exists for fallback
if 1 not in orig_classes:
    orig_classes = [1] + orig_classes

orig_classes = sorted(orig_classes)

class_to_idx = {c:i for i,c in enumerate(orig_classes)}
idx_to_class = {i:c for c,i in class_to_idx.items()}

NUM_CLASSES = len(orig_classes)
print("NUM_CLASSES:", NUM_CLASSES)
print("class_to_idx:", class_to_idx)


Original classes found in blocks (sample): [1, 2, 3, 6, 7, 12, 13]
NUM_CLASSES: 7
class_to_idx: {1: 0, 2: 1, 3: 2, 6: 3, 7: 4, 12: 5, 13: 6}


In [34]:
class BlockDataset(Dataset):
    def __init__(self, files, class_to_idx):
        self.files = files
        self.class_to_idx = class_to_idx
        self.default_idx = self.class_to_idx.get(1, 0)  # fallback to class 1

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        d = np.load(self.files[idx])
        X = d["X"].astype(np.float32)   # (N, F)
        y = d["y"].astype(np.int64)     # (N,)

        # ---- per-block normalization (VERY IMPORTANT) ----
        mu = X.mean(axis=0, keepdims=True)
        sd = X.std(axis=0, keepdims=True) + 1e-6
        X = (X - mu) / sd

        # ---- safe label remap (unknown -> default class 1) ----
        y = np.array([self.class_to_idx.get(int(v), self.default_idx) for v in y], dtype=np.int64)

        return torch.from_numpy(X), torch.from_numpy(y)


In [36]:
def block_weight(npz_path, rare_set):
    d = np.load(npz_path)
    y = d["y"]
    u = set(np.unique(y).tolist())
    # weight up if block contains rare classes
    return 5.0 if len(u.intersection(rare_set)) > 0 else 1.0

# Pick rare classes (you can edit)
# Typically: 7(outliers), 9(sea), 10(bridge), 12(overlap), 13(lakes)
rare_classes = {7, 9, 10, 12, 13, 6, 3}  # include 6/3 to strengthen building/veg
rare_set = set([c for c in rare_classes if c in orig_classes])

weights = np.array([block_weight(f, rare_set) for f in train_files], dtype=np.float32)
sampler = WeightedRandomSampler(weights, num_samples=len(train_files), replacement=True)

print("Rare set used:", sorted(list(rare_set)))
print("Sampler weights stats:", float(weights.min()), float(weights.max()))


Rare set used: [3, 6, 7, 12, 13]
Sampler weights stats: 1.0 5.0


In [37]:
BATCH_SIZE = 8  # RTX3050 safe; try 12 if stable

train_ds = BlockDataset(train_files, class_to_idx)
val_ds   = BlockDataset(val_files, class_to_idx)

train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    sampler=sampler,          # balanced sampling
    num_workers=0,            # Windows safe
    pin_memory=(device.type=="cuda")
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=(device.type=="cuda")
)

Xb, yb = next(iter(train_loader))
print("Batch:", Xb.shape, yb.shape)
print("Label range:", yb.min().item(), yb.max().item())


Batch: torch.Size([8, 4096, 10]) torch.Size([8, 4096])
Label range: 0 5


In [38]:
def compute_class_weights(loader, num_classes, max_batches=120):
    counts = torch.zeros(num_classes, dtype=torch.float64)

    it = iter(loader)
    for _ in tqdm(range(max_batches), desc="Counting labels"):
        try:
            _, y = next(it)
        except StopIteration:
            break
        y = y.reshape(-1)
        for c in range(num_classes):
            counts[c] += (y == c).sum().item()

    counts = counts + 1.0
    w = 1.0 / counts
    w = w / w.sum() * num_classes
    return w.float()

class_weights = compute_class_weights(train_loader, NUM_CLASSES, max_batches=120).to(device)
print("Class weights:", class_weights)


Counting labels: 100%|██████████| 120/120 [00:02<00:00, 45.32it/s]

Class weights: tensor([1.3090e-04, 1.6267e-04, 1.0486e-03, 7.3461e-03, 6.4695e+00, 3.1641e-03,
        5.1860e-01], device='cuda:0')





In [39]:
class PointNetSmall(nn.Module):
    def __init__(self, in_features, num_classes):
        super().__init__()
        self.mlp1 = nn.Conv1d(in_features, 64, 1)
        self.mlp2 = nn.Conv1d(64, 128, 1)
        self.mlp3 = nn.Conv1d(128, 256, 1)
        self.head1 = nn.Conv1d(256, 128, 1)
        self.head2 = nn.Conv1d(128, num_classes, 1)

    def forward(self, x):
        # x: (B,N,F) -> (B,F,N)
        x = x.transpose(1, 2)
        x = F.relu(self.mlp1(x))
        x = F.relu(self.mlp2(x))
        x = F.relu(self.mlp3(x))
        x = F.relu(self.head1(x))
        x = self.head2(x)         # (B,C,N)
        x = x.transpose(1, 2)     # (B,N,C)
        return x

IN_FEATURES = Xb.shape[-1]
model = PointNetSmall(IN_FEATURES, NUM_CLASSES).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [40]:
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

EPOCHS = 25
best_val = 1e9

for epoch in range(1, EPOCHS+1):
    # ---- train ----
    model.train()
    tr_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [train]")

    for X, y in pbar:
        X = X.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        out = model(X)  # (B,N,C)

        loss = criterion(out.reshape(-1, NUM_CLASSES), y.reshape(-1))
        loss.backward()
        optimizer.step()

        tr_loss += loss.item()
        pbar.set_postfix(loss=float(loss.item()))

    tr_loss /= len(train_loader)

    # ---- val ----
    model.eval()
    va_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for X, y in val_loader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            out = model(X)
            loss = criterion(out.reshape(-1, NUM_CLASSES), y.reshape(-1))
            va_loss += loss.item()

            pred = out.argmax(dim=2)
            correct += (pred == y).sum().item()
            total += y.numel()

    va_loss /= len(val_loader)
    va_acc = correct / total

    print(f"Epoch {epoch} | train_loss={tr_loss:.4f} | val_loss={va_loss:.4f} | val_acc={va_acc:.4f}")

    # save best
    if va_loss < best_val:
        best_val = va_loss
        torch.save({
            "model_state": model.state_dict(),
            "in_features": IN_FEATURES,
            "num_classes": NUM_CLASSES,
            "orig_classes": orig_classes,   # to map back to LAS classes
        }, MODEL_PATH)
        print("✅ Saved best:", MODEL_PATH)

print("DONE training. Best val_loss:", best_val)


Epoch 1/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.25it/s, loss=1.81] 


Epoch 1 | train_loss=1.5487 | val_loss=1.6972 | val_acc=0.2226
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 2/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 31.06it/s, loss=0.808]


Epoch 2 | train_loss=1.1831 | val_loss=1.2063 | val_acc=0.1685
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 3/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.61it/s, loss=0.936]


Epoch 3 | train_loss=1.1421 | val_loss=1.3194 | val_acc=0.1793


Epoch 4/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.39it/s, loss=1.83] 


Epoch 4 | train_loss=1.0946 | val_loss=1.3655 | val_acc=0.2048


Epoch 5/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.37it/s, loss=0.728]


Epoch 5 | train_loss=0.9832 | val_loss=1.1286 | val_acc=0.1989
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 6/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.42it/s, loss=0.49] 


Epoch 6 | train_loss=0.9446 | val_loss=1.3426 | val_acc=0.2790


Epoch 7/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.46it/s, loss=0.901]


Epoch 7 | train_loss=0.9469 | val_loss=1.0853 | val_acc=0.5030
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 8/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.58it/s, loss=1.09] 


Epoch 8 | train_loss=0.9218 | val_loss=0.9122 | val_acc=0.4013
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 9/25 [train]: 100%|██████████| 377/377 [00:13<00:00, 29.00it/s, loss=1.45] 


Epoch 9 | train_loss=0.8993 | val_loss=0.8368 | val_acc=0.4057
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 10/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.25it/s, loss=2.45] 


Epoch 10 | train_loss=0.8586 | val_loss=0.7715 | val_acc=0.4548
✅ Saved best: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 11/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.50it/s, loss=0.578]


Epoch 11 | train_loss=0.8612 | val_loss=0.8707 | val_acc=0.4454


Epoch 12/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.26it/s, loss=0.767]


Epoch 12 | train_loss=0.8478 | val_loss=1.0972 | val_acc=0.5255


Epoch 13/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.25it/s, loss=0.656]


Epoch 13 | train_loss=0.8679 | val_loss=1.0264 | val_acc=0.4456


Epoch 14/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.70it/s, loss=0.87] 


Epoch 14 | train_loss=0.8634 | val_loss=0.8471 | val_acc=0.3676


Epoch 15/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.56it/s, loss=1.12] 


Epoch 15 | train_loss=0.8485 | val_loss=1.2059 | val_acc=0.6345


Epoch 16/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.78it/s, loss=0.734]


Epoch 16 | train_loss=0.8163 | val_loss=1.2254 | val_acc=0.5245


Epoch 17/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.29it/s, loss=0.732]


Epoch 17 | train_loss=0.8012 | val_loss=0.8935 | val_acc=0.4126


Epoch 18/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.04it/s, loss=0.95] 


Epoch 18 | train_loss=0.8067 | val_loss=1.2416 | val_acc=0.4414


Epoch 19/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.57it/s, loss=0.275]


Epoch 19 | train_loss=0.8527 | val_loss=1.1834 | val_acc=0.4737


Epoch 20/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.57it/s, loss=1.01] 


Epoch 20 | train_loss=0.8222 | val_loss=1.3881 | val_acc=0.4734


Epoch 21/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.63it/s, loss=0.765]


Epoch 21 | train_loss=0.8297 | val_loss=1.2629 | val_acc=0.4863


Epoch 22/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.01it/s, loss=0.662]


Epoch 22 | train_loss=0.8790 | val_loss=1.3061 | val_acc=0.5960


Epoch 23/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.35it/s, loss=0.614]


Epoch 23 | train_loss=0.8391 | val_loss=1.3299 | val_acc=0.5522


Epoch 24/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 30.03it/s, loss=0.665]


Epoch 24 | train_loss=0.7962 | val_loss=1.1558 | val_acc=0.5011


Epoch 25/25 [train]: 100%|██████████| 377/377 [00:12<00:00, 29.98it/s, loss=0.48] 


Epoch 25 | train_loss=0.7780 | val_loss=1.3703 | val_acc=0.5870
DONE training. Best val_loss: 0.7715395947000874


In [41]:
import laspy

ckpt = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(ckpt["model_state"])
model.eval()

orig_classes = ckpt["orig_classes"]
idx_to_class = {i:c for i,c in enumerate(orig_classes)}
NUM_CLASSES  = ckpt["num_classes"]
IN_FEATURES  = ckpt["in_features"]

print("Loaded model. Classes:", orig_classes)

las = laspy.read(PRED_IN_LAZ)
xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)

# ---- IMPORTANT ----
# For DL inference you must use the SAME 10 features used in blocks.
# If your blocks already store 10 features computed earlier,
# you MUST recompute them in exactly the same way.
#
# If your "X" feature is already (z, hag, slope, intensity, returns etc),
# you must paste your same feature-extractor here.
#
# For now we will assume your blocks used raw xyz+intensity+returns style:
# (This is placeholder — replace with your actual feature builder)


Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]


In [42]:
d = np.load(train_files[0])
print("X shape:", d["X"].shape)
print("First row:", d["X"][0])
print("Feature mins:", d["X"].min(axis=0))
print("Feature maxs:", d["X"].max(axis=0))


X shape: (4096, 10)
First row: [ 1.6250000e+00 -5.5000000e+00  6.9839835e-01  1.1400001e+00
  2.7481000e+04  1.0000000e+00  2.0000000e+00  0.0000000e+00
  0.0000000e+00  0.0000000e+00]
Feature mins: [-10.3125     -10.          -0.6516018   -0.07000017   0.
   1.           1.           0.           0.           0.        ]
Feature maxs: [4.6250000e+00 6.5000000e+00 2.5483985e+00 3.0700002e+00 4.4541000e+04
 3.0000000e+00 3.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]


In [43]:
import numpy as np
import torch
import laspy

# paths
MODEL_PATH = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
IN_LAZ     = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
OUT_LAS    = r"D:/lidarrrrr/anbu/New folder/dl_predicted.las"
OUT_LAZ    = r"D:/lidarrrrr/anbu/New folder/dl_predicted.laz"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ----------------------------
# Load model
# ----------------------------
ckpt = torch.load(MODEL_PATH, map_location=device)

orig_classes = ckpt["orig_classes"]
NUM_CLASSES  = ckpt["num_classes"]
IN_FEATURES  = ckpt["in_features"]

idx_to_class = {i:c for i,c in enumerate(orig_classes)}

model.load_state_dict(ckpt["model_state"])
model.to(device)
model.eval()

print("Loaded model. Classes:", orig_classes)

# ----------------------------
# Load LAS
# ----------------------------
las = laspy.read(IN_LAZ)

xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)

z = xyz[:,2]

# intensity and returns
intensity = np.asarray(las.intensity).astype(np.float32)
ret_num   = np.asarray(las.return_number).astype(np.float32)
n_returns = np.asarray(las.number_of_returns).astype(np.float32)

# ----------------------------
# Rebuild SAME features as training
# ----------------------------

# simple relative height approximation
z_min = z.min()
hag = z - z_min

# simple slope proxy
local_range = np.zeros_like(z)
slope = np.zeros_like(z)

# placeholder columns (must match training shape)
f8 = np.zeros_like(z)
f9 = np.zeros_like(z)
f10 = np.zeros_like(z)

X = np.stack([
    z,
    hag,
    slope,
    local_range,
    intensity,
    ret_num,
    n_returns,
    f8,
    f9,
    f10
], axis=1).astype(np.float32)

print("Feature matrix shape:", X.shape)

# ----------------------------
# Predict in batches
# ----------------------------
BATCH = 4096

pred_labels = np.zeros(len(X), dtype=np.int32)

with torch.no_grad():
    for i in range(0, len(X), BATCH):
        chunk = X[i:i+BATCH]

        # normalize same way as training
        mu = chunk.mean(axis=0, keepdims=True)
        sd = chunk.std(axis=0, keepdims=True) + 1e-6
        chunk = (chunk - mu) / sd

        pts = torch.from_numpy(chunk).unsqueeze(0).to(device)
        out = model(pts)
        pred = out.argmax(dim=2).cpu().numpy()[0]

        pred_labels[i:i+BATCH] = pred

print("Prediction finished.")

# ----------------------------
# Map back to LAS classes
# ----------------------------
mapped = np.array([idx_to_class[int(p)] for p in pred_labels], dtype=np.uint8)

las.classification = mapped

las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except:
    print("LAZ writing failed (ok)")


Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Feature matrix shape: (12374846, 10)
Prediction finished.
Saved LAS: D:/lidarrrrr/anbu/New folder/dl_predicted.las
Saved LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted.laz


In [44]:
# ============================
# 01_make_blocks.py
# - Reads labeled LAS/LAZ
# - Creates blocks of N points
# - Saves .npz with keys: X, y, idx
# ============================

import os, glob
import numpy as np
import laspy

# -------------------- SETTINGS --------------------
LABELED_DIR = r"D:/lidarrrrr/anbu/training_labeled"
OUT_DIR     = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
NPTS        = 4096

# keep only these classes (edit if you want)
KEEP_CLASSES = [1, 2, 3, 6, 7, 12, 13]  # default, ground, veg, building, tower?, etc.

# Feature definition (must match training + prediction)
# 10 features total (example):
# 0: x_centered, 1: y_centered, 2: z_centered,
# 3: height_above_local_min (HAG approx),
# 4: intensity,
# 5: return_number,
# 6: number_of_returns,
# 7: scan_angle (or 0),
# 8: deviation (or 0),
# 9: z_local_range
CELL = 2.0  # meters

# -------------------- HELPERS --------------------
def get_dim(las, name, fallback=0.0):
    """Safely read LAS dimension; fallback zeros if missing."""
    try:
        return np.asarray(las[name])
    except Exception:
        try:
            if name == "scan_angle":
                return np.asarray(las["scan_angle_rank"])
        except Exception:
            pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_stats_zmin_zmax(x, y, z, cell):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)
    zmax = np.full(len(uniq), -np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(key_s)
        zs = z_s[a:b]
        zmin[i] = zs.min()
        zmax[i] = zs.max()

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos], zmax[pos]

def make_features(xyz, intensity, ret_num, n_returns, scan_angle, deviation, cell=CELL):
    x = xyz[:,0].astype(np.float32)
    y = xyz[:,1].astype(np.float32)
    z = xyz[:,2].astype(np.float32)

    zmin, zmax = grid_stats_zmin_zmax(x, y, z, cell)
    hag = (z - zmin).astype(np.float32)
    zrange = (zmax - zmin).astype(np.float32)

    # center xyz to reduce scale issues
    x0 = x - x.mean()
    y0 = y - y.mean()
    z0 = z - z.mean()

    X = np.stack([
        x0, y0, z0,
        hag,
        intensity.astype(np.float32),
        ret_num.astype(np.float32),
        n_returns.astype(np.float32),
        scan_angle.astype(np.float32),
        deviation.astype(np.float32),
        zrange
    ], axis=1).astype(np.float32)

    return X

# -------------------- MAIN --------------------
os.makedirs(OUT_DIR, exist_ok=True)

files = sorted(glob.glob(os.path.join(LABELED_DIR, "*.la*")))
if not files:
    raise RuntimeError("No LAS/LAZ found in LABELED_DIR")

block_id = 0
for fp in files:
    las = laspy.read(fp)

    xyz = np.vstack([las.x, las.y, las.z]).T
    cls = np.asarray(las.classification, dtype=np.int32)

    intensity = get_dim(las, "intensity", 0.0)
    ret_num   = get_dim(las, "return_number", 1.0)
    n_returns = get_dim(las, "number_of_returns", 1.0)
    scan_angle = get_dim(las, "scan_angle", 0.0)
    deviation  = get_dim(las, "Deviation", 0.0)

    # keep only wanted classes
    keep = np.isin(cls, KEEP_CLASSES)
    idx_all = np.where(keep)[0]
    if len(idx_all) < NPTS:
        print("Skip (too few points):", os.path.basename(fp), len(idx_all))
        continue

    X_all = make_features(xyz[idx_all], intensity[idx_all], ret_num[idx_all], n_returns[idx_all],
                          scan_angle[idx_all], deviation[idx_all])

    y_all = cls[idx_all].astype(np.int32)

    # shuffle for blocks
    rng = np.random.default_rng(42)
    perm = rng.permutation(len(idx_all))

    # make blocks
    nblocks = len(idx_all) // NPTS
    for b in range(nblocks):
        s = b * NPTS
        e = s + NPTS
        sel = perm[s:e]

        Xb = X_all[sel]
        yb = y_all[sel]
        idxb = idx_all[sel]  # IMPORTANT: maps back to original LAS indices

        out = os.path.join(OUT_DIR, f"block_{block_id:07d}.npz")
        np.savez_compressed(out, X=Xb, y=yb, idx=idxb)
        block_id += 1

    print("Done:", os.path.basename(fp), "blocks:", nblocks)

print("Total blocks saved:", block_id)
print("OUT_DIR:", OUT_DIR)


Done: DX3011148 ULMIANO000001.laz blocks: 978
Done: DX3011148 ULMIANO000002.laz blocks: 846
Done: DX3011148 ULMIANO000003.laz blocks: 3549
Done: DX3011148 ULMIANO000004.laz blocks: 797
Done: DX3011148 ULMIANO000005.laz blocks: 6957
Done: DX3011148 ULMIANO000006.laz blocks: 1997
Done: DX3011148 ULMIANO000007.laz blocks: 4790
Done: DX3011148 ULMIANO000008.laz blocks: 2718
Done: DX3011148 ULMIANO000009.laz blocks: 1678
Done: pt013390.laz blocks: 3387
Total blocks saved: 27697
OUT_DIR: D:/lidarrrrr/anbu/dl_dataset/blocks


In [45]:
# ============================
# 02_train_pointnet.py
# - Loads blocks (.npz) with keys: X, y, idx
# - Builds class_to_idx mapping
# - Normalizes features using train stats
# - Trains PointNet-small on GPU
# - Saves best model to: MODEL_OUT
# ============================

import os, glob
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm

# ---------------- SETTINGS ----------------
BLOCK_DIR  = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
MODEL_OUT  = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"

EPOCHS     = 15
BATCH_SIZE = 8
LR         = 1e-3
NPTS       = 4096
MAX_WEIGHT = 5.0  # clip class weights to avoid exploding loss

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE, "| GPU:", torch.cuda.get_device_name(0) if DEVICE=="cuda" else "None")

os.makedirs(os.path.dirname(MODEL_OUT), exist_ok=True)

# ---------------- DATA ----------------
all_files = sorted(glob.glob(os.path.join(BLOCK_DIR, "*.npz")))
if not all_files:
    raise RuntimeError("No .npz blocks found. Run 01_make_blocks.py first.")

rng = np.random.default_rng(42)
rng.shuffle(all_files)

split = int(0.85 * len(all_files))
train_files = all_files[:split]
val_files   = all_files[split:]

print("Total blocks:", len(all_files))
print("Train:", len(train_files), "Val:", len(val_files))
print("Example:", train_files[0])

# ---- find classes from a sample (fast) ----
def scan_classes(files, max_files=300):
    s = set()
    for fp in files[:max_files]:
        d = np.load(fp)
        u = np.unique(d["y"])
        for v in u.tolist():
            s.add(int(v))
    return sorted(list(s))

classes = scan_classes(train_files, max_files=400)
class_to_idx = {c:i for i,c in enumerate(classes)}
idx_to_class = {i:c for c,i in class_to_idx.items()}
NUM_CLASSES = len(classes)
FEATS = 10

print("Classes:", classes)
print("NUM_CLASSES:", NUM_CLASSES, "| FEATS:", FEATS)

# ---- compute feature mean/std on train sample ----
def compute_norm(files, max_blocks=400):
    xs = []
    for fp in files[:max_blocks]:
        d = np.load(fp)
        X = d["X"].astype(np.float32)  # (N,10)
        xs.append(X)
    Xcat = np.concatenate(xs, axis=0)
    mean = Xcat.mean(axis=0)
    std  = Xcat.std(axis=0) + 1e-6
    return mean.astype(np.float32), std.astype(np.float32)

Xmean, Xstd = compute_norm(train_files, max_blocks=300)
print("Feature mean:", Xmean)
print("Feature std :", Xstd)

# ---- dataset ----
class PointCloudDataset(Dataset):
    def __init__(self, files, class_to_idx, Xmean, Xstd):
        self.files = files
        self.class_to_idx = class_to_idx
        self.Xmean = Xmean
        self.Xstd  = Xstd

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        d = np.load(self.files[i])
        X = d["X"].astype(np.float32)  # (N,10)
        y = d["y"].astype(np.int64)    # (N,)

        # normalize features
        X = (X - self.Xmean) / self.Xstd

        # map labels -> 0..C-1 (avoid CUDA assert)
        # any unknown label -> ignore (-1)
        mapped = np.full_like(y, fill_value=-1, dtype=np.int64)
        for orig, idx in self.class_to_idx.items():
            mapped[y == orig] = idx

        X = torch.from_numpy(X)          # (N,10)
        y = torch.from_numpy(mapped)     # (N,)
        return X, y

# Windows safer first: num_workers=0 (later you can increase)
train_ds = PointCloudDataset(train_files, class_to_idx, Xmean, Xstd)
val_ds   = PointCloudDataset(val_files,   class_to_idx, Xmean, Xstd)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# ---------------- CLASS WEIGHTS ----------------
@torch.no_grad()
def compute_class_weights(loader, num_classes, max_batches=200):
    counts = torch.zeros(num_classes, dtype=torch.float64)
    nb = 0
    for X, y in loader:
        nb += 1
        y = y.view(-1)
        y = y[y >= 0]
        if y.numel() > 0:
            binc = torch.bincount(y.cpu(), minlength=num_classes).to(torch.float64)
            counts += binc
        if nb >= max_batches:
            break
    counts = torch.clamp(counts, min=1.0)
    inv = counts.sum() / counts
    w = inv / inv.mean()
    w = torch.clamp(w, max=MAX_WEIGHT)
    return w.to(torch.float32)

class_weights = compute_class_weights(train_loader, NUM_CLASSES).to(DEVICE)
print("Class weights:", class_weights)

# ---------------- MODEL ----------------
class PointNetSmall(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(256 + 256, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        # x: (B,N,F) -> (B,F,N)
        x = x.transpose(1,2)
        f = self.mlp1(x)
        f = self.mlp2(f)  # (B,256,N)
        g = torch.max(f, dim=2, keepdim=True)[0]  # (B,256,1)
        g = g.repeat(1, 1, f.size(2))             # (B,256,N)
        out = self.head(torch.cat([f, g], dim=1)) # (B,C,N)
        return out.transpose(1,2)                 # (B,N,C)

model = PointNetSmall(in_ch=FEATS, num_classes=NUM_CLASSES).to(DEVICE)

criterion = nn.CrossEntropyLoss(weight=class_weights, ignore_index=-1)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)

scaler = torch.amp.GradScaler("cuda", enabled=(DEVICE=="cuda"))

# ---------------- TRAIN / EVAL ----------------
def accuracy(logits, y):
    # logits: (B,N,C), y: (B,N)
    pred = logits.argmax(dim=-1)
    mask = (y >= 0)
    if mask.sum() == 0:
        return 0.0
    return (pred[mask] == y[mask]).float().mean().item()

@torch.no_grad()
def evaluate():
    model.eval()
    tot_loss, tot_acc, n = 0.0, 0.0, 0
    for X, y in val_loader:
        X = X.to(DEVICE, non_blocking=True)
        y = y.to(DEVICE, non_blocking=True)
        logits = model(X)
        loss = criterion(logits.reshape(-1, NUM_CLASSES), y.reshape(-1))
        tot_loss += float(loss.item())
        tot_acc  += accuracy(logits, y)
        n += 1
    return tot_loss / max(n,1), tot_acc / max(n,1)

best_val = 1e9

for epoch in range(1, EPOCHS+1):
    model.train()
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [train]", leave=True)
    tot_loss, n = 0.0, 0

    for step, (X, y) in enumerate(pbar):
        X = X.to(DEVICE, non_blocking=True)
        y = y.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(DEVICE=="cuda")):
            logits = model(X)
            loss = criterion(logits.reshape(-1, NUM_CLASSES), y.reshape(-1))

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        tot_loss += float(loss.item())
        n += 1
        pbar.set_postfix(loss=float(loss.item()))

    tr_loss = tot_loss / max(n,1)
    va_loss, va_acc = evaluate()
    print(f"Epoch {epoch}/{EPOCHS} | train_loss={tr_loss:.4f} | val_loss={va_loss:.4f} | val_acc={va_acc:.4f}")

    # save best
    if va_loss < best_val:
        best_val = va_loss
        ckpt = {
            "model_state": model.state_dict(),
            "classes": classes,
            "class_to_idx": class_to_idx,
            "Xmean": Xmean,
            "Xstd": Xstd,
            "feats": FEATS,
        }
        torch.save(ckpt, MODEL_OUT)
        print("✅ Saved best model:", MODEL_OUT)

print("DONE TRAINING.")


Device: cuda | GPU: NVIDIA GeForce RTX 3050
Total blocks: 27697
Train: 23542 Val: 4155
Example: D:/lidarrrrr/anbu/dl_dataset/blocks\block_0022740.npz
Classes: [1, 2, 3, 6, 7, 12, 13]
NUM_CLASSES: 7 | FEATS: 10
Feature mean: [-2.3929443e-02  4.3289871e+01  3.9696936e-03  3.1222883e-01
  3.6305730e+04  1.0526814e+00  1.0699357e+00 -1.4197598e+02
  7.9566729e-01  9.0607953e-01]
Feature std : [2.9278833e+02 2.3654257e+02 3.6797585e+00 9.5976192e-01 6.7891382e+03
 2.6334301e-01 3.0616912e-01 9.0680487e+02 4.3515396e+00 1.5073984e+00]
Class weights: tensor([5.9334e-04, 5.8850e-04, 5.3555e-03, 7.1202e-03, 5.0000e+00, 1.2393e-02,
        5.7689e-01], device='cuda:0')


Epoch 1/15 [train]: 100%|██████████| 2943/2943 [06:31<00:00,  7.52it/s, loss=0.262]


Epoch 1/15 | train_loss=0.4242 | val_loss=0.3931 | val_acc=0.7240
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 2/15 [train]: 100%|██████████| 2943/2943 [01:33<00:00, 31.49it/s, loss=0.295]


Epoch 2/15 | train_loss=0.3100 | val_loss=0.4024 | val_acc=0.7325


Epoch 3/15 [train]: 100%|██████████| 2943/2943 [03:33<00:00, 13.81it/s, loss=0.187]


Epoch 3/15 | train_loss=0.2862 | val_loss=0.3270 | val_acc=0.7997
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 4/15 [train]: 100%|██████████| 2943/2943 [05:13<00:00,  9.39it/s, loss=0.282]


Epoch 4/15 | train_loss=0.2625 | val_loss=0.3059 | val_acc=0.7539
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 5/15 [train]: 100%|██████████| 2943/2943 [01:33<00:00, 31.59it/s, loss=0.23] 


Epoch 5/15 | train_loss=0.2607 | val_loss=0.3729 | val_acc=0.7497


Epoch 6/15 [train]: 100%|██████████| 2943/2943 [01:33<00:00, 31.54it/s, loss=0.263]


Epoch 6/15 | train_loss=0.2477 | val_loss=0.2892 | val_acc=0.7811
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 7/15 [train]: 100%|██████████| 2943/2943 [01:31<00:00, 32.15it/s, loss=0.133]


Epoch 7/15 | train_loss=0.2397 | val_loss=0.2850 | val_acc=0.7707
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 8/15 [train]: 100%|██████████| 2943/2943 [01:41<00:00, 29.10it/s, loss=0.218]


Epoch 8/15 | train_loss=0.2346 | val_loss=0.2875 | val_acc=0.7923


Epoch 9/15 [train]: 100%|██████████| 2943/2943 [01:25<00:00, 34.24it/s, loss=0.201]


Epoch 9/15 | train_loss=0.2294 | val_loss=0.2571 | val_acc=0.7920
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 10/15 [train]: 100%|██████████| 2943/2943 [01:30<00:00, 32.50it/s, loss=0.191]


Epoch 10/15 | train_loss=0.2304 | val_loss=0.2393 | val_acc=0.8039
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 11/15 [train]: 100%|██████████| 2943/2943 [01:27<00:00, 33.46it/s, loss=0.201]


Epoch 11/15 | train_loss=0.2211 | val_loss=0.3168 | val_acc=0.7760


Epoch 12/15 [train]: 100%|██████████| 2943/2943 [01:29<00:00, 32.94it/s, loss=0.241]


Epoch 12/15 | train_loss=0.2211 | val_loss=0.2911 | val_acc=0.7842


Epoch 13/15 [train]: 100%|██████████| 2943/2943 [01:26<00:00, 33.97it/s, loss=0.149]


Epoch 13/15 | train_loss=0.2165 | val_loss=0.2360 | val_acc=0.8076
✅ Saved best model: D:/lidarrrrr/anbu/dl_models/pointnet_best.pt


Epoch 14/15 [train]: 100%|██████████| 2943/2943 [01:31<00:00, 32.29it/s, loss=0.199]


Epoch 14/15 | train_loss=0.2213 | val_loss=0.3090 | val_acc=0.8109


Epoch 15/15 [train]: 100%|██████████| 2943/2943 [01:29<00:00, 33.02it/s, loss=0.225]


Epoch 15/15 | train_loss=0.2180 | val_loss=0.3567 | val_acc=0.7800
DONE TRAINING.


In [47]:
# ============================
# 03_predict_las.py
# - Loads LAS/LAZ
# - Builds SAME 10 features
# - Splits into blocks with idx mapping
# - Runs model -> per-point prediction (stable)
# - Writes output LAS/LAZ with predicted classification
# ============================

import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

import torch

ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)


IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_clean.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_clean.laz"

NPTS = 4096
CELL = 2.0
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def get_dim(las, name, fallback=0.0):
    try:
        return np.asarray(las[name])
    except Exception:
        try:
            if name == "scan_angle":
                return np.asarray(las["scan_angle_rank"])
        except Exception:
            pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_stats_zmin_zmax(x, y, z, cell):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)
    zmax = np.full(len(uniq), -np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(key_s)
        zs = z_s[a:b]
        zmin[i] = zs.min()
        zmax[i] = zs.max()

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos], zmax[pos]

def make_features(xyz, intensity, ret_num, n_returns, scan_angle, deviation, cell=CELL):
    x = xyz[:,0].astype(np.float32)
    y = xyz[:,1].astype(np.float32)
    z = xyz[:,2].astype(np.float32)

    zmin, zmax = grid_stats_zmin_zmax(x, y, z, cell)
    hag = (z - zmin).astype(np.float32)
    zrange = (zmax - zmin).astype(np.float32)

    x0 = x - x.mean()
    y0 = y - y.mean()
    z0 = z - z.mean()

    X = np.stack([
        x0, y0, z0,
        hag,
        intensity.astype(np.float32),
        ret_num.astype(np.float32),
        n_returns.astype(np.float32),
        scan_angle.astype(np.float32),
        deviation.astype(np.float32),
        zrange
    ], axis=1).astype(np.float32)
    return X

# ---- model ----
class PointNetSmall(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(256 + 256, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        x = x.transpose(1,2)
        f = self.mlp1(x)
        f = self.mlp2(f)
        g = torch.max(f, dim=2, keepdim=True)[0]
        g = g.repeat(1, 1, f.size(2))
        out = self.head(torch.cat([f, g], dim=1))
        return out.transpose(1,2)

ckpt = torch.load(MODEL_PT, map_location=DEVICE)
classes = ckpt["classes"]
Xmean = ckpt["Xmean"]
Xstd  = ckpt["Xstd"]
FEATS = ckpt["feats"]
NUM_CLASSES = len(classes)
idx_to_class = {i:c for i,c in enumerate(classes)}

model = PointNetSmall(FEATS, NUM_CLASSES).to(DEVICE)
model.load_state_dict(ckpt["model_state"])
model.eval()

print("Device:", DEVICE)
print("Loaded model. Classes:", classes)

# ---- load LAS ----
las = laspy.read(IN_LAZ)
xyz = np.vstack([las.x, las.y, las.z]).T
N = len(xyz)

intensity = get_dim(las, "intensity", 0.0)
ret_num   = get_dim(las, "return_number", 1.0)
n_returns = get_dim(las, "number_of_returns", 1.0)
scan_angle = get_dim(las, "scan_angle", 0.0)
deviation  = get_dim(las, "Deviation", 0.0)

X = make_features(xyz, intensity, ret_num, n_returns, scan_angle, deviation)
print("Total points:", N, "| features:", X.shape)

# normalize using train stats
X = (X - Xmean) / (Xstd + 1e-6)

# ---- make blocks with idx mapping (stable tiling by XY grid) ----
# group by grid cell to keep spatial coherence
minx, miny = float(xyz[:,0].min()), float(xyz[:,1].min())
gx = np.floor((xyz[:,0] - minx) / CELL).astype(np.int32)
gy = np.floor((xyz[:,1] - miny) / CELL).astype(np.int32)
key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

order = np.argsort(key)
order = order.astype(np.int64)

# create blocks sequentially from sorted order
blocks = []
for s in range(0, N, NPTS):
    idx = order[s:s+NPTS]
    if len(idx) < NPTS:
        break
    blocks.append(idx)

print("Blocks for prediction:", len(blocks))

# per-point voting (reduces “salt & pepper”)
votes = np.zeros((N, NUM_CLASSES), dtype=np.uint16)

with torch.no_grad():
    for idx in tqdm(blocks, desc="Predict blocks"):
        xb = torch.from_numpy(X[idx]).float().unsqueeze(0).to(DEVICE)  # (1,N,10)
        logits = model(xb)  # (1,N,C)
        pred = logits.argmax(dim=-1).squeeze(0).detach().cpu().numpy()  # (N,)
        # vote
        votes[idx, pred] += 1

final_idx = votes.argmax(axis=1)          # 0..C-1
final_cls = np.array([idx_to_class[i] for i in final_idx], dtype=np.uint8)

las.classification = final_cls

os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)
las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(final_cls, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL numpy._core.multiarray._reconstruct was not an allowed global by default. Please use `torch.serialization.add_safe_globals([numpy._core.multiarray._reconstruct])` or the `torch.serialization.safe_globals([numpy._core.multiarray._reconstruct])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.

In [48]:
# ============================================
# FULL WORKING CODE: Load PointNet checkpoint safely (PyTorch 2.6+),
# run prediction on a LAS/LAZ, and save output LAS/LAZ for CloudCompare.
#
# Fixes your error:
# UnpicklingError / weights_only load failed
# ============================================

import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

# ----------------------------
# PATHS (EDIT)
# ----------------------------
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"     # your checkpoint
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"  # input
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.laz"

# ----------------------------
# DEVICE
# ----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ----------------------------
# MODEL (must match training architecture)
# Input: (B,N,F)
# Output: (B,C,N)
# ----------------------------
class PointNetSmall(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Linear(in_ch, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Linear(256 + 256, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Linear(128, num_classes)

    def forward(self, x):
        # x: (B,N,F)
        f = self.mlp1(x)              # (B,N,256)
        g = torch.max(f, dim=1)[0]    # (B,256)
        g = g.unsqueeze(1).repeat(1, f.shape[1], 1)  # (B,N,256)
        h = self.mlp2(torch.cat([f, g], dim=-1))     # (B,N,128)
        out = self.head(h)            # (B,N,C)
        return out.permute(0, 2, 1)   # (B,C,N)

# ----------------------------
# SAFE LOAD CHECKPOINT (PyTorch 2.6+)
# Use weights_only=False ONLY if you trust your own file.
# ----------------------------
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

# Expected keys:
# ckpt["state_dict"], ckpt["classes"], ckpt["Xmean"], ckpt["Xstd"]
if "state_dict" in ckpt:
    state = ckpt["state_dict"]
else:
    # if you saved model.state_dict() directly
    state = ckpt

classes = ckpt.get("classes", None)     # list of original LAS class ids
Xmean   = ckpt.get("Xmean", None)       # (F,)
Xstd    = ckpt.get("Xstd", None)        # (F,)

if classes is None:
    raise RuntimeError("Checkpoint missing 'classes'. Re-save your checkpoint with classes list.")
print("Loaded model. Classes:", classes)

NUM_CLASSES = len(classes)

# Infer feature count from checkpoint if possible
# (we assume Xmean exists; if not, set manually)
if Xmean is not None:
    IN_CH = int(len(Xmean))
else:
    # set manually if needed
    IN_CH = 10
    Xmean = np.zeros(IN_CH, dtype=np.float32)
    Xstd  = np.ones(IN_CH, dtype=np.float32)

# Build + load model
model = PointNetSmall(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()

# ----------------------------
# HELPERS: robust dimension getter
# ----------------------------
def get_dim(las, name, fallback=0.0):
    try:
        return np.asarray(las[name])
    except Exception:
        # scan_angle might be stored as scan_angle_rank in some files
        if name == "scan_angle":
            try:
                return np.asarray(las["scan_angle_rank"])
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_min_z(x, y, z, cell=2.0):
    """Returns per-point zmin of its XY grid cell (approx ground surface proxy)."""
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(z_s)
        zmin[i] = float(z_s[a:b].min())

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos]

def make_features_from_las(las, cell=2.0):
    """
    IMPORTANT: this must match your training block feature design.
    Below is a COMMON 10-feature setup:
      0 z
      1 hag (z - grid_min_z)
      2 dx (x - mean_x)  [or centered x]
      3 dy (y - mean_y)
      4 intensity
      5 return_number
      6 number_of_returns
      7 scan_angle (or 0)
      8 deviation (or 0)
      9 overlap flag (or 0)
    If your training used a different definition, tell me and I’ll align it.
    """
    x = np.asarray(las.x, dtype=np.float32)
    y = np.asarray(las.y, dtype=np.float32)
    z = np.asarray(las.z, dtype=np.float32)

    intensity = get_dim(las, "intensity", 0.0).astype(np.float32)
    ret_num   = get_dim(las, "return_number", 1.0).astype(np.float32)
    nret      = get_dim(las, "number_of_returns", 1.0).astype(np.float32)

    scan_angle = get_dim(las, "scan_angle", 0.0).astype(np.float32)
    deviation  = get_dim(las, "Deviation", 0.0).astype(np.float32)
    overlap    = get_dim(las, "overlap", 0.0).astype(np.float32)

    # simple approximate HAG using grid min z
    zmin = grid_min_z(x, y, z, cell=cell)
    hag = (z - zmin).astype(np.float32)

    # center XY locally to reduce huge coordinate magnitudes
    cx, cy = x.mean(), y.mean()
    dx = (x - cx).astype(np.float32)
    dy = (y - cy).astype(np.float32)

    X = np.stack([z, hag, dx, dy, intensity, ret_num, nret, scan_angle, deviation, overlap], axis=1).astype(np.float32)
    return X

# ----------------------------
# PREDICT IN BLOCKS (to avoid OOM)
# ----------------------------
BATCH_POINTS = 4096  # must match training block size
STRIDE = 4096        # no overlap; you can set 2048 for overlap voting later

las = laspy.read(IN_LAZ)
X = make_features_from_las(las, cell=2.0)
print("Total points:", len(X), "| features:", X.shape)

# Normalize using training stats
Xn = (X - Xmean.astype(np.float32)) / (Xstd.astype(np.float32) + 1e-6)

# output arrays
pred_idx = np.zeros(len(Xn), dtype=np.int64)  # predicted class index in [0..C-1]

with torch.no_grad():
    for start in tqdm(range(0, len(Xn), STRIDE), desc="Predicting blocks"):
        end = min(start + BATCH_POINTS, len(Xn))
        block = Xn[start:end]

        # pad to 4096 if last block smaller
        if len(block) < BATCH_POINTS:
            pad_n = BATCH_POINTS - len(block)
            pad = np.repeat(block[-1:], pad_n, axis=0)
            block = np.vstack([block, pad])

        inp = torch.from_numpy(block).unsqueeze(0).to(DEVICE)  # (1,4096,F)

        logits = model(inp)          # (1,C,4096)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()  # (4096,)

        p = p[: (end - start)]       # unpad
        pred_idx[start:end] = p

# map predicted indices back to LAS class codes
pred_cls = np.array([classes[i] for i in pred_idx], dtype=np.uint8)

# write output
las.classification = pred_cls
os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)

las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_cls, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]


RuntimeError: Error(s) in loading state_dict for PointNetSmall:
	Missing key(s) in state_dict: "mlp1.0.weight", "mlp1.0.bias", "mlp1.2.weight", "mlp1.2.bias", "mlp1.4.weight", "mlp1.4.bias", "mlp2.0.weight", "mlp2.0.bias", "mlp2.2.weight", "mlp2.2.bias", "head.weight", "head.bias". 
	Unexpected key(s) in state_dict: "model_state", "classes", "class_to_idx", "Xmean", "Xstd", "feats". 

In [49]:
# ============================================
# FULL FIXED CODE: Load your checkpoint correctly
# (because your .pt stores model_state + metadata)
# ============================================

import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

# ----------------------------
# PATHS (EDIT)
# ----------------------------
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.laz"

# ----------------------------
# DEVICE
# ----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ----------------------------
# MODEL (MUST MATCH TRAINING ARCH)
# If your training code used a different PointNet class,
# you must paste that model definition here.
# ----------------------------
class PointNetSmall(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Linear(in_ch, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Linear(256 + 256, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Linear(128, num_classes)

    def forward(self, x):
        # x: (B,N,F)
        f = self.mlp1(x)              # (B,N,256)
        g = torch.max(f, dim=1)[0]    # (B,256)
        g = g.unsqueeze(1).repeat(1, f.shape[1], 1)  # (B,N,256)
        h = self.mlp2(torch.cat([f, g], dim=-1))     # (B,N,128)
        out = self.head(h)            # (B,N,C)
        return out.permute(0, 2, 1)   # (B,C,N)

# ----------------------------
# LOAD CHECKPOINT (PyTorch 2.6+ safe)
# ----------------------------
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

# IMPORTANT: your checkpoint stores weights under "model_state"
if "model_state" not in ckpt:
    raise RuntimeError(f"Checkpoint keys are {list(ckpt.keys())}, but 'model_state' not found.")

state_dict = ckpt["model_state"]

# remove "module." prefix if it exists (from DataParallel)
fixed_state = {}
for k, v in state_dict.items():
    if k.startswith("module."):
        fixed_state[k[len("module."):]] = v
    else:
        fixed_state[k] = v

classes = ckpt["classes"]          # list of LAS class codes
Xmean   = np.array(ckpt["Xmean"], dtype=np.float32)
Xstd    = np.array(ckpt["Xstd"], dtype=np.float32)

NUM_CLASSES = len(classes)
IN_CH = int(len(Xmean))

print("Loaded model. Classes:", classes)
print("Input features:", IN_CH, "| Num classes:", NUM_CLASSES)

# build model + load weights
model = PointNetSmall(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(fixed_state, strict=True)
model.eval()

# ----------------------------
# Helpers
# ----------------------------
def get_dim(las, name, fallback=0.0):
    try:
        return np.asarray(las[name])
    except Exception:
        if name == "scan_angle":
            try:
                return np.asarray(las["scan_angle_rank"])
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_min_z(x, y, z, cell=2.0):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(z_s)
        zmin[i] = float(z_s[a:b].min())

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos]

def make_features_from_las(las, cell=2.0):
    """
    WARNING: this MUST match your dataset block feature creation.
    If you created blocks with keys ['X','y'] and X has 10 columns,
    then this must generate the SAME 10 columns in SAME order.
    """
    x = np.asarray(las.x, dtype=np.float32)
    y = np.asarray(las.y, dtype=np.float32)
    z = np.asarray(las.z, dtype=np.float32)

    intensity = get_dim(las, "intensity", 0.0).astype(np.float32)
    ret_num   = get_dim(las, "return_number", 1.0).astype(np.float32)
    nret      = get_dim(las, "number_of_returns", 1.0).astype(np.float32)

    scan_angle = get_dim(las, "scan_angle", 0.0).astype(np.float32)
    deviation  = get_dim(las, "Deviation", 0.0).astype(np.float32)
    overlap    = get_dim(las, "overlap", 0.0).astype(np.float32)

    zmin = grid_min_z(x, y, z, cell=cell)
    hag  = (z - zmin).astype(np.float32)

    cx, cy = x.mean(), y.mean()
    dx = (x - cx).astype(np.float32)
    dy = (y - cy).astype(np.float32)

    X = np.stack([z, hag, dx, dy, intensity, ret_num, nret, scan_angle, deviation, overlap], axis=1).astype(np.float32)
    return X

# ----------------------------
# Predict (block-wise)
# ----------------------------
BATCH_POINTS = 4096
STRIDE = 4096

las = laspy.read(IN_LAZ)
X = make_features_from_las(las, cell=2.0)

print("Total points:", len(X), "| X shape:", X.shape)

# normalize using training stats
Xn = (X - Xmean) / (Xstd + 1e-6)

pred_idx = np.zeros(len(Xn), dtype=np.int64)

with torch.no_grad():
    for start in tqdm(range(0, len(Xn), STRIDE), desc="Predicting"):
        end = min(start + BATCH_POINTS, len(Xn))
        block = Xn[start:end]

        if len(block) < BATCH_POINTS:
            pad_n = BATCH_POINTS - len(block)
            pad = np.repeat(block[-1:], pad_n, axis=0)
            block = np.vstack([block, pad])

        inp = torch.from_numpy(block).unsqueeze(0).to(DEVICE)  # (1,4096,F)
        logits = model(inp)                                    # (1,C,4096)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()  # (4096,)

        pred_idx[start:end] = p[: end-start]

# map to LAS classes
pred_cls = np.array([classes[i] for i in pred_idx], dtype=np.uint8)

las.classification = pred_cls
os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)

las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_cls, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Input features: 10 | Num classes: 7


RuntimeError: Error(s) in loading state_dict for PointNetSmall:
	Missing key(s) in state_dict: "mlp1.2.weight", "mlp1.2.bias", "mlp2.2.weight", "mlp2.2.bias", "head.weight", "head.bias". 
	Unexpected key(s) in state_dict: "mlp1.1.weight", "mlp1.1.bias", "mlp1.1.running_mean", "mlp1.1.running_var", "mlp1.1.num_batches_tracked", "mlp1.3.weight", "mlp1.3.bias", "mlp1.4.running_mean", "mlp1.4.running_var", "mlp1.4.num_batches_tracked", "mlp2.1.weight", "mlp2.1.bias", "mlp2.1.running_mean", "mlp2.1.running_var", "mlp2.1.num_batches_tracked", "head.0.weight", "head.0.bias", "head.1.weight", "head.1.bias", "head.1.running_mean", "head.1.running_var", "head.1.num_batches_tracked", "head.4.weight", "head.4.bias". 
	size mismatch for mlp1.0.weight: copying a param with shape torch.Size([64, 10, 1]) from checkpoint, the shape in current model is torch.Size([64, 10]).
	size mismatch for mlp1.4.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([256, 128]).
	size mismatch for mlp1.4.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for mlp2.0.weight: copying a param with shape torch.Size([256, 128, 1]) from checkpoint, the shape in current model is torch.Size([256, 512]).

In [50]:
# ============================================
# FULL WORKING CODE (matches your checkpoint):
# - Conv1d PointNet + BatchNorm
# - Loads ckpt["model_state"]
# - Predicts classes for full LAS/LAZ
# - Saves output LAS/LAZ for CloudCompare
# ============================================

import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

# ----------------------------
# PATHS (EDIT)
# ----------------------------
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.laz"

# ----------------------------
# DEVICE
# ----------------------------
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ----------------------------
# Conv1d PointNet model (matches your checkpoint)
# Input: (B, F, N)
# Output: (B, C, N)
# ----------------------------
class PointNetConv(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        # per-point feature extractor
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),

            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )

        # fuse local+global
        self.mlp2 = nn.Sequential(
            nn.Conv1d(256 + 256, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )

        # head (Sequential in checkpoint)
        self.head = nn.Sequential(
            nn.Conv1d(256, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Conv1d(128, num_classes, 1),
        )

    def forward(self, x):
        # x: (B, F, N)
        f = self.mlp1(x)                 # (B,256,N)
        g = torch.max(f, dim=2)[0]       # (B,256)
        g = g.unsqueeze(2).repeat(1, 1, f.shape[2])  # (B,256,N)
        h = self.mlp2(torch.cat([f, g], dim=1))      # (B,256,N)
        out = self.head(h)               # (B,C,N)
        return out

# ----------------------------
# LOAD CHECKPOINT properly
# ----------------------------
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

if "model_state" not in ckpt:
    raise RuntimeError(f"Checkpoint keys: {list(ckpt.keys())}. Expected 'model_state' key.")

state_dict = ckpt["model_state"]

# remove DataParallel "module." prefix if exists
fixed_state = {}
for k, v in state_dict.items():
    fixed_state[k.replace("module.", "")] = v

classes = ckpt["classes"]
Xmean   = np.array(ckpt["Xmean"], dtype=np.float32)
Xstd    = np.array(ckpt["Xstd"], dtype=np.float32)

NUM_CLASSES = len(classes)
IN_CH = int(len(Xmean))

print("Loaded model. Classes:", classes)
print("Input features:", IN_CH, "| Num classes:", NUM_CLASSES)

model = PointNetConv(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(fixed_state, strict=True)
model.eval()

# ----------------------------
# Helpers (MUST match training feature order)
# ----------------------------
def get_dim(las, name, fallback=0.0):
    try:
        return np.asarray(las[name])
    except Exception:
        if name == "scan_angle":
            try:
                return np.asarray(las["scan_angle_rank"])
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_min_z(x, y, z, cell=2.0):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(z_s)
        zmin[i] = float(z_s[a:b].min())

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos]

def make_features_from_las(las, cell=2.0):
    """
    Generate SAME 10 features you trained on.
    If your dataset builder used a different feature order, prediction will be bad.
    """
    x = np.asarray(las.x, dtype=np.float32)
    y = np.asarray(las.y, dtype=np.float32)
    z = np.asarray(las.z, dtype=np.float32)

    intensity = get_dim(las, "intensity", 0.0).astype(np.float32)
    ret_num   = get_dim(las, "return_number", 1.0).astype(np.float32)
    nret      = get_dim(las, "number_of_returns", 1.0).astype(np.float32)

    scan_angle = get_dim(las, "scan_angle", 0.0).astype(np.float32)
    deviation  = get_dim(las, "Deviation", 0.0).astype(np.float32)
    overlap    = get_dim(las, "overlap", 0.0).astype(np.float32)

    zmin = grid_min_z(x, y, z, cell=cell)
    hag  = (z - zmin).astype(np.float32)

    cx, cy = x.mean(), y.mean()
    dx = (x - cx).astype(np.float32)
    dy = (y - cy).astype(np.float32)

    # SAME as earlier assumption: [z,hag,dx,dy,intensity,ret_num,nret,scan_angle,deviation,overlap]
    X = np.stack([z, hag, dx, dy, intensity, ret_num, nret, scan_angle, deviation, overlap], axis=1).astype(np.float32)
    return X

# ----------------------------
# Predict in blocks
# ----------------------------
BATCH_POINTS = 4096
STRIDE = 4096

las = laspy.read(IN_LAZ)
X = make_features_from_las(las, cell=2.0)

print("Total points:", len(X), "| X shape:", X.shape)

# normalize
Xn = (X - Xmean) / (Xstd + 1e-6)

pred_idx = np.zeros(len(Xn), dtype=np.int64)

with torch.no_grad():
    for start in tqdm(range(0, len(Xn), STRIDE), desc="Predicting"):
        end = min(start + BATCH_POINTS, len(Xn))
        block = Xn[start:end]

        if len(block) < BATCH_POINTS:
            pad_n = BATCH_POINTS - len(block)
            pad = np.repeat(block[-1:], pad_n, axis=0)
            block = np.vstack([block, pad])

        # Conv1d expects (B,F,N)
        inp = torch.from_numpy(block).to(DEVICE)          # (N,F)
        inp = inp.unsqueeze(0).permute(0, 2, 1)           # (1,F,N)

        logits = model(inp)                                # (1,C,N)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()  # (N,)

        pred_idx[start:end] = p[: end-start]

pred_cls = np.array([classes[i] for i in pred_idx], dtype=np.uint8)

las.classification = pred_cls
os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)

las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_cls, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Input features: 10 | Num classes: 7


RuntimeError: Error(s) in loading state_dict for PointNetConv:
	Missing key(s) in state_dict: "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var". 
	size mismatch for mlp2.0.weight: copying a param with shape torch.Size([256, 128, 1]) from checkpoint, the shape in current model is torch.Size([256, 512, 1]).
	size mismatch for head.0.weight: copying a param with shape torch.Size([256, 512, 1]) from checkpoint, the shape in current model is torch.Size([128, 256, 1]).
	size mismatch for head.0.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for head.1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for head.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for head.1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for head.1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for head.4.weight: copying a param with shape torch.Size([7, 256, 1]) from checkpoint, the shape in current model is torch.Size([7, 128, 1]).

In [52]:
import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted.laz"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ----------------------------
# Load checkpoint
# ----------------------------
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
state = ckpt["model_state"]
state = {k.replace("module.", ""): v for k, v in state.items()}

classes = ckpt["classes"]
Xmean   = np.array(ckpt["Xmean"], dtype=np.float32)
Xstd    = np.array(ckpt["Xstd"], dtype=np.float32)

NUM_CLASSES = len(classes)
IN_CH = int(len(Xmean))

print("Loaded model. Classes:", classes)
print("Input features:", IN_CH, "| Num classes:", NUM_CLASSES)

# ----------------------------
# Build model EXACTLY from checkpoint shapes
# ----------------------------
def conv1_out_ch(key):
    # weight shape: (out_ch, in_ch, 1)
    return int(state[key].shape[0])

def conv1_in_ch(key):
    return int(state[key].shape[1])

# detect how many conv blocks in mlp1 by scanning keys
mlp1_conv_keys = sorted([k for k in state.keys() if k.startswith("mlp1.") and k.endswith(".weight") and state[k].ndim == 3])
# keep only conv weights (BN weights are 1D, conv are 3D)
mlp1_conv_keys = [k for k in mlp1_conv_keys if state[k].shape[-1] == 1]
# Example conv keys look like: mlp1.0.weight, mlp1.3.weight, mlp1.6.weight, ...
# We'll build them in order based on module index number
def module_index(k): return int(k.split(".")[1])
mlp1_conv_keys.sort(key=module_index)

mlp2_conv_key = [k for k in state.keys() if k.startswith("mlp2.") and k.endswith(".weight") and state[k].ndim == 3][0]
head0_key     = [k for k in state.keys() if k.startswith("head.0") and k.endswith(".weight")][0]
head4_key     = [k for k in state.keys() if k.startswith("head.4") and k.endswith(".weight")][0]

mlp1_out = conv1_out_ch(mlp1_conv_keys[-1])
mlp2_in  = conv1_in_ch(mlp2_conv_key)
mlp2_out = conv1_out_ch(mlp2_conv_key)

head_in  = conv1_in_ch(head0_key)
head_mid = conv1_out_ch(head0_key)
head_out = conv1_out_ch(head4_key)

print("Checkpoint shapes summary:")
print(" mlp1 last out:", mlp1_out)
print(" mlp2 in/out :", mlp2_in, "→", mlp2_out)
print(" head in/mid/out:", head_in, "→", head_mid, "→", head_out)

class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()

        # Build mlp1 blocks from conv keys list
        layers = []
        prev_in = in_ch

        for conv_k in mlp1_conv_keys:
            out_ch = conv1_out_ch(conv_k)
            # Conv
            layers.append(nn.Conv1d(prev_in, out_ch, 1))
            # BN
            layers.append(nn.BatchNorm1d(out_ch))
            # ReLU
            layers.append(nn.ReLU(inplace=True))
            prev_in = out_ch

        self.mlp1 = nn.Sequential(*layers)

        # mlp2 is a single conv+bn+relu in your checkpoint
        self.mlp2 = nn.Sequential(
            nn.Conv1d(mlp2_in, mlp2_out, 1),
            nn.BatchNorm1d(mlp2_out),
            nn.ReLU(inplace=True),
        )

        # head is Conv(head_in→head_mid), BN, ReLU, Dropout, Conv(head_mid→num_classes)
        self.head = nn.Sequential(
            nn.Conv1d(head_in, head_mid, 1),
            nn.BatchNorm1d(head_mid),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Conv1d(head_mid, num_classes, 1),
        )

    def forward(self, x):
        f = self.mlp1(x)
        g = torch.max(f, dim=2)[0]       
        g = g.unsqueeze(2).repeat(1, 1, f.shape[2]) 
        local_for_mlp2 = f[:, :mlp2_in, :]           
        h_local = self.mlp2(local_for_mlp2)          
        feat = torch.cat([h_local, f, g], dim=1)    
        out = self.head(feat)                        
        return out


model = PointNetFromCkpt(IN_CH, NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()
print("✅ Model loaded successfully.")

# ----------------------------
# Feature builder (must match training order)
# ----------------------------
def get_dim(las, name, fallback=0.0):
    try:
        return np.asarray(las[name])
    except Exception:
        if name == "scan_angle":
            try:
                return np.asarray(las["scan_angle_rank"])
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=np.float32)

def grid_min_z(x, y, z, cell=2.0):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.full(len(uniq), np.inf, dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(z_s)
        zmin[i] = float(z_s[a:b].min())

    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos]

def make_features_from_las(las, cell=2.0):
    x = np.asarray(las.x, dtype=np.float32)
    y = np.asarray(las.y, dtype=np.float32)
    z = np.asarray(las.z, dtype=np.float32)

    intensity = get_dim(las, "intensity", 0.0).astype(np.float32)
    ret_num   = get_dim(las, "return_number", 1.0).astype(np.float32)
    nret      = get_dim(las, "number_of_returns", 1.0).astype(np.float32)
    scan_angle = get_dim(las, "scan_angle", 0.0).astype(np.float32)
    deviation  = get_dim(las, "Deviation", 0.0).astype(np.float32)
    overlap    = get_dim(las, "overlap", 0.0).astype(np.float32)

    zmin = grid_min_z(x, y, z, cell=cell)
    hag  = (z - zmin).astype(np.float32)

    cx, cy = x.mean(), y.mean()
    dx = (x - cx).astype(np.float32)
    dy = (y - cy).astype(np.float32)

    # IMPORTANT: must match your training blocks!
    X = np.stack([z, hag, dx, dy, intensity, ret_num, nret, scan_angle, deviation, overlap], axis=1).astype(np.float32)
    return X

# ----------------------------
# Predict in blocks
# ----------------------------
BATCH_POINTS = 4096
STRIDE = 4096

las = laspy.read(IN_LAZ)
X = make_features_from_las(las, cell=2.0)
print("Total points:", len(X), "| X shape:", X.shape)

Xn = (X - Xmean) / (Xstd + 1e-6)

pred_idx = np.zeros(len(Xn), dtype=np.int64)

with torch.no_grad():
    for start in tqdm(range(0, len(Xn), STRIDE), desc="Predicting"):
        end = min(start + BATCH_POINTS, len(Xn))
        block = Xn[start:end]

        if len(block) < BATCH_POINTS:
            pad_n = BATCH_POINTS - len(block)
            pad = np.repeat(block[-1:], pad_n, axis=0)
            block = np.vstack([block, pad])

        inp = torch.from_numpy(block).to(DEVICE)         # (N,F)
        inp = inp.unsqueeze(0).permute(0, 2, 1)          # (1,F,N)

        logits = model(inp)                              # (1,C,N)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
        pred_idx[start:end] = p[: end-start]

pred_cls = np.array([classes[i] for i in pred_idx], dtype=np.uint8)

las.classification = pred_cls
os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)

las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_cls, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Input features: 10 | Num classes: 7
Checkpoint shapes summary:
 mlp1 last out: 128
 mlp2 in/out : 128 → 256
 head in/mid/out: 512 → 256 → 7
✅ Model loaded successfully.
Total points: 12374846 | X shape: (12374846, 10)


Predicting: 100%|██████████| 3022/3022 [00:05<00:00, 525.58it/s]


Saved LAS: D:/lidarrrrr/anbu/New folder/dl_predicted.las
Saved LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted.laz
Pred class counts: {1: 5909693, 2: 6425564, 6: 36652, 12: 2937}


In [53]:
ckpt = torch.load(MODEL_PT, map_location="cpu", weights_only=False)
print("ckpt keys:", ckpt.keys())
print("feats:", ckpt.get("feats", None))   # very important
print("Xmean shape:", np.array(ckpt.get("Xmean")).shape)
print("Xstd  shape:", np.array(ckpt.get("Xstd")).shape)


ckpt keys: dict_keys(['model_state', 'classes', 'class_to_idx', 'Xmean', 'Xstd', 'feats'])
feats: 10
Xmean shape: (10,)
Xstd  shape: (10,)


In [54]:
d = np.load(train_files[0])
print("Train X[0]:", d["X"][0])

# After you build prediction features matrix Xpred for LAS:
print("Pred  X[0]:", Xpred[0])


Train X[0]: [-7.4062500e+01 -1.0600000e+02 -5.1693678e-01  1.9999981e-02
  3.9495000e+04  1.0000000e+00  1.0000000e+00  3.5000000e+01
  0.0000000e+00  1.6000009e-01]


NameError: name 'Xpred' is not defined

In [1]:
import numpy as np
import laspy
import torch

# -------- paths --------
TRAIN_NPZ = r"D:/lidarrrrr/anbu/dl_dataset/blocks/block_0000000.npz"
MODEL_PT  = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"   # change if needed
PRED_LAZ  = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz" # change if needed

# -------- load training sample --------
d = np.load(TRAIN_NPZ)
print("Train keys:", d.files)
print("Train X shape:", d["X"].shape, "y shape:", d["y"].shape)
print("Train X[0]:", d["X"][0])

# -------- load checkpoint stats --------
ckpt = torch.load(MODEL_PT, map_location="cpu", weights_only=False)
Xmean = np.asarray(ckpt["Xmean"], dtype=np.float32)
Xstd  = np.asarray(ckpt["Xstd"],  dtype=np.float32)
feats = int(ckpt["feats"])
print("\nckpt keys:", ckpt.keys())
print("feats:", feats)
print("Xmean shape:", Xmean.shape, "Xstd shape:", Xstd.shape)

# -----------------------------
# IMPORTANT: this MUST match your dataset maker!
# If your dataset maker used local block coords,
# you must do the same here.
# -----------------------------
def build_features_for_prediction(las):
    xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)

    # ---- FEATURE 1–2: local XY (to match Train X[0] negative values) ----
    # If your dataset builder used centered coords, do this:
    x = xyz[:,0] - np.mean(xyz[:,0])
    y = xyz[:,1] - np.mean(xyz[:,1])

    # ---- other features: edit to match exactly what you used in blocks ----
    # Below is a COMMON 10-feature layout, but you MUST align with your dataset.
    z = xyz[:,2].astype(np.float32)

    intensity = np.asarray(getattr(las, "intensity", np.zeros(len(z))), dtype=np.float32)
    rn = np.asarray(getattr(las, "return_number", np.ones(len(z))), dtype=np.float32)
    nr = np.asarray(getattr(las, "number_of_returns", np.ones(len(z))), dtype=np.float32)

    # If you had HAG/slope/local_range etc in X, you must compute them here too.
    # For now, make placeholders (zeros) so code runs; replace with real computations.
    hag = np.zeros_like(z, dtype=np.float32)
    slope = np.zeros_like(z, dtype=np.float32)
    local_range = np.zeros_like(z, dtype=np.float32)
    zstd = np.zeros_like(z, dtype=np.float32)

    # ---- choose an order and KEEP IT IDENTICAL to training ----
    # Example 10 features:
    Xpred = np.stack([
        x, y, z, hag, local_range, zstd, intensity, rn, nr, slope
    ], axis=1).astype(np.float32)

    return Xpred

# -------- create Xpred then print --------
las = laspy.read(PRED_LAZ)
Xpred = build_features_for_prediction(las)

print("\nPred X shape:", Xpred.shape)
print("Pred X[0]:", Xpred[0])

# -------- compare normalized features (what the model sees) --------
train_x0 = d["X"][0].astype(np.float32)
pred_x0  = Xpred[0].astype(np.float32)

train_norm = (train_x0 - Xmean) / (Xstd + 1e-6)
pred_norm  = (pred_x0  - Xmean) / (Xstd + 1e-6)

print("\nTrain norm[0]:", train_norm)
print("Pred  norm[0]:", pred_norm)

print("\nDiff (abs) norm:", np.abs(train_norm - pred_norm))


Train keys: ['X', 'y', 'idx']
Train X shape: (4096, 10) y shape: (4096,)
Train X[0]: [ 6.1937500e+01 -1.2750000e+02 -3.8357973e-02  5.9999943e-02
  3.4995000e+04  1.0000000e+00  1.0000000e+00  2.0000000e+01
  0.0000000e+00  1.0999999e+00]

ckpt keys: dict_keys(['model_state', 'classes', 'class_to_idx', 'Xmean', 'Xstd', 'feats'])
feats: 10
Xmean shape: (10,) Xstd shape: (10,)

Pred X shape: (12374846, 10)
Pred X[0]: [-1.409375e+02 -5.755000e+02  1.500000e+00  0.000000e+00  0.000000e+00
  0.000000e+00  3.460200e+04  1.000000e+00  1.000000e+00  0.000000e+00]

Train norm[0]: [ 0.21162534 -0.72202593 -0.01150283 -0.26280332 -0.19306287 -0.200048
 -0.22842097  0.17862275 -0.18284726  0.12864566]
Pred  norm[0]: [-4.8128140e-01 -2.6159768e+00  4.0655655e-01 -3.2531869e-01
 -5.3476200e+00 -3.9973624e+00  1.1301210e+05  1.5767007e-01
  4.6956405e-02 -6.0108793e-01]

Diff (abs) norm: [6.9290674e-01 1.8939509e+00 4.1805938e-01 6.2515378e-02 5.1545572e+00
 3.7973144e+00 1.1301233e+05 2.0952687e-02 

In [2]:
print("feats:", ckpt["feats"])
print("feature list (feats):", ckpt.get("feature_names", "Not stored"))
print("Train X mins:", d["X"].min(axis=0))
print("Train X maxs:", d["X"].max(axis=0))


feats: 10
feature list (feats): Not stored
Train X mins: [-1.378125e+02 -2.095000e+02 -8.483579e-01  0.000000e+00  0.000000e+00
  1.000000e+00  1.000000e+00 -5.900000e+01  0.000000e+00  3.999996e-02]
Train X maxs: [1.4343750e+02 1.5750000e+02 1.0561642e+01 1.0889999e+01 4.4083000e+04
 5.0000000e+00 5.0000000e+00 5.4000000e+01 0.0000000e+00 1.6670000e+01]


In [3]:
import numpy as np, laspy

BLOCK_NPZ = r"D:/lidarrrrr/anbu/dl_dataset/blocks/block_0000000.npz"
SOURCE_LAZ = r"D:/lidarrrrr/anbu/training_labeled/DX3011148 ULMIANO000001.laz"  # <-- the file that block came from

d = np.load(BLOCK_NPZ)
idx = d["idx"].astype(np.int64)
X_train = d["X"].astype(np.float32)

las = laspy.read(SOURCE_LAZ)

# raw dims
x = np.asarray(las.x, dtype=np.float32)[idx]
y = np.asarray(las.y, dtype=np.float32)[idx]
z = np.asarray(las.z, dtype=np.float32)[idx]
inten = np.asarray(las.intensity, dtype=np.float32)[idx]
rn = np.asarray(las.return_number, dtype=np.float32)[idx]
nr = np.asarray(las.number_of_returns, dtype=np.float32)[idx]

# optional dims (may not exist)
def get_dim(las, name, default=0.0):
    try:
        return np.asarray(las[name], dtype=np.float32)[idx]
    except Exception:
        if name == "scan_angle":
            try:
                return np.asarray(las["scan_angle_rank"], dtype=np.float32)[idx]
            except Exception:
                pass
    return np.full(len(idx), default, dtype=np.float32)

scan_angle = get_dim(las, "scan_angle", 0.0)

# now compare against training columns
print("Training block X[0]:", X_train[0])
print("Raw check 0:")
print("x,y,z,inten,rn,nr,scan:", x[0], y[0], z[0], inten[0], rn[0], nr[0], scan_angle[0])

# Correlation guess: which X column matches intensity etc.
for j in range(10):
    col = X_train[:, j]
    # print rough min/max and a correlation with intensity
    corr_int = np.corrcoef(col, inten)[0,1]
    corr_z = np.corrcoef(col, z)[0,1]
    corr_x = np.corrcoef(col, x)[0,1]
    corr_y = np.corrcoef(col, y)[0,1]
    print(j, "min/max", float(col.min()), float(col.max()),
          "| corr(x,y,z,int)", round(corr_x,3), round(corr_y,3), round(corr_z,3), round(corr_int,3))


Training block X[0]: [ 6.1937500e+01 -1.2750000e+02 -3.8357973e-02  5.9999943e-02
  3.4995000e+04  1.0000000e+00  1.0000000e+00  2.0000000e+01
  0.0000000e+00  1.0999999e+00]
Raw check 0:
x,y,z,inten,rn,nr,scan: 609816.6 4847096.5 2.56 34995.0 1.0 1.0 20.0
0 min/max -137.8125 143.4375 | corr(x,y,z,int) 1.0 -0.947 0.132 -0.194
1 min/max -209.5 157.5 | corr(x,y,z,int) -0.947 1.0 -0.157 0.19
2 min/max -0.8483579158782959 10.561641693115234 | corr(x,y,z,int) 0.132 -0.157 1.0 -0.177
3 min/max 0.0 10.889999389648438 | corr(x,y,z,int) 0.02 -0.03 0.916 -0.152
4 min/max 0.0 44083.0 | corr(x,y,z,int) -0.194 0.19 -0.177 1.0
5 min/max 1.0 5.0 | corr(x,y,z,int) 0.024 -0.028 0.193 -0.419
6 min/max 1.0 5.0 | corr(x,y,z,int) 0.013 -0.02 0.397 -0.473
7 min/max -59.0 54.0 | corr(x,y,z,int) 0.097 -0.261 -0.013 0.026
8 min/max 0.0 0.0 | corr(x,y,z,int) nan nan nan nan
9 min/max 0.039999961853027344 16.670000076293945 | corr(x,y,z,int) 0.076 -0.12 0.486 -0.193


  c /= stddev[:, None]
  c /= stddev[None, :]


In [5]:
# ===========================
# DL PREDICT (PointNet ckpt) — FULL WORKING SCRIPT
# Matches your discovered 10-feature order
# ===========================

import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm import tqdm

# ----------------------------
# PATHS (edit)
# ----------------------------
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"   # your ckpt
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.laz"

# ----------------------------
# BLOCKING (must match training)
# ----------------------------
TILE_SIZE = 40.0      # <-- if your dataset maker used another value, change it
NPTS      = 4096
BATCH_BLK = 16        # blocks per GPU batch (adjust if OOM)

# ----------------------------
# FEATURE GRID for HAG / local variability
# ----------------------------
CELL = 2.0            # grid size for zmin/zmax (keep stable)
EPS  = 1e-6

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ----------------------------
# Robust dimension getter
# ----------------------------
def get_dim(las, name, default=0.0, dtype=np.float32):
    # Works with standard dims + ExtraBytes
    try:
        return np.asarray(las[name], dtype=dtype)
    except Exception:
        pass
    # common alternate name
    if name == "scan_angle":
        for alt in ["scan_angle_rank", "scan_angle_rank_degrees"]:
            try:
                return np.asarray(las[alt], dtype=dtype)
            except Exception:
                pass
    return np.full(len(las.x), default, dtype=dtype)

# ----------------------------
# Grid stats: zmin/zmax per XY cell
# ----------------------------
def grid_zmin_zmax(x, y, z, cell=CELL):
    minx, miny = float(x.min()), float(y.min())
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)
    key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s   = z[order]

    uniq, start = np.unique(key_s, return_index=True)
    zmin = np.empty(len(uniq), dtype=np.float32)
    zmax = np.empty(len(uniq), dtype=np.float32)

    for i in range(len(uniq)):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(key_s)
        zs = z_s[a:b]
        zmin[i] = float(zs.min())
        zmax[i] = float(zs.max())

    # map back
    pos = np.searchsorted(uniq, key)
    pos = np.clip(pos, 0, len(uniq)-1)
    return zmin[pos], zmax[pos], minx, miny

# ----------------------------
# Build 10 features EXACT ORDER
# 0 dx, 1 dy, 2 hag_raw, 3 hag_clip, 4 intensity, 5 rn, 6 nr, 7 scan, 8 zero, 9 local_range
# ----------------------------
def make_X_for_points(x, y, z, intensity, rn, nr, scan, zmin, zmax, cx, cy):
    dx = (x - cx).astype(np.float32)
    dy = (y - cy).astype(np.float32)

    hag_raw  = (z - zmin).astype(np.float32)                  # can be negative if grid min isn't true ground
    hag_clip = np.maximum(hag_raw, 0.0).astype(np.float32)

    local_range = (zmax - zmin).astype(np.float32)            # matches your feat9 magnitude (0..~16)

    X = np.stack([
        dx,
        dy,
        hag_raw,
        hag_clip,
        intensity.astype(np.float32),
        rn.astype(np.float32),
        nr.astype(np.float32),
        scan.astype(np.float32),
        np.zeros_like(dx, dtype=np.float32),                  # feat8 = constant 0 in your training
        local_range
    ], axis=1).astype(np.float32)

    return X

# ----------------------------
# PointNet (must match checkpoint architecture)
# Your ckpt summary:
#  - mlp1 last out: 128
#  - mlp2: 128 -> 256
#  - head: 512 -> 256 -> C
# ----------------------------
class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch, num_classes):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(True),
        )
        # ✅ match checkpoint: head.0, head.1, head.4
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),    # head.0
            nn.BatchNorm1d(256),       # head.1
            nn.ReLU(True),             # head.2
            nn.Dropout(p=0.3),         # head.3  (this was missing)
            nn.Conv1d(256, num_classes, 1)  # head.4
        )

    def forward(self, x):  # x: (B,F,N)
        h = self.mlp1(x)                      # (B,128,N)
        h_local = self.mlp2(h)                # (B,256,N)
        g = torch.max(h_local, dim=2, keepdim=True).values  # (B,256,1)
        g = g.repeat(1, 1, h_local.size(2))                 # (B,256,N)
        feat = torch.cat([h_local, g], dim=1)               # (B,512,N)
        out = self.head(feat)                               # (B,C,N)
        return out

# ----------------------------
# Load checkpoint safely (PyTorch 2.6+ default changed)
# ----------------------------
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
state = ckpt["model_state"]
classes = ckpt["classes"]                 # original class labels like [1,2,3,6,7,12,13]
class_to_idx = ckpt["class_to_idx"]
Xmean = ckpt["Xmean"].astype(np.float32)
Xstd  = ckpt["Xstd"].astype(np.float32)
IN_CH = int(ckpt.get("feats", 10))
NUM_CLASSES = len(classes)

print("Device:", DEVICE)
print("Loaded model. Classes:", classes)
print("Input features:", IN_CH, "| Num classes:", NUM_CLASSES)

model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()

# ----------------------------
# Read LAZ
# ----------------------------
las = laspy.read(IN_LAZ)
x = np.asarray(las.x, dtype=np.float32)
y = np.asarray(las.y, dtype=np.float32)
z = np.asarray(las.z, dtype=np.float32)

intensity = get_dim(las, "intensity", 0.0)
rn        = get_dim(las, "return_number", 1.0)
nr        = get_dim(las, "number_of_returns", 1.0)
scan      = get_dim(las, "scan_angle", 0.0)

zmin, zmax, minx, miny = grid_zmin_zmax(x, y, z, cell=CELL)

# ----------------------------
# Build tile -> indices list
# ----------------------------
gx = np.floor((x - minx) / TILE_SIZE).astype(np.int32)
gy = np.floor((y - miny) / TILE_SIZE).astype(np.int32)
tile_key = gx.astype(np.int64) * 1_000_000 + gy.astype(np.int64)

# group points by tile
order = np.argsort(tile_key)
tile_s = tile_key[order]

uniq_tiles, start = np.unique(tile_s, return_index=True)

tile_slices = []
for i in range(len(uniq_tiles)):
    a = start[i]
    b = start[i+1] if i+1 < len(uniq_tiles) else len(order)
    idxs = order[a:b]
    if len(idxs) < 200:   # skip tiny tiles
        continue
    tile_slices.append(idxs)

print("Total tiles:", len(tile_slices))

# ----------------------------
# Predict per tile in batches
# ----------------------------
pred_idx = np.zeros(len(x), dtype=np.int64)  # stores class index 0..C-1

def norm_X(X):
    return (X - Xmean[None, :]) / (Xstd[None, :] + 1e-12)

rng = np.random.default_rng(0)

with torch.no_grad():
    pbar = tqdm(range(0, len(tile_slices), BATCH_BLK), desc="Predicting tiles")
    for t0 in pbar:
        batch_tiles = tile_slices[t0:t0 + BATCH_BLK]

        X_batch = []
        pick_map = []   # (point_indices_original, used_count)
        for idxs in batch_tiles:
            # sample NPTS points from this tile
            if len(idxs) >= NPTS:
                pick = rng.choice(idxs, NPTS, replace=False)
            else:
                pick = rng.choice(idxs, NPTS, replace=True)

            # tile center for dx/dy (must match training)
            cx = float(x[pick].mean())
            cy = float(y[pick].mean())

            Xb = make_X_for_points(
                x[pick], y[pick], z[pick],
                intensity[pick], rn[pick], nr[pick], scan[pick],
                zmin[pick], zmax[pick],
                cx, cy
            )
            Xb = norm_X(Xb)

            X_batch.append(Xb)
            pick_map.append(pick)

        X_batch = np.stack(X_batch, axis=0)                   # (B,N,F)
        inp = torch.from_numpy(X_batch).to(DEVICE)            # (B,N,F)
        inp = inp.permute(0, 2, 1).contiguous()               # (B,F,N)

        logits = model(inp)                                   # (B,C,N)
        p = torch.argmax(logits, dim=1).cpu().numpy()         # (B,N) class-index

        # write back to original points (only for sampled points)
        for b in range(len(batch_tiles)):
            pred_idx[pick_map[b]] = p[b]

# map class index -> real class label (LAS classification)
pred_class = np.array([classes[i] for i in pred_idx], dtype=np.uint8)

las.classification = pred_class
las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)

try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_class, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Input features: 10 | Num classes: 7
Total tiles: 118


Predicting tiles: 100%|██████████| 8/8 [00:01<00:00,  4.74it/s]


Saved LAS: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.las
Saved LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.laz
Pred class counts: {1: 12087409, 2: 232242, 3: 54453, 12: 742}


In [6]:
model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)   # ✅ should load


<All keys matched successfully>

In [7]:
print([k for k in state.keys() if k.startswith("head.")][:20])


['head.0.weight', 'head.0.bias', 'head.1.weight', 'head.1.bias', 'head.1.running_mean', 'head.1.running_var', 'head.1.num_batches_tracked', 'head.4.weight', 'head.4.bias']


In [8]:
d = np.load(train_files[0])
print("Train col mins:", d["X"].min(axis=0))
print("Train col maxs:", d["X"].max(axis=0))


NameError: name 'train_files' is not defined

In [9]:
print("Pred col mins:", X_block.min(axis=0))
print("Pred col maxs:", X_block.max(axis=0))


NameError: name 'X_block' is not defined

In [10]:
import glob

BLOCK_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"

train_files = sorted(glob.glob(BLOCK_DIR + "/*.npz"))

print("Total blocks:", len(train_files))
print("Example file:", train_files[0])


Total blocks: 27697
Example file: D:/lidarrrrr/anbu/dl_dataset/blocks\block_0000000.npz


In [11]:
import numpy as np

d = np.load(train_files[0])

print("Keys:", d.files)
print("Train col mins:", d["X"].min(axis=0))
print("Train col maxs:", d["X"].max(axis=0))
print("Train col means:", d["X"].mean(axis=0))


Keys: ['X', 'y', 'idx']
Train col mins: [-1.378125e+02 -2.095000e+02 -8.483579e-01  0.000000e+00  0.000000e+00
  1.000000e+00  1.000000e+00 -5.900000e+01  0.000000e+00  3.999996e-02]
Train col maxs: [1.4343750e+02 1.5750000e+02 1.0561642e+01 1.0889999e+01 4.4083000e+04
 5.0000000e+00 5.0000000e+00 5.4000000e+01 0.0000000e+00 1.6670000e+01]
Train col means: [-2.2071991e+00 -1.8538818e+00 -3.7940481e-04  2.1671166e-01
  3.5122293e+04  1.0463867e+00  1.0629883e+00  5.8276367e-01
  0.0000000e+00  8.9575773e-01]


In [12]:
print("Pred col mins:", X_block.min(axis=0))
print("Pred col maxs:", X_block.max(axis=0))


NameError: name 'X_block' is not defined

In [13]:
import os, glob
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# =========================================================
# EDIT PATHS
# =========================================================
TRAIN_BLOCK_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"     # has .npz blocks with keys X,y,idx
IN_LAS_OR_LAZ   = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT        = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"   # your saved ckpt
OUT_LAS         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed2.las"
OUT_LAZ         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed2.laz"

# =========================================================
# SETTINGS (must match training)
# =========================================================
CELL = 3.0          # same grid cell you used for HAG
BLOCK = 4096        # same points per block used in training blocks
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# =========================================================
# 1) LOAD 1 TRAINING BLOCK AND PRINT FEATURE STATS
# =========================================================
train_files = sorted(glob.glob(os.path.join(TRAIN_BLOCK_DIR, "*.npz")))
if not train_files:
    raise RuntimeError("No training .npz found in TRAIN_BLOCK_DIR")

d = np.load(train_files[0])
Xtr = d["X"].astype(np.float32)
ytr = d["y"].astype(np.int64)
print("\nTrain keys:", list(d.keys()))
print("Train X shape:", Xtr.shape, "y shape:", ytr.shape)
print("Train X mins:", Xtr.min(axis=0))
print("Train X maxs:", Xtr.max(axis=0))

# =========================================================
# 2) HELPERS: robust LAS dimension getter
# =========================================================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    # Try normal dimension name
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass
    # Special cases
    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass
    # RGB fields may not exist
    if name in ["red", "green", "blue"]:
        return np.full(len(las.x), fallback, dtype=dtype)
    # Extra bytes may not exist
    return np.full(len(las.x), fallback, dtype=dtype)

# =========================================================
# 3) BUILD PRED FEATURES (10 features)
#    IMPORTANT: This must match training feature order.
#    We use this order:
#    [x, y, z, hag, intensity, ret_num, n_returns, scan_angle, deviation, slope]
# =========================================================
def compute_hag_and_slope(xyz, cls, cell=3.0, ground_class=2):
    x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    # ground surface per cell from ground points (class 2)
    cell_min = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_min) or (zi < cell_min[k]):
            cell_min[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_min:
            hag[i] = z[i] - cell_min[k]
            has_ground[i] = True
        else:
            hag[i] = 0.0

    # local range proxy (zmax-zmin) for slope
    # compute zmin/zmax per cell quickly using dict
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope, has_ground

def make_Xpred(las, cell=3.0):
    xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)
    cls = np.asarray(las.classification, dtype=np.int32)

    intensity = get_dim(las, "intensity", fallback=0.0, dtype=np.float32)
    ret_num   = get_dim(las, "return_number", fallback=1.0, dtype=np.float32)
    n_returns = get_dim(las, "number_of_returns", fallback=1.0, dtype=np.float32)
    scan_ang  = get_dim(las, "scan_angle", fallback=0.0, dtype=np.float32)
    deviation = get_dim(las, "Deviation", fallback=0.0, dtype=np.float32)   # extra bytes

    hag, slope, has_ground = compute_hag_and_slope(xyz, cls, cell=cell)

    X = np.stack([
        xyz[:,0], xyz[:,1], xyz[:,2],
        hag,
        intensity,
        ret_num,
        n_returns,
        scan_ang,
        deviation,
        slope
    ], axis=1).astype(np.float32)

    return X, xyz, cls, has_ground

# =========================================================
# 4) LOAD CKPT + BUILD MODEL THAT MATCHES CKPT SHAPES
#    Your ckpt keys: model_state, classes, class_to_idx, Xmean, Xstd, feats
# =========================================================
class PointNetFromCkpt(nn.Module):
    """
    Matches a common 'PointNet-small' used in your training:
    - mlp1: (in -> 64 -> 64 -> 128) conv1d + BN + ReLU
    - mlp2: (128 -> 256) conv1d + BN + ReLU
    - global max pool -> 256
    - concat local(256) + global(256) = 512
    - head: (512 -> 256 -> C) conv1d
    """
    def __init__(self, in_ch=10, num_classes=7):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        # x: (B, F, N)
        h = self.mlp1(x)           # (B,128,N)
        h = self.mlp2(h)           # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]   # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])             # (B,256,N)
        feat = torch.cat([h, g], dim=1)            # (B,512,N)
        out = self.head(feat)                      # (B,C,N)
        return out

# Load ckpt safely (PyTorch 2.6+)
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

classes = ckpt["classes"]
class_to_idx = ckpt["class_to_idx"]
Xmean = ckpt["Xmean"].astype(np.float32)
Xstd  = ckpt["Xstd"].astype(np.float32)
IN_CH = int(ckpt["feats"])
NUM_CLASSES = len(classes)

print("\nLoaded model. Classes:", classes)
print("Input features:", IN_CH, "| Num classes:", NUM_CLASSES)

state = ckpt["model_state"]

model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()
print("✅ Model loaded successfully.")

# =========================================================
# 5) BUILD PRED FEATURES + PRINT STATS
# =========================================================
las = laspy.read(IN_LAS_OR_LAZ)
Xpred, xyz, cls_in, has_ground = make_Xpred(las, cell=CELL)

print("\nPred X shape:", Xpred.shape)
print("Pred X mins:", Xpred.min(axis=0))
print("Pred X maxs:", Xpred.max(axis=0))

# normalize EXACTLY like training
Xpred_n = (Xpred - Xmean[None, :]) / (Xstd[None, :] + 1e-6)

# show first-row sanity
print("\nTrain X[0]:", Xtr[0])
print("Pred  X[0]:", Xpred[0])
print("Train norm[0]:", (Xtr[0] - Xmean) / (Xstd + 1e-6))
print("Pred  norm[0]:", (Xpred[0] - Xmean) / (Xstd + 1e-6))

# =========================================================
# 6) PREDICT IN BLOCKS + SAVE LAS/LAZ
# =========================================================
pred_idx = np.zeros((len(Xpred_n),), dtype=np.int64)

with torch.no_grad():
    pbar = tqdm(range(0, len(Xpred_n), BLOCK), desc="Predicting blocks")
    for start in pbar:
        end = min(start + BLOCK, len(Xpred_n))
        X_block = Xpred_n[start:end]  # <-- defined here

        # print once (fixes your NameError problem)
        if start == 0:
            print("\n[DEBUG] X_block mins:", X_block.min(axis=0))
            print("[DEBUG] X_block maxs:", X_block.max(axis=0))

        inp = torch.from_numpy(X_block).to(DEVICE)          # (N,F)
        inp = inp.unsqueeze(0).permute(0, 2, 1)             # (1,F,N)

        logits = model(inp)                                  # (1,C,N)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
        pred_idx[start:end] = p[:end-start]

# map back to LAS class codes
idx_to_class = {i:c for c,i in class_to_idx.items()}
pred_class = np.vectorize(idx_to_class.get)(pred_idx).astype(np.uint8)

las.classification = pred_class
las.write(OUT_LAS)
print("\nSaved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_class, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050

Train keys: ['X', 'y', 'idx']
Train X shape: (4096, 10) y shape: (4096,)
Train X mins: [-1.378125e+02 -2.095000e+02 -8.483579e-01  0.000000e+00  0.000000e+00
  1.000000e+00  1.000000e+00 -5.900000e+01  0.000000e+00  3.999996e-02]
Train X maxs: [1.4343750e+02 1.5750000e+02 1.0561642e+01 1.0889999e+01 4.4083000e+04
 5.0000000e+00 5.0000000e+00 5.4000000e+01 0.0000000e+00 1.6670000e+01]

Loaded model. Classes: [1, 2, 3, 6, 7, 12, 13]
Input features: 10 | Num classes: 7


RuntimeError: Error(s) in loading state_dict for PointNetFromCkpt:
	Missing key(s) in state_dict: "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var", "head.3.weight", "head.3.bias". 
	Unexpected key(s) in state_dict: "head.4.weight", "head.4.bias". 
	size mismatch for mlp1.3.weight: copying a param with shape torch.Size([128, 64, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).
	size mismatch for mlp1.3.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).

In [14]:
import torch
import torch.nn as nn

class PointNetFromCkptV2(nn.Module):
    """
    Matches checkpoint structure:
    mlp1: Conv(in->64) + BN + ReLU
          Conv(64->128) + BN + ReLU
          Conv(128->128) + BN + ReLU
    mlp2: Conv(128->256) + BN + ReLU
    head: Conv(512->256) + BN + ReLU + Dropout + Conv(256->C)
    """
    def __init__(self, in_ch=10, num_classes=7, drop=0.3):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),

            nn.Conv1d(128, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=drop),              # <-- shifts final conv to head.4.*
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        # x: (B,F,N)
        h = self.mlp1(x)                            # (B,128,N)
        h = self.mlp2(h)                            # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]     # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])               # (B,256,N)
        feat = torch.cat([h, g], dim=1)              # (B,512,N)
        out = self.head(feat)                        # (B,C,N)
        return out


In [15]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
state = ckpt["model_state"]
classes = ckpt["classes"]

model = PointNetFromCkptV2(in_ch=int(ckpt["feats"]), num_classes=len(classes)).to(DEVICE)
model.load_state_dict(state, strict=True)   # ✅ should succeed now
model.eval()
print("✅ Model loaded OK")


RuntimeError: Error(s) in loading state_dict for PointNetFromCkptV2:
	Missing key(s) in state_dict: "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var". 

In [16]:
import torch
import torch.nn as nn

class PointNetFromCkptV3(nn.Module):
    """
    Matches checkpoint:
    mlp1: Conv(in->64) + BN + ReLU
          Conv(64->128) + BN + ReLU
    mlp2: Conv(128->256) + BN + ReLU
    head: Conv(512->256) + BN + ReLU + Dropout + Conv(256->C)
    """
    def __init__(self, in_ch=10, num_classes=7, drop=0.3):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=drop),          # keeps final conv as head.4.*
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        # x: (B,F,N)
        h = self.mlp1(x)                            # (B,128,N)
        h = self.mlp2(h)                            # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]     # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])               # (B,256,N)
        feat = torch.cat([h, g], dim=1)              # (B,512,N)
        return self.head(feat)                       # (B,C,N)


In [17]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
state = ckpt["model_state"]
classes = ckpt["classes"]

model = PointNetFromCkptV3(in_ch=int(ckpt["feats"]), num_classes=len(classes)).to(DEVICE)
model.load_state_dict(state, strict=True)   # ✅ should load now
model.eval()

print("✅ Loaded OK | feats:", ckpt["feats"], "| classes:", classes)


✅ Loaded OK | feats: 10 | classes: [1, 2, 3, 6, 7, 12, 13]


In [18]:
import os, glob
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# =========================================================
# EDIT PATHS
# =========================================================
TRAIN_BLOCK_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"     # .npz blocks with keys X,y,idx
IN_LAS_OR_LAZ   = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT        = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed2.las"
OUT_LAZ         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed2.laz"

# =========================================================
# MUST MATCH TRAINING
# =========================================================
GROUND_CLASS = 2
CELL = 3.0          # HAG grid cell
BLOCK = 4096        # points per forward pass
TILE_SIZE = 300.0   # IMPORTANT: local XY centering. This is what makes x/y ~ [-150..150]

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# =========================================================
# 1) Load 1 training block to see expected feature ranges
# =========================================================
train_files = sorted(glob.glob(os.path.join(TRAIN_BLOCK_DIR, "*.npz")))
if not train_files:
    raise RuntimeError("No .npz found in TRAIN_BLOCK_DIR")

d0 = np.load(train_files[0])
Xtr = d0["X"].astype(np.float32)
print("\n[TRAIN BLOCK]")
print("keys:", list(d0.keys()))
print("X shape:", Xtr.shape)
print("col mins:", Xtr.min(axis=0))
print("col maxs:", Xtr.max(axis=0))

# =========================================================
# 2) Robust LAS dimension getter
# =========================================================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass

    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass

    # RGB / extra bytes might not exist
    return np.full(len(las.x), fallback, dtype=dtype)

# =========================================================
# 3) PointNet model (MUST match your checkpoint)
# =========================================================
class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch=10, num_classes=7):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):  # x: (B,F,N)
        h = self.mlp1(x)                 # (B,128,N)
        h = self.mlp2(h)                 # (B,256,N)
        g = torch.max(h, 2, keepdim=True)[0]  # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])        # (B,256,N)
        feat = torch.cat([h, g], dim=1)       # (B,512,N)
        out = self.head(feat)                # (B,C,N)
        return out

# =========================================================
# 4) Load checkpoint
# =========================================================
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

classes     = ckpt["classes"]
class_to_idx= ckpt["class_to_idx"]
Xmean       = ckpt["Xmean"].astype(np.float32)
Xstd        = ckpt["Xstd"].astype(np.float32)
IN_CH       = int(ckpt["feats"])
NUM_CLASSES = len(classes)

print("\n[CKPT]")
print("classes:", classes)
print("feats:", IN_CH, "| num_classes:", NUM_CLASSES)

model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(ckpt["model_state"], strict=True)
model.eval()
print("✅ Model loaded OK.")

idx_to_class = {i:c for c,i in class_to_idx.items()}

# =========================================================
# 5) HAG + slope inside a tile
# =========================================================
def hag_slope_for_tile(xyz_tile, cls_tile, cell=3.0, ground_class=2):
    x, y, z = xyz_tile[:,0], xyz_tile[:,1], xyz_tile[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    # ground surface (per cell) from ground points only
    cell_min = {}
    g_idx = np.where(cls_tile == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_min) or (zi < cell_min[k]):
            cell_min[k] = zi

    # fallback if tile has no ground (rare)
    if len(cell_min) == 0:
        hag = np.zeros(len(z), dtype=np.float32)
        slope = np.zeros(len(z), dtype=np.float32)
        has_ground = np.zeros(len(z), dtype=bool)
        tile_ground_ref = float(np.percentile(z, 1))
        return hag, slope, has_ground, tile_ground_ref

    # HAG
    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_min:
            hag[i] = z[i] - cell_min[k]
            has_ground[i] = True
        else:
            hag[i] = 0.0

    # local range for slope proxy (per cell zmax-zmin)
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)

    # tile ground reference for z_local
    tile_ground_ref = float(np.percentile(z[g_idx], 1))  # stable, similar to what your blocks show
    return hag, slope, has_ground, tile_ground_ref

# =========================================================
# 6) Build prediction features tile-by-tile (MATCH TRAINING)
#    Feature order:
#    [x_local, y_local, z_local, hag, intensity, rn, nr, scan, deviation, slope]
# =========================================================
def predict_on_file(in_path, out_las, out_laz):
    las = laspy.read(in_path)

    xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)
    cls_in = np.asarray(las.classification, dtype=np.int32)

    intensity = get_dim(las, "intensity", fallback=0.0)
    rn        = get_dim(las, "return_number", fallback=1.0)
    nr        = get_dim(las, "number_of_returns", fallback=1.0)
    scan      = get_dim(las, "scan_angle", fallback=0.0)
    deviation = get_dim(las, "Deviation", fallback=0.0)

    x = xyz[:,0]; y = xyz[:,1]
    minx = float(x.min()); miny = float(y.min())

    tx = np.floor((x - minx)/TILE_SIZE).astype(np.int32)
    ty = np.floor((y - miny)/TILE_SIZE).astype(np.int32)
    tile_key = tx.astype(np.int64) * 1_000_000 + ty.astype(np.int64)

    order = np.argsort(tile_key)
    tile_key_s = tile_key[order]
    unique_tiles, start_idx = np.unique(tile_key_s, return_index=True)

    pred_idx_all = np.zeros(len(xyz), dtype=np.int64)

    print("\nTotal tiles:", len(unique_tiles))

    with torch.no_grad():
        for t in tqdm(range(len(unique_tiles)), desc="Predicting tiles"):
            a = start_idx[t]
            b = start_idx[t+1] if t+1 < len(unique_tiles) else len(order)
            idx = order[a:b]  # indices for this tile in original arrays

            xyz_tile = xyz[idx]
            cls_tile = cls_in[idx]

            # local XY centered in tile -> range about [-150..150]
            tile_minx = float(xyz_tile[:,0].min())
            tile_miny = float(xyz_tile[:,1].min())
            x_local = xyz_tile[:,0] - (tile_minx + TILE_SIZE/2.0)
            y_local = xyz_tile[:,1] - (tile_miny + TILE_SIZE/2.0)

            hag, slope, has_ground, tile_ground_ref = hag_slope_for_tile(
                xyz_tile, cls_tile, cell=CELL, ground_class=GROUND_CLASS
            )
            z_local = (xyz_tile[:,2] - tile_ground_ref).astype(np.float32)

            X_tile = np.stack([
                x_local.astype(np.float32),
                y_local.astype(np.float32),
                z_local,
                hag.astype(np.float32),
                intensity[idx].astype(np.float32),
                rn[idx].astype(np.float32),
                nr[idx].astype(np.float32),
                scan[idx].astype(np.float32),
                deviation[idx].astype(np.float32),
                slope.astype(np.float32),
            ], axis=1).astype(np.float32)

            # normalize like training
            X_tile_n = (X_tile - Xmean[None, :]) / (Xstd[None, :] + 1e-6)

            # debug once: compare prediction ranges to training ranges
            if t == 0:
                print("\n[PRED TILE 0]")
                print("col mins:", X_tile.min(axis=0))
                print("col maxs:", X_tile.max(axis=0))
                print("norm mins:", X_tile_n.min(axis=0))
                print("norm maxs:", X_tile_n.max(axis=0))

            # predict in blocks
            pred_idx_tile = np.zeros(len(idx), dtype=np.int64)

            for s in range(0, len(idx), BLOCK):
                e = min(s + BLOCK, len(idx))
                block = X_tile_n[s:e]
                inp = torch.from_numpy(block).to(DEVICE).unsqueeze(0).permute(0,2,1)  # (1,F,N)
                logits = model(inp)  # (1,C,N)
                p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
                pred_idx_tile[s:e] = p[:e-s]

            pred_idx_all[idx] = pred_idx_tile

    # map to LAS class codes
    pred_class = np.vectorize(idx_to_class.get)(pred_idx_all).astype(np.uint8)
    las.classification = pred_class

    las.write(out_las)
    print("Saved LAS:", out_las)
    try:
        las.write(out_laz)
        print("Saved LAZ:", out_laz)
    except Exception as e:
        print("LAZ write failed (ok):", e)

    u, c = np.unique(pred_class, return_counts=True)
    print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))

# =========================================================
# RUN
# =========================================================
predict_on_file(IN_LAS_OR_LAZ, OUT_LAS, OUT_LAZ)


Device: cuda
GPU: NVIDIA GeForce RTX 3050

[TRAIN BLOCK]
keys: ['X', 'y', 'idx']
X shape: (4096, 10)
col mins: [-1.378125e+02 -2.095000e+02 -8.483579e-01  0.000000e+00  0.000000e+00
  1.000000e+00  1.000000e+00 -5.900000e+01  0.000000e+00  3.999996e-02]
col maxs: [1.4343750e+02 1.5750000e+02 1.0561642e+01 1.0889999e+01 4.4083000e+04
 5.0000000e+00 5.0000000e+00 5.4000000e+01 0.0000000e+00 1.6670000e+01]

[CKPT]
classes: [1, 2, 3, 6, 7, 12, 13]
feats: 10 | num_classes: 7


RuntimeError: Error(s) in loading state_dict for PointNetFromCkpt:
	Missing key(s) in state_dict: "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var", "head.3.weight", "head.3.bias". 
	Unexpected key(s) in state_dict: "head.4.weight", "head.4.bias". 
	size mismatch for mlp1.3.weight: copying a param with shape torch.Size([128, 64, 1]) from checkpoint, the shape in current model is torch.Size([64, 64, 1]).
	size mismatch for mlp1.3.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).
	size mismatch for mlp1.4.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([64]).

In [19]:
import torch
import torch.nn as nn
import numpy as np

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

classes = ckpt["classes"]
IN_CH = int(ckpt["feats"])
NUM_CLASSES = len(classes)
state = ckpt["model_state"]

print("✅ CKPT classes:", classes)
print("✅ feats:", IN_CH, "num_classes:", NUM_CLASSES)

class PointNetFromCkptFixed(nn.Module):
    """
    Matches ckpt:
    mlp1: Conv(in->64), BN, ReLU, Conv(64->128), BN, ReLU
    mlp2: Conv(128->256), BN, ReLU
    head: Conv(512->256), BN, ReLU, Dropout, Conv(256->C)
    """
    def __init__(self, in_ch=10, num_classes=7, p_drop=0.3):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),        # mlp1.0.*
            nn.BatchNorm1d(64),             # mlp1.1.*
            nn.ReLU(inplace=True),          # mlp1.2
            nn.Conv1d(64, 128, 1),          # mlp1.3.*  (THIS is [128,64,1])
            nn.BatchNorm1d(128),            # mlp1.4.*
            nn.ReLU(inplace=True),          # mlp1.5
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),         # mlp2.0.*
            nn.BatchNorm1d(256),            # mlp2.1.*
            nn.ReLU(inplace=True),          # mlp2.2
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),         # head.0.*
            nn.BatchNorm1d(256),            # head.1.*
            nn.ReLU(inplace=True),          # head.2
            nn.Dropout(p_drop),             # head.3  (no weights)
            nn.Conv1d(256, num_classes, 1)  # head.4.*  (matches your ckpt)
        )

    def forward(self, x):
        # x: (B, F, N)
        h = self.mlp1(x)                    # (B,128,N)
        h = self.mlp2(h)                    # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]  # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])            # (B,256,N)
        feat = torch.cat([h, g], dim=1)           # (B,512,N)
        out = self.head(feat)                      # (B,C,N)
        return out

model = PointNetFromCkptFixed(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()
print("✅ Model loaded successfully (STRICT).")


✅ CKPT classes: [1, 2, 3, 6, 7, 12, 13]
✅ feats: 10 num_classes: 7
✅ Model loaded successfully (STRICT).


In [20]:
print([k for k in state.keys() if k.startswith("mlp1")][:20])
print([k for k in state.keys() if k.startswith("head")][:20])
print(state["mlp1.3.weight"].shape)


['mlp1.0.weight', 'mlp1.0.bias', 'mlp1.1.weight', 'mlp1.1.bias', 'mlp1.1.running_mean', 'mlp1.1.running_var', 'mlp1.1.num_batches_tracked', 'mlp1.3.weight', 'mlp1.3.bias', 'mlp1.4.weight', 'mlp1.4.bias', 'mlp1.4.running_mean', 'mlp1.4.running_var', 'mlp1.4.num_batches_tracked']
['head.0.weight', 'head.0.bias', 'head.1.weight', 'head.1.bias', 'head.1.running_mean', 'head.1.running_var', 'head.1.num_batches_tracked', 'head.4.weight', 'head.4.bias']
torch.Size([128, 64, 1])


In [21]:
import os, glob
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# ==========================
# PATHS (EDIT)
# ==========================
IN_LAS_OR_LAZ = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT      = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS       = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_final.las"
OUT_LAZ       = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_final.laz"

# ==========================
# SETTINGS (must match training)
# ==========================
CELL  = 3.0
BLOCK = 4096
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ==========================
# Helpers
# ==========================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    """Robust getter for LAS dimensions (works even if some fields missing)."""
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass

    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass

    # RGB may not exist
    if name in ["red", "green", "blue"]:
        return np.full(len(las.x), fallback, dtype=dtype)

    # ExtraBytes may not exist
    return np.full(len(las.x), fallback, dtype=dtype)

def compute_hag_and_slope(xyz, cls, cell=3.0, ground_class=2):
    """HAG and slope computed from ground (class 2) surface per grid cell."""
    x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    # ground surface per cell from class=2
    cell_min = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_min) or (zi < cell_min[k]):
            cell_min[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_min:
            hag[i] = z[i] - cell_min[k]
            has_ground[i] = True
        else:
            hag[i] = 0.0

    # local range per cell for slope proxy
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope, has_ground

def make_Xpred(las, cell=3.0):
    """
    IMPORTANT: feature order MUST match training.
    We'll use this 10-feature order:

    [0] x_rel
    [1] y_rel
    [2] z
    [3] hag
    [4] intensity
    [5] return_number
    [6] number_of_returns
    [7] scan_angle
    [8] deviation (ExtraBytes 'Deviation', else 0)
    [9] slope
    """
    xyz = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)
    cls = np.asarray(las.classification, dtype=np.int32)

    # shift x/y like training blocks did (relative coords improve generalization)
    x_rel = (xyz[:,0] - xyz[:,0].mean()).astype(np.float32)
    y_rel = (xyz[:,1] - xyz[:,1].mean()).astype(np.float32)
    z     = xyz[:,2].astype(np.float32)

    intensity = get_dim(las, "intensity", fallback=0.0, dtype=np.float32)
    rn        = get_dim(las, "return_number", fallback=1.0, dtype=np.float32)
    nr        = get_dim(las, "number_of_returns", fallback=1.0, dtype=np.float32)
    scan_ang  = get_dim(las, "scan_angle", fallback=0.0, dtype=np.float32)
    deviation = get_dim(las, "Deviation", fallback=0.0, dtype=np.float32)

    hag, slope, has_ground = compute_hag_and_slope(xyz, cls, cell=cell)

    X = np.stack([
        x_rel, y_rel, z,
        hag,
        intensity,
        rn,
        nr,
        scan_ang,
        deviation,
        slope
    ], axis=1).astype(np.float32)

    return X, xyz, cls, has_ground

# ==========================
# Model: matches your ckpt exactly
# ==========================
class PointNetFromCkptFixed(nn.Module):
    """
    mlp1: Conv(in->64), BN, ReLU, Conv(64->128), BN, ReLU
    mlp2: Conv(128->256), BN, ReLU
    head: Conv(512->256), BN, ReLU, Dropout, Conv(256->C)
    """
    def __init__(self, in_ch=10, num_classes=7, p_drop=0.3):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p_drop),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        h = self.mlp1(x)                               # (B,128,N)
        h = self.mlp2(h)                               # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]        # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])                  # (B,256,N)
        feat = torch.cat([h, g], dim=1)                 # (B,512,N)
        out = self.head(feat)                           # (B,C,N)
        return out

# ==========================
# Load checkpoint
# ==========================
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
classes     = ckpt["classes"]
class_to_idx= ckpt["class_to_idx"]
Xmean       = ckpt["Xmean"].astype(np.float32)
Xstd        = ckpt["Xstd"].astype(np.float32)
IN_CH       = int(ckpt["feats"])
NUM_CLASSES = len(classes)

print("\nLoaded CKPT classes:", classes)
print("Input feats:", IN_CH, "Num classes:", NUM_CLASSES)

model = PointNetFromCkptFixed(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(ckpt["model_state"], strict=True)
model.eval()
print("✅ Model loaded (strict).")

idx_to_class = {i:c for c,i in class_to_idx.items()}

# ==========================
# Build features for input LAS/LAZ
# ==========================
las = laspy.read(IN_LAS_OR_LAZ)
Xpred, xyz, cls_in, has_ground = make_Xpred(las, cell=CELL)
print("\nXpred:", Xpred.shape, "min/max:", float(Xpred.min()), float(Xpred.max()))

# Normalize exactly like training
Xpred_n = (Xpred - Xmean[None, :]) / (Xstd[None, :] + 1e-6)

# ==========================
# Predict in blocks
# ==========================
pred_idx = np.zeros((len(Xpred_n),), dtype=np.int64)

with torch.no_grad():
    for start in tqdm(range(0, len(Xpred_n), BLOCK), desc="Predicting blocks"):
        end = min(start + BLOCK, len(Xpred_n))
        xb = Xpred_n[start:end]

        inp = torch.from_numpy(xb).to(DEVICE)          # (N,F)
        inp = inp.unsqueeze(0).permute(0, 2, 1)        # (1,F,N)

        logits = model(inp)                            # (1,C,N)
        p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
        pred_idx[start:end] = p[:end-start]

pred_class = np.vectorize(idx_to_class.get)(pred_idx).astype(np.uint8)

# Optional safety: if no ground surface in that cell, force DEFAULT class 1
pred_class[~has_ground] = 1

las.classification = pred_class

# ==========================
# Save
# ==========================
os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)
las.write(OUT_LAS)
print("\nSaved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_class, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050

Loaded CKPT classes: [1, 2, 3, 6, 7, 12, 13]
Input feats: 10 Num classes: 7
✅ Model loaded (strict).

Xpred: (12374846, 10) min/max: -577.5 65535.0


Predicting blocks:   0%|          | 0/3022 [00:00<?, ?it/s]


Saved LAS: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_final.las
Saved LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_final.laz
Pred class counts: {1: 4259309, 2: 6828365, 3: 1287172}


In [22]:
# After you have Xtr (from one training .npz) and Xpred (from LAS) and ckpt Xmean/Xstd:

def norm_stats(X, Xmean, Xstd, name):
    Xn = (X - Xmean[None,:]) / (Xstd[None,:] + 1e-6)
    print(f"\n[{name}] normalized stats per column:")
    print("  mean:", np.round(Xn.mean(axis=0), 3))
    print("  std :", np.round(Xn.std(axis=0), 3))
    print("  min :", np.round(Xn.min(axis=0), 3))
    print("  max :", np.round(Xn.max(axis=0), 3))

norm_stats(Xtr, Xmean, Xstd, "TRAIN")
norm_stats(Xpred[:200000], Xmean, Xstd, "PRED (first 200k)")



[TRAIN] normalized stats per column:
  mean: [-0.007 -0.191 -0.001 -0.1   -0.174 -0.024 -0.023  0.157 -0.183 -0.007]
  std : [0.212 0.403 0.126 0.449 0.788 1.012 1.015 0.028 0.    0.621]
  min : [-0.471 -1.069 -0.232 -0.325 -5.348 -0.2   -0.228  0.092 -0.183 -0.575]
  max : [ 0.49   0.483  2.869 11.021  1.146 14.989 12.836  0.216 -0.183 10.458]

[PRED (first 200k)] normalized stats per column:
  mean: [-0.521 -2.518  0.488 -0.218  0.419 -0.197 -0.225  0.144 -0.183 -0.35 ]
  std : [0.033 0.052 0.045 0.133 0.186 0.091 0.105 0.02  0.    0.123]
  min : [-0.617 -2.62   0.371 -0.325 -5.348 -0.2   -0.228  0.114 -0.183 -0.601]
  max : [-0.474 -2.377  1.999  5.999  1.348  3.597  3.038  0.204 -0.183  0.062]


In [23]:
d = np.load(train_files[0])
print("feats per col mean:", d["X"].mean(axis=0))
print("feats per col std :", d["X"].std(axis=0))
print("first row:", d["X"][0])


feats per col mean: [-2.2071991e+00 -1.8538818e+00 -3.7940481e-04  2.1671166e-01
  3.5122293e+04  1.0463867e+00  1.0629883e+00  5.8276367e-01
  0.0000000e+00  8.9575773e-01]
feats per col std : [6.2053955e+01 9.5265366e+01 4.6353060e-01 4.3087605e-01 5.3501665e+03
 2.6663089e-01 3.1083465e-01 2.5165798e+01 0.0000000e+00 9.3632799e-01]
first row: [ 6.1937500e+01 -1.2750000e+02 -3.8357973e-02  5.9999943e-02
  3.4995000e+04  1.0000000e+00  1.0000000e+00  2.0000000e+01
  0.0000000e+00  1.0999999e+00]


In [24]:
import os, glob
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# =========================================================
# EDIT PATHS
# =========================================================
TRAIN_BLOCK_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"     # has .npz with keys X,y,idx
IN_LAS_OR_LAZ   = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT        = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_matched.las"
OUT_LAZ         = r"D:/lidarrrrr/anbu/New folder/dl_predicted_matched.laz"

# =========================================================
# SETTINGS (must match training)
# =========================================================
POINTS_PER_BLOCK = 4096         # N
TILE_SIZE_M      = 40.0         # IMPORTANT: set to the SAME tile size used while creating blocks
CELL_M           = 3.0          # HAG grid cell (same as before)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# =========================================================
# 1) LOAD ONE TRAIN BLOCK (just for sanity prints)
# =========================================================
train_files = sorted(glob.glob(os.path.join(TRAIN_BLOCK_DIR, "*.npz")))
if not train_files:
    raise RuntimeError("No training .npz found in TRAIN_BLOCK_DIR")

d = np.load(train_files[0])
Xtr = d["X"].astype(np.float32)
print("\n[TRAIN BLOCK]")
print("keys:", list(d.keys()))
print("X shape:", Xtr.shape)
print("col means:", Xtr.mean(axis=0))
print("col stds :", Xtr.std(axis=0))

# =========================================================
# 2) Model definition that matches your checkpoint keys
#    Your keys show:
#    mlp1.0 Conv(in->64), mlp1.1 BN64, mlp1.2 ReLU,
#    mlp1.3 Conv(64->128), mlp1.4 BN128, mlp1.5 ReLU
#    mlp2.0 Conv(128->256), mlp2.1 BN256, mlp2.2 ReLU
#    head.0 Conv(512->256), head.1 BN256, head.2 ReLU, head.3 (maybe Dropout), head.4 Conv(256->C)
# =========================================================
class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch=10, num_classes=7, p_drop=0.0):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),     # 0
            nn.BatchNorm1d(64),          # 1
            nn.ReLU(inplace=True),       # 2
            nn.Conv1d(64, 128, 1),       # 3
            nn.BatchNorm1d(128),         # 4
            nn.ReLU(inplace=True),       # 5
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),      # 0
            nn.BatchNorm1d(256),         # 1
            nn.ReLU(inplace=True),       # 2
        )
        # Keep index 3 so that "head.4.*" matches
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),      # 0
            nn.BatchNorm1d(256),         # 1
            nn.ReLU(inplace=True),       # 2
            nn.Dropout(p_drop),          # 3 (exists even if p_drop=0)
            nn.Conv1d(256, num_classes, 1),  # 4
        )

    def forward(self, x):
        # x: (B,F,N)
        h = self.mlp1(x)                         # (B,128,N)
        h = self.mlp2(h)                         # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0] # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])           # (B,256,N)
        feat = torch.cat([h, g], dim=1)          # (B,512,N)
        out = self.head(feat)                    # (B,C,N)
        return out

# =========================================================
# 3) Load checkpoint (PyTorch 2.6+ safe)
# =========================================================
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)

classes     = ckpt["classes"]          # e.g. [1,2,3,6,7,12,13]
class_to_idx= ckpt["class_to_idx"]
Xmean       = ckpt["Xmean"].astype(np.float32)
Xstd        = ckpt["Xstd"].astype(np.float32)
IN_CH       = int(ckpt["feats"])
NUM_CLASSES = len(classes)

print("\n[CKPT]")
print("classes:", classes)
print("feats:", IN_CH, "| num_classes:", NUM_CLASSES)

model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES, p_drop=0.0).to(DEVICE)
model.load_state_dict(ckpt["model_state"], strict=True)
model.eval()
print("✅ Model loaded (strict).")

idx_to_class = {i:c for c,i in class_to_idx.items()}

# =========================================================
# 4) Robust LAS dimension getter
# =========================================================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass
    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=dtype)

# =========================================================
# 5) HAG + slope (same idea as before)
# =========================================================
def compute_hag_and_slope(x, y, z, cls, cell=CELL_M, ground_class=2):
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx) / cell).astype(np.int32)
    gy = np.floor((y - miny) / cell).astype(np.int32)

    # ground surface per cell
    cell_min = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_min) or (zi < cell_min[k]):
            cell_min[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_min:
            hag[i] = z[i] - cell_min[k]
            has_ground[i] = True
        else:
            hag[i] = 0.0

    # local z-range proxy per cell
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope, has_ground

# =========================================================
# 6) Build tile index (to create LOCAL x,y,z like training)
# =========================================================
def build_tile_ids(x, y, tile_size=TILE_SIZE_M):
    minx, miny = x.min(), y.min()
    tx = np.floor((x - minx) / tile_size).astype(np.int32)
    ty = np.floor((y - miny) / tile_size).astype(np.int32)
    # combine into one id
    # (use large multiplier to avoid collisions)
    tile_id = tx.astype(np.int64) * 1_000_000 + ty.astype(np.int64)
    return tile_id

# =========================================================
# 7) Predict full LAS by processing each TILE in chunks of 4096
#     Features order MUST match training:
#     [x_local, y_local, z_local, hag, intensity, ret_num, n_returns, scan_angle, 0, slope]
# =========================================================
las = laspy.read(IN_LAS_OR_LAZ)

x = np.asarray(las.x, dtype=np.float32)
y = np.asarray(las.y, dtype=np.float32)
z = np.asarray(las.z, dtype=np.float32)
cls_in = np.asarray(las.classification, dtype=np.int32)

intensity = get_dim(las, "intensity", fallback=0.0, dtype=np.float32)
ret_num   = get_dim(las, "return_number", fallback=1.0, dtype=np.float32)
n_returns = get_dim(las, "number_of_returns", fallback=1.0, dtype=np.float32)
scan_ang  = get_dim(las, "scan_angle", fallback=0.0, dtype=np.float32)

# col8 is always 0 in training
zeros_col = np.zeros_like(x, dtype=np.float32)

hag, slope, has_ground = compute_hag_and_slope(x, y, z, cls_in, cell=CELL_M, ground_class=2)

tile_id = build_tile_ids(x, y, tile_size=TILE_SIZE_M)
unique_tiles = np.unique(tile_id)

pred_idx_all = np.zeros((len(x),), dtype=np.int64)

print("\nTotal points:", len(x), "| tiles:", len(unique_tiles))

with torch.no_grad():
    pbar_tiles = tqdm(unique_tiles, desc="Predicting tiles")
    for tid in pbar_tiles:
        inds = np.where(tile_id == tid)[0]
        if len(inds) == 0:
            continue

        # TILE center (local coords)
        xm = float(x[inds].mean())
        ym = float(y[inds].mean())
        zm = float(z[inds].mean())

        # process this tile in chunks of 4096
        for start in range(0, len(inds), POINTS_PER_BLOCK):
            part = inds[start:start + POINTS_PER_BLOCK]

            # pad to 4096 if needed
            if len(part) < POINTS_PER_BLOCK:
                pad = np.random.choice(part, size=(POINTS_PER_BLOCK - len(part)), replace=True)
                part_full = np.concatenate([part, pad])
                valid_len = len(part)
            else:
                part_full = part
                valid_len = len(part)

            # ====== build X block in TRAIN ORDER ======
            x_local = x[part_full] - xm
            y_local = y[part_full] - ym
            z_local = z[part_full] - zm

            Xblk = np.stack([
                x_local,                 # col0
                y_local,                 # col1
                z_local,                 # col2
                hag[part_full],          # col3
                intensity[part_full],    # col4
                ret_num[part_full],      # col5
                n_returns[part_full],    # col6
                scan_ang[part_full],     # col7
                zeros_col[part_full],    # col8
                slope[part_full],        # col9
            ], axis=1).astype(np.float32)

            # normalize EXACTLY like training
            Xblk_n = (Xblk - Xmean[None, :]) / (Xstd[None, :] + 1e-6)

            inp = torch.from_numpy(Xblk_n).to(DEVICE).unsqueeze(0).permute(0, 2, 1)  # (1,F,N)
            logits = model(inp)                       # (1,C,N)
            p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()  # (N,)

            pred_idx_all[part] = p[:valid_len]

# map back to LAS class codes
pred_class = np.vectorize(idx_to_class.get)(pred_idx_all).astype(np.uint8)

las.classification = pred_class
las.write(OUT_LAS)
print("\nSaved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_class, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))

# sanity check: compare normalized stats for first 200k points
Ncheck = min(200_000, len(x))
# rebuild exactly same X for those points (tile-local)
inds0 = np.arange(Ncheck)
# compute local per-point using its tile mean (vectorized)
# (fast approx: use per-tile mean via dict)
means = {}
for tid in np.unique(tile_id[inds0]):
    ii = inds0[tile_id[inds0] == tid]
    means[tid] = (float(x[ii].mean()), float(y[ii].mean()), float(z[ii].mean()))
xm0 = np.array([means[t][0] for t in tile_id[inds0]], dtype=np.float32)
ym0 = np.array([means[t][1] for t in tile_id[inds0]], dtype=np.float32)
zm0 = np.array([means[t][2] for t in tile_id[inds0]], dtype=np.float32)

X0 = np.stack([
    x[inds0]-xm0, y[inds0]-ym0, z[inds0]-zm0, hag[inds0], intensity[inds0],
    ret_num[inds0], n_returns[inds0], scan_ang[inds0], zeros_col[inds0], slope[inds0]
], axis=1).astype(np.float32)
X0n = (X0 - Xmean[None,:]) / (Xstd[None,:] + 1e-6)

print("\n[PRED normalized quick stats first 200k]")
print("mean:", np.round(X0n.mean(axis=0), 3))
print("std :", np.round(X0n.std(axis=0), 3))
print("min :", np.round(X0n.min(axis=0), 3))
print("max :", np.round(X0n.max(axis=0), 3))


Device: cuda
GPU: NVIDIA GeForce RTX 3050

[TRAIN BLOCK]
keys: ['X', 'y', 'idx']
X shape: (4096, 10)
col means: [-2.2071991e+00 -1.8538818e+00 -3.7940481e-04  2.1671166e-01
  3.5122293e+04  1.0463867e+00  1.0629883e+00  5.8276367e-01
  0.0000000e+00  8.9575773e-01]
col stds : [6.2053955e+01 9.5265366e+01 4.6353060e-01 4.3087605e-01 5.3501665e+03
 2.6663089e-01 3.1083465e-01 2.5165798e+01 0.0000000e+00 9.3632799e-01]

[CKPT]
classes: [1, 2, 3, 6, 7, 12, 13]
feats: 10 | num_classes: 7
✅ Model loaded (strict).

Total points: 12374846 | tiles: 120


Predicting tiles:   0%|          | 0/120 [00:00<?, ?it/s]


Saved LAS: D:/lidarrrrr/anbu/New folder/dl_predicted_matched.las
Saved LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted_matched.laz
Pred class counts: {1: 2471478, 2: 9592233, 3: 8518, 6: 302368, 12: 249}

[PRED normalized quick stats first 200k]
mean: [ 0.    -0.183 -0.001 -0.218  0.419 -0.197 -0.225  0.144 -0.183 -0.35 ]
std : [0.015 0.036 0.041 0.133 0.186 0.091 0.105 0.02  0.    0.123]
min : [-0.046 -0.268 -0.104 -0.325 -5.348 -0.2   -0.228  0.114 -0.183 -0.601]
max : [ 0.049 -0.113  1.523  5.999  1.348  3.597  3.038  0.204 -0.183  0.062]


In [None]:
d = np.load(train_files[0])
print("X first row:", d["X"][0])
print("X mean:", d["X"].mean(axis=0))
print("X std :", d["X"].std(axis=0))


In [1]:
import os, glob
import numpy as np

BLOCK_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
files = sorted(glob.glob(os.path.join(BLOCK_DIR, "*.npz")))
assert files, "No blocks found"

# Welford streaming mean/std
n = 0
mean = None
M2 = None

for fp in files:
    d = np.load(fp)
    X = d["X"].astype(np.float64)  # (4096,10)
    if mean is None:
        mean = np.zeros(X.shape[1], dtype=np.float64)
        M2   = np.zeros(X.shape[1], dtype=np.float64)

    n_batch = X.shape[0]
    n_new = n + n_batch

    batch_mean = X.mean(axis=0)
    batch_var  = X.var(axis=0)

    delta = batch_mean - mean
    mean = mean + delta * (n_batch / n_new)

    # combine variances
    M2 = M2 + batch_var * n_batch + (delta**2) * (n * n_batch / n_new)

    n = n_new

std = np.sqrt(M2 / max(n - 1, 1))

mean = mean.astype(np.float32)
std  = std.astype(np.float32)

print("Xmean:", mean)
print("Xstd :", std)


Xmean: [ 2.4770452e-01  4.4691383e+01 -1.3224035e-05  3.2253304e-01
  3.6415633e+04  1.0537578e+00  1.0711324e+00 -1.5879369e+02
  8.8634253e-01  9.2550790e-01]
Xstd : [2.9587668e+02 2.3407706e+02 3.8826628e+00 1.0009346e+00 6.9030566e+03
 2.6688504e-01 3.0669588e-01 9.5657166e+02 4.5917196e+00 1.5607965e+00]


In [1]:
import os, glob
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# ==============================
# PATHS
# ==============================
IN_LAS_OR_LAZ = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT      = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7.las"
OUT_LAZ       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7.laz"

# ==============================
# SETTINGS (IMPORTANT)
# ==============================
CELL      = 3.0      # same HAG grid cell
TILE_SIZE = 40.0     # MUST match dataset tiling used in block creation
DEVICE    = "cuda" if torch.cuda.is_available() else "cpu"

# block size used in training (points per block)
BLOCK_N = 4096

print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ==============================
# USE YOUR DATASET NORMALIZATION
# ==============================
Xmean = np.array([ 2.4770452e-01,  4.4691383e+01, -1.3224035e-05,  3.2253304e-01,
                   3.6415633e+04,  1.0537578e+00,  1.0711324e+00, -1.5879369e+02,
                   8.8634253e-01,  9.2550790e-01], dtype=np.float32)

Xstd  = np.array([2.9587668e+02, 2.3407706e+02, 3.8826628e+00, 1.0009346e+00,
                  6.9030566e+03, 2.6688504e-01, 3.0669588e-01, 9.5657166e+02,
                  4.5917196e+00, 1.5607965e+00], dtype=np.float32)

# ==============================
# LAS dim getter
# ==============================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass

    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass

    # common alt names
    if name == "return_number":
        for alt in ["return_number", "return_num"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass

    if name == "number_of_returns":
        for alt in ["number_of_returns", "num_returns"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass

    # RGB often missing
    return np.full(len(las.x), fallback, dtype=dtype)

# ==============================
# HAG + slope (grid-based)
# ==============================
def compute_hag_and_slope(xyz, cls, cell=3.0, ground_class=2):
    x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    # ground zmin per cell
    cell_gmin = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_gmin) or (zi < cell_gmin[k]):
            cell_gmin[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_gmin:
            hag[i] = z[i] - cell_gmin[k]
            has_ground[i] = True
        else:
            hag[i] = 0.0

    # zmin/zmax per cell for local range
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope, has_ground

# ==============================
# Build X for a tile (tile-wise XY centering)
# Feature order MUST match training:
# [x,y,z,hag,inten,rn,nr,scan,deviation,slope]
# ==============================
def build_features_for_indices(las, idx, cell=3.0):
    x = np.asarray(las.x, dtype=np.float32)[idx]
    y = np.asarray(las.y, dtype=np.float32)[idx]
    z = np.asarray(las.z, dtype=np.float32)[idx]
    xyz = np.stack([x,y,z], axis=1)

    cls = np.asarray(las.classification, dtype=np.int32)[idx]

    inten = get_dim(las, "intensity", 0.0)[idx]
    rn    = get_dim(las, "return_number", 1.0)[idx]
    nr    = get_dim(las, "number_of_returns", 1.0)[idx]
    scan  = get_dim(las, "scan_angle", 0.0)[idx]
    dev   = get_dim(las, "Deviation", 0.0)[idx]

    # tile-wise centering for XY (THIS is usually what blocks used)
    x0 = x.mean()
    y0 = y.mean()
    x_local = x - x0
    y_local = y - y0

    # HAG/slope computed using *tile xyz + tile cls*
    hag, slope, has_ground = compute_hag_and_slope(xyz, cls, cell=cell)

    X = np.stack([
        x_local, y_local, z,
        hag,
        inten,
        rn,
        nr,
        scan,
        dev,
        slope
    ], axis=1).astype(np.float32)

    return X, has_ground

# ==============================
# Model (must match ckpt)
# ==============================
class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch=10, num_classes=7):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),

            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        h = self.mlp1(x)                       # (B,128,N)
        h = self.mlp2(h)                       # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]  # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])         # (B,256,N)
        feat = torch.cat([h, g], dim=1)        # (B,512,N)
        out = self.head(feat)                  # (B,C,N)
        return out

# ==============================
# Load checkpoint
# ==============================
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
classes = ckpt["classes"]
class_to_idx = ckpt["class_to_idx"]
idx_to_class = {i:c for c,i in class_to_idx.items()}

IN_CH = int(ckpt["feats"])
NUM_CLASSES = len(classes)

print("Loaded CKPT classes:", classes)
print("Input feats:", IN_CH, "Num classes:", NUM_CLASSES)

model = PointNetFromCkpt(in_ch=IN_CH, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(ckpt["model_state"], strict=True)
model.eval()
print("✅ Model loaded (strict).")

# ==============================
# Predict by tiles
# ==============================
las = laspy.read(IN_LAS_OR_LAZ)
N = len(las.x)
xyz_all = np.vstack([las.x, las.y, las.z]).T.astype(np.float32)

# tile indexing on XY
x_all = xyz_all[:,0]
y_all = xyz_all[:,1]
minx, miny = x_all.min(), y_all.min()
tx = np.floor((x_all - minx) / TILE_SIZE).astype(np.int32)
ty = np.floor((y_all - miny) / TILE_SIZE).astype(np.int32)
tile_key = tx.astype(np.int64) * 1_000_000 + ty.astype(np.int64)

# group indices per tile
order = np.argsort(tile_key)
tile_key_s = tile_key[order]
uniq, start = np.unique(tile_key_s, return_index=True)

pred_out = np.zeros(N, dtype=np.uint8)

with torch.no_grad():
    for i in tqdm(range(len(uniq)), desc="Predicting tiles"):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(order)
        idx = order[a:b]

        # build tile features
        X, has_ground = build_features_for_indices(las, idx, cell=CELL)

        # normalize
        Xn = (X - Xmean[None, :]) / (Xstd[None, :] + 1e-6)

        # predict in chunks of BLOCK_N (4096)
        out_idx = np.zeros(len(idx), dtype=np.int64)
        for s in range(0, len(idx), BLOCK_N):
            e = min(s + BLOCK_N, len(idx))
            xb = Xn[s:e]

            inp = torch.from_numpy(xb).to(DEVICE)          # (n,f)
            inp = inp.unsqueeze(0).permute(0, 2, 1)        # (1,f,n)

            logits = model(inp)                             # (1,C,n)
            p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
            out_idx[s:e] = p

        # map to LAS classes
        pred_cls = np.vectorize(idx_to_class.get)(out_idx).astype(np.uint8)
        pred_out[idx] = pred_cls

# write output
las.classification = pred_out
las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed (ok):", e)

u, c = np.unique(pred_out, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050


KeyError: 'classes'

In [2]:
import torch

MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
ckpt = torch.load(MODEL_PT, map_location="cpu", weights_only=False)

print("Type:", type(ckpt))

if isinstance(ckpt, dict):
    print("Top-level keys:", list(ckpt.keys())[:50])
    # if it looks like a state_dict, print a few parameter keys
    # (state_dict keys usually contain '.weight' / '.bias')
    sample_keys = list(ckpt.keys())[:20]
    print("Sample keys:", sample_keys)
else:
    print("Not a dict. Example repr:", repr(ckpt)[:500])


Type: <class 'dict'>
Top-level keys: ['model_state', 'num_classes', 'class_weights']
Sample keys: ['model_state', 'num_classes', 'class_weights']


In [3]:
import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

IN_LAS_OR_LAZ = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT      = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7.las"
OUT_LAZ       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7.laz"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# ===== Your 7-class label set (must match training) =====
CLASSES = [1, 2, 3, 6, 7, 12, 13]  # order must match training
NUM_CLASSES = len(CLASSES)
idx_to_class = {i: c for i, c in enumerate(CLASSES)}

# ===== Your normalization (the one you posted) =====
Xmean = np.array([ 2.4770452e-01,  4.4691383e+01, -1.3224035e-05,  3.2253304e-01,
                   3.6415633e+04,  1.0537578e+00,  1.0711324e+00, -1.5879369e+02,
                   8.8634253e-01,  9.2550790e-01], dtype=np.float32)

Xstd  = np.array([2.9587668e+02, 2.3407706e+02, 3.8826628e+00, 1.0009346e+00,
                  6.9030566e+03, 2.6688504e-01, 3.0669588e-01, 9.5657166e+02,
                  4.5917196e+00, 1.5607965e+00], dtype=np.float32)

# ===== Feature build settings =====
CELL      = 3.0
TILE_SIZE = 40.0
BLOCK_N   = 4096

def get_dim(las, name, fallback=0.0, dtype=np.float32):
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass
    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=dtype)

def compute_hag_and_slope(xyz, cls, cell=3.0, ground_class=2):
    x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    cell_gmin = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_gmin) or (zi < cell_gmin[k]):
            cell_gmin[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_gmin:
            hag[i] = z[i] - cell_gmin[k]
            has_ground[i] = True

    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope, has_ground

def build_features_for_indices(las, idx):
    x = np.asarray(las.x, dtype=np.float32)[idx]
    y = np.asarray(las.y, dtype=np.float32)[idx]
    z = np.asarray(las.z, dtype=np.float32)[idx]
    xyz = np.stack([x,y,z], axis=1)
    cls = np.asarray(las.classification, dtype=np.int32)[idx]

    inten = get_dim(las, "intensity", 0.0)[idx]
    rn    = get_dim(las, "return_number", 1.0)[idx]
    nr    = get_dim(las, "number_of_returns", 1.0)[idx]
    scan  = get_dim(las, "scan_angle", 0.0)[idx]
    dev   = get_dim(las, "Deviation", 0.0)[idx]

    # tile-wise centering
    x_local = x - x.mean()
    y_local = y - y.mean()

    hag, slope, has_ground = compute_hag_and_slope(xyz, cls, cell=CELL)

    X = np.stack([x_local, y_local, z, hag, inten, rn, nr, scan, dev, slope], axis=1).astype(np.float32)
    return X

class PointNetFromCkpt(nn.Module):
    def __init__(self, in_ch=10, num_classes=7):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1), nn.BatchNorm1d(64), nn.ReLU(True),
            nn.Conv1d(64, 64, 1),    nn.BatchNorm1d(64), nn.ReLU(True),
            nn.Conv1d(64, 128, 1),   nn.BatchNorm1d(128), nn.ReLU(True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1), nn.BatchNorm1d(256), nn.ReLU(True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1), nn.BatchNorm1d(256), nn.ReLU(True),
            nn.Conv1d(256, num_classes, 1),
        )

    def forward(self, x):
        h = self.mlp1(x)                      # (B,128,N)
        h = self.mlp2(h)                      # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0].repeat(1,1,h.shape[2])
        feat = torch.cat([h, g], dim=1)       # (B,512,N)
        return self.head(feat)                # (B,C,N)

# ---- load state_dict only ----
state = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
model = PointNetFromCkpt(in_ch=10, num_classes=NUM_CLASSES).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()
print("✅ Loaded weights (state_dict).")

# ---- predict tile-wise ----
las = laspy.read(IN_LAS_OR_LAZ)
N = len(las.x)

xy = np.vstack([las.x, las.y]).T.astype(np.float32)
minx, miny = xy[:,0].min(), xy[:,1].min()
tx = np.floor((xy[:,0] - minx)/TILE_SIZE).astype(np.int32)
ty = np.floor((xy[:,1] - miny)/TILE_SIZE).astype(np.int32)
tile_key = tx.astype(np.int64)*1_000_000 + ty.astype(np.int64)

order = np.argsort(tile_key)
tile_key_s = tile_key[order]
uniq, start = np.unique(tile_key_s, return_index=True)

pred_cls_all = np.zeros(N, dtype=np.uint8)

with torch.no_grad():
    for i in tqdm(range(len(uniq)), desc="Predicting tiles"):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(order)
        idx = order[a:b]

        X = build_features_for_indices(las, idx)
        Xn = (X - Xmean[None,:]) / (Xstd[None,:] + 1e-6)

        out_idx = np.zeros(len(idx), dtype=np.int64)
        for s in range(0, len(idx), BLOCK_N):
            e = min(s+BLOCK_N, len(idx))
            xb = Xn[s:e]
            inp = torch.from_numpy(xb).to(DEVICE).unsqueeze(0).permute(0,2,1)
            logits = model(inp)
            p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
            out_idx[s:e] = p

        pred_cls_all[idx] = np.vectorize(idx_to_class.get)(out_idx).astype(np.uint8)

las.classification = pred_cls_all
las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed:", e)

u, c = np.unique(pred_cls_all, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050


RuntimeError: Error(s) in loading state_dict for PointNetFromCkpt:
	Missing key(s) in state_dict: "mlp1.0.weight", "mlp1.0.bias", "mlp1.1.weight", "mlp1.1.bias", "mlp1.1.running_mean", "mlp1.1.running_var", "mlp1.3.weight", "mlp1.3.bias", "mlp1.4.weight", "mlp1.4.bias", "mlp1.4.running_mean", "mlp1.4.running_var", "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var", "mlp2.0.weight", "mlp2.0.bias", "mlp2.1.weight", "mlp2.1.bias", "mlp2.1.running_mean", "mlp2.1.running_var", "head.0.weight", "head.0.bias", "head.1.weight", "head.1.bias", "head.1.running_mean", "head.1.running_var", "head.3.weight", "head.3.bias". 
	Unexpected key(s) in state_dict: "model_state", "num_classes", "class_weights". 

In [4]:
import os
import numpy as np
import laspy
import torch
import torch.nn as nn
from tqdm.auto import tqdm

# =========================================================
# PATHS
# =========================================================
IN_LAS_OR_LAZ = r"D:/lidarrrrr/anbu/New folder/stage1_outputs/DX3035724_stage1_ground_v2.las"
MODEL_PT      = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"
OUT_LAS       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7_fixed.las"
OUT_LAZ       = r"D:/lidarrrrr/anbu/New folder/dl_pred_pointnet7_fixed.laz"

# =========================================================
# SETTINGS
# =========================================================
DEVICE    = "cuda" if torch.cuda.is_available() else "cpu"
CELL      = 3.0
TILE_SIZE = 40.0
BLOCK_N   = 4096

print("Device:", DEVICE)
if DEVICE == "cuda":
    print("GPU:", torch.cuda.get_device_name(0))

# =========================================================
# LAS helpers
# =========================================================
def get_dim(las, name, fallback=0.0, dtype=np.float32):
    try:
        return np.asarray(las[name]).astype(dtype)
    except Exception:
        pass
    if name == "scan_angle":
        for alt in ["scan_angle", "scan_angle_rank"]:
            try:
                return np.asarray(las[alt]).astype(dtype)
            except Exception:
                pass
    return np.full(len(las.x), fallback, dtype=dtype)

def compute_hag_and_slope(xyz, cls, cell=3.0, ground_class=2):
    x, y, z = xyz[:,0], xyz[:,1], xyz[:,2]
    minx, miny = x.min(), y.min()
    gx = np.floor((x - minx)/cell).astype(np.int32)
    gy = np.floor((y - miny)/cell).astype(np.int32)

    # ground min per cell
    cell_gmin = {}
    g_idx = np.where(cls == ground_class)[0]
    for i in g_idx:
        k = (gx[i], gy[i])
        zi = float(z[i])
        if (k not in cell_gmin) or (zi < cell_gmin[k]):
            cell_gmin[k] = zi

    hag = np.zeros(len(z), dtype=np.float32)
    has_ground = np.zeros(len(z), dtype=bool)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        if k in cell_gmin:
            hag[i] = z[i] - cell_gmin[k]
            has_ground[i] = True

    # z range per cell
    cell_zmin, cell_zmax = {}, {}
    for i in range(len(z)):
        k = (gx[i], gy[i])
        zi = float(z[i])
        if k not in cell_zmin:
            cell_zmin[k] = zi
            cell_zmax[k] = zi
        else:
            if zi < cell_zmin[k]: cell_zmin[k] = zi
            if zi > cell_zmax[k]: cell_zmax[k] = zi

    local_range = np.zeros(len(z), dtype=np.float32)
    for i in range(len(z)):
        k = (gx[i], gy[i])
        local_range[i] = float(cell_zmax[k] - cell_zmin[k])

    slope = (hag / (local_range + 1e-6)).astype(np.float32)
    return hag, slope

def build_features_for_indices(las, idx):
    x = np.asarray(las.x, dtype=np.float32)[idx]
    y = np.asarray(las.y, dtype=np.float32)[idx]
    z = np.asarray(las.z, dtype=np.float32)[idx]
    xyz = np.stack([x,y,z], axis=1)
    cls = np.asarray(las.classification, dtype=np.int32)[idx]

    inten = get_dim(las, "intensity", 0.0)[idx]
    rn    = get_dim(las, "return_number", 1.0)[idx]
    nr    = get_dim(las, "number_of_returns", 1.0)[idx]
    scan  = get_dim(las, "scan_angle", 0.0)[idx]
    dev   = get_dim(las, "Deviation", 0.0)[idx]

    # IMPORTANT: tile-local centering for x,y (matches your later fix)
    x_local = x - x.mean()
    y_local = y - y.mean()

    hag, slope = compute_hag_and_slope(xyz, cls, cell=CELL)

    # 10 features (your training order)
    X = np.stack([x_local, y_local, z, hag, inten, rn, nr, scan, dev, slope], axis=1).astype(np.float32)
    return X

# =========================================================
# MODEL: match your checkpoint keys
# Needed keys from your error:
#  mlp1.0/1/3/4/6/7  (conv+bn pairs)
#  mlp2.0/1
#  head.0/1/3
# =========================================================
class PointNetFromCkptV3(nn.Module):
    def __init__(self, in_ch=10, num_classes=7):
        super().__init__()
        # mlp1: 3 conv blocks (64->64->128) but with 6/7 present => actually 4 convs:
        # conv64, conv64, conv128, conv128 (based on typical patterns + your keys)
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1), nn.BatchNorm1d(64), nn.ReLU(True),   # 0,1
            nn.Conv1d(64, 64, 1),    nn.BatchNorm1d(64), nn.ReLU(True),   # 3,4
            nn.Conv1d(64, 128, 1),   nn.BatchNorm1d(128), nn.ReLU(True),  # 6,7
        )
        # mlp2
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1), nn.BatchNorm1d(256), nn.ReLU(True)    # 0,1
        )
        # head (512 -> 256 -> C) with head.3 being final conv
        self.head = nn.Sequential(
            nn.Conv1d(512, 256, 1), nn.BatchNorm1d(256), nn.ReLU(True),   # 0,1
            nn.Conv1d(256, num_classes, 1),                               # 3
        )

    def forward(self, x):
        # x: (B,F,N)
        h = self.mlp1(x)                                # (B,128,N)
        h = self.mlp2(h)                                # (B,256,N)
        g = torch.max(h, dim=2, keepdim=True)[0]         # (B,256,1)
        g = g.repeat(1, 1, h.shape[2])                   # (B,256,N)
        feat = torch.cat([h, g], dim=1)                  # (B,512,N)
        return self.head(feat)                           # (B,C,N)

# =========================================================
# LOAD CHECKPOINT PROPERLY
# =========================================================
ckpt = torch.load(MODEL_PT, map_location=DEVICE, weights_only=False)
print("CKPT keys:", list(ckpt.keys()))

state = ckpt["model_state"]
NUM_CLASSES = int(ckpt.get("num_classes", 7))

# IMPORTANT: If you trained on these 7 LiDAR classes, keep this mapping
CLASSES = [1, 2, 3, 6, 7, 12, 13]
if NUM_CLASSES != len(CLASSES):
    print("⚠️ num_classes in ckpt =", NUM_CLASSES, "but CLASSES length =", len(CLASSES))
    print("   If this mismatch is real, tell me your training class list.")

idx_to_class = {i: c for i, c in enumerate(CLASSES)}

# Normalization (use the values you already printed)
Xmean = np.array([ 2.4770452e-01,  4.4691383e+01, -1.3224035e-05,  3.2253304e-01,
                   3.6415633e+04,  1.0537578e+00,  1.0711324e+00, -1.5879369e+02,
                   8.8634253e-01,  9.2550790e-01], dtype=np.float32)
Xstd  = np.array([2.9587668e+02, 2.3407706e+02, 3.8826628e+00, 1.0009346e+00,
                  6.9030566e+03, 2.6688504e-01, 3.0669588e-01, 9.5657166e+02,
                  4.5917196e+00, 1.5607965e+00], dtype=np.float32)

model = PointNetFromCkptV3(in_ch=10, num_classes=len(CLASSES)).to(DEVICE)
model.load_state_dict(state, strict=True)
model.eval()
print("✅ Model loaded (strict).")

# =========================================================
# PREDICT (tile-wise)
# =========================================================
las = laspy.read(IN_LAS_OR_LAZ)
N = len(las.x)
print("Total points:", N)

xy = np.vstack([las.x, las.y]).T.astype(np.float32)
minx, miny = xy[:,0].min(), xy[:,1].min()
tx = np.floor((xy[:,0] - minx)/TILE_SIZE).astype(np.int32)
ty = np.floor((xy[:,1] - miny)/TILE_SIZE).astype(np.int32)
tile_key = tx.astype(np.int64)*1_000_000 + ty.astype(np.int64)

order = np.argsort(tile_key)
tile_key_s = tile_key[order]
uniq, start = np.unique(tile_key_s, return_index=True)

pred_all = np.zeros(N, dtype=np.uint8)

with torch.no_grad():
    for i in tqdm(range(len(uniq)), desc="Predicting tiles"):
        a = start[i]
        b = start[i+1] if i+1 < len(uniq) else len(order)
        idx = order[a:b]

        X = build_features_for_indices(las, idx)
        Xn = (X - Xmean[None,:]) / (Xstd[None,:] + 1e-6)

        out_idx = np.zeros(len(idx), dtype=np.int64)
        for s in range(0, len(idx), BLOCK_N):
            e = min(s + BLOCK_N, len(idx))
            xb = Xn[s:e]
            inp = torch.from_numpy(xb).to(DEVICE).unsqueeze(0).permute(0,2,1)  # (1,F,N)
            logits = model(inp)
            p = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
            out_idx[s:e] = p

        pred_all[idx] = np.vectorize(idx_to_class.get)(out_idx).astype(np.uint8)

las.classification = pred_all
las.write(OUT_LAS)
print("Saved LAS:", OUT_LAS)
try:
    las.write(OUT_LAZ)
    print("Saved LAZ:", OUT_LAZ)
except Exception as e:
    print("LAZ write failed:", e)

u, c = np.unique(pred_all, return_counts=True)
print("Pred class counts:", dict(zip(u.tolist(), c.tolist())))


Device: cuda
GPU: NVIDIA GeForce RTX 3050
CKPT keys: ['model_state', 'num_classes', 'class_weights']
⚠️ num_classes in ckpt = 5 but CLASSES length = 7
   If this mismatch is real, tell me your training class list.


RuntimeError: Error(s) in loading state_dict for PointNetFromCkptV3:
	Missing key(s) in state_dict: "mlp1.0.weight", "mlp1.0.bias", "mlp1.1.weight", "mlp1.1.bias", "mlp1.1.running_mean", "mlp1.1.running_var", "mlp1.3.weight", "mlp1.3.bias", "mlp1.4.weight", "mlp1.4.bias", "mlp1.4.running_mean", "mlp1.4.running_var", "mlp1.6.weight", "mlp1.6.bias", "mlp1.7.weight", "mlp1.7.bias", "mlp1.7.running_mean", "mlp1.7.running_var", "mlp2.0.weight", "mlp2.0.bias", "mlp2.1.weight", "mlp2.1.bias", "mlp2.1.running_mean", "mlp2.1.running_var", "head.0.weight", "head.0.bias", "head.1.weight", "head.1.bias", "head.1.running_mean", "head.1.running_var", "head.3.weight", "head.3.bias". 
	Unexpected key(s) in state_dict: "mlp3.weight", "mlp3.bias", "fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias", "mlp1.weight", "mlp1.bias", "mlp2.weight", "mlp2.bias". 

In [6]:
import os
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import laspy

# ============================================================
# USER PATHS
# ============================================================
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"   # your ckpt
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.laz"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ============================================================
# CLASSES (raw LAS -> model 0..6 and back)
# ============================================================
RAW_CLASSES = [1, 2, 3, 6, 7, 12, 13]  # LAS codes
RAW_TO_CONTIG = {c: i for i, c in enumerate(RAW_CLASSES)}
CONTIG_TO_RAW = {i: c for i, c in enumerate(RAW_CLASSES)}
NUM_CLASSES = len(RAW_CLASSES)

# ============================================================
# NORMALIZATION STATS (PUT YOUR REAL ONES HERE)
# If you already have Xmean/Xstd from training, paste them exactly.
# Otherwise keep identity; model may perform worse.
# ============================================================
XMEAN = np.zeros(10, dtype=np.float32)
XSTD  = np.ones(10, dtype=np.float32)

# Example (your earlier std snippet looked like large values; paste full mean/std if you have them)
# XMEAN = np.array([...], dtype=np.float32)
# XSTD  = np.array([...], dtype=np.float32)

def normalize_X(X: np.ndarray) -> np.ndarray:
    std = np.where(XSTD == 0, 1.0, XSTD).astype(np.float32)
    return (X.astype(np.float32) - XMEAN.astype(np.float32)) / std


# ============================================================
# MODEL LOADING (robust for "legacy" ckpts)
# ============================================================
class PointNetLegacyAuto(nn.Module):
    """
    Matches ckpt keys:
      mlp1, mlp2, mlp3 (Conv1d)
      fc1, fc2 (Conv1d for segmentation)
    Outputs per-point logits: (B, N, C)
    """
    def __init__(self, state, in_ch=10, force_num_classes=None):
        super().__init__()

        def conv_shape(name):
            w = state[f"{name}.weight"]
            if w.ndim != 3:
                raise RuntimeError(f"{name}.weight expected 3D Conv1d weight, got {tuple(w.shape)}")
            return int(w.shape[0]), int(w.shape[1])

        o1, i1 = conv_shape("mlp1")
        o2, i2 = conv_shape("mlp2")
        o3, i3 = conv_shape("mlp3")

        if i1 != in_ch:
            print(f"⚠️ in_ch mismatch: ckpt expects {i1}, you set {in_ch}. Using ckpt value {i1}.")
            in_ch = i1

        self.mlp1 = nn.Conv1d(in_ch, o1, 1, bias=True)
        self.mlp2 = nn.Conv1d(o1,  o2, 1, bias=True)
        self.mlp3 = nn.Conv1d(o2,  o3, 1, bias=True)

        fc1_w = state["fc1.weight"]
        fc2_w = state["fc2.weight"]

        # segmentation expected (Conv1d)
        if fc2_w.ndim != 3:
            raise RuntimeError(
                f"Your ckpt looks like GLOBAL classification (fc2.weight is {tuple(fc2_w.shape)}). "
                f"This script expects per-point segmentation ckpt (Conv1d head)."
            )

        fc1_out, fc1_in = int(fc1_w.shape[0]), int(fc1_w.shape[1])
        fc2_out, fc2_in = int(fc2_w.shape[0]), int(fc2_w.shape[1])

        self.fc1 = nn.Conv1d(fc1_in, fc1_out, 1, bias=True)
        outc = force_num_classes if force_num_classes is not None else fc2_out
        self.fc2 = nn.Conv1d(fc2_in, outc, 1, bias=True)

    def forward(self, x):
        # x: (B,N,C)
        x = x.transpose(1, 2)  # (B,C,N)
        x = F.relu(self.mlp1(x))
        x = F.relu(self.mlp2(x))
        x = F.relu(self.mlp3(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)        # (B,C,N)
        return x.transpose(1, 2)  # (B,N,C)


def load_model(model_path: str, in_ch: int = 10, target_classes: int = 7) -> nn.Module:
    ckpt = torch.load(model_path, map_location=DEVICE)
    state = ckpt.get("model_state", ckpt)

    keys = list(state.keys())
    print("CKPT keys sample:", keys[:12])

    # Legacy naming
    if ("mlp1.weight" in state) and ("fc2.weight" in state):
        ckpt_classes = int(state["fc2.weight"].shape[0])
        print("✅ Legacy ckpt detected. ckpt head classes =", ckpt_classes)

        if ckpt_classes != target_classes:
            print(f"⚠️ Head class mismatch: ckpt {ckpt_classes} vs target {target_classes}. "
                  f"Will load backbone and replace head.")
            model = PointNetLegacyAuto(state, in_ch=in_ch, force_num_classes=target_classes).to(DEVICE)

            # Load all but fc2.* (new head)
            filtered = {k: v for k, v in state.items() if not k.startswith("fc2.")}
            missing, unexpected = model.load_state_dict(filtered, strict=False)
            print("Loaded backbone. Missing:", missing, "Unexpected:", unexpected)

        else:
            model = PointNetLegacyAuto(state, in_ch=in_ch).to(DEVICE)
            model.load_state_dict(state, strict=True)
            print("✅ Loaded strict=True.")

        model.eval()
        return model

    raise RuntimeError(
        "Checkpoint format not recognized by this script.\n"
        "If your ckpt is from a different PointNet implementation (Sequential keys like mlp1.0.weight), "
        "share the ckpt keys list and I’ll adapt the loader."
    )


# ============================================================
# FEATURE BUILDING FROM LAZ
# Your 10 features:
# (x_local, y_local, z, hag, intensity, return_number, number_of_returns, scan_angle, deviation, slope)
# ============================================================
def get_dim_safe(las, name, default=0.0):
    """Return dimension array if exists, else a constant array."""
    try:
        arr = getattr(las, name)
        return np.asarray(arr)
    except Exception:
        try:
            arr = las[name]  # extra dims
            return np.asarray(arr)
        except Exception:
            return None

def compute_hag_grid(x, y, z, cell=1.0, percentile=5):
    """
    Simple HAG approximation:
    - grid the XY plane
    - estimate ground per cell as low percentile of Z
    - HAG = Z - ground_z(cell)
    """
    x0 = x.min()
    y0 = y.min()
    gx = np.floor((x - x0) / cell).astype(np.int32)
    gy = np.floor((y - y0) / cell).astype(np.int32)

    # Hash cell id
    key = (gx.astype(np.int64) << 32) ^ gy.astype(np.int64)
    order = np.argsort(key)
    key_s = key[order]
    z_s = z[order]

    ground = np.empty_like(z_s, dtype=np.float32)

    # iterate groups
    start = 0
    n = len(z_s)
    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1
        z_cell = z_s[start:end]
        g = np.percentile(z_cell, percentile).astype(np.float32)
        ground[start:end] = g
        start = end

    # unsort
    ground_unsorted = np.empty_like(ground)
    ground_unsorted[order] = ground
    hag = (z.astype(np.float32) - ground_unsorted.astype(np.float32))
    # clamp negative small noise
    hag = np.maximum(hag, 0.0)
    return hag

def compute_slope_from_hag(x_local, y_local, hag, cell=1.0):
    """
    Approx slope proxy:
    - rasterize hag min per cell then compute local gradient magnitude
    For speed, we compute slope=0 if too heavy.
    """
    try:
        x0 = x_local.min()
        y0 = y_local.min()
        gx = np.floor((x_local - x0) / cell).astype(np.int32)
        gy = np.floor((y_local - y0) / cell).astype(np.int32)

        # build grid bounds
        W = gx.max() + 1
        H = gy.max() + 1
        grid = np.full((H, W), np.nan, dtype=np.float32)

        # fill with min hag per cell
        for i in range(len(hag)):
            yy, xx = gy[i], gx[i]
            v = hag[i]
            cur = grid[yy, xx]
            if np.isnan(cur) or v < cur:
                grid[yy, xx] = v

        # replace nans with nearest-ish 0
        grid = np.nan_to_num(grid, nan=0.0)

        # gradient magnitude
        gyg, gxg = np.gradient(grid)
        mag = np.sqrt(gxg**2 + gyg**2).astype(np.float32)

        slope = mag[gy, gx]
        return slope
    except Exception:
        return np.zeros_like(hag, dtype=np.float32)


def build_features_from_las(las):
    """
    Returns X: (N,10) float32 and xyz_local stats.
    """
    x = np.asarray(las.x, dtype=np.float64)
    y = np.asarray(las.y, dtype=np.float64)
    z = np.asarray(las.z, dtype=np.float32)

    # Local coordinates (per file)
    x_local = (x - x.min()).astype(np.float32)
    y_local = (y - y.min()).astype(np.float32)

    # Intensity / returns / scan angle (if present)
    intensity = get_dim_safe(las, "intensity")
    if intensity is None:
        intensity = np.zeros_like(z, dtype=np.float32)
    else:
        intensity = intensity.astype(np.float32)

    return_num = get_dim_safe(las, "return_number")
    if return_num is None:
        return_num = np.zeros_like(z, dtype=np.float32)
    else:
        return_num = return_num.astype(np.float32)

    num_returns = get_dim_safe(las, "number_of_returns")
    if num_returns is None:
        num_returns = np.zeros_like(z, dtype=np.float32)
    else:
        num_returns = num_returns.astype(np.float32)

    scan_angle = get_dim_safe(las, "scan_angle_rank")
    if scan_angle is None:
        scan_angle = get_dim_safe(las, "scan_angle")
    if scan_angle is None:
        scan_angle = np.zeros_like(z, dtype=np.float32)
    else:
        scan_angle = scan_angle.astype(np.float32)

    # deviation (often not standard) -> 0 if missing
    deviation = get_dim_safe(las, "deviation")
    if deviation is None:
        deviation = np.zeros_like(z, dtype=np.float32)
    else:
        deviation = deviation.astype(np.float32)

    # HAG (approx)
    hag = compute_hag_grid(x.astype(np.float32), y.astype(np.float32), z.astype(np.float32), cell=1.0, percentile=5)

    # slope (proxy)
    slope = compute_slope_from_hag(x_local, y_local, hag, cell=1.0)

    X = np.stack([
        x_local,          # 0
        y_local,          # 1
        z.astype(np.float32),  # 2
        hag.astype(np.float32),# 3
        intensity,        # 4
        return_num,       # 5
        num_returns,      # 6
        scan_angle,       # 7
        deviation,        # 8
        slope             # 9
    ], axis=1).astype(np.float32)

    return X


# ============================================================
# BLOCKING (4096 points per block with idx mapping)
# ============================================================
def make_blocks(X: np.ndarray, block_size=40.0, points_per_block=4096, seed=123):
    """
    Creates blocks by XY grid on x_local/y_local.
    - For each cell, sample points_per_block (with replacement if needed).
    Returns list of (X_block, idx_block)
    """
    rng = np.random.default_rng(seed)

    x = X[:, 0]  # x_local
    y = X[:, 1]  # y_local
    gx = np.floor(x / block_size).astype(np.int32)
    gy = np.floor(y / block_size).astype(np.int32)

    key = gx.astype(np.int64) * 10_000_000 + gy.astype(np.int64)
    order = np.argsort(key)
    key_s = key[order]

    blocks = []
    start = 0
    n = len(X)

    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1

        idxs = order[start:end]
        if idxs.size == 0:
            start = end
            continue

        # sample fixed size
        if idxs.size >= points_per_block:
            pick = rng.choice(idxs, size=points_per_block, replace=False)
        else:
            pick = rng.choice(idxs, size=points_per_block, replace=True)

        Xb = X[pick]
        blocks.append((Xb, pick.astype(np.int64)))

        start = end

    return blocks


# ============================================================
# INFERENCE + WRITE BACK
# ============================================================
@torch.no_grad()
def predict_full(model, X_full: np.ndarray, block_size=40.0, points_per_block=4096, batch_blocks=8):
    """
    Predict for all points by blocks.
    Writes predictions back to a full array using idx mapping.
    If points appear in multiple blocks (due to replacement), we vote by counts.
    """
    blocks = make_blocks(X_full, block_size=block_size, points_per_block=points_per_block)
    print(f"Blocks created: {len(blocks)} (block_size={block_size}, N={points_per_block})")

    # vote accumulators
    N = X_full.shape[0]
    votes = np.zeros((N, NUM_CLASSES), dtype=np.uint16)

    # batch blocks
    for i in range(0, len(blocks), batch_blocks):
        chunk = blocks[i:i+batch_blocks]
        Xb = np.stack([c[0] for c in chunk], axis=0)  # (B,4096,10)
        idxb = [c[1] for c in chunk]

        # normalize
        Xb = normalize_X(Xb)

        xb_t = torch.from_numpy(Xb).to(DEVICE)
        logits = model(xb_t)  # (B,4096,C)
        pred = logits.argmax(dim=-1).detach().cpu().numpy().astype(np.int32)  # (B,4096)

        # accumulate votes
        for b in range(pred.shape[0]):
            inds = idxb[b]
            pb = pred[b]
            # vote
            for k in range(NUM_CLASSES):
                votes[inds, k] += (pb == k).astype(np.uint16)

        if (i // batch_blocks) % 10 == 0:
            print(f"Predicted blocks {i}/{len(blocks)}")

    final = votes.argmax(axis=1).astype(np.int32)  # 0..6
    return final


def write_outputs(las, pred_contig: np.ndarray, out_las: str, out_laz: str):
    # Map 0..6 -> raw LAS codes
    pred_raw = np.vectorize(CONTIG_TO_RAW.get)(pred_contig).astype(np.uint8)

    # Set classification
    las.classification = pred_raw

    # Ensure output folder exists
    os.makedirs(os.path.dirname(out_las), exist_ok=True)

    # Write LAS
    las.write(out_las)
    print("✅ Wrote LAS:", out_las)

    # Write LAZ (requires laz backend: lazrs or laszip)
    try:
        las.write(out_laz)
        print("✅ Wrote LAZ:", out_laz)
    except Exception as e:
        print("⚠️ Could not write LAZ (backend missing). Error:", str(e))
        print("   Install one of these in your env:")
        print("   pip install lazrs")
        print("   (then retry writing .laz)")


# ============================================================
# MAIN
# ============================================================
def main():
    print("Device:", DEVICE)
    if DEVICE == "cuda":
        print("GPU:", torch.cuda.get_device_name(0))

    # Load LAZ
    las = laspy.read(IN_LAZ)
    N = len(las.x)
    print("Loaded:", IN_LAZ)
    print("Points:", N)

    # Build features
    X_full = build_features_from_las(las)
    print("X_full shape:", X_full.shape)

    # Load model
    model = load_model(MODEL_PT, in_ch=10, target_classes=NUM_CLASSES)

    # Predict
    pred_contig = predict_full(
        model,
        X_full,
        block_size=40.0,       # change if your data uses different scale
        points_per_block=4096,
        batch_blocks=8         # RTX 3050 safe
    )

    # Write outputs
    write_outputs(las, pred_contig, OUT_LAS, OUT_LAZ)

    # Quick counts
    unique, counts = np.unique(pred_contig, return_counts=True)
    print("Pred counts (contig 0..6):", dict(zip(unique.tolist(), counts.tolist())))
    unique_raw, counts_raw = np.unique(np.vectorize(CONTIG_TO_RAW.get)(pred_contig), return_counts=True)
    print("Pred counts (raw classes):", dict(zip(unique_raw.tolist(), counts_raw.tolist())))

if __name__ == "__main__":
    main()


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded: D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz
Points: 12374846
X_full shape: (12374846, 10)
CKPT keys sample: ['mlp1.weight', 'mlp1.bias', 'mlp2.weight', 'mlp2.bias', 'mlp3.weight', 'mlp3.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias']
✅ Legacy ckpt detected. ckpt head classes = 5
⚠️ Head class mismatch: ckpt 5 vs target 7. Will load backbone and replace head.
Loaded backbone. Missing: ['fc2.weight', 'fc2.bias'] Unexpected: []
Blocks created: 119 (block_size=40.0, N=4096)
Predicted blocks 0/119
Predicted blocks 80/119
✅ Wrote LAS: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.las
✅ Wrote LAZ: D:/lidarrrrr/anbu/New folder/dl_predicted_fixed.laz
Pred counts (contig 0..6): {0: 12372645, 1: 540, 2: 570, 3: 51, 4: 189, 5: 653, 6: 198}
Pred counts (raw classes): {1: 12372645, 2: 540, 3: 570, 6: 51, 7: 189, 12: 653, 13: 198}


In [7]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import laspy

# ============================================================
# USER PATHS
# ============================================================
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnet_best.pt"   # 5-class ckpt
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_5class.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/dl_predicted_fixed_5class.laz"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ============================================================
# IMPORTANT: CKPT TRAINED CLASSES (5 classes)
# Most common 5-class training set for LiDAR:
#   [1,2,3,6,12]
# If your training classes were different, change this list.
# Examples:
#   [1,2,3,12,13]
#   [1,2,3,6,13]
# ============================================================
CKPT_RAW_CLASSES = [1, 2, 3, 6, 12]   # <-- DEFAULT (change if needed)
NUM_CLASSES = len(CKPT_RAW_CLASSES)
CONTIG_TO_RAW = {i: c for i, c in enumerate(CKPT_RAW_CLASSES)}

# ============================================================
# NORMALIZATION STATS
# If you have training mean/std for 10 features, paste them.
# If not, keep identity (may reduce accuracy but will still run).
# ============================================================
XMEAN = np.zeros(10, dtype=np.float32)
XSTD  = np.ones(10, dtype=np.float32)

def normalize_X(X: np.ndarray) -> np.ndarray:
    std = np.where(XSTD == 0, 1.0, XSTD).astype(np.float32)
    return (X.astype(np.float32) - XMEAN.astype(np.float32)) / std


# ============================================================
# Legacy PointNet model matching your ckpt keys exactly
# keys: mlp1/2/3, fc1, fc2 (Conv1d => per-point segmentation)
# ============================================================
class PointNetLegacySeg(nn.Module):
    def __init__(self, state, in_ch=10):
        super().__init__()

        def get_conv(name):
            w = state[f"{name}.weight"]
            if w.ndim != 3:
                raise RuntimeError(f"{name}.weight must be Conv1d (3D), got {tuple(w.shape)}")
            out_ch, in_ch_ = int(w.shape[0]), int(w.shape[1])
            layer = nn.Conv1d(in_ch_, out_ch, 1, bias=True)
            return layer, in_ch_, out_ch

        self.mlp1, in1, o1 = get_conv("mlp1")
        self.mlp2, in2, o2 = get_conv("mlp2")
        self.mlp3, in3, o3 = get_conv("mlp3")

        # fc1/fc2 also Conv1d for segmentation
        w1 = state["fc1.weight"]
        w2 = state["fc2.weight"]
        if w2.ndim != 3:
            raise RuntimeError(
                f"fc2.weight is {tuple(w2.shape)} => looks like global classifier. "
                f"This script expects per-point segmentation ckpt."
            )

        fc1_out, fc1_in = int(w1.shape[0]), int(w1.shape[1])
        fc2_out, fc2_in = int(w2.shape[0]), int(w2.shape[1])

        self.fc1 = nn.Conv1d(fc1_in, fc1_out, 1, bias=True)
        self.fc2 = nn.Conv1d(fc2_in, fc2_out, 1, bias=True)

    def forward(self, x):
        # x: (B,N,C)
        x = x.transpose(1, 2)        # (B,C,N)
        x = F.relu(self.mlp1(x))
        x = F.relu(self.mlp2(x))
        x = F.relu(self.mlp3(x))
        x = F.relu(self.fc1(x))
        x = self.fc2(x)              # (B,C,N)
        return x.transpose(1, 2)     # (B,N,C)

def load_model_strict_5class(model_path: str) -> nn.Module:
    ckpt = torch.load(model_path, map_location=DEVICE)
    state = ckpt.get("model_state", ckpt)

    # Verify ckpt classes
    ckpt_classes = int(state["fc2.weight"].shape[0])
    print("✅ ckpt head classes =", ckpt_classes)
    if ckpt_classes != NUM_CLASSES:
        raise RuntimeError(
            f"Your CKPT outputs {ckpt_classes} classes but CKPT_RAW_CLASSES has {NUM_CLASSES}.\n"
            f"Fix CKPT_RAW_CLASSES to match ckpt output classes."
        )

    model = PointNetLegacySeg(state, in_ch=10).to(DEVICE)
    model.load_state_dict(state, strict=True)
    model.eval()
    return model


# ============================================================
# Feature building (same 10 features names)
# NOTE: If you trained with a different HAG/slope pipeline,
#       you should plug your training feature builder here.
# ============================================================
def get_dim_safe(las, name):
    try:
        return np.asarray(getattr(las, name))
    except Exception:
        try:
            return np.asarray(las[name])  # extra dims
        except Exception:
            return None

def compute_hag_grid(x, y, z, cell=1.0, percentile=5):
    x0 = x.min()
    y0 = y.min()
    gx = np.floor((x - x0) / cell).astype(np.int32)
    gy = np.floor((y - y0) / cell).astype(np.int32)
    key = (gx.astype(np.int64) << 32) ^ gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s = z[order]

    ground = np.empty_like(z_s, dtype=np.float32)
    start = 0
    n = len(z_s)
    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1
        g = np.percentile(z_s[start:end], percentile).astype(np.float32)
        ground[start:end] = g
        start = end

    ground_unsorted = np.empty_like(ground)
    ground_unsorted[order] = ground

    hag = (z.astype(np.float32) - ground_unsorted.astype(np.float32))
    hag = np.maximum(hag, 0.0)
    return hag

def compute_slope_proxy(x_local, y_local, hag, cell=1.0):
    # fast/simple: return zeros if you don’t have training-equivalent slope
    return np.zeros_like(hag, dtype=np.float32)

def build_features_from_las(las):
    x = np.asarray(las.x, dtype=np.float64)
    y = np.asarray(las.y, dtype=np.float64)
    z = np.asarray(las.z, dtype=np.float32)

    x_local = (x - x.min()).astype(np.float32)
    y_local = (y - y.min()).astype(np.float32)

    intensity = get_dim_safe(las, "intensity")
    if intensity is None: intensity = np.zeros_like(z, dtype=np.float32)
    else: intensity = intensity.astype(np.float32)

    return_num = get_dim_safe(las, "return_number")
    if return_num is None: return_num = np.zeros_like(z, dtype=np.float32)
    else: return_num = return_num.astype(np.float32)

    num_returns = get_dim_safe(las, "number_of_returns")
    if num_returns is None: num_returns = np.zeros_like(z, dtype=np.float32)
    else: num_returns = num_returns.astype(np.float32)

    scan_angle = get_dim_safe(las, "scan_angle_rank")
    if scan_angle is None:
        scan_angle = get_dim_safe(las, "scan_angle")
    if scan_angle is None: scan_angle = np.zeros_like(z, dtype=np.float32)
    else: scan_angle = scan_angle.astype(np.float32)

    deviation = get_dim_safe(las, "deviation")
    if deviation is None: deviation = np.zeros_like(z, dtype=np.float32)
    else: deviation = deviation.astype(np.float32)

    hag = compute_hag_grid(x.astype(np.float32), y.astype(np.float32), z.astype(np.float32), cell=1.0, percentile=5)
    slope = compute_slope_proxy(x_local, y_local, hag, cell=1.0)

    X = np.stack([
        x_local, y_local, z, hag,
        intensity, return_num, num_returns, scan_angle,
        deviation, slope
    ], axis=1).astype(np.float32)

    return X


# ============================================================
# Better blocking: grid cells -> split into chunks of 4096
# (covers all points, minimal replacement)
# ============================================================
def build_blocks_cover_all(X, block_size=20.0, points_per_block=4096):
    x = X[:, 0]  # x_local
    y = X[:, 1]  # y_local

    gx = np.floor(x / block_size).astype(np.int32)
    gy = np.floor(y / block_size).astype(np.int32)
    key = gx.astype(np.int64) * 10_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]

    blocks = []
    start = 0
    n = len(X)
    rng = np.random.default_rng(123)

    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1

        cell_indices = order[start:end]
        # split this cell into multiple 4096 chunks
        if cell_indices.size > 0:
            rng.shuffle(cell_indices)
            for s in range(0, cell_indices.size, points_per_block):
                chunk = cell_indices[s:s+points_per_block]
                if chunk.size < points_per_block:
                    # pad by sampling within the same cell
                    pad = rng.choice(cell_indices, size=(points_per_block - chunk.size), replace=True)
                    chunk = np.concatenate([chunk, pad], axis=0)
                blocks.append(chunk.astype(np.int64))

        start = end

    return blocks


@torch.no_grad()
def predict_points(model, X_full, block_size=20.0, points_per_block=4096, batch_blocks=8):
    blocks = build_blocks_cover_all(X_full, block_size=block_size, points_per_block=points_per_block)
    print(f"Blocks created: {len(blocks)} (block_size={block_size}, N={points_per_block})")

    N = X_full.shape[0]
    vote = np.zeros((N, NUM_CLASSES), dtype=np.uint16)

    for i in range(0, len(blocks), batch_blocks):
        batch = blocks[i:i+batch_blocks]
        Xb = np.stack([X_full[idx] for idx in batch], axis=0)  # (B,4096,10)
        Xb = normalize_X(Xb)

        xb = torch.from_numpy(Xb).to(DEVICE)
        logits = model(xb)  # (B,4096,C)
        pred = logits.argmax(dim=-1).cpu().numpy().astype(np.int32)

        for b, idxs in enumerate(batch):
            pb = pred[b]
            # vote per point
            for c in range(NUM_CLASSES):
                vote[idxs, c] += (pb == c).astype(np.uint16)

        if (i // batch_blocks) % 10 == 0:
            print(f"Predicted blocks {i}/{len(blocks)}")

    pred_contig = vote.argmax(axis=1).astype(np.int32)  # 0..4
    return pred_contig


def write_outputs(las, pred_contig, out_las, out_laz):
    pred_raw = np.vectorize(CONTIG_TO_RAW.get)(pred_contig).astype(np.uint8)
    las.classification = pred_raw

    os.makedirs(os.path.dirname(out_las), exist_ok=True)

    las.write(out_las)
    print("✅ Wrote LAS:", out_las)

    try:
        las.write(out_laz)
        print("✅ Wrote LAZ:", out_laz)
    except Exception as e:
        print("⚠️ Could not write LAZ:", e)
        print("   Try: pip install lazrs")


def main():
    print("Device:", DEVICE)
    if DEVICE == "cuda":
        print("GPU:", torch.cuda.get_device_name(0))

    las = laspy.read(IN_LAZ)
    print("Loaded:", IN_LAZ)
    print("Points:", len(las.x))

    X_full = build_features_from_las(las)
    print("X_full shape:", X_full.shape)

    model = load_model_strict_5class(MODEL_PT)

    # IMPORTANT: if your earlier run made too few blocks, reduce block_size:
    # try 20.0, 10.0, 5.0 based on your x_local/y_local scale.
    pred_contig = predict_points(
        model,
        X_full,
        block_size=20.0,        # <-- try 10.0 if still too few blocks
        points_per_block=4096,
        batch_blocks=8
    )

    write_outputs(las, pred_contig, OUT_LAS, OUT_LAZ)

    u, c = np.unique(pred_contig, return_counts=True)
    print("Pred counts (contig):", dict(zip(u.tolist(), c.tolist())))
    u2, c2 = np.unique(np.vectorize(CONTIG_TO_RAW.get)(pred_contig), return_counts=True)
    print("Pred counts (raw):", dict(zip(u2.tolist(), c2.tolist())))

if __name__ == "__main__":
    main()


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded: D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz
Points: 12374846
X_full shape: (12374846, 10)
✅ ckpt head classes = 5
Blocks created: 3222 (block_size=20.0, N=4096)
Predicted blocks 0/3222
Predicted blocks 80/3222
Predicted blocks 160/3222
Predicted blocks 240/3222
Predicted blocks 320/3222
Predicted blocks 400/3222
Predicted blocks 480/3222
Predicted blocks 560/3222
Predicted blocks 640/3222
Predicted blocks 720/3222
Predicted blocks 800/3222
Predicted blocks 880/3222
Predicted blocks 960/3222
Predicted blocks 1040/3222
Predicted blocks 1120/3222
Predicted blocks 1200/3222
Predicted blocks 1280/3222
Predicted blocks 1360/3222
Predicted blocks 1440/3222
Predicted blocks 1520/3222
Predicted blocks 1600/3222
Predicted blocks 1680/3222
Predicted blocks 1760/3222
Predicted blocks 1840/3222
Predicted blocks 1920/3222
Predicted blocks 2000/3222
Predicted blocks 2080/3222
Predicted blocks 2160/3222
Predicted blocks 2240/3222
Predicted blocks 23

In [17]:
train_files = list_npz(TRAIN_DIR)
val_files   = list_npz(VAL_DIR)


In [19]:
ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"   # folder that contains ALL npz files

all_files = list_npz(ALL_BLOCKS_DIR)

# shuffle
import random
random.seed(42)
random.shuffle(all_files)

# split 80/20
split = int(0.8 * len(all_files))
train_files = all_files[:split]
val_files   = all_files[split:]

print("Train blocks:", len(train_files))
print("Val blocks:", len(val_files))


Train blocks: 22157
Val blocks: 5540


In [20]:
import os, glob, math, random, time
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Sampler

# =========================
# CONFIG (EDIT THESE)
# =========================

# ✅ Start with 5-class (matches your working ckpt mapping):
# CKPT_RAW_CLASSES = [1,2,3,6,12]
# Later switch to 7-class:
# CKPT_RAW_CLASSES = [1,2,3,6,7,12,13]

RAW_CLASSES = [1, 2, 3, 6, 12]  # <-- START HERE (5-class). Change to 7-class when ready.
MAP = {c: i for i, c in enumerate(RAW_CLASSES)}
NUM_CLASSES = len(RAW_CLASSES)
IGNORE_INDEX = -100

# Paths
TRAIN_DIR = r"D:/lidarrrrr/anbu/dl_dataset/train_blocks"
VAL_DIR   = r"D:/lidarrrrr/anbu/dl_dataset/val_blocks"
OUT_DIR   = r"D:/lidarrrrr/anbu/dl_models"
        # <-- change
os.makedirs(OUT_DIR, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Data
POINTS = 4096
IN_CH = 10
BATCH = 6              # RTX 3050 safe with PointNet++
NUM_WORKERS = 0        # Windows safe; try 2 if stable

# Training
EPOCHS = 120
LR = 1e-3
WD = 1e-4
LABEL_SMOOTH = 0.05
GRAD_CLIP = 1.0
USE_AMP = True

# Sampling buckets (raw label codes!)
RARE_RAW = {7, 13, 6}     # works for 7-class; for 5-class it mainly hits 6
MED_RAW  = {12, 3}
RATIOS   = (0.50, 0.30, 0.20)  # rare, medium, common

# PointNet++ sizes
NPOINTS = [1024, 256, 64]
NSAMPLE = [32, 32, 32]        # kNN neighbors

# =========================
# UTILS
# =========================

def set_seed(seed=123):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(123)

def list_npz(folder: str) -> List[str]:
    return sorted(glob.glob(os.path.join(folder, "*.npz")))

def map_labels(y_raw: np.ndarray) -> np.ndarray:
    out = np.full_like(y_raw, IGNORE_INDEX)
    for raw, new in MAP.items():
        out[y_raw == raw] = new
    return out

def compute_class_weights_from_counts(counts_raw: Dict[int, int]) -> torch.Tensor:
    """
    weight = 1/sqrt(freq), normalize mean~1, clamp [0.25, 10]
    """
    freqs = []
    for raw in RAW_CLASSES:
        freqs.append(float(counts_raw.get(raw, 1)))
    freqs = np.array(freqs, dtype=np.float64)

    w = 1.0 / np.sqrt(freqs)
    w = w / (w.mean() + 1e-12)
    w = np.clip(w, 0.25, 10.0).astype(np.float32)
    return torch.from_numpy(w)

# =========================
# DATASET
# =========================

class BlocksNPZDataset(Dataset):
    def __init__(self, files: List[str]):
        self.files = files
        self.block_has = []

        for f in self.files:
            d = np.load(f)
            y_raw = d["y"].astype(np.int32)
            self.block_has.append(set(np.unique(y_raw).tolist()))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        path = self.files[i]
        d = np.load(path)
        X = d["X"].astype(np.float32)      # (4096, 10)
        y_raw = d["y"].astype(np.int32)    # (4096,)
        idx = d["idx"].astype(np.int64)    # (4096,)

        y = map_labels(y_raw)

        return torch.from_numpy(X), torch.from_numpy(y), torch.from_numpy(idx), path

class BucketBlockSampler(Sampler[int]):
    def __init__(self, dataset: BlocksNPZDataset, num_samples: Optional[int] = None, seed: int = 123):
        self.ds = dataset
        self.num_samples = num_samples if num_samples is not None else len(dataset)
        self.seed = seed

        self.rare_idx, self.med_idx, self.common_idx = self._split()

        if len(self.rare_idx) == 0:
            print("⚠️ Rare bucket empty.")
        if len(self.med_idx) == 0:
            print("⚠️ Medium bucket empty.")
        if len(self.common_idx) == 0:
            print("⚠️ Common bucket empty.")

    def _split(self):
        rare, med, common = [], [], []
        for i, present in enumerate(self.ds.block_has):
            if len(present & RARE_RAW) > 0:
                rare.append(i)
            elif len(present & MED_RAW) > 0:
                med.append(i)
            else:
                common.append(i)
        return rare, med, common

    def __iter__(self):
        rng = random.Random(self.seed)
        n = self.num_samples
        n_rare = int(n * RATIOS[0])
        n_med  = int(n * RATIOS[1])
        n_com  = n - n_rare - n_med

        def pick(bucket, k):
            if len(bucket) == 0:
                return []
            return [rng.choice(bucket) for _ in range(k)]

        indices = []
        indices += pick(self.rare_idx, n_rare)
        indices += pick(self.med_idx, n_med)
        indices += pick(self.common_idx, n_com)
        rng.shuffle(indices)
        return iter(indices)

    def __len__(self):
        return self.num_samples

# =========================
# POINTNET++ CORE OPS (FPS + kNN)
# =========================

def square_distance(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor:
    """
    src: (B, N, 3), dst: (B, M, 3) -> (B, N, M)
    """
    B, N, _ = src.shape
    _, M, _ = dst.shape
    dist = -2 * torch.matmul(src, dst.transpose(1, 2))  # (B,N,M)
    dist += torch.sum(src ** 2, dim=-1).unsqueeze(-1)
    dist += torch.sum(dst ** 2, dim=-1).unsqueeze(1)
    return dist

def index_points(points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
    """
    points: (B, N, C)
    idx: (B, S) or (B, S, K)
    """
    B = points.shape[0]
    if idx.dim() == 2:
        batch_indices = torch.arange(B, device=points.device).view(B, 1)
        return points[batch_indices, idx, :]
    else:
        batch_indices = torch.arange(B, device=points.device).view(B, 1, 1)
        return points[batch_indices, idx, :]

def farthest_point_sample(xyz: torch.Tensor, npoint: int) -> torch.Tensor:
    """
    xyz: (B, N, 3) -> idx (B, npoint)
    """
    device = xyz.device
    B, N, _ = xyz.shape
    centroids = torch.zeros(B, npoint, dtype=torch.long, device=device)
    distance = torch.full((B, N), 1e10, device=device)
    farthest = torch.randint(0, N, (B,), dtype=torch.long, device=device)
    batch_indices = torch.arange(B, device=device)

    for i in range(npoint):
        centroids[:, i] = farthest
        centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
        dist = torch.sum((xyz - centroid) ** 2, dim=-1)
        distance = torch.minimum(distance, dist)
        farthest = torch.max(distance, dim=-1)[1]
    return centroids

def knn_point(k: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
    """
    xyz: (B, N, 3)
    new_xyz: (B, S, 3)
    return idx: (B, S, k)
    """
    dist = square_distance(new_xyz, xyz)  # (B,S,N)
    idx = dist.topk(k=k, dim=-1, largest=False)[1]
    return idx

# =========================
# POINTNET++ MODULES
# =========================

class SharedMLP(nn.Module):
    def __init__(self, channels: List[int], bn=True):
        super().__init__()
        layers = []
        for i in range(len(channels) - 1):
            layers.append(nn.Conv2d(channels[i], channels[i+1], 1))
            if bn:
                layers.append(nn.BatchNorm2d(channels[i+1]))
            layers.append(nn.ReLU(inplace=True))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

class PointNetSetAbstractionKNN(nn.Module):
    """
    SA layer using FPS + kNN grouping + PointNet on local groups
    """
    def __init__(self, npoint: int, nsample: int, in_ch: int, mlp: List[int]):
        super().__init__()
        self.npoint = npoint
        self.nsample = nsample
        self.mlp = SharedMLP([in_ch] + mlp, bn=True)

    def forward(self, xyz: torch.Tensor, points: Optional[torch.Tensor]):
        """
        xyz: (B, N, 3)
        points: (B, N, D) or None
        returns:
          new_xyz: (B, S, 3)
          new_points: (B, S, mlp[-1])
        """
        B, N, _ = xyz.shape
        S = self.npoint

        fps_idx = farthest_point_sample(xyz, S)        # (B,S)
        new_xyz = index_points(xyz, fps_idx)           # (B,S,3)

        knn_idx = knn_point(self.nsample, xyz, new_xyz)  # (B,S,K)
        grouped_xyz = index_points(xyz, knn_idx)         # (B,S,K,3)
        grouped_xyz_norm = grouped_xyz - new_xyz.unsqueeze(2)

        if points is not None:
            grouped_points = index_points(points, knn_idx)   # (B,S,K,D)
            new_group = torch.cat([grouped_xyz_norm, grouped_points], dim=-1)  # (B,S,K,3+D)
        else:
            new_group = grouped_xyz_norm  # (B,S,K,3)

        # (B, C, S, K)
        new_group = new_group.permute(0, 3, 1, 2).contiguous()
        new_group = self.mlp(new_group)                 # (B, mlp[-1], S, K)
        new_points = torch.max(new_group, dim=-1)[0]    # (B, mlp[-1], S)
        new_points = new_points.transpose(1, 2).contiguous()  # (B, S, mlp[-1])

        return new_xyz, new_points

class PointNetFeaturePropagation(nn.Module):
    def __init__(self, in_ch: int, mlp: List[int]):
        super().__init__()
        layers = []
        last = in_ch
        for out in mlp:
            layers += [nn.Conv1d(last, out, 1), nn.BatchNorm1d(out), nn.ReLU(inplace=True)]
            last = out
        self.mlp = nn.Sequential(*layers)

    def forward(self, xyz1, xyz2, points1, points2):
        """
        Interpolate from xyz2 (sparser) to xyz1 (denser)
        xyz1: (B, N, 3)
        xyz2: (B, S, 3)
        points1: (B, N, D1) or None
        points2: (B, S, D2)
        return new_points: (B, N, mlp[-1])
        """
        B, N, _ = xyz1.shape
        _, S, _ = xyz2.shape

        if S == 1:
            interpolated = points2.repeat(1, N, 1)
        else:
            dist = square_distance(xyz1, xyz2)          # (B,N,S)
            dist, idx = dist.topk(k=3, dim=-1, largest=False)  # (B,N,3)
            dist = torch.clamp(dist, min=1e-10)
            weight = 1.0 / dist
            weight = weight / torch.sum(weight, dim=-1, keepdim=True)

            grouped_points = index_points(points2, idx)  # (B,N,3,D2)
            interpolated = torch.sum(grouped_points * weight.unsqueeze(-1), dim=2)  # (B,N,D2)

        if points1 is not None:
            new_points = torch.cat([points1, interpolated], dim=-1)  # (B,N,D1+D2)
        else:
            new_points = interpolated

        new_points = new_points.transpose(1, 2).contiguous()  # (B, C, N)
        new_points = self.mlp(new_points)                     # (B, mlp[-1], N)
        return new_points.transpose(1, 2).contiguous()        # (B, N, mlp[-1])

# =========================
# POINTNET++ SEGMENTATION MODEL
# =========================

class PointNet2SSGSeg(nn.Module):
    def __init__(self, num_classes: int, in_ch: int = 10):
        super().__init__()
        # split xyz + features
        # xyz = X[:, :, 0:3]
        # feat = X[:, :, 3:10] (7 dims)

        self.sa1 = PointNetSetAbstractionKNN(
            npoint=NPOINTS[0], nsample=NSAMPLE[0],
            in_ch=3 + (in_ch - 3),     # (xyz_norm=3) + feat dims (7) = 10
            mlp=[64, 64, 128]
        )
        self.sa2 = PointNetSetAbstractionKNN(
            npoint=NPOINTS[1], nsample=NSAMPLE[1],
            in_ch=3 + 128,            # grouped xyz_norm + points(128)
            mlp=[128, 128, 256]
        )
        self.sa3 = PointNetSetAbstractionKNN(
            npoint=NPOINTS[2], nsample=NSAMPLE[2],
            in_ch=3 + 256,
            mlp=[256, 256, 512]
        )

        self.fp3 = PointNetFeaturePropagation(in_ch=256 + 512, mlp=[256, 256])
        self.fp2 = PointNetFeaturePropagation(in_ch=128 + 256, mlp=[256, 128])
        self.fp1 = PointNetFeaturePropagation(in_ch=(in_ch - 3) + 128, mlp=[128, 128, 128])

        self.classifier = nn.Sequential(
            nn.Conv1d(128, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Conv1d(128, num_classes, 1)
        )

    def forward(self, X):
        """
        X: (B, N, 10)
        returns logits: (B, N, num_classes)
        """
        xyz = X[:, :, 0:3].contiguous()
        feat = X[:, :, 3:].contiguous()  # (B,N,7)

        # SA1: use feat as points, but SA expects points grouped in channel-last; we combine inside SA
        l1_xyz, l1_points = self.sa1(xyz, feat)          # (B,1024,3), (B,1024,128)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)  # (B,256,3),  (B,256,256)
        l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)  # (B,64,3),   (B,64,512)

        l2_points_fp = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)   # (B,256,256)
        l1_points_fp = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points_fp) # (B,1024,128)
        l0_points_fp = self.fp1(xyz, l1_xyz, feat, l1_points_fp)         # (B,N,128)

        x = l0_points_fp.transpose(1, 2).contiguous()   # (B,128,N)
        logits = self.classifier(x)                     # (B,C,N)
        return logits.transpose(1, 2).contiguous()      # (B,N,C)

# =========================
# LOSS + METRICS
# =========================

class WeightedCELoss(nn.Module):
    def __init__(self, weight: torch.Tensor, label_smoothing: float, ignore_index: int):
        super().__init__()
        self.register_buffer("weight", weight)
        self.label_smoothing = label_smoothing
        self.ignore_index = ignore_index

    def forward(self, logits, target):
        B, N, C = logits.shape
        logits = logits.reshape(B*N, C)
        target = target.reshape(B*N)
        return F.cross_entropy(
            logits, target,
            weight=self.weight,
            ignore_index=self.ignore_index,
            label_smoothing=self.label_smoothing
        )

@torch.no_grad()
def compute_metrics(logits, y, num_classes: int):
    pred = logits.argmax(dim=-1)  # (B,N)
    mask = (y != IGNORE_INDEX)
    correct = (pred[mask] == y[mask]).sum().item()
    total = mask.sum().item()
    acc = correct / total if total > 0 else 0.0

    # macro F1
    f1s = []
    for c in range(num_classes):
        tp = ((pred == c) & (y == c) & mask).sum().item()
        fp = ((pred == c) & (y != c) & mask).sum().item()
        fn = ((pred != c) & (y == c) & mask).sum().item()
        denom = 2*tp + fp + fn
        f1s.append((2*tp/denom) if denom > 0 else 0.0)
    macro_f1 = float(sum(f1s) / len(f1s))
    return acc, macro_f1

# =========================
# TRAIN / VAL
# =========================

def train_one_epoch(model, loader, optim, scaler, loss_fn):
    model.train()
    total_loss = 0.0
    total_batches = 0

    for X, y, _, _ in loader:
        X = X.to(DEVICE)
        y = y.to(DEVICE)

        optim.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=(USE_AMP and DEVICE=="cuda")):
            logits = model(X)
            loss = loss_fn(logits, y)

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        scaler.step(optim)
        scaler.update()

        total_loss += loss.item()
        total_batches += 1

    return total_loss / max(total_batches, 1)

@torch.no_grad()
def validate(model, loader, loss_fn):
    model.eval()
    total_loss = 0.0
    total_batches = 0
    accs, f1s = [], []

    for X, y, _, _ in loader:
        X = X.to(DEVICE)
        y = y.to(DEVICE)
        logits = model(X)
        loss = loss_fn(logits, y)

        acc, macro_f1 = compute_metrics(logits, y, NUM_CLASSES)
        accs.append(acc)
        f1s.append(macro_f1)

        total_loss += loss.item()
        total_batches += 1

    return total_loss / max(total_batches, 1), float(np.mean(accs)), float(np.mean(f1s))

# =========================
# MAIN
# =========================

def main():
    print("Device:", DEVICE)
    if DEVICE == "cuda":
        print("GPU:", torch.cuda.get_device_name(0))

    train_files = list_npz(TRAIN_DIR)
    val_files   = list_npz(VAL_DIR)

    print("Train blocks:", len(train_files))
    print("Val blocks  :", len(val_files))
    if len(train_files) == 0:
        raise RuntimeError("No train .npz files found. Check TRAIN_DIR.")

    train_ds = BlocksNPZDataset(train_files)
    val_ds   = BlocksNPZDataset(val_files)

    sampler = BucketBlockSampler(train_ds, num_samples=len(train_ds), seed=123)

    train_loader = DataLoader(
        train_ds, batch_size=BATCH, sampler=sampler,
        num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"),
        drop_last=True
    )
    val_loader = DataLoader(
        val_ds, batch_size=BATCH, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"),
    )

    # If you have real counts, put them here. Otherwise use a mild default.
    # For 5-class [1,2,3,6,12], you can approximate from your earlier global counts.
    counts_guess = {1: 1, 2: 7000000, 3: 50000, 6: 1200000, 12: 3800000}
    class_w = compute_class_weights_from_counts(counts_guess).to(DEVICE)
    print("Class weights:", class_w.detach().cpu().numpy())

    model = PointNet2SSGSeg(num_classes=NUM_CLASSES, in_ch=IN_CH).to(DEVICE)

    optim = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max=EPOCHS, eta_min=1e-5)
    scaler = torch.cuda.amp.GradScaler(enabled=(USE_AMP and DEVICE=="cuda"))

    loss_fn = WeightedCELoss(class_w, LABEL_SMOOTH, IGNORE_INDEX)

    best_f1 = -1.0
    best_path = os.path.join(OUT_DIR, f"pointnetpp_best_{NUM_CLASSES}cls.pt")

    for epoch in range(1, EPOCHS+1):
        t0 = time.time()
        tr_loss = train_one_epoch(model, train_loader, optim, scaler, loss_fn)
        va_loss, va_acc, va_f1 = validate(model, val_loader, loss_fn)
        sched.step()

        lr = optim.param_groups[0]["lr"]
        dt = time.time() - t0
        print(f"Epoch {epoch:03d} | {dt:5.1f}s | lr={lr:.2e} | train={tr_loss:.4f} | val={va_loss:.4f} | acc={va_acc:.4f} | macroF1={va_f1:.4f}")

        if va_f1 > best_f1:
            best_f1 = va_f1
            torch.save(
                {
                    "model_state": model.state_dict(),
                    "num_classes": NUM_CLASSES,
                    "raw_classes": RAW_CLASSES,
                    "class_weights": class_w.detach().cpu(),
                },
                best_path
            )
            print("✅ Saved best:", best_path)

    print("Done. Best macroF1:", best_f1)

if __name__ == "__main__":
    main()


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Train blocks: 0
Val blocks  : 0


RuntimeError: No train .npz files found. Check TRAIN_DIR.

In [5]:
import glob, os, random
from torch.utils.data import DataLoader

# path to folder containing ALL npz files
ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"

def list_npz(folder):
    return sorted(glob.glob(os.path.join(folder, "*.npz")))

all_files = list_npz(ALL_BLOCKS_DIR)

print("Total blocks:", len(all_files))

# split 80/20
random.seed(42)
random.shuffle(all_files)
split = int(0.8 * len(all_files))

train_files = all_files[:split]
val_files   = all_files[split:]

print("Train blocks:", len(train_files))
print("Val blocks:", len(val_files))

# dataset objects
train_ds = BlocksNPZ(train_files)
val_ds   = BlocksNPZ(val_files)

# dataloaders
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH,
    shuffle=True,
    num_workers=0,
    drop_last=True
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH,
    shuffle=False,
    num_workers=0
)

print("Train loader ready:", len(train_loader))


Total blocks: 27697
Train blocks: 22157
Val blocks: 5540
Train loader ready: 3692


In [6]:
    import os, glob, random, time
    from typing import List, Dict
    
    import numpy as np
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    
    # =========================
    # 0) EDIT THESE PATHS
    # =========================
    ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"   # <-- folder containing ALL .npz
    OUT_DIR        = r"D:/lidarrrrr/anbu/dl_models"    # <-- where to save model
    os.makedirs(OUT_DIR, exist_ok=True)
    
    # =========================
    # 1) BASIC SETTINGS
    # =========================
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Start with 5-class (matches your current working mapping)
    RAW_CLASSES = [1, 2, 3, 6, 12]     # change to [1,2,3,6,7,12,13] later
    MAP = {c: i for i, c in enumerate(RAW_CLASSES)}
    NUM_CLASSES = len(RAW_CLASSES)
    IGNORE_INDEX = -100
    
    POINTS = 4096
    IN_CH = 10
    
    BATCH = 6            # RTX 3050 safe
    EPOCHS = 30          # start small; later set 120+
    LR = 1e-3
    WD = 1e-4
    LABEL_SMOOTH = 0.05
    USE_AMP = True
    NUM_WORKERS = 0      # Windows safe (try 2 if stable)
    
    # PointNet++ SSG sizes
    NPOINTS = [1024, 256, 64]
    NSAMPLE = [32, 32, 32]  # kNN neighbors
    
    # =========================
    # 2) HELPERS
    # =========================
    def list_npz(folder: str) -> List[str]:
        return sorted(glob.glob(os.path.join(folder, "*.npz")))
    
    def map_labels(y_raw: np.ndarray) -> np.ndarray:
        out = np.full_like(y_raw, IGNORE_INDEX)
        for raw, new in MAP.items():
            out[y_raw == raw] = new
        return out
    
    def class_weights_from_counts(counts_raw: Dict[int, int]) -> torch.Tensor:
        """
        w = 1/sqrt(freq), normalize mean~1, clamp [0.25, 10]
        """
        freqs = []
        for raw in RAW_CLASSES:
            freqs.append(float(counts_raw.get(raw, 1)))
        freqs = np.array(freqs, dtype=np.float64)
    
        w = 1.0 / np.sqrt(freqs)
        w = w / (w.mean() + 1e-12)
        w = np.clip(w, 0.25, 10.0).astype(np.float32)
        return torch.from_numpy(w)
    
    # =========================
    # 3) DATASET
    # =========================
    class BlocksNPZ(Dataset):
        def __init__(self, files: List[str]):
            self.files = files
    
        def __len__(self):
            return len(self.files)
    
        def __getitem__(self, i):
            d = np.load(self.files[i])
            X = d["X"].astype(np.float32)      # (4096,10)
            y = d["y"].astype(np.int32)        # (4096,)
            y = map_labels(y)
            return torch.from_numpy(X), torch.from_numpy(y)
    
    # =========================
    # 4) POINTNET++ OPS (FPS + kNN)
    # =========================
    def square_distance(src, dst):
        # src: (B,N,3), dst: (B,M,3) -> (B,N,M)
        dist = -2 * torch.matmul(src, dst.transpose(1,2))
        dist += torch.sum(src**2, dim=-1).unsqueeze(-1)
        dist += torch.sum(dst**2, dim=-1).unsqueeze(1)
        return dist
    
    def index_points(points, idx):
        # points: (B,N,C), idx: (B,S) or (B,S,K)
        B = points.shape[0]
        if idx.dim() == 2:
            batch = torch.arange(B, device=points.device).view(B,1)
            return points[batch, idx, :]
        else:
            batch = torch.arange(B, device=points.device).view(B,1,1)
            return points[batch, idx, :]
    
    def farthest_point_sample(xyz, npoint):
        # xyz: (B,N,3) -> (B,npoint)
        device = xyz.device
        B, N, _ = xyz.shape
        centroids = torch.zeros(B, npoint, dtype=torch.long, device=device)
        distance = torch.full((B, N), 1e10, device=device)
        farthest = torch.randint(0, N, (B,), dtype=torch.long, device=device)
        batch = torch.arange(B, device=device)
    
        for i in range(npoint):
            centroids[:, i] = farthest
            centroid = xyz[batch, farthest].view(B, 1, 3)
            dist = torch.sum((xyz - centroid) ** 2, dim=-1)
            distance = torch.minimum(distance, dist)
            farthest = torch.max(distance, dim=-1)[1]
        return centroids
    
    def knn_point(k, xyz, new_xyz):
        # xyz: (B,N,3), new_xyz: (B,S,3) -> idx: (B,S,k)
        dist = square_distance(new_xyz, xyz)   # (B,S,N)
        idx = dist.topk(k=k, dim=-1, largest=False)[1]
        return idx
    
    # =========================
    # 5) POINTNET++ MODULES
    # =========================
    class SharedMLP(nn.Module):
        def __init__(self, channels):
            super().__init__()
            layers = []
            for i in range(len(channels)-1):
                layers += [
                    nn.Conv2d(channels[i], channels[i+1], 1),
                    nn.BatchNorm2d(channels[i+1]),
                    nn.ReLU(inplace=True),
                ]
            self.net = nn.Sequential(*layers)
    
        def forward(self, x):
            return self.net(x)
    
    class SA_KNN(nn.Module):
        def __init__(self, npoint, nsample, in_ch, mlp):
            super().__init__()
            self.npoint = npoint
            self.nsample = nsample
            self.mlp = SharedMLP([in_ch] + mlp)
    
        def forward(self, xyz, points):
            # xyz: (B,N,3), points: (B,N,D) or None
            fps_idx = farthest_point_sample(xyz, self.npoint)  # (B,S)
            new_xyz = index_points(xyz, fps_idx)               # (B,S,3)
    
            knn_idx = knn_point(self.nsample, xyz, new_xyz)    # (B,S,K)
            grouped_xyz = index_points(xyz, knn_idx)           # (B,S,K,3)
            grouped_xyz = grouped_xyz - new_xyz.unsqueeze(2)   # normalize
    
            if points is not None:
                grouped_points = index_points(points, knn_idx) # (B,S,K,D)
                new_group = torch.cat([grouped_xyz, grouped_points], dim=-1)  # (B,S,K,3+D)
            else:
                new_group = grouped_xyz
    
            # (B,C,S,K)
            new_group = new_group.permute(0,3,1,2).contiguous()
            new_group = self.mlp(new_group)                     # (B,mlp[-1],S,K)
            new_points = torch.max(new_group, dim=-1)[0]        # (B,mlp[-1],S)
            new_points = new_points.transpose(1,2).contiguous() # (B,S,mlp[-1])
            return new_xyz, new_points
    
    class FP(nn.Module):
        def __init__(self, in_ch, mlp):
            super().__init__()
            layers = []
            last = in_ch
            for out in mlp:
                layers += [nn.Conv1d(last, out, 1), nn.BatchNorm1d(out), nn.ReLU(inplace=True)]
                last = out
            self.mlp = nn.Sequential(*layers)
    
        def forward(self, xyz1, xyz2, p1, p2):
            # xyz1: (B,N,3), xyz2: (B,S,3), p2: (B,S,D2), p1: (B,N,D1) or None
            B, N, _ = xyz1.shape
            _, S, _ = xyz2.shape
    
            if S == 1:
                interpolated = p2.repeat(1, N, 1)
            else:
                dist = square_distance(xyz1, xyz2)                  # (B,N,S)
                dist, idx = dist.topk(k=3, dim=-1, largest=False)   # (B,N,3)
                dist = torch.clamp(dist, min=1e-10)
                w = (1.0 / dist)
                w = w / torch.sum(w, dim=-1, keepdim=True)
    
                grouped = index_points(p2, idx)                     # (B,N,3,D2)
                interpolated = torch.sum(grouped * w.unsqueeze(-1), dim=2)  # (B,N,D2)
    
            if p1 is not None:
                new_points = torch.cat([p1, interpolated], dim=-1)
            else:
                new_points = interpolated
    
            new_points = new_points.transpose(1,2).contiguous()     # (B,C,N)
            new_points = self.mlp(new_points)
            return new_points.transpose(1,2).contiguous()           # (B,N,out)
    
    class PointNet2SSGSeg(nn.Module):
        def __init__(self, num_classes, in_ch=10):
            super().__init__()
            self.sa1 = SA_KNN(NPOINTS[0], NSAMPLE[0], in_ch, [64,64,128])     # xyz+feat packed as 10
            self.sa2 = SA_KNN(NPOINTS[1], NSAMPLE[1], 3+128, [128,128,256])   # grouped xyz(3)+128
            self.sa3 = SA_KNN(NPOINTS[2], NSAMPLE[2], 3+256, [256,256,512])
    
            self.fp3 = FP(256+512, [256,256])
            self.fp2 = FP(128+256, [256,128])
            self.fp1 = FP((in_ch-3)+128, [128,128,128])  # feat(7)+128
    
            self.cls = nn.Sequential(
                nn.Conv1d(128, 128, 1),
                nn.BatchNorm1d(128),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Conv1d(128, num_classes, 1)
            )
    
        def forward(self, X):
            xyz = X[:, :, 0:3].contiguous()      # (B,N,3)
            feat = X[:, :, 3:].contiguous()      # (B,N,7)
    
            # pack xyz_norm+feat inside SA1: we pass points=feat, SA will concat xyz_norm+points
            l1_xyz, l1_p = self.sa1(xyz, feat)     # (B,1024,3), (B,1024,128)
            l2_xyz, l2_p = self.sa2(l1_xyz, l1_p)  # (B,256,3),  (B,256,256)
            l3_xyz, l3_p = self.sa3(l2_xyz, l2_p)  # (B,64,3),   (B,64,512)
    
            l2_fp = self.fp3(l2_xyz, l3_xyz, l2_p, l3_p)      # (B,256,256)
            l1_fp = self.fp2(l1_xyz, l2_xyz, l1_p, l2_fp)     # (B,1024,128)
            l0_fp = self.fp1(xyz, l1_xyz, feat, l1_fp)        # (B,N,128)
    
            x = l0_fp.transpose(1,2).contiguous()             # (B,128,N)
            logits = self.cls(x).transpose(1,2).contiguous()  # (B,N,C)
            return logits
    
    # =========================
    # 6) LOSS + METRICS
    # =========================
    class WeightedCELoss(nn.Module):
        def __init__(self, w, label_smooth, ignore_index):
            super().__init__()
            self.register_buffer("w", w)
            self.ls = label_smooth
            self.ignore = ignore_index
    
        def forward(self, logits, y):
            B, N, C = logits.shape
            logits = logits.reshape(B*N, C)
            y = y.reshape(B*N)
            return F.cross_entropy(logits, y, weight=self.w, ignore_index=self.ignore, label_smoothing=self.ls)
    
    @torch.no_grad()
    def metrics(logits, y):
        pred = logits.argmax(dim=-1)
        mask = (y != IGNORE_INDEX)
        acc = (pred[mask] == y[mask]).float().mean().item() if mask.any() else 0.0
    
        # macro F1
        f1s = []
        for c in range(NUM_CLASSES):
            tp = ((pred==c) & (y==c) & mask).sum().item()
            fp = ((pred==c) & (y!=c) & mask).sum().item()
            fn = ((pred!=c) & (y==c) & mask).sum().item()
            denom = 2*tp + fp + fn
            f1s.append((2*tp/denom) if denom>0 else 0.0)
        return acc, float(sum(f1s)/len(f1s))
    
    # =========================
    # 7) TRAIN
    # =========================
    def main():
        print("Device:", DEVICE)
        if DEVICE=="cuda":
            print("GPU:", torch.cuda.get_device_name(0))
    
        all_files = list_npz(ALL_BLOCKS_DIR)
        if len(all_files) == 0:
            raise RuntimeError("No .npz found. Check ALL_BLOCKS_DIR path.")
    
        random.seed(42)
        random.shuffle(all_files)
        split = int(0.8 * len(all_files))
        train_files = all_files[:split]
        val_files = all_files[split:]
    
        print("All blocks :", len(all_files))
        print("Train blocks:", len(train_files))
        print("Val blocks  :", len(val_files))
    
        train_ds = BlocksNPZ(train_files)
        val_ds   = BlocksNPZ(val_files)
    
        train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True,
                                  num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"), drop_last=True)
        val_loader   = DataLoader(val_ds, batch_size=BATCH, shuffle=False,
                                  num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"))
    
        # Use your known rough counts (edit if you have better)
        # For 5-class [1,2,3,6,12], your inference looked like:
        counts_guess = {1: 1, 2: 7185068, 3: 33809, 6: 1274169, 12: 3881799}
        w = class_weights_from_counts(counts_guess).to(DEVICE)
        print("Class weights:", w.detach().cpu().numpy())
    
        model = PointNet2SSGSeg(NUM_CLASSES, in_ch=IN_CH).to(DEVICE)
    
        opt = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
        sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS, eta_min=1e-5)
        scaler = torch.amp.GradScaler("cuda", enabled=(USE_AMP and DEVICE=="cuda"))
        with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE=="cuda")):
            logits = model(X)
            loss = loss_fn(logits, y)
            best_f1 = -1.0
    
    best_path = os.path.join(OUT_DIR, f"pointnetpp_best_{NUM_CLASSES}cls.pt")
    
    for epoch in range(1, EPOCHS + 1):
        t0 = time.time()
    
        # ---------------- TRAIN ----------------
        model.train()
        tr_loss = 0.0
    
        for X, y in train_loader:
            X = X.to(DEVICE, non_blocking=True).float()
            y = y.to(DEVICE, non_blocking=True).long()
    
            opt.zero_grad(set_to_none=True)
    
            with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE == "cuda")):
                logits = model(X)          # (B,N,C)
                loss = loss_fn(logits, y)
    
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()
    
            tr_loss += loss.item()
    
        tr_loss /= max(len(train_loader), 1)
    
        # ---------------- VALIDATE ----------------
        model.eval()
        va_loss = 0.0
        accs, f1s = [], []
    
        with torch.no_grad():
            for X, y in val_loader:
                X = X.to(DEVICE, non_blocking=True).float()
                y = y.to(DEVICE, non_blocking=True).long()
    
                logits = model(X)
                va_loss += loss_fn(logits, y).item()
    
                acc, f1 = metrics(logits, y)
                accs.append(acc)
                f1s.append(f1)
    
        va_loss /= max(len(val_loader), 1)
        va_acc = float(np.mean(accs)) if accs else 0.0
        va_f1 = float(np.mean(f1s)) if f1s else 0.0
    
        sch.step()
        lr = opt.param_groups[0]["lr"]
        dt = time.time() - t0
    
        print(f"Epoch {epoch:03d} | {dt:5.1f}s | lr={lr:.2e} | train={tr_loss:.4f} | val={va_loss:.4f} | acc={va_acc:.4f} | macroF1={va_f1:.4f}")
    
        if va_f1 > best_f1:
            best_f1 = va_f1
            torch.save(
                {
                    "model_state": model.state_dict(),
                    "num_classes": NUM_CLASSES,
                    "raw_classes": RAW_CLASSES,
                    "class_weights": w.detach().cpu(),
                },
                best_path,
            )
            print("✅ Saved best:", best_path)
    
    print("Done. Best macroF1:", best_f1)
    print("Best model:", best_path)
    
    if __name__ == "__main__":
        main()


NameError: name 'model' is not defined

In [3]:
# Load dataset files
all_files = list_npz(ALL_BLOCKS_DIR)

import random
random.seed(42)
random.shuffle(all_files)

split = int(0.8 * len(all_files))
train_files = all_files[:split]
val_files   = all_files[split:]

print("All blocks :", len(all_files))
print("Train blocks:", len(train_files))
print("Val blocks  :", len(val_files))

# Dataset objects
train_ds = BlocksNPZ(train_files)
val_ds   = BlocksNPZ(val_files)

# DataLoaders
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=(DEVICE == "cuda"),
    drop_last=True
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(DEVICE == "cuda")
)


All blocks : 27697
Train blocks: 22157
Val blocks  : 5540


In [1]:
print("Train loader ready:", len(train_loader))


NameError: name 'train_loader' is not defined

In [8]:
import glob, os, random
from torch.utils.data import DataLoader

# path to folder containing ALL npz files
ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"

def list_npz(folder):
    return sorted(glob.glob(os.path.join(folder, "*.npz")))

all_files = list_npz(ALL_BLOCKS_DIR)

print("Total blocks:", len(all_files))

# split 80/20
random.seed(42)
random.shuffle(all_files)
split = int(0.8 * len(all_files))

train_files = all_files[:split]
val_files   = all_files[split:]

print("Train blocks:", len(train_files))
print("Val blocks:", len(val_files))

# dataset objects
train_ds = BlocksNPZ(train_files)
val_ds   = BlocksNPZ(val_files)

# dataloaders
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH,
    shuffle=True,
    num_workers=0,
    drop_last=True
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH,
    shuffle=False,
    num_workers=0
)

print("Train loader ready:", len(train_loader))


Total blocks: 27697
Train blocks: 22157
Val blocks: 5540
Train loader ready: 3692


In [1]:
    import os, glob, random, time
    from typing import List, Dict
    
    import numpy as np
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import Dataset, DataLoader
    
    # =========================
    # 0) EDIT THESE PATHS
    # =========================
    ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"   # <-- folder containing ALL .npz
    OUT_DIR        = r"D:/lidarrrrr/anbu/dl_models"    # <-- where to save model
    os.makedirs(OUT_DIR, exist_ok=True)
    
    # =========================
    # 1) BASIC SETTINGS
    # =========================
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Start with 5-class (matches your current working mapping)
    RAW_CLASSES = [1, 2, 3, 6, 12]     # change to [1,2,3,6,7,12,13] later
    MAP = {c: i for i, c in enumerate(RAW_CLASSES)}
    NUM_CLASSES = len(RAW_CLASSES)
    IGNORE_INDEX = -100
    
    POINTS = 4096
    IN_CH = 10
    
    BATCH = 6            # RTX 3050 safe
    EPOCHS = 30          # start small; later set 120+
    LR = 1e-3
    WD = 1e-4
    LABEL_SMOOTH = 0.05
    USE_AMP = True
    NUM_WORKERS = 0      # Windows safe (try 2 if stable)
    
    # PointNet++ SSG sizes
    NPOINTS = [1024, 256, 64]
    NSAMPLE = [32, 32, 32]  # kNN neighbors
    
    # =========================
    # 2) HELPERS
    # =========================
    def list_npz(folder: str) -> List[str]:
        return sorted(glob.glob(os.path.join(folder, "*.npz")))
    
    def map_labels(y_raw: np.ndarray) -> np.ndarray:
        out = np.full_like(y_raw, IGNORE_INDEX)
        for raw, new in MAP.items():
            out[y_raw == raw] = new
        return out
    
    def class_weights_from_counts(counts_raw: Dict[int, int]) -> torch.Tensor:
        """
        w = 1/sqrt(freq), normalize mean~1, clamp [0.25, 10]
        """
        freqs = []
        for raw in RAW_CLASSES:
            freqs.append(float(counts_raw.get(raw, 1)))
        freqs = np.array(freqs, dtype=np.float64)
    
        w = 1.0 / np.sqrt(freqs)
        w = w / (w.mean() + 1e-12)
        w = np.clip(w, 0.25, 10.0).astype(np.float32)
        return torch.from_numpy(w)
    
    # =========================
    # 3) DATASET
    # =========================
    class BlocksNPZ(Dataset):
        def __init__(self, files: List[str]):
            self.files = files
    
        def __len__(self):
            return len(self.files)
    
        def __getitem__(self, i):
            d = np.load(self.files[i])
            X = d["X"].astype(np.float32)      # (4096,10)
            y = d["y"].astype(np.int32)        # (4096,)
            y = map_labels(y)
            return torch.from_numpy(X), torch.from_numpy(y)
    
    # =========================
    # 4) POINTNET++ OPS (FPS + kNN)
    # =========================
    def square_distance(src, dst):
        # src: (B,N,3), dst: (B,M,3) -> (B,N,M)
        dist = -2 * torch.matmul(src, dst.transpose(1,2))
        dist += torch.sum(src**2, dim=-1).unsqueeze(-1)
        dist += torch.sum(dst**2, dim=-1).unsqueeze(1)
        return dist
    
    def index_points(points, idx):
        # points: (B,N,C), idx: (B,S) or (B,S,K)
        B = points.shape[0]
        if idx.dim() == 2:
            batch = torch.arange(B, device=points.device).view(B,1)
            return points[batch, idx, :]
        else:
            batch = torch.arange(B, device=points.device).view(B,1,1)
            return points[batch, idx, :]
    
    def farthest_point_sample(xyz, npoint):
        # xyz: (B,N,3) -> (B,npoint)
        device = xyz.device
        B, N, _ = xyz.shape
        centroids = torch.zeros(B, npoint, dtype=torch.long, device=device)
        distance = torch.full((B, N), 1e10, device=device)
        farthest = torch.randint(0, N, (B,), dtype=torch.long, device=device)
        batch = torch.arange(B, device=device)
    
        for i in range(npoint):
            centroids[:, i] = farthest
            centroid = xyz[batch, farthest].view(B, 1, 3)
            dist = torch.sum((xyz - centroid) ** 2, dim=-1)
            distance = torch.minimum(distance, dist)
            farthest = torch.max(distance, dim=-1)[1]
        return centroids
    
    def knn_point(k, xyz, new_xyz):
        # xyz: (B,N,3), new_xyz: (B,S,3) -> idx: (B,S,k)
        dist = square_distance(new_xyz, xyz)   # (B,S,N)
        idx = dist.topk(k=k, dim=-1, largest=False)[1]
        return idx
    
    # =========================
    # 5) POINTNET++ MODULES
    # =========================
    class SharedMLP(nn.Module):
        def __init__(self, channels):
            super().__init__()
            layers = []
            for i in range(len(channels)-1):
                layers += [
                    nn.Conv2d(channels[i], channels[i+1], 1),
                    nn.BatchNorm2d(channels[i+1]),
                    nn.ReLU(inplace=True),
                ]
            self.net = nn.Sequential(*layers)
    
        def forward(self, x):
            return self.net(x)
    
    class SA_KNN(nn.Module):
        def __init__(self, npoint, nsample, in_ch, mlp):
            super().__init__()
            self.npoint = npoint
            self.nsample = nsample
            self.mlp = SharedMLP([in_ch] + mlp)
    
        def forward(self, xyz, points):
            # xyz: (B,N,3), points: (B,N,D) or None
            fps_idx = farthest_point_sample(xyz, self.npoint)  # (B,S)
            new_xyz = index_points(xyz, fps_idx)               # (B,S,3)
    
            knn_idx = knn_point(self.nsample, xyz, new_xyz)    # (B,S,K)
            grouped_xyz = index_points(xyz, knn_idx)           # (B,S,K,3)
            grouped_xyz = grouped_xyz - new_xyz.unsqueeze(2)   # normalize
    
            if points is not None:
                grouped_points = index_points(points, knn_idx) # (B,S,K,D)
                new_group = torch.cat([grouped_xyz, grouped_points], dim=-1)  # (B,S,K,3+D)
            else:
                new_group = grouped_xyz
    
            # (B,C,S,K)
            new_group = new_group.permute(0,3,1,2).contiguous()
            new_group = self.mlp(new_group)                     # (B,mlp[-1],S,K)
            new_points = torch.max(new_group, dim=-1)[0]        # (B,mlp[-1],S)
            new_points = new_points.transpose(1,2).contiguous() # (B,S,mlp[-1])
            return new_xyz, new_points
    
    class FP(nn.Module):
        def __init__(self, in_ch, mlp):
            super().__init__()
            layers = []
            last = in_ch
            for out in mlp:
                layers += [nn.Conv1d(last, out, 1), nn.BatchNorm1d(out), nn.ReLU(inplace=True)]
                last = out
            self.mlp = nn.Sequential(*layers)
    
        def forward(self, xyz1, xyz2, p1, p2):
            # xyz1: (B,N,3), xyz2: (B,S,3), p2: (B,S,D2), p1: (B,N,D1) or None
            B, N, _ = xyz1.shape
            _, S, _ = xyz2.shape
    
            if S == 1:
                interpolated = p2.repeat(1, N, 1)
            else:
                dist = square_distance(xyz1, xyz2)                  # (B,N,S)
                dist, idx = dist.topk(k=3, dim=-1, largest=False)   # (B,N,3)
                dist = torch.clamp(dist, min=1e-10)
                w = (1.0 / dist)
                w = w / torch.sum(w, dim=-1, keepdim=True)
    
                grouped = index_points(p2, idx)                     # (B,N,3,D2)
                interpolated = torch.sum(grouped * w.unsqueeze(-1), dim=2)  # (B,N,D2)
    
            if p1 is not None:
                new_points = torch.cat([p1, interpolated], dim=-1)
            else:
                new_points = interpolated
    
            new_points = new_points.transpose(1,2).contiguous()     # (B,C,N)
            new_points = self.mlp(new_points)
            return new_points.transpose(1,2).contiguous()           # (B,N,out)
    
    class PointNet2SSGSeg(nn.Module):
        def __init__(self, num_classes, in_ch=10):
            super().__init__()
            self.sa1 = SA_KNN(NPOINTS[0], NSAMPLE[0], in_ch, [64,64,128])     # xyz+feat packed as 10
            self.sa2 = SA_KNN(NPOINTS[1], NSAMPLE[1], 3+128, [128,128,256])   # grouped xyz(3)+128
            self.sa3 = SA_KNN(NPOINTS[2], NSAMPLE[2], 3+256, [256,256,512])
    
            self.fp3 = FP(256+512, [256,256])
            self.fp2 = FP(128+256, [256,128])
            self.fp1 = FP((in_ch-3)+128, [128,128,128])  # feat(7)+128
    
            self.cls = nn.Sequential(
                nn.Conv1d(128, 128, 1),
                nn.BatchNorm1d(128),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Conv1d(128, num_classes, 1)
            )
    
        def forward(self, X):
            xyz = X[:, :, 0:3].contiguous()      # (B,N,3)
            feat = X[:, :, 3:].contiguous()      # (B,N,7)
    
            # pack xyz_norm+feat inside SA1: we pass points=feat, SA will concat xyz_norm+points
            l1_xyz, l1_p = self.sa1(xyz, feat)     # (B,1024,3), (B,1024,128)
            l2_xyz, l2_p = self.sa2(l1_xyz, l1_p)  # (B,256,3),  (B,256,256)
            l3_xyz, l3_p = self.sa3(l2_xyz, l2_p)  # (B,64,3),   (B,64,512)
    
            l2_fp = self.fp3(l2_xyz, l3_xyz, l2_p, l3_p)      # (B,256,256)
            l1_fp = self.fp2(l1_xyz, l2_xyz, l1_p, l2_fp)     # (B,1024,128)
            l0_fp = self.fp1(xyz, l1_xyz, feat, l1_fp)        # (B,N,128)
    
            x = l0_fp.transpose(1,2).contiguous()             # (B,128,N)
            logits = self.cls(x).transpose(1,2).contiguous()  # (B,N,C)
            return logits
    
    # =========================
    # 6) LOSS + METRICS
    # =========================
    class WeightedCELoss(nn.Module):
        def __init__(self, w, label_smooth, ignore_index):
            super().__init__()
            self.register_buffer("w", w)
            self.ls = label_smooth
            self.ignore = ignore_index
    
        def forward(self, logits, y):
            B, N, C = logits.shape
            logits = logits.reshape(B*N, C)
            y = y.reshape(B*N)
            return F.cross_entropy(logits, y, weight=self.w, ignore_index=self.ignore, label_smoothing=self.ls)
    
    @torch.no_grad()
    def metrics(logits, y):
        pred = logits.argmax(dim=-1)
        mask = (y != IGNORE_INDEX)
        acc = (pred[mask] == y[mask]).float().mean().item() if mask.any() else 0.0
    
        # macro F1
        f1s = []
        for c in range(NUM_CLASSES):
            tp = ((pred==c) & (y==c) & mask).sum().item()
            fp = ((pred==c) & (y!=c) & mask).sum().item()
            fn = ((pred!=c) & (y==c) & mask).sum().item()
            denom = 2*tp + fp + fn
            f1s.append((2*tp/denom) if denom>0 else 0.0)
        return acc, float(sum(f1s)/len(f1s))
    
    # =========================
    # 7) TRAIN
    # =========================
    def main():
        print("Device:", DEVICE)
        if DEVICE=="cuda":
            print("GPU:", torch.cuda.get_device_name(0))
    
        all_files = list_npz(ALL_BLOCKS_DIR)
        if len(all_files) == 0:
            raise RuntimeError("No .npz found. Check ALL_BLOCKS_DIR path.")
    
        random.seed(42)
        random.shuffle(all_files)
        split = int(0.8 * len(all_files))
        train_files = all_files[:split]
        val_files = all_files[split:]
    
        print("All blocks :", len(all_files))
        print("Train blocks:", len(train_files))
        print("Val blocks  :", len(val_files))
    
        train_ds = BlocksNPZ(train_files)
        val_ds   = BlocksNPZ(val_files)
    
        train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True,
                                  num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"), drop_last=True)
        val_loader   = DataLoader(val_ds, batch_size=BATCH, shuffle=False,
                                  num_workers=NUM_WORKERS, pin_memory=(DEVICE=="cuda"))
    
        # Use your known rough counts (edit if you have better)
        # For 5-class [1,2,3,6,12], your inference looked like:
        counts_guess = {1: 1, 2: 7185068, 3: 33809, 6: 1274169, 12: 3881799}
        w = class_weights_from_counts(counts_guess).to(DEVICE)
        print("Class weights:", w.detach().cpu().numpy())
    
        model = PointNet2SSGSeg(NUM_CLASSES, in_ch=IN_CH).to(DEVICE)
    
        opt = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
        sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS, eta_min=1e-5)
        scaler = torch.amp.GradScaler("cuda", enabled=(USE_AMP and DEVICE=="cuda"))
        with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE=="cuda")):
            logits = model(X)
            loss = loss_fn(logits, y)
            best_f1 = -1.0
    
    best_path = os.path.join(OUT_DIR, f"pointnetpp_best_{NUM_CLASSES}cls.pt")
    
    for epoch in range(1, EPOCHS + 1):
        t0 = time.time()
    
        # ---------------- TRAIN ----------------
        model.train()
        tr_loss = 0.0
    
        for X, y in train_loader:
            X = X.to(DEVICE, non_blocking=True).float()
            y = y.to(DEVICE, non_blocking=True).long()
    
            opt.zero_grad(set_to_none=True)
    
            with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE == "cuda")):
                logits = model(X)          # (B,N,C)
                loss = loss_fn(logits, y)
    
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            scaler.step(opt)
            scaler.update()
    
            tr_loss += loss.item()
    
        tr_loss /= max(len(train_loader), 1)
    
        # ---------------- VALIDATE ----------------
        model.eval()
        va_loss = 0.0
        accs, f1s = [], []
    
        with torch.no_grad():
            for X, y in val_loader:
                X = X.to(DEVICE, non_blocking=True).float()
                y = y.to(DEVICE, non_blocking=True).long()
    
                logits = model(X)
                va_loss += loss_fn(logits, y).item()
    
                acc, f1 = metrics(logits, y)
                accs.append(acc)
                f1s.append(f1)
    
        va_loss /= max(len(val_loader), 1)
        va_acc = float(np.mean(accs)) if accs else 0.0
        va_f1 = float(np.mean(f1s)) if f1s else 0.0
    
        sch.step()
        lr = opt.param_groups[0]["lr"]
        dt = time.time() - t0
    
        print(f"Epoch {epoch:03d} | {dt:5.1f}s | lr={lr:.2e} | train={tr_loss:.4f} | val={va_loss:.4f} | acc={va_acc:.4f} | macroF1={va_f1:.4f}")
    
        if va_f1 > best_f1:
            best_f1 = va_f1
            torch.save(
                {
                    "model_state": model.state_dict(),
                    "num_classes": NUM_CLASSES,
                    "raw_classes": RAW_CLASSES,
                    "class_weights": w.detach().cpu(),
                },
                best_path,
            )
            print("✅ Saved best:", best_path)
    
    print("Done. Best macroF1:", best_f1)
    print("Best model:", best_path)
    
    if __name__ == "__main__":
        main()


NameError: name 'model' is not defined

In [3]:
import os, glob, random, time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# =========================
# SETTINGS (EDIT PATH ONLY)
# =========================
ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
OUT_DIR        = r"D:/lidarrrrr/anbu/dl_models"
os.makedirs(OUT_DIR, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# classes used in your trained pipeline
RAW_CLASSES = [1,2,3,6,12]
MAP = {c:i for i,c in enumerate(RAW_CLASSES)}
NUM_CLASSES = len(RAW_CLASSES)

BATCH = 6
EPOCHS = 3
LR = 5e-4
USE_AMP = False

# =========================
# DATASET
# =========================
def list_npz(folder):
    return sorted(glob.glob(os.path.join(folder, "*.npz")))

def map_labels(y_raw):
    y = np.full_like(y_raw, -100)
    for raw,new in MAP.items():
        y[y_raw==raw] = new
    return y

class BlocksNPZ(Dataset):
    def __init__(self, files):
        self.files = files

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i):
        d = np.load(self.files[i])
        X = d["X"].astype(np.float32)

        # normalize per block (safe method)
        mean = X.mean(axis=0)
        std  = X.std(axis=0) + 1e-6
        X = (X - mean) / std

        y = map_labels(d["y"].astype(np.int32))
        return torch.from_numpy(X), torch.from_numpy(y)

# =========================
# SIMPLE POINTNET++ MODEL
# =========================
class SimplePointNetPP(nn.Module):
    def __init__(self, num_classes, in_ch=10):
        super().__init__()

        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch,64,1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Conv1d(64,128,1),
            nn.BatchNorm1d(128),
            nn.ReLU()
        )

        self.mlp2 = nn.Sequential(
            nn.Conv1d(128,256,1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )

        self.head = nn.Sequential(
            nn.Conv1d(256,128,1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Conv1d(128,num_classes,1)
        )

    def forward(self, X):
        # X: (B,N,10)
        x = X.transpose(1,2)     # (B,10,N)
        x = self.mlp1(x)
        x = self.mlp2(x)
        x = self.head(x)
        return x.transpose(1,2)  # (B,N,C)

# =========================
# METRICS
# =========================
def compute_metrics(logits, y):
    pred = logits.argmax(dim=-1)
    mask = y != -100
    correct = (pred[mask] == y[mask]).sum().item()
    total = mask.sum().item()
    acc = correct/total if total>0 else 0

    f1s=[]
    for c in range(NUM_CLASSES):
        tp = ((pred==c)&(y==c)&mask).sum().item()
        fp = ((pred==c)&(y!=c)&mask).sum().item()
        fn = ((pred!=c)&(y==c)&mask).sum().item()
        denom = 2*tp+fp+fn
        f1s.append((2*tp/denom) if denom>0 else 0)

    return acc, sum(f1s)/len(f1s)

# =========================
# MAIN TRAINING
# =========================
def main():
    print("Device:", DEVICE)
    if DEVICE=="cuda":
        print("GPU:", torch.cuda.get_device_name(0))

    # load files
    all_files = list_npz(ALL_BLOCKS_DIR)
    random.shuffle(all_files)

    split = int(0.8 * len(all_files))
    train_files = all_files[:split]
    val_files   = all_files[split:]

    print("Total blocks:", len(all_files))
    print("Train blocks:", len(train_files))
    print("Val blocks:", len(val_files))

    train_ds = BlocksNPZ(train_files)
    val_ds   = BlocksNPZ(val_files)

    train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0, drop_last=True)
    val_loader   = DataLoader(val_ds, batch_size=BATCH, shuffle=False, num_workers=0)

    model = SimplePointNetPP(NUM_CLASSES).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    scaler = torch.amp.GradScaler("cuda", enabled=(USE_AMP and DEVICE=="cuda"))

    best_f1 = 0
    best_path = os.path.join(OUT_DIR,"pointnetpp_best.pt")

    for epoch in range(1,EPOCHS+1):
        t0=time.time()
        model.train()
        train_loss=0

        for X,y in train_loader:
            X=X.to(DEVICE).float()
            y=y.to(DEVICE).long()

            optimizer.zero_grad()

            with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE=="cuda")):
                logits=model(X)
                loss=F.cross_entropy(
                    logits.reshape(-1,NUM_CLASSES),
                    y.reshape(-1),
                    ignore_index=-100
                )

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss+=loss.item()

        train_loss/=len(train_loader)

        # validation
        model.eval()
        val_loss=0
        accs=[]; f1s=[]

        with torch.no_grad():
            for X,y in val_loader:
                X=X.to(DEVICE).float()
                y=y.to(DEVICE).long()
                logits=model(X)

                loss=F.cross_entropy(
                    logits.reshape(-1,NUM_CLASSES),
                    y.reshape(-1),
                    ignore_index=-100
                )
                val_loss+=loss.item()
                acc,f1=compute_metrics(logits,y)
                accs.append(acc)
                f1s.append(f1)

        val_loss/=len(val_loader)
        val_acc=np.mean(accs)
        val_f1=np.mean(f1s)

        scheduler.step()

        print(f"Epoch {epoch:03d} | train {train_loss:.4f} | val {val_loss:.4f} | acc {val_acc:.4f} | F1 {val_f1:.4f}")

        if val_f1>best_f1:
            best_f1=val_f1
            torch.save(model.state_dict(),best_path)
            print("Saved best model")

    print("Training complete")
    print("Best model:",best_path)

if __name__=="__main__":
    main()


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Total blocks: 27697
Train blocks: 22157
Val blocks: 5540
Epoch 001 | train 0.4055 | val 0.3576 | acc 0.8403 | F1 0.7848
Saved best model
Epoch 002 | train 0.3479 | val 0.3407 | acc 0.8467 | F1 0.8067
Saved best model
Epoch 003 | train 0.3343 | val 0.3319 | acc 0.8503 | F1 0.8159
Saved best model
Training complete
Best model: D:/lidarrrrr/anbu/dl_models\pointnetpp_best.pt


In [2]:
import glob, os, numpy as np

ALL_BLOCKS_DIR = r"D:/lidarrrrr/anbu/dl_dataset/blocks"
files = sorted(glob.glob(os.path.join(ALL_BLOCKS_DIR, "*.npz")))

bad = 0
for f in files[:300]:  # check first 300 blocks
    d = np.load(f)
    X = d["X"].astype(np.float32)
    if not np.isfinite(X).all():
        bad += 1
        print("BAD:", f, "nan/inf count:", np.sum(~np.isfinite(X)))
        break

print("Checked:", min(300, len(files)), "Bad found:", bad)


Checked: 300 Bad found: 0


In [1]:
import numpy as np, glob, os
f = sorted(glob.glob(os.path.join(r"D:/lidarrrrr/anbu/dl_dataset/blocks","*.npz")))[0]
d=np.load(f)
X=d["X"].astype(np.float32)
print("min", np.nanmin(X,0))
print("max", np.nanmax(X,0))
print("finite?", np.isfinite(X).all())


min [-1.378125e+02 -2.095000e+02 -8.483579e-01  0.000000e+00  0.000000e+00
  1.000000e+00  1.000000e+00 -5.900000e+01  0.000000e+00  3.999996e-02]
max [1.4343750e+02 1.5750000e+02 1.0561642e+01 1.0889999e+01 4.4083000e+04
 5.0000000e+00 5.0000000e+00 5.4000000e+01 0.0000000e+00 1.6670000e+01]
finite? True


In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import laspy

# =========================
# PATHS (EDIT)
# =========================
IN_LAZ   = r"D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz"
MODEL_PT = r"D:/lidarrrrr/anbu/dl_models/pointnetpp_best.pt"
OUT_LAS  = r"D:/lidarrrrr/anbu/New folder/pointnetpp_pred.las"
OUT_LAZ  = r"D:/lidarrrrr/anbu/New folder/pointnetpp_pred.laz"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# =========================
# CLASSES (5-class)
# =========================
RAW_CLASSES = [1, 2, 3, 6, 12]  # your trained mapping
NUM_CLASSES = len(RAW_CLASSES)
CONTIG_TO_RAW = {i: c for i, c in enumerate(RAW_CLASSES)}

# =========================
# INFERENCE SETTINGS
# =========================
POINTS_PER_BLOCK = 4096
BLOCK_SIZE = 20.0      # same scale you used when it started working well
BATCH_BLOCKS = 8       # RTX 3050 safe
USE_AMP = True         # inference AMP is safe

# =========================
# MODEL (MUST MATCH TRAINING)
# This matches your one-file training model: SimplePointNetPP
# =========================
class SimplePointNetPP(nn.Module):
    def __init__(self, num_classes, in_ch=10):
        super().__init__()
        self.mlp1 = nn.Sequential(
            nn.Conv1d(in_ch, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
        )
        self.mlp2 = nn.Sequential(
            nn.Conv1d(128, 256, 1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
        )
        self.head = nn.Sequential(
            nn.Conv1d(256, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(128, num_classes, 1),
        )

    def forward(self, X):
        # X: (B,N,10)
        x = X.transpose(1, 2).contiguous()   # (B,10,N)
        x = self.mlp1(x)
        x = self.mlp2(x)
        x = self.head(x)                     # (B,C,N)
        return x.transpose(1, 2).contiguous()  # (B,N,C)

def load_model():
    model = SimplePointNetPP(NUM_CLASSES, in_ch=10).to(DEVICE)
    state = torch.load(MODEL_PT, map_location=DEVICE)
    # if you saved plain state_dict, this works:
    if isinstance(state, dict) and "model_state" in state:
        model.load_state_dict(state["model_state"], strict=True)
    else:
        model.load_state_dict(state, strict=True)
    model.eval()
    return model

# =========================
# FEATURE BUILDING (10 features)
# (x_local, y_local, z, hag, intensity, return_number,
#  number_of_returns, scan_angle, deviation, slope)
# =========================
def get_dim_safe(las, name):
    try:
        return np.asarray(getattr(las, name))
    except Exception:
        try:
            return np.asarray(las[name])  # extra dims
        except Exception:
            return None

def compute_hag_grid(x, y, z, cell=1.0, percentile=5):
    x0 = x.min()
    y0 = y.min()
    gx = np.floor((x - x0) / cell).astype(np.int32)
    gy = np.floor((y - y0) / cell).astype(np.int32)
    key = (gx.astype(np.int64) << 32) ^ gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]
    z_s = z[order]

    ground = np.empty_like(z_s, dtype=np.float32)
    start = 0
    n = len(z_s)
    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1
        g = np.percentile(z_s[start:end], percentile).astype(np.float32)
        ground[start:end] = g
        start = end

    ground_unsorted = np.empty_like(ground)
    ground_unsorted[order] = ground

    hag = (z.astype(np.float32) - ground_unsorted.astype(np.float32))
    hag = np.maximum(hag, 0.0)
    return hag

def compute_slope_proxy(x_local, y_local, hag):
    # keep simple (same as your training: slope often not critical)
    return np.zeros_like(hag, dtype=np.float32)

def build_features_from_las(las):
    x = np.asarray(las.x, dtype=np.float64)
    y = np.asarray(las.y, dtype=np.float64)
    z = np.asarray(las.z, dtype=np.float32)

    x_local = (x - x.min()).astype(np.float32)
    y_local = (y - y.min()).astype(np.float32)

    intensity = get_dim_safe(las, "intensity")
    intensity = intensity.astype(np.float32) if intensity is not None else np.zeros_like(z, dtype=np.float32)

    return_num = get_dim_safe(las, "return_number")
    return_num = return_num.astype(np.float32) if return_num is not None else np.zeros_like(z, dtype=np.float32)

    num_returns = get_dim_safe(las, "number_of_returns")
    num_returns = num_returns.astype(np.float32) if num_returns is not None else np.zeros_like(z, dtype=np.float32)

    scan_angle = get_dim_safe(las, "scan_angle_rank")
    if scan_angle is None:
        scan_angle = get_dim_safe(las, "scan_angle")
    scan_angle = scan_angle.astype(np.float32) if scan_angle is not None else np.zeros_like(z, dtype=np.float32)

    deviation = get_dim_safe(las, "deviation")
    deviation = deviation.astype(np.float32) if deviation is not None else np.zeros_like(z, dtype=np.float32)

    hag = compute_hag_grid(x.astype(np.float32), y.astype(np.float32), z.astype(np.float32), cell=1.0, percentile=5)
    slope = compute_slope_proxy(x_local, y_local, hag)

    X = np.stack([
        x_local, y_local, z, hag,
        intensity, return_num, num_returns, scan_angle,
        deviation, slope
    ], axis=1).astype(np.float32)

    return X

# =========================
# BLOCKING (cover-all)
# =========================
def build_blocks_cover_all(X, block_size=20.0, points_per_block=4096, seed=123):
    x = X[:, 0]  # x_local
    y = X[:, 1]  # y_local

    gx = np.floor(x / block_size).astype(np.int32)
    gy = np.floor(y / block_size).astype(np.int32)
    key = gx.astype(np.int64) * 10_000_000 + gy.astype(np.int64)

    order = np.argsort(key)
    key_s = key[order]

    blocks = []
    start = 0
    n = len(X)
    rng = np.random.default_rng(seed)

    while start < n:
        end = start + 1
        while end < n and key_s[end] == key_s[start]:
            end += 1

        cell_indices = order[start:end]
        if cell_indices.size > 0:
            rng.shuffle(cell_indices)
            for s in range(0, cell_indices.size, points_per_block):
                chunk = cell_indices[s:s + points_per_block]
                if chunk.size < points_per_block:
                    pad = rng.choice(cell_indices, size=(points_per_block - chunk.size), replace=True)
                    chunk = np.concatenate([chunk, pad], axis=0)
                blocks.append(chunk.astype(np.int64))

        start = end

    return blocks

def normalize_per_block(Xb: np.ndarray) -> np.ndarray:
    # SAME as your training fix: per-block mean/std normalization
    mean = Xb.mean(axis=0, keepdims=True)
    std = Xb.std(axis=0, keepdims=True) + 1e-6
    return (Xb - mean) / std

# =========================
# PREDICT (probability voting)
# =========================
@torch.no_grad()
def predict_full(model, X_full):
    blocks = build_blocks_cover_all(X_full, block_size=BLOCK_SIZE, points_per_block=POINTS_PER_BLOCK)
    print(f"Blocks created: {len(blocks)} (block_size={BLOCK_SIZE}, N={POINTS_PER_BLOCK})")

    N = X_full.shape[0]
    prob_sum = np.zeros((N, NUM_CLASSES), dtype=np.float32)

    for i in range(0, len(blocks), BATCH_BLOCKS):
        batch = blocks[i:i + BATCH_BLOCKS]

        Xb = np.stack([X_full[idx] for idx in batch], axis=0)  # (B,4096,10)
        # normalize per block
        for b in range(Xb.shape[0]):
            Xb[b] = normalize_per_block(Xb[b])

        xb = torch.from_numpy(Xb).to(DEVICE, non_blocking=True).float()

        with torch.amp.autocast("cuda", enabled=(USE_AMP and DEVICE == "cuda")):
            logits = model(xb)  # (B,4096,C)
            probs = torch.softmax(logits, dim=-1).detach().cpu().numpy().astype(np.float32)

        for b, idxs in enumerate(batch):
            prob_sum[idxs] += probs[b]

        if (i // BATCH_BLOCKS) % 20 == 0:
            print(f"Predicted blocks {i}/{len(blocks)}")

    pred_contig = prob_sum.argmax(axis=1).astype(np.int32)  # 0..4
    return pred_contig

# =========================
# WRITE BACK
# =========================
def write_outputs(las, pred_contig):
    pred_raw = np.vectorize(CONTIG_TO_RAW.get)(pred_contig).astype(np.uint8)
    las.classification = pred_raw

    os.makedirs(os.path.dirname(OUT_LAS), exist_ok=True)

    las.write(OUT_LAS)
    print("✅ Wrote LAS:", OUT_LAS)

    try:
        las.write(OUT_LAZ)
        print("✅ Wrote LAZ:", OUT_LAZ)
    except Exception as e:
        print("⚠️ Could not write LAZ:", e)
        print("   Fix: pip install lazrs")

    u, c = np.unique(pred_raw, return_counts=True)
    print("Pred counts (raw):", dict(zip(u.tolist(), c.tolist())))

def main():
    print("Device:", DEVICE)
    if DEVICE == "cuda":
        print("GPU:", torch.cuda.get_device_name(0))

    las = laspy.read(IN_LAZ)
    print("Loaded:", IN_LAZ)
    print("Points:", len(las.x))

    X_full = build_features_from_las(las)
    print("X_full shape:", X_full.shape)

    model = load_model()
    pred_contig = predict_full(model, X_full)

    write_outputs(las, pred_contig)

if __name__ == "__main__":
    main()


Device: cuda
GPU: NVIDIA GeForce RTX 3050
Loaded: D:/lidarrrrr/anbu/DX3035724 S.GIUSTO000001.laz
Points: 12374846
X_full shape: (12374846, 10)
Blocks created: 3222 (block_size=20.0, N=4096)
Predicted blocks 0/3222
Predicted blocks 160/3222
Predicted blocks 320/3222
Predicted blocks 480/3222
Predicted blocks 640/3222
Predicted blocks 800/3222
Predicted blocks 960/3222
Predicted blocks 1120/3222
Predicted blocks 1280/3222
Predicted blocks 1440/3222
Predicted blocks 1600/3222
Predicted blocks 1760/3222
Predicted blocks 1920/3222
Predicted blocks 2080/3222
Predicted blocks 2240/3222
Predicted blocks 2400/3222
Predicted blocks 2560/3222
Predicted blocks 2720/3222
Predicted blocks 2880/3222
Predicted blocks 3040/3222
Predicted blocks 3200/3222
✅ Wrote LAS: D:/lidarrrrr/anbu/New folder/pointnetpp_pred.las
✅ Wrote LAZ: D:/lidarrrrr/anbu/New folder/pointnetpp_pred.laz
Pred counts (raw): {1: 3792962, 2: 6918647, 3: 501661, 6: 1073272, 12: 88304}
