In [8]:
# CELL 3 — VERIFY PACKAGES (run AFTER kernel restart)

import torch, numpy as np, sklearn
import open3d as o3d
from plyfile import PlyData

# Report the versions of every package the notebook relies on.
# CUDA availability is probed once and reused for the report and for DEVICE.
cuda_ok = torch.cuda.is_available()

for label, value in (
    ("torch version      :", torch.__version__),
    ("cuda available     :", cuda_ok),
    ("cuda version       :", torch.version.cuda if cuda_ok else None),
    ("numpy version      :", np.__version__),
    ("sklearn version    :", sklearn.__version__),
    ("open3d version     :", o3d.__version__),
):
    print(label, value)
# Reaching this line means the plyfile import at the top of the cell succeeded.
print("plyfile            : OK")

# Device used by the later model / training cells.
DEVICE = torch.device("cuda" if cuda_ok else "cpu")
print("\nDEVICE =", DEVICE)


torch version      : 2.9.0+cu128
cuda available     : True
cuda version       : 12.8
numpy version      : 2.2.6
sklearn version    : 1.7.2
open3d version     : 0.19.0
plyfile            : OK

DEVICE = cuda


In [9]:
# CELL 4 — PLY Dataset Loader (for your 3-class dataset)

import numpy as np
import torch
from torch.utils.data import Dataset
from pathlib import Path
from plyfile import PlyData

# Directory that is globbed for *.ply tiles further down in this cell.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
DATA_DIR = Path("/home/ccbd/Desktop/SSS_03/Data/train_sphere_ascii_roi")

# Raw class code (as stored in the PLY) -> contiguous training class index.
# Codes that are not keys of this dict are mapped to 255 by the loader and dropped.
label_map = {1: 0, 3: 1, 9: 2}   # final correct mapping

def read_ply_hungary(path):
    """Read one PLY tile and return its labelled points.

    Parameters
    ----------
    path : str or Path
        PLY file whose ``vertex`` element has ``x``, ``y``, ``z`` and
        ``scalar_NewClassification`` properties.

    Returns
    -------
    pts : np.ndarray, float32, shape (M, 3)
        Coordinates of the vertices whose raw class is a key of ``label_map``.
    labels : np.ndarray, int64, shape (M,)
        Remapped class index (``label_map[raw]``) for each kept vertex.

    Vertices with any other raw class are dropped. A file with zero vertices
    yields two empty arrays instead of raising.
    """
    ply = PlyData.read(str(path))
    v = ply["vertex"].data

    pts = np.vstack([v["x"], v["y"], v["z"]]).T.astype(np.float32)
    raw = np.asarray(v["scalar_NewClassification"]).astype(np.int64)

    # Array-level remap: start from the "ignore" code and overwrite the known
    # classes. This replaces a per-point np.vectorize(lambda ...) call, which
    # runs a Python function for every vertex and fails on size-0 input.
    labels = np.full(raw.shape, 255, dtype=np.int64)
    for src, dst in label_map.items():
        labels[raw == src] = dst

    mask = labels != 255
    return pts[mask], labels[mask]

class HungaryPLYDataset(Dataset):
    """Random fixed-size point samples drawn from a list of PLY tiles.

    Each item is ``points_per_sample`` points sampled from one tile (with
    replacement only if the tile has fewer points than requested), optionally
    augmented, then centred on its own mean.

    Parameters
    ----------
    root : str or Path
        Directory containing the PLY files.
    files : sequence of str
        File names relative to ``root``.
    points_per_sample : int
        Number of points per returned sample.
    augment : bool
        If True, apply a random rotation about Z plus Gaussian jitter
        (sigma 0.01) before centring.
    """

    def __init__(self, root, files, points_per_sample=2048, augment=True):
        self.root = Path(root)
        self.files = files
        self.points_per_sample = points_per_sample
        self.augment = augment
        # file name -> (pts, labels); avoids re-parsing the PLY on every sample
        self._cache = {}

    def __len__(self):
        # Every tile is visited 10 times per epoch (with a fresh random subsample).
        return len(self.files) * 10

    def _load_cloud(self, ply):
        """Return (pts, labels) for one tile, parsing the file only once."""
        if ply not in self._cache:
            self._cache[ply] = read_ply_hungary(self.root / ply)
        return self._cache[ply]

    def __getitem__(self, idx):
        """Return ``(pts, labels)`` tensors of shape (P, 3) float32 and (P,) int64.

        The tile is chosen from ``idx`` (``idx % len(files)``) so that the
        DataLoader's sampler controls which tile is used and every tile is
        covered equally; previously ``idx`` was ignored and a random tile was
        picked on each call.

        Raises
        ------
        ValueError
            If the selected tile has no point with a mapped label.
        """
        ply = self.files[idx % len(self.files)]
        pts, labels = self._load_cloud(ply)

        N = len(pts)
        if N == 0:
            raise ValueError(f"{ply}: no points with a mapped label")
        P = self.points_per_sample
        idxs = np.random.choice(N, P, replace=(N < P))

        # Fancy indexing copies, so the in-place ops below never touch the cache.
        pts = pts[idxs]
        labels = labels[idxs]

        if self.augment:
            theta = np.random.uniform(0, 2 * np.pi)
            c, s = np.cos(theta), np.sin(theta)
            R = np.array([
                [c, -s, 0],
                [s,  c, 0],
                [0,  0, 1]
            ], dtype=np.float32)
            pts = pts @ R.T
            pts += np.random.normal(0, 0.01, pts.shape).astype(np.float32)

        pts -= pts.mean(0, keepdims=True)

        return torch.from_numpy(pts), torch.from_numpy(labels)

def collate_batch(batch):
    """Stack a list of (points, labels) samples along a new leading batch axis.

    Returns a ``(pts, lbl)`` tuple. All samples in the batch must have the
    same shape, as required by ``torch.stack``.
    """
    return tuple(
        torch.stack([sample[field] for sample in batch], dim=0)
        for field in (0, 1)
    )

# Create train/val split
import random
from torch.utils.data import DataLoader

# Fixed seed so the 80/20 file split is identical on every run; an unseeded
# shuffle silently moves tiles between train and val after each kernel restart.
SPLIT_SEED = 42

all_files = sorted(f.name for f in DATA_DIR.glob("*.ply"))
if not all_files:
    raise FileNotFoundError(f"No .ply files found in {DATA_DIR}")
random.Random(SPLIT_SEED).shuffle(all_files)

N = len(all_files)
n_train = int(0.8 * N)
train_files = all_files[:n_train]
val_files = all_files[n_train:]

train_ds = HungaryPLYDataset(DATA_DIR, train_files, points_per_sample=1024, augment=True)
val_ds   = HungaryPLYDataset(DATA_DIR, val_files,   points_per_sample=1024, augment=False)

train_loader = DataLoader(train_ds, batch_size=1, shuffle=True,
                          num_workers=0, collate_fn=collate_batch, pin_memory=True)

val_loader   = DataLoader(val_ds, batch_size=1, shuffle=False,
                          num_workers=0, collate_fn=collate_batch, pin_memory=True)
print("train batches:", len(train_loader), "val batches:", len(val_loader))


train batches: 120 val batches: 40


In [10]:
# CELL 5 — FIXED KPConv-Like Model (Dimension-Safe)

import torch
import torch.nn as nn
import torch.nn.functional as F

def knn(pts, K=8):
    """Indices of the K nearest points (Euclidean) for every point.

    pts: (B, P, 3)
    returns idx: (B, P, K), nearest first. Each point is compared against
    the full cloud, itself included.
    """
    pairwise = torch.cdist(pts, pts)                 # (B, P, P)
    return pairwise.topk(K, largest=False).indices   # (B, P, K)

class KPConvLayer(nn.Module):
    """KPConv-like layer: a shared MLP over each point's K nearest neighbours.

    For every point, the relative offsets to its neighbours are concatenated
    with the neighbours' features, passed through a two-layer MLP and
    max-pooled over the neighbourhood.

    Parameters
    ----------
    in_c : int
        Number of input feature channels.
    out_c : int
        Number of output feature channels.
    K : int
        Neighbourhood size. Clamped to the number of points at call time.
    """

    def __init__(self, in_c, out_c, K=8):
        super().__init__()
        self.K = K
        self.mlp = nn.Sequential(
            nn.Linear(in_c + 3, out_c),
            nn.ReLU(),
            nn.Linear(out_c, out_c)
        )

    def forward(self, pts, feats):
        """
        pts:   (B, P, 3)
        feats: (B, P, C)
        output: (B, P, out_c)
        """
        B, P, C = feats.shape

        # Nothing to pool over: return an empty result instead of failing in max().
        if P == 0:
            return feats.new_zeros((B, 0, self.mlp[-1].out_features))

        # A cloud with fewer than K points (e.g. a short tail chunk at inference)
        # cannot supply K neighbours; topk would raise, so use what is available.
        K = min(self.K, P)

        idx = knn(pts, K)         # (B, P, K)

        # Direct indexing instead of gather over a (B, P, P, *) expanded view:
        # result[b, p, k] = tensor[b, idx[b, p, k]]
        batch = torch.arange(B, device=pts.device).view(B, 1, 1)
        neigh_pts = pts[batch, idx]       # (B, P, K, 3)
        neigh_feats = feats[batch, idx]   # (B, P, K, C)

        # offsets from each centre point to its neighbours
        rel = neigh_pts - pts.unsqueeze(2)  # (B, P, K, 3)

        # per-neighbour input: (B, P, K, 3 + C)
        inp = torch.cat([rel, neigh_feats], dim=-1)

        # MLP -> (B, P, K, out_c)
        out = self.mlp(inp)

        # max over the K neighbours -> (B, P, out_c)
        out = out.max(dim=2)[0]

        return out


class KPNet(nn.Module):
    """Per-point classifier: linear lift -> two KPConvLayer blocks -> MLP head.

    Parameters
    ----------
    base : int
        Width of the first feature layer; the two conv blocks use 2x and 4x.
    num_classes : int
        Number of output logits per point.
    K : int
        Neighbourhood size passed to both KPConvLayer blocks.
    """

    def __init__(self, base=16, num_classes=3, K=8):
        super().__init__()
        mid, wide = base * 2, base * 4

        # Attribute names and creation order are kept as-is: they define the
        # state_dict keys that saved checkpoints are loaded against.
        self.fc0 = nn.Linear(3, base)

        self.kp1 = KPConvLayer(base, mid, K)
        self.kp2 = KPConvLayer(mid, wide, K)

        self.head = nn.Sequential(
            nn.Linear(wide, wide),
            nn.ReLU(),
            nn.Linear(wide, num_classes)
        )

    def forward(self, pts):
        """pts: (B, P, 3) -> logits: (B, P, num_classes)."""
        feats = torch.relu(self.fc0(pts))
        for conv in (self.kp1, self.kp2):
            feats = conv(pts, feats)
        return self.head(feats)

# Build the network (base width 8, 3 output classes, 6 neighbours per point)
# and move its parameters to DEVICE.
model = KPNet(base=8, num_classes=3, K=6).to(DEVICE)
# Bare expression: the notebook shows the module repr as the cell output.
model


KPNet(
  (fc0): Linear(in_features=3, out_features=8, bias=True)
  (kp1): KPConvLayer(
    (mlp): Sequential(
      (0): Linear(in_features=11, out_features=16, bias=True)
      (1): ReLU()
      (2): Linear(in_features=16, out_features=16, bias=True)
    )
  )
  (kp2): KPConvLayer(
    (mlp): Sequential(
      (0): Linear(in_features=19, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=32, bias=True)
    )
  )
  (head): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=3, bias=True)
  )
)

In [11]:
from torch.utils.data import Dataset
from pathlib import Path
import numpy as np
import torch

# Raw class code stored in the PLY -> contiguous training class index.
label_map = {1: 0, 3: 1, 9: 2}   # same mapping

def read_ply_xyz_labels(path):
    """Read one PLY tile and return only its labelled points.

    Parameters
    ----------
    path : str or Path
        PLY file whose ``vertex`` element has ``x``, ``y``, ``z`` and
        ``scalar_NewClassification`` properties.

    Returns
    -------
    xyz : np.ndarray, float32, shape (M, 3)
        Coordinates of the vertices whose raw class is a key of ``label_map``.
    lbl : np.ndarray, int64, shape (M,)
        Remapped class index for each kept vertex.

    Vertices with any other raw class are dropped. A file with zero vertices
    yields two empty arrays instead of raising.
    """
    ply = PlyData.read(str(path))
    v = ply["vertex"].data

    xyz = np.vstack([v["x"], v["y"], v["z"]]).T.astype(np.float32)
    raw = np.asarray(v["scalar_NewClassification"]).astype(np.int64)

    # Array-level remap (255 = ignore). Replaces a per-point
    # np.vectorize(lambda ...) call, which is a Python loop over every vertex
    # and fails on size-0 input.
    lbl = np.full(raw.shape, 255, dtype=np.int64)
    for src, dst in label_map.items():
        lbl[raw == src] = dst

    mask = lbl != 255
    return xyz[mask], lbl[mask]


class FullCloudChunkDataset(Dataset):
    """
    Serves every PLY as consecutive index ranges ("chunks") of
    `points_per_chunk` points, in the order the points are stored.
    - Each PLY is parsed once and kept in RAM
    - A trailing chunk shorter than half a chunk is discarded
    - `max_chunks` (if not None) caps the dataset by random subsampling
    """

    def __init__(self, root, files, points_per_chunk=2048, augment=True, max_chunks=1500):
        self.root = Path(root)
        self.files = files
        self.points_per_chunk = points_per_chunk
        self.augment = augment

        # Stage 1: load every cloud, keyed by its full path string.
        self.clouds = {}
        for name in self.files:
            full_path = self.root / name
            xyz, lbl = read_ply_xyz_labels(full_path)
            self.clouds[str(full_path)] = (xyz, lbl)

        # Stage 2: enumerate (key, start, end) spans over each cloud.
        shortest_kept = points_per_chunk // 2
        spans = []
        for name in self.files:
            key = str(self.root / name)
            total = self.clouds[key][0].shape[0]
            for lo in range(0, total, points_per_chunk):
                hi = min(lo + points_per_chunk, total)
                if hi - lo >= shortest_kept:
                    spans.append((key, lo, hi))

        # Stage 3: random subset when there are more spans than allowed.
        if max_chunks is not None and len(spans) > max_chunks:
            keep = np.random.choice(len(spans), max_chunks, replace=False)
            spans = [spans[i] for i in keep]

        self.chunks = spans
        print(f"[FullCloudChunkDataset] Total chunks: {len(self.chunks)}")

    def __len__(self):
        return len(self.chunks)

    def _augment(self, xyz):
        """Return a rotated (random angle about Z) and jittered copy of xyz."""
        angle = np.random.uniform(0, 2*np.pi)
        cos_a, sin_a = np.cos(angle), np.sin(angle)
        rot_z = np.array([[cos_a, -sin_a, 0],
                          [sin_a,  cos_a, 0],
                          [0,      0,     1]], dtype=np.float32)
        rotated = xyz @ rot_z.T
        jitter = np.random.normal(0, 0.01, rotated.shape).astype(np.float32)
        return rotated + jitter

    def __getitem__(self, idx):
        """Return (xyz, labels) tensors for chunk `idx`; xyz is mean-centred."""
        key, lo, hi = self.chunks[idx]
        cloud_xyz, cloud_lbl = self.clouds[key]

        chunk_xyz = cloud_xyz[lo:hi]
        chunk_xyz = chunk_xyz - chunk_xyz.mean(axis=0, keepdims=True)

        if self.augment:
            chunk_xyz = self._augment(chunk_xyz)

        return (
            torch.from_numpy(chunk_xyz).float(),
            torch.from_numpy(cloud_lbl[lo:hi]).long(),
        )


In [12]:
from torch.utils.data import DataLoader
import random

# Fixed seed so the 80/20 file split is identical on every run; an unseeded
# shuffle silently moves tiles between train and val each time the cell is re-run.
SPLIT_SEED = 42

all_files = sorted(f.name for f in DATA_DIR.glob("*.ply"))
if not all_files:
    raise FileNotFoundError(f"No .ply files found in {DATA_DIR}")
random.Random(SPLIT_SEED).shuffle(all_files)

N = len(all_files)
n_train = int(0.8 * N)
train_files = all_files[:n_train]
val_files   = all_files[n_train:]

print("Train files:", len(train_files), " Val files:", len(val_files))

train_ds_25 = FullCloudChunkDataset(
    root=DATA_DIR,
    files=train_files,
    points_per_chunk=2048,
    augment=True,
    max_chunks=1500         # cap on the number of training chunks
)

val_ds_25 = FullCloudChunkDataset(
    root=DATA_DIR,
    files=val_files,
    points_per_chunk=2048,
    augment=False,
    max_chunks=500          # smaller eval set is fine
)

# batch_size=1: chunks may differ in length, so they are not stacked together.
train_loader = DataLoader(
    train_ds_25,
    batch_size=1,
    shuffle=True,
    num_workers=0,
    pin_memory=True
)

val_loader = DataLoader(
    val_ds_25,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

print("train batches:", len(train_loader), " val batches:", len(val_loader))


Train files: 12  Val files: 4
[FullCloudChunkDataset] Total chunks: 1500
[FullCloudChunkDataset] Total chunks: 500
train batches: 1500  val batches: 500


In [13]:
#training loop new version
# --- IMPORTS ---
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import os
from sklearn.metrics import confusion_matrix

# --- SETUP ---
# Rebinds DEVICE as a plain string ("cuda" / "cpu"); the earlier cell stored a
# torch.device. Both forms are accepted by Tensor.to().
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Separate output folder for this run (created if it does not exist yet).
SAVE_DIR = "./checkpoints_kpconv_25nov"
os.makedirs(SAVE_DIR, exist_ok=True)

# Per-epoch metrics log, stored inside SAVE_DIR.
LOG_FILE = os.path.join(SAVE_DIR, "training_log_25nov.csv")

# Write the CSV header. Mode "w" truncates, so re-running this cell discards
# any log left by a previous run.
with open(LOG_FILE, "w") as f:
    f.write("epoch,train_loss,val_loss,iou0,iou1,iou2,miou\n")


# --- IoU Function (same as before) ---
def compute_iou(pred, gt, nc):
    """Per-class intersection-over-union for classes 0..nc-1.

    pred, gt: 1-D arrays of predicted / ground-truth class indices.
    Returns an array of length nc; a class that appears in neither pred nor gt
    (empty union) gets an IoU of 0.
    """
    cm = confusion_matrix(gt, pred, labels=list(range(nc)))
    hits = cm.diagonal()                              # true positives per class
    unions = cm.sum(axis=0) + cm.sum(axis=1) - hits   # tp + fp + fn per class
    return np.array([h / u if u > 0 else 0 for h, u in zip(hits, unions)])


# --- MODEL, OPT, LOSS ---
# Adam over all parameters of the `model` built in the model cell; plain
# (unweighted) cross-entropy over per-point logits.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

EPOCHS = 10
# Best validation mIoU seen so far; a checkpoint is written only when it is beaten.
best_miou = 0.0


# --- TRAINING LOOP ---
# Each epoch: one pass over train_loader with gradient updates, one pass over
# val_loader without gradients, then logging and checkpointing.
for ep in range(EPOCHS):

    # ========== TRAIN ==========
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {ep+1}/{EPOCHS}", leave=True)
    total_train_loss = 0
    batches = 0

    for pts, lbl in loop:
        pts, lbl = pts.to(DEVICE), lbl.to(DEVICE)

        optimizer.zero_grad()
        logits = model(pts)
        # Flatten batch and point axes so the loss sees (num_points, 3) logits
        # against (num_points,) targets. The literal 3 is the class count and
        # matches num_classes=3 used when `model` was built.
        loss = criterion(logits.view(-1,3), lbl.view(-1))
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        batches += 1
        loop.set_postfix(loss=loss.item())

    # Mean of the per-batch losses.
    avg_train_loss = total_train_loss / batches

    # ========== VALIDATION ==========
    model.eval()
    # Predictions and labels are collected over the whole validation set so that
    # IoU is computed once over all points rather than averaged per batch.
    preds_all, lbl_all = [], []
    total_val_loss = 0
    val_batches = 0

    with torch.no_grad():
        for pts, lbl in val_loader:
            pts = pts.to(DEVICE)
            lbl = lbl.to(DEVICE)
            logits = model(pts)

            loss = criterion(logits.view(-1,3), lbl.view(-1))
            total_val_loss += loss.item()
            val_batches += 1

            # Highest-scoring class per point, flattened to 1-D on the CPU.
            pred = logits.argmax(-1).cpu().numpy().reshape(-1)
            lbl_np = lbl.cpu().numpy().reshape(-1)

            preds_all.append(pred)
            lbl_all.append(lbl_np)

    avg_val_loss = total_val_loss / val_batches

    preds_all = np.concatenate(preds_all)
    lbl_all = np.concatenate(lbl_all)

    # Per-class IoU over the 3 classes and their unweighted mean.
    iou = compute_iou(preds_all, lbl_all, 3)
    miou = iou.mean()

    print(f"\nEpoch {ep+1} Summary:")
    print(f"  Train Loss = {avg_train_loss:.4f}")
    print(f"  Val Loss   = {avg_val_loss:.4f}")
    print(f"  IoU        = {iou}")
    print(f"  mIoU       = {miou:.4f}")

    # ----- SAVE LOG -----
    # Append one CSV row per epoch; columns match the header written to LOG_FILE.
    with open(LOG_FILE, "a") as f:
        f.write(
            f"{ep+1},{avg_train_loss:.4f},{avg_val_loss:.4f},"
            f"{iou[0]:.4f},{iou[1]:.4f},{iou[2]:.4f},{miou:.4f}\n"
        )

    # ----- SAVE BEST MODEL (with 25nov tag) -----
    # Written only on a strict improvement. The score is part of the file name,
    # so each improvement creates a new file and earlier "best" files remain.
    if miou > best_miou:
        best_miou = miou
        best_path = os.path.join(SAVE_DIR, f"best_model_25nov_mIoU_{best_miou:.4f}.pth")
        torch.save(model.state_dict(), best_path)
        print(f"  âœ… Saved BEST model to {best_path}")

    # ----- OPTIONAL: SAVE EVERY CHECKPOINT (with 25nov) -----
    # Unconditional per-epoch snapshot of the weights (state_dict only).
    ckpt_path = os.path.join(SAVE_DIR, f"checkpoint_epoch_{ep+1}_25nov.pth")
    torch.save(model.state_dict(), ckpt_path)

print("\nðŸŽ‰ TRAINING COMPLETE!")
print("Best mIoU =", best_miou)


Epoch 1/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 402.83it/s, loss=0.0955]



Epoch 1 Summary:
  Train Loss = 0.5933
  Val Loss   = 0.6146
  IoU        = [0.716 0.    0.   ]
  mIoU       = 0.2387
  âœ… Saved BEST model to ./checkpoints_kpconv_25nov/best_model_25nov_mIoU_0.2387.pth


Epoch 2/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 401.98it/s, loss=0.24]    



Epoch 2 Summary:
  Train Loss = 0.2774
  Val Loss   = 0.1805
  IoU        = [0.94841457 0.         0.93355636]
  mIoU       = 0.6273
  âœ… Saved BEST model to ./checkpoints_kpconv_25nov/best_model_25nov_mIoU_0.6273.pth


Epoch 3/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 408.26it/s, loss=0.0281]  



Epoch 3 Summary:
  Train Loss = 0.1428
  Val Loss   = 0.1967
  IoU        = [0.94252716 0.         0.85651973]
  mIoU       = 0.5997


Epoch 4/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 405.16it/s, loss=0.0466]  



Epoch 4 Summary:
  Train Loss = 0.1276
  Val Loss   = 0.1378
  IoU        = [0.95066486 0.         0.93419911]
  mIoU       = 0.6283
  âœ… Saved BEST model to ./checkpoints_kpconv_25nov/best_model_25nov_mIoU_0.6283.pth


Epoch 5/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 402.77it/s, loss=0.0702]  



Epoch 5 Summary:
  Train Loss = 0.1224
  Val Loss   = 0.1590
  IoU        = [0.95123566 0.         0.92587028]
  mIoU       = 0.6257


Epoch 6/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 398.51it/s, loss=0.0117]  



Epoch 6 Summary:
  Train Loss = 0.1183
  Val Loss   = 0.1627
  IoU        = [0.95277164 0.         0.91191334]
  mIoU       = 0.6216


Epoch 7/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:04<00:00, 373.82it/s, loss=0.0634]  



Epoch 7 Summary:
  Train Loss = 0.1209
  Val Loss   = 0.1516
  IoU        = [0.95284576 0.         0.89585277]
  mIoU       = 0.6162


Epoch 8/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:03<00:00, 375.70it/s, loss=0.0126]  



Epoch 8 Summary:
  Train Loss = 0.1166
  Val Loss   = 0.1624
  IoU        = [0.95118846 0.         0.92877869]
  mIoU       = 0.6267


Epoch 9/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:04<00:00, 374.57it/s, loss=0.0333]  



Epoch 9 Summary:
  Train Loss = 0.1151
  Val Loss   = 0.2130
  IoU        = [0.95238833 0.         0.86790318]
  mIoU       = 0.6068


Epoch 10/10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1500/1500 [00:04<00:00, 371.98it/s, loss=0.00654] 



Epoch 10 Summary:
  Train Loss = 0.1153
  Val Loss   = 0.1813
  IoU        = [0.95312301 0.         0.91109542]
  mIoU       = 0.6214

ðŸŽ‰ TRAINING COMPLETE!
Best mIoU = 0.6282879931937195


In [None]:
import os
from pathlib import Path
import numpy as np
import torch
from plyfile import PlyData
from sklearn.metrics import confusion_matrix
import csv

# ---------------------
# Paths & model
# ---------------------
# Same directory string as DATA_DIR in the dataset cell, so ALL_FILES covers
# every tile, including those the training split was drawn from.
DATA_PATH = Path("/home/ccbd/Desktop/SSS_03/Data/train_sphere_ascii_roi")
ALL_FILES = sorted(DATA_PATH.glob("*.ply"))

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Files found:", len(ALL_FILES))

# Use your new best model if you retrain:
# best_ckpt_path = "./checkpoints_kpconv_25nov/best_model_25nov_mIoU_0.xxxx.pth"
# or keep the old 24nov one for comparison:
# NOTE(review): this points at the 24nov folder, not the 25nov one written by
# the training cell above — confirm the file exists before running.
best_ckpt_path = "./checkpoints_kpconv_24nov/best_model_24nov_mIoU_0.8541.pth"

# The architecture arguments must match the ones the checkpoint was trained
# with, otherwise load_state_dict raises on mismatched keys or shapes.
best_model = KPNet(base=8, num_classes=3, K=6).to(DEVICE)
best_model.load_state_dict(torch.load(best_ckpt_path, map_location=DEVICE))
best_model.eval()
print("Loaded:", best_ckpt_path)


# ---------------------------------------
# Helper: read GT and map labels
# ---------------------------------------
# Raw class code stored in the PLY -> contiguous class index used by the model.
label_map = {1: 0, 3: 1, 9: 2}

def read_gt_labels(path):
    """Read the ground-truth classes of one PLY tile.

    Parameters
    ----------
    path : str or Path
        PLY file whose ``vertex`` element has a ``scalar_NewClassification``
        property.

    Returns
    -------
    mask : np.ndarray, bool, shape (N,)
        True for vertices whose raw class is a key of ``label_map``.
    lbl : np.ndarray, int64, shape (mask.sum(),)
        Remapped class index of the kept vertices, in file order.

    A file with zero vertices yields two empty arrays instead of raising.
    """
    ply = PlyData.read(str(path))
    v = ply["vertex"].data

    raw = np.asarray(v["scalar_NewClassification"]).astype(np.int64)

    # Array-level remap (255 = ignore). Replaces a per-point
    # np.vectorize(lambda ...) call, which is a Python loop over every vertex
    # and fails on size-0 input.
    lbl = np.full(raw.shape, 255, dtype=np.int64)
    for src, dst in label_map.items():
        lbl[raw == src] = dst

    mask = lbl != 255
    return mask, lbl[mask]


# ---------------------------------------
# Helper: run model on full cloud (chunked)
# ---------------------------------------
def predict_xyz(model, xyz, chunk=4096, min_points=32):
    """Predict a class index for every point of a cloud, chunk by chunk.

    Parameters
    ----------
    model : torch.nn.Module
        Maps a (1, P, 3) float tensor to (1, P, num_classes) logits.
    xyz : np.ndarray, shape (N, 3)
        Point coordinates, processed in storage order.
    chunk : int
        Maximum number of points per forward pass.
    min_points : int
        A trailing chunk with fewer points than this (and fewer than ``chunk``)
        is merged into the previous chunk, so the model never receives a tail
        too small to form a neighbourhood. Ignored when there is only one chunk.

    Returns
    -------
    np.ndarray, int64, shape (N,)
        Arg-max class per point; an empty array for an empty cloud.
    """
    n_points = len(xyz)
    if n_points == 0:
        # np.concatenate([]) would raise; an empty cloud has an empty prediction.
        return np.empty(0, dtype=np.int64)
    if chunk <= 0:
        raise ValueError("chunk must be a positive integer")

    # Run on whatever device the model lives on instead of a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    starts = list(range(0, n_points, chunk))
    tail = n_points - starts[-1]
    if len(starts) > 1 and tail < min(min_points, chunk):
        starts.pop()  # fold the short tail into the previous chunk
    ends = starts[1:] + [n_points]

    preds = []
    model.eval()

    with torch.no_grad():
        for lo, hi in zip(starts, ends):
            part = torch.from_numpy(xyz[lo:hi]).float().unsqueeze(0).to(device)
            logits = model(part)
            preds.append(logits.argmax(-1).squeeze(0).cpu().numpy())

    return np.concatenate(preds)


# ---------------------------------------
# MAIN LOOP
# ---------------------------------------
# One row per tile: [file name, IoU class 0, IoU class 1, IoU class 2, mIoU]
results = []

for path in ALL_FILES:
    print(f"\nProcessing tile: {path.name}")

    vertex = PlyData.read(str(path))["vertex"].data
    xyz = np.vstack([vertex[axis] for axis in ("x", "y", "z")]).T.astype(np.float32)

    # Keep only the points that carry one of the mapped classes.
    valid_mask, gt = read_gt_labels(path)
    xyz_valid = xyz[valid_mask]

    # Centre the whole tile on its mean before prediction.
    xyz_valid = xyz_valid - xyz_valid.mean(axis=0, keepdims=True)

    pred = predict_xyz(best_model, xyz_valid)

    cm = confusion_matrix(gt, pred, labels=[0,1,2])

    # IoU_c = tp / (tp + fp + fn), i.e. diagonal / (row sum + column sum - diagonal);
    # a class with an empty union scores 0.
    hits = cm.diagonal()
    unions = cm.sum(axis=0) + cm.sum(axis=1) - hits
    ious = [h / u if u > 0 else 0 for h, u in zip(hits, unions)]

    miou = float(np.mean(ious))
    print("IoUs:", ious, "   mIoU =", miou)

    results.append([path.name, *ious, miou])


# ---------------------------------------
# âœ… Save CSV in a 25nov-specific folder
# ---------------------------------------
# Output folder for this evaluation run (created on demand).
EVAL_DIR = "./eval_fulltiles_25nov"
os.makedirs(EVAL_DIR, exist_ok=True)

csv_path = os.path.join(EVAL_DIR, "full_tile_results_25nov.csv")
header = ["filename", "iou0", "iou1", "iou2", "miou"]

# Header row followed by one row per evaluated tile.
with open(csv_path, "w", newline="") as out_file:
    csv.writer(out_file).writerows([header, *results])

print("\nSaved:", csv_path)
