# Importing Necessary Libraries

In [1]:
# --- Imports, paths, seeds, device ---
import os, glob, random, math
import numpy as np
import pandas as pd
from collections import Counter
from plyfile import PlyData

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Loading Dataset path and checking if gpu is available

In [2]:
# Paths
DATA_FOLDER = r"C:\Users\bhanu\OneDrive\Desktop\capstone\Data\train_sphere_ascii_roi"  # ROI folder

# Repro
seed = 42
random.seed(seed); np.random.seed(seed); torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


# Just cross verying the number of files in the folder , Just a basic check

In [3]:
# --- List ROI .ply files ---
file_list = sorted(glob.glob(os.path.join(DATA_FOLDER, "*.ply")))
print("Found files:", len(file_list))
assert len(file_list) > 0, "No PLY files found in ROI folder."
print("\nFirst few files:\n", "\n".join(os.path.basename(f) for f in file_list[:5]))


Found files: 12

First few files:
 Sphere_10_ascii_roi.ply
Sphere_11_ascii_roi.ply
Sphere_12_ascii_roi.ply
Sphere_15_ascii_roi.ply
Sphere_1_ascii_roi.ply


# Label mapping + verify mapping on whole dataset

In [4]:
# --- Mapping: raw ASCII -> name -> id (final) ---
SCALAR_MAP = {1: "Background", 3: "Track", 9: "Object"}
CLASS_MAP  = {"Background": 0, "Track": 1, "Object": 2}
LABEL_CANDIDATES = ["scalar_NewClassification", "scalar_Classification", "classification", "label"]

def find_label_column(df):
    for c in LABEL_CANDIDATES:
        if c in df.columns:
            return c
    return None

# Scan all files, count mapped classes, and verify only 0/1/2 appear
cnt = Counter()
bad_files = []
for f in file_list:
    try:
        df = pd.DataFrame(PlyData.read(f)["vertex"].data)
        col = find_label_column(df)
        if col is None:
            raise ValueError("No label column")
        raw = df[col].astype(int)

        mapped_name = raw.map(SCALAR_MAP).fillna("Background")
        mapped_id = mapped_name.map(CLASS_MAP).astype(int)

        uniq = set(mapped_id.unique().tolist())
        if not uniq.issubset({0,1,2}):
            raise ValueError(f"Unexpected mapped ids: {uniq}")

        cnt.update(mapped_id.tolist())
    except Exception as e:
        print(f"[BAD] {os.path.basename(f)} -> {e}")
        bad_files.append(f)

print("\nBad files:", len(bad_files))
print("Counts [id: count]:", dict(cnt))
total = sum(cnt.values())
if total > 0:
    ratios = {k: round(v/total, 4) for k,v in cnt.items()}
    print("Ratios:", ratios)

# Final assertion
assert len(bad_files) == 0, "Some files failed label checks. Fix or remove before training."
print("\n✅ Mapping verified: 0=Background, 1=Track, 2=Object")



Bad files: 0
Counts [id: count]: {0: 12717193, 1: 541178, 2: 4012529}
Ratios: {0: 0.7363, 1: 0.0313, 2: 0.2323}

✅ Mapping verified: 0=Background, 1=Track, 2=Object


# Dataset (light, with mild class-balancing sampler)

In [5]:
# --- Light Dataset: normalize -> mild balanced sampling -> jitter ---

class Rail3DDataset(Dataset):
    def __init__(self, file_list, num_points=4096, sampling=True):
        self.file_list = file_list
        self.num_points = num_points
        self.sampling = sampling
        self.scalar_map = SCALAR_MAP
        self.class_map = CLASS_MAP

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        ply_path = self.file_list[idx]
        df = pd.DataFrame(PlyData.read(ply_path)["vertex"].data)

        # points
        if {"x","y","z"}.issubset(df.columns):
            pts = df[["x","y","z"]].to_numpy(dtype=np.float32)
        else:
            pts = np.zeros((0,3), np.float32)

        # labels
        col = find_label_column(df)
        if col is None:
            raw = pd.Series([1]*len(pts))  # fallback to Background raw=1
        else:
            raw = df[col].astype(int)

        mapped_name = raw.map(self.scalar_map).fillna("Background")
        lbl = mapped_name.map(self.class_map).astype(np.int64).to_numpy()

        # handle empty
        if pts.shape[0] == 0:
            pts = np.zeros((self.num_points,3), np.float32)
            lbl = np.zeros((self.num_points,), np.int64)

        # sampling (mild class balance, avoids collapse)
        if self.sampling and len(pts) > 0:
            # per-class frequency
            uniq, counts = np.unique(lbl, return_counts=True)
            freq = {int(u): int(c) for u,c in zip(uniq, counts)}
            inv = np.array([1.0 / max(freq.get(int(l),1), 1) for l in lbl], dtype=np.float32)

            # mild boost: favor Object, slightly Track
            boost = {0: 1.0, 1: 1.4, 2: 1.8}
            inv *= np.vectorize(boost.get)(lbl.astype(int))

            # blend with uniform for stability
            alpha = 0.6  # more uniform than rare; safer
            uniform = np.full_like(inv, 1.0/len(inv), np.float32)
            p = (1 - alpha) * (inv / max(inv.sum(), 1e-12)) + alpha * uniform
            p = p / p.sum()

            choice = np.random.choice(len(pts), self.num_points, replace=True, p=p)
            pts = pts[choice]
            lbl = lbl[choice]
        else:
            # uniform sample/pad
            if len(pts) >= self.num_points:
                choice = np.random.choice(len(pts), self.num_points, replace=False)
            else:
                choice = np.random.choice(len(pts), self.num_points, replace=True)
            pts = pts[choice]
            lbl = lbl[choice]

        # normalize to unit sphere + tiny jitter
        centroid = pts.mean(axis=0, keepdims=True)
        pts = pts - centroid
        max_dist = np.sqrt((pts**2).sum(axis=1)).max()
        if max_dist > 0:
            pts = pts / max_dist
        pts = pts + np.random.normal(scale=0.001, size=pts.shape).astype(np.float32)

        return torch.from_numpy(pts).float(), torch.from_numpy(lbl).long()


# Dataloader + batch sanity print

In [6]:
# --- Dataloader (start with workers=0 for laptop stability) ---
dataset = Rail3DDataset(file_list, num_points=4096, sampling=True)

dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,        # set >0 later if stable
    pin_memory=False,
)

# quick sanity on batch label distribution (3 batches)
for i, (_, L) in enumerate(dataloader):
    vals, cnts = torch.unique(L, return_counts=True)
    print({int(v): int(c) for v,c in zip(vals, cnts)})
    if i == 2: break


{0: 9686, 1: 2571, 2: 4127}
{0: 8400, 1: 2496, 2: 5488}
{0: 8451, 1: 2431, 2: 5502}


# Light model (PointMLP-lite with BN/Dropout)

In [7]:
# --- Light per-point model (MLP + BatchNorm + Dropout) ---
class PointMLPLite(nn.Module):
    def __init__(self, num_classes=3, dropout=0.2):
        super().__init__()
        self.fc1 = nn.Linear(3, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.head = nn.Linear(256, num_classes)
        self.do = nn.Dropout(dropout)

        # init
        for m in [self.fc1,self.fc2,self.fc3,self.head]:
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                nn.init.zeros_(m.bias)

    def forward(self, x):
        # x: (B,N,3)
        B, N, _ = x.shape
        x = x.view(B*N, -1)
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.do(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.do(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.head(x)
        x = x.view(B, N, -1)
        return x

model = PointMLPLite(num_classes=3, dropout=0.2).to(device)
print(model.__class__.__name__, "on", device)


PointMLPLite on cuda


# Class weights from counts + loss/opt

In [8]:
# --- Class weights from dataset counts (sqrt-smooth, mean-norm, clipped) ---
arr = np.array([cnt.get(0,0), cnt.get(1,0), cnt.get(2,0)], dtype=np.float32)  # [BG, Track, Obj]
total = arr.sum()
raw = total / np.maximum(arr, 1.0)
w = np.sqrt(raw)
w = w / w.mean()
w = np.clip(w, 0.5, 2.0)  # gentle
weights = torch.tensor(w, dtype=torch.float32, device=device)
print("Class weights [BG,Track,Obj]:", w)

criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

# AMP scaler
scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available())


Class weights [BG,Track,Obj]: [0.5       1.9065322 0.7001726]


# Metrics (IoU)

In [9]:
# --- IoU per class ---
def iou_per_class(preds, labels, num_classes=3):
    preds = preds.view(-1)
    labels = labels.view(-1)
    ious = []
    for c in range(num_classes):
        inter = ((preds == c) & (labels == c)).sum().item()
        union = ((preds == c) | (labels == c)).sum().item()
        ious.append(float('nan') if union == 0 else inter / union)
    return ious


# Traning loop 

In [10]:
# --- Train loop ---
epochs = 15
best_loss = float('inf')
save_dir = "./checkpoints_light"
os.makedirs(save_dir, exist_ok=True)

for epoch in range(1, epochs+1):
    model.train()
    total_loss = 0.0
    correct = 0
    total_pts = 0

    for P, L in dataloader:
        P = P.to(device, non_blocking=True)
        L = L.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
            logits = model(P)                          # (B,N,C)
            loss = criterion(logits.view(-1,3), L.view(-1))

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item() * P.size(0)
        preds = logits.argmax(dim=-1)
        correct += (preds == L).sum().item()
        total_pts += L.numel()

    avg_loss = total_loss / len(dataset)
    acc = 100.0 * correct / total_pts

    # quick IoU on a few batches (cheap)
    model.eval()
    with torch.no_grad():
        P_s, L_s = next(iter(dataloader))
        P_s = P_s.to(device); L_s = L_s.to(device)
        preds_s = model(P_s).argmax(dim=-1)
        ious = iou_per_class(preds_s.cpu(), L_s.cpu(), num_classes=3)

    print(f"Epoch [{epoch}/{epochs}] Loss: {avg_loss:.4f} | Acc: {acc:.2f}% | IoU: {ious}")

    scheduler.step(avg_loss)

    # save best
    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save(model.state_dict(), os.path.join(save_dir, "best_light.pth"))


Epoch [1/15] Loss: 1.3610 | Acc: 24.69% | IoU: [0.0348721355031551, 0.15284087373203062, 0.0]
Epoch [2/15] Loss: 1.3447 | Acc: 24.51% | IoU: [0.0931098696461825, 0.15101315321720582, 0.0]
Epoch [3/15] Loss: 1.3385 | Acc: 24.92% | IoU: [0.22530737704918033, 0.12866610265087422, 0.0]
Epoch [4/15] Loss: 1.3079 | Acc: 25.01% | IoU: [0.14259866561121792, 0.1370994603957098, 0.0]
Epoch [5/15] Loss: 1.2718 | Acc: 25.10% | IoU: [0.005770136599152143, 0.15697275730939467, 0.0]
Epoch [6/15] Loss: 1.2574 | Acc: 24.56% | IoU: [0.049921073401736384, 0.1713860965595356, 0.0]
Epoch [7/15] Loss: 1.2364 | Acc: 26.46% | IoU: [0.15792838874680307, 0.13831001998287182, 0.001682935038707506]
Epoch [8/15] Loss: 1.2317 | Acc: 26.88% | IoU: [0.1407177168691721, 0.14568810213832536, 0.0]
Epoch [9/15] Loss: 1.2234 | Acc: 26.67% | IoU: [0.15479384003974167, 0.16302039082412914, 0.30575124887921096]
Epoch [10/15] Loss: 1.2196 | Acc: 27.39% | IoU: [0.13505612722170252, 0.1393766809049201, 0.21314461883408073]
Epoc

# POINTNET-SEGMENTATION ACTUAL

In [11]:
# ---- PointNet building blocks ----
class STN3d(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        # x: (B,3,N)
        B = x.size(0)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))           # (B,1024,N)
        x = torch.max(x, 2, keepdim=False)[0]         # (B,1024)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        iden = torch.eye(3, device=x.device).view(1, 9).repeat(B, 1)
        x = x + iden
        return x.view(-1, 3, 3)

class STNkd(nn.Module):
    def __init__(self, k=64):
        super().__init__()
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
        self.k = k

    def forward(self, x):
        # x: (B,k,N)
        B = x.size(0)
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = torch.max(x, 2, keepdim=False)[0]         # (B,1024)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        iden = torch.eye(self.k, device=x.device).view(1, self.k*self.k).repeat(B, 1)
        x = x + iden
        return x.view(-1, self.k, self.k)

# ---- PointNetSeg (per-point + global features) ----
class PointNetSeg(nn.Module):
    def __init__(self, num_classes=3, feature_transform=True):
        super().__init__()
        self.feature_transform = feature_transform

        self.stn = STN3d()
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.bn1 = nn.BatchNorm1d(64)

        self.fstn = STNkd(k=64)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

        # Segmentation head: concat per-point (64,128) with global (1024)
        self.conv4 = nn.Conv1d(64 + 128 + 1024, 512, 1)
        self.conv5 = nn.Conv1d(512, 256, 1)
        self.conv6 = nn.Conv1d(256, 128, 1)
        self.conv7 = nn.Conv1d(128, num_classes, 1)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
        self.bn6 = nn.BatchNorm1d(128)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        # x: (B,N,3) -> to (B,3,N)
        x = x.transpose(2, 1).contiguous()

        # input transform
        trans = self.stn(x)
        x = torch.bmm(trans, x)                   # (B,3,N)

        x = F.relu(self.bn1(self.conv1(x)))       # (B,64,N)
        pointfeat = x                             # save 64-d per-point

        # feature transform
        trans_feat = None
        if self.feature_transform:
            trans_feat = self.fstn(x)
            x = torch.bmm(trans_feat, x)         # still (B,64,N)

        x = F.relu(self.bn2(self.conv2(x)))       # (B,128,N)
        x = self.bn3(self.conv3(x))               # (B,1024,N)
        x = torch.max(x, 2, keepdim=True)[0]      # (B,1024,1)
        x = x.repeat(1, 1, pointfeat.size(2))     # (B,1024,N)

        x = torch.cat([pointfeat,                # 64
                       F.relu(self.bn2(self.conv2(pointfeat))),  # 128 on pointfeat
                       x], dim=1)                # 1024
        # Now (B,64+128+1024, N)

        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))
        x = self.dropout(F.relu(self.bn6(self.conv6(x))))
        x = self.conv7(x)                         # (B,C,N)

        return x.transpose(2, 1).contiguous(), trans, trans_feat


In [12]:
def feature_transform_regularizer(trans):
    # trans: (B,k,k)
    if trans is None:
        return 0.0
    B, k, _ = trans.size()
    I = torch.eye(k, device=trans.device).unsqueeze(0).expand(B, -1, -1)
    diff = torch.bmm(trans, trans.transpose(2, 1)) - I
    return torch.mean(torch.norm(diff, dim=(1, 2)))


In [13]:
# Device, model
model = PointNetSeg(num_classes=3, feature_transform=True).to(device)

# Keep your class weights from ROI (tensor 'weights' you already compute)
ce_loss = nn.CrossEntropyLoss(weight=weights)

# Combine CE + small feature-transform regularizer
def seg_loss(logits, targets, trans_feat, ft_weight=0.001):
    ce = ce_loss(logits.view(-1, logits.size(-1)), targets.view(-1))
    reg = feature_transform_regularizer(trans_feat) * ft_weight
    return ce + reg


In [14]:
num_classes=3

In [15]:
# ---- loss setup (keep your 'weights' from ROI) ----
ce_loss = nn.CrossEntropyLoss(weight=weights)

def feature_transform_regularizer(trans):
    if trans is None:
        return torch.tensor(0.0, device=weights.device)
    B, k, _ = trans.size()
    I = torch.eye(k, device=trans.device).unsqueeze(0).expand(B, -1, -1)
    diff = torch.bmm(trans, trans.transpose(2, 1)) - I
    return torch.mean(torch.norm(diff, dim=(1, 2)))

def seg_loss(logits, targets, trans_feat, ft_weight=1e-3):
    ce = ce_loss(logits.view(-1, logits.size(-1)), targets.view(-1))
    reg = feature_transform_regularizer(trans_feat) * ft_weight
    return ce + reg

# ---- optimizer / scheduler (OK to keep your previous ones) ----
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)  # a touch higher works well for PointNet
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
scaler = torch.amp.GradScaler('cuda', enabled=torch.cuda.is_available())

# --- Train loop (PointNetSeg) ---
epochs = 15
best_loss = float('inf')
save_dir = "./checkpoints_pointnet"
os.makedirs(save_dir, exist_ok=True)

for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    total_pts = 0
    correct = 0

    # epoch IoU accumulators (CPU tensors)
    inter = torch.zeros(num_classes, dtype=torch.long)
    union = torch.zeros(num_classes, dtype=torch.long)

    for P, L in dataloader:
        P = P.to(device, non_blocking=True)   # (B,N,3)
        L = L.to(device, non_blocking=True)   # (B,N)

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda', enabled=torch.cuda.is_available()):
            # PointNetSeg returns: (logits, trans, trans_feat)
            logits, trans, trans_feat = model(P)     # logits: (B,N,C)
            loss = seg_loss(logits, L, trans_feat, ft_weight=1e-3)

        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        scaler.step(optimizer)
        scaler.update()

        # metrics
        running_loss += loss.item() * P.size(0)
        preds = logits.argmax(dim=-1)  # (B,N)
        correct += (preds == L).sum().item()
        total_pts += L.numel()

        # IoU accumulators on CPU
        p = preds.detach().cpu().view(-1)
        g = L.detach().cpu().view(-1)
        for c in range(num_classes):
            inter[c] += ((p == c) & (g == c)).sum()
            union[c] += ((p == c) | (g == c)).sum()

    avg_loss = running_loss / len(dataset)
    acc = 100.0 * correct / total_pts
    ious = [(inter[c].item() / union[c].item()) if union[c] > 0 else float('nan')
            for c in range(num_classes)]

    print(f"Epoch [{epoch}/{epochs}] Loss: {avg_loss:.4f} | Acc: {acc:.2f}% | IoU: {ious}")

    scheduler.step(avg_loss)

    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save(model.state_dict(), os.path.join(save_dir, "best_pointnet.pth"))


Epoch [1/15] Loss: 1.2686 | Acc: 37.58% | IoU: [0.35132809362629797, 0.12551703391742497, 0.1571156462585034]
Epoch [2/15] Loss: 1.2740 | Acc: 34.34% | IoU: [0.33905415713196035, 0.1363428772176357, 0.03396505045532858]
Epoch [3/15] Loss: 1.2858 | Acc: 35.68% | IoU: [0.3384538232111278, 0.13194417746338089, 0.0760611809343135]
Epoch [4/15] Loss: 1.2665 | Acc: 35.02% | IoU: [0.34945228193209926, 0.13667127574412344, 0.035669698591046906]
Epoch [5/15] Loss: 1.2578 | Acc: 35.02% | IoU: [0.3470052083333333, 0.12819582955575703, 0.053080341059205]
Epoch [6/15] Loss: 1.2386 | Acc: 33.47% | IoU: [0.3174984310622391, 0.12834205303987012, 0.07199222323879231]
Epoch [7/15] Loss: 1.2195 | Acc: 35.24% | IoU: [0.33366314242098716, 0.12820236813778257, 0.10977053616733382]
Epoch [8/15] Loss: 1.2045 | Acc: 34.35% | IoU: [0.281580077286389, 0.14194663497893487, 0.16598086891465827]
Epoch [9/15] Loss: 1.2105 | Acc: 34.68% | IoU: [0.27671387429114336, 0.1264714875401315, 0.20616390380574365]
Epoch [10/1