# T1 SEMANTIC SEGMENTATION TRAINING

Small example on how to train Pointcept for semantic segmentation

## LIBRARIES

In [3]:
#IMPORT PACKAGES
from pathlib import Path
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join('.')))
sys.path.insert(0, os.path.abspath(os.path.join('scripts')))
sys.path.insert(0, os.path.abspath(os.path.join('thirdparty', 'pointcept')))

import numpy as np
import laspy
import geomapi
from geomapi.nodes import *
import geomapi.utils as ut
from geomapi.utils import geometryutils as gmu
import geomapi.tools.progresstools as pt

# import geomapi.utils as ut
import torch

import context 
import utils.t1_utils as t1
import utils as utl


#POINTCEPT
from pointcept.engines.defaults import (
    default_argument_parser,
    default_config_parser,
    default_setup,
)

In [4]:
# Release cached GPU memory that PyTorch is no longer using before the heavy cells run
torch.cuda.empty_cache()  # Clear unused memory

In [5]:
# Print the number of CUDA GPUs that PyTorch can see
print(torch.cuda.device_count())

3


In [6]:
%load_ext autoreload

In [7]:
%autoreload 2

Monitor the training with TensorBoard (run in a terminal): `tensorboard --logdir=/home/mbassier/code/Scan-to-BIM-CVPR-2024/thirdparty/pointcept/exp/kul-pt-v3-base1`


## CONFIG

In [8]:
# Root folder of the T1 data: <parent of the working directory>/data/t1
data_root = Path(os.getcwd()).parents[0].joinpath('data', 't1')
print(data_root)

# PREPROCESSING
input_folder = data_root.joinpath('input')  # folder that is searched for the input point clouds

training_split = 0.75  # fraction of the files assigned to the training set
size = [20, 20, 100]   # chunk size handed to the preprocessing step

# TRAINING
config_path = data_root.joinpath('config.py')
cfg = default_config_parser(str(config_path), {})

/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1


## DATA PREPARATION

Cut the data into manageable parts (by size or dimensions) and save the chunks as pth files

In [11]:
# Show the data root configured for the training dataset
cfg.data.train.data_root

'/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1'

In [13]:
# Collect every .laz point cloud in the input folder
files = utl.get_list_of_files(input_folder, ext=".laz")

# Shuffle with a fixed seed and split the files into train / validation
# according to `training_split` (75/25)
np.random.seed(0)
np.random.shuffle(files)
split = int(len(files) * training_split)
train_files = files[:split]
val_files = files[split:]

# Preprocess the datasets.
# NOTE: every file (not only `train_files`) is chunked into cfg.data.train.data_root;
# `train_files` / `val_files` are computed above but not used in this cell.
# A plain loop is used because the call is made for its side effects only
# (a list comprehension would just display a list of None values).
for f in files:
    t1.preprocess_point_clouds_to_pth(f, cfg.data.train.data_root, cfg, size=size)
# [t1.preprocess_point_clouds_to_pth(f, cfg.data.val.data_root,cfg,size=size) for f in val_files]

processing 32_ShortOffice_05_F2_small1 ...
saved 32_ShortOffice_05_F2_small1_0_0_0
saved 32_ShortOffice_05_F2_small1_1_0_0
saved 32_ShortOffice_05_F2_small1_0_1_0
saved 32_ShortOffice_05_F2_small1_1_1_0
Function preprocess_point_clouds_to_pth took 149.5968 seconds to execute.
processing 35_Lab_02_F1_small1 ...
saved 35_Lab_02_F1_small1_0_0_0
saved 35_Lab_02_F1_small1_1_0_0
saved 35_Lab_02_F1_small1_2_0_0
saved 35_Lab_02_F1_small1_3_0_0
saved 35_Lab_02_F1_small1_0_1_0
saved 35_Lab_02_F1_small1_1_1_0
saved 35_Lab_02_F1_small1_2_1_0
saved 35_Lab_02_F1_small1_3_1_0
saved 35_Lab_02_F1_small1_0_2_0
saved 35_Lab_02_F1_small1_1_2_0
saved 35_Lab_02_F1_small1_2_2_0
saved 35_Lab_02_F1_small1_3_2_0
saved 35_Lab_02_F1_small1_0_3_0
saved 35_Lab_02_F1_small1_1_3_0
saved 35_Lab_02_F1_small1_2_3_0
saved 35_Lab_02_F1_small1_3_3_0
Function preprocess_point_clouds_to_pth took 465.0635 seconds to execute.
processing 35_Lab_02_F2_small1 ...
saved 35_Lab_02_F2_small1_0_0_0
saved 35_Lab_02_F2_small1_1_0_0
sav

[None, None, None, None, None, None, None, None]

Collect the label statistics of every pth file, so that the pth files that do not contain all the classes can be thrown away

In [12]:
import json

# Per-chunk label statistics, keyed by the chunk's file name
data = {}

# Reload every pth chunk found in the 'val' sub-folder of the training data root
val_dir = Path(cfg.data.train.data_root) / 'val'
for pth_path in utl.get_list_of_files(str(val_dir), ext='.pth'):
    print(pth_path)

    # Only the ground-truth labels of the chunk are needed here
    semantic_gt = torch.load(pth_path)['semantic_gt']

    # Which labels occur in this chunk, and how often
    present_labels, label_counts = np.unique(semantic_gt, return_counts=True)
    print(present_labels, label_counts)

    # Express the counts as a fraction of all points in the chunk
    label_fractions = label_counts / label_counts.sum()

    # Store the labels and their fractions under the chunk's file name
    data[ut.get_filename(pth_path)] = {
        'labels': present_labels.tolist(),
        'counts': label_fractions.tolist(),
    }

# write to json with proper indentation
# with open(f'{data_root}\\stats.json', 'w') as fp:
#     json.dump(data, fp,indent=4)
# print(f'{data_root}\\stats.json')

/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/val/05_MedOffice_01_F2_small1_2_3_0.pth
[0 1 2 4 5] [128329 356502 208581   7574 602815]
/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/val/05_MedOffice_01_F2_small1_2_2_0.pth
[0 1 2 4 5] [101641 187093 361392  18996 277612]


Read the label statistics back from the JSON file

In [21]:
# Read the per-chunk label statistics back from disk.
# The path used to be built as f'{data_root}\\stats.json'. A backslash is only a path
# separator on Windows; on POSIX it becomes part of the file name. Use a proper path
# join, and fall back to the legacy name when only that file exists so that statistics
# written earlier can still be read.
stats_path = data_root / 'stats.json'
legacy_stats_path = Path(f'{data_root}\\stats.json')
if not stats_path.exists() and legacy_stats_path.exists():
    stats_path = legacy_stats_path

with open(stats_path, 'r') as fp:
    data = json.load(fp)
    #print data nicely with indentation
    # print(json.dumps(data,indent=4))

#report the number of dict objects in the json
print(len(data))

#print the number of dict objects that do not have all labels
# (6 is the number of classes expected per chunk; presumably it matches num_classes in the config -- TODO confirm)
print(len([d for d in data.values() if len(d['labels'])!=6]))

42
31


Move the pth files that contain all classes into the train and validation folders

In [34]:
# Gather the chunks that contain all labels (6 label values present)
file_names = [f'{data_root}/{k}.pth' for k, v in data.items() if len(v['labels']) == 6]
print(file_names)

# Split these chunks in train and validation according to `training_split` (75/25).
# The list that is split must be the one that is shuffled: the previous version
# shuffled `files` (the .laz inputs), which left `file_names` in its original order.
np.random.seed(0)
np.random.shuffle(file_names)
split = int(len(file_names) * training_split)
train_files = file_names[:split]
val_files = file_names[split:]
print(train_files)

# Move every chunk to the data root of the subset it was assigned to
for target_root, subset_files in (
    (cfg.data.train.data_root, train_files),
    (cfg.data.val.data_root, val_files),
):
    for f in subset_files:
        f_new = f'{target_root}/{ut.get_filename(f)}.pth'
        os.rename(f, f_new)


['/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/33_SmallBuilding_03_F1_small_0_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/19_MedOffice_07_F4_small_0_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/32_ShortOffice_05_F1_small_1_0_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/19_MedOffice_07_F4_small_0_0_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/32_ShortOffice_05_F1_small_1_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/19_MedOffice_07_F4_small_1_0_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/05_MedOffice_01_F2_small_1_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/19_MedOffice_07_F4_small_1_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/05_MedOffice_01_F2_small_0_1_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/19_MedOffice_07_F4_small_2_0_0.pth', '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/32_ShortOffice_05_F1_small_0_1_0.pth']
['/home/mbassier/code

In [12]:
# training_folder=str(Path(cfg['data']['val']['data_root'])/'train')
# validation_folder=str(Path(cfg['data']['val']['data_root'])/'val')

# os.makedirs(training_folder, exist_ok=True)
# os.makedirs(validation_folder, exist_ok=True)

# [t1.handle_process(p, training_folder,cfg,batch_size) for p in ut.get_list_of_files(input_folder, ext=".laz") if "train" in p]
# [t1.handle_process(p, validation_folder,cfg,batch_size) for p in ut.get_list_of_files(input_folder, ext=".laz") if "val" in p]

processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/32_ShortOffice_05_F1train.laz ...
Function handle_process took 282.5166 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/05_MedOffice_01_F2train.laz ...
Function handle_process took 254.6938 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/32_ShortOffice_05_F3train.laz ...
Function handle_process took 257.1952 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/35_Lab_02_F1train.laz ...
Function handle_process took 763.2298 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/35_Lab_02_F2val.laz ...
Function handle_process took 606.0391 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/19_MedOffice_07_F4val.laz ...
Function handle_process took 359.6630 seconds to execute.
processing /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/input/33_

[None, None, None, None]

## TRAINING

Training using Point Transformer V3

<!-- sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -->
```
conda activate pointcept
cd thirdparty/pointcept
export PYTHONPATH=./
sh scripts/train.sh -p python -g 3 -d kul -c kul-pt-v3-base1 -n kul-pt-v3-base1
```

In [7]:

from pointcept.engines.train import TRAINERS
from pointcept.engines.launch import launch

def main_worker(cfg):
    """Worker entry point handed to `launch`: set up the run, build the trainer and train.

    Args:
        cfg: parsed config; `cfg.train.type` selects which trainer is built
            through the TRAINERS registry.
    """
    prepared_cfg = default_setup(cfg)
    trainer_spec = dict(type=prepared_cfg.train.type, cfg=prepared_cfg)
    TRAINERS.build(trainer_spec).train()

# Run main_worker through the Pointcept launcher: one machine, one GPU per machine
launch(
    main_worker,
    num_gpus_per_machine=1, # there seems to be a problem with the multiprocessing -> first check spawn vs forkserver methods (we now use spawn, which didn't resolve anything)
    num_machines=1,
    machine_rank=0,
    dist_url='auto',
    cfg=(cfg,),  # argument tuple -- presumably forwarded to main_worker; confirm against pointcept.engines.launch
)

[2024-04-25 12:04:27,583 INFO train.py line 128 19327] => Loading config ...
[2024-04-25 12:04:27,584 INFO train.py line 130 19327] Save path: /home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1


[2024-04-25 12:04:28,336 INFO train.py line 131 19327] Config:
weight = '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/model/model_best.pth'
resume = False
evaluate = True
test_only = False
seed = 48380007
save_path = '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1'
num_worker = 1
batch_size = 2
batch_size_val = None
batch_size_test = None
epoch = 800
eval_epoch = 100
sync_bn = False
enable_amp = True
empty_cache = False
find_unused_parameters = False
mix_prob = 0.8
param_dicts = [dict(keyword='block', lr=0.0006)]
hooks = [
    dict(type='CheckpointLoader'),
    dict(type='IterationTimer', warmup_iter=2),
    dict(type='InformationWriter'),
    dict(type='SemSegEvaluator'),
    dict(type='CheckpointSaver', save_freq=None),
    dict(type='PreciseEvaluator', test_last=False)
]
train = dict(type='DefaultTrainer')
test = dict(type='SemSegTester', verbose=True)
model = dict(
    type='DefaultSegmentorV2',
    num_classes=6,
    backbone_out_channels=64,
    backbone=dict(
        typ

TypeError: 'numpy.float64' object is not iterable

## INFERENCE

Inference using Point Transformer V3

In [9]:
# Chunk every .laz file in the test folder into pth files, written to that same folder
test_folder = str(Path(cfg.data.test.data_root) / 'test')
inf_files = utl.get_list_of_files(test_folder, ext=".laz")

# Plain loop: the call is made for its side effects only, so there is no result list to keep
for f in inf_files:
    t1.preprocess_point_clouds_to_pth(f, test_folder, cfg, size=size)

processing 11_MedOffice_05_F4_small ...
saved 11_MedOffice_05_F4_small_0_0_0
saved 11_MedOffice_05_F4_small_1_0_0
saved 11_MedOffice_05_F4_small_2_0_0
saved 11_MedOffice_05_F4_small_0_1_0
saved 11_MedOffice_05_F4_small_1_1_0
saved 11_MedOffice_05_F4_small_2_1_0
saved 11_MedOffice_05_F4_small_0_2_0
saved 11_MedOffice_05_F4_small_1_2_0
saved 11_MedOffice_05_F4_small_2_2_0
saved 11_MedOffice_05_F4_small_0_3_0
saved 11_MedOffice_05_F4_small_1_3_0
saved 11_MedOffice_05_F4_small_2_3_0
Function preprocess_point_clouds_to_pth took 280.9030 seconds to execute.
processing 08_ShortOffice_01_F1_small ...
saved 08_ShortOffice_01_F1_small_0_0_0
saved 08_ShortOffice_01_F1_small_1_0_0
Function preprocess_point_clouds_to_pth took 88.5216 seconds to execute.
processing 08_ShortOffice_01_F2_small ...
saved 08_ShortOffice_01_F2_small_0_0_0
saved 08_ShortOffice_01_F2_small_1_0_0
Function preprocess_point_clouds_to_pth took 91.2592 seconds to execute.
processing 11_MedOffice_05_F2_small ...
saved 11_MedOffi

[None, None, None, None, None, None, None]

```
cd thirdparty/pointcept
export PYTHONPATH=./
python tools/inference_kul.py --config-file '/home/mbassier/code/Scan-to-BIM-CVPR-2024/data/t1/config.py' --num-gpus 3 --num-machines 1 --machine-rank 0
```

## RESULTS

Convert inference result back to .laz

In [36]:
def _merge_chunks(chunks):
    """Concatenate the per-chunk 1D arrays of one field into a single float64 array.

    float64 is kept on purpose: it is the dtype the arrays had when they were
    grown from an empty `np.array([])`.
    """
    return np.hstack(chunks).astype(np.float64)


# Folder that holds the chunked inputs (.pth) and their predictions (.npy)
results_folder = Path(cfg.data.test.data_root) / 'test'

# Group the prediction files by the point cloud they were cut from.
# The directory is listed once, and files are matched on the exact derived name:
# a substring test (`n in f`) would also assign the chunks of e.g. '..._small1'
# to the point cloud '..._small'.
# NOTE(review): the name is taken from the first 5 '_'-separated tokens of the full
# path, so this assumes no underscores in the parent directories -- confirm.
files_per_cloud = {}
for f in utl.get_list_of_files(str(results_folder), ext='.npy'):
    name = ut.get_filename('_'.join(f.split('_')[:5]))
    files_per_cloud.setdefault(name, []).append(f)

names = [name for name, chunk_files in files_per_cloud.items() for _ in chunk_files]
unique_names = sorted(files_per_cloud)
for n in unique_names:
    print(f'compiling results for: {n}')

    #get files
    files = files_per_cloud[n]

    # Per-chunk pieces of every field; merged once after the loop instead of
    # re-copying the growing arrays for every chunk
    label_parts, x_parts, y_parts, z_parts = [], [], [], []
    red_parts, green_parts, blue_parts = [], [], []

    for i, f in enumerate(files):
        #get labels
        print(f'{ut.get_filename(f)}:{i+1}/{len(files)}')
        labels = np.load(f)

        #get the corresponding pth file
        pth_file = f.replace('.npy', '.pth')
        chunk_dict = torch.load(pth_file)

        # Revert class_mapping from 0,1,2,3,4,5 to the values in cfg['data']['labels']
        labels = t1.revert_class_mapping(cfg, labels)

        coord = chunk_dict['coord']
        color = chunk_dict['color']

        label_parts.append(labels)
        x_parts.append(coord[:, 0])
        y_parts.append(coord[:, 1])
        z_parts.append(coord[:, 2])
        # Scale colors to the 16-bit range of LAS -- assumes the stored colors are in [0, 1]; TODO confirm
        red_parts.append((color[:, 0] * 65535).astype(np.uint16))
        green_parts.append((color[:, 1] * 65535).astype(np.uint16))
        blue_parts.append((color[:, 2] * 65535).astype(np.uint16))

    labelArray = _merge_chunks(label_parts)
    xArray = _merge_chunks(x_parts)
    yArray = _merge_chunks(y_parts)
    zArray = _merge_chunks(z_parts)
    redArray = _merge_chunks(red_parts)
    greenArray = _merge_chunks(green_parts)
    blueArray = _merge_chunks(blue_parts)

    print(f'writing results for: {n}')
    #create a new las file with the labels
    hdr = laspy.LasHeader(version="1.4", point_format=2)
    las = laspy.LasData(hdr)
    las.x = xArray
    las.y = yArray
    las.z = zArray
    las.red = redArray
    las.green = greenArray
    las.blue = blueArray
    gmu.las_add_extra_dimensions(las, [labelArray], ['classes'], [np.uint8])
    # Write the LAS file as LAZ
    las.write(str(results_folder / n) + '_pred.laz')
    print('DONE')

compiling results for: 08_ShortOffice_01_F1_small
08_ShortOffice_01_F1_small_0_0_0:1/2
08_ShortOffice_01_F1_small_1_0_0:2/2
writing results for: 08_ShortOffice_01_F1_small
DONE
