## Create KittiDatasetCustom 

In [None]:
from pcdet.datasets.kitti.kitti_dataset_custom import *
from pcdet.datasets.dataset import *
import yaml
from easydict import EasyDict
from pathlib import Path
from pcdet.utils import common_utils

# Load the dataset configuration. Path.read_text() opens and closes the YAML
# file itself, so no file handle is left open in the notebook kernel.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/kitti_dataset_custom.yaml').read_text()))
class_names = ['Car', 'Pedestrian', 'Cyclist']
# The repository root is derived from the location of the dataset module
# (three directory levels above its parent folder).
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/kitti/kitti_dataset_custom.py'
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
# Input data and generated pickle files share the same directory.
data_path = ROOT_DIR / 'data' / 'kitti'
save_path = ROOT_DIR / 'data' / 'kitti'
kitti_infos = []
# Number of point features, taken from the configured source feature list.
num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

def create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Write the KITTI info pickles and build the ground-truth database.

    Four pickle files are written below ``save_path`` (train infos, val
    dataset, train+val concatenation, test infos); afterwards
    ``create_groundtruth_database`` is run on the train info file.

    Args:
        dataset_cfg: Dataset configuration. Its
            ``POINT_FEATURE_ENCODING.src_feature_list`` determines the number
            of point features.
        class_names: Class names handed to ``KittiDatasetCustom``, to
            ``get_infos_val`` and, as ``used_classes``, to
            ``create_groundtruth_database``.
        data_path: Root path handed to ``KittiDatasetCustom``.
        save_path: Directory (path object) receiving the pickle files.
        workers: Forwarded as ``num_workers`` to the info collectors.
    """
    from time import sleep

    kitti_set = KittiDatasetCustom(
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=data_path,
        training=False,
        logger=common_utils.create_logger(),
    )

    split_train, split_val, split_test = 'train', 'val', 'test'
    feature_count = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

    # Output locations of the four pickle files.
    train_pkl = save_path / ('kitti_infos_%s.pkl' % split_train)
    val_pkl = save_path / ('kitti_%s_dataset.pkl' % split_val)
    trainval_pkl = save_path / ('kitti_infos_%s%s.pkl' % (split_train, split_val))
    test_pkl = save_path / ('kitti_infos_%s.pkl' % split_test)

    print('\n' + '-' * 36 + 'Start to generate data infos' + '-' * 37)
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-----------------')

    # --- train split ---------------------------------------------------
    # get_infos() collects infos about all classes (except 'DontCare');
    # unwanted classes are filtered later through `used_classes` in
    # create_groundtruth_database.
    kitti_set.set_split(split_train)
    train_infos = kitti_set.get_infos(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=feature_count,
    )
    with open(train_pkl, 'wb') as out:
        pickle.dump(train_infos, out)
    print('Kitti info train file is saved to %s\n' % train_pkl)
    sleep(3)

    # --- val split -----------------------------------------------------
    # training=False so that the scene is processed in 'test' mode with
    # PointFeatureEncoder, DataProcessor, FOV_FLAG.
    kitti_set.set_split(split_val)
    kitti_set.training = False
    val_dataset = kitti_set.get_infos_val(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=feature_count,
        class_names=class_names,
        fov_points_only=False,
    )
    with open(val_pkl, 'wb') as out:
        pickle.dump(val_dataset, out)
    print('Kitti info val file is saved to %s\n' % val_pkl)
    sleep(3)

    # --- train + val ---------------------------------------------------
    with open(trainval_pkl, 'wb') as out:
        pickle.dump(train_infos + val_dataset, out)
    print('Kitti info trainval file is saved to %s\n' % trainval_pkl)
    sleep(3)

    # --- test split (no labels) ----------------------------------------
    kitti_set.set_split(split_test)
    test_infos = kitti_set.get_infos(
        num_workers=workers,
        has_label=False,
        count_inside_pts=False,
    )
    with open(test_pkl, 'wb') as out:
        pickle.dump(test_infos, out)
    print('Kitti info test file is saved to %s\n' % test_pkl)
    sleep(3)

    print('\n---------------Start creating groundtruth database for later data augmentation-------------------------')
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-------------------')
    print('---------------No DataProcessor and PointFeatureEncoder required, handled by training data creation----')

    # The train info pickle written above is the input for the gt_database
    # (objects cut out of the samples).
    kitti_set.set_split(split_train)
    kitti_set.create_groundtruth_database(
        info_path=train_pkl,
        used_classes=class_names,
        split=split_train,
    )
    print(f'---------------These groundtruth {split_train} objects are randomly inserted into samples (augmentation)-------')
    print('-' * 41 + 'Data preparation Done' + '-' * 41)

def save_data_list_kitti(data_list=None, save_path=None, root_path=None, sample_id_list=None, augmentors=None):
    """Pickle the (original + augmented) KITTI training data and log the sample ids.

    The file ``kitti_train_dataset.pkl`` is written into ``save_path``;
    afterwards one log line per sample id is printed.

    Args:
        data_list: Object to pickle (typically the list of samples).
        save_path: Target directory, as ``str`` or path object.
        root_path: Dataset root that contains ``ImageSets``. Defaults to
            ``dataset_cfg.DATA_PATH`` of the module-level configuration.
        sample_id_list: Sample ids to mention in the log. When ``None`` they
            are read from ``ImageSets/<split>.txt`` if that file exists;
            otherwise nothing is logged per sample.
        augmentors: Names of the applied augmentors (log output only).
            ``None`` is treated as "no augmentors".

    Raises:
        TypeError: If ``save_path`` is ``None``.
    """
    # Local import: do not rely on the star imports happening to re-export it.
    import pickle

    save_path = Path(save_path)
    root_path = Path(root_path) if root_path is not None else Path(dataset_cfg.DATA_PATH)
    split = dataset_cfg.DATA_SPLIT['train']

    if sample_id_list is None:
        split_file = root_path / 'ImageSets' / (split + '.txt')
        if split_file.exists():
            # The context manager closes the split file after reading.
            with open(split_file) as split_fh:
                sample_id_list = [line.strip() for line in split_fh]
        else:
            # A missing split file only disables the per-sample log; the
            # pickle is still written.
            sample_id_list = []

    train_split = 'train'
    train_filename = save_path / ('kitti_%s_dataset.pkl' % train_split)

    print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)

    with open(train_filename, 'wb') as f:
        pickle.dump(data_list, f)

    # The augmentor summary is identical for every sample, so build it once.
    aug_names = [] if augmentors is None else augmentors
    aug_str = ', '.join(str(name) for name in aug_names)
    for sample_idx in sample_id_list:
        print(f"{split} sample_idx: {sample_idx} (original, {aug_str})")

    print('Kitti info train/aug file is saved to %s' % train_filename)
    print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')


# Step 1: Create the data_infos; only the validation data_infos and the gt_database are important.
# The val data is post-processed through DataProcessor and PointFeatureEncoder and also includes points (w FoV).
# The gt_database is required to successfully create augmented training samples.
#create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4)

# Step 2: Create the training set with data augmentation.
# The training flag allows data augmentation before training.
dataset = KittiDatasetCustom(
    dataset_cfg=dataset_cfg,
    class_names=class_names,
    root_path=data_path,
    training=True,
)

# Step 3: Call the member method to collect the infos and attach them to the dataset.
dataset.dataset_w_all_infos = dataset.get_infos(
    num_workers=4,
    has_label=True,
    count_inside_pts=True,
    num_features=num_features,
)

In [None]:
import gc
from IPython import get_ipython

# Step 4: save it
# Every dataset[idx] access returns the sample together with the augmentors
# that were applied to it; only the samples are collected for pickling.
dataset_as_list = []
# Pre-initialised so that an empty dataset does not leave the name unbound
# (NameError) when it is passed to save_data_list_kitti below.
applied_augmentors = []

for idx in range(len(dataset)):
    data, applied_augmentors = dataset[idx]
    # debug
    #sample_idx = data[0]['frame_id']
    #print(f"{sample_idx}")
    dataset_as_list.append(data)
    # dataset_as_list.append(dataset[idx])

#gc.collect()

# The augmentors of the last processed sample are used for the log output.
save_data_list_kitti(data_list=dataset_as_list, save_path=save_path, root_path=None, sample_id_list=None, augmentors=applied_augmentors)

# clean up variables after saving
#del dataset, dataset_as_list
#del data, applied_augmentors
#gc.collect()

# clean up variables in notebook & # restart ipython kernel
#if get_ipython():
#    get_ipython().run_line_magic('reset', '-sf')

## Create KittiDatasetCustom with Densification

In [None]:
# - NAME: range_based_densification
#           NUM_POINT_COPIES: 1
#           DELTA_R_RANGE: [0.1, 0.3]

## Create KittiDatasetCustom with Upsampling Griesbacher

In [None]:
# - NAME: random_beam_upsample_griesbacher
#           BEAM_UPSAMPLE_PROB: 1 # upsample all point clouds
#           PHI_THRESHOLD: 0.001570796 # KITTI: 1/1273*2 rad ≈ 0.001570796 ≈ 0.09° (HDL-64E data sheet) ; ZOD: 1/300*2 rad ≈ 0.006666667 ≈ 0.38° (VLS128 data sheet),
#           R_THRESHOLD: 2.0 # meters
#           NUM_INTERP_BEAMS: 1 # all_beams ≈ B + (B - 1) * num_interp_beams so for B = 64 (HDL-64E) and num_interp_beams = 1 -> 64 + (64- 1) = 127 (close to 128)

## Create KittiDatasetCustom with Paper Approach

In [None]:
# - NAME: d2_range_image_4ch

## Create ZODDatasetCustom

In [None]:
from pcdet.datasets.zod.zod_dataset_custom import *
from pcdet.datasets.dataset import *
import yaml
from easydict import EasyDict
from pathlib import Path
from pcdet.utils import common_utils

# Load the dataset configuration. Path.read_text() opens and closes the YAML
# file itself, so no file handle is left open in the notebook kernel.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/zod_dataset_custom.yaml').read_text()))
class_names = ['Vehicle_Car', 'Pedestrian', 'VulnerableVehicle_Bicycle']
# The repository root is derived from the location of the dataset module
# (three directory levels above its parent folder).
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/zod/zod_dataset_custom.py'
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
# Input data and generated pickle files share the same directory.
data_path = ROOT_DIR / 'data' / 'zod'
save_path = ROOT_DIR / 'data' / 'zod'
zod_infos = []
# Number of point features, taken from the configured source feature list.
num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

def create_zod_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Write the ZOD info pickles and build the ground-truth database.

    Three pickle files are written below ``save_path`` (train infos, val
    dataset, train+val concatenation) for the ``'full'`` version; afterwards
    ``create_groundtruth_database`` is run on the train info file.

    Args:
        dataset_cfg: Dataset configuration. Its
            ``POINT_FEATURE_ENCODING.src_feature_list`` determines the number
            of point features.
        class_names: Class names handed to ``ZODDatasetCustom``, to
            ``get_infos_val`` and, as ``used_classes``, to
            ``create_groundtruth_database``.
        data_path: Root path handed to ``ZODDatasetCustom``.
        save_path: Directory (path object) receiving the pickle files.
        workers: Forwarded as ``num_workers`` to the info collectors.
    """
    from time import sleep

    zod_set = ZODDatasetCustom(
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=data_path,
        training=False,
        logger=common_utils.create_logger(),
        creating_pkl_infos=True,
    )

    split_train, split_val = 'train', 'val'
    zod_version = 'full'
    feature_count = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

    # Output locations of the three pickle files.
    train_pkl = save_path / ('zod_infos_%s_%s.pkl' % (split_train, zod_version))
    val_pkl = save_path / ('zod_%s_dataset.pkl' % split_val)
    trainval_pkl = save_path / ('zod_infos_trainval_%s.pkl' % zod_version)

    print('\n' + '-' * 36 + 'Start to generate data infos' + '-' * 37)
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-----------------')

    # --- train split ---------------------------------------------------
    zod_set.set_split(split_train, zod_version)
    train_infos = zod_set.get_infos(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=feature_count,
    )
    with open(train_pkl, 'wb') as out:
        pickle.dump(train_infos, out)
    print('Zod info train file is saved to %s\n' % train_pkl)
    sleep(3)

    # --- val split -----------------------------------------------------
    # training=False so that the scene is processed in 'test' mode with
    # PointFeatureEncoder, DataProcessor, FOV_FLAG.
    zod_set.set_split(split_val, zod_version)
    zod_set.training = False
    val_dataset = zod_set.get_infos_val(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=feature_count,
        class_names=class_names,
    )
    with open(val_pkl, 'wb') as out:
        pickle.dump(val_dataset, out)
    print('Zod info val file is saved to %s\n' % val_pkl)
    sleep(3)

    # --- train + val ---------------------------------------------------
    with open(trainval_pkl, 'wb') as out:
        pickle.dump(train_infos + val_dataset, out)
    print('Zod info trainval file is saved to %s\n' % trainval_pkl)
    sleep(3)

    print('\n---------------Start creating groundtruth database for later data augmentation-------------------------')
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-------------------')
    print('---------------No DataProcessor and PointFeatureEncoder required, handled by training data creation----')

    # The train info pickle written above is the input for the gt_database
    # (objects cut out of the samples).
    zod_set.set_split(split_train, zod_version)
    zod_set.create_groundtruth_database(
        info_path=train_pkl,
        version=zod_version,
        used_classes=class_names,
        split=split_train,
    )
    print(f'---------------These groundtruth {split_train} objects are randomly inserted into samples (augmentation)-------')
    print('-' * 41 + 'Data preparation Done' + '-' * 41)

def save_data_list_zod(data_list=None, save_path=None, root_path=None, sample_id_list=None, augmentors=None):
    """Pickle ``data_list`` into the zstd-compressed file ``zod_train_dataset.pkl.zst``.

    Args:
        data_list: Object to serialise (typically the list of samples).
        save_path: Target directory, as ``str`` or path object.
        root_path: Unused; kept so the signature matches
            ``save_data_list_kitti``.
        sample_id_list: Unused while the per-sample log is disabled.
        augmentors: Unused while the per-sample log is disabled.

    Raises:
        TypeError: If ``save_path`` is ``None``.
    """
    # Local imports: pickle must not depend on a star import re-exporting it.
    import pickle
    #import blosc
    import zstandard as zstd

    train_split = 'train'
    train_filename = Path(save_path) / ('zod_%s_dataset.pkl.zst' % train_split)

    print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)

    # Non-streaming alternative, confirmed to work:
    # raw = pickle.dumps(data_list, protocol=pickle.HIGHEST_PROTOCOL)
    # cctx = zstd.ZstdCompressor(level=15, threads=-1)
    # comp = cctx.compress(raw)
    # with open(train_filename, 'wb') as f:
    #     f.write(comp)

    # Stream the pickle through the compressor so the complete serialised
    # byte string never has to be held in memory next to data_list.
    cctx = zstd.ZstdCompressor(level=19, threads=-1)
    with open(train_filename, 'wb') as fh, cctx.stream_writer(fh) as zfh:
        pickle.Pickler(zfh, protocol=pickle.HIGHEST_PROTOCOL).dump(data_list)

    # Optional per-sample log (disabled):
    # split = dataset_cfg.DATA_SPLIT['train']
    # for sample_idx in sample_id_list:
    #     applied_augmentations = [str(name) for name in augmentors]
    #     aug_str = ', '.join(applied_augmentations)
    #     print(f"{split} sample_idx: {sample_idx} (original, {aug_str})")

    print('Zod info train/aug file is saved to %s' % train_filename)
    print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')


# Step 1: Create the data_infos; only the validation data_infos and the gt_database are important.
# The val data is post-processed through DataProcessor and PointFeatureEncoder and also includes points (w FoV).
# The gt_database is required to successfully create augmented training samples.
#create_zod_infos(dataset_cfg, class_names, data_path, save_path, workers=4)

# Step 2: Create the training set with data augmentation.
dataset = ZODDatasetCustom(
    dataset_cfg=dataset_cfg,
    class_names=class_names,
    root_path=data_path,
    training=True,
    logger=common_utils.create_logger(),
    creating_pkl_infos=False,
)

# Step 3: Call the member method to collect the infos.
# The sample ids come from the previously generated train info pickle.
train_split = 'train'
val_split = 'val'
version = 'full'
train_filename = data_path / ('zod_infos_%s_%s.pkl' % (train_split, version))
with open(train_filename, 'rb') as f:
    zod_infos_train_full = pickle.load(f)

sample_id_list = [entry['point_cloud']['lidar_idx'] for entry in zod_infos_train_full]

dataset.dataset_w_all_infos = dataset.get_infos(
    num_workers=4,
    has_label=True,
    count_inside_pts=True,
    sample_id_list=sample_id_list,
    num_features=num_features,
)

In [None]:
# Step 4: save it (old, slower, OOM danger)
#dataset_as_list = []

#for idx in range(len(dataset)):
#    data, applied_augmentors = dataset[idx]
    # debug
    #sample_idx = data[0]['frame_id']
    #print(f"{sample_idx}")
#    dataset_as_list.append(data)   
    # dataset_as_list.append(dataset[idx])

#save_data_list_zod(data_list=dataset_as_list, save_path=save_path, root_path=None, sample_id_list=sample_id_list, augmentors=applied_augmentors)

# Step 4: save it (new)
# The sample list is built inline (first element of every dataset item) and is
# not bound to a notebook variable; the augmentor names are read from the
# config, falling back to the string form of an entry without 'NAME'.
save_data_list_zod(
    data_list=[dataset[sample_pos][0] for sample_pos in range(len(dataset))],
    save_path=save_path,
    root_path=None,
    sample_id_list=sample_id_list,
    augmentors=[
        aug_cfg.get('NAME', str(aug_cfg))
        for aug_cfg in dataset_cfg.DATA_AUGMENTOR.AUG_CONFIG_LIST
    ],
)

In [None]:
import pickle
import zstandard as zstd

# Read the compressed training pickle back in to check that it can be loaded.
# NOTE: pickle.load can execute code embedded in the file - only load files
# that were produced by this notebook.
train_filename = '/home/rlab10/OpenPCDet/data/zod/zod_train_dataset.pkl.zst'
with open(train_filename, 'rb') as f:
    # Decompress on the fly while unpickling.
    with zstd.ZstdDecompressor().stream_reader(f) as zfh:
        data_list = pickle.load(zfh)

## Test Compression for pkl files

In [None]:
# Step 4: save it (new, faster)

#save_data_list_zod_streaming(dataset=dataset, save_path=save_path, root_path=None, sample_id_list=sample_id_list)


In [None]:
# def save_data_list_zod_streaming(dataset=None, save_path=None, root_path=None, sample_id_list=None):
#     import zstandard as zstd
#     import pickle

#     root_path = root_path if root_path is not None else Path(dataset_cfg.DATA_PATH) 
#     split = dataset_cfg.DATA_SPLIT['train']
    
#     train_split = 'train'
#     train_filename = save_path / ('zod_%s_dataset.pkl.zst' % train_split)

#     print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)
    
#     cctx = zstd.ZstdCompressor(level=5, threads=-1, write_checksum=False, )
#     with open(train_filename, 'wb') as fh, cctx.stream_writer(fh) as zfh:
#         pickler = pickle.Pickler(zfh, protocol=pickle.HIGHEST_PROTOCOL)
#         for i in range(len(dataset)):
#             data, applied_augmentors = dataset[i]  # triggers __getitem__ + augmentation
#             pickler.dump(data)
        
#     if sample_id_list is not None:
#         for sample_idx in sample_id_list:
#             aug_str = ', '.join(map(str, applied_augmentors))
#             print(f"{split} sample_idx: {sample_idx} (original, {aug_str})")
    
#     print('Zod info train/aug file is saved to %s' % train_filename)
#     print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')

## Debug with MultiProcessing

In [None]:
# source: https://github.com/microsoft/debugpy/issues/1168#issuecomment-1377998813

import sys

# Display the file path of the debugpy module that is loaded in this kernel
# (raises KeyError if debugpy has not been imported).
sys.modules['debugpy'].__file__

# go to '/home/rlab10/anaconda3/envs/pcdet/lib/python3.11/site-packages/debugpy'
# find debugpy/server/api.py
# change "subProcess": True to "subProcess": False