## Create KittiDatasetCustom 

In [None]:
from pcdet.datasets.kitti.kitti_dataset_custom import *
from pcdet.datasets.dataset import *
import yaml
from easydict import EasyDict
from pathlib import Path
from pcdet.utils import common_utils

# Load the dataset configuration. Path.read_text() opens and closes the file
# itself, so no file handle is left open (a bare open() inside the call was
# never closed).
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/kitti_dataset_custom.yaml').read_text()))
class_names = ['Car', 'Pedestrian', 'Cyclist']
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/kitti/kitti_dataset_custom.py'
# Repository root: three directory levels above the folder containing file_path.
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
data_path = ROOT_DIR / 'data' / 'kitti'
save_path = ROOT_DIR / 'data' / 'kitti'
kitti_infos = []
# Number of point features = length of the configured source feature list.
num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

def create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Write the KITTI info pickles and build the ground-truth database.

    Files written into ``save_path``: ``kitti_infos_train.pkl``,
    ``kitti_val_dataset.pkl``, ``kitti_infos_trainval.pkl`` and
    ``kitti_infos_test.pkl``. Afterwards the ground-truth database is created
    from the train info file.

    Args:
        dataset_cfg: Dataset configuration; forwarded to ``KittiDatasetCustom``
            and read for ``POINT_FEATURE_ENCODING.src_feature_list``.
        class_names: Class names handed to the dataset, to ``get_infos_val``
            and, as ``used_classes``, to ``create_groundtruth_database``.
        data_path: Root path passed to ``KittiDatasetCustom``.
        save_path: Directory (path object supporting ``/``) for the pickles.
        workers: Worker count forwarded as ``num_workers``.
    """
    import time

    def _store(payload, target, message):
        # Pickle ``payload`` into ``target``, report it, then pause for three seconds.
        with open(target, 'wb') as handle:
            pickle.dump(payload, handle)
        print(message % target)
        time.sleep(3)

    dataset = KittiDatasetCustom(
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=data_path,
        training=False,
        logger=common_utils.create_logger(),
    )

    train_split, val_split, test_split = 'train', 'val', 'test'
    num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

    train_filename = save_path / ('kitti_infos_%s.pkl' % train_split)
    val_filename = save_path / ('kitti_%s_dataset.pkl' % val_split)
    trainval_filename = save_path / ('kitti_infos_%s%s.pkl' % (train_split, val_split))
    test_filename = save_path / ('kitti_infos_%s.pkl' % test_split)

    print('\n' + '-' * 36 + 'Start to generate data infos' + '-' * 37)
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-----------------')

    # --- train split ---
    # get_infos() is expected to process every single scene.
    # NOTE: per the original author, get_infos() gathers infos for all classes
    # (except 'DontCare'); unwanted classes are filtered later through
    # `used_classes` in create_groundtruth_database.
    dataset.set_split(train_split)
    train_infos = dataset.get_infos(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=num_features,
    )
    _store(train_infos, train_filename, 'Kitti info train file is saved to %s\n')

    # --- val split ---
    # training is forced off so that 'test' mode handles each scene with
    # PointFeatureEncoder, DataProcessor and FOV_FLAG (author's note).
    dataset.set_split(val_split)
    dataset.training = False
    val_samples = dataset.get_infos_val(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=num_features,
        class_names=class_names,
        fov_points_only=False,
    )
    _store(val_samples, val_filename, 'Kitti info val dataset is saved to %s\n')

    # --- trainval: train infos followed by the val samples ---
    with open(trainval_filename, 'wb') as handle:
        pickle.dump(train_infos + val_samples, handle)
    print('Kitti info trainval file is saved to %s\n' % trainval_filename)
    time.sleep(3)

    # --- test split (no labels, no point counting) ---
    dataset.set_split(test_split)
    test_infos = dataset.get_infos(num_workers=workers, has_label=False, count_inside_pts=False)
    _store(test_infos, test_filename, 'Kitti info test file is saved to %s\n')

    print('\n---------------Start creating groundtruth database for later data augmentation-------------------------')
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-------------------')
    print('---------------No DataProcessor and PointFeatureEncoder required, handled by training data creation----')

    # 'kitti_infos_train.pkl' is the input for the gt_database (cut-out objects of the samples).
    dataset.set_split(train_split)
    dataset.create_groundtruth_database(info_path=train_filename, used_classes=class_names, split=train_split)
    print(f'---------------These groundtruth {train_split} objects are randomly inserted into samples (augmentation)-------')
    print('-' * 41 + 'Data preparation Done' + '-' * 41)

def save_data_list_kitti(data_list=None, save_path=None, root_path=None, sample_id_list=None, augmentors=None):
    """Pickle the assembled training samples to ``kitti_train_dataset.pkl``.

    Args:
        data_list: Object to serialise (the notebook passes the list of
            original + augmented training samples).
        save_path: Directory that receives the pickle; it is created when it
            does not exist yet.
        root_path: Unused; kept so existing calls keep working.
        sample_id_list: Unused; kept so existing calls keep working.
        augmentors: Unused; kept so existing calls keep working.

    Raises:
        TypeError: If ``save_path`` is None.
    """
    # Local import: this cell has no explicit `import pickle` and otherwise
    # relies on a star import to provide the name.
    import pickle

    if save_path is None:
        raise TypeError('save_path must be a directory path, got None')

    target_dir = Path(save_path)
    # Opening a file for writing does not create missing parent directories.
    target_dir.mkdir(parents=True, exist_ok=True)

    train_split = 'train'
    train_filename = target_dir / ('kitti_%s_dataset.pkl' % train_split)

    print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)

    with open(train_filename, 'wb') as f:
        pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('Kitti info train/aug file is saved to %s' % train_filename)
    print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')


# Step 1 (currently disabled): create the data_infos; only the validation data_infos and the gt_database are important.
# The val data gets post-processed through DataProcessor, PointFeatureEncoder, also includes points (w FoV).
# The gt_database is necessary for successfully creating augmented training samples.
#create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4)

# Step 2: Create the training set with data augmentation.
# The dataset is built with training=True, which is what allows data augmentation before training.
dataset = KittiDatasetCustom(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=True, logger=common_utils.create_logger()) # the training flag allows data augmentation before training

# Step 3: Collect the sample infos through get_infos() and attach the result to the
# dataset object as `dataset_w_all_infos`.
dataset.dataset_w_all_infos = dataset.get_infos(num_workers=4, has_label=True, count_inside_pts=True, num_features=num_features)

## Save KittiDatasetCustom 

In [None]:
import gc
from IPython import get_ipython

# Step 4: materialise every dataset item and save the resulting list.
dataset_as_list = []
# Pre-bind the loop targets: with an empty dataset the loop body never runs,
# and both the save call and the `del` below would raise NameError otherwise.
data, applied_augmentors = None, None

for idx in range(len(dataset)):
    # each item unpacks into the sample data and the augmentors applied to it
    data, applied_augmentors = dataset[idx]
    dataset_as_list.append(data)

gc.collect()

save_data_list_kitti(data_list=dataset_as_list, save_path=save_path, root_path=None, sample_id_list=None, augmentors=applied_augmentors)

# clean up variables after saving
del dataset, dataset_as_list
del data, applied_augmentors
gc.collect()

# Clear the remaining notebook namespace. `%reset -sf` is a forced *soft*
# reset: it wipes the variables but does not restart the IPython kernel.
ipython_shell = get_ipython()
if ipython_shell:
    ipython_shell.run_line_magic('reset', '-sf')

## Create KittiDatasetCustom with range-based densification

In [None]:
from pcdet.datasets.augmentor.data_augmentor import DataAugmentor
from pcdet.utils import common_utils

import yaml
from easydict import EasyDict
from pathlib import Path
import pickle
from tqdm import tqdm
import gc

# Load the densification config. Path.read_text() closes the file itself,
# whereas a bare open() inside the call left the handle open.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/DG_KITTI/kitti_dataset_custom_densification.yaml').read_text()))
class_names = ['Car', 'Pedestrian', 'Cyclist']
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/kitti/kitti_dataset_custom.py'
# Repository root: three directory levels above the folder containing file_path.
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
data_path = ROOT_DIR / 'data' / 'kitti' # raw data path
save_path = ROOT_DIR / 'data' / 'kitti' / 'Domain Generalization' / 'densification'
# The densified pickles are written into this nested folder; create it up
# front because open(..., 'wb') does not create missing directories.
save_path.mkdir(parents=True, exist_ok=True)

# Settings of the first configured augmentor, with fallbacks when a key is absent.
aug_cfg = dataset_cfg.DATA_AUGMENTOR['AUG_CONFIG_LIST'][0]
name = aug_cfg.get('NAME')
num_point_copies = aug_cfg.get('NUM_POINT_COPIES', 3)
delta_r = aug_cfg.get('DELTA_R_RANGE', [0.05, 0.1])

# Input pickles are read from data_path, densified outputs go to save_path.
train_split, val_split = 'train', 'val'
train_filename = data_path / ('kitti_%s_dataset.pkl' % train_split)
dens_train_filename = save_path / ('kitti_%s_dataset_densified_%s.pkl' % (train_split, num_point_copies))
val_filename = data_path / ('kitti_%s_dataset.pkl' % val_split)
dens_val_filename = save_path / ('kitti_%s_dataset_densified_%s.pkl' % (val_split, num_point_copies))

augmentor = DataAugmentor(root_path=data_path, augmentor_configs=dataset_cfg.DATA_AUGMENTOR, class_names=class_names, logger=common_utils.create_logger())
if num_point_copies and delta_r and augmentor.logger is not None:
    augmentor.logger.info('Range based densification enabled with Δr %s and num_copies %d' % (str(delta_r), num_point_copies))

# training
with open(train_filename, 'rb') as f:
    data_list = pickle.load(f)

# Each training sample is a list of data dicts; every entry is augmented.
for sample in tqdm(data_list, desc="Samples"):
    for i, data_dict in enumerate(sample):
        # Chain the augmentors: each one receives the output of the previous
        # one. Previously every augmentor was fed the original data_dict and
        # only the last result was stored.
        for aug_func in augmentor.data_augmentor_queue:
            data_dict = aug_func(data_dict)
        sample[i] = data_dict

with open(dens_train_filename, 'wb') as f:
    pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)
print('Kitti info train dataset densified is saved to %s\n' % dens_train_filename)

# empty RAM
del data_list
gc.collect()

# validation
with open(val_filename, 'rb') as f:
    data_list = pickle.load(f)

for idx, sample in enumerate(tqdm(data_list, desc="Samples")):
    for aug_func in augmentor.data_augmentor_queue:
        sample = aug_func(sample)
    # Write the result back: rebinding the loop variable alone never updates
    # data_list, so an augmentor that returns a new object had its output
    # silently dropped before the dump below.
    data_list[idx] = sample

with open(dens_val_filename, 'wb') as f:
    pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)
print('Kitti info val dataset densified is saved to %s\n' % dens_val_filename)

## Create KittiDatasetCustom with Diode IDs (preparatory work for RBRS (see below))

In [None]:
from pcdet.datasets.kitti.kitti_dataset_custom import *
from pcdet.datasets.dataset import *
import yaml
from easydict import EasyDict
from pathlib import Path
from pcdet.utils import common_utils

# Load the RBRS-preparation config. Path.read_text() closes the file itself,
# whereas a bare open() inside the call left the handle open.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/DG_KITTI/kitti_dataset_custom_rbrs_prep.yaml').read_text()))
class_names = ['Car', 'Pedestrian', 'Cyclist']
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/kitti/kitti_dataset_custom.py'
# Repository root: three directory levels above the folder containing file_path.
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
data_path = ROOT_DIR / 'data' / 'kitti' # raw data path
save_path = ROOT_DIR / 'data' / 'kitti'
kitti_infos = []
# Number of point features = length of the configured source feature list.
num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

def create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Write the validation dataset pickle that carries beam labels.

    The result of ``get_infos_val(..., with_beam_label=True)`` for the 'val'
    split is pickled to ``save_path / 'kitti_val_dataset_beamlabels.pkl'``.
    The ground-truth-database step at the end is disabled (commented out).

    Args:
        dataset_cfg: Dataset configuration; forwarded to ``KittiDatasetCustom``
            and read for ``POINT_FEATURE_ENCODING.src_feature_list``.
        class_names: Class names for the dataset and for ``get_infos_val``.
        data_path: Root path passed to ``KittiDatasetCustom``.
        save_path: Directory (path object supporting ``/``) for the pickle.
        workers: Worker count forwarded as ``num_workers``.
    """
    import time

    dataset = KittiDatasetCustom(
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=data_path,
        training=False,
        logger=common_utils.create_logger(),
    )

    train_split, val_split = 'train', 'val'
    num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

    # train_filename is only consumed by the disabled gt_database step below.
    train_filename = data_path / ('kitti_infos_%s.pkl' % train_split)
    val_filename = save_path / ('kitti_%s_dataset_beamlabels.pkl' % val_split)

    print('\n' + '-' * 36 + 'Start to generate data infos with beamlabels' + '-' * 37)
    print('---------------CAUTION: Custom code is configured to prepare RBRS-----------------')

    # Validation split, processed with training switched off.
    dataset.set_split(val_split)
    dataset.training = False
    val_samples = dataset.get_infos_val(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=num_features,
        class_names=class_names,
        fov_points_only=False,
        with_beam_label=True,
    )
    with open(val_filename, 'wb') as handle:
        pickle.dump(val_samples, handle)
    print('Kitti info val dataset with beamlabels is saved to %s\n' % val_filename)
    time.sleep(3)

    # print('\n---------------Start creating groundtruth database with beamlabels for RBRS-------------------------')
    # print('---------------CAUTION: Custom code is configured to serve as Upsampling NOT training-------------------')
    # print('---------------No DataProcessor and PointFeatureEncoder required, handled by training data creation----')

    # # Input the 'kitti_infos_train_beamlabels.pkl' to generate gt_database (cutted objects of samples)
    # dataset.set_split(train_split)
    # dataset.create_groundtruth_database(info_path=train_filename, used_classes=class_names, split=train_split, with_beam_labels=True)
    # print(f'---------------These groundtruth {train_split} objects are randomly inserted into samples (augmentation)-------')
    # print('-' * 41 + 'Data preparation Done' + '-' * 41)

def save_data_list_kitti(data_list=None, save_path=None, root_path=None, sample_id_list=None, augmentors=None):
    """Pickle the training samples to ``kitti_train_dataset_beamlabels.pkl``.

    Args:
        data_list: Object to serialise (the notebook passes the list of
            training samples that carry beam labels).
        save_path: Directory that receives the pickle; it is created when it
            does not exist yet.
        root_path: Unused; kept so existing calls keep working.
        sample_id_list: Unused; kept so existing calls keep working.
        augmentors: Unused; kept so existing calls keep working.

    Raises:
        TypeError: If ``save_path`` is None.
    """
    # Local import: this cell has no explicit `import pickle` and otherwise
    # relies on a star import to provide the name.
    import pickle

    if save_path is None:
        raise TypeError('save_path must be a directory path, got None')

    target_dir = Path(save_path)
    # Opening a file for writing does not create missing parent directories.
    target_dir.mkdir(parents=True, exist_ok=True)

    train_split = 'train'
    train_filename = target_dir / ('kitti_%s_dataset_beamlabels.pkl' % train_split)

    print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)

    with open(train_filename, 'wb') as f:
        pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('Kitti info train/aug file with beamlabels is saved to %s' % train_filename)
    print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')


# Step 1 : Create the data_infos, only validation data_infos and gt_database are important.
# The val data gets post-processed through DataProcessor, PointFeatureEncoder, also includes points
# The gt_database is necessary for successfully creating upsampled training samples.
# NOTE: in this cell's create_kitti_infos the gt_database step is commented out, so this
# call only writes the validation pickle with beam labels.
create_kitti_infos(dataset_cfg, class_names, data_path, save_path, workers=4)

# Step 2 (currently disabled): Create the training set with beamlabels
#dataset = KittiDatasetCustom(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=True, logger=common_utils.create_logger()) # the training flag allows data augmentation before training

# Step 3 (currently disabled): Call the member method to catch information
#dataset.dataset_w_all_infos = dataset.get_infos(num_workers=4, has_label=True, count_inside_pts=True, num_features=num_features)

## Save KittiDatasetCustom with Diode IDs

In [None]:
# Step 4: save it (new)
# NOTE(review): `dataset` and `dataset.dataset_w_all_infos` are only produced by Steps 2 and 3,
# which are commented out in the preceding cell; enable them (or have a live `dataset`) before running this.
# Element [0] of each dataset item is kept — presumably the sample data of a (data, augmentors) pair; TODO confirm.
# The list is built inline so that no notebook-level variable keeps it alive after the call.
save_data_list_kitti(
    data_list=[dataset[i][0] for i in range(len(dataset.dataset_w_all_infos))], # dataset.dataset_w_all_infos
    save_path=save_path,
    root_path=None,
    sample_id_list=None,
    # names of the configured augmentors; str(cfg) is the fallback when an entry has no 'NAME'
    augmentors=[cfg.get('NAME', str(cfg)) for cfg in dataset_cfg.DATA_AUGMENTOR.AUG_CONFIG_LIST],
)

## Create KittiDatasetCustom with Random Beam Re-Sampling (RBRS)

In [None]:
from pcdet.datasets.augmentor.data_augmentor import DataAugmentor
from pcdet.utils import common_utils

import yaml
from easydict import EasyDict
from pathlib import Path
import pickle
from tqdm import tqdm
import gc

# Load the RBRS config. Path.read_text() closes the file itself, whereas a
# bare open() inside the call left the handle open.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/DG_KITTI/kitti_dataset_custom_rbrs.yaml').read_text()))
class_names = ['Car', 'Pedestrian', 'Cyclist']
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/kitti/kitti_dataset_custom.py'
# Repository root: three directory levels above the folder containing file_path.
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
data_path = ROOT_DIR / 'data' / 'kitti'
save_path = ROOT_DIR / 'data' / 'kitti' / 'Domain Generalization' / 'random beam re-sampling'
# The re-sampled pickles are written into this nested folder; create it up
# front because open(..., 'wb') does not create missing directories.
save_path.mkdir(parents=True, exist_ok=True)

# Beam-labelled pickles are read from data_path, RBRS outputs go to save_path.
train_split, val_split = 'train', 'val'
train_filename = data_path / ('kitti_%s_dataset_beamlabels.pkl' % train_split)
rbrs_train_filename = save_path / ('kitti_%s_dataset_rbrs.pkl' % train_split)
val_filename = data_path / ('kitti_%s_dataset_beamlabels.pkl' % val_split)
rbrs_val_filename = save_path / ('kitti_%s_dataset_rbrs.pkl' % val_split)

# Settings of the first configured augmentor, with fallbacks when a key is absent.
aug_cfg = dataset_cfg.DATA_AUGMENTOR['AUG_CONFIG_LIST'][0]
name = aug_cfg.get('NAME')
upsampling_prob = aug_cfg.get('BEAM_UPSAMPLE_PROB', 1)
phi = aug_cfg.get('PHI_THRESHOLD', 0.001570796)
# Named r_threshold rather than `range`: rebinding the builtin at notebook
# level breaks every later `range(len(...))` executed in the same kernel.
r_threshold = aug_cfg.get('R_THRESHOLD', 2.0)
num_interp_beams = aug_cfg.get('NUM_INTERP_BEAMS', 1)

augmentor = DataAugmentor(root_path=data_path, augmentor_configs=dataset_cfg.DATA_AUGMENTOR, class_names=class_names, logger=common_utils.create_logger())
if phi and r_threshold and augmentor.logger is not None:
    augmentor.logger.info('Random beam re-sampling enabled with upsample prob: %s, φ %s, r: %s and num. interp. beams: %s' % (upsampling_prob, phi, r_threshold, num_interp_beams))

# # training 
# with open(train_filename, 'rb') as f:
#     data_list = pickle.load(f)

# for sample in tqdm(data_list, desc="Samples"):
#     #print(f"Processing frame_id: {sample[0].get('frame_id', 'N/A')}")
#     for i, data_dict in enumerate(sample):
#         for aug_func in augmentor.data_augmentor_queue:
#             sample[i] = aug_func(data_dict)

# with open(rbrs_train_filename, 'wb') as f:
#         pickler = pickle.Pickler(f, protocol=pickle.HIGHEST_PROTOCOL)
#         pickler.dump(data_list)
#         print('Kitti info train dataset upsampled is saved to %s\n' % rbrs_train_filename)
#         pickler.clear_memo()

# empty RAM
#del data_list
#gc.collect()

# validation
with open(val_filename, 'rb') as f:
    data_list = pickle.load(f)

for idx, sample in enumerate(tqdm(data_list, desc="Samples")):
    for aug_func in augmentor.data_augmentor_queue:
        sample = aug_func(sample)
    # Write the result back: rebinding the loop variable alone never updates
    # data_list, so an augmentor that returns a new object had its output
    # silently dropped before the dump below.
    data_list[idx] = sample

with open(rbrs_val_filename, 'wb') as f:
    pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)
print('Kitti info val dataset upsampled is saved to %s\n' % rbrs_val_filename)

## Create KittiDatasetCustom with Paper Approach

In [None]:
# - NAME: d2_range_image_4ch

## Create ZODDatasetCustom

In [None]:
from pcdet.datasets.zod.zod_dataset_custom import *
from pcdet.datasets.dataset import *
import yaml
from easydict import EasyDict
from pathlib import Path
from pcdet.utils import common_utils

# Load the ZOD dataset config. Path.read_text() closes the file itself,
# whereas a bare open() inside the call left the handle open.
dataset_cfg = EasyDict(yaml.safe_load(Path('/home/rlab10/OpenPCDet/tools/cfgs/dataset_configs/zod_dataset_custom.yaml').read_text()))
class_names = ['Vehicle_Car', 'Pedestrian', 'VulnerableVehicle_Bicycle']
file_path = '/home/rlab10/OpenPCDet/pcdet/datasets/zod/zod_dataset_custom.py'
# Repository root: three directory levels above the folder containing file_path.
ROOT_DIR = (Path(file_path).resolve().parent / '../../../').resolve()
data_path = ROOT_DIR / 'data' / 'zod'
save_path = ROOT_DIR / 'data' / 'zod'
zod_infos = []
# Number of point features = length of the configured source feature list.
num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

def create_zod_infos(dataset_cfg, class_names, data_path, save_path, workers=4):
    """Write the ZOD info pickles and build the ground-truth database.

    Files written into ``save_path`` (version 'full'):
    ``zod_infos_train_full.pkl``, ``zod_val_dataset.pkl`` and
    ``zod_infos_trainval_full.pkl``. Afterwards the ground-truth database is
    created from the train info file.

    Args:
        dataset_cfg: Dataset configuration; forwarded to ``ZODDatasetCustom``
            and read for ``POINT_FEATURE_ENCODING.src_feature_list``.
        class_names: Class names handed to the dataset.
        data_path: Root path passed to ``ZODDatasetCustom``.
        save_path: Directory (path object supporting ``/``) for the pickles.
        workers: Worker count forwarded as ``num_workers``.
    """
    import time

    def _store(payload, target, message):
        # Pickle ``payload`` into ``target``, report it, then pause for three seconds.
        with open(target, 'wb') as handle:
            pickle.dump(payload, handle)
        print(message % target)
        time.sleep(3)

    dataset = ZODDatasetCustom(
        dataset_cfg=dataset_cfg,
        class_names=class_names,
        root_path=data_path,
        training=False,
        logger=common_utils.create_logger(),
        creating_pkl_infos=True,
    )

    train_split, val_split = 'train', 'val'
    version = 'full'
    num_features = len(dataset_cfg.POINT_FEATURE_ENCODING.src_feature_list)

    train_filename = save_path / ('zod_infos_%s_%s.pkl' % (train_split, version))
    val_filename = save_path / ('zod_%s_dataset.pkl' % val_split)
    trainval_filename = save_path / ('zod_infos_trainval_%s.pkl' % version)

    print('\n' + '-' * 36 + 'Start to generate data infos' + '-' * 37)
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-----------------')

    # --- train split ---
    dataset.set_split(train_split, version)
    train_infos = dataset.get_infos(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=num_features,
    )
    _store(train_infos, train_filename, 'Zod info train file is saved to %s\n')

    # --- val split ---
    # training is forced off so that 'test' mode handles each scene with
    # PointFeatureEncoder, DataProcessor and FOV_FLAG (author's note).
    dataset.set_split(val_split, version)
    dataset.training = False
    val_samples = dataset.get_infos_val(
        num_workers=workers,
        has_label=True,
        count_inside_pts=True,
        num_features=num_features,
    )
    _store(val_samples, val_filename, 'Zod info val file is saved to %s\n')

    # --- trainval: train infos followed by the val samples ---
    with open(trainval_filename, 'wb') as handle:
        pickle.dump(train_infos + val_samples, handle)
    print('Zod info trainval file is saved to %s\n' % trainval_filename)
    time.sleep(3)

    print('\n---------------Start creating groundtruth database for later data augmentation-------------------------')
    print('---------------CAUTION: Custom code is configured to serve as Augmentor NOT training-------------------')
    print('---------------No DataProcessor and PointFeatureEncoder required, handled by training data creation----')

    # 'zod_infos_train_full.pkl' is the input for the gt_database (cut-out objects of the samples).
    dataset.set_split(train_split, version)
    dataset.create_groundtruth_database(info_path=train_filename, version=version, split=train_split)
    print(f'---------------These groundtruth {train_split} objects are randomly inserted into samples (augmentation)-------')
    print('-' * 41 + 'Data preparation Done' + '-' * 41)

def save_data_list_zod(data_list=None, save_path=None, root_path=None, sample_id_list=None, augmentors=None):
    """Pickle the assembled training samples to ``zod_train_dataset.pkl``.

    Args:
        data_list: Object to serialise (the notebook passes the list of
            original + augmented training samples).
        save_path: Directory that receives the pickle; it is created when it
            does not exist yet.
        root_path: Unused; kept so existing calls keep working.
        sample_id_list: Unused; kept so existing calls keep working.
        augmentors: Unused; kept so existing calls keep working.

    Raises:
        TypeError: If ``save_path`` is None.
    """
    # Local import: this cell has no explicit `import pickle` and otherwise
    # relies on a star import to provide the name.
    import pickle

    if save_path is None:
        raise TypeError('save_path must be a directory path, got None')

    target_dir = Path(save_path)
    # Opening a file for writing does not create missing parent directories.
    target_dir.mkdir(parents=True, exist_ok=True)

    train_split = 'train'
    train_filename = target_dir / ('zod_%s_dataset.pkl' % train_split)

    print('\n' + '-' * 35 + 'Start to save data infos(original+augmented)' + '-' * 37)

    # Two zstandard-compressed variants were tried here earlier (one-shot
    # compression of pickle.dumps() output, and streaming through
    # ZstdCompressor.stream_writer); the plain pickle below is what is used.
    with open(train_filename, 'wb') as f:
        pickle.dump(data_list, f, protocol=pickle.HIGHEST_PROTOCOL)

    print('Zod info train/aug file is saved to %s' % train_filename)
    print('-' * 49 + 'Data saving Done' + '-' * 51 + '\n')


# Step 1 (currently disabled): create the data_infos; only the validation data_infos and the gt_database are important.
# The val data gets post-processed through DataProcessor, PointFeatureEncoder, also includes points (w FoV).
# The gt_database is necessary for successfully creating augmented training samples.
#create_zod_infos(dataset_cfg, class_names, data_path, save_path, workers=4)

# Step 2: Create the training set with data augmentation (training=True, creating_pkl_infos=False)
dataset = ZODDatasetCustom(dataset_cfg=dataset_cfg, class_names=class_names, root_path=data_path, training=True, logger=common_utils.create_logger(), creating_pkl_infos=False)

# Step 3: Call the member method to catch information
# The train info pickle (zod_infos_train_full.pkl under data_path) must already exist,
# i.e. Step 1 has to have been run at some point.
train_split, val_split = 'train', 'val'
version = 'full'
train_filename = data_path / ('zod_infos_%s_%s.pkl' % (train_split, version))
with open(train_filename, 'rb') as f:
    zod_infos_train_full = pickle.load(f)

# Sample ids are taken from each info entry's ['point_cloud']['lidar_idx'] field.
sample_id_list = [info['point_cloud']['lidar_idx'] for info in zod_infos_train_full]

# Collect the infos for exactly these samples and attach them to the dataset.
dataset.dataset_w_all_infos = dataset.get_infos(num_workers=4, has_label=True, count_inside_pts=True, sample_id_list=sample_id_list, num_features=num_features)


In [None]:
# Step 4: save it (new)
# Requires `dataset`, `dataset.dataset_w_all_infos` and `sample_id_list` from the preceding cell.
# Element [0] of each dataset item is kept — presumably the sample data of a (data, augmentors) pair; TODO confirm.
# The list is built inline so that no notebook-level variable keeps it alive after the call.
save_data_list_zod(
    data_list=[dataset[i][0] for i in range(len(dataset.dataset_w_all_infos))], # dataset.dataset_w_all_infos
    save_path=save_path,
    root_path=None,
    sample_id_list=sample_id_list,
    # names of the configured augmentors; str(cfg) is the fallback when an entry has no 'NAME'
    augmentors=[cfg.get('NAME', str(cfg)) for cfg in dataset_cfg.DATA_AUGMENTOR.AUG_CONFIG_LIST],
)

# Step 4: save it (old, slower, OOM danger)
#dataset_as_list = []

#for idx in range(len(dataset)):
#    data, applied_augmentors = dataset[idx]
    # debug
    #sample_idx = data[0]['frame_id']
    #print(f"{sample_idx}")
#    dataset_as_list.append(data)   
    # dataset_as_list.append(dataset[idx])

#save_data_list_zod(data_list=dataset_as_list, save_path=save_path, root_path=None, sample_id_list=sample_id_list, augmentors=applied_augmentors)

In [None]:
import numpy as np

import pickle
import numpy as np

# Source pickle and the .npy file it is optionally converted to.
train_filename = '/home/rlab10/OpenPCDet/data/zod/zod_train_dataset.pkl'
npy_filename = '/home/rlab10/OpenPCDet/data/zod/zod_train_dataset.npy'

# Ask before converting; any answer other than 'y' / 'Y' aborts.
answer = input("Would you like to save the .pkl as .npy? (y/n): ")
if answer.lower() != 'y':
    print("Aborted.")
else:
    # Load the pickled data and hand it to np.save unchanged.
    with open(train_filename, 'rb') as f:
        data = pickle.load(f)
    np.save(npy_filename, data)
    print(f"Saved as {npy_filename}")

## Check Beam-Labels with range image

In [None]:
import pickle
from pcdet.datasets.augmentor.data_augmentor import DataAugmentor
import matplotlib.pyplot as plt

pkl_path = "/home/rlab10/OpenPCDet/data/kitti/kitti_val_dataset_beamlabels.pkl"

with open(pkl_path, 'rb') as f:
    data_list = pickle.load(f)

# Inspect the first sample only; the last point column is read as the beam label.
points = data_list[0]['points']
beam_label = points[:, -1].astype(int)

augmentor = DataAugmentor(root_path=None, augmentor_configs=[], class_names=[])
polar_image = augmentor.get_polar_image(points[:, :3], with_limit_range=False)
# Columns are read as (phi, theta, range) — assumed from how they are used here; verify against get_polar_image.
phi = polar_image[:, 0]
theta = polar_image[:, 1]
# Named point_range rather than `range` so the builtin is not shadowed for
# the other cells running in the same kernel.
point_range = polar_image[:, 2]

plt.figure(figsize=(12, 6))
# Colour by beam label: title and colorbar both announce beam labels, but the
# scatter was previously coloured by range and beam_label was never used.
sc = plt.scatter(phi, theta, c=beam_label, cmap='jet', s=1)
plt.xlabel('Azimuth (phi)')
plt.ylabel('Elevation (theta)')
plt.title('Range Image colored by Beam Label')
plt.colorbar(sc, label='Beam Label')
plt.show()

## Debug with MultiProcessing

In [None]:
# source: https://github.com/microsoft/debugpy/issues/1168#issuecomment-1377998813

import sys

# Show where the already-loaded debugpy package lives on disk.
# NOTE: this lookup raises KeyError when debugpy has not been imported into the current process.
sys.modules['debugpy'].__file__

# go to '/home/rlab10/anaconda3/envs/pcdet/lib/python3.11/site-packages/debugpy'
# find debugpy/server/api.py
# change "subProcess": True to "subProcess": False