# This jupyter file is used for LiDAR detection efficiency profiling

## 1. Import required module from OpenPCDet 

In [1]:
os.environ['CUDA_VISIBLE_DEVICES'] = "5"
import _init_path
import argparse
import datetime
import glob
import os
from pathlib import Path
from test import repeat_eval_ckpt

import spconv.pytorch as spconv
import torch
import torch.nn as nn
from tensorboardX import SummaryWriter
from pcdet.config import cfg, cfg_from_list, cfg_from_yaml_file, log_config_to_file
from pcdet.datasets import build_dataloader
from pcdet.models import build_network, model_fn_decorator
from pcdet.utils import common_utils
import numpy as np
import warnings
warnings.filterwarnings("ignore")


## 2. Load the waymo data for profiling

**You can also re-use the dataloader defined in OpenPCDet (Recommanded)**

If you choose this option, please skip this part.

**Another option**

You can construct your own code for loading point cloud from waymo dataset. Check the code in ```../pcdet/datasets/waymo/waymo_dataset.py``` as reference. For the initial stage, loading one LiDAR scan would be enough. While later, we may need to load more to average the measurement for better accuracy.

The waymo data is located in ```../data/waymo```



In [2]:
# construct for you data loading function here

dataset_base_path = '/home/jnd/code/OpenPCDet/data/waymo'

def load_waymo_pcd():
    pass


## 3. Build the model based on cfg files

In this section, build the model following the code in ```./train.py```. The model config file in located in ```./cfgs/waymo_models```. The required configs are: pointpillar, pvrcnn, second, centerpoint. 

For profiling the efficiency of operations, you can just take the specific layer of the model for testing.

In [3]:
def load_and_build_data(model_name, batch_size):
    cfg_file = './cfgs/waymo_models/{}.yaml'.format(model_name)

    output_dir = Path('./profiling_log')
    output_dir.mkdir(exist_ok=True)
    log_file = output_dir / ('log_train_%s.txt' % datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
    logger = common_utils.create_logger(log_file, rank=0)
    cfg_from_yaml_file(cfg_file, cfg)

    # It would take some time to initialize the dataset.

    test_set, test_loader, test_sampler = build_dataloader(
            dataset_cfg=cfg.DATA_CONFIG,
            class_names=cfg.CLASS_NAMES,
            batch_size=batch_size,
            dist=False, workers=1,
            logger=logger,
            training=False,  # I would recommand you to use a test loader
            merge_all_iters_to_one_epoch=True,
            total_epochs=1,
            seed=666
        )

    model = build_network(model_cfg=cfg.MODEL, num_class=3, dataset=test_set)
    return model, test_loader


## 4. Now load the data and feed to the network for profiling

There're two options for you to perform this task:

1. Use your own data loading function and feed the data to the network
2. Re-use the dataset to load the data. Check function **eval_one_epoch** in file ```./eval_utils/eval_utils.py``` for details.


In [4]:
import pandas as pd
def rearrange_df(df1, df2, batch_size):
    
    new_df = pd.DataFrame()
    
    active1 = df1['active_bytes'].copy()
    active2 = df2['active_bytes'].copy()
    reserved1 = df1['reserved_bytes'].copy()
    reserved2 = df2['reserved_bytes'].copy()
    for i in range(len(active1)):
        if not isinstance(active1[i], float) and not isinstance(reserved1[i], float):        
            if active1[i][-1] == 'G':
                active1[i] = float(active1[i][:-1]) * 1024
            
            elif active1[i][-1] == 'K':
                active1[i] = float(active1[i][:-1]) / 1024
            
            else:
                active1[i] = float(active1[i][:-1])

            if reserved1[i][-1] == 'G':
                reserved1[i] = float(reserved1[i][:-1]) * 1024    
                
            elif reserved1[i][-1] == 'K':
                reserved1[i] = float(reserved1[i][:-1]) / 1024
  
            else:
                reserved1[i] = float(reserved1[i][:-1])

    for i in range(len(active2)):
        if not isinstance(active2[i], float) and not isinstance(reserved2[i], float):        
            if active2[i][-1] == 'G':
                active2[i] = float(active2[i][:-1]) * 1024
            
            elif active2[i][-1] == 'K':
                active2[i] = float(active2[i][:-1]) / 1024
            
            else:
                active2[i] = float(active2[i][:-1])

            if reserved2[i][-1] == 'G':
                reserved2[i] = float(reserved2[i][:-1]) * 1024    
                
            elif reserved2[i][-1] == 'K':
                reserved2[i] = float(reserved2[i][:-1]) / 1024
  
            else:
                reserved2[i] = float(reserved2[i][:-1])
                
    new_df['active_bytes'] = (active1 - active2) / (batch_size - 1)
    new_df['reserved_bytes'] = reserved2
    new_df = new_df.astype('float').round(3).fillna("")
    new_df = new_df.astype('str')
    new_df[(new_df['active_bytes']!="") & (new_df['reserved_bytes']!="")] = new_df[(new_df['active_bytes']!="") & (new_df['reserved_bytes']!="")] + 'M'
    return new_df

In [5]:
import time
from pytorch_memlab import LineProfiler
import logging
from pcdet.models import load_data_to_gpu

final_dict = {'overall_df': None}

def gpu_warm_up(model_name):
    model, test_loader = load_and_build_data(model_name, 1)
    model.cuda()
    model.eval()
    warm_up_data = next(iter(test_loader))
    load_data_to_gpu(warm_up_data)
    for _ in range(20):
        try:
            res1, res2 = model(warm_up_data)
        except:
            print("PointRCNN is so annoying.")
    del model, test_loader, res1, res2
    del warm_up_data

def inference_simulation(model_name, batch_size):
    
    model, test_loader = load_and_build_data(model_name, batch_size)
    
    model.cuda()
    model.eval()

    one_data = None

    for i, batch_dict in enumerate(test_loader):
        load_data_to_gpu(batch_dict)
        one_data = batch_dict.copy()
        print(batch_dict.keys())
        gpu_warm_up(model_name)
        with torch.no_grad():
            with LineProfiler(model.forward) as overall:
                starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
                starter.record()
                pred_dicts, ret_dict = model(batch_dict)
                ender.record()
                torch.cuda.synchronize()
                curr_time = starter.elapsed_time(ender)
            
            model_df = pd.read_html(overall.display()._repr_html_())[0]
            model_df.columns = [i[0] for i in model_df.columns]
        
        if batch_size == 1:
            logging.info("|---------------------------------------------- The model is {} --------------------------------------------|\n".format(model.__class__.__name__))
            logging.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>> The runtime of {} is {} ms.".format(model.__class__.__name__, curr_time))
            final_dict["overall_df"] = model_df
            logging.info(">>>> Analyzing the memory usage of this model with the batch size 1 <<<<")
            logging.info(overall.display())
            
        else:
            model_df[['active_bytes', 'reserved_bytes']] = rearrange_df(model_df, final_dict['overall_df'], batch_size)
            logging.info(">>>> Now we analyze the average memory usage of this model <<<<")
            logging.info('\n' + str(model_df))
        break
    return model, one_data

## 5. Record execution time and memory usage

For execution time, you could simply use the ```time``` library to record it. 

As for memory usage, it's a bit more tricky. But we have two interesting libraries for you to explore:

1. [pytorch_memlab](https://github.com/stonesjtu/pytorch_memlab)
2. [pytorch_profiler](https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html)

**Please read the document and understand how they work before use it.**

In [6]:
def profiling_one_batch(model_name):
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.max_colwidth', -1)
    pd.set_option('display.max_rows', None)

    model, one_data = inference_simulation(model_name, batch_size=1)

    vfe = model.vfe
    backbone_3d = model.backbone_3d
    map2bev = model.map_to_bev_module
    backbone_2d = model.backbone_2d
    dense_head = model.dense_head
    pfe = model.pfe
    point_head = model.point_head
    roi_head = model.roi_head
    
    profile_dict = {'vfe_time': 0, 
                    'pfn_time': 0, 
                    'vfe_df': None,
                    'backbone_3d_time': 0, 
                    'spconv1_t': 0, 
                    'spconv2_t': 0, 
                    'spconv3_t': 0, 
                    'spconv4_t': 0, 
                    'backbone_3d_df': None,
                    'MSG_SA_time': 0,
                    'MSG_FP_time': 0,
                    'map2bev_time': 0, 
                    'map2bev_df': None, 
                    'pfe_time': 0, 
                    'pfe_df': None, 
                    'pfe_grouping_t': 0, 
                    'pfe_grouping_df': None,
                    'pfe_bev_feature_t': 0, 
                    'pfe_bev_df': None,
                    'pfe_raw_feature_t': 0, 
                    'pfe_raw_df': None,
                    'pfe_mlp_t':[],
                    'pfe_mlp_df':[],
                    'backbone_2d_time':0, 
                    'backbone_2d_df': None,
                    'dense_head_time':0, 
                    'dense_head_df': None,
                    'center_head_time': 0,
                    'point_head_time': 0,
                    'point_head_df': None,
                    'roi_head_time': 0,
                    'roi_head_df': None,
                    'roi_pooling_time': 0,
                    'roi_pooling_df': None,
                    'point_rcnn_proposal_time': 0,
                    'point_rcnn_proposal_df': None
                    }
    
    if vfe is not None:
        if model.vfe.__class__.__name__ == "MeanVFE":
            with LineProfiler(vfe.forward) as prof:
                start_time = time.time()
                out = vfe.forward(one_data)
                profile_dict['vfe_time'] = (time.time() - start_time)


        elif model.vfe.__class__.__name__ == "PillarVFE":
            with LineProfiler(vfe.forward) as prof:
                start_time = time.time()
                out = vfe.forward(one_data)
                profile_dict['vfe_time'] = (time.time() - start_time)


            # Analyze pfn process
            voxel_features, voxel_num_points, coords = one_data['voxels'], one_data['voxel_num_points'], one_data['voxel_coords']
            points_mean = voxel_features[:, :, :3].sum(dim=1, keepdim=True) / voxel_num_points.type_as(voxel_features).view(-1, 1, 1)
            f_cluster = voxel_features[:, :, :3] - points_mean
            f_center = torch.zeros_like(voxel_features[:, :, :3])
            f_center[:, :, 0] = voxel_features[:, :, 0] - (coords[:, 3].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_x + vfe.x_offset)
            f_center[:, :, 1] = voxel_features[:, :, 1] - (coords[:, 2].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_y + vfe.y_offset)
            f_center[:, :, 2] = voxel_features[:, :, 2] - (coords[:, 1].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_z + vfe.z_offset)
            if vfe.use_absolute_xyz:
                features = [voxel_features, f_cluster, f_center]
            else:
                features = [voxel_features[..., 3:], f_cluster, f_center]
            if vfe.with_distance:
                points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, keepdim=True)
                features.append(points_dist)
            features = torch.cat(features, dim=-1)
            voxel_count = features.shape[1]
            mask = vfe.get_paddings_indicator(voxel_num_points, voxel_count, axis=0)
            mask = torch.unsqueeze(mask, -1).type_as(voxel_features)
            features *= mask
            start_time = time.time()
            for pfn in vfe.pfn_layers:
                features = pfn(features)
            profile_dict['pfn_time'] = time.time() - start_time
                       
            del voxel_features, voxel_num_points, coords
            del points_mean
            del f_cluster
            del f_center
            del features
            del voxel_count
            del mask
            
        vfe_df = pd.read_html(prof.display()._repr_html_())[0]
        vfe_df.columns = [i[0] for i in vfe_df.columns]
        profile_dict['vfe_df'] = vfe_df

    else: 
        out = one_data
        
    if backbone_3d is not None:
        with LineProfiler(backbone_3d.forward) as prof2:
            start_time = time.time()
            out2 = backbone_3d.forward(out)
            profile_dict['backbone_3d_time'] = (time.time() - start_time)
            
            backbone_3d_df = pd.read_html(prof2.display()._repr_html_())[0]
            backbone_3d_df.columns = [i[0] for i in backbone_3d_df.columns]
            profile_dict['backbone_3d_df'] = backbone_3d_df
        
        if "Voxel" in backbone_3d.__class__.__name__:       
            # Analyze specific process
            voxel_features, voxel_coords = out['voxel_features'], out['voxel_coords']
            batch_size = out['batch_size']
            input_sp_tensor = spconv.SparseConvTensor(
                    features=voxel_features,
                    indices=voxel_coords.int(),
                    spatial_shape=model.backbone_3d.sparse_shape,
                    batch_size=batch_size
                )

            x = model.backbone_3d.conv_input(input_sp_tensor)
            
            start_time = time.time()
            conv1 = backbone_3d.conv1(x)
            profile_dict['spconv1_t'] = (time.time() - start_time)

            start_time2 = time.time()
            conv2 = backbone_3d.conv2(conv1)
            profile_dict['spconv2_t'] = (time.time() - start_time2)

            start_time3 = time.time()
            conv3 = backbone_3d.conv3(conv2)
            profile_dict['spconv3_t'] = (time.time() - start_time3)

            start_time4 = time.time()
            conv4 = backbone_3d.conv4(conv3)
            end_time = time.time()
            profile_dict['spconv4_t'] = (end_time - start_time4)

            del voxel_features, voxel_coords
            del input_sp_tensor
            del x
            del conv1
            del conv2
            del conv3
            del conv4

        elif "Point" in backbone_3d.__class__.__name__:
            
            batch_size = out['batch_size']
            points = out['points']
            batch_idx, xyz, features = backbone_3d.break_up_pc(points)

            xyz_batch_cnt = xyz.new_zeros(batch_size).int()
            for bs_idx in range(batch_size):
                xyz_batch_cnt[bs_idx] = (batch_idx == bs_idx).sum()

            assert xyz_batch_cnt.min() == xyz_batch_cnt.max()
            xyz = xyz.view(batch_size, -1, 3)
            features = features.view(batch_size, -1, features.shape[-1]).permute(0, 2, 1).contiguous() if features is not None else None

            l_xyz, l_features = [xyz], [features]
            start_time = time.time()
            for i in range(len(backbone_3d.SA_modules)):
                li_xyz, li_features = backbone_3d.SA_modules[i](l_xyz[i], l_features[i])
                l_xyz.append(li_xyz)
                l_features.append(li_features)
            profile_dict['MSG_SA_time'] = time.time() - start_time

            start_time = time.time()
            for i in range(-1, -(len(backbone_3d.FP_modules) + 1), -1):
                l_features[i - 1] = backbone_3d.FP_modules[i](
                    l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
                )  # (B, C, N)
            profile_dict['MSG_FP_time'] = time.time() - start_time
            
            del points
            del batch_idx, xyz, features
            del xyz_batch_cnt
            del l_xyz, l_features
            
    else:
        out2 = out

    if map2bev is not None:
        with LineProfiler(map2bev.forward) as prof3:
            start_time = time.time()
            out3 = map2bev.forward(out2)
            profile_dict['map2bev_time'] = (time.time() - start_time)

        map2bev_df = pd.read_html(prof3.display()._repr_html_())[0]
        map2bev_df.columns = [i[0] for i in map2bev_df.columns]
        profile_dict['map2bev_df'] = map2bev_df   
    else:
        out3 = out2


    if pfe is not None:
        with LineProfiler(pfe.forward) as prof4:
            start_time = time.time()
            out4 = pfe.forward(out3)
            profile_dict['pfe_time'] = (time.time() - start_time)

        pfe_df = pd.read_html(prof4.display()._repr_html_())[0]
        pfe_df.columns = [i[0] for i in pfe_df.columns]
        profile_dict['pfe_df'] = pfe_df

        with LineProfiler(pfe.get_sampled_points) as grouping:
            start_time_grouping = time.time()
            keypoints = pfe.get_sampled_points(out3)
            profile_dict['pfe_grouping_t'] = time.time() - start_time_grouping

        pfe_grouping_df = pd.read_html(grouping.display()._repr_html_())[0]
        pfe_grouping_df.columns = [i[0] for i in pfe_grouping_df.columns]
        profile_dict['pfe_grouping_df'] = pfe_grouping_df

        point_features_list = []
        if 'bev' in pfe.model_cfg.FEATURES_SOURCE:
            with LineProfiler(pfe.interpolate_from_bev_features) as bev:
                start_time_bev = time.time()
                point_bev_features = pfe.interpolate_from_bev_features(
                    keypoints, out3['spatial_features'], out3['batch_size'],
                    bev_stride=out3['spatial_features_stride']
                )
                profile_dict['pfe_bev_feature_t'] = time.time() - start_time_bev

            pfe_bev_df = pd.read_html(bev.display()._repr_html_())[0]
            pfe_bev_df.columns = [i[0] for i in pfe_bev_df.columns]
            profile_dict['pfe_bev_df'] = pfe_bev_df
            point_features_list.append(point_bev_features)
            
            del point_bev_features

        batch_size = out3['batch_size']
        new_xyz = keypoints[:, 1:4].contiguous()
        new_xyz_batch_cnt = new_xyz.new_zeros(batch_size).int()
        for k in range(batch_size):
            new_xyz_batch_cnt[k] = (keypoints[:, 0] == k).sum()

        if 'raw_points' in pfe.model_cfg.FEATURES_SOURCE:
            raw_points = out3['points']
            with LineProfiler(pfe.aggregate_keypoint_features_from_one_source) as raw:
                start_time_raw = time.time()
                pooled_features = pfe.aggregate_keypoint_features_from_one_source(
                    batch_size=batch_size, aggregate_func=pfe.SA_rawpoints,
                    xyz=raw_points[:, 1:4],
                    xyz_features=raw_points[:, 4:].contiguous() if raw_points.shape[1] > 4 else None,
                    xyz_bs_idxs=raw_points[:, 0],
                    new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt,
                    filter_neighbors_with_roi=pfe.model_cfg.SA_LAYER['raw_points'].get('FILTER_NEIGHBOR_WITH_ROI', False),
                    radius_of_neighbor=pfe.model_cfg.SA_LAYER['raw_points'].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None),
                    rois=out3.get('rois', None)
                )
                profile_dict['pfe_raw_feature_t'] = time.time() - start_time_raw

            pfe_raw_df = pd.read_html(bev.display()._repr_html_())[0]
            pfe_raw_df.columns = [i[0] for i in pfe_raw_df.columns]
            profile_dict['pfe_raw_df'] = pfe_raw_df
            point_features_list.append(pooled_features)
            
            del pooled_features

        for k, src_name in enumerate(pfe.SA_layer_names):
            cur_coords = out3['multi_scale_3d_features'][src_name].indices
            cur_features = out3['multi_scale_3d_features'][src_name].features.contiguous()
            xyz = common_utils.get_voxel_centers(
                cur_coords[:, 1:4], downsample_times=pfe.downsample_times_map[src_name],
                voxel_size=pfe.voxel_size, point_cloud_range=pfe.point_cloud_range
            )

            with LineProfiler(pfe.aggregate_keypoint_features_from_one_source) as mlp:
                start_time_mlp = time.time()
                pooled_features = pfe.aggregate_keypoint_features_from_one_source(
                    batch_size=batch_size, aggregate_func=pfe.SA_layers[k],
                    xyz=xyz.contiguous(), xyz_features=cur_features, xyz_bs_idxs=cur_coords[:, 0],
                    new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt,
                    filter_neighbors_with_roi=pfe.model_cfg.SA_LAYER[src_name].get('FILTER_NEIGHBOR_WITH_ROI', False),
                    radius_of_neighbor=pfe.model_cfg.SA_LAYER[src_name].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None),
                    rois=out3.get('rois', None)
                )
                profile_dict['pfe_mlp_t'].append(time.time() - start_time_mlp)

            pfe_mlp_df = pd.read_html(mlp.display()._repr_html_())[0]
            pfe_mlp_df.columns = [i[0] for i in pfe_mlp_df.columns]
            profile_dict['pfe_mlp_df'].append(pfe_mlp_df)

            point_features_list.append(pooled_features)
        
        del point_features_list
        del keypoints
        del new_xyz
        del new_xyz_batch_cnt
        del cur_coords
        del cur_features
        del xyz
        del pooled_features
    else:
        out4 = out3


    if backbone_2d is not None:
        with LineProfiler(backbone_2d.forward) as prof5:
            start_time = time.time()
            out5 = backbone_2d.forward(out4)
            profile_dict['backbone_2d_time'] = (time.time() - start_time)

        backbone_2d_df = pd.read_html(prof5.display()._repr_html_())[0]
        backbone_2d_df.columns = [i[0] for i in backbone_2d_df.columns]
        profile_dict['backbone_2d_df'] = backbone_2d_df
    else:
        out5=out4


    if dense_head is not None:
        with LineProfiler(dense_head.forward) as prof6:
            start_time = time.time()
            out6 = dense_head.forward(out5)
            profile_dict['dense_head_time'] = (time.time() - start_time)

        if dense_head.__class__.__name__ == "CenterHead":
            spatial_features_2d = out5['spatial_features_2d']
            x = dense_head.shared_conv(spatial_features_2d)
            pred_dicts = []
            start_time = time.time()
            for head in dense_head.heads_list:
                pred_dicts.append(head(x))
            profile_dict['center_head_time'] = time.time() - start_time
            
            del spatial_features_2d
            del x 
            del pred_dicts
            
        dense_head_df = pd.read_html(prof6.display()._repr_html_())[0] 
        dense_head_df.columns = [i[0] for i in dense_head_df.columns]
        profile_dict['dense_head_df'] = dense_head_df
    else:
        out6 = out5


    if point_head is not None:
        with LineProfiler(point_head.forward) as ph:
            start_time = time.time()
            out7 = point_head.forward(out6)
            profile_dict['point_head_time'] = (time.time() - start_time)
        point_head_df = pd.read_html(ph.display()._repr_html_())[0]
        point_head_df.columns = [i[0] for i in point_head_df.columns]
        profile_dict['point_head_df'] = point_head_df
    else:
        out7 = out6
    
    if roi_head is not None:

        with LineProfiler(roi_head.forward) as roi:
            start_time = time.time()
            out8 = roi_head.forward(out7)
            profile_dict['roi_head_time'] = (time.time() - start_time)
        roi_head_df = pd.read_html(roi.display()._repr_html_())[0]
        roi_head_df.columns = [i[0] for i in roi_head_df.columns]
        profile_dict['roi_head_df'] = roi_head_df

        if 'PV' in roi_head.__class__.__name__:
            with LineProfiler(roi_head.roi_grid_pool) as pooling:
                start_time_pooling = time.time()
                pooled_features = roi_head.roi_grid_pool(out7)
                profile_dict['roi_pooling_time'] = time.time() - start_time_pooling

            roi_pooling_df = pd.read_html(pooling.display()._repr_html_())[0]
            roi_pooling_df.columns = [i[0] for i in roi_pooling_df.columns]
            profile_dict['roi_pooling_df'] = roi_pooling_df
        
        elif 'PointRCNN' in roi_head.__class__.__name__:
            play_data = out7.copy()
            with LineProfiler(roi_head.proposal_layer) as proposal:
                start_time = time.time()
                targets_dict = roi_head.proposal_layer(play_data, nms_config=roi_head.model_cfg.NMS_CONFIG['TRAIN' if roi_head.training else 'TEST'])
                profile_dict['point_rcnn_proposal_time'] = time.time() - start_time
            
            point_rcnn_proposal_df = pd.read_html(proposal.display()._repr_html_())[0]
            point_rcnn_proposal_df.columns = [i[0] for i in point_rcnn_proposal_df.columns]
            profile_dict['point_rcnn_proposal_df'] = point_rcnn_proposal_df
            
            del play_data
            
    else:
        out8 = out7
        
    return profile_dict

In [7]:
def profiling_multi_batch(model_name, batch_size, profile_dict):
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.max_colwidth', -1)
    pd.set_option('display.max_rows', None)

    model, one_data = inference_simulation(model_name, batch_size)
    vfe = model.vfe
    backbone_3d = model.backbone_3d
    map2bev = model.map_to_bev_module
    backbone_2d = model.backbone_2d
    dense_head = model.dense_head
    pfe = model.pfe
    point_head = model.point_head
    roi_head = model.roi_head
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| VFE Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if vfe is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(model.vfe.__class__.__name__))
        if model.vfe.__class__.__name__ == "MeanVFE":
            with LineProfiler(vfe.forward) as prof:
                start_time = time.time()
                out = vfe.forward(one_data)
                total_time = (time.time() - start_time)
                logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(model.vfe.__class__.__name__, profile_dict['vfe_time'] * 1000))
        elif model.vfe.__class__.__name__ == "PillarVFE":
            with LineProfiler(vfe.forward) as prof:
                start_time = time.time()
                out = vfe.forward(one_data)
                total_time = (time.time() - start_time)
                logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(model.vfe.__class__.__name__, profile_dict['vfe_time'] * 1000))
            # Analyze pfn process
            voxel_features, voxel_num_points, coords = one_data['voxels'], one_data['voxel_num_points'], one_data['voxel_coords']
            points_mean = voxel_features[:, :, :3].sum(dim=1, keepdim=True) / voxel_num_points.type_as(voxel_features).view(-1, 1, 1)
            f_cluster = voxel_features[:, :, :3] - points_mean
            f_center = torch.zeros_like(voxel_features[:, :, :3])
            f_center[:, :, 0] = voxel_features[:, :, 0] - (coords[:, 3].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_x + vfe.x_offset)
            f_center[:, :, 1] = voxel_features[:, :, 1] - (coords[:, 2].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_y + vfe.y_offset)
            f_center[:, :, 2] = voxel_features[:, :, 2] - (coords[:, 1].to(voxel_features.dtype).unsqueeze(1) * vfe.voxel_z + vfe.z_offset)
            if vfe.use_absolute_xyz:
                features = [voxel_features, f_cluster, f_center]
            else:
                features = [voxel_features[..., 3:], f_cluster, f_center]
            if vfe.with_distance:
                points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, keepdim=True)
                features.append(points_dist)
            features = torch.cat(features, dim=-1)
            voxel_count = features.shape[1]
            mask = vfe.get_paddings_indicator(voxel_num_points, voxel_count, axis=0)
            mask = torch.unsqueeze(mask, -1).type_as(voxel_features)
            features *= mask
            start_time = time.time()
            for pfn in vfe.pfn_layers:
                features = pfn(features)
            total_time = time.time() - start_time
            logging.info(">>>>>>>>>>>>>> There are {} PFNLayers".format(len(vfe.pfn_layers)))
            logging.info(">>>>>>>>>>>>>> The total runtime of all PFNLayers is {} ms".format(total_time * 1000))
            logging.info(">>>>>>>>>>>>>> The average runtime of each PFNLayer is {} ms".format(total_time * 1000 / len(vfe.pfn_layers)))
            logging.info("\n")
            
            del voxel_features, voxel_num_points, coords
            del points_mean
            del f_cluster
            del f_center
            del features
            del voxel_count
            del mask
            
        logging.info(">>>> Now we analyze its memory usage <<<<")
        vfe_df = pd.read_html(prof.display()._repr_html_())[0]
        vfe_df.columns = [i[0] for i in vfe_df.columns]
        vfe_df[['active_bytes', 'reserved_bytes']] = rearrange_df(vfe_df, profile_dict['vfe_df'], batch_size)
        logging.info('\n' + str(vfe_df))
        
    else:
        out = one_data
        logging.info("This model does not have a vfe")
        
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| 3D backbone Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if backbone_3d is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(model.backbone_3d.__class__.__name__))
        with LineProfiler(backbone_3d.forward) as prof2:
            start_time = time.time()
            out2 = backbone_3d.forward(out)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The run time for {} is {} ms".format(model.backbone_3d.__class__.__name__, profile_dict['backbone_3d_time'] * 1000))
        
        if "Voxel" in backbone_3d.__class__.__name__:
        # Analyze specific process
            voxel_features, voxel_coords = out['voxel_features'], out['voxel_coords']
            batch_size = out['batch_size']
            input_sp_tensor = spconv.SparseConvTensor(
                    features=voxel_features,
                    indices=voxel_coords.int(),
                    spatial_shape=model.backbone_3d.sparse_shape,
                    batch_size=batch_size
                )
            x = model.backbone_3d.conv_input(input_sp_tensor)
            start_time = time.time()
            conv1 = backbone_3d.conv1(x)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for conv1 in {} is {} ms".format(model.backbone_3d.__class__.__name__, profile_dict['spconv1_t'] * 1000))
            start_time2 = time.time()
            conv2 = backbone_3d.conv2(conv1)
            total_time2 = (time.time() - start_time2)
            logging.info(">>>>>>>>>>>>>> The runtime for conv2 in {} is {} ms".format(model.backbone_3d.__class__.__name__, profile_dict['spconv2_t'] * 1000))
            start_time3 = time.time()
            conv3 = backbone_3d.conv3(conv2)
            total_time3 = (time.time() - start_time3)
            logging.info(">>>>>>>>>>>>>> The runtime for conv3 in {} is {} ms".format(model.backbone_3d.__class__.__name__, profile_dict['spconv3_t'] * 1000))
            start_time4 = time.time()
            conv4 = backbone_3d.conv4(conv3)
            end_time = time.time()
            total_time4 = (end_time - start_time4)
            logging.info(">>>>>>>>>>>>>> The runtime for conv4 in {} is {} ms".format(model.backbone_3d.__class__.__name__, profile_dict['spconv4_t'] * 1000))
            logging.info(">>>>>>>>>>>>>> The average running time of the combined convolutinal blocks is {} ms ".format((end_time - start_time) * 1000 / batch_size))
            
            del voxel_features, voxel_coords
            del input_sp_tensor
            del x
            del conv1
            del conv2
            del conv3
            del conv4
        
        elif "Point" in backbone_3d.__class__.__name__:
            
            logging.info(">>>>>>>>>>>>>> The runtime of SA modules in {} is {} ms".format(backbone_3d.__class__.__name__, profile_dict['MSG_SA_time'] * 1000))
            logging.info(">>>>>>>>>>>>>> The runtime of FP modules in {} is {} ms".format(backbone_3d.__class__.__name__, profile_dict['MSG_FP_time'] * 1000))
            
        logging.info("\n")
        logging.info(">>>> Now we analyze its memory usage <<<<")
        
        backbone_3d_df = pd.read_html(prof2.display()._repr_html_())[0]
        backbone_3d_df.columns = [i[0] for i in backbone_3d_df.columns]
        backbone_3d_df[['active_bytes', 'reserved_bytes']] = rearrange_df(backbone_3d_df, profile_dict['backbone_3d_df'], batch_size)
        logging.info('\n' + str(backbone_3d_df))
        
        
    else:
        out2 = out
        logging.info("This model does not have a 3d backbone")
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| Map2Bev Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if map2bev is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(model.map_to_bev_module.__class__.__name__))
        with LineProfiler(map2bev.forward) as prof3:
            start_time = time.time()
            out3 = map2bev.forward(out2)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(model.map_to_bev_module.__class__.__name__, profile_dict['map2bev_time'] * 1000))
        logging.info("\n")
        logging.info(">>>> Now we analyze its memory usage <<<<")    
        map2bev_df = pd.read_html(prof3.display()._repr_html_())[0]
        map2bev_df.columns = [i[0] for i in map2bev_df.columns]
        map2bev_df[['active_bytes', 'reserved_bytes']] = rearrange_df(map2bev_df, profile_dict['map2bev_df'], batch_size)
        logging.info('\n' + str(map2bev_df))
    else:
        out3 = out2
        logging.info("This model does not rely on BEV")
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| PFE Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if pfe is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(pfe.__class__.__name__))
        with LineProfiler(pfe.forward) as prof4:
            start_time = time.time()
            out4 = pfe.forward(out3)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(pfe.__class__.__name__, profile_dict['pfe_time'] * 1000))
        logging.info("*************************** The overall memory analyze of {} ***************************".format(pfe.__class__.__name__))
        pfe_df = pd.read_html(prof4.display()._repr_html_())[0]
        pfe_df.columns = [i[0] for i in pfe_df.columns]
        pfe_df[['active_bytes', 'reserved_bytes']] = rearrange_df(pfe_df, profile_dict['pfe_df'], batch_size)
        logging.info('\n' + str(pfe_df))
        
        with LineProfiler(pfe.get_sampled_points) as grouping:
            start_time_grouping = time.time()
            keypoints = pfe.get_sampled_points(out3)
            total_time_grouping = time.time() - start_time_grouping
            logging.info(">>>>>>>>>>>>>> The total time for the grouping process in {} is {} ms.".format(pfe.__class__.__name__, profile_dict['pfe_grouping_t'] * 1000))
        logging.info("\n")
        logging.info("************** Analyzing memory usage of grouping process **************")
        pfe_grouping_df = pd.read_html(grouping.display()._repr_html_())[0]
        pfe_grouping_df.columns = [i[0] for i in pfe_grouping_df.columns]
        pfe_grouping_df[['active_bytes', 'reserved_bytes']] = rearrange_df(pfe_grouping_df, profile_dict['pfe_grouping_df'], batch_size)
        logging.info("\n" + str(pfe_grouping_df))
        point_features_list = []
        if 'bev' in pfe.model_cfg.FEATURES_SOURCE:
            with LineProfiler(pfe.interpolate_from_bev_features) as bev:
                start_time_bev = time.time()
                point_bev_features = pfe.interpolate_from_bev_features(
                    keypoints, out3['spatial_features'], out3['batch_size'],
                    bev_stride=out3['spatial_features_stride']
                )
                total_time_bev = time.time() - start_time_bev
                logging.info(">>>>>>>>>>>>>> The total time of getting bev features is {} ms".format(profile_dict['pfe_bev_feature_t'] * 1000))
            logging.info("\n")
            logging.info("************** Analyzing memory usage of getting bev features **************")
            pfe_bev_df = pd.read_html(bev.display()._repr_html_())[0]
            pfe_bev_df.columns = [i[0] for i in pfe_bev_df.columns]
            pfe_bev_df[['active_bytes', 'reserved_bytes']] = rearrange_df(pfe_bev_df, profile_dict['pfe_bev_df'], batch_size)
            logging.info('\n' + str(pfe_bev_df))
            point_features_list.append(point_bev_features)
            
            del point_bev_features
            
        batch_size = out3['batch_size']
        new_xyz = keypoints[:, 1:4].contiguous()
        new_xyz_batch_cnt = new_xyz.new_zeros(batch_size).int()
        for k in range(batch_size):
            new_xyz_batch_cnt[k] = (keypoints[:, 0] == k).sum()
        if 'raw_points' in pfe.model_cfg.FEATURES_SOURCE:
            raw_points = out3['points']
            with LineProfiler(pfe.aggregate_keypoint_features_from_one_source) as raw:
                start_time_raw = time.time()
                pooled_features = pfe.aggregate_keypoint_features_from_one_source(
                    batch_size=batch_size, aggregate_func=pfe.SA_rawpoints,
                    xyz=raw_points[:, 1:4],
                    xyz_features=raw_points[:, 4:].contiguous() if raw_points.shape[1] > 4 else None,
                    xyz_bs_idxs=raw_points[:, 0],
                    new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt,
                    filter_neighbors_with_roi=pfe.model_cfg.SA_LAYER['raw_points'].get('FILTER_NEIGHBOR_WITH_ROI', False),
                    radius_of_neighbor=pfe.model_cfg.SA_LAYER['raw_points'].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None),
                    rois=out3.get('rois', None)
                )
                total_time_raw = time.time() - start_time_raw
                logging.info(">>>>>>>>>>>>>> The total runtime for pooling features from raw points is {} ms".format(profile_dict['pfe_raw_feature_t'] * 1000))
            logging.info("\n")
            logging.info("************** Analyzing memory usage of getting raw_points features **************")
            pfe_raw_df = pd.read_html(raw.display()._repr_html_())[0]
            pfe_raw_df.columns = [i[0] for i in pfe_raw_df.columns]
            pfe_raw_df[['active_bytes', 'reserved_bytes']] = rearrange_df(pfe_raw_df, profile_dict['pfe_raw_df'], batch_size)
            logging.info('\n' + str(pfe_raw_df))
            point_features_list.append(pooled_features)
            del pooled_features
            
        for k, src_name in enumerate(pfe.SA_layer_names):
            cur_coords = out3['multi_scale_3d_features'][src_name].indices
            cur_features = out3['multi_scale_3d_features'][src_name].features.contiguous()
            xyz = common_utils.get_voxel_centers(
                cur_coords[:, 1:4], downsample_times=pfe.downsample_times_map[src_name],
                voxel_size=pfe.voxel_size, point_cloud_range=pfe.point_cloud_range
            )
            with LineProfiler(pfe.aggregate_keypoint_features_from_one_source) as mlp:
                start_time_mlp = time.time()
                pooled_features = pfe.aggregate_keypoint_features_from_one_source(
                    batch_size=batch_size, aggregate_func=pfe.SA_layers[k],
                    xyz=xyz.contiguous(), xyz_features=cur_features, xyz_bs_idxs=cur_coords[:, 0],
                    new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt,
                    filter_neighbors_with_roi=pfe.model_cfg.SA_LAYER[src_name].get('FILTER_NEIGHBOR_WITH_ROI', False),
                    radius_of_neighbor=pfe.model_cfg.SA_LAYER[src_name].get('RADIUS_OF_NEIGHBOR_WITH_ROI', None),
                    rois=out3.get('rois', None)
                )
                total_time_mlp = time.time() - start_time_mlp
                logging.info(">>>>>>>>>>>>>> The total time of mlp {} is {} ms".format(src_name, profile_dict['pfe_mlp_t'][k]))
            logging.info("\n")
            logging.info("************** Analyzing memory usage of getting features from the mlp {} **************".format(src_name))
            pfe_mlp_df = pd.read_html(mlp.display()._repr_html_())[0]
            pfe_mlp_df.columns = [i[0] for i in pfe_mlp_df.columns]
            pfe_mlp_df[['active_bytes', 'reserved_bytes']] = rearrange_df(pfe_mlp_df, profile_dict['pfe_mlp_df'][k], batch_size)
            logging.info('\n' + str(pfe_mlp_df))
            point_features_list.append(pooled_features)
            
        del point_features_list
        del keypoints
        del new_xyz
        del new_xyz_batch_cnt
        del cur_coords
        del cur_features
        del xyz
        del pooled_features
    else:
        out4 = out3
        logging.info("This model does not have a pfe")
    
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| 2D backbone Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if backbone_2d is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(backbone_2d.__class__.__name__))
        with LineProfiler(backbone_2d.forward) as prof5:
            start_time = time.time()
            out5 = backbone_2d.forward(out4)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(backbone_2d.__class__.__name__, profile_dict['backbone_2d_time'] * 1000))
        logging.info("\n")
        logging.info(">>>> Now we analyze its memory usage <<<<")
        backbone_2d_df = pd.read_html(prof5.display()._repr_html_())[0]
        backbone_2d_df.columns = [i[0] for i in backbone_2d_df.columns]
        backbone_2d_df[['active_bytes', 'reserved_bytes']] = rearrange_df(backbone_2d_df, profile_dict['backbone_2d_df'], batch_size)
        logging.info('\n' + str(backbone_2d_df))
    else:
        out5 = out4
        logging.info("This model does not have a 2D backbone")
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| Dense Head Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if dense_head is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(dense_head.__class__.__name__))
        with LineProfiler(dense_head.forward) as prof6:
            start_time = time.time()
            out6 = dense_head.forward(out5)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(dense_head.__class__.__name__, profile_dict['dense_head_time'] * 1000))
        if dense_head.__class__.__name__ == "CenterHead":
            spatial_features_2d = out5['spatial_features_2d']
            x = dense_head.shared_conv(spatial_features_2d)
            pred_dicts = []
            start_time = time.time()
            for head in dense_head.heads_list:
                pred_dicts.append(head(x))
            total_time = time.time() - start_time
            logging.info(">>>>>>>>>>>>>> The total runtime of separateHeads is {} ms".format(profile_dict['center_head_time'] * 1000))
            logging.info(">>>>>>>>>>>>>> Number of separated_heads in the heads_list: {}".format(len(dense_head.heads_list)))
            logging.info(">>>>>>>>>>>>>> The average runtime of each separateHead is {} ms".format(profile_dict['center_head_time'] * 1000 / len(dense_head.heads_list)))
            del spatial_features_2d
            del x 
            del pred_dicts
        logging.info("\n")
        logging.info(">>>> Now we analyze its memory usage <<<<")
        dense_head_df = pd.read_html(prof6.display()._repr_html_())[0]
        dense_head_df.columns = [i[0] for i in dense_head_df.columns]
        dense_head_df[['active_bytes', 'reserved_bytes']] = rearrange_df(dense_head_df, profile_dict['dense_head_df'], batch_size)
        logging.info('\n' + str(dense_head_df))
    else:
        out6 = out5
        logging.info("This model does not have a dense head.")  
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| Point Head Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if point_head is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(point_head.__class__.__name__))
        with LineProfiler(point_head.forward) as ph:
            start_time = time.time()
            out7 = point_head.forward(out6)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(point_head.__class__.__name__, profile_dict['point_head_time'] * 1000))
        logging.info("************** Analyzing memory usage of {}**************".format(point_head.__class__.__name__))
        point_head_df = pd.read_html(ph.display()._repr_html_())[0]
        point_head_df.columns = [i[0] for i in point_head_df.columns]
        point_head_df[['active_bytes', 'reserved_bytes']] = rearrange_df(point_head_df, profile_dict['point_head_df'], batch_size)
        logging.info('\n' + str(point_head_df))
    else:
        out7 = out6
        logging.info("This model does not have a point head")

    
    logging.info("\n")
    logging.info("|||||||||||||||||||||||||||||||||||| ROI Head Part ||||||||||||||||||||||||||||||||||||")
    logging.info("\n")
    if roi_head is not None:
        logging.info("----------------------- Analyzing on {} -----------------------".format(roi_head.__class__.__name__))
        with LineProfiler(roi_head.forward) as roi:
            start_time = time.time()
            print(out7.keys())
            out8 = roi_head.forward(out7)
            total_time = (time.time() - start_time)
            logging.info(">>>>>>>>>>>>>> The runtime for {} is {} ms".format(roi_head.__class__.__name__, profile_dict['roi_head_time'] * 1000))
        logging.info("\n")

        logging.info("************** Analyzing memory usage of {}**************".format(roi_head.__class__.__name__))
        roi_head_df = pd.read_html(roi.display()._repr_html_())[0]
        roi_head_df.columns = [i[0] for i in roi_head_df.columns]
        roi_head_df[['active_bytes', 'reserved_bytes']] = rearrange_df(roi_head_df, profile_dict['roi_head_df'], batch_size)
        logging.info('\n' + str(roi_head_df))
        
        if 'PV' in roi_head.__class__.__name__:
            with LineProfiler(roi_head.roi_grid_pool) as pooling:
                start_time_pooling = time.time()
                pooled_features = roi_head.roi_grid_pool(out7)
                total_time_pooling = time.time() - start_time_pooling
                logging.info(">>>>>>>>>>>>>> The runtime for ROI grid pooling in {} is {} ms".format(roi_head.__class__.__name__, profile_dict['roi_pooling_time']))
            logging.info("\n")
            logging.info("************** Analyzing memory usage of ROI grid pooling **************")
            roi_pooling_df = pd.read_html(pooling.display()._repr_html_())[0]
            roi_pooling_df.columns = [i[0] for i in roi_pooling_df.columns]
            roi_pooling_df[['active_bytes', 'reserved_bytes']] = rearrange_df(roi_pooling_df, profile_dict['roi_pooling_df'], batch_size)
            logging.info('\n' + str(roi_pooling_df))
        
        elif 'PointRCNN' in roi_head.__class__.__name__:
            play_data = out7.copy()
            with LineProfiler(roi_head.proposal_layer) as proposal:
                start_time = time.time()
                targets_dict = roi_head.proposal_layer(play_data, nms_config=roi_head.model_cfg.NMS_CONFIG['TRAIN' if roi_head.training else 'TEST'])
                end = time.time() - start_time
                logging.info(">>>>>>>>>>>>>> The runtime for The Proposal Layer in {} is {} ms".format(roi_head.__class__.__name__, profile_dict['point_rcnn_proposal_time']))
            logging.info("\n")
            logging.info("************** Analyzing memory usage of PointRCNN Proposal Layer **************")
            point_rcnn_proposal_df = pd.read_html(proposal.display()._repr_html_())[0]
            point_rcnn_proposal_df.columns = [i[0] for i in point_rcnn_proposal_df.columns]
            point_rcnn_proposal_df[['active_bytes', 'reserved_bytes']] = rearrange_df(point_rcnn_proposal_df, profile_dict['point_rcnn_proposal_df'], batch_size)
            logging.info('\n' + str(point_rcnn_proposal_df))
            del play_data
    else:
        out8 = out7
        logging.info("This model does not have a ROI head")
    
    logging.info("\n")
    logging.info("------------------------------------------------------ Finish profiling ------------------------------------------------------")

## Profiling required models
**Remember to ```restart``` the jupyter notebook before profiling a new model, because jupyter notebook will store previous loaded data (ie. if you run the same cell multiple times, the memory usage will be accumulated), which might cause a large memory usage.**

**Profiling SECOND**

In [8]:
logging.basicConfig(filename="./profiling_results/second_profiling.log", level=logging.INFO)
profile_dict = profiling_one_batch("second")
profiling_multi_batch("second", 2, profile_dict)

2023-01-07 04:05:12,490   INFO  Loading Waymo dataset
2023-01-07 04:05:13,985   INFO  Total skipped info 0
2023-01-07 04:05:13,985   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:15,681   INFO  Loading Waymo dataset
2023-01-07 04:05:15,681   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:05:17,294   INFO  Total skipped info 0
2023-01-07 04:05:17,294   INFO  Total skipped info 0
2023-01-07 04:05:17,295   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:17,295   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:22,044   INFO  Loading Waymo dataset
2023-01-07 04:05:22,044   INFO  Loading Waymo dataset
2023-01-07 04:05:22,044   INFO  Loading Waymo dataset
2023-01-07 04:05:23,452   INFO  Total skipped info 0
2023-01-07 04:05:23,452   INFO  Total skipped info 0
2023-01-07 04:05:23,452   INFO  Total skipped info 0
2023-01-07 04:05:23,453   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:23,453   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:23,453   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:23,923   INFO  Loading Waymo dataset
2023-01-07 04:05:23,923   INFO  Loading Waymo dataset
2023-01-07 04:05:23,923   INFO  Loading Waymo dataset
2023-01-07 04:05:23,923   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:05:25,759   INFO  Total skipped info 0
2023-01-07 04:05:25,759   INFO  Total skipped info 0
2023-01-07 04:05:25,759   INFO  Total skipped info 0
2023-01-07 04:05:25,759   INFO  Total skipped info 0
2023-01-07 04:05:25,761   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:25,761   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:25,761   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:05:25,761   INFO  Total samples for Waymo dataset: 39987


**Profiling PointPillar**

In [9]:
logging.basicConfig(filename="./profiling_results/pointpillar_profiling.log", level=logging.INFO)
profile_dict = profiling_one_batch("pointpillar_1x")
profiling_multi_batch("pointpillar_1x", 2, profile_dict)

2023-01-07 03:51:21,572   INFO  Loading Waymo dataset
2023-01-07 03:51:21,572   INFO  Loading Waymo dataset
2023-01-07 03:51:22,958   INFO  Total skipped info 0
2023-01-07 03:51:22,958   INFO  Total skipped info 0
2023-01-07 03:51:22,959   INFO  Total samples for Waymo dataset: 39987
2023-01-07 03:51:22,959   INFO  Total samples for Waymo dataset: 39987


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 03:51:24,405   INFO  Loading Waymo dataset
2023-01-07 03:51:24,405   INFO  Loading Waymo dataset
2023-01-07 03:51:24,405   INFO  Loading Waymo dataset
2023-01-07 03:51:25,721   INFO  Total skipped info 0
2023-01-07 03:51:25,721   INFO  Total skipped info 0
2023-01-07 03:51:25,721   INFO  Total skipped info 0
2023-01-07 03:51:25,722   INFO  Total samples for Waymo dataset: 39987
2023-01-07 03:51:25,722   INFO  Total samples for Waymo dataset: 39987
2023-01-07 03:51:25,722   INFO  Total samples for Waymo dataset: 39987


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


**Profiling Centerpoint**

In [8]:
logging.basicConfig(filename="./profiling_results/centerpoint_profiling.log", level=logging.INFO)
profile_dict = profiling_one_batch("centerpoint")
profiling_multi_batch("centerpoint", 2, profile_dict)

2023-01-07 04:01:32,359   INFO  Loading Waymo dataset
2023-01-07 04:01:33,854   INFO  Total skipped info 0
2023-01-07 04:01:33,855   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:36,361   INFO  Loading Waymo dataset
2023-01-07 04:01:36,361   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:01:37,978   INFO  Total skipped info 0
2023-01-07 04:01:37,978   INFO  Total skipped info 0
2023-01-07 04:01:37,979   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:37,979   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:43,370   INFO  Loading Waymo dataset
2023-01-07 04:01:43,370   INFO  Loading Waymo dataset
2023-01-07 04:01:43,370   INFO  Loading Waymo dataset
2023-01-07 04:01:44,819   INFO  Total skipped info 0
2023-01-07 04:01:44,819   INFO  Total skipped info 0
2023-01-07 04:01:44,819   INFO  Total skipped info 0
2023-01-07 04:01:44,821   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:44,821   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:44,821   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:45,296   INFO  Loading Waymo dataset
2023-01-07 04:01:45,296   INFO  Loading Waymo dataset
2023-01-07 04:01:45,296   INFO  Loading Waymo dataset
2023-01-07 04:01:45,296   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:01:47,140   INFO  Total skipped info 0
2023-01-07 04:01:47,140   INFO  Total skipped info 0
2023-01-07 04:01:47,140   INFO  Total skipped info 0
2023-01-07 04:01:47,140   INFO  Total skipped info 0
2023-01-07 04:01:47,141   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:47,141   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:47,141   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:01:47,141   INFO  Total samples for Waymo dataset: 39987


**Profiling PV_RCNN**

In [8]:
logging.basicConfig(filename="./profiling_results/pv_rcnn_profiling.log", level=logging.INFO)
profile_dict = profiling_one_batch("pv_rcnn")
# profiling_multi_batch("pv_rcnn", 4, profile_dict)  -- CUDA out of memory!!!!!
profiling_multi_batch("pv_rcnn", 2, profile_dict)

2023-01-07 04:03:40,516   INFO  Loading Waymo dataset
2023-01-07 04:03:42,003   INFO  Total skipped info 0
2023-01-07 04:03:42,004   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:43,658   INFO  Loading Waymo dataset
2023-01-07 04:03:43,658   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:03:45,280   INFO  Total skipped info 0
2023-01-07 04:03:45,280   INFO  Total skipped info 0
2023-01-07 04:03:45,281   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:45,281   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:56,449   INFO  Loading Waymo dataset
2023-01-07 04:03:56,449   INFO  Loading Waymo dataset
2023-01-07 04:03:56,449   INFO  Loading Waymo dataset
2023-01-07 04:03:57,937   INFO  Total skipped info 0
2023-01-07 04:03:57,937   INFO  Total skipped info 0
2023-01-07 04:03:57,937   INFO  Total skipped info 0
2023-01-07 04:03:57,939   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:57,939   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:57,939   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:03:58,507   INFO  Loading Waymo dataset
2023-01-07 04:03:58,507   INFO  Loading Waymo dataset
2023-01-07 04:03:58,507   INFO  Loading Waymo dataset
2023-01-07 04:03:58,507   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size'])


2023-01-07 04:04:00,439   INFO  Total skipped info 0
2023-01-07 04:04:00,439   INFO  Total skipped info 0
2023-01-07 04:04:00,439   INFO  Total skipped info 0
2023-01-07 04:04:00,439   INFO  Total skipped info 0
2023-01-07 04:04:00,441   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:04:00,441   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:04:00,441   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:04:00,441   INFO  Total samples for Waymo dataset: 39987


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'voxels', 'voxel_coords', 'voxel_num_points', 'metadata', 'batch_size', 'voxel_features', 'encoded_spconv_tensor', 'encoded_spconv_tensor_stride', 'multi_scale_3d_features', 'multi_scale_3d_strides', 'spatial_features', 'spatial_features_stride', 'point_features_before_fusion', 'point_features', 'point_coords', 'spatial_features_2d', 'batch_cls_preds', 'batch_box_preds', 'cls_preds_normalized', 'point_cls_scores'])


**Profiling Point_RCNN**

In [8]:
logging.basicConfig(filename="./profiling_results/point_rcnn_profiling.log", level=logging.INFO)
profile_dict = profiling_one_batch("point_rcnn")
profiling_multi_batch("point_rcnn", 2, profile_dict)

2023-01-07 04:06:12,718   INFO  Loading Waymo dataset
2023-01-07 04:06:14,198   INFO  Total skipped info 0
2023-01-07 04:06:14,199   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:16,414   INFO  Loading Waymo dataset
2023-01-07 04:06:16,414   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'metadata', 'batch_size'])


2023-01-07 04:06:18,020   INFO  Total skipped info 0
2023-01-07 04:06:18,020   INFO  Total skipped info 0
2023-01-07 04:06:18,021   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:18,021   INFO  Total samples for Waymo dataset: 39987


PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.


2023-01-07 04:06:41,847   INFO  Loading Waymo dataset
2023-01-07 04:06:41,847   INFO  Loading Waymo dataset
2023-01-07 04:06:41,847   INFO  Loading Waymo dataset
2023-01-07 04:06:43,189   INFO  Total skipped info 0
2023-01-07 04:06:43,189   INFO  Total skipped info 0
2023-01-07 04:06:43,189   INFO  Total skipped info 0
2023-01-07 04:06:43,191   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:43,191   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:43,191   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:43,375   INFO  Loading Waymo dataset
2023-01-07 04:06:43,375   INFO  Loading Waymo dataset
2023-01-07 04:06:43,375   INFO  Loading Waymo dataset
2023-01-07 04:06:43,375   INFO  Loading Waymo dataset


dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'metadata', 'batch_size'])


2023-01-07 04:06:45,207   INFO  Total skipped info 0
2023-01-07 04:06:45,207   INFO  Total skipped info 0
2023-01-07 04:06:45,207   INFO  Total skipped info 0
2023-01-07 04:06:45,207   INFO  Total skipped info 0
2023-01-07 04:06:45,209   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:45,209   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:45,209   INFO  Total samples for Waymo dataset: 39987
2023-01-07 04:06:45,209   INFO  Total samples for Waymo dataset: 39987


PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
PointRCNN is so annoying.
dict_keys(['sample_idx', 'points', 'frame_id', 'gt_boxes', 'use_lead_xyz', 'metadata', 'batch_size', 'point_features', 'point_coords', 'point_cls_scores', 'batch_cls_preds', 'batch_box_preds', 'batch_index', 'cls_preds_normalized'])


## 6. More experiments for specific operations