In [None]:
# Generate the Metadata Bank

In [None]:
import os
import re
import time
import torch
import pickle
import json
import numpy as np
from tqdm import tqdm
from pathlib import Path
from pcdet.models import build_network, load_data_to_gpu
from pcdet.datasets import build_dataloader
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.utils import common_utils

In [None]:
# Configs and checkpoints for all branches
branches = [['cfgs/waymo_models/centerpoint_dyn_pillar020_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar020_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_pillar024_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar024_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_pillar028_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar028_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_pillar032_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar032_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_pillar036_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar036_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_pillar040_1x.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_pillar040_1x.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0075.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0075.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0100.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0100.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0125.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0125.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0150.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0150.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0175.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0175.pth'],
            ['cfgs/waymo_models/centerpoint_dyn_voxel0200.yaml',
                '../output/waymo_checkpoints/centerpoint_dyn_voxel0200.pth']]

In [None]:
# Generate look up table for Content Agnostic Scheduler
# Accuracy (mAP) of each branch on the test set
# Average Latency of each branch on the test set
import numpy as np

# Get accuracy profiling results of all branches
# Level1 mAP of 3 classes: Vehicle, pedestrian, cyclist
acc_test = [0.708, 0.7075, 0.7048, 0.6948, 0.694, 0.6809, 0.7399, 0.7168, 0.6912, 0.6588, 0.6321, 0.5964]
# acc_val = [0.7512, 0.7437, 0.7428, 0.7333, 0.7289, 0.7179, 0.7805, 0.7636, 0.7383, 0.7060, 0.6775, 0.6712]

# Get latency profiling results of all branches 
lat_orin_test = np.load('../output/waymo_results/latency_profiling/test/latency_profiling_test.npy', allow_pickle=True)
#lat_orin_val = np.load('../output/waymo_results/latency_profiling/val/latency_profiling_val.npy', allow_pickle=True)

lat = []
for i, res in enumerate(lat_orin_test):
    e2e = (round(np.float64(lat_orin_test[i][1]), 2))
    lat.append(e2e)
print(lat)

look_up_content_ag = []
for i in range(len(lat)):
    look_up_content_ag.append([i, lat[i], acc_test[i]])
print(look_up_content_ag)
# np.save('look_up_content_ag_orin_waymo', look_up_content_ag)

In [None]:
[[0, 449.85, 0.708], [1, 303.97, 0.7075], [2, 234.55, 0.7048], [3, 174.38, 0.6948], [4, 156.61, 0.694], [5, 135.58, 0.6809], [6, 235.76, 0.7399], [7, 169.42, 0.7168], [8, 146.25, 0.6912], [9, 122.81, 0.6588], [10, 110.08, 0.6321], [11, 100.87, 0.5964]]


In [None]:
import os
import pickle
import numpy as np
# Baseline results
baselines = ['centerpoint_pillar_1x',
            'centerpoint_without_resnet',
            'second',
            'PartA2',
            'pointpillar_1x',
            'pv_rcnn']

data_root = '../output/waymo_results'
# latency profiles
print('Loading baseline latency for test samples...')
lat_dir = os.path.join(data_root, 'baselines')
branch_lats = []
for b in baselines:
    lat_path = os.path.join(lat_dir, b + '_lat.pkl')
    lat = pickle.load(open(lat_path, 'rb'))
    branch_lats.append(np.array(lat))
branch_lats = np.stack(branch_lats, axis=-1)    # (#samples, #branches)
print(branch_lats.mean(axis=0))

In [None]:
print(branch_lats.mean(axis=0))

In [None]:
import re
import os
import pickle
import numpy as np
# Oracle controller
branches = ['centerpoint_dyn_pillar024_4x',
            'centerpoint_dyn_pillar028_4x',
            'centerpoint_dyn_pillar032_4x',
            'centerpoint_dyn_pillar036_4x',
            'centerpoint_dyn_pillar040_4x',
            'centerpoint_dyn_pillar044_4x',
            'centerpoint_dyn_pillar048_4x',
            'centerpoint_dyn_pillar052_4x',
            'centerpoint_without_resnet_dyn_voxel100',
            'centerpoint_without_resnet_dyn_voxel150',
            'centerpoint_without_resnet_dyn_voxel200',
            'centerpoint_without_resnet_dyn_voxel250',
            'centerpoint_without_resnet_dyn_voxel300',
            'centerpoint_without_resnet_dyn_voxel350',
            'centerpoint_without_resnet_dyn_voxel400',
            'centerpoint_without_resnet_dyn_voxel450',
            'dsvt_sampled_pillar020',
            'dsvt_sampled_pillar030',
            'dsvt_sampled_pillar040',
            'dsvt_sampled_pillar050',
            'dsvt_sampled_pillar060',
            'dsvt_sampled_pillar070',
            'dsvt_sampled_pillar080',
            'dsvt_sampled_pillar090',
            'dsvt_sampled_pillar100',
            'dsvt_sampled_pillar110',
            'dsvt_sampled_pillar120',
            'dsvt_sampled_pillar130',
            'dsvt_sampled_voxel020',
            'dsvt_sampled_voxel030',
            'dsvt_sampled_voxel040',
            'dsvt_sampled_voxel050',
            'dsvt_sampled_voxel060',
            'dsvt_sampled_voxel070',
            'dsvt_sampled_voxel080',
            'dsvt_sampled_voxel090',
            'dsvt_sampled_voxel100',
            'dsvt_sampled_voxel110',
            'dsvt_sampled_voxel120',
            'dsvt_sampled_voxel130']

data_root = '../output/waymo_new_profiling'

# latency profiles
print('Loading branch latency for test samples...')
lat_dir = os.path.join(data_root, 'lat/test')
branch_lats = []
for b in branches:
    lat_path = os.path.join(lat_dir, b + '_lat.pkl')
    lat = pickle.load(open(lat_path, 'rb'))
    branch_lats.append(np.array(lat))
branch_lats = np.stack(branch_lats, axis=-1)    # (#samples, #branches)

# accuracy profiles in waymo format
print('Loading branch accuracy for test samples...')
branch_accs = np.load('../output/waymo_new_profiling/per_frame_l2_acc_test.npy', allow_pickle=True)

# acc_dir = os.path.join(data_root, '../output/waymo_new_profiling/per_frame_acc/test')
# branch_accs = []
# for b in branches:
#     acc_path = os.path.join(acc_dir, b + '_per_frame_test.txt')
#     with open(acc_path, "r") as file:
#         content = file.read()
#     pattern = r'\b\d+\.\d+\b'
#     result = [[float(match) for match in re.findall(pattern, line)] for line in content.split("{")[1:]]
#     # indices = [0, 4, 12]
#     indices = [2, 6, 14]
#     acc = [sum(item[index] for index in indices) / len(indices) for item in result]
#     branch_accs.append(np.array(acc))
# branch_accs = np.stack(branch_accs, axis=-1)    # (#samples, #branches)

# # accuracy profiles in kitti format
# print('Loading branch accuracy for test samples...')
# acc_dir = os.path.join(data_root, 'accuracy_profiling/kitti_format/test')
# branch_accs = []
# for b in branches:
#     acc_path = os.path.join(acc_dir, b + '_kitti.txt')
#     with open(acc_path, "r") as file:
#         acc = file.readlines()
#     acc_values = [float(line.strip()) for line in acc]
#     branch_accs.append(np.array(acc_values))
# branch_accs = np.stack(branch_accs, axis=-1)    # (#samples, #branches)


# load detection profiles
print('Loading branch detection results for test samples...')
det_dir = os.path.join(data_root, 'det/test')
branch_profiles = []
for b in branches:
    det_path = os.path.join(det_dir, b + '_det.pkl')
    det = pickle.load(open(det_path, 'rb'))
    branch_profiles.append(det)
branch_profiles = np.stack(branch_profiles, axis=-1)    # (#samples, #branches)

In [None]:
np.save('latency.npy', branch_lats)
np.save('accuracy.npy', branch_accs)

In [None]:
# print(branch_profiles)
print(branch_profiles.shape)
print(len(branch_profiles))
print(len(branch_profiles[0]))
# print(branch_profiles[0])

In [None]:
# print(branch_accs)
print(branch_accs.shape)
print(len(branch_accs))
print(len(branch_accs[0]))
print(branch_accs[1])

In [None]:
# print(branch_lats)
print(branch_lats.shape)
print(len(branch_lats))
print(len(branch_lats[0]))
print(branch_lats[1])

In [None]:
# latency budgets
board = 'orin'
slo_list = (50, 100, 150, 200, 250, 300, 350, 400, 450, 500)
# slo_list = (1000, 2000)
schd_overhead = 0

# SWITCH BETWEEN LATENCY PREDICTORS AND THRESHOLDS
print('Running virtual branch scheduling...')
det_results, lat_results = dict(), dict()
for slo in slo_list:
    det_results[slo] = []
    lat_results[slo] = []

# k is the number of frames
for k in range(len(branch_lats)):
    if (k + 1) % 500 == 0:
        print(f'{(k + 1):04d} done!')

    # for each frame, load the lat and acc of 20 branches
    lat, acc = branch_lats[k], branch_accs[k]

    for slo in slo_list:
        # Filter valid branches according to the latency slo
        valid_branches = np.nonzero(lat + schd_overhead <= slo)[0]
        # If there is no valid branch, choose the fastest branch
        if len(valid_branches) == 0:
            valid_branches = [lat.argmin()]
        # Select the branch come with the highest acc
        # If multiple branches have the same highest acc, choose the fastest branch
        sorted_indices = np.lexsort((lat[valid_branches], -acc[valid_branches]))
        b = valid_branches[sorted_indices[0]]
        lat_results[slo].append(lat[b])

        # load detection profile
        det = branch_profiles[k][b]
        det_results[slo].append(det)

print('Saving detection results...')
out_dir = '../output/oracle/oracle'
os.makedirs(out_dir, exist_ok=True)
for slo in slo_list:
    out_path = os.path.join(out_dir, f'{board}_slo{slo:d}_oracle.pkl')
    with open(out_path, 'wb') as f:
        pickle.dump(det_results[slo], f)

    lat = np.array(lat_results[slo]).mean()
    print(f"Latency [SLO: {slo:d}]: {lat:.2f}s")

In [1]:
# visualize the ranking of all branches
import re
import os
import pickle
import numpy as np
# Oracle controller
branches = ['centerpoint_dyn_pillar052_4x',
            'centerpoint_dyn_pillar048_4x',
            'centerpoint_dyn_pillar044_4x',
            'centerpoint_dyn_pillar040_4x',
            'centerpoint_dyn_pillar036_4x',
            'centerpoint_dyn_pillar032_4x',
            'centerpoint_dyn_pillar028_4x',
            'centerpoint_dyn_pillar024_4x',
            'centerpoint_without_resnet_dyn_voxel450',
            'centerpoint_without_resnet_dyn_voxel400',
            'centerpoint_without_resnet_dyn_voxel350',
            'centerpoint_without_resnet_dyn_voxel300',
            'centerpoint_without_resnet_dyn_voxel250',
            'centerpoint_without_resnet_dyn_voxel200',
            'centerpoint_without_resnet_dyn_voxel150',
            'centerpoint_without_resnet_dyn_voxel100',
            'dsvt_sampled_pillar130',
            'dsvt_sampled_pillar120',
            'dsvt_sampled_pillar110',
            'dsvt_sampled_pillar100',
            'dsvt_sampled_pillar090',
            'dsvt_sampled_pillar080',
            'dsvt_sampled_pillar070',
            'dsvt_sampled_pillar060',
            'dsvt_sampled_pillar050',
            'dsvt_sampled_pillar040',
            'dsvt_sampled_pillar030',
            'dsvt_sampled_pillar020',
            'dsvt_sampled_voxel130',
            'dsvt_sampled_voxel120',
            'dsvt_sampled_voxel110',
            'dsvt_sampled_voxel100',
            'dsvt_sampled_voxel090',
            'dsvt_sampled_voxel080',
            'dsvt_sampled_voxel070',
            'dsvt_sampled_voxel060',
            'dsvt_sampled_voxel050',
            'dsvt_sampled_voxel040',
            'dsvt_sampled_voxel030',
            'dsvt_sampled_voxel020']

data_root = '../output/waymo_new_profiling'

# latency profiles
print('Loading branch latency for test samples...')
lat_dir = os.path.join(data_root, 'lat/test')
branch_lats = []
for b in branches:
    lat_path = os.path.join(lat_dir, b + '_lat.pkl')
    lat = pickle.load(open(lat_path, 'rb'))
    branch_lats.append(np.array(lat))
branch_lats = np.stack(branch_lats, axis=-1)    # (#samples, #branches)

# accuracy profiles in waymo format
print('Loading branch accuracy for test samples...')
branch_accs = np.load('../output/waymo_new_profiling/per_frame_l2_acc_test.npy', allow_pickle=True)

# ========================================================================================================
# latency budgets
board = 'orin'
slo_list = (50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 5000)
# slo_list = (1000, 2000)
schd_overhead = 0

# SWITCH BETWEEN LATENCY PREDICTORS AND THRESHOLDS
print('Running virtual branch scheduling...')
branch_results, lat_results = dict(), dict()
for slo in slo_list:
    branch_results[slo] = []
    lat_results[slo] = []

# k is the number of frames
for k in range(len(branch_lats)):
    if (k + 1) % 5000 == 0:
        print(f'{(k + 1):04d} done!')

    # for each frame, load the lat and acc of 20 branches
    lat, acc = branch_lats[k], branch_accs[k]

    for slo in slo_list:
        # Filter valid branches according to the latency slo
        valid_branches = np.nonzero(lat + schd_overhead <= slo)[0]
        # If there is no valid branch, choose the fastest branch
        if len(valid_branches) == 0:
            valid_branches = [lat.argmin()]
        # Select the branch come with the highest acc
        # If multiple branches have the same highest acc, choose the fastest branch
        sorted_indices = np.lexsort((lat[valid_branches], -acc[valid_branches]))
        b = valid_branches[sorted_indices[0]]
        lat_results[slo].append(lat[b])

        # save branch selection
        branch_results[slo].append(sorted_indices)

print('Saving branch ranking results...')
out_dir = '../output/oracle/oracle'
os.makedirs(out_dir, exist_ok=True)
for slo in slo_list:
    out_path = os.path.join(out_dir, f'{board}_slo{slo:d}_oracle_branch_ranking.pkl')
    with open(out_path, 'wb') as f:
        pickle.dump(branch_results[slo], f)

    lat = np.array(lat_results[slo]).mean()
    print(f"Latency [SLO: {slo:d}]: {lat:.2f}s")

Loading branch latency for test samples...
Loading branch accuracy for test samples...
Running virtual branch scheduling...
5000 done!
10000 done!
15000 done!
20000 done!
25000 done!
30000 done!
35000 done!
Saving branch ranking results...
Latency [SLO: 50]: 43.71s
Latency [SLO: 100]: 75.34s
Latency [SLO: 150]: 107.86s
Latency [SLO: 200]: 139.37s
Latency [SLO: 250]: 161.85s
Latency [SLO: 300]: 177.49s
Latency [SLO: 350]: 192.40s
Latency [SLO: 400]: 203.25s
Latency [SLO: 450]: 211.48s
Latency [SLO: 500]: 216.97s
Latency [SLO: 5000]: 264.41s


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming the 178x40 data is sorted from best (low values) to worst (high values) for each of the 178 dimensions
# Simulating data: For demonstration, generating random data
np.random.seed(0)
data = np.random.rand(178, 40)

# Sorting each of the 178 dimensions (rows) from best to worst
data_sorted = np.sort(data, axis=1)

# Creating a heatmap
plt.figure(figsize=(20, 10))
sns.heatmap(data_sorted, cmap='coolwarm', cbar_kws={'label': 'Ranking (Low to High)'})
plt.title('Heatmap of Rankings Across 178 Branches')
plt.xlabel('Branch Index')
plt.ylabel('Rank')
plt.show()

In [None]:
print(branch_results[5000][0:198])

In [2]:
# print(branch_results[500][:178])
np.save('seq_4_slo200_ranking.npy', branch_results[200][793:991])
np.save('seq_3_slo200_ranking.npy', branch_results[200][595:793])
np.save('seq_2_slo200_ranking.npy', branch_results[200][397:595])
np.save('seq_1_slo200_ranking.npy', branch_results[200][198:397])
np.save('seq_0_slo200_ranking.npy', branch_results[200][0:198])

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (198,) + inhomogeneous part.

In [None]:
!echo $CUDA_VISIBLE_DEVICES

In [None]:
import os
import numpy as np
from tqdm import tqdm
import pickle
from pathlib import Path
from pcdet.datasets import build_dataloader
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.utils import common_utils

os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

config = 'cfgs/waymo_models/centerpoint_dyn_pillar020_1x.yaml'
# Read the config file
cfg_from_yaml_file(config, cfg)
cfg.TAG = Path(config).stem
cfg.EXP_GROUP_PATH = '/'.join(config.split('/')[1:-1])  # remove 'cfgs' and 'xxxx.yaml'
np.random.seed(1024)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
dist_test = False
total_gpus = 1
batch_size = 1
workers = 4

# Create logger
logger = common_utils.create_logger()

# Build the dataloader
test_set, test_loader, sampler = build_dataloader(
    dataset_cfg=cfg.DATA_CONFIG,
    class_names=cfg.CLASS_NAMES,
    batch_size=batch_size,
    dist=dist_test, workers=workers, logger=logger, training=False)
dataset = test_loader.dataset
class_names = dataset.class_names

slo_list = (100, 125, 150, 175, 200, 225, 250, 300, 400, 500)
# slo_list = (100, 125, 150, 175, 200, 225, 250)
out_dir = '../output/waymo_results/oracle/own'
for slo in tqdm(slo_list[::-1]):
    det_path = os.path.join(out_dir, f'orin_slo{slo:d}_oracle.pkl')
    final_output_dir = '../output/waymo_results/oracle/eval'
    os.makedirs(final_output_dir, exist_ok=True)
    # Read the detection results
    print('================', det_path, '=====================')
    det_annos = pickle.load(open(det_path, 'rb'))
    ret_dict = {}
    result_str, result_dict = dataset.evaluation(
            det_annos, class_names,
            eval_metric=cfg.MODEL.POST_PROCESSING.EVAL_METRIC,
            output_path=final_output_dir
        )

    ret_dict.update(result_dict)
    print(ret_dict)

In [None]:
import os
import numpy as np
from tqdm import tqdm
import pickle
from pathlib import Path
from pcdet.datasets import build_dataloader
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.utils import common_utils

os.environ["CUDA_VISIBLE_DEVICES"] = "1" 

config = 'cfgs/waymo_models/centerpoint_dyn_pillar020_1x.yaml'
# Read the config file
cfg_from_yaml_file(config, cfg)
cfg.TAG = Path(config).stem
cfg.EXP_GROUP_PATH = '/'.join(config.split('/')[1:-1])  # remove 'cfgs' and 'xxxx.yaml'
np.random.seed(1024)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
dist_test = False
total_gpus = 1
batch_size = 1
workers = 4

# Create logger
logger = common_utils.create_logger()

# Build the dataloader
test_set, test_loader, sampler = build_dataloader(
    dataset_cfg=cfg.DATA_CONFIG,
    class_names=cfg.CLASS_NAMES,
    batch_size=batch_size,
    dist=dist_test, workers=workers, logger=logger, training=False)
dataset = test_loader.dataset
class_names = dataset.class_names

#slo_list = (100, 150, 200, 250, 500)
slo_list = (100, 150, 200, 250)
out_dir = '../output/waymo_results/oracle/models'
models = ['waymo_mse_adamw_64', 'waymo_mse_sgd_16', 'waymo_mse_sgd_64']
for m in models:
    for slo in tqdm(slo_list[::-1]):
        det_path = os.path.join(out_dir, m, f'orin_slo{slo:d}_ep05.pkl')
        final_output_dir = '../output/waymo_results/oracle/eval'
        os.makedirs(final_output_dir, exist_ok=True)
        # Read the detection results
        print('================', det_path, '=====================')
        det_annos = pickle.load(open(det_path, 'rb'))
        ret_dict = {}
        result_str, result_dict = dataset.evaluation(
                det_annos, class_names,
                eval_metric=cfg.MODEL.POST_PROCESSING.EVAL_METRIC,
                output_path=final_output_dir
            )

        ret_dict.update(result_dict)
        print(ret_dict)

In [None]:
import os
import numpy as np
from tqdm import tqdm
import pickle
from pathlib import Path
from pcdet.datasets import build_dataloader
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.utils import common_utils

os.environ["CUDA_VISIBLE_DEVICES"] = "1" 

config = 'cfgs/waymo_models/centerpoint_dyn_pillar020_1x.yaml'
# Read the config file
cfg_from_yaml_file(config, cfg)
cfg.TAG = Path(config).stem
cfg.EXP_GROUP_PATH = '/'.join(config.split('/')[1:-1])  # remove 'cfgs' and 'xxxx.yaml'
np.random.seed(1024)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
dist_test = False
total_gpus = 1
batch_size = 1
workers = 4

# Create logger
logger = common_utils.create_logger()

# Build the dataloader
test_set, test_loader, sampler = build_dataloader(
    dataset_cfg=cfg.DATA_CONFIG,
    class_names=cfg.CLASS_NAMES,
    batch_size=batch_size,
    dist=dist_test, workers=workers, logger=logger, training=False)
dataset = test_loader.dataset
class_names = dataset.class_names

slo_list = (100, 150, 200, 250, 500)
out_dir = '../output/waymo_results/oracle/models'
models = ['waymo_mse_adamw_64', 'waymo_mse_sgd_16', 'waymo_mse_sgd_64']
for m in models[1:]:
    det_path = os.path.join(out_dir, m, 'orin_slo500_ep05.pkl')
    final_output_dir = '../output/waymo_results/oracle/eval'
    os.makedirs(final_output_dir, exist_ok=True)
    # Read the detection results
    print('================', det_path, '=====================')
    det_annos = pickle.load(open(det_path, 'rb'))
    ret_dict = {}
    result_str, result_dict = dataset.evaluation(
            det_annos, class_names,
            eval_metric=cfg.MODEL.POST_PROCESSING.EVAL_METRIC,
            output_path=final_output_dir
        )

    ret_dict.update(result_dict)
    print(ret_dict)

In [None]:
np.mean([0.6810022, 0.5048319, 0.62684095])