In [12]:
import numpy as np
from PIL import Image
import glob
import os
import cv2
from tqdm import tqdm
from skimage.draw import polygon
from sklearn.cluster import KMeans
from scipy import stats
import matplotlib.pyplot as plt
import pickle

In [13]:
# Semantic classes, one row per class in index order: (name, RGB color, id).
# The row position becomes both the dictionary key and the 'train_id'.
# NOTE(review): 'id' is 255 for 'unlabeled' and 'other' -- presumably an
# "ignore" value; confirm against the consumer of 'id'.
_LABEL_TABLE = [
    ('unlabeled', (0, 0, 0), 255),
    ('building', (70, 70, 70), 0),
    ('fence', (100, 40, 40), 1),
    ('other', (55, 90, 80), 255),
    ('pedestrian', (220, 20, 60), 2),
    ('pole', (153, 153, 153), 3),
    ('roadline', (157, 234, 50), 4),
    ('road', (128, 64, 128), 5),
    ('sidewalk', (244, 35, 232), 6),
    ('vegetation', (107, 142, 35), 7),
    ('vehicles', (0, 0, 142), 8),
    ('wall', (102, 102, 156), 9),
    ('trafficsign', (220, 220, 0), 10),
    ('sky', (70, 130, 180), 11),
    ('ground', (81, 0, 81), 12),
    ('bridge', (150, 100, 100), 13),
    ('railtrack', (230, 150, 140), 14),
    ('guardrail', (180, 165, 180), 15),
    ('trafficlight', (250, 170, 30), 16),
    ('static', (110, 190, 160), 17),
    ('dynamic', (170, 120, 50), 18),
    ('water', (45, 60, 150), 19),
    ('terrain', (145, 170, 100), 20),
]

# The tracker loop looks classes up by the first channel of an
# instance-segmentation pixel: label_dict[channel_value]['name'].
label_dict = {
    class_index: {
        'name': class_name,
        'color': class_color,
        'train_id': class_index,
        'id': class_id,
    }
    for class_index, (class_name, class_color, class_id) in enumerate(_LABEL_TABLE)
}

# Object names intended as static anchors. The only reference to this list in
# the tracker loop is a commented-out filter.
# NOTE(review): label_dict spells the class 'trafficsign'; the 'traffic sign'
# entry here would never match it if the filter were re-enabled.
anchor_objects = [
    'billboard',
    'mobile barrier',
    'fire hydrant',
    'chair',
    'trash',
    'trashcan',
    'pole',
    'vegetation',
    'tree',
    'trafficlight',
    'traffic sign',
]

# Pinhole parameters used to back-project (cx, cy, depth) into 3D coordinates.
# The principal point is the centre of a 480 x 270 frame.
FX_DEPTH = FY_DEPTH = 168.05
CX_DEPTH = 480 / 2
CY_DEPTH = 270 / 2

In [16]:
def draw_box_label(img, bbox, trk_id, box_color=(0, 255, 255), show_label=True):
    """
    Draw a bounding box and, optionally, an id / centre label onto ``img``.

    Parameters
    ----------
    img : image passed straight to the cv2 drawing calls (drawn on in place).
    bbox : sequence ``[top, left, bottom, right]`` in pixels, i.e. row/column
        order -- NOT ``[left, top, right, bottom]``.
    trk_id : identifier printed on the first label line.
    box_color : colour sequence used for the outline and the label background.
    show_label : when true, also draw the filled label panel with the track id
        and the x / y coordinates of the box centre.

    Returns
    -------
    The same ``img`` object that was passed in.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_size = 0.7
    font_color = (0, 0, 0)
    label_height = 65  # three text lines, 20 px apart, plus padding

    # Coerce to plain Python ints so numpy scalars or floats in ``bbox`` are
    # handed to cv2 as ordinary integer pixel coordinates.
    top, left, bottom, right = (int(v) for v in bbox[:4])

    box_color = tuple(box_color)

    # Draw the bounding box
    cv2.rectangle(img, (left, top), (right, bottom), box_color, 4)

    if show_label:
        # The label normally sits directly above the box. When the box is
        # closer than ``label_height`` to the top of the image that position is
        # (partly) off-canvas, so the panel is drawn just inside the box top.
        label_top = top - label_height
        if label_top < 0:
            label_top = max(top, 0)
        label_bottom = label_top + label_height

        # Filled panel used as the background for the label text
        cv2.rectangle(img, (left-2, label_top), (right+2, label_bottom), box_color, -1, 1)

        # Track id followed by the x and y coordinates of the box centre
        cv2.putText(img, f"id: {trk_id}", (left, label_bottom-45), font, font_size, font_color, 1, cv2.LINE_AA)
        text_x = 'x='+str((left+right)/2)
        cv2.putText(img, text_x, (left, label_bottom-25), font, font_size, font_color, 1, cv2.LINE_AA)
        text_y = 'y='+str((top+bottom)/2)
        cv2.putText(img, text_y, (left, label_bottom-5), font, font_size, font_color, 1, cv2.LINE_AA)

    return img

In [17]:
# Build ground-truth object tracks for every video: for each frame, every
# distinct instance-segmentation colour is turned into a bounding box, a
# dominant colour and a 3D coordinate, grouped per class / track / frame.
# A few annotated debug frames are written out and the tracks are pickled.
videos = [str(x).zfill(3) for x in range(14, 15)]
tracker_out_root = "./tracker_out/gt_tracker"

# NOTE(review): machine-specific absolute path; point it at the local dataset.
dataset_root = "/Volumes/ssd_imran/carla_dataset/testing/localization_test/map_10"

area_th = 100           # largest contour must cover at least this area (px)
h_w_th = 10             # box height and width must both exceed this (px)
debug_frame_count = 10  # annotated images are saved for the first N frames only

os.makedirs(tracker_out_root, exist_ok=True)

for vid_name in videos:
    ins_seg_folder = f"{dataset_root}/{vid_name}/ins_seg"
    depth_folder = f"{dataset_root}/{vid_name}/depth"
    rgb_root = f"{dataset_root}/{vid_name}/rgb"

    images = sorted(glob.glob(os.path.join(rgb_root, "*.png")))

    tracker_dict = {}   # class name -> track index -> frame name -> record
    tracker_count = {}  # class name -> last track index handed out
    track_id_map = {}   # class name -> "<G>_<B>" instance key -> track index

    for i, img in enumerate(tqdm(images)):
        # Per-frame values, computed once instead of once per object.
        img_basename = os.path.basename(img)
        frame_file = img_basename.split('_')[-1]
        frame_name = frame_file.replace('.png', '')

        rgb = np.array(Image.open(img))
        ins_seg = np.array(Image.open(os.path.join(ins_seg_folder, img_basename)).convert('RGB'))
        depth = np.array(Image.open(os.path.join(depth_folder, img_basename)))
        # NOTE(review): assumes a 16-bit depth image mapped onto 0..1000
        # (presumably metres) -- confirm against the dataset export.
        depth = (depth / (256 * 256 - 1)) * 1000

        # Every distinct RGB triple is one object instance: channel 0 selects
        # the class in label_dict, channels 1 and 2 form the instance key.
        unique_colors = np.unique(ins_seg.reshape(-1, ins_seg.shape[2]), axis=0)

        for unique_color in unique_colors:
            obj_name = label_dict[unique_color[0]]['name']
            tracker_id = "_".join([str(x) for x in unique_color[1:]])

            # Binary mask of this instance, sized from the segmentation image
            # itself rather than a hard-coded 270 x 480.
            image_mask = np.all(ins_seg == unique_color, axis=-1).astype(np.uint8) * 255

            contours, _hierarchy = cv2.findContours(
                image_mask,
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE
            )

            # Only the largest connected region of the instance is used.
            cnt = max(contours, key=cv2.contourArea)
            if cv2.contourArea(cnt) < area_th:
                continue

            x, y, w, h = cv2.boundingRect(cnt)
            if h <= h_w_th or w <= h_w_th:
                continue

            M = cv2.moments(cnt)
            cx = int(M["m10"] / M["m00"])
            cy = int(M["m01"] / M["m00"])

            # Pixels inside the contour, clamped to the depth image bounds.
            contour_pts = cnt.squeeze()
            rrt, cct = polygon(contour_pts[:, 1], contour_pts[:, 0])
            rrt = np.clip(rrt, 0, depth.shape[0] - 1)
            cct = np.clip(cct, 0, depth.shape[1] - 1)

            tmp_obj_boxes = [y, x, y+h, x+w]  # [top, left, bottom, right]
            all_depths = depth[rrt, cct]

            # With a single cluster the centre is the per-channel mean of the
            # object's pixels.
            all_colors = rgb[rrt, cct]
            kmeans = KMeans(n_clusters=1)
            kmeans.fit(all_colors)
            dominant_color = tuple(kmeans.cluster_centers_.astype(int)[0])

            # Median depth over the object, rounded to two decimals.
            z = np.median(all_depths)
            z = float(f"{z:.2f}")

            # Back-project the contour centroid with the pinhole constants.
            x_3d = (cx - CX_DEPTH) * z / FX_DEPTH
            y_3d = (cy - CY_DEPTH) * z / FY_DEPTH
            tmp_obj_3d_cord = [x_3d, y_3d, z]

            # Hand out per-class consecutive track indices (0, 1, 2, ...) in
            # order of first appearance of each instance key.
            tracker_dict.setdefault(obj_name, {})
            tracker_count.setdefault(obj_name, -1)
            class_id_map = track_id_map.setdefault(obj_name, {})
            if tracker_id not in class_id_map:
                tracker_count[obj_name] += 1
                class_id_map[tracker_id] = tracker_count[obj_name]
            track_id = class_id_map[tracker_id]

            tracker_dict[obj_name].setdefault(track_id, {})[frame_name] = {
                "b_box": tmp_obj_boxes,
                "color": dominant_color,
                "3d_cord": tmp_obj_3d_cord
            }

            if i < debug_frame_count:
                tracker_out = os.path.join(
                    tracker_out_root,
                    vid_name,
                    'images',
                    obj_name,
                    str(track_id)
                )
                os.makedirs(tracker_out, exist_ok=True)

                track_placed_img = draw_box_label(
                    img=rgb.copy(),
                    bbox=tmp_obj_boxes,
                    trk_id=track_id,
                    box_color=(126, 247, 0),
                    show_label=True
                )
                out_img = os.path.join(tracker_out, frame_file)
                Image.fromarray(track_placed_img).save(out_img)

    pickle_dir = os.path.join(tracker_out_root, f'{vid_name}/pickles/')
    os.makedirs(pickle_dir, exist_ok=True)

    with open(os.path.join(tracker_out_root, f'{vid_name}/pickles/trackers_{vid_name}.pickle'), 'wb') as handle:
        pickle.dump(tracker_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

0it [00:00, ?it/s]


In [8]:
import pickle
from tqdm import tqdm
import os

# Videos available to this cell and the tracker output root.
videos = [f"{video_number:03d}" for video_number in (14, 15)]
tracker_out_root = "./tracker_out/gt_tracker"

# NOTE(review): this rebinds label_dict to the same content that an earlier
# cell of this notebook already defines; kept so the cell stays self-contained.
# Rows are (name, RGB color, id); the row position is the key and 'train_id'.
label_dict = {}
for _class_index, (_class_name, _class_color, _class_id) in enumerate([
    ('unlabeled', (0, 0, 0), 255),
    ('building', (70, 70, 70), 0),
    ('fence', (100, 40, 40), 1),
    ('other', (55, 90, 80), 255),
    ('pedestrian', (220, 20, 60), 2),
    ('pole', (153, 153, 153), 3),
    ('roadline', (157, 234, 50), 4),
    ('road', (128, 64, 128), 5),
    ('sidewalk', (244, 35, 232), 6),
    ('vegetation', (107, 142, 35), 7),
    ('vehicles', (0, 0, 142), 8),
    ('wall', (102, 102, 156), 9),
    ('trafficsign', (220, 220, 0), 10),
    ('sky', (70, 130, 180), 11),
    ('ground', (81, 0, 81), 12),
    ('bridge', (150, 100, 100), 13),
    ('railtrack', (230, 150, 140), 14),
    ('guardrail', (180, 165, 180), 15),
    ('trafficlight', (250, 170, 30), 16),
    ('static', (110, 190, 160), 17),
    ('dynamic', (170, 120, 50), 18),
    ('water', (45, 60, 150), 19),
    ('terrain', (145, 170, 100), 20),
]):
    label_dict[_class_index] = {
        'name': _class_name,
        'color': _class_color,
        'train_id': _class_index,
        'id': _class_id,
    }

# Regroup the pickled tracks of each selected video from
#   class -> track -> frame -> record
# into
#   frame -> "<class>_<track>" -> record
# and print the regrouped dictionary.
kb_dict = {}

for vid_name in tqdm(videos[1:2]):
    pkl_file = f'./tracker_out/gt_tracker/{vid_name}/pickles/trackers_{vid_name}.pickle'
    # NOTE: unpickling can execute arbitrary code; only load pickles that were
    # produced locally by the tracker cell.
    with open(pkl_file, 'rb') as f:
        data = pickle.load(f)

    # Rebuilt from scratch for every video, so only the last one survives.
    new_data = {}

    for class_name, class_tracks in data.items():
        for track_index, track_frames in class_tracks.items():
            obj_name = f'{class_name}_{track_index}'
            for frame_key, frame_record in track_frames.items():
                new_data.setdefault(frame_key, {})[obj_name] = frame_record

    print(new_data)


100%|██████████| 1/1 [00:00<00:00, 41.20it/s]

{'00001': {'building_0': {'b_box': [8, 192, 67, 279], 'color': (0, 0, 0), '3d_cord': [0.9365069919666765, -46.35709610235048, 78.69]}, 'building_1': {'b_box': [45, 133, 72, 164], 'color': (0, 0, 0), '3d_cord': [-40.332282058911034, -32.95971437072299, 72.88]}, 'building_2': {'b_box': [53, 320, 76, 349], 'color': (0, 0, 0), '3d_cord': [55.60630764653377, -43.05004462957453, 100.48]}, 'building_3': {'b_box': [50, 283, 72, 300], 'color': (0, 0, 0), '3d_cord': [32.5453138946742, -47.22261231776256, 107.24]}, 'building_4': {'b_box': [28, 18, 108, 99], 'color': (0, 0, 0), '3d_cord': [-19.894912228503422, -7.433263909550729, 18.37]}, 'building_5': {'b_box': [0, 380, 126, 480], 'color': (0, 0, 0), '3d_cord': [18.863850044629576, -6.635525141326985, 15.93]}, 'pole_0': {'b_box': [0, 376, 103, 393], 'color': (0, 0, 0), '3d_cord': [8.313656649806605, -4.825409104433204, 9.77]}, 'roadline_0': {'b_box': [141, 113, 270, 207], 'color': (0, 0, 0), '3d_cord': [-2.398869384111871, 2.2934245760190417, 4.4




In [11]:
# Print one sentence per attribute for every vehicle / pedestrian object in a
# few hand-picked frames. Copying the objects into kb_for_desc is currently
# disabled, so it only receives an empty dict per selected frame.
kb_for_desc = {}

for key, frame_objects in new_data.items():
    frame_number = int(key)
    if frame_number not in [11, 15, 20]:
        continue

    print(f"Frame: {frame_number}:")
    kb_for_desc.setdefault(key, {})

    for obj, attributes in frame_objects.items():
        # Substring match on the "<class>_<track>" object name.
        if not ('vehicle' in obj or 'pedestrian' in obj):
            continue
        for s_key, attribute_value in attributes.items():
            print(f'The {s_key} of {obj} in frame {frame_number} is {attribute_value}')

Frame: 11:
The b_box of vehicles_0 in frame 11 is [84, 246, 105, 268]
The color of vehicles_0 in frame 11 is (0, 0, 0)
The 3d_cord of vehicles_0 in frame 11 is [1.77280571258554, -4.653614995537042, 18.62]
The b_box of vehicles_1 in frame 11 is [125, 195, 222, 289]
The color of vehicles_1 in frame 11 is (0, 0, 0)
The 3d_cord of vehicles_1 in frame 11 is [0.0, 0.9832192799761976, 4.03]
The b_box of vehicles_2 in frame 11 is [74, 192, 87, 225]
The color of vehicles_2 in frame 11 is (0, 0, 0)
The 3d_cord of vehicles_2 in frame 11 is [-7.076227313299613, -12.782862243379945, 38.36]
The b_box of vehicles_3 in frame 11 is [65, 160, 87, 186]
The color of vehicles_3 in frame 11 is (0, 0, 0)
The 3d_cord of vehicles_3 in frame 11 is [-15.465397203213328, -13.873371020529603, 38.22]
The b_box of vehicles_5 in frame 11 is [75, 72, 88, 88]
The color of vehicles_5 in frame 11 is (0, 0, 0)
The 3d_cord of vehicles_5 in frame 11 is [-31.519785778042245, -10.767628681939899, 32.9]
The b_box of pedestria