In [None]:
import cv2
import numpy as np
from src.data import compile_data

* Action (4차원):
  * 인덱스 0: 80.76% = Move forward
  * 인덱스 1: 19.23% = Stop/slow down
  * 인덱스 2: 29.43% = Turn left/change to left lane
  * 인덱스 3: 38.07% = Turn right/change to right lane

---

* Description (8차원):
  * 인덱스 0: 84.14% = Traffic light allows
  * 인덱스 1: 89.64% = Front area is clear
  * 인덱스 2: 27.61% = Solid line on the left
  * 인덱스 3: 23.74% = Solid line on the right
  * 인덱스 4: 41.57% = Front left area is clear
  * 인덱스 5: 42.91% = Back left area is clear
  * 인덱스 6: 46.93% = Front right area is clear
  * 인덱스 7: 50.54% = Back right area is clear

In [None]:
def initialize_dataloader(dataroot="./data", bsz=1, nworkers=0):
    """
    Build the train and validation dataloaders through ``compile_data``.

    Called with no arguments this behaves as before: the "trainval" split
    under ``./data``, batch size 1, no worker processes, and the
    "segmentationdata" parser.

    Args:
        dataroot: dataset root directory, forwarded to ``compile_data``.
        bsz: batch size, forwarded to ``compile_data``.
        nworkers: number of loader workers, forwarded to ``compile_data``.

    Returns:
        The ``(trainloader, valloader)`` pair returned by ``compile_data``.
    """
    # NOTE(review): each bound looks like [min, max, step] - confirm against
    # compile_data / the model that consumes the grid.
    grid_conf = {
        "xbound": [-50.0, 50.0, 0.5],
        "ybound": [-50.0, 50.0, 0.5],
        "zbound": [-10.0, 10.0, 20.0],
        "dbound": [4.0, 45.0, 1.0],
    }

    cams = [
        "CAM_FRONT_LEFT",
        "CAM_FRONT",
        "CAM_FRONT_RIGHT",
        "CAM_BACK_LEFT",
        "CAM_BACK",
        "CAM_BACK_RIGHT",
    ]
    data_aug_conf = {
        "resize_lim": (0.193, 0.225),
        "final_dim": (128, 352),
        "rot_lim": (-5.4, 5.4),
        "H": 900,
        "W": 1600,
        "rand_flip": False,
        "bot_pct_lim": (0.0, 0.22),
        "cams": cams,
        # Derived from the list so the two can never drift apart.
        "Ncams": len(cams),
    }

    trainloader, valloader = compile_data(
        "trainval",
        dataroot,
        data_aug_conf=data_aug_conf,
        grid_conf=grid_conf,
        bsz=bsz,
        nworkers=nworkers,
        parser_name="segmentationdata",
    )

    return trainloader, valloader

def get_data_from(dataloader, idx):
    """
    Fetch sample ``idx`` from ``dataloader.dataset`` with display-ready images.

    The sample must unpack into exactly nine items. The first one (the camera
    images) is passed through ``normalize_imgs``; the other eight are returned
    untouched, in their original order.
    """
    sample = dataloader.dataset[idx]
    imgs, rots, trans, intrins, post_rots, post_trans, binimg, act, desc = sample
    return (
        normalize_imgs(imgs),
        rots,
        trans,
        intrins,
        post_rots,
        post_trans,
        binimg,
        act,
        desc,
    )

def normalize_imgs(imgs):
    """
    Min-max normalize each image to the 0..255 range and return uint8 arrays.

    Args:
        imgs: sequence of images. Each item is either an object exposing
            ``.cpu().numpy()`` (e.g. a torch tensor) or something
            ``np.asarray`` accepts. If the first image has 3 as its leading
            dimension, every image is treated as channels-first and is
            transposed to (H, W, C).

    Returns:
        A list of ``np.uint8`` arrays, one per input image. An empty input
        yields an empty list. An image whose values are all equal has no
        range to stretch and is returned as all zeros instead of dividing by
        zero.
    """
    if len(imgs) == 0:
        return []

    # The layout is decided once, from the first image, for the whole batch.
    channels_first = imgs[0].shape[0] == 3

    rtn_imgs = []
    for img in imgs:
        # Tensor-like inputs are moved to host memory first; anything else is
        # treated as array-like.
        arr = img.cpu().numpy() if hasattr(img, "cpu") else np.asarray(img)
        if channels_first:
            arr = np.transpose(arr, (1, 2, 0))

        lo = np.min(arr)
        span = np.max(arr) - lo
        if span == 0:
            # Flat image: (x - min) / 0 would be NaN, and casting NaN to
            # uint8 is undefined. Emit a black image instead.
            rtn_imgs.append(np.zeros(arr.shape, dtype=np.uint8))
            continue

        scaled = (arr - lo) / span * 255
        rtn_imgs.append(scaled.astype(np.uint8))
    return rtn_imgs

def view_6_cameras_and_binimg(imgs, binimg, action_imgs, action, description):
    """
    Compose one uint8 canvas showing six camera views, the binimg map, an
    action illustration and the active description flags.

    Layout: a black banner, a row with imgs[0], imgs[1], imgs[2], a second
    black banner, a row with imgs[5], imgs[4], imgs[3], and a white panel on
    the right holding the normalized binimg and the action image.

    Args:
        imgs: list of 6 images, each a uint8 numpy array of shape (H, W, 3),
            all the same size.
        binimg: array of shape (H, W); each cell is 0, 1, 2 or 3.
        action_imgs: dict mapping a 4-character code such as "0101" to an
            image. A missing or ``None`` entry is rendered as text instead of
            crashing.
        action: 4 action flags, each 0 or 1; items must support ``.item()``.
        description: 8 description flags, each 0 or 1.

    Returns:
        The composed canvas as a numpy uint8 array of shape
        (2 * 60 + 2 * H, 3 * W + 800, 3).
    """
    cam_h, cam_w = imgs[0].shape[:2]
    banner_h = 60   # black strip above each camera row
    panel_w = 800   # white side panel for the map and the action tile
    row_w = cam_w * 3

    banner = np.zeros((banner_h, row_w, 3), dtype=np.uint8)
    front_row = np.concatenate([imgs[i] for i in range(3)], axis=1)
    # The second row is laid out in reverse index order: imgs[5], imgs[4], imgs[3].
    back_row = np.concatenate([imgs[i] for i in range(5, 2, -1)], axis=1)
    img_grid = np.concatenate([banner, front_row, banner, back_row], axis=0)

    # The panel height follows the camera grid instead of a fixed 376 rows,
    # so other image sizes still concatenate.
    right_append = np.full((img_grid.shape[0], panel_w, 3), 255, dtype=np.uint8)
    img_grid = np.concatenate([img_grid, right_append], axis=1)

    # Map tile: replicate the single channel to 3 and stretch it to 0..255.
    binimg_3_repeat = np.repeat(binimg[:, :, np.newaxis], 3, axis=2)
    bev = normalize_imgs([binimg_3_repeat])[0]
    bev_h, bev_w = bev.shape[:2]
    bev_x = row_w + 44
    img_grid[banner_h:banner_h + bev_h, bev_x:bev_x + bev_w, :] = bev

    # Action tile: the four flags form the lookup key, e.g. "1000".
    action_idx = "".join(str(int(action[i].item())) for i in range(4))
    tile_w, tile_h = 324, 233
    tile_x = row_w + 294
    action_img = action_imgs.get(action_idx)
    if action_img is not None:
        img_grid[banner_h:banner_h + tile_h, tile_x:tile_x + tile_w, :] = cv2.resize(action_img, (tile_w, tile_h))
    else:
        # No image for this code (e.g. it failed to load): show the raw code
        # rather than failing inside cv2.resize.
        cv2.putText(img_grid, f"Action: {action_idx}", (tile_x, banner_h + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    desc_texts = ["traffic sign OK", "F clear", "L solid line", "R solid line", "FL clear", "RL clear", "FR clear", "RR clear"]

    # "Clear" flags are drawn on top of the camera tile they refer to.
    front_y = banner_h + 40
    back_y = 2 * banner_h + cam_h + 40
    positions = {
        1: (cam_w + 10, front_y),
        4: (10, front_y),
        5: (2 * cam_w + 10, back_y),
        6: (2 * cam_w + 10, front_y),
        7: (10, back_y),
    }
    for k, v in positions.items():
        if description[k] == 1:
            cv2.putText(img_grid, "Clear", v, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

    # The remaining flags (traffic light, solid lines) go in the top banner;
    # join() avoids the dangling ", " after the last entry.
    desc_text = ", ".join(desc_texts[i] for i in (0, 2, 3) if description[i] == 1)
    cv2.putText(img_grid, f'Description: {desc_text}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (128, 128, 255), 2)

    return img_grid


In [None]:
# Build both loaders once, then load one illustration per 4-bit action code.
# The keys run "0000" .. "1111" in ascending binary order; cv2.imread returns
# None (without raising) for a file it cannot read.
trainloader, valloader = initialize_dataloader()
action_imgs = {
    name: cv2.imread(f"./action_images/{name}.png")
    for name in (format(code, "04b") for code in range(16))
}

In [None]:
# Interactive viewer: 'n' = next sample, 'b' = previous sample, 'q' = quit.
# Samples are read through valloader.dataset[idx], so the wrap-around bound is
# the dataset length. len(valloader) counts batches and only matches it when
# the batch size is 1.
num_samples = len(valloader.dataset)
idx = 0
try:
    while True:
        imgs, rots, trans, intrins, post_rots, post_trans, binimg, act, desc = get_data_from(valloader, idx)
        full_view = view_6_cameras_and_binimg(imgs, binimg, action_imgs, act, desc)
        cv2.imshow('img', full_view)
        key = cv2.waitKey(0)
        pressed = key & 0xFF  # keep only the low byte before comparing to a character
        if pressed == ord('q'):
            break
        elif pressed == ord('n'):
            idx = (idx + 1) % num_samples
        elif pressed == ord('b'):
            idx = (idx - 1) % num_samples
finally:
    # Close the window even if loading or rendering a sample raised.
    cv2.destroyAllWindows()