# Jupyter notebook for debugging

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
# Copied from `train` function in train_simple.py:L78
import yaml

# Device string; later cells pass it to `.to(device)` when building the model.
device = 'cpu'
# Path of the hyperparameter YAML file.
# NOTE: `hyp` is rebound just below from this path string to the parsed file content.
hyp = 'data/hyps/hyp.scratch-low.yaml'

# Parse the YAML file; bytes that cannot be decoded are dropped (errors="ignore").
with open(hyp, errors="ignore") as f:
    hyp = yaml.safe_load(f)  # load hyps dict

In [None]:
from models.yolo import Model
from utils.general import check_dataset

# Model definition to instantiate; the commented-out lines are alternative configs.
cfg = 'models/yolov5n_kaist-rgbt.yaml'
# cfg = 'models/yolov5s_kaist-rgbt_anchor.yaml'
# cfg = 'models/yolov5s_kaist-rgbt_rev.yaml'
# Dataset description file, resolved through the project's `check_dataset` helper.
data = 'data/kaist-rgbt.yaml'
data_dict = check_dataset(data)

nc = int(data_dict["nc"])  # number of classes
# `hyp` is the parsed hyperparameter mapping from the first cell; its optional
# "anchors" entry is forwarded to the model constructor (None when absent).
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create

In [None]:
# Anchors stored on the last module of the model.
# NOTE: this binding is overwritten a few lines below by the NumPy copy.
anchors = model.model[-1].anchors

# [TODO] Draw anchors
import matplotlib.pyplot as plt

# Assuming 'model' is your YOLOv5 model
# Move to CPU, detach from autograd and convert to a NumPy array for plotting.
# NOTE(review): upstream YOLOv5 stores these anchors divided by the layer stride
# (grid units, not pixels) — confirm before reading the values as pixel sizes.
anchors = model.model[-1].anchors.cpu().detach().numpy()

# Visualize anchors
import numpy as np
def plot_anchors(anchors, image_shape=(640, 512), zoom_half_width=10, show_labels=False):
    """Draw every anchor as a dashed red rectangle centred on a white canvas.

    Args:
        anchors: Nested array-like indexed as ``anchors[i][j] -> (w, h)``.
        image_shape: ``(height, width)`` of the blank canvas.
        zoom_half_width: Half-size of the square view window around the canvas
            centre. The default of 10 reproduces the previously hard-coded
            limits for the default ``image_shape``. Pass ``None`` to show the
            whole canvas.
        show_labels: When True, annotate each rectangle with its indices and size.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    height, width = image_shape
    center_x, center_y = width / 2, height / 2

    fig, ax = plt.subplots()
    canvas = np.ones(image_shape) * 255  # white background
    # Fix vmin/vmax explicitly: a constant image with auto-scaling has
    # vmin == vmax, which matplotlib maps to the low end of the colormap (black).
    ax.imshow(canvas, extent=[0, width, height, 0], cmap='gray', vmin=0, vmax=255)

    for i, level in enumerate(anchors):
        for j, (w, h) in enumerate(level):
            left, top = center_x - w / 2, center_y - h / 2
            ax.add_patch(
                plt.Rectangle(
                    (left, top), w, h,
                    linewidth=2, edgecolor='red', facecolor='none', linestyle='--'
                )
            )
            if show_labels:
                ax.text(
                    left, top - 10,
                    f'Anchor {i+1},{j+1}: [{w:.1f}, {h:.1f}]',
                    color='red', fontsize=10, ha='left'
                )

    ax.set_title("YOLOv5 Anchors")
    ax.set_xlabel("Width")
    ax.set_ylabel("Height")
    if zoom_half_width is not None:
        # Zoom on the canvas centre, where all rectangles are drawn.
        ax.set_xlim(center_x - zoom_half_width, center_x + zoom_half_width)
        ax.set_ylim(center_y - zoom_half_width, center_y + zoom_half_width)
    plt.show()

# Plot the anchors extracted from the model above, using the default canvas size.
plot_anchors(anchors)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Collect the label file paths. os.listdir returns entries in arbitrary order,
# so sort them to make the collected array reproducible across runs and machines.
label_dir = 'datasets/kaist-rgbt/train/labels'
label_files = sorted(
    os.path.join(label_dir, f)
    for f in os.listdir(label_dir)
    if os.path.isfile(os.path.join(label_dir, f))
)

# Collect the bounding-box sizes: columns 3 and 4 of every label row
# are read as (width, height).
bboxes = []
for file in label_files:
    with open(file, 'r') as fp:
        labels = [x.split() for x in fp.read().strip().splitlines() if len(x)]
    if len(labels):
        labels = np.array(labels, dtype=np.float32)
        bboxes.append(labels[:, 3:5])

# Stack into a single (N, 2) float32 array. The explicit empty fallback keeps the
# array two-dimensional when no boxes were found, so downstream code that indexes
# `data.shape[0]` / broadcasts over the last axis does not fail on a 1-D array.
bboxes = np.concatenate(bboxes, axis=0) if bboxes else np.zeros((0, 2), dtype=np.float32)

# K-means clustering without sklearn
def kmeans(data, k, max_iters=100, seed=None):
    """Cluster the rows of ``data`` into ``k`` groups with Lloyd's algorithm.

    Args:
        data: Array-like of shape ``(n_samples, n_features)``.
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        max_iters: Maximum number of assignment/update iterations.
        seed: Optional integer seed for the initial centroid choice. When
            ``None`` the global ``np.random`` state is used, as before.

    Returns:
        Array of shape ``(k, n_features)`` holding the final centroids.

    Raises:
        ValueError: If ``data`` is not 2-D or ``k`` is outside ``1..n_samples``.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D (n_samples, n_features), got shape {data.shape}")
    n_samples = data.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and the number of samples ({n_samples}), got {k}")

    rng = np.random if seed is None else np.random.RandomState(seed)
    # Initialise the centroids with k distinct rows of the data.
    centroids = data[rng.choice(n_samples, k, replace=False)]

    for _ in range(max_iters):
        # distances[c, s] = Euclidean distance from centroid c to sample s.
        distances = np.sqrt(((data - centroids[:, np.newaxis]) ** 2).sum(axis=2))
        closest = np.argmin(distances, axis=0)
        # A cluster can end up empty (e.g. duplicate points); taking the mean of
        # an empty slice would yield NaN, so keep the previous centroid instead.
        new_centroids = np.array([
            data[closest == i].mean(axis=0) if np.any(closest == i) else centroids[i]
            for i in range(k)
        ])
        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids
    return centroids

# Compute the anchors (using k-means clustering) from the collected (width, height) pairs
clustering_anchors = kmeans(bboxes, k=9)
# Convert to pixel units: widths are scaled by 640 and heights by 512
pixel_anchors = clustering_anchors * np.array([640, 512])
# Order the anchors by ascending width (column 0)
pixel_anchors = pixel_anchors[np.argsort(pixel_anchors[:,0])]
print("Kmeans Anchors\n",pixel_anchors)


## Sanity check: dataset
- Read images and labels
- Visualize bounding boxes

In [None]:
import cv2
import os
import numpy as np
from PIL import Image

# Derive the paired LWIR / visible image paths from the annotation file path.
annFile = 'datasets/kaist-rgbt/train/labels/set05_V000_I01219.txt'
lwirFile = annFile.replace('labels', 'images/lwir').replace('.txt', '.jpg')
visFile  = annFile.replace('labels', 'images/visible').replace('.txt', '.jpg')

# Read images. cv2.imread returns None instead of raising when a file cannot be
# read, so fail here with a clear message rather than at the first `.shape`.
img_lwir = cv2.imread(lwirFile)
img_vis  = cv2.imread(visFile)
if img_lwir is None:
    raise FileNotFoundError(f'Could not read LWIR image: {lwirFile}')
if img_vis is None:
    raise FileNotFoundError(f'Could not read visible image: {visFile}')

# OpenCV decodes to BGR while PIL.Image.fromarray interprets arrays as RGB;
# convert once so the displayed image and the box colors below are both RGB.
img_lwir = cv2.cvtColor(img_lwir, cv2.COLOR_BGR2RGB)
img_vis  = cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB)

h, w = img_vis.shape[:2]

# Read labels: one whitespace-separated row per non-empty line
with open(annFile, 'r') as fp:
    labels = [x.split() for x in fp.read().strip().splitlines() if len(x)]

# Box color (RGB) per class index
colors = {
    0: (255, 0, 0),
    1: (0, 255, 0),
    2: (0, 0, 255),
    3: (255, 0, 255),
}

if len(labels):
    # convert normalized bbox to pixel coordinates
    labels = np.array(labels, dtype=np.float32)
    labels[:, (1, 3)] *= w
    labels[:, (2, 4)] *= h

    cls = labels[:, 0]

    # Columns 1-2 are used as the first corner and (corner + size) as the second.
    xyxy = np.zeros((len(labels), 4))
    xyxy[:, :2] = labels[:, 1:3]
    xyxy[:, 2:] = labels[:, 1:3] + labels[:, 3:5]
    xyxy = xyxy.astype(np.int16)

    for c, bb in zip(cls, xyxy):
        # Use plain Python ints: robust dict lookup and accepted by every
        # OpenCV build as point coordinates.
        color = colors[int(c)]
        pt1 = (int(bb[0]), int(bb[1]))
        pt2 = (int(bb[2]), int(bb[3]))
        cv2.rectangle(img_lwir, pt1, pt2, color)
        cv2.rectangle(img_vis,  pt1, pt2, color)

# Show LWIR (left) and visible (right) side by side
images = np.concatenate([img_lwir, img_vis], axis=1)
Image.fromarray(images)

## Sanity check: dataset class
- Visualize bounding boxes from dataset class

In [None]:
from utils.dataloaders import create_dataloader
from utils.general import check_img_size, colorstr

# Settings for the debugging dataloader.
imgsz = 640
batch_size = 1
single_cls = False
seed = 0

# Training split location taken from the dataset dict loaded in an earlier cell.
train_path = data_dict["train"]
gs = max(int(model.stride.max()), 32)  # grid size (max stride)
imgsz = check_img_size(imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

# Build the loader and keep the underlying dataset, which a later cell indexes directly.
train_loader, dataset = create_dataloader(
    train_path,
    imgsz,
    batch_size,
    gs,
    single_cls,
    hyp=hyp,
    augment=True,      # TODO: check if there is no bug when applying augmentation
    cache=None,
    rect=False,
    rank=-1,
    workers=8,
    image_weights=False,
    quad=False,
    prefix=colorstr("train: "),
    shuffle=False,      # No shuffle for debugging
    seed=seed,
    rgbt_input=True,    # project-specific flag; presumably selects paired LWIR/visible loading — confirm in utils/dataloaders.py
)


In [None]:
from utils.dataloaders import LoadRGBTImagesAndLabels
from utils.general import xywh2xyxy

# Index of the dataset item to inspect.
frame = 103

# Get a minibatch
# for ii, (imgs, targets, paths, _) in enumerate(train_loader):
#     break

# Get a minibatch (fast)
# Collate a single dataset item directly instead of iterating the loader.
imgs, targets, paths, shapes, _ = LoadRGBTImagesAndLabels.collate_fn([dataset[frame]])

# Position of the sample inside the (size-1) batch.
idx = 0
# imgs[0] / imgs[1] are treated as the LWIR / visible batches; move channels last for drawing.
img_lwir = imgs[0][idx].numpy().transpose((1, 2, 0))
img_vis  = imgs[1][idx].numpy().transpose((1, 2, 0))
h, w = img_vis.shape[:2]

labels = targets.numpy()

# Box color per class index.
colors = {
    0: (255, 0, 0), #r
    1: (0, 255, 0), #g
    2: (0, 0, 255), #b
    3: (255, 0, 255),
}

if len(labels):
    # Keep the rows whose first column equals `idx`, then drop that column.
    labels = labels[labels[:, 0] == idx, 1:]

    # convert normalized bbox to pixel coordinates
    labels = np.array(labels, dtype=np.float32)
    labels[:, (1, 3)] *= w
    labels[:, (2, 4)] *= h

    cls = labels[:, 0]

    # Convert boxes with the project's helper, then cast to integers for drawing.
    xyxy = xywh2xyxy(labels[:, 1:5])
    xyxy = xyxy.astype(np.int16)

    # The transposed views above are not contiguous; make contiguous copies before drawing on them.
    img_lwir = np.ascontiguousarray(img_lwir)
    img_vis = np.ascontiguousarray(img_vis)

    for c, bb in zip(cls, xyxy):
        color = colors[c]
        cv2.rectangle(img_lwir, bb[:2], bb[2:], color)
        cv2.rectangle(img_vis,  bb[:2], bb[2:], color)

# Show LWIR (left) and visible (right) side by side, with the source path printed above.
images = np.concatenate([img_lwir, img_vis], axis=1)
print(paths[idx])
Image.fromarray(images)

## Run trained model
- Run the model on the minibatch loaded from the dataset class, then post-process the predictions (drawing them is still a TODO)

In [None]:
import torch
from models.yolo import Model
from utils.torch_utils import select_device

# NOTE(review): `weights` is defined but never loaded anywhere in this cell, so the
# model created below keeps its freshly initialised parameters — load the checkpoint
# before trusting the predictions.
weights = 'runs/train/yolov5n-rgbt3/weights/best.pt'      # Train your own model!
data = 'data/kaist-rgbt.yaml'
# NOTE(review): the config names a "yolov5s" variant while the weights path says "yolov5n" — confirm they match.
cfg = 'models/yolov5s_kaist-rgbt.yaml'
# Path of the hyperparameter YAML; rebound to the parsed content below.
hyp = 'data/hyps/hyp.scratch-low.yaml'
nc = 1
half = False  # use FP16 half-precision inference
dnn = False  # use OpenCV DNN for ONNX inference
device = select_device('cpu')

# `yaml` is imported in the first cell of the notebook.
with open(hyp, errors="ignore") as f:
    hyp = yaml.safe_load(f)  # load hyps dict

# This rebinds `model`, replacing the instance created earlier in the notebook.
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create

# inference
model.eval()
# `imgs` comes from the dataset-visualization cell above; each element is cast to float and scaled by 1/255.
ims = [im.to(device, non_blocking=True).float() / 255 for im in imgs]    # For RGB-T input
with torch.no_grad():
    pred = model(ims)  # forward

In [None]:
import numpy as np
from utils.general import scale_boxes, non_max_suppression

# Post-processing settings passed to non_max_suppression.
# NOTE: the following cell repeats these same statements.
conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
classes = None
agnostic_nms = False  # class-agnostic NMS

# Filter the raw model output `pred` from the inference cell.
pred1 = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)


In [None]:
from utils.general import scale_boxes, non_max_suppression

# Post-processing settings passed to non_max_suppression (same values as the previous cell).
conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
classes = None
agnostic_nms = False  # class-agnostic NMS

# Filter the raw model output `pred` from the inference cell.
pred1 = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

# Resize predicted box
# NOTE(review): called without any arguments — this is a placeholder and will
# presumably fail as written; fill in the arguments following the referenced val.py line.
scale_boxes()         # see valpy:L285

# [TODO] draw predictions (see detect.py:L178)


In [None]:
# Display the first element of the raw (pre-NMS) model output for inspection.
pred[0]