In [1]:
import os
# When the kernel starts inside a directory named "notebooks", move up one level
# so the relative paths used by later cells (e.g. "models/...", "data/...")
# resolve from the parent directory.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")
# `%cd .` does not change directory; it echoes the working directory so the
# result of the step above is visible in the cell output.
%cd .
# Autoreload is currently disabled; uncomment both lines to enable it.
# %load_ext autoreload
# %autoreload 2

/home/jh/code/til/til-23-cv


In [2]:
%%sh
# Run the repository's setup script. In the recorded output it reported that it
# was not on the competition platform and exited.
./setup.sh

Not on competition platform, exiting...


## Inference and Submission

In [2]:
from ultralytics import YOLO
from til_23_cv import ReIDEncoder, cos_sim, thres_strategy_softmax, thres_strategy_naive
from PIL import Image
import tqdm as tqdm
import numpy as np
import csv
from pathlib import Path

### Load Models

In [3]:
# Weight files for the detector and the ReID encoder (relative to the working directory).
yolo_path, reid_path = "models/det.pt", "models/reid.pt"

# Keyword arguments unpacked into `detector.predict(...)` by the inference cell.
# See the Ultralytics predict-mode documentation for the meaning of each key.
yolo_cfg = {
    "conf": 0.7,
    "iou": 0.7,
    "half": True,
    "device": "cuda:0",
    "imgsz": 1280,
    "stream": True,
    # "verbose": False,
}

In [4]:
# Build the detector from the weights file named by `yolo_path`.
detector = YOLO(yolo_path)
# Fuse the detector's layers before inference; the recorded summary for this
# cell reports the model as "(fused)".
detector.fuse()
# Build the ReID encoder from `reid_path`. Later cells call it with a list of
# image arrays and index/iterate the result as one embedding per input.
encoder = ReIDEncoder(reid_path)

YOLOv5m6u summary (fused): 323 layers, 41132004 parameters, 0 gradients


### Inference

In [6]:
# Directory of test images passed to `detector.predict`.
test_dir = "data/til23plush/images/test"
# Directory of suspect images; the inference cell opens the file here that has
# the same file name as the test image being processed.
sus_dir = "data/til23plush/suspects"
# Output root: the inference cell writes `results.csv` and a `yolo/` folder of
# per-image label files under it.
out_dir = "runs/til23"

In [5]:
# Preview model output.
def concat_images_horizontally(*images):
    """Join images side by side after scaling each one to a common height.

    Every input is resized so its height equals the tallest input's height,
    with the width scaled by the same factor and truncated to an integer.
    The resized images are then pasted left to right, in argument order,
    onto a fresh RGB canvas.

    Args:
        *images: PIL images (objects exposing ``size`` as ``(width, height)``
            and a ``resize`` method). At least one image is required.

    Returns:
        A new RGB image whose height is the tallest input height and whose
        width is the sum of the resized widths.
    """
    target_height = max(img.size[1] for img in images)

    # Scale each image to the target height, keeping its aspect ratio.
    scaled = []
    for img in images:
        scaled_width = int(img.size[0] * target_height / img.size[1])
        scaled.append(img.resize((scaled_width, target_height)))

    canvas_width = sum(tile.size[0] for tile in scaled)
    canvas = Image.new("RGB", (canvas_width, target_height))

    # Paste tiles left to right; `cursor` is the x position of the next tile.
    cursor = 0
    for tile in scaled:
        canvas.paste(tile, (cursor, 0))
        cursor += tile.size[0]

    return canvas

In [7]:
# Thresholding strategies.
def naive_thres(x):
    """Call `thres_strategy_naive` on `x` with its second argument fixed at 0.3.

    The inference cell passes the list of per-box similarities as `x` and
    treats a return value of -1 as "no box selected".
    """
    return thres_strategy_naive(x, 0.3)


def softmax_thres(x):
    """Call `thres_strategy_softmax` on `x` with trailing arguments 0.8 and 1.4.

    NOTE(review): the meaning of the two constants is defined by
    `thres_strategy_softmax` in `til_23_cv` — confirm there before tuning.
    """
    return thres_strategy_softmax(x, 0.8, 1.4)

In [24]:
# Save results!
#
# For every test image: detect boxes, embed each crop and the matching suspect
# image, relabel the box picked by `naive_thres` as class 1, then write the
# detections to a per-image YOLO label file and to the submission CSV.
from IPython.display import clear_output
from time import sleep, time

# Set to True to print the similarities and show the crops next to the suspect
# image for one second per test image.
show_preview = False

sus_dir = Path(sus_dir)
out_dir = Path(out_dir)
# Keep earlier runs: an existing output directory is moved aside under a
# timestamp-suffixed name instead of being overwritten.
if out_dir.exists():
    out_dir.rename(out_dir.with_name(f"{out_dir.name}_{int(time())}"))
(out_dir / "yolo").mkdir(exist_ok=True, parents=True)

# newline="" lets the csv module control line endings itself; without it the
# file gets "\r\r\n" row terminators on platforms that translate "\n".
with open(out_dir / "results.csv", "w", newline="") as f:
    fields = ["Image_ID", "class", "confidence", "ymin", "xmin", "ymax", "xmax"]
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()

    for res in detector.predict(test_dir, **yolo_cfg):
        res = res.cpu()
        im = res.orig_img
        res_pth = Path(res.path)

        # The suspect image is looked up by the test image's file name.
        sus = np.array(Image.open(sus_dir / res_pth.name))
        sus_embed = encoder([sus])[0]

        # Cut out every detection, clamping the rounded box to the image bounds.
        boxes = res.boxes.xyxy.round().int()
        crops = []
        for x1, y1, x2, y2 in boxes:
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(im.shape[1], x2)
            y2 = min(im.shape[0], y2)
            # `::-1` reverses the channel axis so crops match the channel order
            # of the PIL-loaded suspect (orig_img is presumably BGR — confirm).
            crops.append(im[y1:y2, x1:x2, ::-1])

        if crops:
            box_embeds = encoder(crops)
            box_sims = [cos_sim(e, sus_embed) for e in box_embeds]
            idx = naive_thres(box_sims)
        else:
            # No detections for this image: nothing to embed or relabel, and
            # the encoder / threshold helpers are not asked to handle [].
            box_sims = []
            idx = -1

        # -1 means no box was selected; otherwise mark the selected box as class 1.
        if idx != -1:
            res.boxes.cls[idx] = 1

        # Save YOLO predictions for eyeballing.
        res.save_txt(out_dir / "yolo" / f"{res_pth.stem}.txt", save_conf=True)

        # Save CSV for submission: one row per detection. The box properties
        # are read once per image (after the relabel above) rather than per cell.
        for box_cls, box_conf, box_xyxyn in zip(res.boxes.cls, res.boxes.conf, res.boxes.xyxyn):
            writer.writerow({
                "Image_ID": res_pth.stem,
                "class": int(box_cls),
                "confidence": float(box_conf),
                "ymin": float(box_xyxyn[1]),
                "xmin": float(box_xyxyn[0]),
                "ymax": float(box_xyxyn[3]),
                "xmax": float(box_xyxyn[2]),
            })

        # Replace the previous image's log line instead of accumulating output.
        clear_output(wait=True)
        # Preview.
        if show_preview:
            print(box_sims, idx + 1 if idx != -1 else "None")
            display(concat_images_horizontally(*[Image.fromarray(c) for c in crops], Image.fromarray(sus)))
            sleep(1)

image 1600/1600 /home/jh/code/til/data/til23plush/images/test/image_1599.png: 768x1280 3 plushies, 58.2ms
Speed: 3.0ms preprocess, 66.4ms inference, 3.5ms postprocess per image at shape (1, 3, 1280, 1280)


**NOTE: Results are saved to `runs/til23/results.csv`!**