In [None]:
from itertools import islice
from scenic_reasoning.measurements.ObjectDetection import ObjectDetectionMeasurements
from scenic_reasoning.utilities.common import get_default_device
import torch
from ultralytics.data.augment import LetterBox

In [None]:
# Batch size handed to ObjectDetectionMeasurements further down.
BATCH_SIZE = 1
# Upper bound (via islice) on how many measurement results get printed/shown.
NUM_EXAMPLES_TO_SHOW = 1

In [None]:
# Shared Ultralytics LetterBox instance configured with new_shape=(720, 1280).
# NOTE(review): other comments in this notebook talk about 768x1280 for YOLO;
# confirm which target shape is actually intended here.
shape_transform = LetterBox(new_shape=(720, 1280))


def transform_image_for_yolo(image: torch.Tensor):
    """Letterbox an image tensor and scale its values by 1/255.

    The tensor's axes are permuted with (1, 2, 0) and converted to a NumPy
    array, passed through the module-level ``shape_transform``, turned back
    into a tensor with axes permuted by (2, 0, 1), and finally cast to
    float32 and divided by 255.0.

    Args:
        image: Input image tensor. Assumed to be channel-first (C, H, W) with
            values in 0-255 and located on the CPU (``.numpy()`` is called on
            it) -- TODO confirm against the dataset.

    Returns:
        A float32 tensor holding the letterboxed image divided by 255.0.
    """
    # Tensor -> NumPy array, moving the first axis to the end for LetterBox.
    hwc_array = image.permute(1, 2, 0).numpy()
    # Apply the shared LetterBox transform (target shape set on shape_transform).
    letterboxed = shape_transform(image=hwc_array)
    # NumPy array -> tensor, moving the last axis back to the front.
    chw_tensor = torch.tensor(letterboxed).permute(2, 0, 1)
    # Cast to float32 and divide by 255.0.
    return chw_tensor.to(torch.float32) / 255.0

In [None]:
from scenic_reasoning.data.ImageLoader import Bdd100kDataset

'''
The 'transform_image_for_yolo' transform was isolated as the source of problems with Bdd100kDataset,
which is understandable given that this transform fn was originally intended for the YOLO format.
Instead, we keep the original 720x1280 image resolution and run inference on it directly.
'''
'''
bdd = Bdd100kDataset(
    split="val", 
    # YOLO requires images to be 640x640 or 768x1280, 
    # but BDD100K images are 720x1280 so we need to resize
    transform=transform_image_for_yolo,  
    use_original_categories=False,
    use_extended_annotations=False,
)
'''

# Validation split, loaded without any image transform.
bdd = Bdd100kDataset(
    split="val",
    use_original_categories=False,
    use_extended_annotations=False,
)

# Quick sanity check: dataset repr and number of samples.
print(bdd)
print(len(bdd))

# Inspect the first sample: image tensor shape and how many labels it carries.
first_img = bdd[0]
first_img_tensor = first_img["image"]
print(first_img_tensor.shape)
print(len(first_img["labels"]))

In [None]:
from scenic_reasoning.models.UltralyticsYolo import Yolo

# https://docs.ultralytics.com/models/yolov5/#performance-metrics
# model = Yolo(model="../yolov5x6u.pt") # v5 can handle 1280 while v8 can handle 640. makes no sense ><

In [None]:
from scenic_reasoning.models.Detectron import Detectron2Model

# Threshold forwarded to Detectron2Model (presumably the detection score
# cut-off -- confirm against the Detectron2Model wrapper).
threshold = 0.5

config_file = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# NOTE(review): the weights identifier is the same string as the config path.
# Presumably the wrapper resolves the checkpoint from the config name -- verify
# in Detectron2Model before changing either value.
weights_file = config_file

model = Detectron2Model(config_file=config_file, weights_file=weights_file, threshold=threshold)

In [None]:
import os

from PIL import Image
from torchvision.io import decode_image

# --- Check 1: Detectron2 inference on a raw image file from local disk ------
# The default path is specific to the original author's machine. Set the
# BDD100K_SAMPLE_IMAGE environment variable to point at any local BDD100K
# image to run this check elsewhere.
sample_img_path = os.environ.get(
    "BDD100K_SAMPLE_IMAGE",
    "/Users/kevinchon/Documents/KE7/scenic-reasoning/data/bdd100k/images/100k/val/b1c9c847-3bda4659.jpg",
)

if os.path.isfile(sample_img_path):
    # Copy the image into memory inside a context manager so the underlying
    # file handle is closed instead of being left open for the kernel's lifetime.
    with Image.open(sample_img_path) as opened_img:
        sample_img = opened_img.copy()
    sample_img_tensor = decode_image(sample_img_path)
    print(f"sample_img_tensor dims {sample_img_tensor.shape}")
    model.identify_for_image(sample_img_tensor)
else:
    # Do not abort the whole cell (and Run All) just because the author's
    # local file is absent; the dataset-backed check below still runs.
    print(f"Skipping local-file check, image not found: {sample_img_path}")

print("---------------------------------")
# --- Check 2: inference on the first sample served by the BDD dataset -------
# This path was observed to produce proper confidence scores and class labels.
first_img = bdd[0]
first_img_tensor = first_img['image']
print(first_img_tensor.shape)

# Last expression of the cell, so the notebook displays its result.
model.identify_for_image(first_img_tensor)


In [None]:
from pprint import pprint

# The identity collate_fn hands each batch through as the raw list of samples;
# a hacky way to avoid
# "RuntimeError: each element in list of batch should be of equal size".
measurements = ObjectDetectionMeasurements(
    model,
    bdd,
    batch_size=BATCH_SIZE,
    collate_fn=lambda x: x,
)

# Warning previously observed with the imgsz used below:
# WARNING ⚠️ imgsz=[720, 1280] must be multiple of max stride 64, updating to [768, 1280]
iter_kwargs = dict(
    device=get_default_device(),
    imgsz=[720, 1280],
    bbox_offset=24,
    debug=True,
    conf=0.1,
    class_metrics=True,
    extended_summary=True,
)

# Only consume the first NUM_EXAMPLES_TO_SHOW items from the measurement iterator.
for results, ims in islice(measurements.iter_measurements(**iter_kwargs), NUM_EXAMPLES_TO_SHOW):
    pprint(results)
    # Explicit loop instead of a throwaway list comprehension: show() is
    # called purely for its side effect.
    for im in ims:
        im.show()