In [1]:
from itertools import islice
from scenic_reasoning.measurements.ObjectDetection import ObjectDetectionMeasurements
from scenic_reasoning.utilities.common import get_default_device
import torch
from ultralytics.data.augment import LetterBox

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Number of measurement results taken from the measurement iterator
# (used as the islice stop in the measurement loop).
NUM_EXAMPLES_TO_SHOW = 1
# Batch size passed to ObjectDetectionMeasurements.
BATCH_SIZE = 1

In [3]:
# Shared letterbox transform, configured once with target shape (720, 1280).
# NOTE(review): an earlier step comment described the resize target as
# 768x1280, but the configured shape is 720x1280 — confirm which is intended.
shape_transform = LetterBox(new_shape=(720, 1280))


def transform_image_for_yolo(image: torch.Tensor):
    """Letterbox an image tensor and rescale its values by 1/255.

    Args:
        image: Image tensor whose first axis is moved to the last position
            before letterboxing (i.e. presumably channel-first, CHW — confirm
            against the dataset). Must be convertible with ``.numpy()``.

    Returns:
        A float32 tensor with the axes moved back to the input's order and
        every value divided by 255 (assumes the input is in the 0-255 range —
        TODO confirm).
    """
    # Tensor -> channel-last numpy array, the layout handed to LetterBox.
    hwc_array = image.permute(1, 2, 0).numpy()
    # Letterbox to the shape configured in `shape_transform`.
    letterboxed = shape_transform(image=hwc_array)
    # Back to a tensor with the original axis order.
    chw_tensor = torch.tensor(letterboxed).permute(2, 0, 1)
    # Cast to float32 and scale by 1/255.
    return chw_tensor.to(torch.float32) / 255.0

In [13]:
from scenic_reasoning.data.ImageLoader import Bdd100kDataset

'''
'transform_image_for_yolo' was isolated as the source of the problems with Bdd100kDataset,
which is understandable given that this transform fn was originally intended for YOLO format.
Instead, we run inference on the original 720x1280 input resolution.
'''
'''
bdd = Bdd100kDataset(
    split="val", 
    # YOLO requires images to be 640x640 or 768x1280, 
    # but BDD100K images are 720x1280 so we need to resize
    transform=transform_image_for_yolo,  
    use_original_categories=False,
    use_extended_annotations=False,
)
'''

# Validation split loaded with no `transform` argument, so samples are used
# exactly as the dataset yields them.
bdd = Bdd100kDataset(
    split="val", use_original_categories=False, use_extended_annotations=False
)

# Sanity check: dataset repr and number of samples ...
print(bdd, len(bdd), sep="\n")

# ... then the first sample's image shape and its label count.
first_img = bdd[0]
first_img_tensor = first_img["image"]
print(first_img_tensor.shape, len(first_img["labels"]), sep="\n")

<scenic_reasoning.data.ImageLoader.Bdd100kDataset object at 0x383392600>
10000
torch.Size([3, 720, 1280])
34


In [5]:
from scenic_reasoning.models.UltralyticsYolo import Yolo

# https://docs.ultralytics.com/models/yolov5/#performance-metrics
# model = Yolo(model="../yolov5x6u.pt") # v5 can handle 1280 while v8 can handle 640. makes no sense ><

In [6]:
from scenic_reasoning.models.Detectron import Detectron2Model

# Config path handed to the Detectron2 wrapper (a Faster R-CNN R50-FPN 3x
# COCO detection config, judging by its name).
config_file = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# NOTE(review): the weights entry is the same YAML path as the config —
# presumably the wrapper resolves the checkpoint from the config name; confirm.
weights_file = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
# Passed through as the wrapper's `threshold` (presumably a confidence cutoff).
threshold = 0.5

model = Detectron2Model(
    threshold=threshold, weights_file=weights_file, config_file=config_file
)

In [14]:
from PIL import Image
from torchvision.io import decode_image

import os
from pathlib import Path

# Directory holding the BDD100K validation images. Set the BDD100K_VAL_DIR
# environment variable to point at your own copy; the fallback is the original
# author's local checkout and will not exist on other machines.
bdd100k_val_dir = Path(
    os.environ.get(
        "BDD100K_VAL_DIR",
        "/Users/kevinchon/Documents/KE7/scenic-reasoning/data/bdd100k/images/100k/val",
    )
)
sample_img_path = str(bdd100k_val_dir / "b1c9c847-3bda4659.jpg")
if not os.path.isfile(sample_img_path):
    # Fail early with an actionable message instead of an opaque decode error.
    raise FileNotFoundError(
        f"Sample image not found at {sample_img_path}; "
        "set BDD100K_VAL_DIR to the directory containing the BDD100K val images."
    )

# 1) Detectron2 inference on a raw image decoded straight from disk.
#    decode_image reads the file itself, so no separate PIL handle is opened.
sample_img_tensor = decode_image(sample_img_path)
print(f"sample_img_tensor dims {sample_img_tensor.shape}")
model.identify_for_image(sample_img_tensor)

print("---------------------------------")
# 2) Same model on the first sample served by the BDD dataset. The original
#    author observed proper confidence scores and classification labels here.
first_img = bdd[0]
first_img_tensor = first_img['image']
print(first_img_tensor.shape)

# Last expression of the cell, so its return value is shown as the cell output.
model.identify_for_image(first_img_tensor)


sample_img_tensor dims torch.Size([3, 720, 1280])
image should be CHW: torch.Size([3, 720, 1280])
image should be HWC: (720, 1280, 3)
Image to predict: (720, 1280, 3)
Predictions: {'instances': Instances(num_instances=18, image_height=720, image_width=1280, fields=[pred_boxes: Boxes(tensor([[ 831.0070,  322.6685, 1206.9893,  521.2864],
        [ 765.7220,  303.8628,  938.9324,  441.8760],
        [ 732.2385,  319.1121,  792.3572,  390.5504],
        [ 687.2977,  331.2784,  711.5379,  353.2328],
        [ 457.9689,  265.3659,  590.2451,  410.5630],
        [ 714.3595,  328.7690,  743.0000,  370.7492],
        [ 248.7974,  332.9140,  343.5569,  391.2460],
        [ 602.7080,  336.1413,  626.0717,  352.5395],
        [ 163.8347,  342.5507,  266.0285,  403.6561],
        [  33.5951,  332.2769,  246.5630,  414.9995],
        [ 386.9363,  338.2810,  445.6151,  375.8312],
        [ 670.0027,  333.3142,  689.0413,  344.9504],
        [ 753.6214,  321.5774,  796.9987,  415.9343],
        [ 583.

[<scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x35be1cd10>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x349e23ce0>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x37766c770>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8cd40>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8cf50>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8d040>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8cda0>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8ce60>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8d160>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8d100>,
 <scenic_reasoning.interfaces.ObjectDetectionI.ObjectDetectionResultI at 0x36cb8d250>,
 <scenic_reasoning.interfaces.ObjectDetecti

In [15]:
from pprint import pprint

# The identity collate_fn passes each batch through unchanged; the original
# author used it as a workaround for
# "RuntimeError: each element in list of batch should be of equal size".
measurements = ObjectDetectionMeasurements(
    model, bdd, batch_size=BATCH_SIZE, collate_fn=lambda x: x
)

# Warning recorded for this image size:
# WARNING ⚠️ imgsz=[720, 1280] must be multiple of max stride 64, updating to [768, 1280]
measurement_stream = measurements.iter_measurements(
    device=get_default_device(),
    imgsz=[720, 1280],
    bbox_offset=24,
    debug=True,
    conf=0.1,
    class_metrics=True,
    extended_summary=True,
)

# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'list' object has no attribute 'as_xyxy'". The cause is not
# visible from this notebook — possibly the identity collate_fn or the
# detector's return type; confirm inside ObjectDetectionMeasurements.
for results, ims in islice(measurement_stream, NUM_EXAMPLES_TO_SHOW):
    pprint(results)
    # Show each returned image for this result.
    for im in ims:
        im.show()

Image to predict: (720, 1280, 3)
Predictions: {'instances': Instances(num_instances=19, image_height=720, image_width=1280, fields=[pred_boxes: Boxes(tensor([[6.8363e+02, 3.5803e+02, 7.1776e+02, 3.9149e+02],
        [3.9739e-01, 3.4041e+02, 5.2140e+01, 4.0087e+02],
        [2.4966e+02, 3.4410e+02, 3.4852e+02, 3.9540e+02],
        [7.9386e+02, 3.6018e+02, 9.0129e+02, 4.2734e+02],
        [4.5918e+01, 3.4805e+02, 1.2667e+02, 3.9822e+02],
        [7.2831e+02, 3.6429e+02, 7.5923e+02, 4.0362e+02],
        [1.2017e+03, 3.9434e+02, 1.2791e+03, 5.2392e+02],
        [8.8209e+02, 3.7626e+02, 9.5036e+02, 4.4668e+02],
        [7.5011e+02, 3.5995e+02, 7.8137e+02, 4.0815e+02],
        [7.5801e+02, 3.6191e+02, 8.0814e+02, 4.1125e+02],
        [9.3557e+02, 3.3723e+02, 1.2033e+03, 4.8237e+02],
        [7.0548e+02, 3.6375e+02, 7.3333e+02, 3.9648e+02],
        [2.0293e+02, 3.3985e+02, 2.5388e+02, 3.8553e+02],
        [2.0583e+02, 3.3887e+02, 3.0272e+02, 3.8574e+02],
        [4.4592e+02, 3.5903e+02, 4.618

AttributeError: 'list' object has no attribute 'as_xyxy'