In [None]:
import torch
import sys
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

PROJECT_NAME = "customRCNN"


def find_project_dir(start, project_name):
    """Locate the project root directory from a starting path.

    Args:
        start: Path to start from; it is made absolute with ``os.path.abspath``.
        project_name: Directory name of the project root.

    Returns:
        The absolute path up to and including the first path component that is
        exactly ``project_name``. When no component matches exactly, the legacy
        rule is applied instead: everything before the first textual occurrence
        of ``project_name`` in the path, joined with ``project_name``.
    """
    start = os.path.abspath(start)
    parts = start.split(os.sep)
    if project_name in parts:
        # Compare whole path components so that a directory such as
        # "customRCNN_old" earlier in the path is not mistaken for the root.
        return os.sep.join(parts[: parts.index(project_name) + 1])
    # No exact component: keep the original substring-based result so existing
    # setups resolve to the same directory as before.
    return os.path.join(start.split(project_name)[0], project_name)


PROJECT_DIR = find_project_dir('.', PROJECT_NAME)
DETECTION_DIR = os.path.join(PROJECT_DIR, "DeepDataMiningLearning", "detection")

if not os.path.isdir(DETECTION_DIR):
    # Point at the real cause before the project imports fail with a bare
    # ModuleNotFoundError.
    print(
        f"WARNING: detection directory not found: {DETECTION_DIR} "
        f"(is the notebook running from inside '{PROJECT_NAME}'?)",
        file=sys.stderr,
    )

# Final order matches the original cell (PROJECT_DIR first, DETECTION_DIR second);
# stale entries are dropped first so re-running the cell does not stack duplicates.
for _path in (DETECTION_DIR, PROJECT_DIR):
    while _path in sys.path:
        sys.path.remove(_path)
    sys.path.insert(0, _path)

from dataset_nuscenescoco import NuscenesCOCODataset
from dataset_nuscenes import create_nuscenes_transforms
from dataset_waymococo import WaymoCOCODataset, get_transformsimple


To enable Open3D visualization, activate the py312 environment:
  conda activate py312
  python scripts/nuscenes.py [your_arguments]
✅ Successfully imported nuscenes utilities


In [None]:
# nuScenes category names keyed by integer class id; ids are assigned in
# listing order starting at 0.
NUSCENES_CLASS_NAMES = dict(enumerate((
    'car',
    'truck',
    'bus',
    'trailer',
    'construction_vehicle',
    'pedestrian',
    'motorcycle',
    'bicycle',
    'traffic_cone',
    'barrier',
)))

# Waymo category names keyed by integer class id; ids start at 1 (no entry for 0).
WAYMO_CLASS_NAMES = dict(enumerate(
    ("Vehicle", "Pedestrian", "Cyclist", "Sign"),
    start=1,
))

## 1. Load datasets

In [3]:
def get_nuscenes_dataset(train=True, root=None):
    """Build the COCO-format nuScenes detection dataset.

    Args:
        train: Forwarded to both ``create_nuscenes_transforms`` and
            ``NuscenesCOCODataset``. Defaults to ``True``, the value this
            notebook previously hard-coded.
        root: Dataset directory. The annotation path passed to the dataset is
            ``<root>/annotations.json``. Defaults to
            ``<PROJECT_DIR>/data/nuscenes_subset_coco_step10``.

    Returns:
        The constructed ``NuscenesCOCODataset`` instance.
    """
    if root is None:
        root = os.path.join(PROJECT_DIR, 'data', 'nuscenes_subset_coco_step10')

    # Derive the annotation path from `root` so the two can never drift apart.
    return NuscenesCOCODataset(
        root=root,
        annotation=os.path.join(root, 'annotations.json'),
        train=train,
        transform=create_nuscenes_transforms(train=train),
    )

def get_waymo_dataset(train=True, root=None):
    """Build the COCO-format Waymo detection dataset.

    Args:
        train: Forwarded to ``WaymoCOCODataset``. Defaults to ``True``, the
            value this notebook previously hard-coded.
        root: Dataset directory. The annotation path passed to the dataset is
            ``<root>/annotations.json``. Defaults to ``<PROJECT_DIR>/data/waymo``.

    Returns:
        The constructed ``WaymoCOCODataset`` instance.
    """
    if root is None:
        root = os.path.join(PROJECT_DIR, 'data', 'waymo')

    # Derive the annotation path from `root` so the two can never drift apart.
    return WaymoCOCODataset(
        root=root,
        annotation=os.path.join(root, 'annotations.json'),
        train=train,
        transform=get_transformsimple(None),
    )

# Instantiate both datasets once; later cells index into `nd` (nuScenes) and
# read `wd.INSTANCE_CATEGORY_NAMES` (Waymo).
nd = get_nuscenes_dataset()
wd = get_waymo_dataset()

loading annotations into memory...
Done (t=0.13s)
creating index...
index created!
loading annotations into memory...
Done (t=0.49s)
creating index...
index created!


In [11]:
# Index -> category-name lookup built from the Waymo dataset's category list.
# The bare name on the last line makes the notebook display the mapping.
label_dict = dict(enumerate(wd.INSTANCE_CATEGORY_NAMES))
label_dict

{0: '__background__',
 1: 'Vehicles',
 2: 'Pedestrians',
 3: 'Cyclists',
 4: 'Signs'}

In [15]:
# Peek at the first nuScenes sample: the image tensor's shape and the first ten
# values of row 0 in channel 0.
images, targets = nd[0]
print(f"image size = {images.shape}")  # [C, H, W]
print(f"image sample = {images[0, 0, :10]}")

image size = torch.Size([3, 900, 1600])
image sample = tensor([2.2489, 2.2489, 2.2489, 2.2489, 2.2489, 2.2489, 2.2489, 2.2489, 2.2489,
        2.2489])


## 2. Load baseline model

In [18]:
import torchvision
from torchvision.models import get_model, get_model_weights, get_weight, list_models
from src.modeling.modeling_rpnfasterrcnn import CustomRCNN

# List the model names registered under torchvision.models.detection.
detectionmodel_names=list_models(module=torchvision.models.detection)
print("Torchvision detection models:", detectionmodel_names)

Torchvision detection models: ['fasterrcnn_mobilenet_v3_large_320_fpn', 'fasterrcnn_mobilenet_v3_large_fpn', 'fasterrcnn_resnet50_fpn', 'fasterrcnn_resnet50_fpn_v2', 'fcos_resnet50_fpn', 'keypointrcnn_resnet50_fpn', 'maskrcnn_resnet50_fpn', 'maskrcnn_resnet50_fpn_v2', 'retinanet_resnet50_fpn', 'retinanet_resnet50_fpn_v2', 'ssd300_vgg16', 'ssdlite320_mobilenet_v3_large']


In [19]:
backbonename = 'resnet50'
trainable_layers = 2
#layers_to_train = ["layer4", "layer3", "layer2", "layer1", "conv1"]
# NOTE(review): NUSCENES_CLASS_NAMES lists 10 object classes (ids 0-9). torchvision-style
# detection heads count background as an extra class -- confirm whether CustomRCNN
# expects 10 or 11 here.
num_classes = 10
model = CustomRCNN(
    backbone_modulename=backbonename,
    trainable_layers=trainable_layers,
    num_classes=num_classes,
    out_channels=256,
    min_size=800,
    max_size=1333,
)
model.eval()

images, targets = nd[0]
# Inference only: without no_grad() the returned tensors carry a grad_fn, i.e. an
# autograd graph is built and kept alive for a forward pass that is never backpropagated.
with torch.no_grad():
    output = model([images], [targets])

# One (key, shape) pair per tensor in the first image's prediction dict.
print([(k, v.shape) for k, v in output[0].items()])

[('boxes', torch.Size([100, 4])), ('labels', torch.Size([100])), ('scores', torch.Size([100]))]


In [None]:
# sample output: box, label and score of the first detection for the first image
box, label, score = (output[0][field][0] for field in ('boxes', 'labels', 'scores'))

tensor([4, 4, 4, 4, 5, 4, 4, 4, 2, 4, 4, 5, 4, 5, 8, 4, 4, 4, 4, 4, 4, 5, 5, 7,
        4, 5, 4, 4, 5, 4, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 2, 4, 4,
        4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 8, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, 4, 3, 5, 4, 4, 4, 4, 8, 4,
        4, 4, 4, 4])

In [24]:
# Display the confidence scores of all detections returned for the first image.
output[0]['scores']

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 0.9999, 0.9999, 0.9999, 0.9999, 0.9999, 0.9999,
        0.9999, 0.9998, 0.9998, 0.9998, 0.9998, 0.9998, 0.9997, 0.9997, 0.9997,
        0.9996, 0.9995, 0.9995, 0.9994, 0.9993, 0.9993, 0.9992, 0.9992, 0.9990,
        0.9990, 0.9989, 0.9988, 0.9984, 0.9982, 0.9982, 0.9982, 0.9979, 0.9975,
        0.9973, 0.9972, 0.9972, 0.9972, 0.9971, 0.9969, 0.9968, 0.9965, 0.9963,
        0.9958, 0.9958, 0.9956, 0.9956, 0.9951, 0.9944, 0.9941, 0.9939, 0.9935,
        0.9935, 0.9932, 0.9932, 0.9930, 0.9918, 0.9912, 0.9910, 0.9906, 0.9899,
        0.9898], grad_fn=<IndexBackward0>)