## Dataset building

In [1]:
# imports
import pathlib

import numpy as np
from torchvision.models.detection.transform import GeneralizedRCNNTransform

from pytorch_faster_rcnn_tutorial.datasets import ObjectDetectionDataSet
from pytorch_faster_rcnn_tutorial.transformations import Clip, ComposeDouble
from pytorch_faster_rcnn_tutorial.transformations import FunctionWrapperDouble
from pytorch_faster_rcnn_tutorial.transformations import normalize_01
from pytorch_faster_rcnn_tutorial.utils import get_filenames_of_path
from pytorch_faster_rcnn_tutorial.utils import stats_dataset
from pytorch_faster_rcnn_tutorial.visual import DatasetViewer

In [2]:
# Dataset root folder. The path is relative, so it is resolved against the
# kernel's current working directory.
# Alternative dataset folders (swap the literal below to switch):
#   'pytorch_faster_rcnn_tutorial/data/heads'
#   'pytorch_faster_rcnn_tutorial/data/stop_line'
#   'pytorch_faster_rcnn_tutorial/data/speed_bump'
root = pathlib.Path(
    'pytorch_faster_rcnn_tutorial/data/cross_lane'
)

In [3]:
# input and target files
# Collect the file lists from the 'input' and 'target' sub-folders of `root`.
inputs = get_filenames_of_path(root / 'input')
targets = get_filenames_of_path(root / 'target')
# Alternative pair of folders (test images with their predictions):
# inputs = get_filenames_of_path(root / 'test')
# targets = get_filenames_of_path(root / 'predictions')

# Both lists are sorted independently so that inputs[i] and targets[i] line up
# by position.
inputs.sort()
targets.sort()

# Fail early with a readable message instead of an IndexError at dataset[0]:
# a relative `root` resolved from the wrong working directory yields no files.
assert inputs, (
    f'No input files found below {root} - check `root` and the working directory.'
)
# Positional pairing is only meaningful when every input has one target;
# a missing file would silently shift all following pairs.
assert len(inputs) == len(targets), (
    f'Found {len(inputs)} input files but {len(targets)} target files below {root}.'
)

In [4]:
# Label mapping handed to the dataset: annotation label name -> integer id
# (presumably the class index used for training - confirm against
# ObjectDetectionDataSet).
# Entries used with the other datasets (swap in as needed):
#   'head': 1
#   'stopline': 1
#   'speedbump': 1
#   'bumpsign': 2
mapping = {'crossdash': 1}

In [5]:
# Preprocessing pipeline that is passed to the dataset as its `transform`.
# The steps are presumably applied in list order - confirm in ComposeDouble.
transforms = ComposeDouble(
    [
        Clip(),
        # Optional augmentations. Enabling them requires `AlbumentationWrapper`
        # and `A` (presumably albumentations), which the imports cell does
        # not provide:
        # AlbumentationWrapper(albumentation=A.HorizontalFlip(p=0.5)),
        # AlbumentationWrapper(albumentation=A.RandomScale(p=0.5, scale_limit=0.5)),
        # AlbumentationWrapper(albumentation=A.VerticalFlip(p=0.5)),
        # Move the last axis to the front (channels-last -> channels-first).
        FunctionWrapperDouble(np.moveaxis, source=-1, destination=0),
        # Presumably rescales the image to the [0, 1] range - confirm in normalize_01.
        FunctionWrapperDouble(normalize_01),
    ]
)

In [6]:
# Assemble the detection dataset from the sorted file lists, the label
# mapping and the preprocessing pipeline defined in the previous cells.
# Caching is switched off and no format conversion is requested
# (convert_to_format=None; presumably a bounding-box format - confirm).
dataset = ObjectDetectionDataSet(
    inputs=inputs,
    targets=targets,
    transform=transforms,
    use_cache=False,
    convert_to_format=None,
    mapping=mapping,
)

In [7]:
# Fetch the first sample as a quick smoke test of the whole pipeline
# (file loading + transforms); the result is indexed by key, e.g. 'x'.
sample = dataset[0]

In [8]:
# Shape of the entry stored under 'x' (presumably the image tensor); after
# the moveaxis step the channel axis is expected to come first.
sample['x'].shape

torch.Size([3, 1440, 2560])

In [14]:
# Inspect the dataset interactively in napari.
# Colour lookup for the viewer, keyed by integer id (presumably the label
# ids from `mapping` - confirm in DatasetViewer).
color_mapping = {0: 'green', 1: 'red'}

datasetviewer = DatasetViewer(dataset, color_mapping)
datasetviewer.napari()
# Open the text-properties controls for the viewer's shape layer.
datasetviewer.gui_text_properties(datasetviewer.shape_layer)

In [59]:
# Inspect the dataset in napari with the Faster R-CNN input transform handed
# to the viewer, so samples can be checked as the detection model's
# preprocessing would produce them.
# Note: this replaces the `color_mapping` of the previous viewer cell.
color_mapping = {1: 'green'}

# min_size == max_size == 1024 bounds the resized image; image_mean/image_std
# are the per-channel normalisation values (the usual ImageNet statistics).
transform = GeneralizedRCNNTransform(
    min_size=1024,
    max_size=1024,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

# NOTE(review): the keyword is spelled `rccn_transform` exactly as in the
# original call - verify against DatasetViewer's signature before renaming.
datasetviewer = DatasetViewer(dataset, color_mapping, rccn_transform=transform)
datasetviewer.napari()

## Dataset statistics

In [10]:
# Collect statistics (image size/mean/std, box size/count/area) over the
# dataset as delivered by the `transforms` pipeline; the trailing expression
# displays the resulting dict.
stats = stats_dataset(dataset)
stats

{'image_height': tensor([800., 800., 800., 800., 800., 800., 800.]),
 'image_width': tensor([960., 960., 960., 960., 960., 960., 960.]),
 'image_mean': tensor([0.5385, 0.5358, 0.5460, 0.5516, 0.5560, 0.5362, 0.5382]),
 'image_std': tensor([0.2796, 0.2972, 0.2964, 0.2941, 0.2916, 0.2944, 0.2935]),
 'boxes_height': tensor([122, 196, 228, 250, 282, 344, 381]),
 'boxes_width': tensor([12, 12, 16, 19, 22, 30, 19]),
 'boxes_num': tensor([1., 1., 1., 1., 1., 1., 1.]),
 'boxes_area': tensor([ 1464,  2352,  3648,  4750,  6204, 10320,  7239])}

In [11]:
# Same statistics, this time with the Faster R-CNN input transform passed to
# stats_dataset, to see the image and box sizes after its resizing and
# normalisation. The transform is rebuilt here so this section does not
# depend on the visualisation cells having been run.
transform = GeneralizedRCNNTransform(
    min_size=1024,
    max_size=1024,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

stats_transform = stats_dataset(dataset, transform)
stats_transform

{'image_height': tensor([864., 864., 864., 864., 864., 864., 864.]),
 'image_width': tensor([1024., 1024., 1024., 1024., 1024., 1024., 1024.]),
 'image_mean': tensor([0.3917, 0.3803, 0.4247, 0.4491, 0.4682, 0.3821, 0.3904]),
 'image_std': tensor([1.2242, 1.3030, 1.2991, 1.2888, 1.2782, 1.2902, 1.2867]),
 'boxes_height': tensor([130.1334, 209.0667, 243.2000, 266.6667, 300.8000, 366.9333, 406.4000]),
 'boxes_width': tensor([12.7950, 12.7950, 17.0600, 20.2587, 23.4575, 31.9875, 20.2587]),
 'boxes_num': tensor([1., 1., 1., 1., 1., 1., 1.]),
 'boxes_area': tensor([ 1665.0581,  2675.0110,  4148.9917,  5402.3271,  7056.0225, 11737.2988,
          8233.1465])}