# inf_ufg_dataset

> Module for handling the loading of bags collected with the INF-UFG car that were previously converted to the Semantic KITTI format.

In [38]:
#| default_exp inf_ufg_dataset

In [39]:
#| export
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import numpy as np

In [52]:
#| export
class InfUFGDataset(Dataset):
    """Load the inf_ufg data in a pytorch Dataset object.

    The frame files are indexed once at construction time and read lazily
    from disk, one file per `__getitem__` call.
    """
    def __init__(self, data_path, is_train=True, transform=None):
        """Index the velodyne frames found under `data_path/'sequences'`.

        Args:
            data_path: Root folder of the dataset (str or Path).
            is_train: When True, index every `*.bin` file matching
                `*0[0-9]/velodyne/*.bin` below the sequences folder.
                When False nothing is indexed (the validation split is
                not implemented), so the dataset is empty.
            transform: Optional callable invoked as `transform(frame, label)`
                that returns a `(frame, label, mask)` tuple.
        """
        data_path = Path(data_path)
        self.velodyne_path = data_path/'sequences'

        # Collect the frame files that belong to the requested split.
        velodyne_fns = []
        if is_train:
            query = '*0[0-9]/velodyne/*.bin'
            # rglob yields entries in filesystem order; sorting keeps the
            # index -> frame mapping identical across machines and runs.
            velodyne_fns += sorted(self.velodyne_path.rglob(query))
        # Validation split: not implemented yet, the file list stays empty.

        # Keep only what is needed to rebuild each path in __getitem__:
        # the file stem and the name of the sequence folder (two levels up).
        self.frame_ids = [fn.stem for fn in velodyne_fns]
        self.frame_sequences = [fn.parts[-3] for fn in velodyne_fns]

        self.transform = transform
        self.is_train = is_train

    def set_transform(self, transform):
        "Replace the transform applied to every sample in `__getitem__`."
        self.transform = transform

    def __len__(self):
        "Number of indexed frames."
        return len(self.frame_ids)

    def __getitem__(self, idx):
        """Read frame `idx` from disk and return `(frame, label, mask)`.

        `frame` is a float32 array of shape (N, 4). `label` and `mask` are
        None unless a transform is set, in which case all three values are
        whatever `transform(frame, None)` returns.
        """
        frame_id = self.frame_ids[idx]
        frame_sequence = self.frame_sequences[idx]

        frame_path = self.velodyne_path/frame_sequence/'velodyne'/(frame_id + '.bin')
        with open(frame_path, 'rb') as f:
            # Flat float32 buffer regrouped into rows of 4 values per point.
            frame = np.fromfile(f, dtype=np.float32).reshape(-1, 4)

        # No labels are loaded for this dataset.
        label = None

        mask = None
        if self.transform:
            frame, label, mask = self.transform(frame, label)

        return frame, label, mask

To use it, first download the *KITTI_18-05-24* archive from this [link](https://drive.google.com/file/d/1hbJS4wqi1fBeg3IrL3-RilOPfGj0RrRk/view?usp=sharing) (remember to use a UFG email) and extract it into a folder called *data* at the root of your workspace. Then use the following code to get the training data in its original format:

In [41]:
#| eval: false
# Folder the archive was extracted into; frames are searched under `sequences/` inside it.
data_path = '/workspace/data'
# No transform is passed, so samples come back exactly as read from disk.
ds = InfUFGDataset(data_path)
# Fetch a single sample by index.
frame, label, mask = ds[128]
# Number of frames indexed for the training split.
len(ds)

9382

Without any transform set, the frame is simply read into a numpy array, and both label and mask are **None**.

In [42]:
#| eval: false
# Show the raw points and their shape: one row per point, 4 float32 values each.
frame, frame.shape

(array([[ 3.034256 ,  5.3260975, -1.6424657,  0.06     ],
        [ 3.489289 ,  6.119853 , -1.6263963,  0.05     ],
        [ 4.1641755,  7.3005815, -1.6337067,  0.03     ],
        ...,
        [ 7.1781106, 11.202549 ,  2.5862253,  0.1      ],
        [ 6.733394 , 10.500441 ,  2.8798237,  0.07     ],
        [ 7.006298 , 10.917624 ,  3.4759398,  0.04     ]], dtype=float32),
 (29184, 4))

In [43]:
#| eval: false
# Identity check: `== None` would compare element-wise if a transform ever returned an array mask.
mask is None

True

In [54]:
#| hide
# NOTE(review): presumably writes the `#| export` cells to the module named by `default_exp` — confirm against the nbdev docs.
import nbdev; nbdev.nbdev_export()