In [15]:
import os
import glob
import numpy as np
import pandas as pd

In [16]:
from PIL import Image

import torch
from torch_snippets import find
from torch.utils.data import Dataset

In [17]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Code type using for Pytorch: {device}")

Code type using for Pytorch: cuda


## Load labels

In [18]:
# Root folder holding the annotation CSV.
data_root = "data"
labels_csv_path = os.path.join(data_root, "df.csv")
# NOTE(review): the preview below shows an 'Unnamed: 0' column, so the CSV
# appears to carry a saved index; it is read as-is here.
labels_df = pd.read_csv(labels_csv_path)
labels_df.head(10)

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,...,IsDepiction,IsInside,XClick1X,XClick2X,XClick3X,XClick4X,XClick1Y,XClick2Y,XClick3Y,XClick4Y
0,0000599864fd15b3,xclick,Bus,1,0.34375,0.90875,0.156162,0.650047,1,0,...,0,0,0.421875,0.34375,0.795,0.90875,0.156162,0.5127,0.650047,0.457197
1,00006bdb1eb5cd74,xclick,Truck,1,0.276667,0.6975,0.141604,0.437343,1,0,...,0,0,0.299167,0.276667,0.6975,0.659167,0.141604,0.241855,0.35213,0.437343
2,00006bdb1eb5cd74,xclick,Truck,1,0.7025,0.999167,0.204261,0.409774,1,1,...,0,0,0.849167,0.7025,0.906667,0.999167,0.204261,0.398496,0.409774,0.295739
3,00010bf498b64bab,xclick,Bus,1,0.15625,0.37125,0.269188,0.705228,0,0,...,0,0,0.274375,0.37125,0.311875,0.15625,0.269188,0.493882,0.705228,0.521691
4,00013f14dd4e168f,xclick,Bus,1,0.2875,0.999375,0.194184,0.999062,0,1,...,0,0,0.92,0.999375,0.64875,0.2875,0.194184,0.30394,0.999062,0.523452
5,0002914fa805e227,xclick,Truck,1,0.06125,0.966875,0.125399,0.974495,1,0,...,1,0,0.756875,0.966875,0.501875,0.06125,0.974495,0.5983,0.125399,0.382572
6,00036603fa39fa85,xclick,Truck,1,0.058125,0.983125,0.053333,0.94,0,0,...,0,0,0.058125,0.59875,0.983125,0.71,0.55,0.053333,0.337778,0.94
7,0004d5a9dd44ab6a,xclick,Truck,1,0.035625,0.188125,0.189893,0.320827,0,0,...,0,0,0.115,0.035625,0.139375,0.188125,0.189893,0.245023,0.320827,0.2366
8,0004d5a9dd44ab6a,xclick,Truck,1,0.094375,0.8975,0.147014,0.93415,0,0,...,0,0,0.365,0.094375,0.33375,0.8975,0.147014,0.609495,0.93415,0.822358
9,0004d5a9dd44ab6a,xclick,Truck,1,0.860625,0.999375,0.249617,0.390505,1,0,...,0,0,0.9925,0.999375,0.860625,0.92125,0.249617,0.294028,0.325421,0.390505


In [19]:
# Class ids start at 1 in order of first appearance; id 0 is kept for background.
label2target = {
    name: class_id
    for class_id, name in enumerate(labels_df.LabelName.unique(), start=1)
}
label2target['background'] = 0
label2target

{'Bus': 1, 'Truck': 2, 'background': 0}

In [20]:
# Reverse lookup: integer class id -> label name.
target2label = dict(zip(label2target.values(), label2target.keys()))
target2label

{1: 'Bus', 2: 'Truck', 0: 'background'}

## Process Images

In [21]:
def preprocess_image(img):
    """Turn an image array into a channels-first float tensor on ``device``.

    Args:
        img: array-like whose three axes are reordered from (0, 1, 2) to
            (2, 0, 1) — presumably (H, W, C) -> (C, H, W); confirm with caller.

    Returns:
        A ``torch.float32`` tensor placed on the module-level ``device``.
    """
    channels_first = torch.tensor(img).permute(2, 0, 1)
    on_device = channels_first.to(device)
    return on_device.float()

In [27]:
class ImageLabelDataset(Dataset):
    """Object-detection dataset: one item per unique ``ImageID`` in the labels frame.

    Each item is ``(image_tensor, target_dict)`` where ``target_dict`` holds
    ``'boxes'`` (float32, one ``[XMin, YMin, XMax, YMax]`` row per annotation,
    scaled to the resized image) and ``'labels'`` (int64 class ids).
    """
    # Every image is resized to W x H; the fractional box coordinates from the
    # labels frame are multiplied by the same values so boxes and pixels agree.
    W, H = 224, 224

    def __init__(self, labels_df, image_root_dir, label2target=None):
        """Index the image directory and the annotation frame.

        Args:
            labels_df: DataFrame with ``ImageID``, ``LabelName``, ``XMin``,
                ``YMin``, ``XMax`` and ``YMax`` columns.
            image_root_dir: directory whose entries are searched for the file
                belonging to each image id.
            label2target: optional mapping from label name to integer class
                id. When omitted, the module-level ``label2target`` is looked
                up at item-access time, which is the original behaviour.
        """
        self.image_dir = image_root_dir
        self.files = glob.glob(os.path.join(self.image_dir, '*'))
        self.df = labels_df
        self.image_infos = labels_df.ImageID.unique()
        self._label2target = label2target

    def _build_target(self, image_id):
        """Return the ``{'boxes', 'labels'}`` target dict for one image id."""
        # Filter once; no need to copy the whole labels frame on every access.
        rows = self.df.loc[self.df['ImageID'] == image_id]
        # Take an explicit writable copy: the in-place scaling below must never
        # touch the shared frame, and ``.values`` may hand back a read-only
        # array when pandas Copy-on-Write is active.
        boxes = rows[['XMin', 'YMin', 'XMax', 'YMax']].to_numpy(dtype=np.float64, copy=True)
        boxes[:, [0, 2]] *= self.W
        boxes[:, [1, 3]] *= self.H
        # Truncate to whole pixels (absolute coordinates), then store as float32.
        boxes = boxes.astype(np.uint32).astype(np.float32)
        mapping = self._label2target if self._label2target is not None else label2target
        class_ids = [mapping[name] for name in rows['LabelName']]
        return {
            'boxes': torch.from_numpy(boxes),
            'labels': torch.tensor(class_ids, dtype=torch.long),
        }

    def __getitem__(self, ix):
        """Load, resize and scale image ``ix`` and build its detection target.

        Returns:
            ``(img, target_dict)``: the tensor produced by ``preprocess_image``
            and the dict produced by ``_build_target``.
        """
        image_id = self.image_infos[ix]
        # assumes `find` yields the single path containing image_id — TODO confirm
        img_path = find(image_id, self.files)
        # The context manager releases the file handle once pixels are decoded.
        with Image.open(img_path) as raw:
            rgb = raw.convert("RGB")
        # Resize to the size the boxes are scaled to, and map pixels to [0, 1].
        img = np.array(rgb.resize((self.W, self.H), resample=Image.BILINEAR))/255.
        target_dict = self._build_target(image_id)
        return preprocess_image(img), target_dict

    def __len__(self):
        """Number of unique image ids in the labels frame."""
        return len(self.image_infos)

In [31]:
# Build the dataset over the annotated images and pull one sample to inspect.
image_ds = ImageLabelDataset(
    labels_df=labels_df,
    image_root_dir="data/images/images",
)
sample = image_ds[10]
img_ary, tgt = sample

In [32]:
# Display the image tensor returned for sample 10 (pixel values were divided by 255).
img_ary

tensor([[[0.2941, 0.2627, 0.2902,  ..., 0.2078, 0.2039, 0.2000],
         [0.3255, 0.3373, 0.3412,  ..., 0.2627, 0.2667, 0.2667],
         [0.3490, 0.3569, 0.3608,  ..., 0.3137, 0.3137, 0.3137],
         ...,
         [0.1922, 0.1686, 0.1725,  ..., 0.2157, 0.2196, 0.2196],
         [0.2392, 0.2235, 0.2314,  ..., 0.2157, 0.2157, 0.2157],
         [0.2549, 0.2431, 0.2627,  ..., 0.2118, 0.2118, 0.2118]],

        [[0.2824, 0.2510, 0.2784,  ..., 0.2275, 0.2235, 0.2196],
         [0.3137, 0.3255, 0.3294,  ..., 0.2824, 0.2863, 0.2863],
         [0.3451, 0.3490, 0.3529,  ..., 0.3333, 0.3333, 0.3333],
         ...,
         [0.1686, 0.1490, 0.1529,  ..., 0.2392, 0.2431, 0.2471],
         [0.2118, 0.2000, 0.2039,  ..., 0.2353, 0.2392, 0.2431],
         [0.2275, 0.2196, 0.2275,  ..., 0.2314, 0.2353, 0.2392]],

        [[0.3176, 0.2863, 0.3137,  ..., 0.2510, 0.2471, 0.2431],
         [0.3490, 0.3608, 0.3647,  ..., 0.3059, 0.3098, 0.3098],
         [0.3765, 0.3843, 0.3843,  ..., 0.3569, 0.3569, 0.

In [33]:
# Display the matching target dict: scaled bounding boxes and integer class ids.
tgt

{'boxes': tensor([[  0.,   0., 223., 223.]]), 'labels': tensor([1])}