In [1]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as tfms
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pandas as pd
import glob
import xml.etree.ElementTree as ET
import os
%matplotlib inline

PyTorch has a very useful feature called Datasets and DataLoaders. They help us prepare data efficiently by giving us predefined templates. These templates (also called base/parent classes) let us reuse many of their existing features while still plugging in our own custom functions.

Data loading in PyTorch can be separated in 2 parts:

1. Data must be wrapped in a subclass of the `Dataset` parent class, where the methods `__getitem__` and `__len__` must be overridden. Note that at this point the data is not loaded into memory. PyTorch will only load what is needed into memory.
2. Use a `DataLoader` that will actually read the data and put it into memory.

In the notebook below we will see how we can create some common dataloaders for major computer vision problems.

### Image Classification

Image Classification is one of the major problems we solve using Deep Learning/Computer Vision. For Image Classification, as in any kind of classification problem, X consists of the image(s) and Y consists of the label.

In [2]:
class CustomImageClassification(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for image classification.

    Images are read lazily: no file is opened until ``__getitem__`` is called.

    Args:
        labels: pandas DataFrame that is read positionally -- column 0 holds the
            image file name without its extension, column 1 holds the label.
        root_dir: Directory that contains the image files.
        subset: Accepted for backward compatibility; it is not used.
        transform: Optional callable applied to the RGB PIL image.
        img_ext: Extension appended to every file name (default ``'.jpeg'``).
    """

    def __init__(self, labels, root_dir, subset=False, transform=None, img_ext='.jpeg'):
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform
        self.img_ext = img_ext

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` and return ``(image, label)``."""
        img_name = self.labels.iloc[idx, 0]  # file name (without extension)
        fullname = os.path.join(self.root_dir, str(img_name) + self.img_ext)
        # Image.open() is lazy and keeps the file handle open. convert() reads
        # the pixels into a new image, so the handle can be closed right away
        # instead of lingering until garbage collection.
        with Image.open(fullname) as img_file:
            image = img_file.convert('RGB')
        label = self.labels.iloc[idx, 1]  # category_id
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [3]:
# Load the table that maps each image to its label
labels_csv = 'DiabeticRetinopathy/Train/trainLabels.csv'
df_label = pd.read_csv(labels_csv)

In [4]:
# Check the files: collect the stems of the '.jpeg' images present on disk.
# Only '.jpeg' entries are kept because the dataset class always appends that
# extension when opening an image; any other entry in this folder (the labels
# csv lives here too) would pass the existence filter yet fail to load.
filelist = pd.Series(data = [
    stem
    for stem, ext in (os.path.splitext(x) for x in os.listdir('DiabeticRetinopathy/Train'))
    if ext == '.jpeg'
])

In [5]:
# Keep only the label rows whose image file was found on disk
has_image_file = df_label['image'].isin(filelist.values)
df_label = df_label.loc[has_image_file]

The above operation can also be merged into the dataset itself by passing the csv file as an input to the constructor (`__init__`) and doing the filtering there.

In [6]:
# Build the training dataset: each image is resized to 600x600 and turned into a tensor.
# Compose/Resize/ToTensor are taken from the `torchvision.transforms` namespace (`tfms`).
train_dataset = CustomImageClassification(
    df_label,
    'DiabeticRetinopathy/Train/',
    transform=tfms.Compose([
        tfms.Resize((600,600)),
        tfms.ToTensor(),
    ]),
)

In [7]:
# Serve the dataset in shuffled batches of two, loading in the main process (no workers)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
)

In [8]:
# Pull a single batch from the loader: the stacked images and their labels
train_batches = iter(train_loader)
img_test, lb = next(train_batches)

In [62]:
# Shape of the image batch; the recorded output below is (2, 3, 600, 600)
img_test.shape

torch.Size([2, 3, 600, 600])

In [63]:
# NOTE(review): `img` is not defined in any cell visible in this notebook, so this
# cell raises NameError on a fresh kernel (Restart & Run All). Presumably it came
# from a cell that was later removed -- TODO: restore its definition or delete this cell.
img.shape

torch.Size([1, 2592, 3888, 3])

In [64]:
# Labels returned together with the image batch fetched above
lb

tensor([ 0,  0])

### Image Localization / Object Detection

In Image localization or Object detection problems we have to predict a bounding box in addition to the class of the image.
An image can contain objects of multiple classes and hence multiple bounding boxes.

Most common format of specifying the annotations of bounding boxes is by using xml files (aka in PASCAL VOC style)
You can read more about PASCAL VOC format and dataset here - http://host.robots.ox.ac.uk/pascal/VOC/
The Dataloader in this case should output the following 

1. X - images batch (N,C,H,W) N- batch size , C - channel , H & W - Height and Width respectively
2. Y - label (N,) for each image we will have label
3. Bbox - Bounding box tensor of shape (N, 4). Each row usually holds the 2 extreme corners of the bounding box (xmin, ymin, xmax, ymax). In certain cases we instead have the center coordinates along with the height and width (xcenter, ycenter, h, w)

We can use the tensor in any format we want for further processing in the network

**Organization/Directory Structure**

- We have 2 major folders in any object detection dataset. One folder will contain Images
- Another folder will contain the annotation xml file for these images
- We should have the same name for an image and its respective annotation file (eg: raccoon-1.jpeg, raccoon-1.xml)

In [189]:
class CustomObjectDetection(Dataset):
    """Map-style dataset for object detection from PASCAL VOC style XML files.

    Every ``<object>`` entry of every annotation file becomes one sample, so an
    image with several annotated objects appears once per object.

    Args:
        img_dir: Directory containing the image files.
        annot_dir: Directory containing the ``*.xml`` annotation files.
        class_list: Class names to keep; objects of any other class are dropped.
        transform: Optional callable applied to the RGB PIL image only.
            Bounding boxes are returned exactly as annotated; they are NOT
            rescaled to follow any resizing the transform performs.
    """

    _BOX_COLUMNS = ['xmin', 'ymin', 'xmax', 'ymax']

    def __init__(self,img_dir,annot_dir,class_list,transform=None):
        self.root_dir = img_dir
        self.annot_dir = annot_dir
        self.class_list = class_list
        self.transform = transform
        # Parsed once here; the property re-reads every XML file on each access.
        self.img_annot_map = self.form_img_annot_map

    def __len__(self):
        """Return the number of annotated objects kept after class filtering."""
        return self.img_annot_map.shape[0]

    def __getitem__(self,idx):
        """Return ``([image, bbox], labels)`` for the sample at position ``idx``.

        ``bbox`` is a float32 tensor ``[xmin, ymin, xmax, ymax]`` and ``labels``
        is a one-element int32 tensor holding the integer class code.
        """
        # Positional lookup: independent of the DataFrame's index labels, and
        # it raises IndexError (not KeyError) when idx runs past the end.
        row = self.img_annot_map.iloc[idx]
        fullname = os.path.join(self.root_dir, row['filename'])
        # Image.open() is lazy and holds the file open; convert() reads the
        # pixels into a new image so the handle can be released immediately.
        with Image.open(fullname) as img_file:
            image = img_file.convert('RGB')
        labels = torch.IntTensor([int(row['class'])]) # category_id
        # Go through a plain numpy array instead of handing torch a
        # label-indexed pandas Series, and pin the dtype explicitly.
        box_values = row[self._BOX_COLUMNS].astype('float').values
        bbox = torch.tensor(box_values, dtype=torch.float32)
        if self.transform is not None:
            image = self.transform(image)
        return [image,bbox],labels

    @property
    def form_img_annot_map(self):
        """Parse ``annot_dir`` into a DataFrame with one row per annotated object.

        Columns: ``filename, width, height, class, xmin, ymin, xmax, ymax``.
        Rows whose class is not in ``class_list`` are removed, the index is
        renumbered ``0..n-1``, and ``class`` is replaced by the integer codes of
        pandas' categorical encoding of the remaining class names.
        """
        xml_list = []
        # sorted(): glob yields files in arbitrary order, which would make the
        # sample order differ from machine to machine.
        for xml_file in sorted(glob.glob(os.path.join(self.annot_dir, '*.xml'))):
            root = ET.parse(xml_file).getroot()
            objects = root.findall('object')
            if not objects:
                continue
            # Look fields up by tag name rather than by child position so a
            # different element order in the XML cannot silently swap values.
            filename = root.findtext('filename')
            width = int(root.findtext('size/width'))
            height = int(root.findtext('size/height'))
            for member in objects:
                xml_list.append((
                    filename,
                    width,
                    height,
                    member.findtext('name'),
                    int(member.findtext('bndbox/xmin')),
                    int(member.findtext('bndbox/ymin')),
                    int(member.findtext('bndbox/xmax')),
                    int(member.findtext('bndbox/ymax')),
                ))
        column_name = ['filename','width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
        xml_df = pd.DataFrame(xml_list, columns=column_name)
        # reset_index gives a fresh, gap-free frame: positions and labels agree
        # again after filtering, and the assignment below no longer targets a
        # slice of another frame (no SettingWithCopyWarning).
        xml_df = xml_df[xml_df['class'].isin(self.class_list)].reset_index(drop=True)
        xml_df['class'] = xml_df['class'].astype("category").cat.codes
        return xml_df
    

In [190]:
# Detection dataset over the raccoon images; only the 'raccoon' class is kept.
# Each image is resized to 600x600 and converted to a tensor.
test = CustomObjectDetection(
    'raccoon_dataset-master/images/',
    'raccoon_dataset-master/annotations/',
    ['raccoon'],
    transform=tfms.Compose([
        tfms.Resize((600,600)),
        tfms.ToTensor(),
    ]),
)

In [201]:
# Serve the detection dataset two samples at a time, in dataset order
test_dataloader = DataLoader(
    dataset=test,
    batch_size=2,
    shuffle=False,
)

In [202]:
# Fetch the first batch: `img_bbox` is the [images, boxes] pair, `label` the class codes
detection_batches = iter(test_dataloader)
img_bbox, label = next(detection_batches)

In [205]:
# Second element of the [images, boxes] pair: the batched bounding boxes
img_bbox[1]

tensor([[  81.,   88.,  522.,  408.],
        [ 130.,    2.,  446.,  488.]])

Here the output of the data loader contains 3 parts: 1. Image tensor 2. Tensor of bounding boxes 3. Labels