## PyTorch
### Dataset 클래스

- `torch.utils.data.Dataset`은 데이터셋을 나타내는 추상 클래스
- **Dataset을 상속하고 오버라이드하여 사용해야 함**
    - `__len__`은 데이터셋의 크기를 리턴
    - `__getitem__`은 i번째 샘플을 찾는 데 사용 -> 로드한 데이터를 차례차례 돌려줌

## 1. Dataset 클래스 원형

In [1]:
# An abstract class representing a Dataset.
# All other datasets should subclass it and should override '__len__', '__getitem__'
# __len__ : that provides the size of the dataset
# __getitem__ : supporting integer indexing in range from 0 to len(self)

# Dataset 원형
class Dataset(object):
    """Abstract base class representing a dataset.

    Subclasses are expected to override ``__getitem__`` and ``__len__``;
    both raise ``NotImplementedError`` here.
    """

    def __getitem__(self, index):
        """Return the sample at ``index``; must be overridden."""
        raise NotImplementedError

    def __len__(self):
        """Return the number of samples; must be overridden."""
        raise NotImplementedError

    def __add__(self, other):
        """Return a ``ConcatDataset`` that chains ``self`` and ``other``."""
        # Imported locally: nothing else in this cell brings ConcatDataset
        # into scope, so ``a + b`` would otherwise raise NameError.
        from torch.utils.data import ConcatDataset

        return ConcatDataset([self, other])


## 2. CustomDataset, CustomDataloader
- Custom Class가 생성될 때 DataLoader에서 사용할 수 있는 최소한의 준비
    - (1) 파라미터 인자를 받아 변수에 할당
    - (2) class name list 파일을 받아 load
    - (3) Dataset 존재 유무 확인
    - (4) Dataset parsing

In [78]:
import sys
import os
import torch
import numpy as np
import glob
from PIL import Image

from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Dataset root directory and the sub-directories for images and annotations
root_path = '../../datasets/VOCdevkit/VOC2012/'
image_path = "JPEGImages/"
label_path = "Annotations/"

# Glob patterns matching image files and annotation files
img_extension = '*.jpg'
label_extension = '*.xml'

class VOC(Dataset):
    """Dataset yielding (image, annotation file path) pairs from a directory tree.

    Images are read from ``root/image_path`` and annotation files from
    ``root/label_path``. Both file lists are sorted so that the i-th image
    lines up with the i-th annotation when the two directories share base
    names.

    Args:
        root: Dataset root directory, above the image/label sub-folders.
        image_path: Sub-directory of ``root`` that holds the images.
        label_path: Sub-directory of ``root`` that holds the annotations.
        transform: Optional callable applied to the resized RGB image.
        target_transform: Optional label transform. It is stored but not
            applied, because the target returned here is a file path.
        is_train: Train/eval flag; stored on the instance only.
        resize: Side length in pixels of the square the image is resized to.
        img_extension: Glob pattern selecting image files.
        label_extension: Glob pattern selecting annotation files.

    Raises:
        RuntimeError: If the image or the label directory does not exist.
    """

    def __init__(self, root, image_path, label_path,
                 transform=None, target_transform=None,
                 is_train=True, resize=300,
                 img_extension='*.jpg', label_extension='*.xml'):
        super().__init__()
        self.root = root
        self.image_path = image_path
        self.label_path = label_path

        self.transform = transform  # image transform
        self.target_transform = target_transform  # label transform

        self.is_train = is_train
        self.resize_factor = resize

        image_dir = os.path.join(self.root, self.image_path)
        label_dir = os.path.join(self.root, self.label_path)

        # Fail early when the expected folders are missing under root
        if not (os.path.exists(image_dir) and os.path.exists(label_dir)):
            raise RuntimeError("Dataset not found")
        print('Dataset is exist')

        # Search under the ``root`` argument (not a module-level global) and
        # sort, since glob returns files in arbitrary order and samples are
        # paired by index.
        self.img_data = sorted(glob.glob(os.path.join(image_dir, img_extension)))
        self.target_data = sorted(glob.glob(os.path.join(label_dir, label_extension)))

    def __len__(self):
        """Return the number of usable samples (the shorter of the two lists)."""
        return min(len(self.img_data), len(self.target_data))

    def __getitem__(self, index):
        """Return one ``(image, annotation_path)`` pair for ``index``."""
        # The context manager closes the file once the pixels are converted
        with Image.open(self.img_data[index]) as raw:
            img = raw.convert('RGB')
        img = img.resize((self.resize_factor, self.resize_factor))

        target = self.target_data[index]

        if self.transform is not None:
            img = self.transform(img)

        # target_transform is deliberately not applied: ``target`` is the raw
        # annotation file path here, not parsed label data.

        return img, target

## 3. Run CustomDataset, CustomDataloader

In [104]:
# Build the dataset from the path constants defined above (root_path, not
# the undefined ROOT_PATH).
voc = VOC(root_path, image_path, label_path,
          transform=transforms.Compose([transforms.ToTensor()]),
          target_transform=transforms.Compose([transforms.ToTensor()]),
          is_train=True, resize=300)

# The loader has to exist before its dataset can be inspected
dataloader = DataLoader(voc, batch_size=4, shuffle=True)
print('dataset length', len(dataloader.dataset))

# Preview only the first 12 batches
for i, (img, target) in enumerate(dataloader):
    print(i, img.shape, target)
    if (i + 1) % 12 == 0:
        break

Dataset is exist
dataset length 5138
0 torch.Size([4, 3, 300, 300]) ('../../datasets/VOCdevkit/VOC2012/Annotations\\2012_003578.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2012_001370.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2011_005342.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2010_006325.xml')
1 torch.Size([4, 3, 300, 300]) ('../../datasets/VOCdevkit/VOC2012/Annotations\\2011_006841.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2010_006941.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2012_002151.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2011_005105.xml')
2 torch.Size([4, 3, 300, 300]) ('../../datasets/VOCdevkit/VOC2012/Annotations\\2012_002641.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2012_000046.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2011_005108.xml', '../../datasets/VOCdevkit/VOC2012/Annotations\\2011_003143.xml')
3 torch.Size([4, 3, 300, 300]) ('../../datasets/VOCdevkit/VOC2012/Annotations\\2012_001

## 참고문헌
- https://deepbaksuvision.github.io/Modu_ObjectDetection/posts/03_01_dataloader.html