In [1]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np
import cv2
import os

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
# faster rcnn model이 포함된 library
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torch.utils.data import DataLoader, Dataset
import pandas as pd
from tqdm import tqdm

In [2]:
class CustomDataset(Dataset):
    '''
      data_dir: data가 존재하는 폴더 경로
      transforms: data transform (resize, crop, Totensor, etc,,,)
    '''

    def __init__(self, annotation, data_dir, transforms=None):
        super().__init__()
        self.data_dir = data_dir
        # coco annotation 불러오기 (coco API)
        self.coco = COCO(annotation)
        self.predictions = {
            "images": self.coco.dataset["images"].copy(),
            "categories": self.coco.dataset["categories"].copy(),
            "annotations": None
        }
        self.transforms = transforms

    def __getitem__(self, index: int):
        
        image_id = self.coco.getImgIds(imgIds=index)

        image_info = self.coco.loadImgs(image_id)[0]
        
        image = cv2.imread(os.path.join(self.data_dir, image_info['file_name']))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        ann_ids = self.coco.getAnnIds(imgIds=image_info['id'])
        anns = self.coco.loadAnns(ann_ids)

        boxes = np.array([x['bbox'] for x in anns])

        # boxex (x_min, y_min, x_max, y_max)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        
        labels = np.array([x['category_id'] for x in anns])
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        areas = np.array([x['area'] for x in anns])
        areas = torch.as_tensor(areas, dtype=torch.float32)
                                
        is_crowds = np.array([x['iscrowd'] for x in anns])
        is_crowds = torch.as_tensor(is_crowds, dtype=torch.int64)
                                
        segmentation = np.array([x['segmentation'] for x in anns], dtype=object)

        target = {'boxes': boxes, 'labels': labels, 'image_id': torch.tensor([index]), 'area': areas,
                  'iscrowd': is_crowds}

        # transform
        if self.transforms:
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': labels
            }
            sample = self.transforms(**sample)
            image = sample['image']
            target['boxes'] = torch.tensor(sample['bboxes'], dtype=torch.float32)

        return image, target, image_id
    
    def __len__(self) -> int:
        return len(self.coco.getImgIds())

In [3]:
def get_train_transform():
    return A.Compose([
        A.Resize(512, 512),
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [4]:
annotation = '../input/data/train.json'
data_dir = '../input/data'
train_dataset = CustomDataset(annotation, data_dir, get_train_transform())

loading annotations into memory...
Done (t=3.59s)
creating index...
index created!


In [None]:
train_loader = 

In [14]:
# image, target, image_id = train_dataset[0]
train_dataset[0]

(tensor([[[0.3961, 0.3922, 0.3922,  ..., 0.6039, 0.6039, 0.6000],
          [0.3961, 0.3961, 0.3922,  ..., 0.6000, 0.6000, 0.6000],
          [0.3961, 0.3961, 0.3961,  ..., 0.5961, 0.5961, 0.5961],
          ...,
          [0.4824, 0.4824, 0.4784,  ..., 0.5098, 0.5059, 0.5020],
          [0.4863, 0.4824, 0.4784,  ..., 0.5137, 0.5059, 0.5059],
          [0.4863, 0.4824, 0.4784,  ..., 0.5137, 0.5137, 0.5098]],
 
         [[0.2235, 0.2196, 0.2196,  ..., 0.4627, 0.4627, 0.4588],
          [0.2235, 0.2235, 0.2196,  ..., 0.4588, 0.4588, 0.4588],
          [0.2235, 0.2235, 0.2235,  ..., 0.4549, 0.4549, 0.4549],
          ...,
          [0.4039, 0.4039, 0.3961,  ..., 0.4196, 0.4157, 0.4118],
          [0.4078, 0.4039, 0.3961,  ..., 0.4235, 0.4157, 0.4157],
          [0.4078, 0.4039, 0.3961,  ..., 0.4235, 0.4235, 0.4196]],
 
         [[0.0784, 0.0745, 0.0745,  ..., 0.3608, 0.3608, 0.3569],
          [0.0784, 0.0784, 0.0745,  ..., 0.3569, 0.3569, 0.3569],
          [0.0784, 0.0784, 0.0784,  ...,

In [15]:
coco_test = COCO(annotation)


loading annotations into memory...
Done (t=3.95s)
creating index...
index created!


In [18]:
coco_test.loadImgs([2])

[{'license': 0,
  'url': None,
  'file_name': 'batch_01_vt/0006.jpg',
  'height': 512,
  'width': 512,
  'date_captured': None,
  'id': 2}]

In [17]:
coco_test.loadImgs(image_id)[0]   ### list 를 벗겨낸 것

{'license': 0,
 'url': None,
 'file_name': 'batch_01_vt/0003.jpg',
 'height': 512,
 'width': 512,
 'date_captured': None,
 'id': 0}