In [1]:
import os
import glob
import numpy as np

from PIL import Image, ImageDraw
import scipy.ndimage.morphology as morph

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms

from args import *

In [2]:
import json
# Load the COCO 2014 person-keypoints training annotations into memory.
# JSON text is UTF-8, so the encoding is stated explicitly rather than being
# left to the platform's locale default.
with open('/storage/ProtopopovI/_data_/COCO/2014/annotations/person_keypoints_train2014.json', encoding='utf-8') as data_file:
    data_json = json.load(data_file)

In [None]:
# Number of image records in the annotation file.
len(data_json['images'])

In [19]:
# data_json['annotations'][37502]['segmentation']
# data_json['images'][80744]
# data_json['annotations'][44474]#[37502]['segmentation']

In [4]:
class TrainPerson(Dataset):
    """COCO person dataset producing (target crop, search image, label maps) samples.

    Each sample is built from the first annotation listed for an image: the
    annotation's bounding box is cropped out as the target, its segmentation
    polygons are rasterised into a mask, and the mask is turned into
    background/foreground labels, a distance map and a coarse score map.

    ``TARGET_SIZE`` and ``SEARCH_SIZE`` are module-level constants (presumably
    supplied by the ``args`` star-import -- confirm).
    """

    # Default image directory, used when ``image_dir`` is not given.
    IMAGE_DIR = "/storage/ProtopopovI/_data_/COCO/2014/train2014/"

    def __init__(self, image_dir=None, annotations=None):
        """Set up transforms, list the image files and index the annotations.

        Args:
            image_dir: directory holding the images; defaults to ``IMAGE_DIR``.
            annotations: COCO-style dict with ``'images'`` and
                ``'annotations'`` lists; defaults to the module-level
                ``data_json``.
        """
        super().__init__()
        # interpolation=0 selects PIL's nearest-neighbour filter, which keeps
        # the rasterised mask strictly binary after resizing.
        self.target_trans = transforms.Compose([
            transforms.Resize((TARGET_SIZE, TARGET_SIZE), interpolation=0),
            transforms.ToTensor()
            ])
        self.search_trans = transforms.Compose([
            transforms.Resize((SEARCH_SIZE, SEARCH_SIZE), interpolation=0),
            transforms.ToTensor()
            ])
        self.image_dir = self.IMAGE_DIR if image_dir is None else image_dir
        self.file_names = sorted(os.listdir(self.image_dir))

        coco = data_json if annotations is None else annotations
        # Build both lookup tables once so __getitem__ does not rescan the
        # whole annotation file on every access.
        # Assumes file names are unique within the annotation file.
        self._image_by_name = {
            image['file_name']: image for image in coco['images']
        }
        # Annotations stay in file order, so entry [0] is the same annotation
        # a linear scan would have found first.
        self._anns_by_image = {}
        for annotation in coco['annotations']:
            self._anns_by_image.setdefault(annotation['image_id'], []).append(annotation)

    def __len__(self):
        """Return the number of image files found in the image directory."""
        return len(self.file_names)

    def transform_score_label(self, depth2, stride=16):
        """Turn a 2-D foreground distance map into a coarse two-channel score map.

        Pixels equal to the map's maximum are marked and max-pooled with a
        ``stride`` x ``stride`` window (padding ``stride // 2``).

        Args:
            depth2: 2-D tensor of per-pixel distances.
            stride: pooling window size and stride; 16 by default.

        Returns:
            Tensor of shape (2, h, w); channel 1 is the pooled positive map,
            channel 0 is its complement.
        """
        depth2 = depth2.reshape(1, 1, depth2.size(0), depth2.size(1))
        peak = (depth2 == depth2.max()).float()
        positive = F.max_pool2d(peak, kernel_size=(stride, stride),
                                padding=stride // 2, stride=stride)
        negative = (positive == 0).float()
        # Concatenate on the channel axis and drop only the batch axis, so a
        # size-1 spatial dimension is never squeezed away by accident.
        return torch.cat([negative, positive], dim=1).squeeze(0)

    def get_labels(self, object):
        """Derive label, distance and score maps from a mask tensor.

        Args:
            object: mask tensor whose first channel is used for the distance
                transforms; ``__getitem__`` passes a (1, H, W) tensor. Zero
                means background. The name shadows the ``object`` builtin but
                is kept for keyword callers.

        Returns:
            Tuple ``(labels, depths, score_labels)``: for a (1, H, W) mask,
            ``labels`` is (2, H, W) with channel 0 = background and channel
            1 = foreground, ``depths`` is (1, H, W) float32, and
            ``score_labels`` is (1, 2, h, w).
        """
        background = (object == 0).float()
        foreground = (background == 0).float()
        # Euclidean distance of each non-zero pixel to the nearest zero pixel,
        # computed separately for the background and the foreground region.
        depth1 = torch.tensor(morph.distance_transform_edt(background[0].numpy()))
        depth2 = torch.tensor(morph.distance_transform_edt(foreground[0].numpy()))

        depths = (depth1 + depth2).float().unsqueeze(0)
        labels = torch.stack([background, foreground], dim=1).squeeze(0)
        score_labels = self.transform_score_label(depth2).unsqueeze(0)
        return labels, depths, score_labels

    def __getitem__(self, idx):
        """Build the sample for the ``idx``-th file in ``self.file_names``.

        Returns:
            Tuple ``(target, search, label, depth, score_label, size, center)``
            where ``size`` is ``[width, height]`` of the original image and
            ``center`` is the bounding-box centre divided by that size.

        Raises:
            IndexError: the file has no record or no annotation in the
                annotation data.
            ValueError: the first annotation's segmentation is not a
                non-empty list of polygons (e.g. an RLE-encoded crowd mask).
        """
        file_name = self.file_names[idx]

        image_info = self._image_by_name.get(file_name)
        annotations = [] if image_info is None else self._anns_by_image.get(image_info['id'], [])
        if not annotations:
            # The directory listing can contain images that have no
            # annotation; fail with a message that names the file.
            raise IndexError("no annotation found for image %r" % (file_name,))
        annotation = annotations[0]

        segmentation = annotation['segmentation']
        if not isinstance(segmentation, list) or not segmentation:
            raise ValueError(
                "annotation %r of image %r has no polygon segmentation"
                % (annotation.get('id'), file_name))

        width, height = image_info['width'], image_info['height']
        size = [width, height]
        x, y, w, h = annotation['bbox'][:4]
        center = [(x + w / 2) / width, (y + h / 2) / height]

        # Close the file handle as soon as the pixels have been decoded.
        with Image.open(os.path.join(self.image_dir, file_name)) as image_file:
            search = image_file.convert('RGB')
        target = search.crop([x, y, x + w, y + h])

        # NOTE(review): the target goes through search_trans, so target_trans
        # is never used -- confirm whether the crop should be resized to
        # TARGET_SIZE instead.
        target = self.search_trans(target)
        search = self.search_trans(search)

        mask = Image.new('L', (width, height))
        idraw = ImageDraw.Draw(mask)
        # An object can be described by several polygons (e.g. when it is
        # partly occluded); draw every one so the mask covers the whole object.
        for polygon in segmentation:
            idraw.polygon(polygon, fill='white')
        mask = self.search_trans(mask)
        label, depth, score_label = self.get_labels(mask)

        return target, search, label, depth, score_label, size, center

In [5]:
# Instantiate the dataset; the constructor lists and sorts the image directory.
data = TrainPerson()

In [6]:
# Fetch the sample at index 130 of the sorted file list and unpack the seven
# values returned by TrainPerson.__getitem__.
target, search, label, depth, score_label, size, center = data[130]

In [7]:
# [width, height] of the original image, as read from its annotation record.
size

[480, 640]

In [8]:
# Bounding-box centre, with x divided by the image width and y by its height.
center

[0.6875729166666666, 0.63871875]

In [9]:
# Show the shape of every tensor in the sample, one per line.
print(*(tensor.shape for tensor in (target, search, label, depth, score_label)), sep='\n')

torch.Size([3, 256, 256])
torch.Size([3, 256, 256])
torch.Size([2, 256, 256])
torch.Size([1, 256, 256])
torch.Size([1, 2, 17, 17])


In [10]:
# Channel 1 of the first score map: the max-pooled positive map (channel 0 is
# its complement).
score_label[0][1]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [11]:
# Coordinates of the cells that are set to 1 in the positive score channel.
print(torch.nonzero(score_label[0][1] == 1))

tensor([[13, 10]])


In [12]:
# Random 32x32 tensor used to experiment with padding below.
t_32 = torch.rand(32, 32)
t_32.size()

torch.Size([32, 32])

In [57]:
# Pad one element on each side of the last two dimensions and inspect the shape.
F.pad(t_32, pad=(1, 1, 1, 1)).size()

torch.Size([34, 34])

In [58]:
# Scratch arithmetic -- presumably two 17-cell score-map sides; TODO confirm or remove.
17+17

34

In [59]:
# Scratch arithmetic -- presumably four 17-cell score-map sides; TODO confirm or remove.
17+17+17+17

68