In [37]:
import os
import glob
import cv2
from PIL import Image
import tensorflow as tf

from tqdm.auto import tqdm as tq
import pandas as pd
import numpy as np
import json
train_x_dir = '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017/train2017'
# os.listdir(train_dir)

In [52]:
d = '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017/annotations'
os.listdir(d)

['captions_val2017.json',
 'instances_val2017.json',
 'person_keypoints_val2017.json',
 'captions_train2017.json',
 'person_keypoints_train2017.json',
 'instances_train2017.json']

In [3]:
dataset_dir = '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017/'
mode = 'val'

In [53]:
def get_dataset(mode, dataset_dir):
    assert mode in ['train', 'val']
    
    # get X
    X = sorted(glob.glob(f"{dataset_dir}/{mode}2017/*.jpg"))

    # get Y
    Y = f"{dataset_dir}/annotations/instances_{mode}2017.json"
    with open(Y, 'r', encoding='utf-8') as f:
        Y = json.load(f)

    # get image infomation (height, width)
    img_id  = [Y['images'][idx]['id']  for idx in range(len(Y['images']))]
    heights = [Y['images'][idx]['height'] for idx in range(len(Y['images']))]
    widths  = [Y['images'][idx]['width']  for idx in range(len(Y['images']))]

    img_info = pd.DataFrame([img_id, heights, widths]).T
    img_info.columns = ['image_id', 'height', 'width']
        
    # preprocess images
    path = X[:]
    ids = [X[idx].rsplit('/', 1)[1] for idx in range(len(X))]
    X = pd.DataFrame()
    X['path'] = path
    X['filename'] = ids
    X['image_id'] = X['filename'].str.replace(".jpg", "").astype(int)

    labels = [Y['annotations'][idx] for idx in range(len(Y['annotations']))]
    labels_image_id = [labels[idx]['image_id'] for idx in range(len(labels))]
    labels_bbox = [labels[idx]['bbox'] for idx in range(len(labels))]
    labels_category_id = [labels[idx]['category_id'] for idx in range(len(labels))]
    
    Y = pd.DataFrame([labels_image_id, 
                      labels_bbox,
                      labels_category_id, 
                     ]).T
    Y.columns = ['image_id', 'bbox', 'category_id']
    
    # merged dataframe
    labels = dict()
    for idx in (range(len(Y))):
        row = Y.iloc[idx]
        if row['image_id'] not in labels:
            labels[row['image_id']] = [[], []] # bbox, category
        else:
            labels[row['image_id']][0].append(row['bbox'])
            labels[row['image_id']][1].append(row['category_id'])

    labels_df = pd.DataFrame(labels).T.reset_index()
    labels_df.columns = ['image_id', 'bbox', 'category_id']

    dataset = pd.merge(X, img_info, how='left', on='image_id').dropna().reset_index(drop=True)
    dataset = pd.merge(dataset, labels_df, how='left', on='image_id').dropna().reset_index(drop=True)
    return dataset
        
    

In [8]:
train_dataset = get_dataset('train', dataset_dir)
val_dataset = get_dataset('val', dataset_dir)

In [44]:
class DataLoader(tf.keras.utils.Sequence):
    def __init__(self, dataset, batch_size, shuffle=False):
        self.X_path = dataset['path'].values
        self.X_height = dataset['height'].values
        self.X_width  = dataset['width'].values
        self.bbox     = dataset['bbox'].values
        self.category_id = dataset['category_id'].values
        
        self.batch_size = batch_size
        self.shuffle = shuffle
        
        self.indices = np.arange(len(self.X_path))
        
    def on_epoch_end(self):
        self.indices = np.arange(len(self.X_path))
        if self.shuffle == True:
            np.random.shuffle(self.indices)
        
    def __len__(self):
        return int((len(self.x)+self.batch_size -1)//self.batch_size)
    
    def __getitem__(self, idx):
        indices = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_x = [self.X_path[i] for i in indices]
        batch_xh = [self.X_height[i] for i in indices]
        batch_xw = [self.X_width[i] for i in indices]        
        batch_bbox = [self.bbox[i] for i in indices]
        batch_category_id = [self.category_id[i] for i in indices]
        
        return batch_x, np.array(batch_xh), np.array(batch_xw), batch_bbox, batch_category_id

In [45]:
val_loader = DataLoader(
    dataset = val_dataset,
    batch_size = 4,
    shuffle=True
)

In [46]:
val_loader[0]

(['/home/gyuseonglee/workspace/dataset/coco-2017/coco2017//val2017/000000000139.jpg',
  '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017//val2017/000000000285.jpg',
  '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017//val2017/000000000632.jpg',
  '/home/gyuseonglee/workspace/dataset/coco-2017/coco2017//val2017/000000000724.jpg'],
 array([426, 640, 483, 500]),
 array([640, 586, 640, 375]),
 [[[7.03, 167.76, 149.32, 94.87],
   [557.21, 209.19, 81.35, 78.73],
   [358.98, 218.05, 56.0, 102.83],
   [290.69, 218.0, 61.83, 98.48],
   [413.2, 223.01, 30.17, 81.36],
   [317.4, 219.24, 21.58, 11.59],
   [412.8, 157.61, 53.05, 138.01],
   [384.43, 172.21, 15.12, 35.74],
   [512.22, 205.75, 14.74, 15.97],
   [493.1, 174.34, 20.29, 108.31],
   [604.77, 305.89, 14.34, 45.71],
   [613.24, 308.24, 12.88, 46.44],
   [447.77, 121.12, 13.97, 21.88],
   [549.06, 309.43, 36.68, 89.67],
   [350.76, 208.84, 11.37, 22.55],
   [412.25, 219.02, 9.63, 12.52],
   [241.24, 194.99, 14.22, 17.63],
   [3