In [None]:
path = "COCO" # dataset root dir
train = "train2017.txt" # train images (relative to 'path') 118287 images
val = "val2017.txt" # val images (relative to 'path') 5000 images
test = "test-dev2017.txt" # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Classes
names = {
  0: "person",
  1: "bicycle",
  2: "car",
  3: "motorcycle",
  4: "airplane",
  5: "bus",
  6: "train",
  7: "truck",
  8: "boat",
  9: "traffic light",
  10: "fire hydrant",
  11: "stop sign",
  12: "parking meter",
  13: "bench",
  14: "bird",
  15: "cat",
  16: "dog",
  17: "horse",
  18: "sheep",
  19: "cow",
  20: "elephant",
  21: "bear",
  22: "zebra",
  23: "giraffe",
  24: "backpack",
  25: "umbrella",
  26: "handbag",
  27: "tie",
  28: "suitcase",
  29: "frisbee",
  30: "skis",
  31: "snowboard",
  32: "sports ball",
  33: "kite",
  34: "baseball bat",
  35: "baseball glove",
  36: "skateboard",
  37: "surfboard",
  38: "tennis racket",
  39: "bottle",
  40: "wine glass",
  41: "cup",
  42: "fork",
  43: "knife",
  44: "spoon",
  45: "bowl",
  46: "banana",
  47: "apple",
  48: "sandwich",
  49: "orange",
  50: "broccoli",
  51: "carrot",
  52: "hot dog",
  53: "pizza",
  54: "donut",
  55: "cake",
  56: "chair",
  57: "couch",
  58: "potted plant",
  59: "bed",
  60: "dining table",
  61: "toilet",
  62: "tv",
  63: "laptop",
  64: "mouse",
  65: "remote",
  66: "keyboard",
  67: "cell phone",
  68: "microwave",
  69: "oven",
  70: "toaster",
  71: "sink",
  72: "refrigerator",
  73: "book",
  74: "clock",
  75: "vase",
  76: "scissors",
  77: "teddy bear",
  78: "hair drier",
  79: "toothbrush"}


import sys
sys.path.append('../../')


from ultralytics.utils.downloads import download
from pathlib import Path
import os

# Download labels
segments = False  # segment or box labels
dir = Path(path)  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
# download(urls, dir=dir.parent)
#
# TODO move coco/* TO your path
#
# Download data
urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)

Downloading http://images.cocodataset.org/zips/train2017.zip to 'COCO/images/train2017.zip'...
Downloading http://images.cocodataset.org/zips/test2017.zip to 'COCO/images/test2017.zip'...


In [1]:
!unzip annotations_trainval2017.zip

Archive:  annotations_trainval2017.zip
  inflating: annotations/instances_train2017.json  
  inflating: annotations/instances_val2017.json  
  inflating: annotations/captions_train2017.json  
  inflating: annotations/captions_val2017.json  
  inflating: annotations/person_keypoints_train2017.json  
  inflating: annotations/person_keypoints_val2017.json  


In [2]:
import os

paths = ['images',
         'images/train',
         'images/valid',
         'images/test',
         'labels',
         'labels/train',
         'labels/valid',
         'labels/test',
         'supports']

for path in paths:
    for div in ['COCO1']:

        if not os.path.exists(div):
            os.mkdir(div)

        if not os.path.exists(div+'/'+path):
            os.mkdir(div+'/'+path)

for div in ['COCO1']:
    for i in range(80):
        if not os.path.exists(div+'/supports/'+str(i)):
            os.mkdir(div+'/supports/'+str(i))    

In [3]:
import numpy as np
import pandas as pd
import json
from PIL import Image, ImageOps
from tqdm import tqdm

def load_json(jfile):
    with open(jfile, 'rb') as f:
        return json.load(f)

In [4]:
def resize_with_padding(img, expected_size, colors=(114,114,114)):
    width, height = img.size
    ratio = width / height
    if width < height:
        img = img.resize((int(ratio*expected_size[0]),int(expected_size[0])))
    else:
        img = img.resize((int(expected_size[1]),int((1/ratio)*expected_size[1])))
        
    img.thumbnail((expected_size[0], expected_size[1]))
    delta_width = expected_size[0] - img.size[0]
    delta_height = expected_size[1] - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    padding = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(img, padding, colors)

In [5]:
def resize_label_with_padding(old_size, expected_size, bbox):
    (x,y,w,h) = bbox

    ratio = old_size[0] / old_size[1]
    if old_size[0] < old_size[1]:
        new = (int(ratio*expected_size[0]),int(expected_size[0]))
    else:
        new = (int(expected_size[1]),int((1/ratio)*expected_size[1]))

    delta_width = expected_size[0] - new[0]
    delta_height = expected_size[1] - new[1]

    pad_width = delta_width // 2
    pad_height = delta_height // 2

    x_t = (pad_width +  (x)*new[0]) / expected_size[0]
    y_t = (pad_height + (y)*new[1]) / expected_size[1]
    w_t = w*new[0] / expected_size[0]
    h_t = h*new[1] / expected_size[1]
    
    return x_t,y_t,w_t,h_t
    

In [6]:
PATH = ''

images_train = PATH+'COCO/train2017'
images_val = PATH+'COCO/val2017'

train = PATH+'annotations/instances_train2017.json'
val = PATH+'annotations/instances_val2017.json'

train_data = load_json(train)
val_data = load_json(val)

In [7]:
category = pd.DataFrame(val_data['categories'])
images_df = pd.DataFrame(val_data['images'])
anns_df = pd.DataFrame(val_data['annotations'])
nb_category = len(category)

all = pd.merge(anns_df, images_df, left_on='image_id', right_on='id')
all = all.sort_values(by=['category_id'])
all.shape

(36781, 15)

In [8]:
all.head()

Unnamed: 0,segmentation,area,iscrowd,image_id,bbox,category_id,id_x,license,file_name,coco_url,height,width,date_captured,flickr_url,id_y
27667,"[[154.87, 149.64, 143.78, 130.41, 128.26, 121....",9791.35945,0,140987,"[15.82, 118.58, 139.05, 316.76]",1,1709663,3,000000140987.jpg,http://images.cocodataset.org/val2017/00000014...,640,425,2013-11-21 00:03:27,http://farm9.staticflickr.com/8070/8156597598_...,140987
7544,"[[468.91, 508.17, 469.38, 496.87, 467.3, 486.4...",297.6145,0,21839,"[467.3, 471.49, 11.07, 37.84]",1,569562,3,000000021839.jpg,http://images.cocodataset.org/val2017/00000002...,640,480,2013-11-16 12:27:39,http://farm5.staticflickr.com/4080/5052955350_...,21839
7549,"[[46.6, 463.64, 42.06, 462.73, 39.8, 460.01, 4...",541.672,0,21839,"[24.82, 453.65, 23.6, 44.48]",1,1746923,3,000000021839.jpg,http://images.cocodataset.org/val2017/00000002...,640,480,2013-11-16 12:27:39,http://farm5.staticflickr.com/4080/5052955350_...,21839
27605,"[[352.42, 79.79, 359.06, 96.42, 450.49, 104.73...",12566.8559,0,355325,"[352.42, 3.32, 125.58, 139.64]",1,2152486,3,000000355325.jpg,http://images.cocodataset.org/val2017/00000035...,640,478,2013-11-21 05:32:56,http://farm5.staticflickr.com/4075/4765748189_...,355325
7560,"[[294.46, 312.08, 292.97, 310.59, 292.76, 291....",163.95625,0,190923,"[292.76, 287.01, 6.8, 35.26]",1,1314969,4,000000190923.jpg,http://images.cocodataset.org/val2017/00000019...,500,375,2013-11-16 19:15:04,http://farm3.staticflickr.com/2669/3706332805_...,190923


In [9]:
all['category_id'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 13, 14, 15, 16, 17, 18,
       19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38,
       39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
       57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77,
       78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90])

In [10]:
novel1 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76]
novel2 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,65,69,73,77]
novel3 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62,66,70,74,78]
novel4 = [3,7,11,15,19,23,27,31,35,39,43,47,51,55,59,63,67,71,75,79]

In [11]:
def saveImage(list_image, path, div):

    lab = {}
    for i in range(0,80):
        lab[i] = 0

    for _, row in tqdm(list_image.iterrows()):
        c = row['category_id']-1
        if c in div:
            try:
                n = row['file_name']
                b = row['bbox']
                width = row['width']
                height = row['height']

                image = Image.open("COCO/images/val2017/"+n)
                (xmin,ymin,w,h) = b
                x1, x2, y1, y2 = xmin, xmin + w, ymin, ymin + h
                img = image.crop((x1,y1,x2,y2))
                #img = resize_with_padding(img, (160,160), (0,0,0))
                img.save('COCO1/supports/'+str(c)+'/'+str(lab[c])+'.png')
                lab[c] += 1

                image = resize_with_padding(image, (640,640))
                image.save(path+n[:-4]+'.png')

                p = path.replace('images','labels')
                with open(p+n[:-4]+'.txt', 'a') as f:
                    (xmin,ymin,w,h) = b
                    cx = (xmin+w/2)/width
                    cy = (ymin+h/2)/height
                    h = h/height
                    w = w/width
                    cx,cy,w,h = resize_label_with_padding((width,height), (640,640), (cx,cy,w,h))
                    s = str(c)+' '+str(cx)+' '+str(cy)+' '+str(w)+' '+str(h)+' \n'
                    f.write(s)
            except:
                print("HAAA")

In [12]:
saveImage(all,'COCO1/images/valid/', novel1)
# saveImage(all,'COOC2/images/val/', novel2)
# saveImage(all,'COOC3/images/val/', novel3)
# saveImage(all,'COOC4/images/val/', novel4)

0it [00:00, ?it/s]

544it [01:18,  9.97it/s]

HAAA


548it [01:19, 11.99it/s]

HAAA


561it [01:20, 10.16it/s]

HAAA


600it [01:24, 19.28it/s]

HAAA
HAAA
HAAA
HAAA
HAAA
HAAA


608it [01:25, 13.89it/s]

HAAA
HAAA
HAAA


614it [01:26, 10.48it/s]

HAAA
HAAA


1014it [02:26, 10.19it/s]

HAAA


1063it [02:33, 19.96it/s]

HAAA
HAAA
HAAA
HAAA
HAAA
HAAA
HAAA


1065it [02:33, 19.80it/s]

HAAA
HAAA
HAAA
HAAA
HAAA
HAAA
HAAA


2300it [05:24,  7.25it/s]

HAAA
HAAA


2356it [05:31,  8.94it/s]

HAAA


2412it [05:40,  9.24it/s]

HAAA
HAAA


2421it [05:41,  9.75it/s]

HAAA
HAAA
HAAA
HAAA
HAAA
HAAA
HAAA


2432it [05:41, 13.99it/s]

HAAA
HAAA


2783it [06:34,  8.25it/s]

HAAA


2788it [06:34, 12.67it/s]

HAAA
HAAA
HAAA


2800it [06:35, 16.56it/s]

HAAA
HAAA
HAAA
HAAA
HAAA
HAAA


3927it [09:14, 11.15it/s]

HAAA


4996it [11:43,  9.12it/s]

HAAA
HAAA


5016it [11:46, 10.36it/s]

HAAA


5020it [11:46, 11.35it/s]

HAAA


5026it [11:47,  8.67it/s]

HAAA


5039it [11:49,  8.09it/s]

HAAA


5067it [11:52, 11.73it/s]

HAAA


5647it [13:10, 11.09it/s]

HAAA
HAAA


14795it [26:20, 45.61it/s]  

HAAA
HAAA


22272it [30:12, 69.22it/s] 

HAAA


36781it [35:17, 17.37it/s] 


In [13]:
with open('COCO1/data.yaml', 'wt') as f:
    f.write('train: images/train \n')
    f.write('val: images/valid \n')
    f.write('support: supports \n')
    f.write(' \n')
    f.write('nc: '+str(80)+'\n')

# Base on DANA support IMAGE

In [None]:
#!wget -O support.zip "https://drive.google.com/uc?export=download&id=1nl9-DEpBBJ5w6hxVdijY6hFxoQdz8aso&confirm=yes"

!wget -O support.zip 'https://drive.usercontent.google.com/download?id=1nl9-DEpBBJ5w6hxVdijY6hFxoQdz8aso&export=download&authuser=0&confirm=yes'

In [None]:
!unzip support.zip

In [70]:
# !rm -rf support.zip

In [71]:
# !rm -rf supports

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import glob

a = []

for year, image_set in ('2017', 'train'), ('2017', 'val'):
    lbs_path = dir / 'labels' / f'{image_set}{year}/*'

    for i in tqdm(glob.glob(str(lbs_path))):
        with open(i,'+r') as f:
            info = f.read().strip().split()
            size = len(info)
            for y in range(0,size,5):
                inf = info[y:y+5]
                a.append(inf[0])   

count = {i:a.count(str(i)) for i in range(0,80)}
print(len(count))

plt.figure()
sns.barplot(x=range(0,80),y=count.values())
plt.show()