In [1]:
import tensorlayer as tl
import json
import pylab as pl
import random
import numpy as np
import os
import cv2
import anno_func
%matplotlib inline
from bbox import BoundingBox
from errors import UnsupportedExtensionError, UnsupportedFormatError
from PIL import Image
from collections import Counter
from tqdm import tqdm
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
# Root folder of the dataset; every other path in this notebook is relative to it.
datadir='../../data'
filedir=os.path.join(datadir, 'annotations.json')
# Parse inside a context manager so the file handle is released as soon as
# the annotations are in memory instead of lingering for the kernel's lifetime.
with open(filedir) as anno_file:
    annos=json.load(anno_file)

In [3]:
# Number of annotated objects per category, over every image in the annotations.
class_counter=Counter(
    obj['category']
    for img in annos['imgs'].values()
    for obj in img['objects']
)
# Keep the categories with at least 100 instances, excluding 'io', 'po' and 'wo'.
# The result is sorted so the class order is identical on every kernel restart
# (iterating a set of strings depends on hash randomisation).
classes=sorted(
    category
    for category, count in class_counter.items()
    if count>=100 and category not in ('io','po','wo')
)

In [4]:
# Annotation container for the cropped dataset: 'imgs' is filled in by crop_image,
# 'types' is taken as-is from anno_func.
new_annos=dict(imgs={}, types=anno_func.type42)

In [9]:
def transform_box(box, new_xmin, new_ymin):
    """Return a copy of ``box`` with its bbox expressed relative to a crop origin.

    ``new_xmin`` is subtracted from both x coordinates and ``new_ymin`` from both
    y coordinates. Only the 'bbox' and 'category' keys are carried over; the
    input dict is not modified.
    """
    source_bbox = box['bbox']
    shifted = {}
    for key, offset in (('xmin', new_xmin), ('xmax', new_xmin),
                        ('ymin', new_ymin), ('ymax', new_ymin)):
        shifted[key] = source_bbox[key] - offset
    return {'bbox': shifted, 'category': box['category']}

def overlap(box, old_width, old_height, new_xmin, new_ymin, new_width, new_height, tol):
    """Tell whether ``box`` fits inside a crop window, allowing ``tol`` pixels of slack.

    The box is first clamped to the source image (``0..old_width`` by
    ``0..old_height``). The result is truthy only when every clamped edge lies
    within the window that starts at (``new_xmin``, ``new_ymin``) and measures
    ``new_width`` by ``new_height``, with each edge allowed to stick out by up
    to ``tol``. Despite the name, this is a containment test, not an
    intersection test.
    """
    bbox = box['bbox']
    # Clamp the annotated box to the bounds of the source image.
    left = max(bbox['xmin'], 0)
    top = max(bbox['ymin'], 0)
    right = min(bbox['xmax'], old_width)
    bottom = min(bbox['ymax'], old_height)

    fits_left = left >= new_xmin - tol
    fits_top = top >= new_ymin - tol
    fits_right = right - tol <= new_width + new_xmin
    fits_bottom = bottom - tol <= new_height + new_ymin
    return fits_left and fits_top and fits_right and fits_bottom
    
def _random_crop_origin(box_min, box_max, crop_size, full_size):
    """Pick the start and end of a random crop along one axis.

    The start is drawn with ``np.random.randint`` from
    ``[max(0, box_max - crop_size), max(0, box_min)]`` (both ends inclusive) and
    is then pulled back so the crop does not extend past ``full_size``.
    Returns ``(start, end)`` with ``end - start == crop_size``.
    """
    start = np.random.randint(int(max(0, box_max - crop_size)), max(0, int(box_min)) + 1)
    end = start + crop_size
    if end >= full_size:
        end = full_size
        start = end - crop_size
    return start, end


def crop_image(id, old_width=2048, old_height=2048, new_width=512, new_height=512,
               note='_1', cropped_dir='cropped_train', tol=8, aug=False, max_box_size=128):
    """Save one random crop per qualifying object of image ``id`` and record its annotations.

    For every object of ``annos['imgs'][id]`` whose category is in ``classes``
    and whose box is at most ``max_box_size`` wide and high, a crop of
    ``new_width`` x ``new_height`` is taken at a random position chosen around
    that box, saved as ``<id>_<n><note>.jpg`` under ``datadir/cropped_dir``, and
    registered in ``new_annos['imgs']`` together with every object of a kept
    category that ``overlap`` accepts for the crop window (coordinates shifted
    by ``transform_box``).

    Parameters
    ----------
    id : str
        Key of the source image in ``annos['imgs']``. (The name shadows the
        builtin but is kept for backward compatibility with existing callers.)
    old_width, old_height : int
        Size of the source image used to clamp the crop window; it is not read
        from the file itself.
    new_width, new_height : int
        Size of the crop.
    note : str
        Suffix appended to the generated id / file name.
    cropped_dir : str
        Sub-directory of ``datadir`` the crops are written to; it is also the
        directory part of the recorded ``'path'``.
    tol : number
        Tolerance forwarded to ``overlap``.
    aug : bool
        When true, ``tl.prepro.illumination`` is applied to the crop with random
        gamma, contrast and saturation, each with range (0.5, 1.5).
    max_box_size : number
        Boxes wider or taller than this are skipped (default 128, the value that
        used to be hard-coded).
    """
    source = annos['imgs'][id]
    # Opened lazily: images without any qualifying box are never read, and an
    # image with several boxes is decoded once instead of once per box.
    image = None
    ids = 0
    try:
        for box in source['objects']:
            if box['category'] not in classes:
                continue

            xmin = box['bbox']['xmin']
            ymin = box['bbox']['ymin']
            xmax = box['bbox']['xmax']
            ymax = box['bbox']['ymax']
            if xmax - xmin > max_box_size or ymax - ymin > max_box_size:
                continue

            new_id = id + '_' + str(ids) + note
            # x is drawn before y, matching the original random-number order.
            new_xmin, right = _random_crop_origin(xmin, xmax, new_width, old_width)
            new_ymin, bottom = _random_crop_origin(ymin, ymax, new_height, old_height)

            if image is None:
                image = Image.open(os.path.join(datadir, source['path']))
            new_img = np.asarray(image.crop((new_xmin, new_ymin, right, bottom)))
            if aug:
                new_img = tl.prepro.illumination(new_img, gamma=(0.5, 1.5),
                                                 contrast=(0.5, 1.5), saturation=(0.5, 1.5),
                                                 is_random=True)

            new_img_path = new_id + '.jpg'
            tl.visualize.save_image(new_img, os.path.join(datadir, cropped_dir, new_img_path))

            objects = [
                transform_box(obox, new_xmin, new_ymin)
                for obox in source['objects']
                if obox['category'] in classes
                and overlap(obox, old_width, old_height, new_xmin, new_ymin,
                            new_width, new_height, tol)
            ]

            # Register the crop only after the file has been written, so a failure
            # above never leaves a half-filled entry (without 'objects'/'path') behind.
            new_annos['imgs'][new_id] = {
                'id': new_id,
                'objects': objects,
                'path': os.path.join(cropped_dir, new_img_path),
            }
            ids += 1
    finally:
        if image is not None:
            image.close()

In [7]:
# Source image folders; an image id is the file name up to its first '.'.
train_dir=os.path.join(datadir,'train')
test_dir=os.path.join(datadir,'test')
train_id_list=[fname.split('.')[0] for fname in os.listdir(train_dir)]
test_id_list=[fname.split('.')[0] for fname in os.listdir(test_dir)]

# Output folders for the cropped images, created if they do not exist yet.
cropped_dir_train='cropped_train'
cropped_dir_test='cropped_test'
train_new_path=os.path.join(datadir, cropped_dir_train)
test_new_path=os.path.join(datadir, cropped_dir_test)
for out_dir in (train_new_path, test_new_path):
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

In [None]:
# Crop every test image. The loop variable is deliberately not called `id`:
# a module-level `id` would replace the builtin for every later cell.
for img_id in tqdm(test_id_list):
    crop_image(img_id, 2048, 2048, 512, 512, note='_1', cropped_dir=cropped_dir_test)

In [11]:
# Crop every training image. The loop variable is deliberately not called `id`:
# a module-level `id` would replace the builtin for every later cell.
for img_id in tqdm(train_id_list):
    crop_image(img_id, 2048, 2048, 512, 512, note='_1', cropped_dir=cropped_dir_train)

100%|██████████████████████████████████████████████████████████████████████████████| 6105/6105 [22:08<00:00,  3.88it/s]


In [1]:
def Data_augmentation(num):
    """Generate extra augmented training crops for categories with few instances.

    For every crop already registered in ``new_annos['imgs']`` that is not a
    test crop, each object whose category has fewer than ``num`` instances in
    ``class_counter`` triggers ``num // count + 1`` additional calls to
    ``crop_image`` on the original image with ``aug=True``. The k-th call uses
    the id suffix ``'_' + str(k + 2)`` and writes to ``cropped_dir_train``.

    Parameters
    ----------
    num : int
        Instance-count threshold below which a category is augmented.
    """
    # Snapshot the keys: crop_image adds entries to new_annos['imgs'] while we loop.
    keys=list(new_annos['imgs'].keys())
    for imgid in tqdm(keys):
        # crop_image records paths as os.path.join(cropped_dir, file), so the
        # directory part identifies the split whatever the platform separator is
        # (splitting on '/' never matches on Windows).
        if os.path.dirname(new_annos['imgs'][imgid]['path'])==cropped_dir_test:
            continue
        for box in new_annos['imgs'][imgid]['objects']:
            category=box['category']
            if class_counter[category]<num:
                # NOTE(review): assumes original image ids contain no '_' - confirm.
                org_id=imgid.split('_')[0]
                n=num//class_counter[category]+1
                # NOTE(review): several crops/boxes of the same original image
                # repeat these calls and overwrite the same ids - confirm intended.
                for k in range(n):
                    crop_image(org_id, 2048,2048,512,512, note='_'+str(k+2),cropped_dir=cropped_dir_train, aug=True)