In [1]:
# Object category names.
categories = "bicycle bus car motorbike train".split()

# Occlusion bins: label -> (min_ratio, max_ratio), both ends inclusive where used.
occlusion_ranges = dict(
    low=(0.2, 0.4),
    medium=(0.4, 0.6),
    high=(0.6, 0.8),
)

In [2]:
import scipy

def load_one_annotation(anno_path):
    """Read the image size and the first object's box from a ``.mat`` annotation.

    Args:
        anno_path: path to a MATLAB file that contains a ``record`` struct.

    Returns:
        ``(w, h, bbox_, multi_object)`` where ``w`` and ``h`` are the first two
        fields of ``record.size`` as Python ints, ``bbox_`` is the first
        object's box as an integer ndarray, and ``multi_object`` is True when
        the record does not hold exactly one object.
    """
    # `import scipy` alone is not guaranteed to expose the `scipy.io`
    # submodule, so import it explicitly where it is used.
    import scipy.io

    record = scipy.io.loadmat(anno_path)['record']

    # NOTE(review): positional access -- assumes the object entries are the
    # second field of `record` and the box is the second field of an entry.
    objects = record[0][0][1][0]
    # the stored box is truncated to integers
    bbox_ = objects[0][1][0].astype(int)

    size_entry = record['size'][0][0][0][0]
    # plain ints keep later arithmetic free of unsigned/float numpy scalars
    w = int(size_entry[0][0][0])
    h = int(size_entry[1][0][0])

    return w, h, bbox_, len(objects) != 1

In [3]:
def overlap_ratio(occluder_bb, occludee_bb):
    """Fraction of the occludee box that is covered by the occluder box.

    Both boxes are ``(x1, y1, x2, y2)``: top-left and bottom-right corners.

    Args:
        occluder_bb: box of the object placed on top.
        occludee_bb: box of the object being covered.

    Returns:
        Intersection area divided by the occludee area. Returns 0.0 when the
        occludee box has no area, instead of dividing by zero.
    """
    occluder_x1, occluder_y1, occluder_x2, occluder_y2 = occluder_bb
    occludee_x1, occludee_y1, occludee_x2, occludee_y2 = occludee_bb

    # area of the object being covered (the denominator)
    occludee_area = (occludee_x2 - occludee_x1) * (occludee_y2 - occludee_y1)
    if occludee_area <= 0:
        return 0.0

    # intersection extents, clamped at zero when the boxes do not overlap
    overlap_w = max(0, min(occludee_x2, occluder_x2) - max(occludee_x1, occluder_x1))
    overlap_h = max(0, min(occludee_y2, occluder_y2) - max(occludee_y1, occluder_y1))

    return (overlap_w * overlap_h) / occludee_area

In [4]:
import random
import time

def get_bbox_list(bg_bbox, bg_w, bg_h, fg_w, fg_h, num_boxes=1, min_ratio=0.20, max_ratio=0.80):
    """Collect randomly placed occluder boxes whose occlusion ratio is acceptable.

    This can be improved if we know the foreground image has to be some base
    scale to allow for above-threshold occlusion.

    Args:
        bg_bbox: ``(x1, y1, x2, y2)`` box of the object to occlude.
        bg_w, bg_h: background image size.
        fg_w, fg_h: occluder size.
        num_boxes: how many random placements to draw (default 1, as before).
        min_ratio, max_ratio: inclusive window of accepted occlusion ratios
            (defaults 0.20 and 0.80, as before).

    Returns:
        List of ``(occluded_ratio, occluder_bb)`` tuples; may be empty.
    """
    # The global RNG is intentionally not re-seeded here: `random` is already
    # seeded at import, and re-seeding from the clock on every call would
    # override any seed the caller set for reproducibility.
    bboxes = []

    for _ in range(num_boxes):
        occluder_bb = get_random_bbox(bg_bbox, bg_w, bg_h, fg_w, fg_h)

        if not occluder_bb:
            # the sampler could not place this foreground size; stop early
            return bboxes

        occluded_ratio = overlap_ratio(occluder_bb, bg_bbox)
        if min_ratio <= occluded_ratio <= max_ratio:
            bboxes.append((occluded_ratio, occluder_bb))

    return bboxes

In [5]:
# get a random bounding box, with some unknown level of occlusion
def get_random_bbox(bg_bbox, bg_w, bg_h, fg_w, fg_h):
    """Pick a random occluder box that touches ``bg_bbox`` and fits in the image.

    The top-left corner is drawn uniformly from the positions for which the
    occluder (a) reaches the occludee box along both axes and (b) stays inside
    the ``bg_w`` x ``bg_h`` background. Sampling directly from that valid range
    yields the same distribution as drawing and rejecting out-of-image boxes,
    but never gives up while a valid placement exists.

    Args:
        bg_bbox: ``(x1, y1, x2, y2)`` box of the object to occlude.
        bg_w, bg_h: background image size.
        fg_w, fg_h: occluder size.

    Returns:
        ``[x1, y1, x2, y2]`` as Python ints, or ``None`` when the occluder
        cannot be placed (for example when it is larger than the background).
    """
    # Coerce everything to plain ints: annotation values may arrive as numpy
    # or float scalars, which `random.randint` and subtraction handle poorly.
    occludee_x1 = int(bg_bbox[0])
    occludee_y1 = int(bg_bbox[1])
    occludee_x2 = int(bg_bbox[2])
    occludee_y2 = int(bg_bbox[3])
    bg_w, bg_h, fg_w, fg_h = int(bg_w), int(bg_h), int(fg_w), int(fg_h)

    # lowest start that still reaches the occludee / highest start that still
    # touches it and keeps the occluder inside the background
    x_low = max(0, occludee_x1 - fg_w)
    x_high = min(occludee_x2, bg_w - fg_w)
    y_low = max(0, occludee_y1 - fg_h)
    y_high = min(occludee_y2, bg_h - fg_h)

    if x_low > x_high or y_low > y_high:
        # maybe we should introduce image cropping/clipping here instead
        return None

    # No re-seeding: seeding from the clock before each draw can repeat the
    # same candidate and discards any seed set by the caller.
    occluder_x1 = random.randint(x_low, x_high)  # overlapping in the x-direction
    occluder_y1 = random.randint(y_low, y_high)  # overlapping in the y-direction

    return [occluder_x1, occluder_y1, occluder_x1 + fg_w, occluder_y1 + fg_h]

In [6]:
from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales

def get_random_location(fg_img, fg_mask, bg_img, bg_w, bg_h, bg_bbox, occ_range):
    """Get a random bounding box location for a given occlusion range.

    Walks the scaled foregrounds listed in the CSV produced by
    ``prepare_multi_fg_scales`` and, for each one, tries up to 20 random
    placements until the occlusion ratio falls inside ``occ_range``.

    Returns:
        ``(fg_img_path, fg_mask_path, occluded_ratio, occluder_bb)`` for the
        first accepted placement, or ``(None, None, None, None)`` if none of
        the scales produced one.
    """
    scaled_fg_dir, scaled_mask_dir, csv_path = prepare_multi_fg_scales(
        './unrealistic_cache', fg_img, fg_mask, bg_img, 16
    )
    attempts_per_scale = 20

    with open(csv_path, mode='r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            fg_stem = row["fg_name"].split(".")[0]
            bg_stem = row["bg_name"].split(".")[0]
            fg_w = int(row["newWidth"])
            fg_h = int(row["newHeight"])

            # image and mask share one file name and differ only by directory
            scaled_name = f'{fg_stem}_{bg_stem}_{fg_w}_{fg_h}.jpg'
            fg_img_path = os.path.join(scaled_fg_dir, scaled_name)
            fg_mask_path = os.path.join(scaled_mask_dir, scaled_name)

            occ_min, occ_max = occ_range
            for _ in range(attempts_per_scale):
                occluder_bb = get_random_bbox(bg_bbox, bg_w, bg_h, fg_w, fg_h)
                if occluder_bb:
                    occluded_ratio = overlap_ratio(occluder_bb, bg_bbox)
                    if occ_min <= occluded_ratio <= occ_max:
                        return fg_img_path, fg_mask_path, occluded_ratio, occluder_bb
            # no luck at this size: fall through to the next occluder scale

    return None, None, None, None

No module 'xformers'. Proceeding without it.


  rank_zero_deprecation(


In [7]:
'''
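Returns the best score, its occlusion ratio, the chosen scaled foreground and mask paths,
the optimal bounding box, and the composite image and composite mask.
num_scales is the number of different foreground scales to try.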
'''
from libcom import OPAScoreModel
from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales

def get_optimal_location(fg_img, fg_mask, bg_img, bg_w, bg_h, bg_bbox, num_scales):
    """Find the best-scoring occluder placement over several foreground scales.

    For every scaled foreground listed by ``prepare_multi_fg_scales``, random
    placements from ``get_bbox_list`` are composited onto the background and
    scored with ``OPAScoreModel``; the highest-scoring one is kept.

    Args:
        fg_img, fg_mask: paths of the occluder image and its mask.
        bg_img: path of the background image.
        bg_w, bg_h: background image size.
        bg_bbox: ``(x1, y1, x2, y2)`` box of the object to occlude.
        num_scales: number of foreground scales to try.

    Returns:
        ``(score, ratio, best_fg, best_mask, optimal_bbox, best_comp,
        best_comp_mask)``. When no placement scored above 0 the score and
        ratio are 0 and every other entry is ``None``.
    """
    # NOTE(review): the scoring model is rebuilt on every call; consider
    # creating it once and reusing it if this becomes a bottleneck.
    net = OPAScoreModel(device=0, model_type='SimOPA')
    cache_dir = './realistic_cache'

    # Honour the caller's `num_scales` instead of a hard-coded 16.
    # NOTE(review): assumes the fifth argument is the scale count -- confirm
    # against the libcom signature.
    scaled_fg_dir, scaled_mask_dir, csv_path = prepare_multi_fg_scales(cache_dir, fg_img, fg_mask, bg_img, num_scales)

    # Load the background once, under its own name, instead of re-reading it
    # and rebinding the `bg_img` parameter on every CSV row.
    bg_pil = read_image_pil(bg_img)

    score = 0
    ratio = 0
    optimal_bbox = None
    best_fg = None
    best_mask = None
    best_comp = None
    best_comp_mask = None

    with open(csv_path, mode='r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            fg_w = int(row['newWidth'])
            fg_h = int(row['newHeight'])

            # scaled image and mask share one file name, in separate directories
            scaled_name = '{}_{}_{}_{}.jpg'.format(row["fg_name"].split(".")[0], row["bg_name"].split(".")[0], fg_w, fg_h)
            scaled_fg = os.path.join(scaled_fg_dir, scaled_name)
            scaled_mask = os.path.join(scaled_mask_dir, scaled_name)

            for occ_ratio, bbox in get_bbox_list(bg_bbox, bg_w, bg_h, fg_w, fg_h):
                comp, comp_mask = get_composite_image(scaled_fg, scaled_mask, bg_pil, bbox)
                bbox_score = net(comp, comp_mask)
                if bbox_score > score:
                    best_fg = scaled_fg
                    best_mask = scaled_mask
                    optimal_bbox = bbox
                    best_comp = comp
                    best_comp_mask = comp_mask
                    score = bbox_score
                    ratio = occ_ratio

    return score, ratio, best_fg, best_mask, optimal_bbox, best_comp, best_comp_mask

In [8]:
import random
import numpy as np
from PIL import Image
import cv2
import time

def get_occluder(occ_size, index=None):
    """Export one occluder image and its mask from the occluder library.

    Args:
        occ_size: library size label used to build the ``.npz`` path
            (``/srv/occluder_libs_test_<occ_size>.npz``).
        index: position of the occluder inside the library; a random one is
            chosen when ``None``. Index 0 is a valid explicit choice.

    Returns:
        ``(occ_img, occ_mask, w, h)``: paths of the written image and mask
        files plus ``box[3] - box[2]`` and ``box[1] - box[0]``.
        NOTE(review): presumably the box layout is (y1, y2, x1, x2) -- confirm.
    """
    # get the correct path to the occluder library
    occluder_path = '/srv/occluder_libs_test_' + occ_size + '.npz'

    # NOTE(security): allow_pickle=True can execute arbitrary code while
    # loading; only use it with trusted library files.
    # The context manager closes the archive, and each array is read once.
    with np.load(occluder_path, allow_pickle=True) as data:
        images = data['images']

        # `is None` so that index 0 is honoured; randrange excludes the upper
        # bound, so the drawn index is always valid.
        if index is None:
            index = random.randrange(len(images))

        image = images[index]
        box = data['boxes'][index]
        mask = data['masks'][index]

    # save the occluder
    # NOTE(review): file names do not include occ_size, so equal indices from
    # different libraries overwrite each other.
    os.makedirs('./occluders', exist_ok=True)
    occ_img = f'./occluders/fg_img_{index}.jpg'
    occ_mask = f'./occluders/fg_mask_{index}.png'

    cv2.imwrite(occ_img, image)
    # scale the mask by 255 before saving -- presumably stored as 0/1; confirm
    cv2.imwrite(occ_mask, mask * 255)

    h = box[1] - box[0]
    w = box[3] - box[2]

    return occ_img, occ_mask, w, h

In [9]:
import json

def write_one_annotation(save_path, bg_img, cate, occ_size, occlusion_level, occlusion_ratio, occluder_path, fg_bb, bg_bb):
    """Write one sample's annotation to a JSON file.

    The file is written to
    ``<save_path>/annotations/<occlusion_level>/<occ_size>_<img_id>.json``,
    where ``img_id`` is the last path component of ``bg_img`` up to its first
    dot. The target directory must already exist.
    """
    record = {
        'box': bg_bb.tolist(),  # bg_bb must provide .tolist() (e.g. an ndarray)
        'ratio': occlusion_ratio,
        'occluder_box': fg_bb,
        'occluder_path': occluder_path,
        'source': bg_img,
        'cate': cate,
    }

    stem = bg_img.rsplit('/', 1)[-1].partition('.')[0]
    target = f'{save_path}/annotations/{occlusion_level}/{occ_size}_{stem}.json'

    # Convert and write the JSON object to file
    with open(target, "w") as outfile:
        json.dump(record, outfile)

In [16]:
from libcom import color_transfer
from libcom.utils.process_image import *
from libcom.utils.environment import *
from libcom import OPAScoreModel
from libcom import get_composite_image
from libcom.utils.process_image import make_image_grid
import cv2
import csv
from PIL import Image
from libcom import Mure_ObjectStitchModel
import json

def generate_images(cate, bg_img_path, bg_w, bg_h, bg_bbox, fg_indices, save_real, save_unreal, record_file):
    """Generate a realistic and an unrealistic occluded image per occluder size.

    For each ``(occ_size, fg_index)`` pair the given occluder is placed at the
    best-scoring location and blended with ``Mure_ObjectStitchModel``
    (realistic). A randomly chosen occluder is then pasted at a random
    location in the same occlusion bin (unrealistic). An image and a JSON
    annotation are written for both; if either placement cannot be found,
    nothing is written for that size and a line is added to ``record_file``.

    Args:
        cate: category label stored in the annotations.
        bg_img_path: path of the background image.
        bg_w, bg_h: background image size.
        bg_bbox: ``(x1, y1, x2, y2)`` box of the object to occlude (ndarray).
        fg_indices: iterable of ``(occ_size, fg_index)`` pairs.
        save_real, save_unreal: output roots; the ``images/<level>`` and
            ``annotations/<level>`` sub-directories must already exist.
        record_file: writable text file used as a skip log.
    """
    # NOTE(review): the model is rebuilt on every call (once per background).
    net = Mure_ObjectStitchModel(device=0, sampler='plms')
    print(bg_img_path)

    img_id = bg_img_path.split('/')[-1].split('.')[0]

    for (occ_size, fg_index) in fg_indices:

        real_fg_img_path, real_fg_mask_path, _, _ = get_occluder(occ_size, fg_index)

        score, real_ratio, op_fg_img_path, op_fg_mask_path, op_bbox, _, _ = get_optimal_location(real_fg_img_path, real_fg_mask_path, bg_img_path, bg_w, bg_h, bg_bbox, num_scales=16)

        if not score:
            # no usable foreground location was found, so skip this size
            record_file.write(f'Skipped {occ_size} for {bg_img_path}. Couldnt find optimal location.\n')
            continue

        # Output directories are keyed by occlusion level. No `break`: a ratio
        # on a shared boundary lands in the later bin, as before.
        occlusion_level = None
        for range_label, (min_occ, max_occ) in occlusion_ranges.items():
            if min_occ <= real_ratio <= max_occ:
                occlusion_level = range_label

        if occlusion_level is None:
            # guard against a KeyError in the range lookup below
            record_file.write(f'Skipped {occ_size} for {bg_img_path}. Occlusion ratio {real_ratio} is outside every range.\n')
            continue

        # first, get a random occluder from the same size library
        unreal_fg_img_path, unreal_fg_mask_path, _, _ = get_occluder(occ_size)

        # get a bounding box and ratio for an occlusion that falls into the bin
        random_fg_img_path, random_fg_mask_path, unreal_ratio, random_bbox = get_random_location(unreal_fg_img_path, unreal_fg_mask_path, bg_img_path, bg_w, bg_h, bg_bbox, occlusion_ranges[occlusion_level])

        if not random_fg_img_path:
            # no suitable random placement: skip both outputs for this size
            record_file.write(f'Skipped {occ_size} for {bg_img_path}. Couldnt find random location.\n')
            continue

        # realistic image
        write_one_annotation(save_real, bg_img_path, cate, occ_size, occlusion_level, real_ratio, real_fg_img_path, op_bbox, bg_bbox)
        res, _ = net(bg_img_path, [op_fg_img_path], [op_fg_mask_path], op_bbox, sample_steps=25, num_samples=3)
        # TODO: find the best result in res, instead of just taking the last sample
        cv2.imwrite(f'{save_real}/images/{occlusion_level}/{occ_size}_{img_id}.jpg', res[2])

        # unrealistic image
        write_one_annotation(save_unreal, bg_img_path, cate, occ_size, occlusion_level, unreal_ratio, unreal_fg_img_path, random_bbox, bg_bbox)
        comp, _ = get_composite_image(random_fg_img_path, random_fg_mask_path, bg_img_path, random_bbox)
        cv2.imwrite(f'{save_unreal}/images/{occlusion_level}/{occ_size}_{img_id}.jpg', comp)

In [17]:
# import os

# # the only reason we have bg_mask_dir is in case we need it for segmentation masks...
# def generate_images(cate, file_list, bg_img_dir, bg_anno_dir, real_save, unreal_save, record_file):
    
#     # set up all of the necessary directories!
#     for dataset in [real_save, unreal_save]:
#         for data_type in ['images', 'annotations']:
#             for range_label in occlusion_ranges.keys():
#                 os.makedirs(dataset + "/" + data_type + "/" + range_label, exist_ok=True)
    
#     for file_name in file_list[9:10]:
#         bg_w, bg_h, bg_bbox, flag_ = load_one_annotation(os.path.join(bg_anno_dir, file_name + '.mat'))

#         if flag_:
#             record_file.write('Skipped %s for multi objects\n' % file_name)
#             continue

#         bg_img = os.path.join(bg_img_dir, file_name + '.JPEG')
        
#         generate_composite_image(cate, bg_img, bg_w, bg_h, bg_bbox, real_save, unreal_save)

In [18]:
# for cate in categories:
#     print('Start cate: ', cate)
#     tem = open('generating_record_%s_1030.txt' % cate, 'w')
#     file_list_ = open(bg_list_path % cate).readlines()
#     file_list_ = [tem.strip('\n') for tem in file_list_]
#     bg_img_path_ = bg_img_path % cate
#     bg_anno_path_ = bg_anno_path % cate

#     generate_dataset(cate, file_list_, bg_img_path_, bg_anno_path_, real_save, unreal_save, tem)

In [19]:
import os

# Important paths
path_to_original_pascal3dp = '/srv/PASCAL3D+_release1.1/'

# occluder libraries, one archive per occluder size
small_occluder_path, medium_occluder_path, large_occluder_path = (
    f'/srv/occluder_libs_test_{size}.npz' for size in ('small', 'medium', 'large')
)

# bg_list_path = path_to_original_pascal3dp + 'Image_sets/%s_imagenet_val.txt'
bg_img_dir = f'{path_to_original_pascal3dp}Images/'
bg_anno_dir = f'{path_to_original_pascal3dp}Annotations/'
bg_mask_dir = f'{path_to_original_pascal3dp}obj_mask/'

# output roots for the two generated datasets
real_save, unreal_save = './realistic', './unrealistic'

In [20]:
# Set up the necessary directories:
#   <dataset root>/<images|annotations>/<occlusion level>
for dataset in (real_save, unreal_save):
    for data_type in ('images', 'annotations'):
        for range_label in occlusion_ranges:
            os.makedirs(f'{dataset}/{data_type}/{range_label}', exist_ok=True)

In [None]:
# Driver: for every row of the background/occluder pairing CSV, load the
# background annotation and generate the realistic and unrealistic images.
path_to_original_pascal3dp = '/srv/PASCAL3D+_release1.1/'
bg_anno_dir = path_to_original_pascal3dp + 'Annotations/'
csv_path = './bg_fg_pair.csv'
real_save = './realistic'

# Context managers close both files even if a sample raises midway.
with open('generating_record_.txt', 'w') as record_file, \
        open(csv_path, mode='r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:

        bg_img_path = row['image_name']
        fg_index_small = int(row['fg_index_small'])
        fg_index_medium = int(row['fg_index_medium'])
        fg_index_large = int(row['fg_index_large'])

        fg_indices = (("small", fg_index_small), ("medium", fg_index_medium), ("large", fg_index_large))

        # The category is the prefix of the image's parent directory
        # (e.g. 'bicycle_imagenet' -> 'bicycle'); this does not depend on how
        # deep the dataset root sits in the file system.
        category = os.path.basename(os.path.dirname(bg_img_path)).split('_')[0]
        img_id = bg_img_path.split('/')[-1].split('.')[0]

        bg_anno_path = os.path.join(bg_anno_dir, category + '_imagenet', img_id + '.mat')

        # load the annotation file for the background img
        bg_w, bg_h, bg_bbox, flag_ = load_one_annotation(bg_anno_path)

        if flag_:
            # log the image id (the old `file_name` was never defined and
            # raised NameError on the first multi-object image)
            record_file.write('Skipped %s for multi objects\n' % img_id)
            continue

        generate_images(category, bg_img_path, bg_w, bg_h, bg_bbox, fg_indices, real_save, unreal_save, record_file)

/srv/PASCAL3D+_release1.1/Images/bicycle_imagenet/n02834778_10027.JPEG
/srv/PASCAL3D+_release1.1/Images/bicycle_imagenet/n02834778_10155.JPEG
/srv/PASCAL3D+_release1.1/Images/bicycle_imagenet/n02834778_10164.JPEG
