In [1]:
import fiftyone as fo
from fiftyone import ViewField as F

# Load a small dataset
dataset = fo.zoo.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["detections"],
    max_samples=10,
)

***Python 3.8 Deprecation Notice***
Python 3.8 will no longer be supported in new releases after October 1, 2024.
Please upgrade to Python 3.9 or later.
For additional details please see https://deprecation.voxel51.com


Downloading split 'validation' to '/home/lradovan/fiftyone/coco-2017/validation' if necessary
Found annotations at '/home/lradovan/fiftyone/coco-2017/raw/instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient


***Python 3.8 Deprecation Notice***
Python 3.8 will no longer be supported in new releases after October 1, 2024.
Please upgrade to Python 3.9 or later.
For additional details please see https://deprecation.voxel51.com


Loading existing dataset 'coco-2017-validation-10'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [5]:
'''
Just testing the FOPA code
'''

from libcom import FOPAHeatMapModel
from libcom.utils.process_image import make_image_grid, draw_bbox_on_image
import cv2
import os

fg_img = '/home/lradovan/workspace/libcom/tests/source/foreground/1.jpg'
fg_mask = '/home/lradovan/workspace/libcom/tests/source/foreground_mask/1.png'

#test_set = get_test_list_fopa_heatmap()

test_set = []
for sample in dataset[5:6]:
    print(sample.metadata.width)
    test_set.append({'foreground': fg_img, 'foreground_mask': fg_mask, 'background': sample.filepath})
    
result_dir = './results/'
net = FOPAHeatMapModel(device=0)

for pair in test_set[:1]:
    fg_img, fg_mask, bg_img = pair['foreground'], pair['foreground_mask'], pair['background']
    bboxes, heatmaps = net(fg_img, fg_mask, bg_img, cache_dir=os.path.join(result_dir, 'cache'), heatmap_dir=os.path.join(result_dir, 'heatmap'))
    img_name  = os.path.basename(bg_img).replace('.png', '.jpg')
    bbox = bboxes[0]
    bg_img  = draw_bbox_on_image(bg_img, [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
    grid_img  = make_image_grid([bg_img, fg_img, heatmaps[0]])
    os.makedirs(os.path.join(result_dir, 'grid'), exist_ok=True)
    res_path  = os.path.join(result_dir, 'grid', img_name)
    cv2.imwrite(res_path, grid_img)
    print('save result to ', res_path)
print(f'end testing!')

640
save result to  ./results/grid/000000000785.jpg
end testing!


In [4]:
'''
Gets the largest bounding box from a background image
'''
def get_bounding_box(sample):
    detections = sample.ground_truth.detections
    if len(detections) > 0:
        print(f"\nImage path: {sample.filepath}")
        
        # Find largest bounding box
        largest_det = max(detections, key=lambda d: get_box_area(d.bounding_box))

        # get bbox
        bbox = largest_det.bounding_box
        width = sample.metadata.width
        height = sample.metadata.height
        
        x1 = int(bbox[0] * width)
        y1 = int(bbox[1] * height)
        x2 = int((bbox[0] + bbox[2]) * width)
        y2 = int((bbox[1] + bbox[3]) * height)

    return [x1, y1, x2, y2]

In [5]:
# takes a box of the form [x1, x2, y1, y2]
def get_box_area(box):
    return (box[1] - box[0]) * (box[3] - box[2])

In [6]:
# get overlap / background subject
def overlap_ratio(occluder_bb, occludee_bb):

    #top left and bottom right points
    occluder_x1, occluder_y1, occluder_x2, occluder_y2 = occluder_bb
    occludee_x1, occludee_y1, occludee_x2, occludee_y2 = occludee_bb

    # area of the foreground object
    occludee_area = (occludee_x2 - occludee_x1) * (occludee_y2 - occludee_y1)

    # area of the background object being covered by the foreground object
    overlap_area = max(0, min(occludee_x2, occluder_x2) - max(occludee_x1, occluder_x1)) * max(0, min(occludee_y2, occluder_y2) - max(occludee_y1, occluder_y1))

    # overlap over the total background object area
    return overlap_area / occludee_area

In [60]:
import random

'''
Get a list of randomly chosen bounding boxes to occlude the background object above some threshold
This can be improved if we know the foreground image has to be some base scale to allow for above threshold occlusion
'''
def get_bbox_list(bg_bbox, bg_w, bg_h, fg_w, fg_h):

    bboxes = []

    occludee_x1 = bg_bbox[0]
    occludee_y1 = bg_bbox[1] 
    occludee_x2 = bg_bbox[2]
    occludee_y2 = bg_bbox[3]

    num_boxes = 5 # iterate over multiple boxes
    overlap_threshold = .2 # ensure a minimum amount of overlap

    # print(bg_bbox, fg_w, fg_h)
    
    for _ in range(num_boxes):
        # top left corner of the occluder bounding box:

        occluder_x1 = random.randint(max(0, occludee_x1 - fg_w), occludee_x2) # overlapping in the x-direction
        occluder_y1 = random.randint(max(0, occludee_y1 - fg_h), occludee_y2) # overlapping in the y-direction

        # TODO: Fix so doesnt exceed background image
        
        occluder_x2 = occluder_x1 + fg_w
        occluder_y2 = occluder_y1 + fg_h

        # Maybe this will fix sizing error??? (IT DOES)
        if occluder_x2 > bg_w or occluder_y2 > bg_h:
            continue

        occluder_bb = [occluder_x1, occluder_y1, occluder_x2, occluder_y2]
        occluded_ratio = overlap_ratio(occluder_bb, bg_bbox)

        if occluded_ratio >= overlap_threshold:
            bboxes.append([occluder_x1, occluder_y1, occluder_x2, occluder_y2])
    
    return bboxes

In [26]:
# from https://github.com/bcmi/libcom/blob/main/libcom/fopa_heat_map/source/prepare_multi_fg_scales.py

# I was just using this to confirm that the width and height put into the CSV is equivalent to the scaled foreground with and height
# - it is
# will delete soon

import os
import csv
import numpy as np
from PIL import Image
import cv2

fg_scale_num = 16

def fill_image_by_mask(image, mask, fill_pixel=0, thresh=127):
    image = np.asarray(image).copy()
    mask  = np.asarray(mask)
    fill_img = (np.ones_like(image) * fill_pixel).astype(np.uint8)
    image = np.where(mask > thresh, image, fill_img)
    return Image.fromarray(image)

# I added the foreground bounding box to the list of arguments
def prepare_multi_fg_scales(cache_dir, fg_path, mask_path, bg_path, fg_bb, fg_scale_num):
    os.makedirs(cache_dir, exist_ok=True)
    fg_name   = os.path.splitext(os.path.basename(fg_path))[0]
    bg_name   = os.path.splitext(os.path.basename(bg_path))[0]
    fg_scales = list(range(1, fg_scale_num+1))
    fg_scales = [i/(1+fg_scale_num+1) for i in fg_scales]

    # -- code I added in ----
    fg_bb_width = fg_bb[2] - fg_bb[0]  # original bounding box width
    fg_bb_height = fg_bb[3] - fg_bb[1] # original bounding box height
    
    scaled_fg_dir   = os.path.join(cache_dir, f'fg_{fg_scale_num}scales')
    scaled_mask_dir = os.path.join(cache_dir, f'mask_{fg_scale_num}scales')    
    csv_file = os.path.join(cache_dir, f'{fg_scale_num}scales.csv')
    os.makedirs(scaled_fg_dir,   exist_ok=True)
    os.makedirs(scaled_mask_dir, exist_ok=True)

    file = open(csv_file, mode='w', newline='')
    writer = csv.writer(file)
    csv_head = ['fg_name', 'mask_name', 'bg_name', 'scale', 'newWidth', 'newHeight', 'bbWidth', 'bbHeight', 'pos_label', 'neg_label']
    writer.writerow(csv_head)

    bg_img = Image.open(bg_path).convert("RGB")  
    bg_img_aspect = bg_img.height / bg_img.width
    fg_tocp   = Image.open(fg_path).convert("RGB")
    mask_tocp = Image.open(mask_path).convert("RGB")
    fg_tocp   = fill_image_by_mask(fg_tocp, mask_tocp)
    fg_tocp_aspect = fg_tocp.height / fg_tocp.width
    
    for fg_scale in fg_scales:
        if fg_tocp_aspect > bg_img_aspect:
            # bounding box scaling!
            bb_h = int(bg_img.height * fg_scale)
            bb_w = int(bb_h / fg_bb_height * fg_bb_width)
            
            new_height = int(bg_img.height * fg_scale)
            new_width  = int(new_height / fg_tocp.height * fg_tocp.width)
        else:
            # bounding box scaling!
            bb_w = int(bg_img.width * fg_scale)
            fg_h = int(bb_w / fg_bb_width * fg_bb_height)
            
            new_width  = int(bg_img.width * fg_scale)
            new_height = int(new_width / fg_tocp.width * fg_tocp.height)
        
        top    = int((bg_img.height - new_height) / 2)
        bottom = top + new_height
        left   = int((bg_img.width - new_width) / 2)
        right  = left + new_width
        
        fg_img_ = np.asarray(fg_tocp.resize((new_width, new_height)))
        mask_   = np.asarray(mask_tocp.resize((new_width, new_height)))
        fg_img  = np.zeros((bg_img.height, bg_img.width, 3), dtype=np.uint8) 
        mask    = np.zeros((bg_img.height, bg_img.width, 3), dtype=np.uint8) 
        fg_img[top:bottom, left:right, :] = fg_img_
        mask[top:bottom, left:right, :] = mask_
        fg_img = Image.fromarray(fg_img.astype(np.uint8))
        mask = Image.fromarray(mask.astype(np.uint8))
        
        basename = f'{fg_name}_{bg_name}_{new_width}_{new_height}.jpg'
        fg_img_path = os.path.join(scaled_fg_dir, basename)
        mask_path = os.path.join(scaled_mask_dir, basename)
        fg_img.save(fg_img_path)
        mask.save(mask_path)

        # Instead of new_width and new_height for the image, lets try to take the scaled bounding box's height and width
        writer.writerow([os.path.basename(fg_path), 
                         os.path.basename(mask_path), 
                         os.path.basename(bg_path), 
                         fg_scale, new_width, new_height, bb_w, bb_h,
                         None, None])
    file.close()  
    csv_data = []
    with open(csv_file, mode='r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            if row['pos_label']=="":
                row['pos_label'] = [[0,0]]
            if row['neg_label']=="":
                row['neg_label'] = [[0,0]]
            csv_data.append(row)
    return scaled_fg_dir, scaled_mask_dir, csv_file

In [65]:
'''
Right now this just takes does one specific foreground image (teddy bear), and one background image (skier)
The output goes to ./results/best.jpg
'''

from libcom.utils.process_image import *
from libcom.utils.environment import *
from libcom import OPAScoreModel
from libcom import get_composite_image
from libcom.utils.process_image import make_image_grid
import cv2
import csv
from PIL import Image
from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales

net = OPAScoreModel(device=0, model_type='SimOPA')

# for each scale, take the highest OPA score
fg_scale_num = 16

# trying with just one foreground image (teddy bear) and one background image (skier)
foreground_image = '/home/lradovan/workspace/libcom/tests/source/foreground/1.jpg'
foreground_mask = '/home/lradovan/workspace/libcom/tests/source/foreground_mask/1.png'

# this is the bounding box specific to the teddy bear
fg_bb = [1000, 895, 1480, 1355]

bg_imgs = []
for sample in dataset[5:6]:
    bg_imgs.append([sample.filepath, get_bounding_box(sample), sample.metadata.width, sample.metadata.height])

background_image = bg_imgs[0][0]

cache_dir = './cache'

# from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales
scaled_fg_dir, scaled_mask_dir, csv_path = prepare_multi_fg_scales(cache_dir, foreground_image, foreground_mask, background_image, fg_scale_num)
 
# iterate over the different foreground scales
with open(csv_path, mode='r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    score = 0
    res = [-1, -1]
    for row in csv_reader:
        fg_name   = '{}_{}_{}_{}.jpg'.format(row["fg_name"].split(".")[0],row["bg_name"].split(".")[0],int(row["newWidth"]),int(row["newHeight"]))
        mask_name = '{}_{}_{}_{}.jpg'.format(row["fg_name"].split(".")[0],row["bg_name"].split(".")[0],int(row["newWidth"]),int(row["newHeight"]))
        scale     = row['scale']
        fg_w = int(row['newWidth'])
        fg_h = int(row['newHeight'])
        
        save_name = fg_name.split(".")[0] + '_' + str(scale) + '.jpg'

        bg_img    = read_image_pil(background_image)
        fg_img    = read_image_pil(os.path.join(scaled_fg_dir, fg_name))
        fg_mask   = read_mask_pil(os.path.join(scaled_mask_dir, mask_name))
        bbox_list = get_bbox_list(bg_imgs[0][1], bg_imgs[0][2], bg_imgs[0][3], fg_w, fg_h)

        print(bbox_list)

        for bbox in bbox_list:
            try:
                comp, comp_mask = get_composite_image(fg_img, fg_mask, bg_img, bbox)
                bbox_score = net(comp, comp_mask)
                if bbox_score > score:
                    res = comp, comp_mask
                    score = bbox_score
            except:
                print("Sizing issues!")

    print(score)
    grid_img  = make_image_grid(res)
    cv2.imwrite('./results/best.jpg', grid_img)


Image path: /home/lradovan/fiftyone/coco-2017/validation/data/000000000785.jpg
[]
[]
[]
[]
[]
[]
[[361, 66, 533, 231], [395, 186, 567, 351]]
[[224, 164, 420, 352], [373, 152, 569, 340]]
[[194, 143, 415, 355], [146, 196, 367, 408]]
[[359, 122, 605, 358], [130, 186, 376, 422]]
[[239, 3, 509, 262], [100, 64, 370, 323], [107, 24, 377, 283]]
[[115, 135, 410, 418]]
[[198, 113, 517, 419]]
[]
[]
[]
0.9997072815895081
