In [1]:
# %pip installs into the environment of the running kernel (a bare `!pip` may hit a
# different interpreter); the version is pinned to the one this notebook was run with.
%pip install openimages==0.0.1

Collecting openimages
  Downloading openimages-0.0.1-py2.py3-none-any.whl (10 kB)
Collecting cvdata
  Downloading cvdata-0.0.3-py3-none-any.whl (37 kB)
Collecting lxml
  Downloading lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl (5.1 MB)
[K     |████████████████████████████████| 5.1 MB 7.5 MB/s eta 0:00:01
Installing collected packages: lxml, cvdata, openimages
Successfully installed cvdata-0.0.3 lxml-5.3.0 openimages-0.0.1


In [2]:
from openimages.download import download_dataset
# Download "Car" images (limit=10) together with pascal-format annotations
# into ./dest/dir; the returned dict of output directories is the cell output.
download_dataset(
    "./dest/dir",
    ["Car"],
    annotation_format="pascal",
    limit=10,
)

2024-11-06  18:46:43 INFO Downloading 10 train images for class 'car'
100%|███████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 23.64it/s]
2024-11-06  18:46:44 INFO Creating 10 train annotations (pascal) for class 'car'
100%|██████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 513.19it/s]


{'car': {'images_dir': './dest/dir/car/images',
  'annotations_dir': './dest/dir/car/pascal'}}

In [6]:
import xml.etree.ElementTree as ET
from pathlib import Path

# Directories reported by the download_dataset call for the 'car' class:
# the pascal-format XML annotations and the images they describe.
annotations_path = Path("./dest/dir/car/pascal")
images_path = Path("./dest/dir/car/images")

# Function to parse the XML and get bounding boxes
def parse_anno(annotation_file):
    """Parse one pascal-format XML annotation.

    Args:
        annotation_file: Path (or open file object) of the XML annotation.

    Returns:
        A ``(width, height, bbox)`` tuple. ``width`` and ``height`` are read from
        the ``<size>`` element; ``bbox`` is ``[x_min, y_min, x_max, y_max]`` of
        the annotated object that covers the largest area.

    Raises:
        ValueError: If the annotation contains no ``<object>`` elements.
    """
    root = ET.parse(annotation_file).getroot()

    # The image size belongs to the file, not to each object: read it once so it
    # is defined even when the annotation lists no objects.
    width = int(root.find("size/width").text)
    height = int(root.find("size/height").text)

    bboxes = []
    for obj in root.findall("object"):
        bndbox = obj.find("bndbox")
        bboxes.append([int(bndbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")])

    if not bboxes:
        raise ValueError(f"No <object> entries found in {annotation_file}")

    # max() on plain lists compares them lexicographically (largest x_min wins),
    # which is not a meaningful selection; choose the box with the largest area.
    largest = max(bboxes, key=lambda box: (box[2] - box[0]) * (box[3] - box[1]))
    return width, height, largest

# Print the selected bounding box for every downloaded annotation file.
for annotation_file in annotations_path.glob("*.xml"):
    image_file = images_path / f"{annotation_file.stem}.jpg"
    # parse_anno returns (width, height, bbox); only the box is reported here.
    _width, _height, bbox = parse_anno(annotation_file)
    print(f"Bounding boxes for {image_file}: {bbox}")

Bounding boxes for dest/dir/car/images/00005bf623ff1ac2.jpg: [37, 91, 997, 630]
Bounding boxes for dest/dir/car/images/0001c6bf48e16ab2.jpg: [126, 434, 945, 875]
Bounding boxes for dest/dir/car/images/0000575f5a03db70.jpg: [27, 111, 1006, 572]
Bounding boxes for dest/dir/car/images/0001c8c65851276f.jpg: [67, 218, 923, 501]
Bounding boxes for dest/dir/car/images/0000048549557964.jpg: [445, 536, 743, 725]
Bounding boxes for dest/dir/car/images/0001c626b9afb50c.jpg: [112, 139, 908, 550]
Bounding boxes for dest/dir/car/images/000228608388803f.jpg: [19, 156, 973, 715]
Bounding boxes for dest/dir/car/images/0001124e2d5104e1.jpg: [16, 19, 962, 810]
Bounding boxes for dest/dir/car/images/000096726fd6c6c8.jpg: [153, 124, 811, 767]
Bounding boxes for dest/dir/car/images/00010bf498b64bab.jpg: [592, 287, 713, 386]


In [7]:
'''
Just testing the FOPA code
'''

from libcom import FOPAHeatMapModel
from libcom.utils.process_image import make_image_grid, draw_bbox_on_image
import cv2
import os

# Foreground image and its mask used for the FOPA heat-map smoke test.
# NOTE(review): these are absolute, machine-specific paths — adjust before running elsewhere.
fg_img = '/home/lradovan/workspace/libcom/tests/source/foreground/1.jpg'
fg_mask = '/home/lradovan/workspace/libcom/tests/source/foreground_mask/1.png'

#test_set = get_test_list_fopa_heatmap()

# Build the list of (foreground, mask, background) triples to run.
# NOTE(review): `dataset` is not defined in any cell of this notebook, so this loop
# only works if it already exists in the kernel — confirm where it is created.
test_set = []
for sample in dataset[5:6]:
    print(sample.metadata.width)
    test_set.append({'foreground': fg_img, 'foreground_mask': fg_mask, 'background': sample.filepath})
    
result_dir = './results/'
# NOTE: `net` is rebound to an OPAScoreModel by a later cell of this notebook.
net = FOPAHeatMapModel(device=0)

# Run the model on the first triple and save a grid of background, foreground and heat map.
for pair in test_set[:1]:
    fg_img, fg_mask, bg_img = pair['foreground'], pair['foreground_mask'], pair['background']
    bboxes, heatmaps = net(fg_img, fg_mask, bg_img, cache_dir=os.path.join(result_dir, 'cache'), heatmap_dir=os.path.join(result_dir, 'heatmap'))
    # Output file keeps the background's base name, with '.png' swapped for '.jpg'.
    img_name  = os.path.basename(bg_img).replace('.png', '.jpg')
    bbox = bboxes[0]
    # presumably bbox is (x, y, w, h) and is converted to corner form here — TODO confirm against libcom
    bg_img  = draw_bbox_on_image(bg_img, [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
    grid_img  = make_image_grid([bg_img, fg_img, heatmaps[0]])
    os.makedirs(os.path.join(result_dir, 'grid'), exist_ok=True)
    res_path  = os.path.join(result_dir, 'grid', img_name)
    cv2.imwrite(res_path, grid_img)
    print('save result to ', res_path)
print(f'end testing!')

No module 'xformers'. Proceeding without it.
640


  rank_zero_deprecation(


save result to  ./results/grid/000000000785.jpg
end testing!


In [10]:
def overlap_ratio(occluder_bb, occludee_bb):
    """Return the fraction of ``occludee_bb`` that is covered by ``occluder_bb``.

    Both boxes are ``[x1, y1, x2, y2]`` (top-left and bottom-right corners).

    Args:
        occluder_bb: Box of the object placed on top.
        occludee_bb: Box of the object being covered.

    Returns:
        The intersection area divided by the occludee's area, or ``0.0`` when
        the occludee box has no area (which would otherwise divide by zero).
    """
    occluder_x1, occluder_y1, occluder_x2, occluder_y2 = occluder_bb
    occludee_x1, occludee_y1, occludee_x2, occludee_y2 = occludee_bb

    # area of the object being covered
    occludee_area = (occludee_x2 - occludee_x1) * (occludee_y2 - occludee_y1)
    if occludee_area <= 0:
        # A degenerate box cannot be meaningfully occluded.
        return 0.0

    # width/height of the intersection, clamped at 0 when the boxes are disjoint
    overlap_w = max(0, min(occludee_x2, occluder_x2) - max(occludee_x1, occluder_x1))
    overlap_h = max(0, min(occludee_y2, occluder_y2) - max(occludee_y1, occluder_y1))

    # covered area over the total area of the occludee
    return (overlap_w * overlap_h) / occludee_area

In [11]:
import random

def get_bbox_list(bg_bbox, bg_w, bg_h, fg_w, fg_h, num_boxes=5, overlap_threshold=0.2):
    """Randomly sample occluder boxes that cover enough of a background object.

    Args:
        bg_bbox: ``[x1, y1, x2, y2]`` box of the background object to occlude.
        bg_w: Width of the background image.
        bg_h: Height of the background image.
        fg_w: Width of the (scaled) foreground that acts as the occluder.
        fg_h: Height of the (scaled) foreground that acts as the occluder.
        num_boxes: Number of random placements to try.
        overlap_threshold: Minimum ``overlap_ratio`` a placement needs to be kept.

    Returns:
        A list of ``[x1, y1, x2, y2]`` occluder boxes, each lying inside the
        background image and covering at least ``overlap_threshold`` of
        ``bg_bbox``. The list may hold fewer than ``num_boxes`` entries and is
        empty when the foreground cannot be placed at all.
    """
    occludee_x1, occludee_y1, occludee_x2, occludee_y2 = bg_bbox

    # Range of top-left corners for which the occluder can reach the occludee
    # while still fitting inside the background image. Sampling only from this
    # range replaces the earlier "sample, then discard if out of bounds" step.
    x_lo = max(0, occludee_x1 - fg_w)
    x_hi = min(occludee_x2, bg_w - fg_w)
    y_lo = max(0, occludee_y1 - fg_h)
    y_hi = min(occludee_y2, bg_h - fg_h)
    if x_lo > x_hi or y_lo > y_hi:
        # No valid placement exists (e.g. the foreground is larger than the
        # background); random.randint would raise on an empty range.
        return []

    bboxes = []
    for _ in range(num_boxes):
        occluder_x1 = random.randint(x_lo, x_hi)
        occluder_y1 = random.randint(y_lo, y_hi)
        occluder_bb = [occluder_x1, occluder_y1, occluder_x1 + fg_w, occluder_y1 + fg_h]

        if overlap_ratio(occluder_bb, bg_bbox) >= overlap_threshold:
            bboxes.append(occluder_bb)

    return bboxes

In [33]:
def get_optimal_location(fg_img, fg_mask, bg_img, bg_w, bg_h, bg_bbox, num_scales):
    """Search foreground scales and placements for the best-scoring composite.

    Args:
        fg_img: Path of the foreground image.
        fg_mask: Path of the foreground mask.
        bg_img: Path of the background image.
        bg_w: Width of the background image.
        bg_h: Height of the background image.
        bg_bbox: ``[x1, y1, x2, y2]`` box of the background object to occlude.
        num_scales: Number of different foreground scales to try.

    Returns:
        ``(score, composite_image, composite_mask)`` for the highest score
        found. When no candidate scores above 0 the result is ``(0, -1, -1)``.
    """
    cache_dir = './cache'

    # from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales
    scaled_fg_dir, scaled_mask_dir, csv_path = prepare_multi_fg_scales(cache_dir, fg_img, fg_mask, bg_img, num_scales)

    # Load the background once, under its own name: the loop used to overwrite
    # the bg_img/fg_img/fg_mask parameters and re-read the background per row.
    bg_pil = read_image_pil(bg_img)

    best_score = 0
    best_comp, best_mask = -1, -1

    # iterate over the different foreground scales
    with open(csv_path, mode='r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            fg_w = int(row['newWidth'])
            fg_h = int(row['newHeight'])

            bbox_list = get_bbox_list(bg_bbox, bg_w, bg_h, fg_w, fg_h)
            if not bbox_list:
                # nothing to score at this scale, so skip loading its images
                continue

            # The scaled foreground and its mask use the same file name inside
            # their respective directories.
            scaled_name = '{}_{}_{}_{}.jpg'.format(row["fg_name"].split(".")[0], row["bg_name"].split(".")[0], fg_w, fg_h)
            scaled_fg = read_image_pil(os.path.join(scaled_fg_dir, scaled_name))
            scaled_mask = read_mask_pil(os.path.join(scaled_mask_dir, scaled_name))

            for bbox in bbox_list:
                comp, comp_mask = get_composite_image(scaled_fg, scaled_mask, bg_pil, bbox)
                bbox_score = net(comp, comp_mask)
                if bbox_score > best_score:
                    best_score, best_comp, best_mask = bbox_score, comp, comp_mask

    return best_score, best_comp, best_mask

In [45]:
'''
Composite one specific foreground image (teddy bear) onto each downloaded car background image and save the best-scoring result.
'''

from libcom.utils.process_image import *
from libcom.utils.environment import *
from libcom import OPAScoreModel
from libcom import get_composite_image
from libcom.utils.process_image import make_image_grid
import cv2
import csv
from PIL import Image
from libcom.fopa_heat_map.source.prepare_multi_fg_scales import prepare_multi_fg_scales

# Scoring model called by get_optimal_location through the global name `net`.
net = OPAScoreModel(device=0, model_type='SimOPA')

# number of foreground scales to try; the highest score across them is kept
fg_scale_num = 16

# single foreground image (teddy bear) and its mask
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
foreground_image = '/home/lradovan/workspace/libcom/tests/source/foreground/8.jpg'
foreground_mask = '/home/lradovan/workspace/libcom/tests/source/foreground_mask/8.png'

# Backgrounds: the downloaded car images and their pascal-format annotations
bg_annotations_path = Path("./dest/dir/car/pascal")
bg_images_path = Path("./dest/dir/car/images")

# cv2.imwrite does not create missing directories, so make sure the output folder exists.
Path("./results").mkdir(parents=True, exist_ok=True)

# Iterate the paths defined in this cell rather than a variable left over from an earlier one.
for annotation_file in bg_annotations_path.glob("*.xml"):
    image_id = annotation_file.stem
    bg_img = os.path.join(bg_images_path, f"{image_id}.jpg")
    bg_w, bg_h, bg_bbox = parse_anno(annotation_file)
    score, comp, comp_mask = get_optimal_location(foreground_image, foreground_mask, bg_img, bg_w, bg_h, bg_bbox, num_scales=fg_scale_num)
    # a falsy score means no candidate placement was kept for this background
    if score:
        grid_img = make_image_grid([comp, comp_mask])
        cv2.imwrite(f'./results/composite_{image_id}.jpg', grid_img)