In [None]:
import json, random, pathlib, uuid
import cv2, numpy as np
from pycocotools.coco import COCO
from tqdm import tqdm

In [None]:
# Example: Cut an object from an image and paste onto a background
import cv2
import numpy as np
from pycocotools.coco import COCO
import random
import os

# Open the TACO annotation file through the COCO API
annotation_file = '../../datasets/taco_official/annotations.json'
coco = COCO(annotation_file)

# Use the first image id listed in the annotations as the demo image
image_dir = '../../datasets/taco_official/'
image_info = coco.loadImgs(coco.getImgIds()[0])[0]
image_path = os.path.join(image_dir, image_info['file_name'])
print(f"Loading image: {image_path}")
image = cv2.imread(image_path)

# A None result means the read failed: report it, then look for the same
# file name inside each `batch_*` sub-directory of the dataset root.
if image is None:
    print(f"Error: Could not load image at {image_path}")
    print(f"File exists: {os.path.exists(image_path)}")
    for batch_dir in os.listdir(image_dir):
        if not batch_dir.startswith('batch_'):
            continue
        test_path = os.path.join(image_dir, batch_dir, image_info['file_name'])
        if not os.path.exists(test_path):
            continue
        # First hit wins: reload from there and stop searching
        image_path = test_path
        image = cv2.imread(image_path)
        print(f"Found image at: {image_path}")
        break

# Fetch every annotation attached to the chosen image
annotation_ids = coco.getAnnIds(imgIds=image_info['id'])
annotations = coco.loadAnns(annotation_ids)

if annotations:
    # Rasterise the first annotation's segmentation into a mask
    mask = coco.annToMask(annotations[0])

    # Compositing is only possible when an image was loaded and the mask
    # covers exactly the same pixel grid.
    if image is None or mask.shape[:2] != image.shape[:2]:
        print(f"Error: Image shape {image.shape if image is not None else 'None'} doesn't match mask shape {mask.shape}")
    else:
        # Replicate the mask over three channels so it lines up with the image
        mask_3d = np.stack([mask] * 3, axis=-1)
        object_img = image * mask_3d

        # Per-pixel random noise serves as the replacement background
        background = np.random.randint(0, 256, image.shape, dtype=np.uint8)

        # Object pixels where the mask is set, noise everywhere else
        result = np.where(mask_3d > 0, object_img, background)

        # Three side-by-side panels: (title, pixels, colormap)
        import matplotlib.pyplot as plt
        plt.figure(figsize=(15, 5))
        panels = (
            ('Original Image', cv2.cvtColor(image, cv2.COLOR_BGR2RGB), None),
            ('Mask', mask, 'gray'),
            ('Synthetic Image', cv2.cvtColor(result, cv2.COLOR_BGR2RGB), None),
        )
        for position, (title, pixels, cmap) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.title(title)
            plt.imshow(pixels, cmap=cmap)
            plt.axis('off')
        plt.show()
else:
    print("No annotations found for this image")

In [None]:
# Multiple Examples: Cut multiple objects and create various synthetic scenarios
import cv2
import numpy as np
from pycocotools.coco import COCO
import random
import os
import matplotlib.pyplot as plt

# Load the TACO annotations through the COCO API. The path is relative, so it
# resolves against the directory the notebook kernel is running in.
annotation_file = '../../datasets/taco_official/annotations.json'
coco = COCO(annotation_file)

def find_image_path(image_dir, filename):
    """Locate an image file below the dataset root.

    The file is looked up first at ``image_dir/filename`` and then inside
    every immediate sub-directory of ``image_dir`` whose name starts with
    ``batch_``.

    Args:
        image_dir: Dataset root directory.
        filename: File name (or relative path) of the image.

    Returns:
        The first existing path as a string, or ``None`` when the file is not
        found or ``image_dir`` is not a directory.
    """
    # Try direct path first
    direct_path = os.path.join(image_dir, filename)
    if os.path.exists(direct_path):
        return direct_path

    # A missing dataset root means "not found"; os.listdir would raise here
    if not os.path.isdir(image_dir):
        return None

    # Search the batch directories in sorted order so the result does not
    # depend on the file system's listing order
    for item in sorted(os.listdir(image_dir)):
        if item.startswith('batch_'):
            test_path = os.path.join(image_dir, item, filename)
            if os.path.exists(test_path):
                return test_path
    return None

def extract_object(image, mask):
    """Zero out every pixel of `image` that lies outside `mask`.

    Returns the masked image (same size as `image`), or None when either
    input is None or the mask's height/width differ from the image's.
    """
    inputs_missing = image is None or mask is None
    if inputs_missing or mask.shape[:2] != image.shape[:2]:
        return None

    # One copy of the mask per colour channel, then multiply it through
    channel_mask = np.stack((mask, mask, mask), axis=-1)
    return image * channel_mask

def get_bounding_box_from_mask(mask):
    """Compute the tight bounding box of the non-zero pixels of a mask.

    Args:
        mask: Array indexed as [row, column]; pixels > 0 belong to the object.

    Returns:
        ``[x, y, width, height]`` as Python ints, or ``None`` when the mask
        has no pixel > 0. Width and height count pixels inclusively, so a
        single-pixel object yields a 1x1 box.
    """
    coords = np.where(mask > 0)
    if len(coords[0]) == 0:
        return None

    y_min, y_max = coords[0].min(), coords[0].max()
    x_min, x_max = coords[1].min(), coords[1].max()

    # min/max are inclusive pixel indices, hence the +1 on both extents
    width = x_max - x_min + 1
    height = y_max - y_min + 1

    return [int(x_min), int(y_min), int(width), int(height)]  # [x, y, width, height]

def draw_bounding_box(image, bbox, color=(0, 255, 0), thickness=2):
    """Return a copy of `image` with `bbox` outlined and its area written above it.

    `bbox` is [x, y, width, height]. When `bbox` is None the input image
    itself (not a copy) is returned untouched.
    """
    if bbox is None:
        return image

    canvas = image.copy()

    # OpenCV drawing calls need plain integer pixel coordinates
    x, y, w, h = (int(value) for value in bbox)
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    cv2.rectangle(canvas, top_left, bottom_right, color, thickness)

    # Label the box with its area in pixels, just above the top-left corner
    label = f'Area: {w * h}px'
    cv2.putText(canvas, label, (x, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    return canvas

def create_background(shape, bg_type='random'):
    """Build a uint8 background array of the requested kind.

    bg_type:
        'random'   - independent random values in [0, 255] for every element
        'gradient' - horizontal ramp from 0 to 255, repeated over 3 channels
        'solid'    - one random colour with each channel in [50, 200]
        other      - all zeros
    """
    if bg_type == 'random':
        return np.random.randint(0, 256, shape, dtype=np.uint8)

    if bg_type == 'gradient':
        height, width = shape[:2]
        ramp = np.linspace(0, 255, width, dtype=np.uint8)
        plane = np.tile(ramp, (height, 1))
        return np.stack((plane, plane, plane), axis=-1)

    if bg_type == 'solid':
        color = [random.randint(50, 200) for _ in range(3)]
        return np.full(shape, color, dtype=np.uint8)

    return np.zeros(shape, dtype=np.uint8)

# Map category ids to readable names for plot labels
categories = coco.loadCats(coco.getCatIds())
category_names = dict((cat['id'], cat['name']) for cat in categories)

# Only the first five image ids are candidates for the demo
image_dir = '../../datasets/taco_official/'
image_ids = coco.getImgIds()[:5]

examples = []
for img_id in image_ids:
    image_info = coco.loadImgs(img_id)[0]
    image_path = find_image_path(image_dir, image_info['file_name'])
    if not image_path:
        continue  # file not present on disk

    image = cv2.imread(image_path)
    if image is None:
        continue  # file exists but could not be read

    # Keep the image only when it has at least one annotation
    annotation_ids = coco.getAnnIds(imgIds=img_id)
    annotations = coco.loadAnns(annotation_ids)
    if len(annotations) == 0:
        continue

    examples.append({
        'image': image,
        'annotations': annotations,
        'filename': image_info['file_name']
    })

    # Three examples are enough for the display below
    if len(examples) >= 3:
        break

# Create synthetic examples with bounding boxes: one row per example, with
# columns original | mask | random bg | gradient bg | solid bg
fig, axes = plt.subplots(3, 5, figsize=(20, 12))
fig.suptitle('Object Cutting and Synthetic Generation with Bounding Boxes', fontsize=16)

# (background type, panel title) for columns 2-4, in display order
background_panels = [
    ('random', 'Random Background + BBox'),
    ('gradient', 'Gradient Background + BBox'),
    ('solid', 'Solid Background + BBox'),
]

shown_examples = examples[:3]  # the grid has exactly three rows

for i, example in enumerate(shown_examples):
    image = example['image']
    annotations = example['annotations']
    filename = example['filename']

    # Original image with bounding box
    original_with_bbox = image.copy()
    first_annotation = annotations[0]

    # Draw original bounding box from annotation
    if 'bbox' in first_annotation:
        original_bbox = first_annotation['bbox']  # COCO format: [x, y, width, height]
        original_with_bbox = draw_bounding_box(original_with_bbox, original_bbox, (255, 0, 0), 3)

        # Add category label
        category_name = category_names.get(first_annotation['category_id'], f"Cat {first_annotation['category_id']}")
        cv2.putText(original_with_bbox, category_name,
                   (int(original_bbox[0]), int(original_bbox[1] - 30)),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    axes[i, 0].imshow(cv2.cvtColor(original_with_bbox, cv2.COLOR_BGR2RGB))
    axes[i, 0].set_title(f'Original: {filename}')
    axes[i, 0].axis('off')

    # Extract first object
    mask = coco.annToMask(first_annotation)
    object_img = extract_object(image, mask)

    if object_img is None:
        # Extraction failed: mark the remaining panels of this row
        for j in range(1, 5):
            axes[i, j].text(0.5, 0.5, 'Processing\nFailed',
                          ha='center', va='center', transform=axes[i, j].transAxes)
            axes[i, j].axis('off')
        continue

    # Show mask
    axes[i, 1].imshow(mask, cmap='gray')
    axes[i, 1].set_title('Segmentation Mask')
    axes[i, 1].axis('off')

    # Bounding box recomputed from the mask, drawn on every synthetic panel
    mask_bbox = get_bounding_box_from_mask(mask)
    mask_3d = np.stack([mask] * 3, axis=-1)

    # One synthetic composite per background type: object pixels where the
    # mask is set, generated background elsewhere
    for column, (bg_type, panel_title) in enumerate(background_panels, start=2):
        generated_bg = create_background(image.shape, bg_type)
        composite = np.where(mask_3d > 0, object_img, generated_bg)
        composite_with_bbox = draw_bounding_box(composite, mask_bbox, (0, 255, 0), 2)

        axes[i, column].imshow(cv2.cvtColor(composite_with_bbox, cv2.COLOR_BGR2RGB))
        axes[i, column].set_title(panel_title)
        axes[i, column].axis('off')

    # Print bounding box information
    print(f"\nImage {i+1}: {filename}")
    print(f"  Category: {category_names.get(first_annotation['category_id'], 'Unknown')}")
    if 'bbox' in first_annotation:
        orig_bbox = first_annotation['bbox']
        print(f"  Original BBox: [{orig_bbox[0]:.0f}, {orig_bbox[1]:.0f}, {orig_bbox[2]:.0f}, {orig_bbox[3]:.0f}]")
    if mask_bbox:
        print(f"  Mask BBox:     [{mask_bbox[0]}, {mask_bbox[1]}, {mask_bbox[2]}, {mask_bbox[3]}]")
        print(f"  Object Area:   {mask_bbox[2] * mask_bbox[3]} pixels")

# Rows with no example (fewer than three were collected) would otherwise
# render as empty axes with ticks; blank them out instead
for unused_ax in axes[len(shown_examples):].ravel():
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

print(f"\nProcessed {len(examples)} examples successfully!")
print("Red boxes: Original annotations from dataset")
print("Green boxes: Computed from segmentation masks")

In [None]:
# Advanced Synthetic Generation: Object Bank + Real Backgrounds
import cv2
import numpy as np
from pycocotools.coco import COCO
import random
import os
import matplotlib.pyplot as plt
from collections import defaultdict

def extract_object(image, mask):
    """Cut an object out of `image`, cropped tightly to its mask.

    Returns `(object_pixels, cropped_mask)`: the image crop with every pixel
    outside the mask set to 0, and the matching crop of the mask. Returns
    `(None, None)` when an input is None, the mask's height/width differ
    from the image's, or the mask has no pixel > 0.
    """
    if image is None or mask is None:
        return None, None
    if mask.shape[:2] != image.shape[:2]:
        return None, None

    # Row/column indices of every object pixel
    rows, cols = np.nonzero(mask > 0)[:2]
    if rows.size == 0:
        return None, None

    # Inclusive pixel extents turned into half-open slices
    window = (
        slice(rows.min(), rows.max() + 1),
        slice(cols.min(), cols.max() + 1),
    )
    cropped_mask = mask[window]
    cropped_image = image[window]

    # Keep image pixels under the mask, black everywhere else
    inside = np.stack((cropped_mask,) * 3, axis=-1) > 0
    extracted_obj = np.where(inside, cropped_image, 0)

    return extracted_obj, cropped_mask

def resize_object_and_mask(obj_img, mask, scale_factor):
    """Scale an object crop and its mask by the same factor.

    The target size is the mask's height/width times `scale_factor`,
    truncated to int. Returns `(resized_object, resized_mask)`, or
    `(None, None)` when either target dimension would be zero or negative.
    """
    height, width = mask.shape
    target_h = int(height * scale_factor)
    target_w = int(width * scale_factor)

    if min(target_h, target_w) <= 0:
        return None, None

    # cv2.resize takes the size as (width, height)
    target_size = (target_w, target_h)
    resized_obj = cv2.resize(obj_img, target_size)
    resized_mask = cv2.resize(mask.astype(np.uint8), target_size)

    return resized_obj, resized_mask

def create_object_mask_regions(image, annotations, coco):
    """Union the masks of all annotations into one uint8 mask.

    `coco.annToMask(ann)` is called for each annotation; masks whose
    height/width differ from the image's are ignored. The result has the
    image's height and width, with 1 where any object is and 0 elsewhere.
    """
    image_hw = image.shape[:2]
    occupied = np.zeros(image_hw, dtype=np.uint8)

    for ann in annotations:
        obj_mask = coco.annToMask(ann)
        if obj_mask.shape[:2] != image_hw:
            continue
        occupied = np.logical_or(occupied, obj_mask).astype(np.uint8)

    return occupied

def get_clean_background_region(image, object_mask, region_size=(400, 400)):
    """Crop a random window of `image` that is mostly free of objects.

    Up to 50 random windows of `region_size` (height, width) are tried; the
    first whose `object_mask` sum is below 10% of the window's pixel count
    is returned. If none qualifies, one more random window is returned as
    is. Windows are clipped to the image, so an image smaller than
    `region_size` yields a smaller crop.
    """
    img_h, img_w = image.shape[:2]
    region_h, region_w = region_size
    max_top = max(0, img_h - region_h)
    max_left = max(0, img_w - region_w)

    def _random_window():
        # Draw the row offset first, then the column offset
        top = random.randint(0, max_top)
        left = random.randint(0, max_left)
        bottom = min(top + region_h, img_h)
        right = min(left + region_w, img_w)
        return slice(top, bottom), slice(left, right)

    for _ in range(50):
        rows, cols = _random_window()
        covered = object_mask[rows, cols]
        object_ratio = np.sum(covered) / (covered.shape[0] * covered.shape[1])
        if object_ratio < 0.1:  # Less than 10% objects
            return image[rows, cols]

    # No sufficiently clean window turned up: settle for any random one
    rows, cols = _random_window()
    return image[rows, cols]

def place_object_on_background_with_annotation(background, obj_img, mask, x_pos, y_pos, category_id, object_id):
    """Paste a masked object onto `background` and describe it as an annotation.

    The object is clipped against all four borders of the background, so it
    may hang partly off any edge, including the top and left (negative
    `x_pos` / `y_pos`).

    Args:
        background: Image array (height, width, 3). It is modified in place
            and also returned.
        obj_img: Object pixels, same height/width as `mask`.
        mask: 2-D array; pixels > 0 are copied from `obj_img`.
        x_pos, y_pos: Position of the object's top-left corner in the
            background.
        category_id: Category id stored in the annotation.
        object_id: Id stored in the annotation.

    Returns:
        `(background, annotation)`. `annotation` is a dict with keys `id`,
        `category_id`, `bbox` ([x, y, width, height] of the pasted, visible
        rectangle), `area` (count of visible mask pixels) and `iscrowd`. It
        is `None`, and the background is left untouched, when no part of the
        object falls inside the background.
    """
    obj_h, obj_w = obj_img.shape[:2]
    bg_h, bg_w = background.shape[:2]

    # Visible rectangle in background coordinates. Clamping the start at 0
    # matters: a negative slice start would wrap to the far edge.
    start_y = max(y_pos, 0)
    start_x = max(x_pos, 0)
    end_y = min(y_pos + obj_h, bg_h)
    end_x = min(x_pos + obj_w, bg_w)
    actual_h = end_y - start_y
    actual_w = end_x - start_x

    if actual_h <= 0 or actual_w <= 0:
        return background, None

    # Matching rectangle in object coordinates (offset is non-zero only
    # when the object hangs off the top or left edge)
    off_y = start_y - y_pos
    off_x = start_x - x_pos
    obj_crop = obj_img[off_y:off_y + actual_h, off_x:off_x + actual_w]
    mask_crop = mask[off_y:off_y + actual_h, off_x:off_x + actual_w]

    # Create 3D mask for blending
    mask_3d = np.stack([mask_crop] * 3, axis=-1)

    # Object pixels where the mask is set, existing background elsewhere
    background_region = background[start_y:end_y, start_x:end_x]
    blended_region = np.where(mask_3d > 0, obj_crop, background_region)
    background[start_y:end_y, start_x:end_x] = blended_region

    # Create annotation info
    bbox = [int(start_x), int(start_y), int(actual_w), int(actual_h)]
    area = int(np.sum(mask_crop > 0))

    annotation = {
        "id": object_id,
        "category_id": category_id,
        "bbox": bbox,
        "area": area,
        "iscrowd": 0
    }

    return background, annotation

def draw_all_bboxes(image, annotations, category_names, original_color=(255, 0, 0), synthetic_color=(0, 255, 0)):
    """Return a copy of `image` with a labelled box drawn for every annotation.

    Annotations flagged with `is_original` are drawn in `original_color`,
    all others in `synthetic_color`. `None` entries are skipped. The label
    is the category name from `category_names`, or "Cat <id>" when the id
    is not in the mapping.
    """
    canvas = image.copy()

    for ann in (entry for entry in annotations if entry is not None):
        # OpenCV needs integer pixel coordinates
        x, y, w, h = (int(value) for value in ann['bbox'])
        category_id = ann['category_id']

        # Original annotations and pasted objects get different colours
        if ann.get('is_original', False):
            color = original_color
        else:
            color = synthetic_color

        cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)

        cat_name = category_names.get(category_id, f"Cat {category_id}")
        label = f"{cat_name}"

        # Filled strip in the box colour behind the white label text
        (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        strip_top_left = (x, y - text_height - 5)
        strip_bottom_right = (x + text_width, y)
        cv2.rectangle(canvas, strip_top_left, strip_bottom_right, color, -1)
        cv2.putText(canvas, label, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    return canvas

# Open the TACO annotations through the COCO API
annotation_file = '../../datasets/taco_official/annotations.json'
coco = COCO(annotation_file)

# Category id -> name lookup used for labels and reports
categories = coco.loadCats(coco.getCatIds())
category_names = dict((cat['id'], cat['name']) for cat in categories)

print("Building object bank...")
# object_bank: category id -> list of cut-out objects
# background_images: every usable source image plus its combined object mask
object_bank = defaultdict(list)
background_images = []

image_dir = '../../datasets/taco_official/'
image_ids = coco.getImgIds()[:20]  # Process more images for better variety

for img_id in image_ids:
    image_info = coco.loadImgs(img_id)[0]
    image_path = find_image_path(image_dir, image_info['file_name'])
    if not image_path:
        continue  # file not present on disk

    image = cv2.imread(image_path)
    if image is None:
        continue  # file exists but could not be read

    annotation_ids = coco.getAnnIds(imgIds=img_id)
    annotations = coco.loadAnns(annotation_ids)
    if len(annotations) == 0:
        continue

    # Cut every sufficiently large object out of the image
    for ann in annotations:
        mask = coco.annToMask(ann)
        # Skip masks that do not fit the image and objects of 500 px or less
        if mask.shape[:2] != image.shape[:2] or np.sum(mask) <= 500:
            continue
        obj_img, cropped_mask = extract_object(image, mask)
        if obj_img is None:
            continue
        object_bank[ann['category_id']].append({
            'object': obj_img,
            'mask': cropped_mask,
            'category_id': ann['category_id'],
            'source_image': image_info['file_name']
        })

    # The annotated image itself doubles as a background candidate
    object_mask = create_object_mask_regions(image, annotations, coco)
    background_images.append({
        'image': image,
        'object_mask': object_mask,
        'annotations': annotations,
        'filename': image_info['file_name']
    })

print(f"Object bank built with {sum(len(objects) for objects in object_bank.values())} objects")
print(f"Categories in bank: {list(object_bank.keys())}")
print(f"Available background images: {len(background_images)}")

# Create synthetic scenes using real backgrounds + object bank
print("\nGenerating synthetic scenes...")

# Select up to 3 background images for synthetic generation
selected_backgrounds = random.sample(background_images, min(3, len(background_images)))

# squeeze=False keeps `axes` two-dimensional even for a single row, so the
# axes[i, col] indexing below works for any number of backgrounds. At least
# one row is requested because matplotlib rejects a zero-row grid.
n_rows = max(1, len(selected_backgrounds))
fig, axes = plt.subplots(n_rows, 2, figsize=(15, 7 * n_rows), squeeze=False)
fig.suptitle('Synthetic Generation: Adding Objects Directly to Images', fontsize=16)

synthetic_data = []

for i, bg_data in enumerate(selected_backgrounds):
    background_img = bg_data['image']
    original_annotations = bg_data['annotations']
    filename = bg_data['filename']
    object_mask = bg_data['object_mask']

    # Work on a copy so the stored source image stays unmodified
    synthetic_scene = background_img.copy()

    # Copy the original annotations, flagged so they are drawn in the
    # "original" colour
    original_anns_with_flag = []
    for ann in original_annotations:
        if 'bbox' in ann:
            ann_copy = ann.copy()
            ann_copy['is_original'] = True
            original_anns_with_flag.append(ann_copy)

    # Left panel: the original image with its bounding boxes
    original_with_all_bboxes = draw_all_bboxes(background_img, original_anns_with_flag, category_names)
    axes[i, 0].imshow(cv2.cvtColor(original_with_all_bboxes, cv2.COLOR_BGR2RGB))
    axes[i, 0].set_title(f'Original: {filename}\n({len(original_annotations)} objects)')
    axes[i, 0].axis('off')

    # --- Create synthetic scene by adding objects to the original image ---
    synthetic_annotations = []

    # Add 2-4 random objects from the bank
    num_objects = random.randint(2, 4)
    available_categories = [cat for cat in object_bank.keys() if len(object_bank[cat]) > 0]

    if len(available_categories) > 0:
        # New ids continue after the largest id among the original annotations
        object_id_counter = max([ann['id'] for ann in original_annotations], default=0) + 1

        for _ in range(num_objects):
            cat_id = random.choice(available_categories)
            obj_data = random.choice(object_bank[cat_id])
            obj_img, mask = obj_data['object'], obj_data['mask']

            scale = random.uniform(0.7, 1.4)
            resized_obj, resized_mask = resize_object_and_mask(obj_img, mask, scale)

            if resized_obj is None:
                continue

            # Largest top-left position that keeps the object `margin`
            # pixels away from the right/bottom border
            h, w = resized_obj.shape[:2]
            margin = 10
            max_x = max(margin, synthetic_scene.shape[1] - w - margin)
            max_y = max(margin, synthetic_scene.shape[0] - h - margin)

            # Try to place the object in a relatively empty area
            best_pos = None
            min_overlap = float('inf')

            # An object too large to fit inside the margins is skipped
            if max_x > margin and max_y > margin:
                for _attempt in range(20):  # Try 20 random positions
                    x_pos = random.randint(margin, max_x)
                    y_pos = random.randint(margin, max_y)

                    # Overlap is measured against the ORIGINAL objects only
                    overlap_region = object_mask[y_pos:y_pos+h, x_pos:x_pos+w]
                    overlap_ratio = np.sum(overlap_region) / (h * w) if h > 0 and w > 0 else 1

                    if overlap_ratio < min_overlap:
                        min_overlap = overlap_ratio
                        best_pos = (x_pos, y_pos)

                    if overlap_ratio < 0.2:  # Found a good spot
                        break

            if best_pos:
                x_pos, y_pos = best_pos
                synthetic_scene, annotation = place_object_on_background_with_annotation(
                    synthetic_scene, resized_obj, resized_mask, x_pos, y_pos, cat_id, object_id_counter
                )
                if annotation:
                    annotation['is_original'] = False
                    synthetic_annotations.append(annotation)
                    object_id_counter += 1

    # Combine original and new annotations for final visualization
    all_annotations = original_anns_with_flag + synthetic_annotations

    # Right panel: the augmented scene with all bounding boxes
    synthetic_with_all_boxes = draw_all_bboxes(synthetic_scene, all_annotations, category_names)

    axes[i, 1].imshow(cv2.cvtColor(synthetic_with_all_boxes, cv2.COLOR_BGR2RGB))
    axes[i, 1].set_title(f'Augmented Scene\n({len(synthetic_annotations)} added objects)')
    axes[i, 1].axis('off')

    # Store synthetic data
    synthetic_data.append({
        'original_image': background_img,
        'original_annotations': original_anns_with_flag,
        'synthetic_scene': synthetic_scene,
        'synthetic_annotations': synthetic_annotations,
        'filename': filename
    })

    # Print details
    print(f"\nSynthetic Scene {i+1} ({filename}):")
    print(f"  Original objects: {len(original_annotations)}")
    print(f"  Added objects: {len(synthetic_annotations)}")
    for ann in synthetic_annotations:
        cat_name = category_names.get(ann['category_id'], f"Category {ann['category_id']}")
        bbox = ann['bbox']
        print(f"    - {cat_name}: bbox=[{int(bbox[0])}, {int(bbox[1])}, {int(bbox[2])}, {int(bbox[3])}], area={ann['area']}")

# Blank any row that received no scene (only possible when no background
# image was available)
for unused_ax in axes[len(selected_backgrounds):].ravel():
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

# Display object bank statistics
print("\n" + "="*60)
print("OBJECT BANK STATISTICS:")
print("="*60)
for cat_id, objects in object_bank.items():
    cat_name = category_names.get(cat_id, f"Category {cat_id}")
    print(f"{cat_name}: {len(objects)} objects")

print(f"\nTotal synthetic scenes generated: {len(synthetic_data)}")
print("Green boxes: Synthetic objects added to scenes")
print("Red boxes: Original objects from dataset")

In [None]:
# Synthetic Generation with Fake Backgrounds, Rotation, and No Overlap
import cv2
import numpy as np
from pycocotools.coco import COCO
import random
import os
import matplotlib.pyplot as plt
from collections import defaultdict

def create_background(shape, bg_type='random'):
    """Return a uint8 background of `shape` in the style named by `bg_type`.

    'random' fills every element with an independent random value in
    [0, 255]; 'gradient' is a left-to-right ramp from 0 to 255 copied into
    three channels; 'solid' is a single random colour with each channel in
    [50, 200]; any other value gives an all-zero array.
    """
    if bg_type == 'random':
        background = np.random.randint(0, 256, shape, dtype=np.uint8)
    elif bg_type == 'gradient':
        rows, cols = shape[:2]
        ramp = np.linspace(0, 255, cols, dtype=np.uint8)
        # Repeat the ramp down the rows, then across three channels
        plane = np.tile(ramp, (rows, 1))
        background = np.repeat(plane[:, :, np.newaxis], 3, axis=2)
    elif bg_type == 'solid':
        color = [random.randint(50, 200) for _ in range(3)]
        background = np.full(shape, color, dtype=np.uint8)
    else:
        background = np.zeros(shape, dtype=np.uint8)
    return background

def rotate_object_and_mask(image, mask, angle):
    """Rotate an object crop and its mask by `angle` without cutting corners off.

    The output canvas is enlarged to the bounding size of the rotated
    rectangle. Returns `(rotated_image, rotated_mask, bbox)` where `bbox`
    is `(0, 0, new_width, new_height)`, i.e. the whole output canvas.
    """
    src_h, src_w = image.shape[:2]
    pivot = (src_w // 2, src_h // 2)

    # Rotation about the crop centre, no scaling
    M = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # |cos| and |sin| of the angle give the extents of the rotated rectangle
    abs_cos = np.abs(M[0, 0])
    abs_sin = np.abs(M[0, 1])
    new_w = int((src_h * abs_sin) + (src_w * abs_cos))
    new_h = int((src_h * abs_cos) + (src_w * abs_sin))

    # Shift the result so it is centred in the enlarged canvas
    M[0, 2] += (new_w / 2) - pivot[0]
    M[1, 2] += (new_h / 2) - pivot[1]

    canvas_size = (new_w, new_h)
    rotated_image = cv2.warpAffine(image, M, canvas_size)
    rotated_mask = cv2.warpAffine(mask, M, canvas_size)

    return rotated_image, rotated_mask, (0, 0, new_w, new_h)

# The object bank built by the previous cell is reused when it exists and is
# non-empty; otherwise it is rebuilt here from the first 50 images. The
# rebuild calls find_image_path and extract_object, which this cell does not
# define, so the cells defining them must have been run.
if not locals().get('object_bank'):
    print("Object bank not found. Building it now...")
    # Open the TACO annotations through the COCO API
    annotation_file = '../../datasets/taco_official/annotations.json'
    coco = COCO(annotation_file)

    # Category id -> name lookup
    categories = coco.loadCats(coco.getCatIds())
    category_names = dict((cat['id'], cat['name']) for cat in categories)

    object_bank = defaultdict(list)
    image_dir = '../../datasets/taco_official/'
    image_ids = coco.getImgIds()[:50]  # Process images to build a decent bank

    for img_id in image_ids:
        image_info = coco.loadImgs(img_id)[0]
        image_path = find_image_path(image_dir, image_info['file_name'])
        if not image_path:
            continue  # file not present on disk

        image = cv2.imread(image_path)
        if image is None:
            continue  # file exists but could not be read

        annotation_ids = coco.getAnnIds(imgIds=img_id)
        annotations = coco.loadAnns(annotation_ids)

        for ann in annotations:
            mask = coco.annToMask(ann)
            # Skip masks that do not fit the image and objects of 500 px or less
            if mask.shape[:2] != image.shape[:2] or np.sum(mask) <= 500:
                continue
            obj_img, cropped_mask = extract_object(image, mask)
            if obj_img is None:
                continue
            object_bank[ann['category_id']].append({
                'object': obj_img,
                'mask': cropped_mask,
                'category_id': ann['category_id']
            })
    print(f"Object bank built with {sum(len(objects) for objects in object_bank.values())} objects.")


# --- Generate synthetic scenes with fake backgrounds ---
print("\nGenerating synthetic scenes with fake backgrounds...")

num_synthetic_images = 4
fig, axes = plt.subplots(1, num_synthetic_images, figsize=(20, 5))
fig.suptitle('Synthetic Scenes with Fake Backgrounds, Rotation & No Overlap', fontsize=16)

synthetic_scenes_data = []

for i in range(num_synthetic_images):
    # Random canvas size and a randomly chosen generated background style
    bg_shape = (random.randint(600, 800), random.randint(800, 1200), 3)
    bg_type = random.choice(['random', 'gradient', 'solid'])
    background = create_background(bg_shape, bg_type)

    # Marks the rectangles already taken by pasted objects
    placement_mask = np.zeros(bg_shape[:2], dtype=np.uint8)

    synthetic_annotations = []
    num_objects = random.randint(3, 7)
    available_categories = [cat for cat in object_bank.keys() if len(object_bank[cat]) > 0]
    object_id_counter = 1

    if len(available_categories) > 0:
        for _ in range(num_objects):
            cat_id = random.choice(available_categories)
            obj_data = random.choice(object_bank[cat_id])
            obj_img, mask = obj_data['object'], obj_data['mask']

            # --- Rotation ---
            angle = random.uniform(0, 360)
            rotated_obj, rotated_mask, _ = rotate_object_and_mask(obj_img, mask, angle)

            # --- Scaling ---
            # Never enlarge, and never exceed the background in either dimension
            bg_h, bg_w = background.shape[:2]
            obj_h, obj_w = rotated_obj.shape[:2]
            max_scale = min(bg_h / obj_h if obj_h > 0 else 1, bg_w / obj_w if obj_w > 0 else 1, 1.0) # Cap scale at 1.0
            min_scale = 0.4 # Increased from 0.2
            if max_scale <= min_scale:
                scale = max_scale
            else:
                scale = random.uniform(min_scale, max_scale)

            resized_obj, resized_mask = resize_object_and_mask(rotated_obj, rotated_mask, scale)
            if resized_obj is None:
                continue

            # --- Placement (avoid overlap) ---
            # random.randint includes its upper bound, so the largest valid
            # offset is exactly (background size - object size); an object
            # that fills a dimension must stay at offset 0.
            h, w = resized_obj.shape[:2]
            max_x = max(0, background.shape[1] - w)
            max_y = max(0, background.shape[0] - h)

            best_pos = None
            min_overlap = float('inf')
            for _attempt in range(20):
                x_pos, y_pos = random.randint(0, max_x), random.randint(0, max_y)

                # Count already-occupied pixels under the candidate rectangle
                overlap_region = placement_mask[y_pos:y_pos+h, x_pos:x_pos+w]
                overlap = np.sum(overlap_region > 0)

                if overlap < min_overlap:
                    min_overlap = overlap
                    best_pos = (x_pos, y_pos)
                if overlap == 0:
                    break # Found a perfect spot

            # Compare against None: (0, 0) is a valid position
            if best_pos is not None:
                x_pos, y_pos = best_pos
                background, annotation = place_object_on_background_with_annotation(
                    background, resized_obj, resized_mask, x_pos, y_pos, cat_id, object_id_counter
                )
                if annotation:
                    synthetic_annotations.append(annotation)
                    object_id_counter += 1
                    # Reserve the object's whole rectangle for later placements
                    placement_mask[y_pos:y_pos+h, x_pos:x_pos+w] = 255

    # Draw bounding boxes on the final scene
    scene_with_boxes = draw_all_bboxes(background, synthetic_annotations, category_names, synthetic_color=(0, 255, 0))

    axes[i].imshow(cv2.cvtColor(scene_with_boxes, cv2.COLOR_BGR2RGB))
    axes[i].set_title(f'Scene {i+1} ({bg_type} bg)\n{len(synthetic_annotations)} objects')
    axes[i].axis('off')

    # Note: the stored scene is the version with boxes drawn on it
    synthetic_scenes_data.append({
        'scene': scene_with_boxes,
        'annotations': synthetic_annotations
    })

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# Print details for the first generated scene
if synthetic_scenes_data:
    print("\nDetails for the first generated scene:")
    first_scene_data = synthetic_scenes_data[0]
    print(f"  - Total objects: {len(first_scene_data['annotations'])}")
    for ann in first_scene_data['annotations']:
        cat_name = category_names.get(ann['category_id'], f"Category {ann['category_id']}")
        bbox = ann['bbox']
        print(f"    - {cat_name}: bbox=[{int(bbox[0])}, {int(bbox[1])}, {int(bbox[2])}, {int(bbox[3])}], area={ann['area']}")

In [None]:
# Dataset Exploration: class-balance analysis of the TACO dataset
import json
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
import pandas as pd

# Load the TACO annotations (COCO-style JSON)
annotation_file = '../../datasets/taco_official/annotations.json'
with open(annotation_file, 'r') as f:
    taco_data = json.load(f)

# Pull out the three top-level sections used by the analysis
images = taco_data['images']
annotations = taco_data['annotations']
categories = taco_data['categories']

print("=== STATISTICHE GENERALI DEL DATASET TACO ===")
print(f"Numero totale di immagini: {len(images)}")
print(f"Numero totale di annotazioni: {len(annotations)}")
print(f"Numero di categorie: {len(categories)}")

# Lookup table: category id -> category name
id_to_category = dict((cat['id'], cat['name']) for cat in categories)
print("\nCategorie disponibili:")
for cat_id, cat_name in id_to_category.items():
    print(f"  {cat_id}: {cat_name}")

# Per-category object counts, plus per-image annotation counts and category sets
category_counts = Counter(ann['category_id'] for ann in annotations)
annotations_per_image = defaultdict(int)
category_per_image = defaultdict(set)

for ann in annotations:
    owner_image = ann['image_id']
    annotations_per_image[owner_image] += 1
    category_per_image[owner_image].add(ann['category_id'])

print(f"\n=== BILANCIAMENTO DELLE CLASSI ===")
print("Distribuzione degli oggetti per categoria:")

# One record per category, ordered from most to least frequent
total_annotations = len(annotations)
category_stats = [
    {
        'category_id': cat_id,
        'category_name': id_to_category[cat_id],
        'count': count,
        'percentage': (count / total_annotations) * 100
    }
    for cat_id, count in category_counts.most_common()
]
for entry in category_stats:
    cat_name, count, percentage = entry['category_name'], entry['count'], entry['percentage']
    print(f"  {cat_name}: {count} oggetti ({percentage:.1f}%)")

df_categories = pd.DataFrame(category_stats)

# Balance statistics: ratio between the largest and the smallest class
count_column = df_categories['count']
max_count = count_column.max()
min_count = count_column.min()
if min_count > 0:
    imbalance_ratio = max_count / min_count
else:
    imbalance_ratio = float('inf')

print(f"\nStatistiche di bilanciamento:")
print(f"  Categoria più rappresentata: {df_categories.iloc[0]['category_name']} ({max_count} oggetti)")
print(f"  Categoria meno rappresentata: {df_categories.iloc[-1]['category_name']} ({min_count} oggetti)")
print(f"  Rapporto di sbilanciamento: {imbalance_ratio:.1f}:1")

# Annotation counts per image (only images that have at least one annotation)
ann_per_img_stats = list(annotations_per_image.values())
print(f"\n=== STATISTICHE ANNOTAZIONI PER IMMAGINE ===")
print(f"Media annotazioni per immagine: {np.mean(ann_per_img_stats):.1f}")
print(f"Mediana annotazioni per immagine: {np.median(ann_per_img_stats):.1f}")
print(f"Min annotazioni per immagine: {min(ann_per_img_stats)}")
print(f"Max annotazioni per immagine: {max(ann_per_img_stats)}")

# Images that never appear as the image_id of any annotation
images_without_annotations = len(images) - len(annotations_per_image)
print(f"Immagini senza annotazioni: {images_without_annotations}")

In [None]:
# Visualizations for the class-balance analysis.
# Relies on df_categories (ordered by descending count) and ann_per_img_stats
# produced by the dataset-exploration cell.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Analisi del Bilanciamento del Dataset TACO', fontsize=16)

# 1. Category distribution (bar plot)
ax1 = axes[0, 0]
bars = ax1.bar(range(len(df_categories)), df_categories['count'], 
               color=plt.cm.viridis(np.linspace(0, 1, len(df_categories))))
ax1.set_xlabel('Categorie')
ax1.set_ylabel('Numero di Oggetti')
ax1.set_title('Distribuzione degli Oggetti per Categoria')
ax1.set_xticks(range(len(df_categories)))
# Long category names are truncated so the tick labels stay readable
ax1.set_xticklabels([cat[:15] + '...' if len(cat) > 15 else cat 
                     for cat in df_categories['category_name']], rotation=45, ha='right')

# Write the exact count above each bar
for bar in bars:
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height,
             f'{int(height)}', ha='center', va='bottom', fontsize=8)

# 2. Percentage distribution (pie chart: top 10 categories + everything else)
ax2 = axes[0, 1]
top_10 = df_categories.head(10)
# Positional slicing instead of tail(len - 10): with fewer than 10 categories
# tail() receives a negative n and returns "all rows except the first |n|",
# which would count top-10 rows a second time in the "Altri" slice.
others_count = df_categories['count'].iloc[10:].sum()
if others_count > 0:
    pie_data = list(top_10['count']) + [others_count]
    pie_labels = list(top_10['category_name']) + ['Altri']
else:
    pie_data = list(top_10['count'])
    pie_labels = list(top_10['category_name'])

wedges, texts, autotexts = ax2.pie(pie_data, labels=pie_labels, autopct='%1.1f%%', startangle=90)
ax2.set_title('Distribuzione Percentuale (Top 10 + Altri)')
for text in texts:
    text.set_fontsize(8)
for autotext in autotexts:
    autotext.set_fontsize(8)
    autotext.set_color('white')

# 3. Annotations per image (histogram with mean and median markers)
ax3 = axes[1, 0]
ax3.hist(ann_per_img_stats, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
ax3.set_xlabel('Numero di Annotazioni per Immagine')
ax3.set_ylabel('Frequenza')
ax3.set_title('Distribuzione delle Annotazioni per Immagine')
ax3.axvline(np.mean(ann_per_img_stats), color='red', linestyle='--', 
           label=f'Media: {np.mean(ann_per_img_stats):.1f}')
ax3.axvline(np.median(ann_per_img_stats), color='orange', linestyle='--',
           label=f'Mediana: {np.median(ann_per_img_stats):.1f}')
ax3.legend()

# 4. Balance score per category (horizontal bar chart, not a heatmap)
ax4 = axes[1, 1]
# The score is each category's count divided by the (integer-truncated) mean count
target_count = int(np.mean(df_categories['count']))
balance_scores = df_categories['count'] / target_count
# red: below 50% of the mean, orange: below 80%, green: otherwise
colors = ['red' if score < 0.5 else 'orange' if score < 0.8 else 'green' for score in balance_scores]

bars = ax4.barh(range(len(df_categories)), balance_scores, color=colors)
ax4.set_yticks(range(len(df_categories)))
ax4.set_yticklabels([cat[:20] + '...' if len(cat) > 20 else cat 
                     for cat in df_categories['category_name']], fontsize=8)
ax4.set_xlabel('Score di Bilanciamento (relativo alla media)')
ax4.set_title('Score di Bilanciamento per Categoria')
ax4.axvline(1.0, color='black', linestyle='--', alpha=0.5, label='Target (media)')
ax4.legend()

# Write the score next to each bar
for bar in bars:
    width = bar.get_width()
    ax4.text(width + 0.05, bar.get_y() + bar.get_height()/2.,
             f'{width:.2f}', ha='left', va='center', fontsize=7)

plt.tight_layout()
plt.show()

# Print balancing recommendations
print("\n" + "="*60)
print("RACCOMANDAZIONI PER IL BILANCIAMENTO")
print("="*60)

# The median (not the mean) is used as the per-category target here
target_per_category = int(np.median(df_categories['count']))
print(f"Target oggetti per categoria: {target_per_category}")

print("\nCategorie che necessitano di augmentazione:")
for _, row in df_categories.iterrows():
    if row['count'] < target_per_category:
        needed = target_per_category - row['count']
        print(f"  {row['category_name']}: +{needed} oggetti ({row['count']} -> {target_per_category})")

# Only categories more than 50% above the target are reported as over-represented
print("\nCategorie sovra-rappresentate:")
for _, row in df_categories.iterrows():
    if row['count'] > target_per_category * 1.5:
        excess = row['count'] - target_per_category
        print(f"  {row['category_name']}: -{excess} oggetti ({row['count']} -> {target_per_category})")

# Persist the per-category statistics for later use
df_categories.to_csv('../../datasets/taco_category_statistics.csv', index=False)
print(f"\nStatistiche salvate in: ../../datasets/taco_category_statistics.csv")

In [None]:
# Sistema di Generazione Dataset Bilanciato - Versione Corretta
import shutil
from pathlib import Path
import json
from datetime import datetime

def simple_extract_object(image, mask):
    """Cut the masked object out of ``image``, cropped to the mask's bounding box.

    Args:
        image: Image array whose first two dimensions are rows and columns.
        mask: Array with the same first two dimensions as ``image``; values
            greater than zero mark the object's pixels.

    Returns:
        A ``(extracted_obj, cropped_mask)`` tuple. ``cropped_mask`` is ``mask``
        restricted to the tight bounding box of its positive pixels, and
        ``extracted_obj`` is the matching image crop with every pixel outside
        the mask set to 0. ``(None, None)`` is returned when either input is
        ``None``, when the first two dimensions differ, or when the mask has
        no positive pixel.
    """
    if image is None or mask is None:
        return None, None

    # Image and mask must describe the same pixel grid
    if image.shape[:2] != mask.shape[:2]:
        return None, None

    # Row / column indices of every object pixel
    hits = np.nonzero(mask > 0)
    if hits[0].size == 0:
        return None, None

    # Tight bounding box around the object (inclusive bounds)
    top, bottom = hits[0].min(), hits[0].max()
    left, right = hits[1].min(), hits[1].max()
    row_span = slice(top, bottom + 1)
    col_span = slice(left, right + 1)

    cropped_mask = mask[row_span, col_span]
    cropped_image = image[row_span, col_span]

    # Replicate the mask over three channels and blank out the background
    inside_object = np.stack((cropped_mask, cropped_mask, cropped_mask), axis=-1) > 0
    extracted_obj = np.where(inside_object, cropped_image, 0)

    return extracted_obj, cropped_mask

class BalancedDatasetGenerator:
    """Build a class-balanced COCO-style dataset from an annotated source dataset.

    Typical use: construct the generator (loads the annotations and builds a
    bank of cut-out objects), call ``add_original_images`` to seed the new
    dataset with real images, call ``generate_synthetic_images`` to top up
    under-represented categories with composited scenes, then
    ``save_balanced_dataset`` to write the COCO JSON.

    Besides what is defined in this class, the methods rely on helpers defined
    in earlier notebook cells: ``find_image_path``, ``create_background``,
    ``rotate_object_and_mask``, ``resize_object_and_mask`` and
    ``place_object_on_background_with_annotation``.
    """

    # An extracted object is stored only if its mask sum exceeds this value ...
    MIN_MASK_PIXELS = 200
    # ... and both sides of its crop exceed this many pixels.
    MIN_OBJECT_SIDE = 10
    # Random positions tried per object before settling for the least-overlapping one.
    PLACEMENT_ATTEMPTS = 30

    def __init__(self, original_annotation_file, original_image_dir,
                 target_count_per_category=None, max_bank_images=50):
        """Load the source annotations and build the object bank.

        Args:
            original_annotation_file: Path of the source COCO annotation JSON.
            original_image_dir: Directory passed to ``find_image_path`` to
                locate the source images.
            target_count_per_category: Desired number of objects per category.
                When ``None`` the median of the original per-category counts
                is used (0 if there are no annotations).
            max_bank_images: How many source images (taken from the start of
                the image id list) are scanned to build the object bank;
                ``None`` scans every image. Defaults to 50.
        """
        self.original_annotation_file = original_annotation_file
        self.original_image_dir = original_image_dir
        self.max_bank_images = max_bank_images

        # Load the original data
        with open(original_annotation_file, 'r') as f:
            self.original_data = json.load(f)

        self.categories = self.original_data['categories']
        self.id_to_category = {cat['id']: cat['name'] for cat in self.categories}

        # Object counts per category in the original dataset
        self.category_counts = Counter(
            ann['category_id'] for ann in self.original_data['annotations']
        )

        # Choose the per-category target
        if target_count_per_category is None:
            original_counts = list(self.category_counts.values())
            # np.median of an empty list is NaN, which int() cannot convert
            self.target_count = int(np.median(original_counts)) if original_counts else 0
        else:
            self.target_count = target_count_per_category

        print(f"Target oggetti per categoria: {self.target_count}")

        # State of the dataset being assembled
        self.new_annotations = []
        self.new_images = []
        self.new_category_counts = Counter()
        self.annotation_id_counter = 1
        self.image_id_counter = 1
        # Source image ids already copied into the new dataset, so an image
        # containing several categories is never added more than once.
        self._added_original_image_ids = set()

        # Build the object bank
        self.object_bank = self._build_object_bank()

    def _category_name(self, cat_id):
        """Return the category's name, or a generic label for unknown ids."""
        return self.id_to_category.get(cat_id, f"Category {cat_id}")

    @staticmethod
    def _json_default(value):
        """Convert NumPy scalars/arrays to plain Python values for ``json.dump``."""
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def _build_object_bank(self):
        """Extract cut-out objects from the source images, grouped by category.

        Returns:
            A ``defaultdict(list)`` mapping category id to a list of dicts with
            the keys ``object``, ``mask``, ``category_id`` and ``source_image``.
        """
        print("Costruendo object bank per la generazione sintetica...")

        object_bank = defaultdict(list)
        coco = COCO(self.original_annotation_file)

        # Only a prefix of the image list is scanned unless max_bank_images is None
        image_ids = coco.getImgIds()
        if self.max_bank_images is not None:
            image_ids = image_ids[:self.max_bank_images]

        total_extracted = 0

        for img_id in tqdm(image_ids, desc="Estraendo oggetti"):
            image_info = coco.loadImgs(img_id)[0]
            image_path = find_image_path(self.original_image_dir, image_info['file_name'])
            if not image_path:
                continue

            image = cv2.imread(image_path)
            if image is None:
                continue

            for ann in coco.loadAnns(coco.getAnnIds(imgIds=img_id)):
                mask = coco.annToMask(ann)
                # Skip masks that do not match the image or are too small
                if mask.shape[:2] != image.shape[:2] or np.sum(mask) <= self.MIN_MASK_PIXELS:
                    continue

                obj_img, cropped_mask = simple_extract_object(image, mask)
                if obj_img is None or cropped_mask is None:
                    continue
                if min(obj_img.shape[:2]) <= self.MIN_OBJECT_SIDE:
                    continue

                object_bank[ann['category_id']].append({
                    'object': obj_img,
                    'mask': cropped_mask,
                    'category_id': ann['category_id'],
                    'source_image': image_info['file_name']
                })
                total_extracted += 1

        print(f"Object bank costruito con {total_extracted} oggetti")
        for cat_id, objects in object_bank.items():
            print(f"  {self._category_name(cat_id)}: {len(objects)} oggetti")

        return object_bank

    def add_original_images(self, max_images_per_category=None):
        """Copy original images (with all their annotations) into the new dataset.

        Categories are processed in order of first appearance in the
        annotations. For each one, images containing it are added until the
        category reaches ``target_count`` objects, the optional per-category
        cap is hit, or no unused image is left. Every image is added at most
        once, even when it contains several categories.

        Args:
            max_images_per_category: Maximum number of images added on behalf
                of a single category; ``None`` means no cap.
        """
        print("\nAggiungendo immagini originali...")

        # Index images and annotations once instead of rescanning both lists
        # for every annotation.
        images_by_id = {}
        for img in self.original_data['images']:
            images_by_id.setdefault(img['id'], img)

        annotations_by_image = defaultdict(list)
        for ann in self.original_data['annotations']:
            annotations_by_image[ann['image_id']].append(ann)

        # Ordered, duplicate-free list of image ids per category
        image_ids_by_category = defaultdict(list)
        seen_pairs = set()
        for ann in self.original_data['annotations']:
            pair = (ann['category_id'], ann['image_id'])
            if pair in seen_pairs or ann['image_id'] not in images_by_id:
                continue
            seen_pairs.add(pair)
            image_ids_by_category[ann['category_id']].append(ann['image_id'])

        for cat_id, image_ids in image_ids_by_category.items():
            cat_name = self._category_name(cat_id)
            needed = max(0, self.target_count - self.new_category_counts[cat_id])

            # Images already added for another category have had all their
            # annotations counted, so they must not be added again.
            candidates = [img_id for img_id in image_ids
                          if img_id not in self._added_original_image_ids]

            max_to_add = min(needed, len(candidates))
            if max_images_per_category is not None:
                max_to_add = max(0, min(max_to_add, max_images_per_category))

            # Pick random images when there are more candidates than needed
            if len(candidates) > max_to_add:
                selected_ids = random.sample(candidates, max_to_add)
            else:
                selected_ids = candidates

            for source_img_id in selected_ids:
                self._added_original_image_ids.add(source_img_id)

                # Work on copies so the original records keep their ids
                image_info = images_by_id[source_img_id].copy()
                image_info['id'] = self.image_id_counter
                self.new_images.append(image_info)

                for ann in annotations_by_image[source_img_id]:
                    new_ann = ann.copy()
                    new_ann['id'] = self.annotation_id_counter
                    new_ann['image_id'] = self.image_id_counter
                    self.new_annotations.append(new_ann)
                    self.new_category_counts[ann['category_id']] += 1
                    self.annotation_id_counter += 1

                self.image_id_counter += 1

            print(f"  {cat_name}: aggiunte {len(selected_ids)} immagini originali")

    def generate_synthetic_images(self, output_dir):
        """Generate synthetic scenes for categories still below the target.

        Every known category (plus any category already counted in the new
        dataset) is considered. For each one, at most ``needed`` scenes are
        generated and generation stops early once the category reaches the
        target, because a single scene can contain more than one target
        object.

        Args:
            output_dir: Directory where the synthetic JPEG files are written;
                it is created if missing.
        """
        print("\nGenerando immagini sintetiche...")

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Snapshot of the ids: the counter is updated while scenes are added,
        # so iterating over it directly could fail or skip categories.
        category_ids = list(dict.fromkeys([*self.id_to_category, *self.new_category_counts]))

        for cat_id in category_ids:
            cat_name = self._category_name(cat_id)
            needed = max(0, self.target_count - self.new_category_counts[cat_id])
            if needed == 0:
                continue

            if not self.object_bank.get(cat_id):
                print(f"  ATTENZIONE: Impossibile generare immagini per {cat_name} (oggetti non disponibili)")
                continue

            print(f"  Generando {needed} immagini sintetiche per {cat_name}...")

            for i in tqdm(range(needed), desc=f"Generando {cat_name}"):
                if self.new_category_counts[cat_id] >= self.target_count:
                    break

                synthetic_image, synthetic_annotations = self._create_synthetic_image(cat_id)
                if synthetic_image is None or not synthetic_annotations:
                    continue

                # Save the image; skip the scene if it could not be written
                filename = f"synthetic_{cat_id}_{i:04d}.jpg"
                image_path = output_path / filename
                if not cv2.imwrite(str(image_path), synthetic_image):
                    print(f"  ATTENZIONE: Impossibile salvare {image_path}")
                    continue

                self.new_images.append({
                    'id': self.image_id_counter,
                    'width': synthetic_image.shape[1],
                    'height': synthetic_image.shape[0],
                    'file_name': filename,
                    'license': 1,
                    'date_captured': datetime.now().isoformat()
                })

                # Re-number the scene's annotations into the dataset-wide id space
                for ann in synthetic_annotations:
                    ann['id'] = self.annotation_id_counter
                    ann['image_id'] = self.image_id_counter
                    self.new_annotations.append(ann)
                    self.new_category_counts[ann['category_id']] += 1
                    self.annotation_id_counter += 1

                self.image_id_counter += 1

    def _create_synthetic_image(self, target_category_id):
        """Create one synthetic scene centred on ``target_category_id``.

        Returns:
            ``(image, annotations)``; ``image`` is ``None`` when no object
            could be placed.
        """
        # Random scene size
        img_height = random.randint(400, 800)
        img_width = random.randint(600, 1000)

        # Background built by the helper from the earlier notebook cell
        bg_type = random.choice(['random', 'gradient', 'solid'])
        background = create_background((img_height, img_width, 3), bg_type)

        # Occupancy mask used to steer objects away from each other
        placement_mask = np.zeros((img_height, img_width), dtype=np.uint8)

        annotations = []
        object_id_counter = 1

        # 1-2 objects of the target category
        target_objects_count = random.randint(1, 2)
        target_bank = self.object_bank.get(target_category_id)
        if target_bank:
            for _ in range(target_objects_count):
                obj_data = random.choice(target_bank)
                annotation = self._place_object_on_background(
                    background, placement_mask, obj_data, target_category_id, object_id_counter
                )
                if annotation:
                    annotations.append(annotation)
                    object_id_counter += 1

        # 1-3 objects of other categories to make the scene more realistic
        other_objects_count = random.randint(1, 3)
        available_categories = [cat_id for cat_id, bank in self.object_bank.items()
                                if cat_id != target_category_id and len(bank) > 0]

        for _ in range(min(other_objects_count, len(available_categories))):
            other_cat_id = random.choice(available_categories)
            obj_data = random.choice(self.object_bank[other_cat_id])
            annotation = self._place_object_on_background(
                background, placement_mask, obj_data, other_cat_id, object_id_counter
            )
            if annotation:
                annotations.append(annotation)
                object_id_counter += 1

        return background if annotations else None, annotations

    def _place_object_on_background(self, background, placement_mask, obj_data, category_id, object_id):
        """Rotate, scale and paste one object; return its annotation or ``None``.

        ``background`` and ``placement_mask`` are updated in place when the
        object is placed.
        """
        obj_img, mask = obj_data['object'], obj_data['mask']

        # Random rotation and scaling via the helpers from the earlier cell
        angle = random.uniform(0, 360)
        rotated_obj, rotated_mask, _ = rotate_object_and_mask(obj_img, mask, angle)

        scale = random.uniform(0.5, 1.5)
        resized_obj, resized_mask = resize_object_and_mask(rotated_obj, rotated_mask, scale)

        if resized_obj is None:
            return None

        h, w = resized_obj.shape[:2]
        max_x = background.shape[1] - w
        max_y = background.shape[0] - h
        # Object does not leave any room on the background: nothing to try
        if max_x <= 0 or max_y <= 0:
            return None

        # Look for the position with the least overlap with earlier objects
        best_pos = None
        min_overlap = float('inf')

        for _ in range(self.PLACEMENT_ATTEMPTS):
            x_pos = random.randint(0, max_x)
            y_pos = random.randint(0, max_y)

            overlap = np.sum(placement_mask[y_pos:y_pos+h, x_pos:x_pos+w] > 0)

            if overlap < min_overlap:
                min_overlap = overlap
                best_pos = (x_pos, y_pos)

            if overlap == 0:  # perfect spot found
                break

        if best_pos is None:
            return None

        x_pos, y_pos = best_pos
        result_background, annotation = place_object_on_background_with_annotation(
            background, resized_obj, resized_mask, x_pos, y_pos, category_id, object_id
        )
        if not annotation:
            return None

        # The helper returns the composited image. If that is a new array
        # rather than `background` itself, copy the pixels back so the caller's
        # array actually contains the pasted object.
        if (result_background is not None and result_background is not background
                and result_background.shape == background.shape):
            background[...] = result_background

        # Mark the object's bounding rectangle as occupied
        placement_mask[y_pos:y_pos+h, x_pos:x_pos+w] = 255
        return annotation

    def save_balanced_dataset(self, output_annotation_file):
        """Write the balanced dataset as COCO JSON and print a summary.

        Args:
            output_annotation_file: Destination path of the JSON file; its
                parent directory is created if missing.

        Returns:
            The dict that was serialised.
        """
        balanced_data = {
            'info': {
                'description': 'TACO Balanced Dataset',
                'version': '1.0',
                'year': 2025,
                'date_created': datetime.now().isoformat()
            },
            'licenses': self.original_data.get('licenses', []),
            'categories': self.categories,
            'images': self.new_images,
            'annotations': self.new_annotations
        }

        Path(output_annotation_file).parent.mkdir(parents=True, exist_ok=True)
        with open(output_annotation_file, 'w') as f:
            # default= converts NumPy scalars/arrays that may appear in
            # generated annotations and would otherwise abort the dump
            json.dump(balanced_data, f, indent=2, default=self._json_default)

        print(f"\nDataset bilanciato salvato in: {output_annotation_file}")
        print(f"Totale immagini: {len(self.new_images)}")
        print(f"Totale annotazioni: {len(self.new_annotations)}")

        print("\nDistribuzione finale per categoria:")
        for cat_id, count in self.new_category_counts.items():
            print(f"  {self._category_name(cat_id)}: {count} oggetti")

        return balanced_data

# Build the generator with a deliberately low per-category target
# (100 objects) so that the run is likely to succeed.
print("Inizializzando il generatore di dataset bilanciato...")
_generator_settings = {
    'original_annotation_file': '../../datasets/taco_official/annotations.json',
    'original_image_dir': '../../datasets/taco_official/',
    'target_count_per_category': 100,
}
generator = BalancedDatasetGenerator(**_generator_settings)

In [None]:
# Generate the balanced dataset: copy the selected originals, add synthetic
# scenes, save the COCO JSON and compare the class balance before and after.
print("="*60)
print("GENERAZIONE DATASET BILANCIATO")
print("="*60)

# Output directory layout for the balanced dataset
balanced_dataset_dir = '../../datasets/taco_balanced'
balanced_images_dir = f'{balanced_dataset_dir}/images'
Path(balanced_images_dir).mkdir(parents=True, exist_ok=True)

# Step 1: add original images (at most 50 per category)
print("\nStep 1: Aggiungendo immagini originali...")
generator.add_original_images(max_images_per_category=50)

# Copy the selected original images into the flat output directory
print("Copiando immagini originali...")
missing_sources = 0
for image_info in tqdm(generator.new_images, desc="Copiando immagini"):
    src_path = find_image_path(generator.original_image_dir, image_info['file_name'])
    if not src_path:
        missing_sources += 1
        continue

    # Images in different source sub-folders may share the same base name, so
    # the (unique) new image id is prefixed to avoid one file overwriting
    # another in the flat output directory.
    unique_name = f"{image_info['id']:06d}_{os.path.basename(image_info['file_name'])}"
    dst_path = Path(balanced_images_dir) / unique_name

    # Keep the annotation file in sync with the new on-disk name
    image_info['file_name'] = unique_name

    shutil.copy2(src_path, dst_path)

if missing_sources:
    print(f"ATTENZIONE: {missing_sources} immagini non trovate e non copiate")

print("\nStatistiche dopo l'aggiunta delle immagini originali:")
for cat_id, count in generator.new_category_counts.items():
    cat_name = generator.id_to_category.get(cat_id, f"Category {cat_id}")
    needed = max(0, generator.target_count - count)
    print(f"  {cat_name}: {count} oggetti (mancanti: {needed})")

# Step 2: generate synthetic images
print("\nStep 2: Generando immagini sintetiche...")
generator.generate_synthetic_images(balanced_images_dir)

# Step 3: save the final dataset
print("\nStep 3: Salvando dataset bilanciato...")
balanced_annotation_file = f'{balanced_dataset_dir}/annotations_balanced.json'
balanced_data = generator.save_balanced_dataset(balanced_annotation_file)

print("\n" + "="*60)
print("DATASET BILANCIATO COMPLETATO!")
print("="*60)
print(f"Percorso dataset: {balanced_dataset_dir}")
print(f"File annotazioni: {balanced_annotation_file}")
print(f"Directory immagini: {balanced_images_dir}")

# Final balance analysis
final_category_counts = Counter()
for ann in balanced_data['annotations']:
    final_category_counts[ann['category_id']] += 1

print("\nDistribuzione finale:")
print(f"{'Categoria':<25} {'Originale':<10} {'Bilanciato':<12} {'Miglioramento'}")
print("-" * 65)

# category_counts normally comes from the exploration cell; rebuild it from
# the generator's copy of the original data if that cell was not run.
if 'category_counts' not in locals():
    category_counts = Counter()
    for ann in generator.original_data['annotations']:
        category_counts[ann['category_id']] += 1

for cat_id in sorted(final_category_counts.keys()):
    cat_name = generator.id_to_category.get(cat_id, f"Category {cat_id}")
    original_count = category_counts.get(cat_id, 0)
    balanced_count = final_category_counts[cat_id]
    # Show an explicit '+' sign for increases
    improvement = f"+{balanced_count - original_count}" if balanced_count > original_count else str(balanced_count - original_count)
    
    print(f"{cat_name[:24]:<25} {original_count:<10} {balanced_count:<12} {improvement}")

# New balance metrics (default=0 keeps an empty dataset from raising)
new_max = max(final_category_counts.values(), default=0)
new_min = min(final_category_counts.values(), default=0)
new_imbalance_ratio = new_max / new_min if new_min > 0 else float('inf')

# Imbalance ratio of the original dataset, for comparison
original_max = max(category_counts.values()) if category_counts else 1
original_min = min(category_counts.values()) if category_counts else 1
original_imbalance_ratio = original_max / original_min if original_min > 0 else float('inf')

print("\nMetriche di bilanciamento:")
print(f"  Rapporto sbilanciamento originale: {original_imbalance_ratio:.1f}:1")
print(f"  Rapporto sbilanciamento bilanciato: {new_imbalance_ratio:.1f}:1")
if original_imbalance_ratio > 0:
    print(f"  Miglioramento: {original_imbalance_ratio/new_imbalance_ratio:.1f}x")

In [None]:
# Visualizza Esempi del Dataset Bilanciato
print("\\nVisualizzando esempi dal dataset bilanciato...")

# Carica il dataset bilanciato
balanced_coco = COCO(balanced_annotation_file)

# Seleziona alcune immagini casuali per ogni tipo (originali vs sintetiche)
original_images = [img for img in balanced_data['images'] if not img['file_name'].startswith('synthetic_')]
synthetic_images = [img for img in balanced_data['images'] if img['file_name'].startswith('synthetic_')]

print(f"Immagini originali nel dataset bilanciato: {len(original_images)}")
print(f"Immagini sintetiche generate: {len(synthetic_images)}")

# Visualizza esempi
if len(synthetic_images) > 0:
    # Caso con immagini sintetiche
    num_examples = min(3, len(original_images), len(synthetic_images))
    selected_original = random.sample(original_images, num_examples)
    selected_synthetic = random.sample(synthetic_images, num_examples)
    
    fig, axes = plt.subplots(2, num_examples, figsize=(15, 10))
    fig.suptitle('Esempi dal Dataset Bilanciato: Originali vs Sintetiche', fontsize=16)
    
    # Visualizza immagini originali
    for i, image_info in enumerate(selected_original):
        img_path = Path(balanced_images_dir) / image_info['file_name']
        image = cv2.imread(str(img_path))
        
        if image is not None:
            # Ottieni annotazioni
            ann_ids = balanced_coco.getAnnIds(imgIds=image_info['id'])
            annotations = balanced_coco.loadAnns(ann_ids)
            
            # Disegna bounding boxes
            image_with_boxes = draw_all_bboxes(image, annotations, generator.id_to_category, 
                                             original_color=(0, 255, 0), synthetic_color=(0, 255, 0))
            
            axes[0, i].imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
            axes[0, i].set_title(f'Originale: {len(annotations)} oggetti')
            axes[0, i].axis('off')

    # Visualizza immagini sintetiche
    for i, image_info in enumerate(selected_synthetic):
        img_path = Path(balanced_images_dir) / image_info['file_name']
        image = cv2.imread(str(img_path))
        
        if image is not None:
            # Ottieni annotazioni
            ann_ids = balanced_coco.getAnnIds(imgIds=image_info['id'])
            annotations = balanced_coco.loadAnns(ann_ids)
            
            # Disegna bounding boxes
            image_with_boxes = draw_all_bboxes(image, annotations, generator.id_to_category,
                                             original_color=(255, 0, 0), synthetic_color=(255, 0, 0))
            
            axes[1, i].imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
            axes[1, i].set_title(f'Sintetica: {len(annotations)} oggetti')
            axes[1, i].axis('off')
    
    plt.tight_layout()
    plt.show()

else:
    # Caso senza immagini sintetiche - mostra solo alcune originali
    print("\\nNessuna immagine sintetica generata. Visualizzando solo immagini originali...")
    num_examples = min(5, len(original_images))
    selected_original = random.sample(original_images, num_examples)
    
    fig, axes = plt.subplots(1, num_examples, figsize=(20, 4))
    fig.suptitle(f'Esempi dal Dataset Bilanciato (Solo Originali)', fontsize=16)
    
    if num_examples == 1:
        axes = [axes]  # Make it iterable for single plot
    
    for i, image_info in enumerate(selected_original):
        img_path = Path(balanced_images_dir) / image_info['file_name']
        image = cv2.imread(str(img_path))
        
        if image is not None:
            # Ottieni annotazioni
            ann_ids = balanced_coco.getAnnIds(imgIds=image_info['id'])
            annotations = balanced_coco.loadAnns(ann_ids)
            
            # Disegna bounding boxes
            image_with_boxes = draw_all_bboxes(image, annotations, generator.id_to_category, 
                                             original_color=(0, 255, 0), synthetic_color=(0, 255, 0))
            
            axes[i].imshow(cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB))
            axes[i].set_title(f'{len(annotations)} oggetti')
            axes[i].axis('off')
    
    plt.tight_layout()
    plt.show()

# Statistiche finali dettagliate
print("\\n" + "="*80)
print("STATISTICHE FINALI DEL DATASET BILANCIATO")
print("="*80)

print(f"\\nInformazioni generali:")
print(f"  Totale immagini: {len(balanced_data['images'])}")
print(f"  Immagini originali: {len(original_images)}")
print(f"  Immagini sintetiche: {len(synthetic_images)}")
print(f"  Totale annotazioni: {len(balanced_data['annotations'])}")
print(f"  Numero di categorie: {len(balanced_data['categories'])}")

# Reuse final_category_counts when an earlier cell already defined it;
# otherwise count the annotations per category id in the balanced dataset.
# NOTE(review): the reuse path depends on kernel state; confirm that the
# earlier value was computed from the same balanced_data.
if 'final_category_counts' not in locals():
    final_category_counts = Counter(
        ann['category_id'] for ann in balanced_data['annotations']
    )

# Denominator for the percentages, computed once instead of per iteration.
_total_annotations = len(balanced_data['annotations'])

print("\nDistribuzione per categoria nel dataset bilanciato:")
for cat_id, count in sorted(final_category_counts.items()):
    cat_name = generator.id_to_category[cat_id]
    # Guard against an empty annotation list combined with a reused counter.
    percentage = (count / _total_annotations) * 100 if _total_annotations else 0.0
    print(f"  {cat_name}: {count} oggetti ({percentage:.1f}%)")

# Balance-quality metrics over the per-category object counts.
_final_count_values = list(final_category_counts.values())

if _final_count_values:
    mean_count = np.mean(_final_count_values)
    std_count = np.std(_final_count_values)  # population standard deviation
    # Coefficient of variation, in percent
    cv = (std_count / mean_count) * 100 if mean_count > 0 else float('nan')

    # Ratio between the most and the least represented category
    new_max = max(_final_count_values)
    new_min = min(_final_count_values)
    new_imbalance_ratio = new_max / new_min if new_min > 0 else float('inf')
else:
    # No categories at all: max()/min() would raise ValueError and np.mean
    # would warn and return nan, so define neutral placeholders instead.
    mean_count = std_count = cv = float('nan')
    new_max = new_min = 0
    new_imbalance_ratio = float('inf')

print("\nMetriche di bilanciamento:")
if not _final_count_values:
    print("  Nessuna annotazione presente: metriche di bilanciamento non calcolabili.")
else:
    print(f"  Media oggetti per categoria: {mean_count:.1f}")
    print(f"  Deviazione standard: {std_count:.1f}")
    print(f"  Coefficiente di variazione: {cv:.1f}%")
    print(f"  Rapporto sbilanciamento finale: {new_imbalance_ratio:.1f}:1")

    # Verdict thresholds on the coefficient of variation: 20% and 50%
    if cv < 20:
        print("  ✅ Dataset ben bilanciato (CV < 20%)")
    elif cv < 50:
        print("  ⚠️ Dataset moderatamente bilanciato (20% ≤ CV < 50%)")
    else:
        print("  ❌ Dataset ancora sbilanciato (CV ≥ 50%)")

# When no synthetic image was produced, list likely causes and suggestions.
# NOTE(review): the figures 200 and 100 are hard-coded in the messages below;
# confirm they match the target count actually used for generation.
if len(synthetic_images) == 0:
    print("\n⚠️ NOTA: Non sono state generate immagini sintetiche.")
    print("Possibili motivi:")
    print("- L'object bank potrebbe essere vuoto o avere pochi oggetti")
    print("- Il target per categoria (200) potrebbe essere già raggiunto dalle immagini originali")
    print("- Problemi nell'estrazione degli oggetti dalle immagini originali")

    print("\nSuggerimenti:")
    print("- Prova a ridurre il target_count_per_category (es. 100 invece di 200)")
    print("- Aumenta il numero di immagini processate per l'object bank")
    print("- Verifica che le funzioni di estrazione oggetti funzionino correttamente")

# Closing message with the location of the balanced dataset
print(f"\n🎯 Dataset bilanciato salvato in: {balanced_dataset_dir}")
print("💡 Pronto per l'addestramento di modelli di object detection!")