## 1. Setup and Imports

In [None]:
import os
import sys
import requests
import torch
import torchvision
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from ultralytics import YOLO
import supervision as sv
from typing import List, Tuple, Optional, Dict
import logging

# Send INFO-and-above log records to the notebook output and create a module logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Report the runtime environment (framework version and GPU, if any)
_cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {_cuda_ready}")
if _cuda_ready:
    _gpu_properties = torch.cuda.get_device_properties(0)
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA memory: {_gpu_properties.total_memory / 1e9:.1f} GB")

## 3. Model Loading and Configuration

In [None]:
# SAM2 has to be installed before this cell runs; report the outcome either way
try:
    from sam2.build_sam import build_sam2
    from sam2.sam2_image_predictor import SAM2ImagePredictor
except ImportError as import_error:
    print(f"SAM2 import failed: {import_error}")
    print("Please restart the kernel and run the installation cells again")
else:
    print("SAM2 imports successful")

In [None]:
# COCO class names in class-id order: the position in this tuple is the YOLO class id
_COCO_CLASS_NAMES = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)

# YOLO class definitions (COCO dataset): class id -> class name
YOLO_CLASSES = dict(enumerate(_COCO_CLASS_NAMES))

# Vehicle classes we're interested in (3: 'motorcycle' is currently excluded)
VEHICLE_CLASSES = {class_id: YOLO_CLASSES[class_id] for class_id in (2, 5, 7)}

print(f"Vehicle classes: {VEHICLE_CLASSES}")

In [None]:
class VehicleProcessor:
    """Holds the YOLO detector and the SAM2 image predictor used by the pipeline."""

    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu", 
                 yolo_conf_threshold=0.25, car_conf_threshold=0.5,
                 yolo_weights='yolo11x.pt',
                 sam2_checkpoint="/mnt/damian/Projects/sam2/checkpoints/sam2.1_hiera_large.pt",
                 sam2_config="configs/sam2.1/sam2.1_hiera_l.yaml"):
        """
        Initialize the vehicle processing pipeline
        
        Args:
            device: Device to run models on
            yolo_conf_threshold: YOLO detection confidence threshold
            car_conf_threshold: Minimum confidence for vehicle detections
            yolo_weights: YOLO weights file passed to ``YOLO(...)``
            sam2_checkpoint: Path to the SAM2 checkpoint passed to ``build_sam2``
            sam2_config: SAM2 config name passed to ``build_sam2``

        The three model-location arguments default to the values that used to
        be hard-coded, so existing ``VehicleProcessor()`` calls are unaffected.
        """
        self.device = device
        self.yolo_conf_threshold = yolo_conf_threshold
        self.car_conf_threshold = car_conf_threshold
        self.yolo_weights = yolo_weights
        self.sam2_checkpoint = sam2_checkpoint
        self.sam2_config = sam2_config
        
        print(f"Initializing models on device: {device}")
        
        # Load YOLO model
        print("Loading YOLO model...")
        self.yolo_model = YOLO(yolo_weights).to(device)
        # NOTE(review): confirm that the installed ultralytics version treats
        # eval() on the YOLO wrapper as a plain switch to inference mode.
        self.yolo_model.eval()
        
        # Load SAM2 model and wrap it in the single-image predictor
        print("Loading SAM2 model...")
        self.sam2_model = build_sam2(sam2_config, sam2_checkpoint, device=device)
        self.sam2_predictor = SAM2ImagePredictor(self.sam2_model)
        
        print("Models loaded successfully!")

In [None]:
# Initialize the processor with its default settings.
# The constructor loads both the YOLO and the SAM2 model.
processor = VehicleProcessor()

## 4. Core Processing Functions

In [None]:
def expand_bbox(box: np.ndarray, image_shape: tuple, expansion_factor: float = 0.15) -> np.ndarray:
    """
    Grow a bounding box around its centre and clamp it to the image
    Useful when the detector only covers part of the vehicle
    
    Args:
        box: Bounding box [x1, y1, x2, y2]
        image_shape: Shape of the image; index 0 is height, index 1 is width
        expansion_factor: Total growth per axis as a fraction of the box size
            (0.15 = 15% wider and 15% taller)
        
    Returns:
        Expanded bounding box as [x1, y1, x2, y2]
    """
    left, top, right, bottom = box
    image_height, image_width = image_shape[0], image_shape[1]
    
    # The growth is split evenly, so each side moves out by half of it
    pad_x = (right - left) * expansion_factor / 2
    pad_y = (bottom - top) * expansion_factor / 2
    
    expanded = [
        max(0, left - pad_x),
        max(0, top - pad_y),
        min(image_width, right + pad_x),
        min(image_height, bottom + pad_y),
    ]
    return np.array(expanded)

In [None]:
def detect_vehicles(processor, image: np.ndarray, bbox_expansion: float = 0.0) -> Dict:
    """
    Run YOLO on the image and keep only detections whose class is in VEHICLE_CLASSES
    
    Args:
        processor: VehicleProcessor instance
        image: Input image as numpy array
        bbox_expansion: Factor to expand bounding boxes, default 0.0, example: 0.15 = 15%

    Returns:
        Dictionary with keys 'boxes' (possibly expanded), 'original_boxes',
        'confidences', 'class_ids' and 'labels'; every value is empty when
        no vehicle was found
    """
    yolo_output = processor.yolo_model(source=image, conf=processor.yolo_conf_threshold, verbose=False)
    all_detections = sv.Detections.from_ultralytics(yolo_output[0])
    
    # Boolean selector over all detections: True where the class is a vehicle
    is_vehicle = np.isin(all_detections.class_id, list(VEHICLE_CLASSES.keys()))
    
    if not is_vehicle.any():
        return {
            'boxes': np.array([]),
            'original_boxes': np.array([]),
            'confidences': np.array([]),
            'class_ids': np.array([]),
            'labels': []
        }
    
    vehicle_boxes = all_detections.xyxy[is_vehicle]
    vehicle_class_ids = all_detections.class_id[is_vehicle]
    
    # Optionally grow every box so a partial detection still covers the full vehicle
    if bbox_expansion > 0:
        frame_size = image.shape[:2]
        boxes = [expand_bbox(raw_box, frame_size, bbox_expansion) for raw_box in vehicle_boxes]
    else:
        boxes = list(vehicle_boxes)
    
    return {
        'boxes': np.array(boxes),
        'original_boxes': vehicle_boxes,  # Keep original for reference
        'confidences': all_detections.confidence[is_vehicle],
        'class_ids': vehicle_class_ids,
        'labels': [YOLO_CLASSES[class_id] for class_id in vehicle_class_ids]
    }

In [None]:
def segment_vehicles_sam2(processor, image: np.ndarray, detections: Dict) -> Tuple[List[np.ndarray], List[float]]:
    """
    Prompt SAM2 with each sufficiently confident detection box and collect the masks
    
    Args:
        processor: VehicleProcessor instance
        image: Input image as numpy array
        detections: YOLO detection results
        
    Returns:
        Tuple of (masks, quality_scores); detections below
        processor.car_conf_threshold are skipped, so the lists may be
        shorter than the number of detections
    """
    if len(detections['boxes']) == 0:
        return [], []
    
    predictor = processor.sam2_predictor
    
    # The image is set once; every box prompt below is predicted against it
    predictor.set_image(image)
    
    vehicle_masks = []
    mask_qualities = []
    
    candidates = zip(detections['boxes'], detections['confidences'], detections['class_ids'])
    for box, confidence, _class_id in candidates:
        # Only process high-confidence vehicle detections
        if confidence < processor.car_conf_threshold:
            continue
        
        # One box prompt in xyxy order, shaped (1, 4)
        predicted_masks, predicted_scores, _ = predictor.predict(
            point_coords=None,
            point_labels=None,
            box=box.reshape(1, 4),
            multimask_output=False,
            return_logits=False,
        )
        
        if len(predicted_masks) > 0:
            # multimask_output=False: keep the first entry
            vehicle_masks.append(predicted_masks[0])
            mask_qualities.append(predicted_scores[0])
    
    return vehicle_masks, mask_qualities

In [None]:
def apply_mask_and_rescale(pil_image: Image.Image, mask: np.ndarray, target_size: int = 512, 
                           min_vehicle_size: int = 256, max_upscale_factor: float = 2.0) -> Optional[Image.Image]:
    """
    Apply SAM2 mask to image and adaptively rescale based on vehicle size
    
    The mask becomes the alpha channel, the image is cropped to the masked
    pixels, scaled, and centred on a transparent square canvas.
    
    Args:
        pil_image: Input PIL image (any mode; converted to RGBA internally)
        mask: Mask from SAM2 with the same height/width as the image;
            it is multiplied by 255 to form the alpha channel
        target_size: Target image size (will be square)
        min_vehicle_size: Size below which a vehicle is reported as "small".
            It only selects the log message; the scale factor is computed
            the same way for small and medium vehicles.
        max_upscale_factor: Maximum upscaling without super resolution
        
    Returns:
        Processed image with transparent background, or None when the mask
        is empty or any step fails (the error is printed, not raised)
    """
    try:
        # convert('RGBA') copes with grayscale/palette inputs as well as RGB,
        # and gives a fresh array that is safe to modify
        rgba_array = np.array(pil_image.convert('RGBA'))
        
        # Apply mask to alpha channel
        rgba_array[:, :, 3] = (mask * 255).astype(np.uint8)
        
        # Mode is inferred as RGBA from the (H, W, 4) uint8 array
        masked_image = Image.fromarray(rgba_array)
        
        # Find bounding box of non-transparent pixels
        alpha = rgba_array[:, :, 3]
        coords = np.argwhere(alpha > 0)
        
        if len(coords) == 0:
            return None
        
        y_min, x_min = coords.min(axis=0)
        y_max, x_max = coords.max(axis=0)
        
        # Crop to content. PIL crop boxes exclude the right/lower edge, so
        # add 1 to keep the last masked row and column (and to avoid an
        # empty crop when the mask is a single pixel wide or tall).
        cropped = masked_image.crop((int(x_min), int(y_min), int(x_max) + 1, int(y_max) + 1))
        
        # Get original vehicle dimensions
        original_width, original_height = cropped.size
        original_size = max(original_width, original_height)
        
        # Determine scaling strategy based on vehicle size
        if original_size >= target_size:
            # Vehicle is large enough, scale down to fit
            scale_factor = target_size / original_size
            print(f"  → Large vehicle ({original_size}px), downscaling")
        elif original_size >= min_vehicle_size:
            # Medium size: scale to fit without exceeding target
            scale_factor = min(target_size / original_size, max_upscale_factor)
            print(f"  → Medium vehicle ({original_size}px), moderate upscaling")
        else:
            # Small vehicle: limited upscaling to avoid quality loss
            scale_factor = min(max_upscale_factor, target_size / original_size)
            print(f"  → Small vehicle ({original_size}px), limited upscaling (max {max_upscale_factor}x)")
        
        # Apply scaling; clamp to 1px so very thin crops never become 0-sized
        new_width = max(1, int(original_width * scale_factor))
        new_height = max(1, int(original_height * scale_factor))
        
        resized = cropped.resize((new_width, new_height), Image.Resampling.LANCZOS)
        
        # Create final image with transparent background and centre the vehicle
        final_img = Image.new('RGBA', (target_size, target_size), (0, 0, 0, 0))
        paste_x = (target_size - new_width) // 2
        paste_y = (target_size - new_height) // 2
        # The resized image doubles as the paste mask so transparency is preserved
        final_img.paste(resized, (paste_x, paste_y), resized)
        
        print(f"  → Final size in canvas: {new_width}x{new_height} (scale: {scale_factor:.2f}x)")
        
        return final_img
        
    except Exception as e:
        # Deliberate best-effort: a failure on one vehicle must not stop the batch
        print(f"Error applying mask: {e}")
        return None

## 5. Visualization Functions

In [None]:
def visualize_detections(image: np.ndarray, detections: Dict, figsize=(12, 8)):
    """
    Show the image with one red rectangle and caption per detection
    
    Args:
        image: Image array; a 3-channel array is treated as BGR and converted to RGB
        detections: Dict with 'boxes', 'confidences' and 'labels'
        figsize: Matplotlib figure size
    """
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    
    # matplotlib expects RGB, so 3-channel input is converted from BGR
    is_three_channel = len(image.shape) == 3 and image.shape[2] == 3
    display_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if is_three_channel else image
    ax.imshow(display_image)
    
    caption_style = dict(boxstyle='round', facecolor='red', alpha=0.7)
    annotated = zip(detections['boxes'], detections['confidences'], detections['labels'])
    for box, conf, label in annotated:
        left, top, right, bottom = box
        
        # Box outline
        outline = patches.Rectangle((left, top), right - left, bottom - top,
                                    linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(outline)
        
        # Caption placed just above the top-left corner
        ax.text(left, top-10, f'{label}: {conf:.2f}',
                bbox=caption_style, fontsize=10, color='white')
    
    ax.set_title(f'Vehicle Detections ({len(detections["boxes"])} found)')
    ax.axis('off')
    plt.tight_layout()
    plt.show()

In [None]:
def visualize_masks(image: np.ndarray, masks: List[np.ndarray], detections: Dict, figsize=(15, 5)):
    """
    Visualize SAM2 segmentation masks
    
    Shows the original image followed by up to three masks, each blended
    in red over the image.
    
    Args:
        image: Image array; a 3-channel array is treated as BGR and converted to RGB
        masks: Masks to overlay; only the first three are shown
        detections: Dict with 'confidences' and 'labels'; entry i is used to
            caption mask i, so it should be index-aligned with ``masks``
        figsize: Matplotlib figure size
    """
    if not masks:
        print("No masks to visualize")
        return
    
    # Convert BGR to RGB for display
    if len(image.shape) == 3 and image.shape[2] == 3:
        display_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    else:
        display_image = image
    
    shown_masks = masks[:3]  # Show up to 3 masks
    
    # One panel for the original plus one per shown mask. masks is non-empty
    # here, so there are always at least two panels; atleast_1d just keeps
    # the indexing below uniform.
    fig, axes = plt.subplots(1, len(shown_masks) + 1, figsize=figsize)
    axes = np.atleast_1d(axes).ravel()
    
    # Show original image
    axes[0].imshow(display_image)
    axes[0].set_title('Original')
    axes[0].axis('off')
    
    # Show each mask
    for i, mask in enumerate(shown_masks):
        # Convert mask to boolean type for proper indexing
        mask_bool = mask.astype(bool)
        
        # Blend 50% red into the masked pixels; float math avoids uint8 overflow
        overlay = display_image.copy().astype(np.float32)
        overlay[mask_bool] = overlay[mask_bool] * 0.5 + np.array([255, 0, 0]) * 0.5
        overlay = overlay.astype(np.uint8)
        
        panel = axes[i + 1]
        panel.imshow(overlay)
        # Fall back to neutral caption values when detections has fewer entries than masks
        conf = detections['confidences'][i] if i < len(detections['confidences']) else 0
        label = detections['labels'][i] if i < len(detections['labels']) else 'vehicle'
        panel.set_title(f'Mask {i+1}: {label} ({conf:.2f})')
        panel.axis('off')
    
    plt.tight_layout()
    plt.show()

In [None]:
def visualize_final_results(processed_images: List[Image.Image], figsize=(15, 5)):
    """
    Visualize final processed vehicle images
    
    Shows up to four images side by side. The title and axis limits come from
    each image's own size, so output produced with a target_size other than
    512 is labelled and framed correctly.
    
    Args:
        processed_images: Images to show; only the first four are displayed
        figsize: Matplotlib figure size
    """
    if not processed_images:
        print("No processed images to visualize")
        return
    
    shown_images = processed_images[:4]  # Show up to 4 images
    fig, axes = plt.subplots(1, len(shown_images), figsize=figsize)
    # subplots returns a bare Axes for a single panel; normalise to a 1-D array
    axes = np.atleast_1d(axes).ravel()
    
    for i, (ax, img) in enumerate(zip(axes, shown_images)):
        width, height = img.size
        
        ax.imshow(img)
        ax.set_title(f'Vehicle {i+1} ({width}x{height})')
        ax.axis('off')
        
        # Frame the axes on the full canvas (y inverted so the origin is top-left)
        ax.set_xlim(0, width)
        ax.set_ylim(height, 0)
        ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

## 6. Complete Processing Pipeline

In [None]:
def process_single_image(image_path: str, processor: VehicleProcessor, 
                        min_segmentation_score: float = 0.7,
                        target_size: int = 512,
                        visualize: bool = True) -> List[Dict]:
    """
    Complete processing pipeline for a single image
    
    Steps: YOLO detection, SAM2 segmentation of the confident detections,
    then masking and rescaling of every mask whose quality score is high enough.
    
    Args:
        image_path: Path to the image file
        processor: VehicleProcessor instance
        min_segmentation_score: Minimum SAM2 quality score
        target_size: Target output image size
        visualize: Whether to show visualizations
        
    Returns:
        List of processed vehicle data, one dict per kept vehicle with keys
        'processed_image', 'detection_label', 'detection_confidence',
        'segmentation_score', 'mask_index' and 'original_path'
    """
    # Load image
    print(f"Processing: {image_path}")
    
    # Load as PIL image; the context manager closes the file handle, and
    # convert() returns an independent in-memory copy
    with Image.open(image_path) as source_image:
        pil_image = source_image.convert('RGB')
    print(f"Original image size: {pil_image.size}")
    
    # Two views of the same pixels: RGB for SAM2, BGR (OpenCV order) for YOLO
    # and for the visualization helpers, which convert BGR back to RGB
    rgb_image = np.array(pil_image)
    cv2_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
    
    # Step 1: Detect vehicles with YOLO
    print("Step 1: Running YOLO detection...")
    detections = detect_vehicles(processor, cv2_image)
    
    if len(detections['boxes']) == 0:
        print("No vehicles detected!")
        return []
    
    print(f"Found {len(detections['boxes'])} vehicles: {detections['labels']}")
    
    if visualize:
        visualize_detections(cv2_image, detections)
    
    # segment_vehicles_sam2 silently skips detections below car_conf_threshold,
    # so its masks would not line up with the full detection list. Apply the
    # same filter first so mask i belongs to detection i.
    keep = ~(np.asarray(detections['confidences']) < processor.car_conf_threshold)
    confident_detections = {
        'boxes': detections['boxes'][keep],
        'original_boxes': detections['original_boxes'][keep],
        'confidences': detections['confidences'][keep],
        'class_ids': detections['class_ids'][keep],
        'labels': [label for label, kept in zip(detections['labels'], keep) if kept],
    }
    
    # Step 2: Segment vehicles with SAM2 (which takes RGB input)
    print("Step 2: Running SAM2 segmentation...")
    masks, mask_scores = segment_vehicles_sam2(processor, rgb_image, confident_detections)
    
    if not masks:
        print("No valid masks generated!")
        return []
    
    print(f"Generated {len(masks)} masks with scores: {[f'{s:.3f}' for s in mask_scores]}")
    
    if len(masks) != len(confident_detections['labels']):
        # Only possible if SAM2 returned no mask for some box
        print("Warning: mask count differs from detection count; labels may be misaligned")
    
    if visualize:
        visualize_masks(cv2_image, masks, confident_detections)
    
    # Step 3: Apply masks and rescale
    print("Step 3: Applying masks and rescaling...")
    processed_results = []
    processed_images = []
    
    for i, (mask, score, confidence, label) in enumerate(
        zip(masks, mask_scores, confident_detections['confidences'], confident_detections['labels'])
    ):
        if score < min_segmentation_score:
            print(f"Skipping mask {i} with low score: {score:.3f}")
            continue
        
        # Apply mask and rescale
        processed_image = apply_mask_and_rescale(pil_image, mask, target_size)
        
        if processed_image is not None:
            result = {
                'processed_image': processed_image,
                'detection_label': label,
                'detection_confidence': float(confidence),
                'segmentation_score': float(score),
                'mask_index': i,
                'original_path': image_path
            }
            processed_results.append(result)
            processed_images.append(processed_image)
            print(f"Successfully processed {label} (conf: {confidence:.3f}, seg: {score:.3f})")
    
    if visualize and processed_images:
        visualize_final_results(processed_images)
    
    print(f"Processing complete. Generated {len(processed_results)} vehicle images.")
    return processed_results

## 7. Test with Sample Image

In [None]:
# Sample image from the dataset used to exercise the pipeline
test_image_path = "/mnt/damian/Projects/car_data_scraper/images/autoevolution_renderings/article_230605/230605_reborn-ford-bronco-ii-morphs-ranger-ms-rt-dna-to-mix-and-match-with-suv-body-style_7_15.jpg"

# Preview the image when it is available, otherwise explain what to fix
if not os.path.exists(test_image_path):
    print(f"Test image not found: {test_image_path}")
    print("Please check the path or choose a different image")
else:
    print(f"Test image found: {test_image_path}")
    
    # Display the untouched input before any processing
    orig_img = Image.open(test_image_path)
    plt.figure(figsize=(10, 6))
    plt.imshow(orig_img)
    plt.title(f"Original Test Image ({orig_img.size[0]}x{orig_img.size[1]})")
    plt.axis('off')
    plt.show()

In [None]:
# Execute every pipeline stage on the test image and summarise each vehicle
if os.path.exists(test_image_path):
    results = process_single_image(
        test_image_path,
        processor,
        min_segmentation_score=0.6,  # Lower threshold for testing
        target_size=512,
        visualize=True,
    )
    
    # One summary entry per processed vehicle
    print("\n=== PROCESSING RESULTS ===")
    for vehicle_number, result in enumerate(results, start=1):
        summary_lines = (
            f"Vehicle {vehicle_number}:",
            f"  - Label: {result['detection_label']}",
            f"  - Detection Confidence: {result['detection_confidence']:.3f}",
            f"  - Segmentation Score: {result['segmentation_score']:.3f}",
            f"  - Output Size: {result['processed_image'].size}",
            "",
        )
        for summary_line in summary_lines:
            print(summary_line)

## 8. Save Results (Optional)

In [None]:
# Toggle for writing the processed images to disk
save_results = True  # Set to False to skip saving

# Only save when an earlier cell produced a non-empty `results` list
if save_results and 'results' in locals() and results:
    output_dir = "./test_output"
    os.makedirs(output_dir, exist_ok=True)
    
    for vehicle_number, result in enumerate(results, start=1):
        # The file name carries the running number and the detected label
        output_path = os.path.join(
            output_dir,
            f"processed_vehicle_{vehicle_number}_{result['detection_label']}.png",
        )
        result['processed_image'].save(output_path)
        print(f"Saved: {output_path}")
    
    print(f"\nAll processed images saved to: {output_dir}")

## 9. Quality Metrics and Analysis

In [None]:
def analyze_results(results: List[Dict]):
    """
    Print summary statistics for a list of processed vehicle results
    
    Reports min/max/average detection confidence and segmentation score,
    followed by the number of vehicles per detected label. Prints a notice
    and returns immediately when ``results`` is empty.
    """
    if not results:
        print("No results to analyze")
        return
    
    print("=== QUALITY ANALYSIS ===")
    print(f"Total vehicles processed: {len(results)}")
    
    # The same three statistics are reported for both score types
    score_sections = (
        ("Detection Confidence", 'detection_confidence'),
        ("Segmentation Quality", 'segmentation_score'),
    )
    for heading, field in score_sections:
        values = [entry[field] for entry in results]
        print(f"\n{heading}:")
        print(f"  - Min: {min(values):.3f}")
        print(f"  - Max: {max(values):.3f}")
        print(f"  - Average: {np.mean(values):.3f}")
    
    # Vehicle type distribution, in order of first appearance
    from collections import Counter
    type_counts = Counter(entry['detection_label'] for entry in results)
    
    print("\nVehicle Types:")
    for label, count in type_counts.items():
        print(f"  - {label}: {count}")

# Analyze our test results.
# The membership check skips the analysis when no earlier cell defined `results`.
if 'results' in locals():
    analyze_results(results)

## 10. Test enhanced processor

### Pre-SAM Super Resolution Approach (v3)

**Problem with Previous Approach**: Applying super resolution after SAM segmentation caused border artifacts because SR models are trained on natural images, not masked content.

**Improved Solution**: Apply super resolution to bounding box regions BEFORE SAM segmentation.

### New Workflow:
1. **Detect** vehicles with YOLO
2. **Extract** bounding box regions with padding
3. **Apply SR** to small vehicle regions (natural image content)
4. **Segment** with SAM2 on upscaled regions
5. **Apply mask** and final scaling

### Key Benefits:
- ✅ **Cleaner borders**: SR works on natural image content
- ✅ **Better segmentation**: SAM2 operates on higher resolution input
- ✅ **Compute efficient**: Only upscale regions that need it
- ✅ **Quality preservation**: No artifacts from masked SR

### Usage Example:
```python
from enhanced_vehicle_processor import PreSAMSuperResolutionProcessor

processor = PreSAMSuperResolutionProcessor(
    enable_super_resolution=True,
    sr_threshold_size=300,  # Apply SR if bbox < 300px
    bbox_padding=0.25,      # 25% padding for extraction
    bbox_expansion=0.15     # 15% bbox expansion for detection
)

results = processor.process_image(image_path)
```

### Quality Comparison:
- **Post-SAM SR (v2)**: Good vehicle capture, some border artifacts
- **Pre-SAM SR (v3)**: Excellent vehicle capture, clean natural borders

In [None]:
import os 
import matplotlib.pyplot as plt
from enhanced_vehicle_processor import PreSAMSuperResolutionProcessor

# Settings for the pre-SAM super-resolution processor
_sr_processor_options = {
    'enable_super_resolution': True,
    'sr_threshold_size': 500,  # Test with higher threshold
    'max_upscale_factor': 2.0,
    'bbox_expansion': 0.05,
    'bbox_padding': 0.10,  # Extra padding for SR
}

# Replaces the `processor` created earlier with the enhanced one
processor = PreSAMSuperResolutionProcessor(**_sr_processor_options)

In [None]:
test_image_path = "/mnt/damian/Projects/car_data_scraper/images/autoevolution_renderings/article_230605/230605_reborn-ford-bronco-ii-morphs-ranger-ms-rt-dna-to-mix-and-match-with-suv-body-style_6_10.jpg"

# Run the enhanced processor on the test image
results = processor.process_image(
    test_image_path,
    min_segmentation_score=0.6,
    target_size=512,
)

# Report, save and plot the results
if not results:
    print("No vehicles processed successfully")
else:
    print("\n=== RESULTS ===")
    
    # Output directory shared by the per-vehicle files and the comparison plot
    output_dir = "./test_output"
    os.makedirs(output_dir, exist_ok=True)
    
    # Per-vehicle summary and image file
    for vehicle_number, result in enumerate(results, start=1):
        metrics = result['processing_metrics']
        
        print(f"\nVehicle {vehicle_number}:")
        print(f"  Label: {metrics['detection_label']}")
        print(f"  Original size: {metrics['original_vehicle_size']}px")
        print(f"  Used SR: {metrics['used_super_resolution']}")
        # sr_scale_factor is optional in the metrics; 1.0 is shown when absent
        print(f"  SR scale: {metrics.get('sr_scale_factor', 1.0):.2f}x")
        print(f"  Segmentation score: {metrics['segmentation_score']:.3f}")
        print(f"  Final size: {metrics['final_size']}")
        
        output_path = os.path.join(
            output_dir,
            f"pre_sam_sr_vehicle_{vehicle_number}_{metrics['detection_label']}.png",
        )
        result['processed_image'].save(output_path)
        print(f"  Saved: {output_path}")
    
    # Side-by-side plot, one panel per vehicle
    panel_count = len(results)
    fig, axes = plt.subplots(1, panel_count, figsize=(5 * panel_count, 5))
    if panel_count == 1:
        # A single panel comes back as a bare Axes; wrap it for the zip below
        axes = [axes]
    
    for ax, result in zip(axes, results):
        metrics = result['processing_metrics']
        sr_flag = 'Yes' if metrics['used_super_resolution'] else 'No'
        title = "\n".join([
            f"{metrics['detection_label']}",
            f"Original: {metrics['original_vehicle_size']}px",
            f"Pre-SAM SR: {sr_flag}",
            f"Seg Score: {metrics['segmentation_score']:.3f}",
        ])
        ax.imshow(result['processed_image'])
        ax.set_title(title, fontsize=10)
        ax.axis('off')
    
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "pre_sam_sr_comparison.png"), dpi=150, bbox_inches='tight')
    plt.show()

## 11. Compare to rembg

In [None]:
import numpy as np
from PIL import Image
from rembg import remove

def remove_background_and_rescale(img, target_size=512):
    """
    Remove the background with rembg and fit the result on a square canvas
    
    Args:
        img: Input PIL image passed to rembg's ``remove``
        target_size: Side length of the square output canvas
        
    Returns:
        RGBA image of size (target_size, target_size) with the foreground
        scaled to fit and centred, or None when nothing is left after
        background removal
    """
    img_no_bg = remove(img)
    
    # Bounding box of everything that is not fully transparent
    alpha = np.array(img_no_bg.getchannel('A'))
    coords = np.argwhere(alpha > 0)
    if len(coords) == 0:
        return None
    y_min, x_min = coords.min(axis=0)
    y_max, x_max = coords.max(axis=0)
    
    # PIL crop boxes exclude the right/lower edge, so add 1 to keep the last
    # foreground row and column (and to avoid an empty crop for 1px content)
    cropped = img_no_bg.crop((int(x_min), int(y_min), int(x_max) + 1, int(y_max) + 1))
    
    # Scale so the longer side fills the canvas, preserving aspect ratio
    width, height = cropped.size
    scale_factor = min(target_size / width, target_size / height)
    # Clamp to 1px so extremely thin content never becomes 0-sized
    new_width = max(1, int(width * scale_factor))
    new_height = max(1, int(height * scale_factor))
    resized = cropped.resize((new_width, new_height), Image.Resampling.LANCZOS)
    
    # Centre on a transparent canvas; the image doubles as its own paste mask
    final_img = Image.new('RGBA', (target_size, target_size), (0, 0, 0, 0))
    paste_x = (target_size - new_width) // 2
    paste_y = (target_size - new_height) // 2
    final_img.paste(resized, (paste_x, paste_y), resized)
    return final_img

# Run the rembg baseline on the image at the most recently assigned test_image_path
test_image = Image.open(test_image_path).convert('RGBA')
# May be None when rembg leaves no visible foreground
processed_image = remove_background_and_rescale(test_image)
# Bare expression so the notebook renders the result as the cell output
processed_image

## 12. Conclusion and Next Steps

This notebook demonstrates the complete YOLO + SAM2 pipeline for vehicle image processing. The workflow successfully:

1. ✅ Detects vehicles using YOLO11 with configurable confidence thresholds
2. ✅ Segments vehicles precisely using SAM2 with bounding box prompts
3. ✅ Applies quality filtering based on detection and segmentation scores
4. ✅ Removes backgrounds and rescales to 512x512 while preserving aspect ratio
5. ✅ Provides comprehensive visualization at each step

### Key Advantages Over Generic Background Removal:
- **Vehicle-specific targeting**: Only processes detected vehicles
- **Higher precision**: SAM2 provides cleaner edges than rembg
- **Quality metrics**: Confidence scores for filtering
- **Batch processing ready**: Can be scaled for your full dataset

### Next Steps:
1. **Tune thresholds** based on your specific dataset requirements
2. **Scale to batch processing** using the enhanced_car_processor.py framework
3. **Optimize performance** by choosing appropriate SAM2 model size
4. **Add data augmentation** if needed for your text-to-image training

The pipeline is now ready for integration with your larger dataset processing workflow!