In [None]:
# Master switch for this notebook run:
#   True  -> run the full optimization on the ensemble
#   False -> only create and save the ensemble
OPTIMIZE_ENSEMBLE = False

print(f"‚öôÔ∏è  OPTIMIZE_ENSEMBLE = {OPTIMIZE_ENSEMBLE}")
print(
    "   ‚Üí Will run full optimization on ensemble"
    if OPTIMIZE_ENSEMBLE
    else "   ‚Üí Will only create and save ensemble (no optimization)"
)

## The current solution ensembles my own and the best public solutions, then runs the optimization locally on my computer, since it can be time-consuming.

In [None]:
# Import required libraries
import math
import random
import multiprocessing as mp
from decimal import Decimal, getcontext
import time
from concurrent.futures import ProcessPoolExecutor, as_completed

import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Fix the Decimal context at 25 significant digits for all Decimal arithmetic
# below.
# NOTE(review): Python's default Decimal context precision is 28, so this is
# slightly lower than the default — confirm that is intentional.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor when polygons are built and
# divided by it again when bounds are read back.
scale_factor = Decimal('1e18')  # MUST match santa-2025-metric.ipynb

print(f"üñ•Ô∏è  Available CPU cores: {mp.cpu_count()}")

## Configuration

Set the path to your starting CSV file here. The algorithm will load this solution and try to improve upon it.

## Tree Definition

The `ChristmasTree` class represents a single tree with:
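- **Position**: (center_x, center_y) - coordinates of the tree's local origin, i.e. the center of the top of the trunk where it meets the bottom tier (the tip is 0.8 above this point and the trunk bottom 0.2 below it)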
- **Rotation**: angle in degrees
- **Polygon**: Shapely polygon for collision detection and bounding box calculation

The tree shape consists of:
- 3 tiers of foliage (triangular sections)
- A trunk at the bottom

In [None]:
class ChristmasTree:
    """A fixed-size Christmas tree outline placed at a position and rotation.

    The outline is defined around a local origin at the centre of the top of
    the trunk: the tip sits 0.8 above it and the trunk bottom 0.2 below it.
    The outline is rotated about that origin and then translated to
    ``(center_x, center_y)``. Polygon coordinates are stored multiplied by
    ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """
        Build the tree polygon for the requested pose.

        Parameters:
        - center_x: X-coordinate of the tree origin (any value Decimal accepts)
        - center_y: Y-coordinate of the tree origin (any value Decimal accepts)
        - angle: Rotation angle in degrees, applied about the tree origin
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        two = Decimal('2')
        four = Decimal('4')

        # Silhouette measurements shared by every tree.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand half of the outline, walking down from just below the tip
        # to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y), *right_side, *left_side]

        # Scale every vertex before handing the ring to Shapely.
        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the local origin first, then move to the final position.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

    def clone(self):
        """Return a new tree rebuilt from this tree's position and angle."""
        pose = (self.center_x, self.center_y, self.angle)
        return ChristmasTree(*(str(value) for value in pose))

print("‚úÖ ChristmasTree class defined!")

## Core Helper Functions

These functions handle:
1. **Collision detection** - checking if trees overlap
2. **Bounding box calculation** - computing the minimum square that contains all trees

In [None]:
def check_collision(tree_polygon, placed_polygons, tree_index):
    """
    Report whether a candidate polygon overlaps any already-placed polygon.

    ``tree_index.query(tree_polygon)`` supplies the positions (into
    ``placed_polygons``) of the candidates to test. A pair counts as a
    collision when the two polygons intersect without merely touching.

    Returns: True if at least one overlap is found, False otherwise
    """
    return any(
        tree_polygon.intersects(placed_polygons[idx])
        and not tree_polygon.touches(placed_polygons[idx])
        for idx in tree_index.query(tree_polygon)
    )


def calculate_bounding_square(placed_trees):
    """
    Return the side of the axis-aligned square that encloses every tree.

    The bounds of the union of all tree polygons are converted back from
    scaled coordinates, and the larger of the resulting width and height
    is used as the side length.

    Returns: Side length as a Decimal (Decimal('0') when no trees are given)
    """
    if not placed_trees:
        return Decimal('0')

    merged = unary_union([tree.polygon for tree in placed_trees])

    # bounds is (minx, miny, maxx, maxy) in scaled units; undo the scaling.
    left, bottom, right, top = (Decimal(edge) / scale_factor for edge in merged.bounds)

    return max(right - left, top - bottom)

print("‚úÖ Helper functions defined!")

In [None]:
def load_all_csv_solutions():
    """
    Build an ensemble from every CSV matching ``/kaggle/input/*/*.csv``.

    Each file is parsed as a submission (columns ``id``, ``x``, ``y``, ``deg``;
    a leading 's' on the numeric values is stripped). Within a file, only
    groups whose row count equals their tree count are kept. For each tree
    count n, the configuration with the smallest ``side**2 / n`` across all
    files wins; on ties the first file in sorted path order is kept.

    Files that fail to load or parse are reported and skipped. Overlaps
    between trees are not checked here.

    Returns: Dictionary mapping n -> list of (x, y, angle) tuples (best for each n)
    """
    import glob
    import os

    def _read_configurations(csv_path):
        """Parse one submission CSV into {n: [(x, y, deg), ...]} for complete groups."""
        df = pd.read_csv(csv_path)

        # Remove 's' prefix from values
        for col in ('x', 'y', 'deg'):
            df[col] = df[col].astype(str).str.lstrip('s').astype(float)

        # The tree count is the part of the id before the first underscore
        df['tree_count'] = df['id'].str.split('_').str[0].astype(int)

        configurations = {}
        for n, group in df.groupby('tree_count'):
            tree_data = list(group[['x', 'y', 'deg']].itertuples(index=False, name=None))
            if len(tree_data) == n:
                configurations[int(n)] = tree_data
        return configurations

    print(f"\nüìÇ Loading ensemble from all CSV files in workspace...")

    # Sorted so that tie-breaking between files does not depend on glob order
    csv_files = sorted(f for f in glob.glob('/kaggle/input/*/*.csv') if os.path.isfile(f))

    if not csv_files:
        print("‚ö†Ô∏è  No CSV files found!")
        return {}

    print(f"Found {len(csv_files)} CSV files:")
    for f in csv_files:
        print(f"  - {f}")

    # Load all solutions
    all_solutions = {}  # file -> {n -> tree_data}

    for csv_path in csv_files:
        try:
            solutions = _read_configurations(csv_path)
        except Exception as e:
            # Best effort: the input folder may contain unrelated CSV files.
            print(f"  ‚ö†Ô∏è  {csv_path}: Error loading - {e}")
            continue

        all_solutions[csv_path] = solutions
        print(f"  ‚úÖ {csv_path}: {len(solutions)} configurations")

    if not all_solutions:
        print("‚ö†Ô∏è  No valid solutions loaded!")
        return {}

    # Create ensemble: for each n, pick the best solution across all files
    print(f"\nüîç Building ensemble by selecting best solution for each configuration...")
    ensemble = {}
    total_score = Decimal('0.0')

    all_n_values = set().union(*all_solutions.values())

    for n in sorted(all_n_values):
        best_score = Decimal('Infinity')
        best_tree_data = None

        for solutions in all_solutions.values():
            tree_data = solutions.get(n)
            if tree_data is None:
                continue

            trees = [ChristmasTree(str(t[0]), str(t[1]), str(t[2])) for t in tree_data]
            side = calculate_bounding_square(trees)
            score = (side ** 2) / Decimal(n)

            if score < best_score:
                best_score = score
                best_tree_data = tree_data

        if best_tree_data is not None:
            ensemble[n] = best_tree_data
            # The winning score is already known; no need to rebuild the trees.
            total_score += best_score

    print(f"‚úÖ Ensemble created with {len(ensemble)} configurations")
    print(f"üìä Ensemble total score: {float(total_score):.6f}")

    return ensemble


def load_starting_solution(csv_path):
    """
    Load an existing submission CSV and convert to tree configurations.

    The CSV must have ``id``, ``x``, ``y`` and ``deg`` columns; a leading 's'
    on the numeric values is stripped, and the tree count is the part of
    ``id`` before the first underscore. Groups whose row count differs from
    their tree count are kept but reported with a warning.

    A missing file, a zero-byte file, or a file with no data rows yields an
    empty dictionary.

    Parameters:
    - csv_path: Path to the CSV file

    Returns: Dictionary mapping n -> list of (x, y, angle) tuples
    """
    print(f"\nüìÇ Loading starting solution from: {csv_path}")

    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"‚ö†Ô∏è  File not found: {csv_path}")
        print("Starting from scratch instead.")
        return {}
    except pd.errors.EmptyDataError:
        print(f"‚ö†Ô∏è  File is empty: {csv_path}")
        print("Starting from scratch instead.")
        return {}

    if df.empty:
        # A header-only file has no configurations; bail out before max()/min().
        print(f"‚ö†Ô∏è  No rows found in: {csv_path}")
        print("Starting from scratch instead.")
        return {}

    # Remove 's' prefix from values
    for col in ('x', 'y', 'deg'):
        df[col] = df[col].astype(str).str.lstrip('s').astype(float)

    # Group by tree count
    df['tree_count'] = df['id'].str.split('_').str[0].astype(int)

    solutions = {}
    for n, group in df.groupby('tree_count'):
        n = int(n)
        tree_data = list(group[['x', 'y', 'deg']].itertuples(index=False, name=None))
        solutions[n] = tree_data

        # Validate we have the right number of trees
        if len(tree_data) != n:
            print(f"‚ö†Ô∏è  Warning: Configuration n={n} has {len(tree_data)} trees, expected {n}")

    print(f"‚úÖ Loaded {len(solutions)} configurations (n={min(solutions)} to {max(solutions)})")

    # Calculate initial score (must match santa-2025-metric.ipynb)
    total_score = Decimal('0.0')
    for n, tree_data in solutions.items():
        trees = [ChristmasTree(str(t[0]), str(t[1]), str(t[2])) for t in tree_data]
        side = calculate_bounding_square(trees)
        # Score formula: (side_length ** 2) / num_trees
        total_score += (side ** 2) / Decimal(n)

    print(f"üìä Initial total score: {float(total_score):.6f}")

    return solutions

print("‚úÖ CSV loading function defined!")

## Load Starting Solution from CSV

This function loads existing submission CSV files and converts them into tree configurations.

**NEW**: `load_all_csv_solutions()` creates an ensemble by loading ALL .csv files in the workspace and selecting the best solution for each configuration (n=1 to 200).

## Aggressive Tree Placement Algorithm

This is the heart of the solution! For each tree, we:

1. **Test multiple rotation angles** (every 5°, 10°, 7.5°, or 6° depending on attempt)
2. **Try 32 different directions** from the center
3. **Move inward with small steps** until collision
4. **Fine-tune with ultra-small steps** (0.005) for precision
5. **Pick the configuration with minimum distance** from center

**Key Insight**: For the first tree, we test all rotations to find the one that gives the smallest bounding box!

In [None]:
def place_tree_aggressive(tree_to_place, placed_trees, angle_candidates):
    """
    Aggressively search for the best position and rotation for a tree.

    Strategy:
    - For first tree: place it at the origin with the candidate rotation that
      minimizes its bounding square
    - For subsequent trees: test all angles × 32 directions; along each
      direction slide inward from radius 15 in coarse steps until the first
      collision, then back out from that colliding radius in fine steps until
      the tree is free again. The candidate closest to the center wins.

    Parameters:
    - tree_to_place: Tree being placed; only its angle is read, and only for
      the fallback return value
    - placed_trees: List of already-placed ChristmasTree objects
    - angle_candidates: Iterable of rotation angles (degrees) to try

    Returns: (x, y, angle) tuple for the chosen placement. If no
    collision-free candidate is found, (0, 0, tree_to_place.angle) is returned.
    """
    if not placed_trees:
        # SPECIAL CASE: the first tree goes to the origin, so only its
        # rotation can change the size of the bounding square.
        best_angle = Decimal('0')
        best_size = Decimal('Infinity')

        for test_angle in angle_candidates:
            test_tree = ChristmasTree(center_x='0', center_y='0', angle=str(test_angle))
            side = calculate_bounding_square([test_tree])
            if side < best_size:
                best_size = side
                best_angle = Decimal(str(test_angle))

        return Decimal('0'), Decimal('0'), best_angle

    # Build spatial index for fast collision detection
    placed_polygons = [p.polygon for p in placed_trees]
    tree_index = STRtree(placed_polygons)

    # Unit direction vectors, uniformly distributed around the center.
    # They do not depend on the angle being tested, so build them once.
    num_directions = 32
    directions = []
    for i in range(num_directions):
        direction_angle = (2 * math.pi * i) / num_directions
        directions.append((
            Decimal(str(math.cos(direction_angle))),
            Decimal(str(math.sin(direction_angle))),
        ))

    start_radius = Decimal('15.0')
    step_in = Decimal('0.15')
    fine_step = Decimal('0.005')  # Very small steps!
    max_fine_steps = 150
    safety_radius = Decimal('20.0')

    def collides_at(base_polygon, px, py):
        """Return True if base_polygon shifted by (px, py) overlaps a placed tree."""
        candidate_poly = affinity.translate(
            base_polygon,
            xoff=float(px * scale_factor),
            yoff=float(py * scale_factor)
        )
        return check_collision(candidate_poly, placed_polygons, tree_index)

    best_config = None
    best_radius = Decimal('Infinity')

    # Try all angle candidates
    for test_angle in angle_candidates:
        # The rotated outline is the same for every direction: build it once.
        base_polygon = ChristmasTree(angle=str(test_angle)).polygon

        for vx, vy in directions:
            # Phase 1: Coarse approach from far away
            radius = start_radius
            collision_found = False

            while radius >= Decimal('0'):
                if collides_at(base_polygon, radius * vx, radius * vy):
                    collision_found = True
                    break
                radius -= step_in

            if not collision_found:
                # No collision even at center - place at center
                radius = Decimal('0')
                px = Decimal('0')
                py = Decimal('0')
            else:
                # Phase 2: Fine tuning. Start from the colliding radius and
                # back out in small steps; the first free radius is the
                # tightest placement along this direction at fine_step
                # resolution.
                free_spot_found = False
                for _ in range(max_fine_steps):
                    radius += fine_step
                    if radius > safety_radius:  # Safety limit
                        break

                    px = radius * vx
                    py = radius * vy
                    if not collides_at(base_polygon, px, py):
                        free_spot_found = True
                        break

                if not free_spot_found:
                    # Never record a position that is still colliding.
                    continue

            # Track the best configuration (closest to center)
            if radius < best_radius:
                best_radius = radius
                best_config = (px, py, Decimal(str(test_angle)))

    return best_config if best_config else (Decimal('0'), Decimal('0'), tree_to_place.angle)

print("‚úÖ Aggressive placement algorithm defined!")

## Local Optimization: Compaction

After initial placement, we try to **move each tree closer to the center** to create a tighter arrangement.

This works by:
1. Calculating direction vector towards center
2. Moving in small steps (0.1 → 0.05 → 0.02 → 0.01)
3. Stopping when collision detected
4. Repeating for multiple iterations

In [None]:
def local_optimization_aggressive(placed_trees, max_iterations=5):
    """
    Compact the arrangement by moving trees towards the center.

    Every tree except the first is pushed along the straight line from its
    position (at the start of the pass) towards the origin, using step sizes
    0.1, 0.05, 0.02 and 0.01 with at most 20 moves per step size. A move is
    kept only if the tree does not overlap any other tree, and a step is never
    taken if it would carry the tree past the origin.

    The trees in ``placed_trees`` are modified in place.

    Parameters:
    - placed_trees: List of placed ChristmasTree objects
    - max_iterations: Number of compaction passes

    Returns: The same list, with tree positions and polygons updated
    """
    if len(placed_trees) <= 1:
        return placed_trees

    step_sizes = (Decimal('0.1'), Decimal('0.05'), Decimal('0.02'), Decimal('0.01'))
    max_steps = 20

    for _ in range(max_iterations):
        improved = False

        # Try to move each tree (except the first)
        for i in range(1, len(placed_trees)):
            tree = placed_trees[i]

            # Calculate distance from center
            squared_distance = tree.center_x**2 + tree.center_y**2
            if squared_distance == 0:
                continue
            distance = squared_distance.sqrt()

            # Unit direction vector towards center
            dx = -tree.center_x / distance
            dy = -tree.center_y / distance

            # While tree i is being moved, the other trees and this tree's
            # rotated outline do not change: build them once per tree.
            other_polygons = [t.polygon for j, t in enumerate(placed_trees) if j != i]
            tree_index = STRtree(other_polygons)
            base_polygon = ChristmasTree(angle=str(tree.angle)).polygon

            # Try multiple step sizes (coarse to fine)
            for step in step_sizes:
                for _ in range(max_steps):
                    remaining = (tree.center_x**2 + tree.center_y**2).sqrt()
                    if step > remaining:
                        break  # Would overshoot the center, try smaller step

                    new_x = tree.center_x + dx * step
                    new_y = tree.center_y + dy * step

                    # Create test polygon at new position
                    test_poly = affinity.translate(
                        base_polygon,
                        xoff=float(new_x * scale_factor),
                        yoff=float(new_y * scale_factor)
                    )

                    if check_collision(test_poly, other_polygons, tree_index):
                        break  # Collision detected, try smaller step

                    # Valid move - update tree position
                    tree.center_x = new_x
                    tree.center_y = new_y
                    tree.polygon = test_poly
                    improved = True

        if not improved:
            break  # No improvement possible

    return placed_trees

print("‚úÖ Local optimization defined!")

## Simulated Annealing: Escaping Local Minima

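Simulated annealing was intended for smaller configurations (n ≤ 40); note that the worker function below currently applies it to every configuration with n ≤ 200. It searches for arrangements that are better than the ones greedy placement and compaction can reach on their own.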

### How it works:
1. **Random perturbations**: Randomly move/rotate a tree
2. **Accept good moves**: Always accept if it improves the score
3. **Sometimes accept bad moves**: Accept worse solutions with probability based on temperature
4. **Cool down**: Gradually reduce temperature to converge

This helps escape local minima and find better global solutions!

In [None]:
def simulated_annealing_aggressive(placed_trees, num_iterations=3000, temperature=2.0, cooling_rate=0.98):
    """
    Apply simulated annealing to refine tree placement.

    Works on clones of the input trees. Each iteration perturbs one randomly
    chosen tree (never the first) by up to ±0.25 in x and y and up to ±30
    degrees. Proposals that overlap another tree are discarded; the rest are
    accepted if they shrink the squared bounding-square side, or otherwise
    with probability exp(-delta / temperature).

    Parameters:
    - placed_trees: List of placed ChristmasTree objects
    - num_iterations: Number of SA iterations
    - temperature: Initial temperature for acceptance probability
    - cooling_rate: Factor applied to the temperature after every 100
      iterations (0.98 = cool by 2%), whether or not those iterations
      produced a valid move

    Returns: Best configuration found (the input list itself when it holds
    at most one tree)
    """
    if len(placed_trees) <= 1:
        return placed_trees

    current_trees = [tree.clone() for tree in placed_trees]
    current_score = calculate_bounding_square(current_trees) ** 2

    best_trees = [tree.clone() for tree in current_trees]
    best_score = current_score

    temp = Decimal(str(temperature))
    cooling = Decimal(str(cooling_rate))

    for iteration in range(num_iterations):
        # Pick a random tree (not the first one)
        tree_idx = random.randint(1, len(current_trees) - 1)
        tree = current_trees[tree_idx]

        # Save original state
        orig_x, orig_y, orig_angle = tree.center_x, tree.center_y, tree.angle
        orig_poly = tree.polygon

        # Random perturbation
        delta_x = Decimal(str(random.uniform(-0.25, 0.25)))
        delta_y = Decimal(str(random.uniform(-0.25, 0.25)))
        delta_angle = Decimal(str(random.uniform(-30, 30)))

        new_x = orig_x + delta_x
        new_y = orig_y + delta_y
        new_angle = (orig_angle + delta_angle) % Decimal('360')

        # Create new polygon
        new_poly = affinity.translate(
            ChristmasTree(angle=str(new_angle)).polygon,
            xoff=float(new_x * scale_factor),
            yoff=float(new_y * scale_factor)
        )

        # Check collision against every other tree
        other_polygons = [t.polygon for j, t in enumerate(current_trees) if j != tree_idx]
        tree_index = STRtree(other_polygons)

        if not check_collision(new_poly, other_polygons, tree_index):
            # Apply move
            tree.center_x = new_x
            tree.center_y = new_y
            tree.angle = new_angle
            tree.polygon = new_poly

            # Calculate new score
            new_score = calculate_bounding_square(current_trees) ** 2
            delta = new_score - current_score

            # Metropolis acceptance criterion
            if delta < 0 or random.random() < math.exp(-float(delta) / float(temp)):
                current_score = new_score

                if new_score < best_score:
                    best_score = new_score
                    best_trees = [t.clone() for t in current_trees]
            else:
                # Reject move - restore original state
                tree.center_x = orig_x
                tree.center_y = orig_y
                tree.angle = orig_angle
                tree.polygon = orig_poly

        # Cool down every 100 iterations. This sits outside the collision
        # branch so a colliding proposal cannot skip a cooling step.
        if (iteration + 1) % 100 == 0:
            temp *= cooling

    return best_trees

print("‚úÖ Simulated annealing defined!")

## Parallel Processing: Worker Function

This function is executed in parallel by multiple CPU cores. Each worker:
1. Reconstructs the previous (n-1) tree configuration
2. Adds one new tree using aggressive placement
3. Applies local optimization
4. Applies simulated annealing (for small n)
5. Returns the result

Multiple workers run simultaneously, each trying different rotation angle sets!

In [None]:
def pack_configuration_worker(args):
    """
    Build one n-tree configuration from a single packed task tuple.

    The previous configuration is rebuilt, missing trees are added one at a
    time with place_tree_aggressive, and the result is post-processed with
    local optimization (n > 1) and simulated annealing (n <= 200). If a
    starting solution is supplied and its bounding square is strictly smaller,
    the starting solution is returned instead.

    Parameters:
    - args: Tuple of (n, previous_trees_data, angle_candidates, starting_tree_data)

    Returns: (n, tree_data, side_length)
    """
    n, previous_trees_data, angle_candidates, starting_tree_data = args

    # Rebuild the trees carried over from the previous configuration, if any.
    placed_trees = []
    if previous_trees_data:
        for entry in previous_trees_data:
            placed_trees.append(ChristmasTree(str(entry[0]), str(entry[1]), str(entry[2])))

    # Place however many trees are still missing, one after another.
    for _ in range(n - len(placed_trees)):
        new_tree = ChristmasTree(angle='0')
        px, py, angle = place_tree_aggressive(new_tree, placed_trees, angle_candidates)

        new_tree.center_x, new_tree.center_y, new_tree.angle = px, py, angle
        new_tree.polygon = affinity.translate(
            ChristmasTree(angle=str(angle)).polygon,
            xoff=float(px * scale_factor),
            yoff=float(py * scale_factor),
        )
        placed_trees.append(new_tree)

    # Post-processing: compaction first, then annealing.
    if n > 1:
        placed_trees = local_optimization_aggressive(placed_trees, max_iterations=5)

    if n <= 200:
        placed_trees = simulated_annealing_aggressive(
            placed_trees,
            num_iterations=5000,
            temperature=2.0,
            cooling_rate=0.98,
        )

    # Measure the result and flatten it to plain float tuples.
    side = calculate_bounding_square(placed_trees)
    tree_data = [
        (float(tree.center_x), float(tree.center_y), float(tree.angle))
        for tree in placed_trees
    ]

    # Fall back to the starting solution when it is strictly tighter.
    if starting_tree_data:
        starting_side = calculate_bounding_square(
            [ChristmasTree(str(t[0]), str(t[1]), str(t[2])) for t in starting_tree_data]
        )
        if starting_side < side:
            side, tree_data = starting_side, starting_tree_data

    return n, tree_data, float(side)

print("‚úÖ Worker function defined!")

## Multi-Attempt Parallel Strategy

This is where the magic happens! For each configuration, we:

1. **Create multiple tasks** with different angle sets
2. **Execute them in parallel** using all CPU cores
3. **Pick the best result** (smallest bounding box)

This ensures we find near-optimal solutions by exploring different strategies simultaneously!

In [None]:
def generate_multiple_attempts_parallel(n, num_attempts, previous_trees_data, starting_tree_data=None):
    """
    Generate multiple solution attempts and pick the best.

    Attempts cycle through four base sets of rotation angles. The first pass
    through the base sets uses them unchanged; later attempts use a shuffled
    copy limited to 60 angles to add diversity.

    Despite the name, the attempts are currently run one after another in the
    calling process.

    Parameters:
    - n: Number of trees in this configuration
    - num_attempts: Number of attempts (must be at least 1)
    - previous_trees_data: Tree data from (n-1) configuration
    - starting_tree_data: Optional starting solution for this n

    Returns: Best (n, tree_data, side_length) result

    Raises: ValueError if num_attempts is less than 1
    """
    if num_attempts < 1:
        raise ValueError("num_attempts must be at least 1")

    # Different angle strategies for diversity
    angle_sets = [
        list(range(0, 360, 5)),        # Every 5° (72 angles)
        list(range(0, 360, 10)),       # Every 10° (36 angles)
        [i * 7.5 for i in range(48)],  # Every 7.5° (48 angles)
        list(range(0, 360, 6)),        # Every 6° (60 angles)
    ]

    # Create tasks
    tasks = []
    for attempt in range(num_attempts):
        angle_candidates = angle_sets[attempt % len(angle_sets)]

        # Add randomness only for attempts beyond the base strategies, so each
        # base strategy is tried once exactly as defined above.
        if attempt >= len(angle_sets):
            shuffled = angle_candidates.copy()
            random.shuffle(shuffled)
            angle_candidates = shuffled[:60]  # Use subset

        tasks.append((n, previous_trees_data, angle_candidates, starting_tree_data))

    # Process tasks sequentially (parallel processing has issues in Windows/Jupyter)
    results = [pack_configuration_worker(task) for task in tasks]

    # Select the best result (minimum bounding box)
    return min(results, key=lambda result: result[2])

print("‚úÖ Multi-attempt strategy defined!")

## Main Optimization Loop

Now we put it all together! This loop:
1. Iterates through all configurations (n=1 to 200)
2. Uses parallel multi-attempt strategy for each
3. Tracks scores and timing
4. Prints progress
5. Stores results for submission

In [None]:
def optimize_all_configurations(max_n=200, starting_solutions=None, verbose=True):
    """
    Build the best available packing for every configuration n = 1..max_n.

    For each n, generate_multiple_attempts_parallel is called with the result
    stored for n-1 and (when present) the starting solution for n. When a
    starting solution with exactly n trees scores better than the optimizer's
    result, the starting solution is stored instead, so the returned data is
    never worse than the supplied baseline. This is the situation the
    "(kept old solution)" progress message reports.

    A starting entry whose length is not n is still handed to the generator
    but is ignored for the comparison, because its score would not be
    comparable and storing it would misalign the per-configuration rows.

    Parameters:
    - max_n: Maximum number of trees (default 200)
    - starting_solutions: Dict mapping n -> tree_data (optional)
    - verbose: Print progress (default True)

    Returns: (tree_data_all, scores_by_config)
    - tree_data_all: tree entries of all configurations, concatenated in
      order of increasing n
    - scores_by_config: list of (n, side, score) with score = side**2 / n
    """
    start_time = time.time()

    # Score differences smaller than this are reported as "same quality"
    score_tolerance = 1e-6

    tree_data_all = []
    scores_by_config = []
    previous_trees_data = None

    if starting_solutions is None:
        starting_solutions = {}

    # One attempt per reported CPU core; loop-invariant, so query it once
    num_attempts = mp.cpu_count()

    if verbose:
        print("\nüéÑ AGGRESSIVE CHRISTMAS TREE PACKING OPTIMIZER üéÑ")
        print("=" * 80)
        print(f"Using {num_attempts} CPU cores at 100% capacity")
        print(f"Configurations: 1-{max_n} trees")
        print(f"Starting from {len(starting_solutions)} pre-existing configurations")
        print("=" * 80)

    for n in range(1, max_n + 1):
        iter_start = time.time()

        # Get starting solution for this n if available
        starting_tree_data = starting_solutions.get(n)

        # Generate best solution for this n (the echoed n is not needed)
        _, tree_data, side = generate_multiple_attempts_parallel(
            n, num_attempts, previous_trees_data, starting_tree_data
        )
        score_n = (side ** 2) / n

        # Remember the optimizer's own numbers for the progress report,
        # since side/score_n may be replaced by the baseline below
        optimized_side, optimized_score = side, score_n

        # Evaluate the baseline regardless of `verbose`, so the stored result
        # does not depend on whether progress is printed
        has_baseline = bool(starting_tree_data) and len(starting_tree_data) == n
        starting_side = None
        starting_score = None
        if has_baseline:
            starting_trees = [
                ChristmasTree(str(t[0]), str(t[1]), str(t[2]))
                for t in starting_tree_data
            ]
            starting_side = float(calculate_bounding_square(starting_trees))
            starting_score = (starting_side ** 2) / n

            # Best-of-both: fall back to the baseline when the new result is worse
            if optimized_score - starting_score >= score_tolerance:
                tree_data = starting_tree_data
                side = starting_side
                score_n = starting_score

        # Store results
        previous_trees_data = tree_data
        scores_by_config.append((n, side, score_n))
        tree_data_all.extend(tree_data)

        if not verbose:
            continue

        iter_time = time.time() - iter_start
        total_score = sum(s[2] for s in scores_by_config)

        print(f"\n{'='*80}")
        print(f"Configuration n={n:3d} | Time: {iter_time:.2f}s")
        print(f"{'-'*80}")

        if has_baseline:
            print("üìÅ OLD CSV Solution:")
            print(f"   Box size: {starting_side:.6f}")
            print(f"   Score:    {starting_score:.6f}")
            print("\nüîß NEW Optimized Solution:")
            print(f"   Box size: {optimized_side:.6f}")
            print(f"   Score:    {optimized_score:.6f}")

            delta = optimized_score - starting_score
            if delta >= score_tolerance:
                diff = (delta / starting_score) * 100
                print(f"\n‚ö†Ô∏è  WORSE by {diff:.2f}% (kept old solution)")
            elif delta <= -score_tolerance:
                improvement = (-delta / starting_score) * 100
                print(f"\n‚ú® IMPROVED by {improvement:.2f}%")
            else:
                print("\n= KEPT (same quality)")
        else:
            print("üÜï NEW Configuration:")
            print(f"   Box size: {side:.6f}")
            print(f"   Score:    {score_n:.6f}")

        print(f"\nüìä Running Total Score: {total_score:.6f}")

    if verbose:
        print(f"\n{'='*80}")
        print("Optimization complete!")
        print(f"Total time: {time.time() - start_time:.2f} seconds")
        print(f"Final total score: {sum(s[2] for s in scores_by_config):.6f}")
        print(f"{'='*80}")

    return tree_data_all, scores_by_config

print("‚úÖ Main optimization loop defined!")

## Generate Submission

Run the optimization and create the submission file!

**Note**: This will take a significant amount of time (several hours) as it processes 200 configurations with maximum CPU utilization. For testing, you can reduce `max_n` to a smaller value like 10 or 20.

In [None]:
# Number of configurations to process: 200 for a full submission,
# a small value such as 10 for a quick test run.
MAX_N = 200

print(f"Starting optimization for n=1 to {MAX_N}...")
print()

# Ensemble baseline gathered from all CSV files
starting_solutions = load_all_csv_solutions()

if not OPTIMIZE_ENSEMBLE:
    print("üì¶ SKIPPING OPTIMIZATION - Using ensemble as-is")

    # Take each configuration straight from the ensemble and score it
    tree_data_all, scores_by_config = [], []

    for n in range(1, MAX_N + 1):
        if n not in starting_solutions:
            print(f"‚ö†Ô∏è  Warning: Configuration n={n} not in ensemble!")
            continue

        tree_data = starting_solutions[n]
        tree_data_all.extend(tree_data)

        # Score of one configuration is side^2 / n
        trees = [
            ChristmasTree(str(entry[0]), str(entry[1]), str(entry[2]))
            for entry in tree_data
        ]
        side = float(calculate_bounding_square(trees))
        score_n = (side ** 2) / n
        scores_by_config.append((n, side, score_n))

    total_score = sum(entry[2] for entry in scores_by_config)
    print(f"\n‚úÖ Ensemble loaded: {len(scores_by_config)} configurations")
    print(f"üìä Total score: {total_score:.6f}")
else:
    print("üîß Running FULL OPTIMIZATION on ensemble...")
    print("This will use 100% of your CPU cores!")

    # Optimize every configuration, using the ensemble as the baseline
    tree_data_all, scores_by_config = optimize_all_configurations(
        max_n=MAX_N,
        starting_solutions=starting_solutions,
        verbose=True,
    )

## Create Submission DataFrame

In [None]:
# Row ids follow the 'NNN_t' pattern: zero-padded tree count, then tree index
index = [f'{n:03d}_{t}' for n in range(1, MAX_N + 1) for t in range(n)]

# One row per tree, one column per coordinate / angle
cols = ['x', 'y', 'deg']
submission = pd.DataFrame(data=tree_data_all, index=index, columns=cols)
submission = submission.rename_axis('id')

# Per column: round to 6 decimal places, then prepend 's' (required format)
for col in cols:
    rounded = submission[col].astype(float).round(decimals=6)
    submission[col] = 's' + rounded.astype('string')

print("‚úÖ Submission dataframe created!")
print(f"\nFirst few rows:")
print(submission.head(10))

## Validation: Check for Overlaps

Let's verify there are no overlapping trees in our solution!

In [None]:
def validate_submission(submission_df):
    """
    Check every configuration in the submission for overlapping trees.

    The frame is expected to carry 's'-prefixed string values in the
    'x', 'y' and 'deg' columns and row ids whose part before the first '_'
    identifies the configuration. Checking stops at the first configuration
    in which an overlap is detected.

    Returns: True if valid, False if overlaps found
    """
    print("Validating submission for overlaps...")

    # Work on a copy whose coordinate columns are numeric ('s' prefix dropped)
    val_df = submission_df.copy()
    for column in ('x', 'y', 'deg'):
        val_df[column] = val_df[column].str[1:].astype(float)

    # Configuration label = part of the row id before the first underscore
    val_df['tree_count_group'] = val_df.index.str.split('_').str[0]

    def has_overlap(polygons):
        """Return True when two polygons intersect beyond merely touching."""
        spatial_index = STRtree(polygons)
        for position, candidate in enumerate(polygons):
            for hit in spatial_index.query(candidate):
                if hit == position:
                    # A polygon always matches itself in the index
                    continue
                neighbour = polygons[hit]
                if candidate.intersects(neighbour) and not candidate.touches(neighbour):
                    return True
        return False

    for group, df_group in val_df.groupby('tree_count_group'):
        # Rebuild the tree polygons of this configuration
        polygons = [
            ChristmasTree(str(x), str(y), str(deg)).polygon
            for x, y, deg in zip(df_group['x'], df_group['y'], df_group['deg'])
        ]

        if has_overlap(polygons):
            print(f"‚ùå Overlap found in group {group}!")
            return False

        if int(group) % 50 == 0:
            print(f"‚úì Validated group {group} - no overlaps")

    print("\n‚úÖ Validation complete! No overlaps found in any configuration.")
    return True

# Run the overlap check on the submission DataFrame; is_valid is True only
# when validate_submission reported no overlapping trees.
is_valid = validate_submission(submission)

## Save Submission File

In [None]:
# Write the submission DataFrame to 'submission.csv' and report its size
# next to the row count expected for configurations 1..MAX_N
output_path = 'submission.csv'
submission.to_csv(output_path)
print("‚úÖ Submission saved to 'submission.csv'")

expected_rows = sum(range(1, MAX_N + 1))
print(f"\nSubmission shape: {submission.shape}")
print(f"Expected shape: ({expected_rows}, 3)")

## Score Analysis

Let's analyze how our solution performs across different configuration sizes.

In [None]:
# Tabulate the per-configuration results: (n, bounding-box side, score)
score_columns = ['num_trees', 'box_size', 'score']
scores_df = pd.DataFrame(scores_by_config, columns=score_columns)

score_series = scores_df['score']
box_series = scores_df['box_size']

print("\nüìä SCORE ANALYSIS")
print("=" * 80)
print(f"\nTotal Score: {score_series.sum():.6f}")
print(f"Average Box Size: {box_series.mean():.6f}")
print(f"Average Normalized Score: {score_series.mean():.6f}")

# Summary statistics over all three columns
print("\nüìà Score by Configuration Size:")
print(scores_df.describe())

# Ten lowest-scoring configurations
lowest_ten = scores_df.nsmallest(10, 'score')
print("\nüèÜ Best Configurations (lowest normalized score):")
print(lowest_ten[score_columns])

# Ten highest-scoring configurations
highest_ten = scores_df.nlargest(10, 'score')
print("\n‚ö†Ô∏è Worst Configurations (highest normalized score):")
print(highest_ten[score_columns])

In [None]:
# Compare the final per-configuration scores against the starting solutions.
# Each configuration is classified as improved / kept / worse / new, and the
# overall improvement is computed ONLY over configurations that have a
# starting solution, so that both totals in the ratio cover the same set.
if starting_solutions:
    print("\nüîç IMPROVEMENT ANALYSIS")
    print("=" * 80)

    # Score differences smaller than this count as "no change"
    score_tolerance = 1e-6

    improved_count = 0
    kept_count = 0
    worse_count = 0
    new_count = 0

    total_starting_score = 0
    total_new_score = 0          # all configurations
    comparable_new_score = 0     # only configurations that have a baseline

    for n, side, score_n in scores_by_config:
        total_new_score += score_n

        if n not in starting_solutions:
            new_count += 1
            continue

        # Re-score the starting solution for this n (score = side^2 / n)
        starting_tree_data = starting_solutions[n]
        starting_trees = [
            ChristmasTree(str(t[0]), str(t[1]), str(t[2]))
            for t in starting_tree_data
        ]
        starting_side = float(calculate_bounding_square(starting_trees))
        starting_score = (starting_side ** 2) / n

        total_starting_score += starting_score
        comparable_new_score += score_n

        delta = score_n - starting_score
        if delta <= -score_tolerance:
            improved_count += 1
        elif delta >= score_tolerance:
            worse_count += 1
        else:
            kept_count += 1

    print(f"Improved:  {improved_count} configurations")
    print(f"Kept:      {kept_count} configurations (no change)")
    print(f"Worse:     {worse_count} configurations")
    print(f"New:       {new_count} configurations")
    print()

    if total_starting_score > 0:
        # Ratio over the comparable set; configurations without a baseline
        # would otherwise inflate the new total and distort the percentage
        overall_improvement = (
            (total_starting_score - comparable_new_score) / total_starting_score
        ) * 100
        print(f"Starting total score: {total_starting_score:.6f}")
        print(f"New total score:      {total_new_score:.6f}")
        if new_count:
            compared_count = improved_count + kept_count + worse_count
            print(
                f"   (improvement covers only the {compared_count} "
                f"configurations that have a starting solution)"
            )
        print(f"Overall improvement:  {overall_improvement:.4f}%")
    else:
        print(f"New total score:      {total_new_score:.6f}")
else:
    print("\nüìù No starting solution - all configurations are new!")

## Compare with Starting Solution

Let's see how much we improved over the baseline!

## Key Takeaways & Improvements

### What Makes This Hybrid Solution Work:

1. **Smart Baseline**: Start from an existing good solution (e.g., aligned grid approach)

2. **Parallel Multi-Attempt Strategy**
   - Testing multiple solutions simultaneously finds better results
   - Different angle sets provide diversity

3. **Combined Optimization Techniques**
   - Greedy placement for initial solution
   - Local optimization for compaction
   - Simulated annealing for global optimization

4. **Best-of-Both**: Always keeps the better of (starting solution, new optimization)

5. **Precision Matters**
   - Using Decimal type with high precision
   - Ultra-fine step sizes (0.005) for placement
   - Careful collision detection

### How to Use:

1. **Set your starting CSV**: Edit `STARTING_CSV_PATH` at the top
2. **Configure MAX_N**: Set how many configurations to optimize (1-200)
3. **Run**: The algorithm will try to improve each configuration
4. **Review**: Check the improvement analysis to see gains

### Recommended Strategy:

1. Start with a simple aligned solution (like `88-32999-a-well-aligned-initial-solution.ipynb`)
2. Run this hybrid optimizer to refine it
3. Take the output and run it again (iterative improvement!)
4. Focus optimization time on configurations that need it most

### Potential Further Improvements:

1. **Genetic Algorithms**: Evolve entire populations of solutions
2. **Better Heuristics**: Analyze tree geometry for optimal starting rotations
3. **Adaptive Strategies**: Use different approaches for different n ranges
4. **Targeted Optimization**: Spend more time on high-scoring configurations
5. **Caching**: Store and reuse good sub-configurations

### Performance Tips:

- For testing, use `MAX_N=10` or `20` instead of 200
- Reduce `num_attempts` in `generate_multiple_attempts_parallel` for faster runs
- Adjust simulated annealing iterations based on your time budget
- Run multiple passes: output → input → output (iterative refinement)

## 🎯 Good Luck and Happy Packing! 🎄

**Pro Tip**: Run this notebook multiple times, each time using the previous output as the new input for continuous improvement!