# Experiment 004: Multi-Start Random Initialization for Small N

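Generate configurations **from scratch** using multi-start random initialization.
The goal is to explore solution basins **different** from those of the pre-optimized baseline solutions.

**Result:** for N=1–20 (10,000 random restarts per N) no random configuration beat the baseline, so the submission written at the end is identical to the baseline.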

In [1]:
import os
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import time

# Seed NumPy's global RNG so the np.random.uniform draws made by the search
# functions below are repeatable from a fresh kernel.
np.random.seed(42)

# Tree geometry: the 15 outline vertices of a single tree, stored as parallel
# lists. TX[i] and TY[i] are paired into (x, y) points when the polygon is
# built in get_tree_polygon; coordinates are relative to the origin, which is
# also the pivot used for rotation.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, deg):
    """Build the Shapely polygon of one tree placed at (x, y).

    The outline defined by TX/TY is first rotated by ``deg`` about the
    origin (0, 0) and only then shifted by (x, y), so the rotation pivot
    travels with the tree.

    Returns:
        The rotated and translated polygon.
    """
    outline = Polygon(zip(TX, TY))
    return affinity.translate(
        affinity.rotate(outline, deg, origin=(0, 0)),
        x,
        y,
    )

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Args:
        trees: sequence of (x, y, deg) tuples.

    Returns:
        max(width, height) of the union of every tree's bounding box, or
        ``float('inf')`` when ``trees`` is empty/falsy.
    """
    if not trees:
        return float('inf')

    # One (minx, miny, maxx, maxy) tuple per tree.
    boxes = [get_tree_polygon(x, y, deg).bounds for x, y, deg in trees]

    x_extents = [coord for box in boxes for coord in (box[0], box[2])]
    y_extents = [coord for box in boxes for coord in (box[1], box[3])]

    return max(
        max(x_extents) - min(x_extents),
        max(y_extents) - min(y_extents),
    )

def has_overlap(trees):
    """Report whether any two trees overlap; merely touching does not count.

    Args:
        trees: sequence of (x, y, deg) tuples.

    Returns:
        True as soon as one pair of polygons intersects without just
        touching, otherwise False. Fewer than two trees can never overlap.
    """
    if len(trees) < 2:
        return False

    shapes = [get_tree_polygon(x, y, deg) for x, y, deg in trees]
    # Spatial index: only polygons whose bounding boxes meet are compared.
    spatial_index = STRtree(shapes)

    for idx, shape in enumerate(shapes):
        # NOTE(review): the result is used as integer positions into `shapes`,
        # which matches Shapely 2.x query() behaviour - confirm the installed version.
        for other_idx in spatial_index.query(shape):
            if other_idx == idx:
                continue  # a polygon always hits its own bounding box
            other = shapes[other_idx]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

def recenter_trees(trees):
    """Translate all trees so their joint bounding box is centred on the origin.

    Only positions change; rotations are kept and the size of the bounding
    box is unaffected by the shift.

    Args:
        trees: sequence of (x, y, deg) tuples.

    Returns:
        A new list of shifted (x, y, deg) tuples, or ``trees`` itself
        (unchanged) when it is empty/falsy.
    """
    if not trees:
        return trees

    boxes = [get_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
    min_xs, min_ys, max_xs, max_ys = zip(*boxes)

    x_extents = min_xs + max_xs
    y_extents = min_ys + max_ys
    shift_x = (min(x_extents) + max(x_extents)) / 2
    shift_y = (min(y_extents) + max(y_extents)) / 2

    return [(x - shift_x, y - shift_y, deg) for x, y, deg in trees]

# Confirmation message: shows in the cell output that the helper definitions above ran.
print("Functions defined successfully!")

Functions defined successfully!


In [2]:
# Load baseline configurations
# NOTE: absolute path - this cell only runs on a machine where this file exists.
baseline_path = '/home/code/experiments/001_baseline/santa-2025.csv'
# dtype=str keeps every column as text; load_all_configs below removes the
# 's' characters from x / y / deg and converts the values to float itself.
df_baseline = pd.read_csv(baseline_path, dtype=str)

def load_all_configs(df, max_n=200):
    """Load all configurations from a submission dataframe.

    Rows are grouped by the zero-padded puzzle size at the start of their
    ``id`` (``'007_3'`` belongs to N=7). The ``x``, ``y`` and ``deg`` values
    are read as text, have their ``'s'`` characters removed and are
    converted to float.

    Args:
        df: dataframe with ``id``, ``x``, ``y`` and ``deg`` columns, where
            ``id`` holds strings.
        max_n: largest N to load; configurations 1..max_n are returned.
            Defaults to 200, the range the rest of this notebook uses.

    Returns:
        dict mapping every n in 1..max_n to a list of (x, y, deg) float
        tuples in dataframe row order. An n with no matching rows maps to
        an empty list.

    Raises:
        ValueError: if a coordinate cannot be parsed as a float.
    """
    def _to_float(value):
        # Values are stored like 's0.123'; drop the marker before parsing.
        return float(str(value).replace('s', ''))

    ids = df['id']
    configs = {}
    for n in range(1, max_n + 1):
        # na=False: rows with a missing id simply never match, instead of
        # putting NaN into the boolean mask and making the selection fail.
        mask = ids.str.startswith(f'{n:03d}_', na=False)
        rows = df.loc[mask, ['x', 'y', 'deg']]
        configs[n] = [
            (_to_float(x), _to_float(y), _to_float(deg))
            for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
        ]
    return configs

baseline_configs = load_all_configs(df_baseline)

# Calculate baseline scores: each N contributes side**2 / N
baseline_scores = {
    n: get_bounding_box_side(baseline_configs[n]) ** 2 / n
    for n in range(1, 201)
}

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")
print(f"\nBaseline scores for N=1-10:")
for n in range(1, 11):
    print(f"  N={n}: {baseline_scores[n]:.6f}")

Baseline total score: 70.676102

Baseline scores for N=1-10:
  N=1: 0.661250
  N=2: 0.450779
  N=3: 0.434745
  N=4: 0.416545
  N=5: 0.416850
  N=6: 0.399610
  N=7: 0.399897
  N=8: 0.385407
  N=9: 0.387415
  N=10: 0.376630


In [3]:
# Multi-start random initialization
def random_initialization(n, num_restarts=10000, area_size=3.0):
    """Generate random initial configs and keep the best non-overlapping one.

    Every restart draws ``n`` trees with x and y uniform in
    [-area_size, area_size) and rotation uniform in [0, 360) from NumPy's
    global RNG, rejects the draw if any trees overlap, and otherwise scores
    the recentred configuration as ``side**2 / n``.

    Args:
        n: number of trees per configuration.
        num_restarts: how many random configurations to try.
        area_size: half-width of the square the tree positions are drawn from.

    Returns:
        (best_config, best_score, valid_count): the lowest-scoring
        overlap-free configuration as a list of (x, y, deg) tuples (None if
        none was found), its score (``float('inf')`` if none was found), and
        the number of restarts that were overlap-free.
    """
    best_config = None
    best_score = float('inf')
    valid_count = 0

    # With no trees there is nothing to place, and scoring would divide by
    # zero (inf**2 / 0), so report "nothing found" instead of raising.
    if n < 1:
        return best_config, best_score, valid_count

    for _ in range(num_restarts):
        # Random placement in a reasonable area. The x, y, deg draw order per
        # tree is kept so seeded runs reproduce the same configurations.
        trees = []
        for _ in range(n):
            x = np.random.uniform(-area_size, area_size)
            y = np.random.uniform(-area_size, area_size)
            deg = np.random.uniform(0, 360)
            trees.append((x, y, deg))

        # Reject any draw with overlapping trees
        if has_overlap(trees):
            continue

        valid_count += 1
        # Recenter and calculate score
        trees = recenter_trees(trees)
        score = get_bounding_box_side(trees)**2 / n
        if score < best_score:
            best_score = score
            best_config = trees

    return best_config, best_score, valid_count

# Confirmation message: shows in the cell output that random_initialization was defined.
print("Random initialization function defined!")

Random initialization function defined!


In [4]:
# Multi-start random search for N=1-20, compared per N against the baseline
print("Running multi-start random initialization for N=1-20...")
print("(10000 restarts per N)\n")

# Start from a copy of every baseline config; an entry is replaced only when
# the random search beats the baseline for that N.
random_configs = {n: list(baseline_configs[n]) for n in range(1, 201)}
improvements = []

start_time = time.time()

for n in range(1, 21):
    baseline_score_n = baseline_scores[n]

    # Sampling half-width grows linearly with N so more trees get more room
    area_size = 0.5 + 0.3 * n

    random_config, random_score, valid_count = random_initialization(
        n,
        num_restarts=10000,
        area_size=area_size,
    )

    if random_config is None:
        print(f"N={n:2d}: No valid configurations found (valid: {valid_count})")
    elif random_score < baseline_score_n:
        improvement = baseline_score_n - random_score
        random_configs[n] = random_config
        improvements.append((n, improvement, random_score))
        print(f"N={n:2d}: IMPROVED! {baseline_score_n:.6f} -> {random_score:.6f} (improvement: {improvement:.6f}, valid: {valid_count})")
    else:
        print(f"N={n:2d}: No improvement. Baseline: {baseline_score_n:.6f}, Best random: {random_score:.6f} (valid: {valid_count})")

elapsed = time.time() - start_time
print(f"\nElapsed time: {elapsed:.1f} seconds")
print(f"\nTotal improvements found: {len(improvements)}")
if improvements:
    total_improvement = sum(gain for _n, gain, _score in improvements)
    print(f"Total score improvement: {total_improvement:.6f}")

Running multi-start random initialization for N=1-20...
(10000 restarts per N)



N= 1: No improvement. Baseline: 0.661250, Best random: 0.661333 (valid: 10000)


N= 2: No improvement. Baseline: 0.450779, Best random: 0.561353 (valid: 7677)


N= 3: No improvement. Baseline: 0.434745, Best random: 0.613813 (valid: 5948)


N= 4: No improvement. Baseline: 0.416545, Best random: 0.888056 (valid: 4835)


N= 5: No improvement. Baseline: 0.416850, Best random: 1.303475 (valid: 4122)


N= 6: No improvement. Baseline: 0.399610, Best random: 1.511428 (valid: 3545)


N= 7: No improvement. Baseline: 0.399897, Best random: 1.766871 (valid: 3191)


N= 8: No improvement. Baseline: 0.385407, Best random: 1.888823 (valid: 2916)


N= 9: No improvement. Baseline: 0.387415, Best random: 2.455477 (valid: 2606)


N=10: No improvement. Baseline: 0.376630, Best random: 2.536989 (valid: 2455)


N=11: No improvement. Baseline: 0.375736, Best random: 2.582194 (valid: 2304)


N=12: No improvement. Baseline: 0.372724, Best random: 3.534445 (valid: 2241)


N=13: No improvement. Baseline: 0.372323, Best random: 3.211396 (valid: 2007)


N=14: No improvement. Baseline: 0.370569, Best random: 3.343304 (valid: 2051)


N=15: No improvement. Baseline: 0.379203, Best random: 3.940580 (valid: 1967)


N=16: No improvement. Baseline: 0.374128, Best random: 4.046091 (valid: 1967)


N=17: No improvement. Baseline: 0.370040, Best random: 4.997977 (valid: 1889)


N=18: No improvement. Baseline: 0.368771, Best random: 5.534726 (valid: 1827)


N=19: No improvement. Baseline: 0.368615, Best random: 5.837833 (valid: 1709)


N=20: No improvement. Baseline: 0.376057, Best random: 5.847972 (valid: 1705)

Elapsed time: 271.2 seconds

Total improvements found: 0


In [5]:
# Random initialization didn't work - the random configs are much worse than baseline.
# This is because random placement without optimization doesn't produce tight packings.
#
# Let's try a different approach: greedy construction with local optimization.
# Place trees one at a time, optimizing each placement.

def greedy_construction(n, num_candidates=1000):
    """Build a configuration by placing trees one at a time, optimizing each.

    The first tree is fixed at the origin with a 45 degree rotation. Every
    further tree is chosen from ``num_candidates`` random poses drawn around
    the current packing: the overlap-free pose giving the lowest
    ``side**2 / count`` is kept and the packing is recentred. If none of the
    candidates is overlap-free, up to ``10 * num_candidates`` poses from a
    square twice as wide are tried and the first overlap-free one is taken.

    Args:
        n: number of trees to place.
        num_candidates: random poses evaluated per placed tree.

    Returns:
        (trees, score): list of (x, y, deg) tuples and ``side**2 / n``.
        ``([], 0)`` for ``n <= 0``. If a tree could not be placed at all, the
        partial packing is returned with score ``float('inf')`` so that it is
        never mistaken for a valid n-tree configuration.
    """
    if n <= 0:
        return [], 0

    # Start with first tree at origin with optimal rotation (45 degrees for N=1)
    trees = [(0, 0, 45)]

    for _tree_idx in range(1, n):
        best_pos = None
        best_score = float('inf')

        # Determine search area based on current bounding box
        search_radius = get_bounding_box_side(trees) + 1.5

        # Try many random positions
        for _ in range(num_candidates):
            x = np.random.uniform(-search_radius, search_radius)
            y = np.random.uniform(-search_radius, search_radius)
            deg = np.random.uniform(0, 360)

            candidate = trees + [(x, y, deg)]
            if not has_overlap(candidate):
                # Recenter and calculate score
                centered = recenter_trees(candidate)
                score = get_bounding_box_side(centered)**2 / len(centered)
                if score < best_score:
                    best_score = score
                    best_pos = (x, y, deg)

        if best_pos is not None:
            trees.append(best_pos)
            trees = recenter_trees(trees)
            continue

        # If no valid position found, try harder: more draws, wider square,
        # and accept the first overlap-free pose.
        wide_radius = search_radius * 2
        for _ in range(num_candidates * 10):
            x = np.random.uniform(-wide_radius, wide_radius)
            y = np.random.uniform(-wide_radius, wide_radius)
            deg = np.random.uniform(0, 360)

            candidate = trees + [(x, y, deg)]
            if not has_overlap(candidate):
                trees.append((x, y, deg))
                trees = recenter_trees(trees)
                break
        else:
            # Still nothing fits: the packing would end up with fewer than n
            # trees, so flag it as unusable instead of scoring it.
            return trees, float('inf')

    final_score = get_bounding_box_side(trees)**2 / n
    return trees, final_score

print("Greedy construction function defined!")

# Run greedy construction for N=1-15
print("Running greedy construction for N=1-15...")
print("(1000 candidates per tree placement)\n")

# Start from a copy of every baseline config; an entry is replaced only when
# the greedy construction beats the baseline for that N.
greedy_configs = {n: list(baseline_configs[n]) for n in range(1, 201)}
greedy_improvements = []

start_time = time.time()

for n in range(1, 16):
    baseline_score_n = baseline_scores[n]

    # Try greedy construction multiple times and keep best
    best_greedy_config = None
    best_greedy_score = float('inf')

    for _trial in range(5):  # 5 trials per N
        greedy_config, greedy_score = greedy_construction(n, num_candidates=1000)
        if greedy_config and greedy_score < best_greedy_score:
            best_greedy_score = greedy_score
            best_greedy_config = greedy_config

    if best_greedy_config and best_greedy_score < baseline_score_n:
        improvement = baseline_score_n - best_greedy_score
        greedy_configs[n] = best_greedy_config
        greedy_improvements.append((n, improvement, best_greedy_score))
        print(f"N={n:2d}: IMPROVED! {baseline_score_n:.6f} -> {best_greedy_score:.6f} (improvement: {improvement:.6f})")
    else:
        print(f"N={n:2d}: No improvement. Baseline: {baseline_score_n:.6f}, Best greedy: {best_greedy_score:.6f}")

elapsed = time.time() - start_time
print(f"\nElapsed time: {elapsed:.1f} seconds")
print(f"\nTotal improvements found: {len(greedy_improvements)}")
if greedy_improvements:
    total_improvement = sum(imp for _, imp, _ in greedy_improvements)
    print(f"Total score improvement: {total_improvement:.6f}")

In [6]:
# Calculate new total score from the configurations kept after the random search
new_scores = {
    n: get_bounding_box_side(random_configs[n]) ** 2 / n
    for n in range(1, 201)
}

new_total = sum(new_scores.values())
print(f"\nNew total score: {new_total:.6f}")
print(f"Baseline total: {baseline_total:.6f}")
print(f"Improvement: {baseline_total - new_total:.6f}")


New total score: 70.676102
Baseline total: 70.676102
Improvement: 0.000000


In [None]:
# Validate for overlaps: every configuration that will be submitted is checked
print("\nValidating for overlaps...")
overlap_count = 0
for n in range(1, 201):
    if not has_overlap(random_configs[n]):
        continue
    print(f"  N={n}: OVERLAP DETECTED!")
    overlap_count += 1

print(
    "✓ No overlaps detected!"
    if overlap_count == 0
    else f"✗ {overlap_count} configurations have overlaps"
)

In [None]:
# Create submission dataframe with full precision.
# One row per tree: id is '<NNN>_<tree index>', and each value is written as
# text with an 's' prefix and 18 decimal places.
submission_rows = [
    {
        'id': f'{n:03d}_{i}',
        'x': f's{x:.18f}',
        'y': f's{y:.18f}',
        'deg': f's{deg:.18f}',
    }
    for n in range(1, 201)
    for i, (x, y, deg) in enumerate(random_configs[n])
]

submission_df = pd.DataFrame(submission_rows)
print(f"Created submission with {len(submission_df)} rows")

# Save the same file to both output locations
for out_path in (
    '/home/submission/submission.csv',
    '/home/code/experiments/004_random_init/submission.csv',
):
    submission_df.to_csv(out_path, index=False)
print(f"Saved submission files")

print(f"\n=== FINAL SCORE: {new_total:.6f} ===")
print(f"=== IMPROVEMENT: {baseline_total - new_total:.6f} ===")