# Experiment 004: Multi-Start Random Initialization for Small N

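Generate configurations from scratch by placing trees at random positions and rotations (N = 1–20),
keeping a random configuration only when it is overlap-free and scores better than the baseline for that N.
The goal is to explore solution basins different from those reached by the pre-optimized solutions.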

In [None]:
import os
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
import time

# Seed NumPy's global generator so the random placements are repeatable.
np.random.seed(42)

# Tree geometry: TX[i], TY[i] are paired into the i-th vertex of the tree
# outline (15 vertices) by get_tree_polygon. Rows below follow the outline:
# apex, right side going down, bottom edge, left side going up.
TX = [
    0,
    0.125, 0.0625, 0.2, 0.1, 0.35, 0.075,
    0.075, -0.075,
    -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8,
    0.5, 0.5, 0.25, 0.25, 0, 0,
    -0.2, -0.2,
    0, 0, 0.25, 0.25, 0.5, 0.5,
]

def get_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` and then moved to (x, y).

    The outline is built from the module-level TX/TY vertex lists, rotated
    about the origin (0, 0) by ``deg`` (shapely.affinity.rotate treats the
    angle as degrees by default), and finally translated by (x, y).
    """
    outline = Polygon(zip(TX, TY))
    return affinity.translate(
        affinity.rotate(outline, deg, origin=(0, 0)),
        x,
        y,
    )

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    ``trees`` is a sequence of (x, y, deg) placements. The result is the
    larger of the width and height of the combined axis-aligned bounding
    box. An empty sequence yields ``float('inf')``.
    """
    if not trees:
        return float('inf')

    # Each entry is (minx, miny, maxx, maxy) for one placed tree.
    boxes = [get_tree_polygon(tx, ty, angle).bounds for tx, ty, angle in trees]

    span_x = max(box[2] for box in boxes) - min(box[0] for box in boxes)
    span_y = max(box[3] for box in boxes) - min(box[1] for box in boxes)
    return span_x if span_x >= span_y else span_y

def has_overlap(trees):
    """Return True if any two trees overlap; boundary-only contact is allowed.

    ``trees`` is a sequence of (x, y, deg) placements. Two trees count as
    overlapping when their polygons intersect but do not merely touch.
    Zero or one tree can never overlap, so that case returns False early.
    """
    if len(trees) <= 1:
        return False

    polygons = [get_tree_polygon(x, y, deg) for x, y, deg in trees]
    tree_index = STRtree(polygons)

    for i, poly in enumerate(polygons):
        # query() yields indices into `polygons` (Shapely 2.x behaviour) of
        # geometries whose bounding boxes intersect that of `poly`.
        for j in tree_index.query(poly):
            # intersects/touches are symmetric, so each unordered pair only
            # needs one exact test; j <= i also skips the self-match.
            if j <= i:
                continue
            other = polygons[j]
            if poly.intersects(other) and not poly.touches(other):
                return True
    return False

def recenter_trees(trees):
    """Translate all trees so their joint bounding box is centred on the origin.

    ``trees`` is a sequence of (x, y, deg) placements. Rotations are left
    untouched; only x and y are shifted. An empty input is returned as-is.
    """
    if not trees:
        return trees

    # Each entry is (minx, miny, maxx, maxy) for one placed tree.
    boxes = [get_tree_polygon(tx, ty, angle).bounds for tx, ty, angle in trees]

    mid_x = (min(box[0] for box in boxes) + max(box[2] for box in boxes)) / 2
    mid_y = (min(box[1] for box in boxes) + max(box[3] for box in boxes)) / 2

    shifted = []
    for tx, ty, angle in trees:
        shifted.append((tx - mid_x, ty - mid_y, angle))
    return shifted

# Cell-level confirmation that the geometry helper definitions were executed.
print("Functions defined successfully!")

In [None]:
# Load baseline configurations
baseline_path = '/home/code/experiments/001_baseline/santa-2025.csv'
# dtype=str keeps every column as text; the x/y/deg values are converted to
# float (after removing any 's' characters) later, in load_all_configs.
df_baseline = pd.read_csv(baseline_path, dtype=str)

def load_all_configs(df):
    """Parse a submission dataframe into per-N lists of tree placements.

    ``df`` must have columns 'id', 'x', 'y' and 'deg'. Rows are grouped by
    the zero-padded three-digit prefix of 'id' (``'001_'`` .. ``'200_'``).

    Returns a dict mapping every n in 1..200 to a list of (x, y, deg) float
    tuples in dataframe row order; an n with no matching rows maps to [].
    """
    def _to_float(cell):
        # Values may carry an 's' marker (e.g. 's0.5'); drop it before parsing.
        return float(str(cell).replace('s', ''))

    configs = {}
    for n in range(1, 201):
        group = df[df['id'].str.startswith(f'{n:03d}_')]
        configs[n] = [
            (_to_float(raw_x), _to_float(raw_y), _to_float(raw_deg))
            for raw_x, raw_y, raw_deg in zip(group['x'], group['y'], group['deg'])
        ]
    return configs

baseline_configs = load_all_configs(df_baseline)

# Per-N baseline score: squared bounding-square side divided by N.
baseline_scores = {
    n: get_bounding_box_side(baseline_configs[n]) ** 2 / n
    for n in range(1, 201)
}

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

# Show the first ten per-N scores as a quick sanity check.
print("\nBaseline scores for N=1-10:")
for n in range(1, 11):
    score_n = baseline_scores[n]
    print(f"  N={n}: {score_n:.6f}")

In [None]:
# Multi-start random initialization
def random_initialization(n, num_restarts=10000, area_size=3.0):
    """Sample random layouts of ``n`` trees and keep the best overlap-free one.

    Each restart draws, per tree, x and y uniformly from
    [-area_size, area_size) and a rotation uniformly from [0, 360) using
    NumPy's global random state. Layouts with overlaps are discarded; the
    others are recentred and scored as (bounding-square side)**2 / n.

    Returns ``(best_config, best_score, valid_count)``. When no restart is
    overlap-free, ``best_config`` is None and ``best_score`` is inf.
    """
    best_config, best_score = None, float('inf')
    valid_count = 0

    for _ in range(num_restarts):
        # Draw order per tree is x, then y, then rotation.
        candidate = [
            (
                np.random.uniform(-area_size, area_size),
                np.random.uniform(-area_size, area_size),
                np.random.uniform(0, 360),
            )
            for _ in range(n)
        ]

        if has_overlap(candidate):
            continue

        valid_count += 1
        candidate = recenter_trees(candidate)
        score = get_bounding_box_side(candidate) ** 2 / n
        if score < best_score:
            best_config, best_score = candidate, score

    return best_config, best_score, valid_count

# Cell-level confirmation that random_initialization was defined.
print("Random initialization function defined!")

In [None]:
# Run random initialization for N=1-20
print("Running multi-start random initialization for N=1-20...")
print("(10000 restarts per N)\n")

# Start from a copy of the baseline for every N; only N values where the
# random search wins are overwritten below.
random_configs = {n: list(baseline_configs[n]) for n in range(1, 201)}
improvements = []

start_time = time.time()

for n in range(1, 21):
    baseline_score_n = baseline_scores[n]

    # Sampling window grows linearly with N so more trees get more room.
    area_size = 0.5 + 0.3 * n

    random_config, random_score, valid_count = random_initialization(
        n,
        num_restarts=10000,
        area_size=area_size,
    )

    if random_config is None:
        print(f"N={n:2d}: No valid configurations found (valid: {valid_count})")
        continue

    if random_score < baseline_score_n:
        improvement = baseline_score_n - random_score
        random_configs[n] = random_config
        improvements.append((n, improvement, random_score))
        print(f"N={n:2d}: IMPROVED! {baseline_score_n:.6f} -> {random_score:.6f} (improvement: {improvement:.6f}, valid: {valid_count})")
    else:
        print(f"N={n:2d}: No improvement. Baseline: {baseline_score_n:.6f}, Best random: {random_score:.6f} (valid: {valid_count})")

elapsed = time.time() - start_time
print(f"\nElapsed time: {elapsed:.1f} seconds")
print(f"\nTotal improvements found: {len(improvements)}")
if improvements:
    # Each entry is (n, improvement, random_score); sum the middle field.
    total_improvement = sum(entry[1] for entry in improvements)
    print(f"Total score improvement: {total_improvement:.6f}")

In [None]:
# Re-score every N with the (possibly replaced) configurations and compare
# the summed score against the baseline total.
new_scores = {
    n: get_bounding_box_side(random_configs[n]) ** 2 / n
    for n in range(1, 201)
}

new_total = sum(new_scores.values())
print(f"\nNew total score: {new_total:.6f}")
print(f"Baseline total: {baseline_total:.6f}")
print(f"Improvement: {baseline_total - new_total:.6f}")

In [None]:
# Check every configuration that will be submitted for overlapping trees.
print("\nValidating for overlaps...")
overlap_count = 0
for n in range(1, 201):
    if not has_overlap(random_configs[n]):
        continue
    print(f"  N={n}: OVERLAP DETECTED!")
    overlap_count += 1

summary = (
    "✓ No overlaps detected!"
    if overlap_count == 0
    else f"✗ {overlap_count} configurations have overlaps"
)
print(summary)

In [None]:
# Create submission dataframe: one row per tree, ids of the form NNN_i, and
# x/y/deg written as 's'-prefixed strings with 18 decimal places.
submission_rows = [
    {
        'id': f'{n:03d}_{i}',
        'x': f's{x:.18f}',
        'y': f's{y:.18f}',
        'deg': f's{deg:.18f}',
    }
    for n in range(1, 201)
    for i, (x, y, deg) in enumerate(random_configs[n])
]

submission_df = pd.DataFrame(submission_rows)
print(f"Created submission with {len(submission_df)} rows")

# Save the same CSV to both output locations. Create the target directories
# first: to_csv does not create missing parent directories and would raise.
output_paths = [
    '/home/submission/submission.csv',
    '/home/code/experiments/004_random_init/submission.csv',
]
for output_path in output_paths:
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    submission_df.to_csv(output_path, index=False)
print("Saved submission files")

print(f"\n=== FINAL SCORE: {new_total:.6f} ===")
print(f"=== IMPROVEMENT: {baseline_total - new_total:.6f} ===")