# Multi-Start Random Initialization

Generate random initial configurations and optimize with SA to find different basins of attraction.
The baseline is at a strong local optimum - we need to find a DIFFERENT starting point.

In [1]:
import sys
import os
os.chdir('/home/code/experiments/006_multistart_random')
sys.path.insert(0, '/home/code')

import numpy as np
import pandas as pd
import json
import time
import random
from numba import njit

from code.tree_geometry import TX, TY, calculate_score, calculate_bbox, get_tree_vertices_numba, calculate_score_numba
from code.overlap_check import has_overlap, has_any_overlap_numba
from code.utils import parse_submission, save_submission

print("Modules imported successfully")

Modules imported successfully


In [2]:
# Load baseline
baseline_df = pd.read_csv('/home/code/experiments/001_valid_baseline/submission.csv')
baseline_configs = parse_submission(baseline_df)

# Calculate baseline scores per N
baseline_scores = {}
for n in range(1, 201):
    baseline_scores[n] = calculate_score(baseline_configs[n])

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")
print(f"\nSample baseline scores:")
for n in [10, 20, 50, 100, 150, 200]:
    print(f"  N={n}: {baseline_scores[n]:.6f}")

Baseline total score: 70.615102

Sample baseline scores:
  N=10: 0.376630
  N=20: 0.376057
  N=50: 0.360753
  N=100: 0.343395
  N=150: 0.337064
  N=200: 0.337549


In [3]:
def generate_random_config_spread(n, spread=3.0, max_attempts=1000):
    """
    Generate a random valid configuration for N trees.
    Trees are spread out to avoid overlaps.
    """
    # Estimate required area based on tree size (~0.7 x 1.0)
    tree_area = 0.7 * 1.0
    total_area = n * tree_area * 2  # 2x for spacing
    side = np.sqrt(total_area)
    
    for attempt in range(max_attempts):
        # Random positions spread across the area
        positions = np.random.uniform(-side/2, side/2, (n, 2))
        # Random angles
        angles = np.random.uniform(0, 360, n)
        
        config = [(positions[i, 0], positions[i, 1], angles[i]) for i in range(n)]
        
        if not has_overlap(config):
            return config
    
    return None  # Failed to find valid config

# Test random config generation
print("Testing random config generation...")
for n in [5, 10, 20]:
    start = time.time()
    config = generate_random_config_spread(n)
    elapsed = time.time() - start
    if config:
        score = calculate_score(config)
        print(f"N={n}: Generated valid config, score={score:.6f}, time={elapsed:.2f}s")
    else:
        print(f"N={n}: Failed to generate valid config")

Testing random config generation...


N=5: Generated valid config, score=2.112310, time=0.78s
N=10: Generated valid config, score=2.071887, time=0.00s
N=20: Failed to generate valid config


In [4]:
# Simple SA optimizer for random configs
@njit
def sa_optimize_config(trees, n_iterations=5000, T_start=1.0, T_end=0.001):
    """Simple SA optimization."""
    n = len(trees)
    current = trees.copy()
    current_score = calculate_score_numba(current)
    best = current.copy()
    best_score = current_score
    
    cooling_rate = (T_end / T_start) ** (1.0 / n_iterations)
    T = T_start
    
    for _ in range(n_iterations):
        neighbor = current.copy()
        
        # Random move
        idx = np.random.randint(n)
        move_type = np.random.randint(3)
        
        if move_type == 0:  # Translate
            neighbor[idx, 0] += np.random.normal(0, 0.1)
            neighbor[idx, 1] += np.random.normal(0, 0.1)
        elif move_type == 1:  # Rotate
            neighbor[idx, 2] = (neighbor[idx, 2] + np.random.normal(0, 10)) % 360
        else:  # Swap
            idx2 = np.random.randint(n)
            temp = neighbor[idx].copy()
            neighbor[idx] = neighbor[idx2].copy()
            neighbor[idx2] = temp
        
        if not has_any_overlap_numba(neighbor):
            neighbor_score = calculate_score_numba(neighbor)
            delta = neighbor_score - current_score
            
            if delta < 0 or np.random.random() < np.exp(-delta / T):
                current = neighbor.copy()
                current_score = neighbor_score
                
                if current_score < best_score:
                    best = current.copy()
                    best_score = current_score
        
        T *= cooling_rate
    
    return best, best_score

print("SA optimizer defined")

SA optimizer defined


In [5]:
def multistart_optimization(n, n_starts=10, sa_iterations=5000):
    """Run SA from multiple random starting points."""
    best_score = float('inf')
    best_config = None
    
    for start in range(n_starts):
        # Generate random initial config
        config = generate_random_config_spread(n)
        if config is None:
            continue
        
        # Convert to numpy array for SA
        trees_arr = np.array(config, dtype=np.float64)
        
        # Run SA
        optimized, score = sa_optimize_config(trees_arr, n_iterations=sa_iterations)
        
        if score < best_score:
            best_score = score
            best_config = [(optimized[i, 0], optimized[i, 1], optimized[i, 2]) for i in range(n)]
    
    return best_config, best_score

print("Multi-start optimization function defined")

Multi-start optimization function defined


In [6]:
# Test multi-start on a few N values
print("Testing multi-start optimization on selected N values...")
print("="*70)

test_n_values = [10, 20, 30, 50]
results = {}

for n in test_n_values:
    print(f"\nN={n}:")
    start_time = time.time()
    
    config, score = multistart_optimization(n, n_starts=10, sa_iterations=5000)
    
    elapsed = time.time() - start_time
    baseline_score = baseline_scores[n]
    diff = baseline_score - score
    
    results[n] = {
        'multistart_score': score,
        'baseline_score': baseline_score,
        'diff': diff,
        'config': config
    }
    
    status = "✅ BETTER" if diff > 0 else "❌ WORSE"
    print(f"  Multistart: {score:.6f}")
    print(f"  Baseline:   {baseline_score:.6f}")
    print(f"  Diff:       {diff:+.6f} {status}")
    print(f"  Time:       {elapsed:.1f}s")

Testing multi-start optimization on selected N values...

N=10:


  Multistart: 0.651968
  Baseline:   0.376630
  Diff:       -0.275338 ❌ WORSE
  Time:       1.0s

N=20:
  Multistart: inf
  Baseline:   0.376057
  Diff:       -inf ❌ WORSE
  Time:       0.2s

N=30:


  Multistart: inf
  Baseline:   0.360883
  Diff:       -inf ❌ WORSE
  Time:       0.2s

N=50:


  Multistart: inf
  Baseline:   0.360753
  Diff:       -inf ❌ WORSE
  Time:       0.3s


In [None]:
# Analyze results
print("\n" + "="*70)
print("SUMMARY")
print("="*70)

improvements = []
for n, r in results.items():
    if r['diff'] > 1e-6:
        improvements.append((n, r['diff']))
        print(f"✅ N={n}: IMPROVED by {r['diff']:.6f}")
    else:
        print(f"❌ N={n}: No improvement (diff={r['diff']:.6f})")

if improvements:
    print(f"\nTotal improvements found: {len(improvements)}")
else:
    print(f"\nNo improvements found - random starts converge to worse solutions than baseline")

In [None]:
# Create final submission using baseline (since no improvements found)
improved_configs = {n: list(baseline_configs[n]) for n in range(1, 201)}

# If any improvements were found, use them
for n, r in results.items():
    if r['diff'] > 1e-6 and r['config'] is not None:
        improved_configs[n] = r['config']

# Calculate total score
new_total = sum(calculate_score(improved_configs[n]) for n in range(1, 201))

print(f"\nBaseline total: {baseline_total:.6f}")
print(f"New total: {new_total:.6f}")
print(f"Improvement: {baseline_total - new_total:.6f}")

In [None]:
# Save submission
save_submission(improved_configs, 'submission.csv')
print("Saved submission.csv")

# Save metrics
metrics = {
    'cv_score': new_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - new_total,
    'n_starts': 10,
    'sa_iterations': 5000,
    'test_n_values': test_n_values,
    'results': {str(n): {'multistart': r['multistart_score'], 'baseline': r['baseline_score'], 'diff': r['diff']} 
                for n, r in results.items()},
    'notes': 'Multi-start random initialization with SA optimization. Tested on N=10,20,30,50.'
}

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal CV Score: {new_total:.6f}")

In [None]:
# Copy to submission folder
import shutil
shutil.copy('submission.csv', '/home/submission/submission.csv')
print("Copied submission to /home/submission/")