# Experiment 002: Pre-optimized Baseline Verification

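This notebook:
1. Loads the pre-optimized ensemble submission (score ~70.63)
2. Verifies that no trees overlap within any configuration
3. Calculates the score breakdown by N range
4. Compares it with the other pre-optimized submissions and saves the best valid one as our new baseline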

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

print('Libraries loaded')

In [None]:
# Tree geometry constants
# Full widths of each part of the outline; the vertices below use +/- half
# (or a quarter) of these as x-coordinates, so the shape is symmetric in x.
TRUNK_W, TRUNK_H = 0.15, 0.2
BASE_W, MID_W, TOP_W = 0.7, 0.4, 0.25
# y-levels of the outline, from the tip down to the bottom of the trunk.
TIP_Y, TIER_1_Y, TIER_2_Y, BASE_Y = 0.8, 0.5, 0.25, 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Right-hand half of the outline, walked from just below the tip down to the
# bottom-right corner of the trunk.
_RIGHT_HALF = [
    (TOP_W / 2, TIER_1_Y),
    (TOP_W / 4, TIER_1_Y),
    (MID_W / 2, TIER_2_Y),
    (MID_W / 4, TIER_2_Y),
    (BASE_W / 2, BASE_Y),
    (TRUNK_W / 2, BASE_Y),
    (TRUNK_W / 2, TRUNK_BOTTOM_Y),
]

# Base tree polygon vertices: the tip, the right half going down, then the
# mirrored left half going back up (x negated, order reversed).
BASE_TREE_VERTICES = np.array(
    [[0.0, TIP_Y]]
    + [[vx, vy] for vx, vy in _RIGHT_HALF]
    + [[-vx, vy] for vx, vy in reversed(_RIGHT_HALF)]
)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    The template in BASE_TREE_VERTICES is first rotated about its own origin
    (counter-clockwise for positive ``deg``) and then shifted by (x, y).
    Returns the result as a shapely Polygon.
    """
    theta = np.radians(deg)
    c, s = np.cos(theta), np.sin(theta)
    rotation = np.array([[c, -s], [s, c]])
    # Vertices are stored one per row, hence the multiplication by the
    # transposed rotation matrix.
    placed = BASE_TREE_VERTICES @ rotation.T + np.array([x, y])
    return Polygon(placed)

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing all polygons.

    Args:
        polygons: sequence of geometries exposing a ``bounds`` attribute of
            the form ``(minx, miny, maxx, maxy)`` (as shapely geometries do).

    Returns:
        The larger of the overall width and height, or 0.0 for an empty
        sequence.
    """
    if not polygons:
        return 0.0
    # The bounding box of a union is just the box around the individual
    # bounding boxes, so read each geometry's bounds directly instead of
    # paying for a full geometric union that is only used for its extent.
    min_xs, min_ys, max_xs, max_ys = zip(*(poly.bounds for poly in polygons))
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

def check_overlaps(polygons):
    """Report whether any two polygons intersect beyond mere contact.

    Returns ``(True, (i, j))`` for the first offending pair encountered
    (with ``i < j``), or ``(False, None)`` when no such pair exists. Pairs
    that only touch are not counted as overlapping.

    NOTE: the candidates returned by ``STRtree.query`` are used as integer
    positions into ``polygons`` (the Shapely 2.x behaviour).
    """
    spatial_index = STRtree(polygons)
    for i, current in enumerate(polygons):
        for j in spatial_index.query(current):
            # Look at each unordered pair once; this also drops the
            # polygon's match against itself.
            if j <= i:
                continue
            other = polygons[j]
            if not current.intersects(other) or current.touches(other):
                continue
            return True, (i, j)
    return False, None

def parse_submission(df):
    """Return a copy of ``df`` with the 'x', 'y' and 'deg' columns as floats.

    Values are expected as strings carrying a leading 's' (e.g. ``'s0.5'``);
    that prefix is removed before conversion. Columns that are already
    numeric (a file written without the prefix) are simply cast to float.

    Only a *leading* 's' is stripped, so a malformed value such as ``'1s5'``
    raises ``ValueError`` during conversion instead of silently becoming 15.

    The input frame is left untouched.
    """
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        values = result[col]
        if not pd.api.types.is_numeric_dtype(values):
            # Explicit regex=True: the default of this flag differs between
            # pandas versions, and the anchor must be honoured.
            values = values.astype(str).str.strip().str.replace(r'^s', '', regex=True)
        result[col] = values.astype(float)
    return result

print('Helper functions defined')

In [None]:
# Load pre-optimized ensemble submission
ensemble_df = pd.read_csv('/home/code/preoptimized/ensemble_70_627.csv')
# Show the raw shape and the first rows before any parsing is applied.
print(f'Ensemble submission shape: {ensemble_df.shape}', ensemble_df.head(), sep='\n')

# Float-valued view of the same rows (see parse_submission).
ensemble_parsed = parse_submission(ensemble_df)

In [None]:
# Calculate score for each configuration
def calculate_score(df):
    """Score a parsed submission.

    For every tree count ``n`` from 1 to 200, the rows whose ``id`` starts
    with the zero-padded prefix ``'NNN_'`` are turned into tree polygons and
    contribute ``side ** 2 / n`` to the total, where ``side`` is the
    bounding-box side of those polygons.

    A configuration whose row count differs from ``n`` is reported with a
    printed warning and left out of both the total and the per-config
    mapping.

    Returns:
        ``(total_score, sides)`` where ``sides`` maps ``n`` to its side.
    """
    side_by_n = {}
    running_total = 0.0
    ids = df['id']

    for n in range(1, 201):
        rows = df[ids.str.startswith(f'{n:03d}_')]
        if len(rows) != n:
            print(f'Warning: Config {n} has {len(rows)} trees instead of {n}')
            continue

        trees = [
            create_tree_polygon(tx, ty, tdeg)
            for tx, ty, tdeg in zip(rows['x'], rows['y'], rows['deg'])
        ]
        side_by_n[n] = get_bounding_box_side(trees)
        running_total += (side_by_n[n] ** 2) / n

    return running_total, side_by_n

# Score the parsed ensemble; the per-N sides are kept for later cells.
print('Calculating ensemble score...')
ensemble_score, ensemble_sides = calculate_score(df=ensemble_parsed)
print(f'Ensemble score: {ensemble_score:.6f}')

In [None]:
# Check for overlaps in ensemble submission
print('Checking for overlaps in ensemble submission...')
overlap_configs = []  # tree counts N whose configuration has an overlapping pair
for n in range(1, 201):
    # Rows of configuration N carry ids with the zero-padded prefix 'NNN_'.
    config_df = ensemble_parsed[ensemble_parsed['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon(row['x'], row['y'], row['deg'])
        for _, row in config_df.iterrows()
    ]

    overlap, pair = check_overlaps(polygons)
    if overlap:
        overlap_configs.append(n)

print(f'Configs with overlaps: {len(overlap_configs)}')
if not overlap_configs:
    print('No overlaps found! Submission is valid.')
else:
    print(f'Overlap configs: {overlap_configs}')

In [None]:
# Score breakdown by N range
print('\nScore breakdown by N range:')
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
TARGET_SCORE = 68.901319

for start, end in ranges:
    # calculate_score leaves out configurations with a wrong tree count, so
    # ensemble_sides may not contain every N. Skip those here as well: the
    # bucket sums then stay consistent with the total instead of raising
    # KeyError, and the gap is made visible in the printed line.
    scored = [n for n in range(start, end + 1) if n in ensemble_sides]
    range_score = sum((ensemble_sides[n] ** 2) / n for n in scored)
    missing = (end - start + 1) - len(scored)
    note = f' ({missing} configs missing)' if missing else ''
    print(f'N={start:3d}-{end:3d}: {range_score:.6f}{note}')

print(f'\nTotal score: {ensemble_score:.6f}')
print(f'Target score: {TARGET_SCORE:.6f}')
print(f'Gap to target: {ensemble_score - TARGET_SCORE:.6f}')

In [None]:
# Compare with other pre-optimized submissions
print('\nComparing all pre-optimized submissions:')
print('='*60)

submissions = [
    'ensemble_70_627.csv',
    'better_ensemble.csv',
    'best_snapshot.csv',
    'chistyakov_best.csv',
    'submission_70_926.csv',
    'saspav_best.csv',
    'bucket_of_chump.csv',
]

# One entry per successfully evaluated file:
# (filename, score, number of invalid configurations).
results = []
for filename in submissions:
    try:
        df = pd.read_csv(f'/home/code/preoptimized/{filename}')
        parsed = parse_submission(df)
        score, sides = calculate_score(parsed)

        # calculate_score skips configurations with a wrong tree count, which
        # lowers (i.e. improves) the reported score. Count them so that an
        # incomplete file can never be treated as a valid candidate.
        n_missing = sum(1 for n in range(1, 201) if n not in sides)

        # Check overlaps
        n_overlaps = 0
        for n in range(1, 201):
            prefix = f'{n:03d}_'
            config_df = parsed[parsed['id'].str.startswith(prefix)]
            polygons = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in config_df.iterrows()]
            overlap, _ = check_overlaps(polygons)
            if overlap:
                n_overlaps += 1

        # Later cells keep only entries whose third field is zero, so any
        # kind of problem must make it non-zero.
        results.append((filename, score, n_overlaps + n_missing))

        problems = []
        if n_overlaps:
            problems.append(f'{n_overlaps} overlaps')
        if n_missing:
            problems.append(f'{n_missing} incomplete configs')
        status = ', '.join(problems) if problems else 'VALID'
        print(f'{filename:25s}: {score:.6f} ({status})')
    except Exception as e:
        # Deliberately best-effort: a missing or malformed candidate file is
        # reported and skipped so the remaining files are still compared.
        print(f'{filename:25s}: ERROR - {e}')

print('='*60)

In [None]:
# Find the best valid submission
# An entry is valid when its third field (problem count) is zero; among those
# the lowest score wins.
valid_results = [entry for entry in results if entry[2] == 0]
if not valid_results:
    print('No valid submissions found!')
else:
    best_file, best_score, _ = min(valid_results, key=lambda entry: entry[1])
    print(f'\nBest valid submission: {best_file}')
    print(f'Score: {best_score:.6f}')

In [None]:
# Save the best valid submission as our baseline
import os

# best_file / best_score only exist when at least one candidate was valid;
# fail with a clear message instead of a NameError further down.
if not valid_results:
    raise RuntimeError('No valid pre-optimized submission found; nothing to save as baseline')

# Use the best valid submission (re-read so the file is written back with its
# original, unparsed values).
best_df = pd.read_csv(f'/home/code/preoptimized/{best_file}')

# Save to the submission directory and to this experiment's directory,
# creating either one if it does not exist yet.
output_paths = [
    '/home/submission/submission.csv',
    '/home/code/experiments/002_preoptimized_baseline/submission.csv',
]
for output_path in output_paths:
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    best_df.to_csv(output_path, index=False)

print(f'Saved {best_file} as submission')
print(f'Score: {best_score:.6f}')
print(f'Gap to target: {best_score - 68.901319:.6f}')

In [None]:
# Final summary
# Recap of the selected baseline, framed by two 60-character rules.
banner = '='*60
print('\n' + banner)
print('EXPERIMENT 002 SUMMARY')
print(banner)
print(f'Best valid pre-optimized submission: {best_file}')
print(f'Score: {best_score:.6f}')
print(f'Target: 68.901319')
print(f'Gap: {best_score - 68.901319:.6f}')
# The selected file was filtered on a zero problem count, hence the fixed text.
print(f'Overlaps: 0 (valid)')
print(banner)