# Experiment 002: Pre-optimized Baseline Verification

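This notebook:
1. Loads the pre-optimized ensemble submission (score ~70.63)
2. Checks every configuration (N = 1–200) for overlapping trees
3. Calculates the score breakdown by N range
4. Compares all pre-optimized submissions and saves the best valid one as our new baseline
5. Builds an overlap-free ensemble from the best valid configuration for each N and saves it as the final submission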

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
warnings.filterwarnings('ignore')

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree geometry constants (the outline is symmetric about the y-axis)
TRUNK_W, TRUNK_H = 0.15, 0.2
BASE_W, MID_W, TOP_W = 0.7, 0.4, 0.25
TIP_Y, TIER_1_Y, TIER_2_Y, BASE_Y = 0.8, 0.5, 0.25, 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Right-hand half of the outline, walked from just below the tip
# down to the bottom-right corner of the trunk.
_RIGHT_HALF = np.array([
    [TOP_W / 2, TIER_1_Y],
    [TOP_W / 4, TIER_1_Y],
    [MID_W / 2, TIER_2_Y],
    [MID_W / 4, TIER_2_Y],
    [BASE_W / 2, BASE_Y],
    [TRUNK_W / 2, BASE_Y],
    [TRUNK_W / 2, TRUNK_BOTTOM_Y],
])

# The left half is the right half mirrored in x and traversed in reverse,
# so the full ring keeps a single winding direction.
_LEFT_HALF = _RIGHT_HALF[::-1] * np.array([-1.0, 1.0])

# Base tree polygon vertices: tip, then right half, then left half (15 x 2)
BASE_TREE_VERTICES = np.vstack([[[0.0, TIP_Y]], _RIGHT_HALF, _LEFT_HALF])

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to (x, y).

    The base outline (BASE_TREE_VERTICES) is rotated counter-clockwise about
    the origin first and translated afterwards.

    Returns:
        A shapely ``Polygon`` with the transformed vertices.
    """
    theta = np.radians(deg)
    c, s = np.cos(theta), np.sin(theta)
    rot = np.array([[c, -s], [s, c]])
    # Row-vector convention: v' = v @ R^T, then shift by the placement offset.
    return Polygon(BASE_TREE_VERTICES @ rot.T + np.array([x, y]))

def get_bounding_box_side(polygons):
    """Get the side length of the smallest axis-aligned square enclosing all polygons.

    The enclosing box is taken as the envelope of the individual ``.bounds``
    tuples ``(minx, miny, maxx, maxy)``. This yields the same box as the
    bounds of the geometric union, without paying for the union itself.

    Args:
        polygons: list of objects exposing a ``bounds`` 4-tuple (e.g. shapely
            polygons).

    Returns:
        ``max(width, height)`` of the joint bounding box, or ``0.0`` for an
        empty list.
    """
    if not polygons:
        return 0.0
    min_xs, min_ys, max_xs, max_ys = zip(*(poly.bounds for poly in polygons))
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

def check_overlaps(polygons):
    """Report the first pair of polygons that overlap (not just touch).

    A spatial index narrows each polygon's comparison set to the candidates
    the index returns for it; each unordered pair is examined once.
    NOTE(review): indexing ``polygons[j]`` assumes ``STRtree.query`` returns
    integer positions (Shapely 2.x behaviour) — confirm the installed version.

    Returns:
        ``(True, (i, j))`` for the first overlapping pair found, otherwise
        ``(False, None)``.
    """
    index = STRtree(polygons)
    for i, current in enumerate(polygons):
        for j in index.query(current):
            if j <= i:
                # Pair already handled from the other side (or it is the polygon itself).
                continue
            other = polygons[j]
            if not current.intersects(other):
                continue
            if current.touches(other):
                # Boundary-only contact is allowed.
                continue
            return True, (i, j)
    return False, None

def parse_submission(df):
    """Return a copy of a submission frame with x, y and deg as floats.

    Submission files store each number as a string with a leading 's'
    (e.g. ``'s-0.154'``). Only that leading prefix is removed. Columns that
    are already numeric are simply cast to float, so parsing an
    already-parsed frame is harmless.

    Args:
        df: DataFrame with at least the columns 'x', 'y' and 'deg'.

    Returns:
        A new DataFrame; the input is not modified.
    """
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        values = result[col]
        if pd.api.types.is_numeric_dtype(values):
            result[col] = values.astype(float)
        else:
            # Anchor the pattern so only the prefix is stripped, and pass
            # regex explicitly so behaviour does not depend on pandas' default.
            stripped = values.astype(str).str.replace(r'^s', '', regex=True)
            result[col] = stripped.astype(float)
    return result

print('Helper functions defined')

Helper functions defined


In [3]:
# Load pre-optimized ensemble submission
# The raw frame is kept as read from disk (values still carry the 's' prefix,
# as the printed head shows); the parsed copy below is what later cells use.
ensemble_df = pd.read_csv('/home/code/preoptimized/ensemble_70_627.csv')
print(f'Ensemble submission shape: {ensemble_df.shape}')
print(ensemble_df.head())

# Numeric copy: x, y and deg converted from 's'-prefixed strings to floats.
ensemble_parsed = parse_submission(ensemble_df)

Ensemble submission shape: (20100, 4)
      id                  x                 y                deg
0  001_0  s-48.196086194214  s58.770984615214   s45.000000000000
1  002_0    s0.154097069621  s-0.038540742695  s203.629377730657
2  002_1   s-0.154097069621  s-0.561459257305   s23.629377730657
3  003_0    s0.185770648105  s-0.547448842336  s111.125132292893
4  003_1    s0.296170527807  s-0.052551157664   s66.370622269343


In [4]:
# Calculate score for each configuration
def calculate_score(df):
    """Score a parsed submission as the sum over N of side(N)**2 / N.

    For each N in 1..200 the rows whose id starts with the zero-padded
    prefix ``NNN_`` form one configuration. A configuration whose row count
    is not N is reported and skipped (it contributes nothing to the total
    and gets no entry in the returned dict).

    Returns:
        ``(total_score, sides)`` where ``sides`` maps N to the bounding
        square side of that configuration.
    """
    side_by_n = {}
    running_total = 0.0
    ids = df['id']

    for n in range(1, 201):
        rows = df[ids.str.startswith(f'{n:03d}_')]
        if len(rows) != n:
            print(f'Warning: Config {n} has {len(rows)} trees instead of {n}')
            continue

        trees = [
            create_tree_polygon(x, y, deg)
            for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
        ]
        side = get_bounding_box_side(trees)
        side_by_n[n] = side
        running_total += (side ** 2) / n

    return running_total, side_by_n

# Score the parsed ensemble; ensemble_sides (N -> bounding square side) is
# reused by the per-range breakdown cell further down.
print('Calculating ensemble score...')
ensemble_score, ensemble_sides = calculate_score(ensemble_parsed)
print(f'Ensemble score: {ensemble_score:.6f}')

Calculating ensemble score...


Ensemble score: 70.627582


In [5]:
# Check for overlaps in ensemble submission
print('Checking for overlaps in ensemble submission...')
overlap_configs = []
for n in range(1, 201):
    # Rows of configuration N share the zero-padded id prefix 'NNN_'.
    config_df = ensemble_parsed[ensemble_parsed['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(config_df['x'], config_df['y'], config_df['deg'])
    ]
    overlap, pair = check_overlaps(polygons)
    if overlap:
        overlap_configs.append(n)

print(f'Configs with overlaps: {len(overlap_configs)}')
if not overlap_configs:
    print('No overlaps found! Submission is valid.')
else:
    print(f'Overlap configs: {overlap_configs}')

Checking for overlaps in ensemble submission...


Configs with overlaps: 121
Overlap configs: [2, 4, 6, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 55, 56, 57, 58, 59, 60, 61, 62, 65, 66, 67, 68, 69, 70, 71, 73, 75, 77, 79, 80, 82, 84, 86, 87, 89, 90, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 117, 121, 122, 125, 126, 127, 128, 130, 131, 135, 136, 138, 141, 145, 148, 150, 151, 152, 153, 154, 156, 158, 160, 161, 164, 165, 169, 173, 176, 177, 179, 184, 188, 194, 195]


In [6]:
# Score breakdown by N range
print('\nScore breakdown by N range:')
ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]

for start, end in ranges:
    # Per-N contribution is side**2 / N, the same term calculate_score accumulates.
    contributions = [(ensemble_sides[n] ** 2) / n for n in range(start, end + 1)]
    range_score = sum(contributions)
    print(f'N={start:3d}-{end:3d}: {range_score:.6f}')

print(f'\nTotal score: {ensemble_score:.6f}')
print(f'Target score: 68.901319')
print(f'Gap to target: {ensemble_score - 68.901319:.6f}')


Score breakdown by N range:
N=  1- 10: 4.329128
N= 11- 50: 14.704788
N= 51-100: 17.614300
N=101-150: 17.136399
N=151-200: 16.842967

Total score: 70.627582
Target score: 68.901319
Gap to target: 1.726263


In [7]:
# Compare with other pre-optimized submissions
print('\nComparing all pre-optimized submissions:')
print('='*60)

submissions = [
    'ensemble_70_627.csv',
    'better_ensemble.csv',
    'best_snapshot.csv',
    'chistyakov_best.csv',
    'submission_70_926.csv',
    'saspav_best.csv',
    'bucket_of_chump.csv',
]

# One (filename, score, number of overlapping configs) tuple per readable file.
results = []
for filename in submissions:
    try:
        df = pd.read_csv(f'/home/code/preoptimized/{filename}')
        parsed = parse_submission(df)
        score, sides = calculate_score(parsed)

        # Count configurations that contain at least one overlapping pair
        n_overlaps = 0
        for n in range(1, 201):
            config_df = parsed[parsed['id'].str.startswith(f'{n:03d}_')]
            polygons = [
                create_tree_polygon(x, y, deg)
                for x, y, deg in zip(config_df['x'], config_df['y'], config_df['deg'])
            ]
            if check_overlaps(polygons)[0]:
                n_overlaps += 1

        results.append((filename, score, n_overlaps))
        if n_overlaps == 0:
            status = 'VALID'
        else:
            status = f'{n_overlaps} overlaps'
        print(f'{filename:25s}: {score:.6f} ({status})')
    except Exception as e:
        # Best effort: a file that cannot be read or parsed is reported and skipped.
        print(f'{filename:25s}: ERROR - {e}')

print('='*60)


Comparing all pre-optimized submissions:


ensemble_70_627.csv      : 70.627582 (121 overlaps)


better_ensemble.csv      : 70.647306 (9 overlaps)


best_snapshot.csv        : 70.627582 (8 overlaps)


chistyakov_best.csv      : 70.926150 (VALID)


submission_70_926.csv    : 70.926150 (VALID)


saspav_best.csv          : 70.630478 (7 overlaps)


bucket_of_chump.csv      : 70.676501 (16 overlaps)


In [8]:
# Find the best valid submission
# Entries are (filename, score, overlap count); valid means zero overlaps.
valid_results = [entry for entry in results if entry[2] == 0]
if not valid_results:
    print('No valid submissions found!')
else:
    # Lower score is better.
    best_file, best_score, _ = min(valid_results, key=lambda entry: entry[1])
    print(f'\nBest valid submission: {best_file}')
    print(f'Score: {best_score:.6f}')


Best valid submission: chistyakov_best.csv
Score: 70.926150


In [9]:
# Save the best valid submission as our baseline
# (`os` is imported in the notebook's import cell.)

# Use the best valid submission, written back unchanged (still 's'-prefixed)
best_df = pd.read_csv(f'/home/code/preoptimized/{best_file}')

# Save to the submission directory and the experiment directory, creating
# each one first so neither write fails on a missing folder.
for out_dir in ('/home/submission', '/home/code/experiments/002_preoptimized_baseline'):
    os.makedirs(out_dir, exist_ok=True)
    best_df.to_csv(os.path.join(out_dir, 'submission.csv'), index=False)

print(f'Saved {best_file} as submission')
print(f'Score: {best_score:.6f}')
print(f'Gap to target: {best_score - 68.901319:.6f}')

Saved chistyakov_best.csv as submission
Score: 70.926150
Gap to target: 2.024831


In [16]:
# Create a better ensemble by taking the best valid configuration for each N
print('Creating optimal ensemble from all submissions...')

# Load all submissions: filename -> parsed (numeric) DataFrame
all_submissions = {}
for filename in submissions:
    try:
        raw = pd.read_csv(f'/home/code/preoptimized/{filename}')
        all_submissions[filename] = parse_submission(raw)
    except Exception as e:
        # A file that fails to load is reported and left out of the pool.
        print(f'Error loading {filename}: {e}')

print(f'Loaded {len(all_submissions)} submissions')

Creating optimal ensemble from all submissions...


Loaded 7 submissions


In [17]:
# For each N, find the best valid configuration
print('Finding best valid configuration for each N...')

best_configs = {}   # N -> rows of the winning configuration
best_sources = {}   # N -> filename the winner came from

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side = float('inf')
    best_config = None
    best_source = None

    for filename, df in all_submissions.items():
        config_df = df[df['id'].str.startswith(prefix)].copy()

        # Incomplete configurations cannot be candidates.
        if len(config_df) != n:
            continue

        polygons = [
            create_tree_polygon(x, y, deg)
            for x, y, deg in zip(config_df['x'], config_df['y'], config_df['deg'])
        ]

        # Overlapping layouts are invalid regardless of how small they are.
        if check_overlaps(polygons)[0]:
            continue

        side = get_bounding_box_side(polygons)
        # Strict '<' keeps the earliest-loaded file on ties.
        if side < best_side:
            best_side, best_config, best_source = side, config_df, filename

    if best_config is None:
        print(f'Warning: No valid config found for N={n}')
    else:
        best_configs[n] = best_config
        best_sources[n] = best_source

print(f'Found valid configs for {len(best_configs)} out of 200 N values')

Finding best valid configuration for each N...


Found valid configs for 200 out of 200 N values


In [18]:
# Build the optimal ensemble
print('Building optimal ensemble...')

# Stack the winning per-N frames in ascending N. Concatenating the stored
# frames replaces a row-by-row rebuild and keeps the values untouched.
ensemble_columns = ['id', 'x', 'y', 'deg']
ordered_configs = [best_configs[n] for n in range(1, 201) if n in best_configs]
if ordered_configs:
    optimal_ensemble = pd.concat(ordered_configs, ignore_index=True)[ensemble_columns]
else:
    # pd.concat raises on an empty list; keep the expected columns so the
    # scoring call below still runs (it will warn for every N).
    optimal_ensemble = pd.DataFrame(columns=ensemble_columns)
print(f'Optimal ensemble shape: {optimal_ensemble.shape}')

# Calculate score
optimal_score, optimal_sides = calculate_score(optimal_ensemble)
print(f'Optimal ensemble score: {optimal_score:.6f}')

Building optimal ensemble...


Optimal ensemble shape: (20100, 4)


Optimal ensemble score: 70.627589


In [19]:
# Verify no overlaps in optimal ensemble
print('Verifying optimal ensemble...')
overlap_count = 0
for n in range(1, 201):
    config_df = optimal_ensemble[optimal_ensemble['id'].str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(config_df['x'], config_df['y'], config_df['deg'])
    ]
    if check_overlaps(polygons)[0]:
        overlap_count += 1
        print(f'Overlap in N={n}')

if overlap_count:
    print(f'Found {overlap_count} overlapping configurations')
else:
    print('No overlaps! Optimal ensemble is valid.')

Verifying optimal ensemble...


No overlaps! Optimal ensemble is valid.


In [20]:
# Show source breakdown
print('Source breakdown for optimal ensemble:')
from collections import Counter
# Tally how many of the selected configurations each source file contributed.
source_counts = Counter()
source_counts.update(best_sources.values())
for source, count in source_counts.most_common():
    print(f'{source}: {count} configs')

Source breakdown for optimal ensemble:
best_snapshot.csv: 86 configs
better_ensemble.csv: 71 configs
ensemble_70_627.csv: 35 configs
bucket_of_chump.csv: 8 configs


In [21]:
# Save the optimal ensemble
print('Saving optimal ensemble...')

# Format with 's' prefix (fixed 15 decimals), matching the submission file format
submission = optimal_ensemble.copy()
for col in ['x', 'y', 'deg']:
    submission[col] = 's' + submission[col].apply(lambda x: f'{x:.15f}')

# Create each destination directory before writing, so this cell does not
# depend on an earlier cell having created it.
for out_dir in ('/home/submission', '/home/code/experiments/002_preoptimized_baseline'):
    os.makedirs(out_dir, exist_ok=True)
    submission.to_csv(os.path.join(out_dir, 'submission.csv'), index=False)

print('Submission saved!')
print(f'Score: {optimal_score:.6f}')
print(f'Target: 68.901319')
print(f'Gap: {optimal_score - 68.901319:.6f}')

Saving optimal ensemble...
Submission saved!
Score: 70.627589
Target: 68.901319
Gap: 1.726270


In [22]:
# Final summary
# Two results are reported: the best single pre-optimized file (the earlier
# baseline) and the per-N ensemble, which is the file last written to
# submission.csv.
print('\n' + '='*60)
print('EXPERIMENT 002 SUMMARY')
print('='*60)
print(f'Best valid pre-optimized submission: {best_file}')
print(f'Score: {best_score:.6f}')
print(f'Target: 68.901319')
print(f'Gap: {best_score - 68.901319:.6f}')
print(f'Overlaps: 0 (valid)')
print('-'*60)
print('Final saved submission: optimal per-N ensemble')
print(f'Score: {optimal_score:.6f}')
print(f'Target: 68.901319')
print(f'Gap: {optimal_score - 68.901319:.6f}')
# Use the count measured by the verification cell rather than a constant.
overlap_status = 'valid' if overlap_count == 0 else 'INVALID'
print(f'Overlaps: {overlap_count} ({overlap_status})')
print('='*60)


EXPERIMENT 002 SUMMARY
Best valid pre-optimized submission: chistyakov_best.csv
Score: 70.926150
Target: 68.901319
Gap: 2.024831
Overlaps: 0 (valid)
