# Experiment 001: Establish Baseline

This experiment uses the pre-optimized jazivxt/bucket-of-chump submission as the baseline.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import warnings
warnings.filterwarnings('ignore')

# Tree shape coordinates
# TX[i] and TY[i] together give the i-th vertex of the tree outline in its
# own local frame (15 vertices). The outline starts at (0, 0.8), lists the
# positive-x side down to y = -0.2, then the negative-x side back up; the
# two sides mirror each other about x = 0. Extents: x in [-0.35, 0.35],
# y in [-0.2, 0.8].
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` and moved to ``(x, y)``.

    The outline is assembled from the module-level ``TX``/``TY`` vertex
    lists, rotated about the local origin ``(0, 0)`` and only then shifted,
    so ``(x, y)`` is where the outline's local origin ends up.

    Returns the resulting shapely ``Polygon``.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    # Rotation must precede translation: the pivot is the local origin.
    turned = rotate(outline, deg, origin=(0, 0))
    return translate(turned, xoff=x, yoff=y)

def calculate_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Args:
        trees: iterable of ``(x, y, deg)`` tuples, one per tree, in the form
            accepted by ``create_tree_polygon``.

    Returns:
        The larger of the width and height of the combined axis-aligned
        bounding box of every tree polygon.

    Raises:
        ValueError: if ``trees`` is empty, since no bounding box exists.
    """
    # Each entry is a shapely bounds tuple: (minx, miny, maxx, maxy).
    boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
    if not boxes:
        # Fail with a clear message instead of "max() arg is an empty sequence".
        raise ValueError(
            "calculate_bounding_box_side() requires at least one tree"
        )

    min_xs, min_ys, max_xs, max_ys = zip(*boxes)
    width = max(max_xs) - min(min_xs)
    height = max(max_ys) - min(min_ys)
    return max(width, height)

def parse_submission(filepath):
    """Load a submission CSV and group its trees by configuration size N.

    The file must provide ``id``, ``x``, ``y`` and ``deg`` columns. Ids have
    the form ``NNN_i``; the text before the first underscore is read as N.
    Coordinate cells may be plain numbers or strings with a leading ``s``,
    which is dropped before conversion to float.

    Returns a dict mapping each N to its list of ``(x, y, deg)`` tuples.
    """
    table = pd.read_csv(filepath)

    def to_number(cell):
        # Drop the leading 's' marker when the cell is a prefixed string.
        has_marker = isinstance(cell, str) and cell.startswith('s')
        return float(cell[1:] if has_marker else cell)

    parsed = pd.DataFrame({
        'x': table['x'].apply(to_number),
        'y': table['y'].apply(to_number),
        'deg': table['deg'].apply(to_number),
        # N is the numeric prefix of the id (format: NNN_i)
        'N': table['id'].apply(lambda ident: int(ident.split('_')[0])),
    })

    return {
        n: list(zip(rows['x'], rows['y'], rows['deg']))
        for n, rows in parsed.groupby('N')
    }

def calculate_score(side_lengths):
    """Sum ``side**2 / N`` over every ``N -> side`` entry of the mapping."""
    # Keep the built-in sum(): a manual += loop is not guaranteed to round
    # identically on every Python version.
    contributions = (
        side ** 2 / count for count, side in side_lengths.items()
    )
    return sum(contributions)

# Sanity output: confirm the cell ran and show the raw extents of the
# un-rotated tree outline.
print(
    "Functions defined successfully",
    f"Tree polygon has {len(TX)} vertices",
    f"Tree bounds: x=[{min(TX)}, {max(TX)}], y=[{min(TY)}, {max(TY)}]",
    f"Tree width: {max(TX) - min(TX)}, height: {max(TY) - min(TY)}",
    sep="\n",
)

In [None]:
# Load the best available submission (jazivxt/bucket-of-chump)
submission_path = '/home/code/exploration/datasets/submission.csv'
print(f"Loading submission from: {submission_path}")

# Parse it into {N: [(x, y, deg), ...]} and report what was found
trees_by_n = parse_submission(submission_path)
print(
    f"Loaded configurations for N=1 to N={max(trees_by_n)}",
    f"Total configurations: {len(trees_by_n)}",
    sep="\n",
)

# Spot-check that a few configurations hold the expected number of trees
for n in (1, 2, 3, 10, 50, 100, 200):
    if n not in trees_by_n:
        continue
    print(f"N={n}: {len(trees_by_n[n])} trees")

In [None]:
# Measure the bounding-square side for every N in 1..200 that the
# submission actually contains
print("Calculating side lengths for all N values...")
side_lengths = {}

for n in range(1, 201):
    if n not in trees_by_n:
        continue
    side = calculate_bounding_box_side(trees_by_n[n])
    side_lengths[n] = side
    # Only echo the first ten values and every twentieth one after that
    if n > 10 and n % 20 != 0:
        continue
    print(f"N={n}: side={side:.6f}, contribution={side**2/n:.6f}")

print(f"\nCalculated side lengths for {len(side_lengths)} configurations")

In [None]:
# Calculate total score (sum of side**2 / N over every measured N)
total_score = calculate_score(side_lengths)
print("\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print("Expected: ~70.647")
print("Target: 68.919")
print(f"Gap to target: {total_score - 68.919:.3f} points")

# Score breakdown.
# Iterate over the measured entries instead of indexing side_lengths[n] for
# every n in 1..200: side_lengths only holds the N values present in the
# submission, so direct indexing raises KeyError when one is missing.
score_1_20 = sum(
    s**2 / n for n, s in side_lengths.items() if 1 <= n <= 20
)
score_21_200 = sum(
    s**2 / n for n, s in side_lengths.items() if 21 <= n <= 200
)

# Guard the percentage against a zero total (e.g. nothing was measured).
pct_1_20 = 100 * score_1_20 / total_score if total_score else 0.0
pct_21_200 = 100 * score_21_200 / total_score if total_score else 0.0

print("\nScore breakdown:")
print(f"N=1-20: {score_1_20:.4f} ({pct_1_20:.1f}%)")
print(f"N=21-200: {score_21_200:.4f} ({pct_21_200:.1f}%)")

In [None]:
# Publish the baseline: copy the submission file into the submission folder
import shutil
import os

os.makedirs('/home/submission', exist_ok=True)
shutil.copy(submission_path, '/home/submission/submission.csv')
print("Copied submission to /home/submission/submission.csv")

# Read the copy back to confirm it is a well-formed CSV
df = pd.read_csv('/home/submission/submission.csv')
print(
    f"Submission has {df.shape[0]} rows",
    f"Columns: {df.columns.tolist()}",
    "\nFirst 5 rows:",
    df.head(),
    sep="\n",
)

In [None]:
# Save metrics
import json
import os

metrics_path = '/home/code/experiments/001_baseline/metrics.json'

metrics = {
    'cv_score': total_score,
    # JSON object keys must be strings, so stringify each N
    'side_lengths': {str(k): v for k, v in side_lengths.items()},
    'score_1_20': score_1_20,
    'score_21_200': score_21_200
}

# Create the experiment folder first; open(..., 'w') does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)

with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print("Saved metrics to experiments/001_baseline/metrics.json")
print(f"\nFinal Score: {total_score:.6f}")
print(f"\nFinal Score: {total_score:.6f}")