# Experiment 005: Validated Baseline Submission

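Since the bbox3 optimizer produces overlapping trees that fail Kaggle validation, we fall back to the original baseline submission, which is known to be valid. This notebook re-checks that submission for overlaps, computes its score, and copies it to the submission folder.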

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import shutil
import json
import os

# Tree shape coordinates: TX[i] and TY[i] are the x and y of the i-th outline
# vertex (15 vertices), paired with zip() in create_tree_polygon before any
# rotation or translation is applied. The first vertex is (0, 0.8); x spans
# -0.35..0.35 and y spans -0.2..0.8.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` and moved to ``(x, y)``.

    The outline is the polygon through the ``(TX[i], TY[i])`` vertices. It is
    rotated about the local origin ``(0, 0)`` first and translated afterwards,
    so the rotation never depends on the final position.

    Args:
        x: Offset applied along the x axis after rotation.
        y: Offset applied along the y axis after rotation.
        deg: Rotation angle handed to ``shapely.affinity.rotate`` (degrees,
            since ``use_radians`` is left at its default).

    Returns:
        The transformed ``shapely.geometry.Polygon``.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    rotated = rotate(outline, deg, origin=(0, 0))
    return translate(rotated, x, y)

def has_overlap(trees):
    """Report whether any two trees in a configuration overlap.

    Two trees count as overlapping when their polygons intersect without
    merely touching, so shared boundary points are allowed.

    Args:
        trees: Sequence of ``(x, y, deg)`` triples, one per tree.

    Returns:
        ``True`` as soon as one overlapping pair is found, otherwise
        ``False``. Configurations with zero or one tree are never overlapping.
    """
    if len(trees) <= 1:
        return False

    shapes = [create_tree_polygon(x, y, deg) for x, y, deg in trees]
    spatial_index = STRtree(shapes)

    # The index query yields candidate positions into ``shapes``; each
    # candidate other than the shape itself gets the exact geometric test.
    return any(
        shape.intersects(shapes[other]) and not shape.touches(shapes[other])
        for position, shape in enumerate(shapes)
        for other in spatial_index.query(shape)
        if other != position
    )

def parse_submission(filepath):
    """Read a submission CSV and group its trees by configuration number.

    The file must provide ``id``, ``x``, ``y`` and ``deg`` columns. The part
    of ``id`` before the first underscore is the configuration number ``N``.
    Coordinate and angle cells may be plain numbers or strings carrying a
    leading ``'s'``, which is stripped before conversion.

    Args:
        filepath: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        Dict mapping each ``N`` to a list of ``(x, y, deg)`` float triples in
        file order.
    """
    def to_number(raw):
        # Drop the leading 's' marker when the cell is stored as text.
        if isinstance(raw, str) and raw.startswith('s'):
            raw = raw[1:]
        return float(raw)

    table = pd.read_csv(filepath)
    for source_col, parsed_col in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
        table[parsed_col] = table[source_col].apply(to_number)
    table['N'] = table['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

    return {
        n: list(zip(group['x_val'], group['y_val'], group['deg_val']))
        for n, group in table.groupby('N')
    }

def calculate_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Each tree's polygon bounds ``(minx, miny, maxx, maxy)`` are pooled, and
    the larger of the overall x extent and y extent is returned.

    Args:
        trees: Sequence of ``(x, y, deg)`` triples, one per tree.

    Returns:
        The larger of the combined width and height.

    Raises:
        ValueError: If ``trees`` is empty (``max`` of an empty list).
    """
    boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
    xs = [edge for box in boxes for edge in (box[0], box[2])]
    ys = [edge for box in boxes for edge in (box[1], box[3])]
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def calculate_score(side_lengths):
    """Sum ``side**2 / n`` over every configuration.

    Args:
        side_lengths: Mapping from configuration number ``n`` to the side
            length of its bounding square.

    Returns:
        The total score; ``0`` for an empty mapping.
    """
    # Keep the built-in sum() so float accumulation matches exactly.
    per_config = [side**2 / n for n, side in side_lengths.items()]
    return sum(per_config)

print("Functions defined")

Functions defined


In [2]:
# Load and validate the baseline submission
baseline_path = '/home/code/exploration/datasets/submission.csv'
print(f"Loading baseline from {baseline_path}")

trees_by_n = parse_submission(baseline_path)
print(f"Loaded {len(trees_by_n)} configurations")

# A configuration that is absent from the file cannot be validated. Report it
# instead of skipping it silently, so a "passed" message is never printed for
# an incomplete submission.
expected_n = range(1, 201)
missing_n = [n for n in expected_n if n not in trees_by_n]
if missing_n:
    print(f"WARNING: no trees found for N values: {missing_n}")

# Validate all configurations for overlaps
print("\nValidating all configurations for overlaps...")
failed_n = []  # N values whose trees overlap; read again when saving metrics
for n in expected_n:
    if n in trees_by_n and has_overlap(trees_by_n[n]):
        failed_n.append(n)
        print(f"  N={n}: OVERLAP DETECTED")

if failed_n:
    print(f"\nFailed N values: {failed_n}")
elif missing_n:
    print(f"No overlaps found, but {len(missing_n)} configurations are missing.")
else:
    print("All configurations passed overlap check!")

Loading baseline from /home/code/exploration/datasets/submission.csv
Loaded 200 configurations

Validating all configurations for overlaps...


All configurations passed overlap check!


In [3]:
# Calculate score: bounding-square side for every configuration that was
# loaded, keyed by its configuration number.
side_lengths = {
    n: calculate_bounding_box_side(trees_by_n[n])
    for n in range(1, 201)
    if n in trees_by_n
}

total_score = calculate_score(side_lengths)

# Report the score next to the target and the remaining gap.
print(f"\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print(f"Target: 68.919")
print(f"Gap to target: {total_score - 68.919:.6f}")


=== BASELINE SCORE ===
Total Score: 70.647327
Target: 68.919
Gap to target: 1.728327


In [4]:
# Copy to submission folder
os.makedirs('/home/submission', exist_ok=True)
shutil.copy(baseline_path, '/home/submission/submission.csv')
print("Copied baseline to /home/submission/submission.csv")

# Save metrics
metrics = {
    'cv_score': total_score,
    'validation_passed': len(failed_n) == 0,
    'failed_n': failed_n,
    'notes': 'Using validated baseline submission. bbox3 optimizer produces overlapping trees that fail Kaggle validation.'
}

metrics_path = '/home/code/experiments/005_baseline_validated/metrics.json'
# Create the experiment folder first, as is done for the submission folder,
# so a fresh run does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal Score: {total_score:.6f}")

Copied baseline to /home/submission/submission.csv

Final Score: 70.647327
