# Validate the Correct Baseline (snapshot 21328309254)

This submission scored 70.647327 on the leaderboard (LB) and contains no overlapping trees.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
from shapely.ops import unary_union
from decimal import Decimal, getcontext
import json

# Set the active Decimal context to 25 significant digits; this governs the
# Decimal divisions performed while building the tree outline below.
getcontext().prec = 25

# CORRECT tree geometry
def get_tree_polygon():
    """Build the template tree outline as a shapely ``Polygon``.

    The outline is mirror-symmetric about the y axis, so only the right-hand
    half is spelled out; the left-hand half is generated by negating x and
    walking the right-hand half backwards. Dimensions are held as ``Decimal``
    values and converted to ``float`` only when the final vertex list is built.

    Returns:
        Polygon: 15-vertex outline with its tip at (0, 0.8), the foliage base
        on y = 0 and the trunk bottom on y = -0.2.
    """
    two = Decimal('2')
    four = Decimal('4')

    # Horizontal extents (full widths) of the trunk and the three foliage tiers.
    trunk_w = Decimal('0.15')
    base_w = Decimal('0.7')
    mid_w = Decimal('0.4')
    top_w = Decimal('0.25')

    # Vertical levels, from the tip down to the bottom of the trunk.
    tip_y = Decimal('0.8')
    tier_1_y = Decimal('0.5')
    tier_2_y = Decimal('0.25')
    base_y = Decimal('0.0')
    trunk_h = Decimal('0.2')
    trunk_bottom_y = -trunk_h

    # Right-hand half, ordered from just below the tip down to the trunk's
    # bottom-right corner. Each tier contributes an outer point (w / 2) and an
    # inner notch (w / 4) at the same height.
    right_half = [
        (top_w / two, tier_1_y),
        (top_w / four, tier_1_y),
        (mid_w / two, tier_2_y),
        (mid_w / four, tier_2_y),
        (base_w / two, base_y),
        (trunk_w / two, base_y),
        (trunk_w / two, trunk_bottom_y),
    ]
    # Mirror image, ordered from the trunk's bottom-left corner back up.
    left_half = [(-px, py) for px, py in reversed(right_half)]

    outline = [(Decimal('0.0'), tip_y)] + right_half + left_half
    return Polygon([(float(px), float(py)) for px, py in outline])

# Build the shared template once; create_tree() derives every placed tree from it.
TREE_POLY = get_tree_polygon()

# exterior.coords is a closed ring, so the first point is repeated at the end
# and the reported count is one more than the number of distinct vertices.
ring_point_count = len(TREE_POLY.exterior.coords)
print(f"Tree: {ring_point_count} vertices, bounds={TREE_POLY.bounds}")

Tree: 16 vertices, bounds=(-0.35, -0.2, 0.35, 0.8)


In [2]:
def parse_s_value(s_val):
    """Convert a submission cell to ``float``, ignoring a leading ``'s'``.

    Args:
        s_val: Either a string such as ``'s1.25'`` / ``'1.25'`` or any other
            value accepted by ``float()``.

    Returns:
        float: The numeric value with the ``'s'`` prefix (if any) removed.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    is_prefixed = isinstance(s_val, str) and s_val.startswith('s')
    numeric_part = s_val[1:] if is_prefixed else s_val
    return float(numeric_part)

def load_submission(path):
    """Read a submission CSV and add parsed numeric helper columns.

    Args:
        path: Location of the CSV file; it must provide ``id``, ``x``, ``y``
            and ``deg`` columns.

    Returns:
        pandas.DataFrame: The CSV contents plus
            * ``x_val`` / ``y_val`` / ``deg_val`` - ``x`` / ``y`` / ``deg``
              passed through ``parse_s_value``;
            * ``n`` - integer before the first ``'_'`` in ``id``;
            * ``tree_idx`` - integer after the first ``'_'`` in ``id``.
    """
    frame = pd.read_csv(path)

    # (destination column, source column) pairs, all parsed the same way.
    numeric_columns = (('x_val', 'x'), ('y_val', 'y'), ('deg_val', 'deg'))
    for parsed_name, raw_name in numeric_columns:
        frame[parsed_name] = frame[raw_name].apply(parse_s_value)

    def id_field(position):
        # Build an extractor for the position-th '_'-separated field of an id.
        return lambda row_id: int(row_id.split('_')[position])

    frame['n'] = frame['id'].apply(id_field(0))
    frame['tree_idx'] = frame['id'].apply(id_field(1))
    return frame

def create_tree(x, y, deg):
    """Place a copy of the template tree at ``(x, y)`` with rotation ``deg``.

    The template ``TREE_POLY`` is first rotated about the origin (0, 0) and
    only then shifted, so the tree turns around its own anchor point.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        deg: Rotation angle handed to ``shapely.affinity.rotate`` (which
            interprets it as degrees unless told otherwise).

    Returns:
        The rotated and translated polygon; ``TREE_POLY`` is not modified.
    """
    spun = rotate(TREE_POLY, deg, origin=(0, 0))
    return translate(spun, xoff=x, yoff=y)

def get_bbox_side(polygons):
    """Return the longer side of the axis-aligned box enclosing ``polygons``.

    Args:
        polygons: Collection of shapely geometries.

    Returns:
        ``0`` for an empty collection, otherwise ``max(width, height)`` of the
        bounds of the union of all geometries.
    """
    if not polygons:
        return 0
    min_x, min_y, max_x, max_y = unary_union(polygons).bounds
    width = max_x - min_x
    height = max_y - min_y
    return max(width, height)

def check_overlaps(polygons, tolerance=1e-12):
    """Find pairs of polygons whose interiors overlap by more than ``tolerance``.

    Args:
        polygons: Sequence of shapely polygons.
        tolerance: Intersection areas at or below this value are ignored.

    Returns:
        list[tuple]: One ``(i, j, area)`` entry per overlapping pair with
        ``i < j``; an empty list when fewer than two polygons are given.
    """
    if len(polygons) <= 1:
        return []

    # NOTE(review): relies on STRtree.query returning integer positions into
    # `polygons` (Shapely 2.x behaviour) - confirm the installed version.
    spatial_index = STRtree(polygons)

    found = []
    for i, poly in enumerate(polygons):
        for idx in spatial_index.query(poly):
            # Visit each unordered pair once, and never a polygon with itself.
            if idx <= i:
                continue
            other = polygons[idx]
            # Pairs that are disjoint or merely share boundary are not overlaps.
            if not poly.intersects(other) or poly.touches(other):
                continue
            shared_area = poly.intersection(other).area
            if shared_area > tolerance:
                found.append((i, idx, shared_area))
    return found

# Marker output: shows that this cell's helper definitions were executed.
print("Functions defined")

Functions defined


In [3]:
# Load the VALID submission
valid_path = '/home/code/experiments/000_baseline/submission_valid.csv'
df = load_submission(valid_path)

# Sanity output: total row count and the span of configuration sizes present.
n_column = df['n']
print(f"Loaded {len(df)} rows")
print(f"N range: {n_column.min()} to {n_column.max()}")

Loaded 20100 rows
N range: 1 to 200


In [4]:
# Compute score and check overlaps for all n
#
# For every configuration size n this rebuilds the n placed trees, records
# side**2 / n in scores_by_n, and collects a summary of any overlapping pairs.
scores_by_n = {}
overlap_configs = []  # (n, number of overlapping pairs, largest overlap area)

for n in range(1, 201):
    n_df = df[df['n'] == n]
    if len(n_df) != n:
        # A skipped size gets no entry in scores_by_n, so any later sum or
        # lookup over scores_by_n will not include it.
        print(f"ERROR: n={n} has {len(n_df)} trees")
        continue

    # Iterate the three columns directly instead of materialising a Series
    # per row with iterrows(); the values passed to create_tree are the same.
    polygons = [create_tree(x, y, deg)
                for x, y, deg in zip(n_df['x_val'], n_df['y_val'], n_df['deg_val'])]

    overlaps = check_overlaps(polygons)
    if overlaps:
        # Report the largest overlap so the "max_area" label printed below is
        # accurate (previously the first pair's area was stored).
        largest_area = max(area for _, _, area in overlaps)
        overlap_configs.append((n, len(overlaps), largest_area))

    side = get_bbox_side(polygons)
    score_n = (side ** 2) / n
    scores_by_n[n] = {'side': side, 'score': score_n}

    # Keep the log short: the first ten sizes plus every 50th.
    if n <= 10 or n % 50 == 0:
        print(f"n={n:3d}: side={side:.6f}, score={score_n:.6f}, overlaps={len(overlaps)}")

print(f"\nConfigurations with overlaps: {len(overlap_configs)}")
if overlap_configs:
    print("First 5 overlap configs:")
    for n, count, area in overlap_configs[:5]:
        print(f"  n={n}: {count} overlaps, max_area={area:.2e}")

n=  1: side=0.813173, score=0.661250, overlaps=0
n=  2: side=0.949504, score=0.450779, overlaps=0
n=  3: side=1.142031, score=0.434745, overlaps=0
n=  4: side=1.290806, score=0.416545, overlaps=0
n=  5: side=1.443692, score=0.416850, overlaps=0
n=  6: side=1.548438, score=0.399610, overlaps=0
n=  7: side=1.673104, score=0.399897, overlaps=0
n=  8: side=1.755921, score=0.385407, overlaps=0
n=  9: side=1.867280, score=0.387415, overlaps=0
n= 10: side=1.940696, score=0.376630, overlaps=0


n= 50: side=4.247076, score=0.360753, overlaps=0


n=100: side=5.878187, score=0.345531, overlaps=0


n=150: side=7.110523, score=0.337064, overlaps=0


n=200: side=8.216441, score=0.337549, overlaps=0

Configurations with overlaps: 0


In [5]:
# Compute total score
# The total is the plain sum of the per-n scores collected in scores_by_n.
per_n_scores = [entry['score'] for entry in scores_by_n.values()]
total_score = sum(per_n_scores)

# Distance from the recorded leaderboard value and from the target score.
lb_delta = abs(total_score - 70.647327)
target_gap = total_score - 68.888293

print(f"\n{'='*50}")
print(f"TOTAL SCORE: {total_score:.6f}")
print(f"{'='*50}")
print(f"\nExpected LB score: 70.647327")
print(f"Difference: {lb_delta:.6f}")
print(f"\nTarget: 68.888293")
print(f"Gap to target: {target_gap:.6f}")


TOTAL SCORE: 70.647327

Expected LB score: 70.647327
Difference: 0.000000

Target: 68.888293
Gap to target: 1.759034


In [6]:
# Score breakdown
# Sum the per-n scores over inclusive (first, last) buckets and show each
# bucket's share of total_score.
print("\nScore breakdown:")
ranges = [(1, 10), (11, 20), (21, 50), (51, 100), (101, 150), (151, 200)]
for first, last in ranges:
    bucket_scores = [scores_by_n[size]['score'] for size in range(first, last + 1)]
    bucket_total = sum(bucket_scores)
    share = bucket_total / total_score * 100
    print(f"n={first:3d}-{last:3d}: {bucket_total:.6f} ({share:.1f}%)")


Score breakdown:
n=  1- 10: 4.329128 (6.1%)
n= 11- 20: 3.726299 (5.3%)
n= 21- 50: 10.984443 (15.5%)
n= 51-100: 17.627892 (25.0%)
n=101-150: 17.136578 (24.3%)
n=151-200: 16.842987 (23.8%)


In [7]:
# Save metrics
import os

# Output directory and the JSON file written inside it.
metrics_dir = '/home/code/experiments/001_validate_baseline'
metrics_file = '/home/code/experiments/001_validate_baseline/metrics.json'

# Summary of this validation run: locally computed score, recorded
# leaderboard score, number of overlapping configurations, and target gap.
metrics = {
    'cv_score': total_score,
    'lb_score': 70.647327,
    'overlap_configs': len(overlap_configs),
    'target': 68.888293,
    'gap': total_score - 68.888293
}

os.makedirs(metrics_dir, exist_ok=True)
with open(metrics_file, 'w') as out_handle:
    json.dump(metrics, out_handle, indent=2)

print(f"Saved metrics: {metrics}")

Saved metrics: {'cv_score': 70.64732689763682, 'lb_score': 70.647327, 'overlap_configs': 0, 'target': 68.888293, 'gap': 1.7590338976368116}
