# Baseline Validation

Validate the best available baseline submission and calculate per-N scores.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
import warnings
warnings.filterwarnings('ignore')

# Outline of a single tree (from the competition description), stored as
# parallel x/y lists: TX[i] pairs with TY[i]. Each row below holds the same
# two-or-one vertices in both lists so the pairing is easy to read.
TX = [
    0,
    0.125, 0.0625,
    0.2, 0.1,
    0.35, 0.075,
    0.075, -0.075,
    -0.075, -0.35,
    -0.1, -0.2,
    -0.0625, -0.125,
]
TY = [
    0.8,
    0.5, 0.5,
    0.25, 0.25,
    0, 0,
    -0.2, -0.2,
    0, 0,
    0.25, 0.25,
    0.5, 0.5,
]

def create_tree_polygon(x, y, angle):
    """Return the tree outline rotated about the origin, then moved to (x, y).

    The outline is built from the module-level TX/TY vertex lists. Rotation
    is applied first, around (0, 0), so the tree turns in place before the
    translation puts it at its final position. ``angle`` is passed straight
    to ``affinity.rotate`` (degrees unless shapely is told otherwise).
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    turned = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(turned, x, y)

def parse_value(val):
    """Convert a submission cell to a float.

    Submission values may be strings carrying a leading 's' marker
    (e.g. ``'s0.25'``); that single character is dropped before conversion.
    Anything else is handed to ``float`` unchanged.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    if not has_marker:
        return float(val)
    # Drop only the first character; the rest must be a valid float literal.
    return float(val[1:])

# Confirm that the helper definitions above were executed.
print("Functions defined successfully")

In [None]:
# Read the baseline submission and show a quick sanity summary: its shape,
# the row count a complete submission should have, and the first rows.
df = pd.read_csv('/home/code/experiments/000_baseline/submission.csv')
print(
    f"Submission shape: {df.shape}",
    f"Expected rows: {200 * 201 // 2} (1+2+...+200 = 20100)",
    "\nFirst few rows:",
    sep="\n",
)
print(df.head(10))

In [None]:
def has_overlap(polygons):
    """Return True if any two polygons share interior area.

    Every unordered pair is examined. A pair counts as overlapping only when
    the shapes intersect, are not merely touching along a boundary, and the
    area of their intersection exceeds 1e-10 (so numerically negligible
    slivers are tolerated).
    """
    for idx, first in enumerate(polygons):
        for second in polygons[idx + 1:]:
            if not first.intersects(second):
                continue
            if first.touches(second):
                # Boundary contact only; not an overlap.
                continue
            # Compute the intersection last: it is the expensive step.
            if first.intersection(second).area > 1e-10:
                return True
    return False

def calculate_per_n_score(df):
    """Score each configuration size N from 1 to 200.

    Rows belonging to a given N are those whose ``id`` starts with the
    zero-padded prefix ``NNN_``. Each row's ``x``, ``y`` and ``deg`` values
    are parsed and turned into a tree polygon. The score for N is the square
    of the longer side of the axis-aligned bounding box around all of its
    trees, divided by N.

    Args:
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        A tuple ``(per_n_scores, per_n_sides, overlap_errors)``. The first
        two are dicts keyed by N (score and bounding side). The third is the
        list of N values for which ``has_overlap`` reported an overlap; those
        N are still scored. Any N whose row count is not exactly N triggers
        a printed warning and is left out of both dicts.
    """
    scores = {}
    sides = {}
    overlapping = []

    for n in range(1, 201):
        # Select the rows for this N by their id prefix.
        rows = df[df['id'].str.startswith(f"{n:03d}_")]

        if len(rows) != n:
            print(f"WARNING: N={n} has {len(rows)} trees, expected {n}")
            continue

        # One polygon per row, in row order.
        trees = [
            create_tree_polygon(
                parse_value(raw_x), parse_value(raw_y), parse_value(raw_deg)
            )
            for raw_x, raw_y, raw_deg in zip(rows['x'], rows['y'], rows['deg'])
        ]

        # Overlaps are recorded but do not stop the N from being scored.
        if has_overlap(trees):
            overlapping.append(n)

        # Bounding box of everything placed for this N.
        min_x, min_y, max_x, max_y = unary_union(trees).bounds
        side = max(max_x - min_x, max_y - min_y)

        # Score contribution: side^2 / n
        scores[n] = (side ** 2) / n
        sides[n] = side

    return scores, sides, overlapping

# Score the loaded submission and report how many N values were processed
# and how many of them contain overlapping trees.
print("Calculating per-N scores...")
per_n_scores, per_n_sides, overlap_errors = calculate_per_n_score(df)
print(
    f"Done! Processed {len(per_n_scores)} N values",
    f"Overlap errors: {len(overlap_errors)}",
    sep="\n",
)
if len(overlap_errors) > 0:
    # Only the first ten offending N values are shown to keep output short.
    print(f"N values with overlaps: {overlap_errors[:10]}...")

In [None]:
# Calculate total score
# The target is kept in one place so the printed target and the gap agree.
target_score = 68.888293
total_score = sum(per_n_scores.values())
print(f"Total Score: {total_score:.6f}")
print(f"Target Score: {target_score:.6f}")
print(f"Gap: {total_score - target_score:.6f}")

# calculate_per_n_score leaves out any N whose row count was wrong, so the
# total above may cover fewer than 200 values. Say so explicitly rather than
# presenting an understated total as complete.
missing_n = [n for n in range(1, 201) if n not in per_n_scores]
if missing_n:
    print(f"WARNING: {len(missing_n)} N values are missing from the total: {missing_n[:10]}...")

# Show score breakdown by N ranges
print("\n=== Score Breakdown by N Range ===")
ranges = [(1, 1), (2, 5), (6, 10), (11, 50), (51, 100), (101, 200)]
for start, end in ranges:
    # Sum only the N values that were actually scored; indexing
    # per_n_scores[n] directly would raise KeyError for a skipped N.
    range_score = sum(
        score for n, score in per_n_scores.items() if start <= n <= end
    )
    print(f"N={start}-{end}: {range_score:.4f}")

In [None]:
# Show individual scores for small N (highest impact)
print("\n=== Small N Scores (Highest Impact) ===")
for n in range(1, 21):
    # An N skipped during scoring has no entry; report it instead of
    # failing with KeyError.
    if n not in per_n_scores:
        print(f"N={n:3d}: missing (skipped during scoring)")
        continue
    print(f"N={n:3d}: side={per_n_sides[n]:.6f}, score={per_n_scores[n]:.6f}")

In [None]:
# Persist the headline numbers for this experiment as pretty-printed JSON.
metrics = dict(
    cv_score=total_score,
    total_score=total_score,
    overlap_errors=len(overlap_errors),
    target=68.888293,
    gap=total_score - 68.888293,
    source='snapshot_21337107511',
)
with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=2))

# Keep the per-N breakdown too, so later experiments can be compared
# against this baseline N by N. (JSON stores the integer N keys as strings.)
per_n_data = dict(scores=per_n_scores, sides=per_n_sides)
with open('/home/code/experiments/000_baseline/per_n_scores.json', 'w') as f:
    f.write(json.dumps(per_n_data))

print("Metrics saved!")
print(f"\nFinal CV Score: {total_score:.6f}")