# Valid Baseline Validation

Validate that the baseline from snapshot 21329067673 has no overlaps, and calculate its score.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import json
from decimal import Decimal, getcontext
from tqdm import tqdm

# Set the decimal module's working precision to 30 significant digits.
# NOTE(review): no Decimal arithmetic is visible in this notebook, so this
# setting does not appear to influence the float/shapely computations below.
getcontext().prec = 30

# Tree polygon vertices
# TX[i] / TY[i] are the x / y coordinates of the i-th outline vertex in the
# tree's local frame (15 vertices).  The outline starts at the tip (0, 0.8),
# runs down the right-hand side, across the trunk bottom at y = -0.2, and
# back up the left-hand side; it is mirror-symmetric about x = 0.
TX = [
    0,                                   # tip
    0.125, 0.0625, 0.2, 0.1, 0.35,       # right-hand side, top to bottom
    0.075, 0.075, -0.075, -0.075,        # trunk
    -0.35, -0.1, -0.2, -0.0625, -0.125,  # left-hand side, bottom to top
]
TY = [
    0.8,                                 # tip
    0.5, 0.5, 0.25, 0.25, 0,             # right-hand side, top to bottom
    0, -0.2, -0.2, 0,                    # trunk
    0, 0.25, 0.25, 0.5, 0.5,             # left-hand side, bottom to top
]

def get_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and moved to ``(x, y)``.

    The outline comes from the module-level ``TX``/``TY`` vertex lists.  It is
    rotated about its local origin ``(0, 0)`` first and translated afterwards,
    so ``(x, y)`` is where that local origin ends up.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        angle: Rotation handed to ``shapely.affinity.rotate`` (interpreted
            as degrees, since ``use_radians`` is left at its default).

    Returns:
        The transformed ``shapely.geometry.Polygon``.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    rotated = affinity.rotate(outline, angle, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def parse_value(val):
    """Convert a submission field to ``float``, dropping one leading ``'s'``.

    Args:
        val: A string (optionally starting with the character ``'s'``) or
            any other object accepted by ``float()``.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text left after removing the prefix is not a
            valid float literal.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    payload = val[1:] if has_marker else val
    return float(payload)

def load_submission(path):
    """Read a submission CSV and group its trees by configuration size N.

    Each row must provide ``id``, ``x``, ``y`` and ``deg``.  The part of
    ``id`` before the first underscore is parsed as the integer N the row
    belongs to; the three numeric fields are converted with ``parse_value``.

    Args:
        path: Location of the CSV, forwarded to ``pandas.read_csv``.

    Returns:
        dict mapping N to a list of ``(x, y, deg)`` float tuples, in the
        order the rows appear in the file.
    """
    grouped = {}

    for record in pd.read_csv(path).to_dict('records'):
        n = int(record['id'].split('_')[0])
        placement = tuple(parse_value(record[field]) for field in ('x', 'y', 'deg'))
        # setdefault creates the bucket the first time an N is encountered.
        grouped.setdefault(n, []).append(placement)

    return grouped

def calculate_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing ``trees``.

    Args:
        trees: Iterable of ``(x, y, angle)`` tuples, each turned into a
            polygon by ``get_tree_polygon``.

    Returns:
        The larger of the width and height of the axis-aligned bounding box
        spanned by every exterior vertex of every tree.
    """
    vertices = np.array([
        vertex
        for x, y, angle in trees
        for vertex in get_tree_polygon(x, y, angle).exterior.coords
    ])
    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    width = upper[0] - lower[0]
    height = upper[1] - lower[1]
    return max(width, height)

def check_overlap(trees):
    """Find every pair of trees whose interiors overlap.

    Args:
        trees: Sequence of ``(x, y, deg)`` tuples, each turned into a
            polygon by ``get_tree_polygon``.

    Returns:
        List of ``(i, j, area)`` tuples with ``i < j`` indexing into
        ``trees``; ``area`` is the area of the pair's intersection.  Pairs
        that merely touch, or whose intersection area is at most ``1e-12``,
        are not reported.
    """
    shapes = [get_tree_polygon(x, y, deg) for x, y, deg in trees]

    found = []
    for first, shape_a in enumerate(shapes):
        for second in range(first + 1, len(shapes)):
            shape_b = shapes[second]
            if not shape_a.intersects(shape_b):
                continue
            # Boundary-only contact is allowed and does not count as overlap.
            if shape_a.touches(shape_b):
                continue
            shared_area = shape_a.intersection(shape_b).area
            if shared_area > 1e-12:  # Small tolerance
                found.append((first, second, shared_area))
    return found

# Progress marker: reaching this line means every helper above was defined.
print("Functions defined!")

In [None]:
# Load the valid baseline
# `trees_by_n` (N -> list of (x, y, deg) tuples) is reused by the overlap and
# scoring cells; `baseline_path` is reused when the file is copied into the
# submission folder.
baseline_path = '/home/code/experiments/001_valid_baseline/submission.csv'
trees_by_n = load_submission(baseline_path)
print(f"Loaded {len(trees_by_n)} N values")

In [None]:
# Check for overlaps in all N values
# Scan N = 1..200, skip any N absent from the submission, and remember every
# N whose configuration contains at least one overlapping pair.
overlapping_n = []
for n in tqdm(range(1, 201), desc="Checking overlaps"):
    if n not in trees_by_n:
        continue
    overlaps = check_overlap(trees_by_n[n])
    if not overlaps:
        continue
    overlapping_n.append(n)
    # Keep the log short: detail only the first five offending N values.
    if len(overlapping_n) <= 5:
        print(f"N={n}: {len(overlaps)} overlapping pairs")

print(f"\nTotal N values with overlaps: {len(overlapping_n)}")
if overlapping_n:
    print(f"Overlapping N values: {overlapping_n}")

In [None]:
# Calculate total score
# Each N contributes side**2 / n, where `side` is the bounding-square side of
# its configuration; the total is the sum over N = 1..200.
total_score = 0
per_n_scores = {}
# Side lengths are cached so the report below does not have to rebuild every
# polygon of the top contributors a second time.
per_n_sides = {}
missing_n = []

for n in range(1, 201):
    if n not in trees_by_n:
        missing_n.append(n)
        continue
    side = calculate_bounding_box_side(trees_by_n[n])
    score = (side ** 2) / n
    per_n_sides[n] = side
    per_n_scores[n] = score
    total_score += score

print(f"Total score: {total_score:.6f}")
if missing_n:
    # A missing N lowers the total, so make the gap visible instead of
    # silently reporting an incomplete sum.
    print(f"WARNING: no trees found for N values {missing_n}; total score excludes them")

# Show top contributors
print("\nTop 10 score contributors:")
sorted_scores = sorted(per_n_scores.items(), key=lambda x: x[1], reverse=True)
for n, score in sorted_scores[:10]:
    side = per_n_sides[n]
    print(f"N={n:3d}: side={side:.6f}, score={score:.6f}")

In [None]:
# Save metrics
import shutil
import os

# Summary of the validation run: total score, which N values overlap, and the
# per-N score breakdown.
metrics = {
    'cv_score': total_score,
    'overlapping_n_count': len(overlapping_n),
    'overlapping_n_values': overlapping_n,
    # JSON object keys are strings, so N is stringified explicitly.
    'per_n_scores': {str(k): v for k, v in per_n_scores.items()}
}

# Write metrics.json next to the baseline file.  The directory is derived from
# `baseline_path` so the two locations cannot drift apart if the path changes.
metrics_path = os.path.join(os.path.dirname(baseline_path), 'metrics.json')
with open(metrics_path, 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

# Copy to submission folder
os.makedirs('/home/submission', exist_ok=True)
shutil.copy(baseline_path, '/home/submission/submission.csv')

print(f"\nMetrics saved!")
print(f"CV Score: {total_score:.6f}")
print(f"Overlapping N values: {len(overlapping_n)}")
print(f"Submission copied to /home/submission/submission.csv")