# Validated Baseline - Kaggle-Confirmed Submission

This notebook starts from a submission that has already been validated by Kaggle (LB score 70.627582).
We re-compute its score locally and run a Decimal-based overlap check on a sample of N values to confirm the result.

In [1]:
import json
import math
import os
import shutil
from decimal import Decimal, getcontext

import numpy as np
import pandas as pd
from numba import njit
from shapely import affinity
from shapely.geometry import Polygon

# Decimal arithmetic in this notebook runs with 25 significant digits.
getcontext().prec = 25

# Outline of a single tree as (x, y) vertex pairs, in drawing order.
# TX / TY expose the same outline as two parallel coordinate arrays.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = np.array([vertex[0] for vertex in _TREE_OUTLINE])
TY = np.array([vertex[1] for vertex in _TREE_OUTLINE])

print(f"Tree has {len(TX)} vertices")
print(f"Decimal precision: {getcontext().prec}")

Tree has 15 vertices
Decimal precision: 25


In [2]:
@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one configuration of trees.

    Every tree outline (tx, ty) is rotated by degs[i] degrees about the
    origin and shifted by (xs[i], ys[i]). The score is the squared side of
    the axis-aligned bounding square around all transformed vertices,
    divided by the number of trees.

    Args:
        xs, ys: arrays with the per-tree translation.
        degs: array with the per-tree rotation in degrees.
        tx, ty: arrays with the outline vertex coordinates.

    Returns:
        side * side / n, where side is the larger of the bounding-box
        width and height and n is xs.size.
    """
    n = xs.size
    n_vertices = tx.size

    # Sentinels far outside any real coordinate so the first vertex wins.
    x_lo = 1e300
    y_lo = 1e300
    x_hi = -1e300
    y_hi = -1e300

    for tree in range(n):
        theta = degs[tree] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for v in range(n_vertices):
            # Standard 2D rotation followed by translation.
            px = cos_t * tx[v] - sin_t * ty[v] + xs[tree]
            py = sin_t * tx[v] + cos_t * ty[v] + ys[tree]
            x_lo = min(x_lo, px)
            x_hi = max(x_hi, px)
            y_lo = min(y_lo, py)
            y_hi = max(y_hi, py)

    side = max(x_hi - x_lo, y_hi - y_lo)
    return side * side / n

def calculate_total_score(df):
    """Sum the per-group scores of a submission dataframe.

    For each N from 1 to 200 the rows whose ``id`` starts with the
    zero-padded prefix ``NNN_`` form one group. The ``x``, ``y`` and ``deg``
    cells are strings; their first character is dropped before they are
    converted to float. A group whose row count is not N is reported with a
    warning and left out of both the total and the per-N dict.

    Args:
        df: submission dataframe with string columns id, x, y, deg.

    Returns:
        (total_score, scores_by_n), where scores_by_n maps N to that
        group's score.
    """
    scores_by_n = {}
    total_score = 0.0

    for n in range(1, 201):
        prefix = f'{n:03d}_'
        group = df[df['id'].str.startswith(prefix)]
        found = len(group)

        if found == n:
            # Same parsing for all three columns: strip the marker, go float.
            xs, ys, degs = (
                group[column].str[1:].astype(float).values
                for column in ('x', 'y', 'deg')
            )
            group_score = score_group(xs, ys, degs, TX, TY)
            scores_by_n[n] = group_score
            total_score += group_score
        else:
            print(f"Warning: N={n} has {found} trees, expected {n}")

    return total_score, scores_by_n

print("Scoring functions defined")

Scoring functions defined


In [3]:
# High-precision overlap detection (intended to match Kaggle's implementation).
# Coordinates are multiplied by this factor before being handed to shapely.
scale_factor = Decimal('1e15')

def get_tree_polygon_high_precision(x, y, deg):
    """Build the shapely polygon of one placed tree in scaled coordinates.

    x, y and deg are turned into Decimal via their ``str()`` form. The
    rotation angle is evaluated in float (cos/sin from ``math``), and the
    results are converted back to Decimal for the vertex arithmetic. Each
    transformed vertex is multiplied by ``scale_factor`` and converted to
    float for shapely.

    Args:
        x, y: translation of the tree.
        deg: rotation in degrees.

    Returns:
        shapely Polygon whose coordinates are scaled by ``scale_factor``.
    """
    center_x, center_y, angle_deg = (Decimal(str(value)) for value in (x, y, deg))

    theta = float(angle_deg) * math.pi / 180.0
    cos_t = Decimal(str(math.cos(theta)))
    sin_t = Decimal(str(math.sin(theta)))

    def _place(vx, vy):
        # Rotate the outline vertex about the origin, then translate it.
        px = Decimal(str(vx))
        py = Decimal(str(vy))
        placed_x = cos_t * px - sin_t * py + center_x
        placed_y = sin_t * px + cos_t * py + center_y
        # Scale in Decimal first, then drop to float for shapely.
        return float(placed_x * scale_factor), float(placed_y * scale_factor)

    return Polygon([_place(vx, vy) for vx, vy in zip(TX, TY)])

def check_overlaps_high_precision(df, n):
    """Check the N-tree group of a submission for overlapping trees.

    Rows whose ``id`` starts with the zero-padded prefix ``NNN_`` are
    selected. The first character of each ``x``/``y``/``deg`` cell is dropped
    and the remaining decimal text is passed on unchanged, so the Decimal
    values built by ``get_tree_polygon_high_precision`` come from the digits
    in the file rather than from a float round-trip.

    Two trees count as overlapping when their polygons intersect, do not
    merely touch, and the intersection has positive area.

    Args:
        df: submission dataframe with string columns id, x, y, deg.
        n: group size / group number to check.

    Returns:
        (True, "No overlaps") when the group is clean, otherwise
        (False, message) describing the wrong row count or the first
        overlapping pair found.
    """
    mask = df['id'].str.startswith(f'{n:03d}_')
    group = df[mask]

    if len(group) != n:
        return False, f"Wrong number of trees: {len(group)} vs {n}"

    # Keep the coordinates as text; the polygon builder converts via
    # Decimal(str(value)), which is lossless for strings.
    xs = group['x'].str[1:].tolist()
    ys = group['y'].str[1:].tolist()
    degs = group['deg'].str[1:].tolist()

    # Create polygons
    polygons = [get_tree_polygon_high_precision(x, y, deg) for x, y, deg in zip(xs, ys, degs)]

    # Polygon coordinates are scaled by scale_factor, so areas are scaled by
    # its square. scale_factor is a Decimal and shapely areas are floats;
    # dividing a float by a Decimal raises TypeError, so convert once here.
    area_scale = float(scale_factor) ** 2

    # Check all pairs for overlaps
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            second = polygons[j]
            if first.intersects(second) and not first.touches(second):
                intersection = first.intersection(second)
                if intersection.area > 0:
                    return False, f"Overlap between trees {i} and {j}, area={intersection.area / area_scale:.2e}"

    return True, "No overlaps"

print("High-precision overlap detection defined")

High-precision overlap detection defined


In [4]:
# Load the Kaggle-validated submission
# `submission_path` and `df` are reused by later cells (scoring, overlap
# check, copy). The x / y / deg columns are read as strings: the scoring
# and overlap code drops their first character before parsing the number.
submission_path = '/home/nonroot/snapshots/santa-2025/21198893057/code/submission.csv'
df = pd.read_csv(submission_path)
print(f"Loaded submission with {len(df)} rows")
print(df.head())

Loaded submission with 20100 rows
      id                     x                      y                  deg
0  001_0  s-48.196086194214246    s58.770984615214225                s45.0
1  002_0   s0.1540970696213643  s-0.03854074269478543  s203.62937773065684
2  002_1  s-0.1540970696213643   s-0.5614592573052146  s23.629377730656792
3  003_0    s1.123655816140301     s0.781101815992563    s111.125132292893
4  003_1     s1.23405569584216     s1.275999500663759     s66.370622269343


In [5]:
# Calculate total score
# calculate_total_score returns the summed score and a per-N dict; the sum is
# compared against the leaderboard value 70.627582 quoted at the top of the
# notebook. `total_score` is reused when the metrics file is written.
total_score, scores_by_n = calculate_total_score(df)
print(f"\nTotal Score: {total_score:.6f}")
print(f"Expected LB: 70.627582")
print(f"Difference: {abs(total_score - 70.627582):.6f}")


Total Score: 70.627582
Expected LB: 70.627582
Difference: 0.000000


In [6]:
# Verify no overlaps in a sample of N values (checking all 200 would take too long)
# NOTE: this is a spot check only; groups not listed in `sample_ns` are not
# examined by this cell.
print("\nVerifying overlaps for sample N values...")
sample_ns = [2, 3, 4, 5, 8, 10, 15, 20, 50, 100, 200]

for n in sample_ns:
    # `valid` is False for a wrong row count as well as for an overlap;
    # `msg` carries the reason either way.
    valid, msg = check_overlaps_high_precision(df, n)
    status = "✓" if valid else "✗"
    print(f"  N={n:3d}: {status} {msg}")


Verifying overlaps for sample N values...
  N=  2: ✓ No overlaps
  N=  3: ✓ No overlaps
  N=  4: ✓ No overlaps
  N=  5: ✓ No overlaps
  N=  8: ✓ No overlaps
  N= 10: ✓ No overlaps
  N= 15: ✓ No overlaps
  N= 20: ✓ No overlaps
  N= 50: ✓ No overlaps
  N=100: ✓ No overlaps
  N=200: ✓ No overlaps


In [7]:
# Copy to submission folder
os.makedirs('/home/submission', exist_ok=True)
shutil.copy(submission_path, '/home/submission/submission.csv')
print("Submission copied to /home/submission/submission.csv")

# Verify the copy: re-read it and require it to match the frame that was
# scored above, so a truncated or stale file fails loudly instead of only
# printing a row count.
df_verify = pd.read_csv('/home/submission/submission.csv')
assert df_verify.equals(df), "Copied submission differs from the scored submission"
print(f"Verified: {len(df_verify)} rows")

Submission copied to /home/submission/submission.csv


Verified: 20100 rows


In [8]:
# Save metrics
metrics = {
    'cv_score': total_score,
    'expected_lb_score': 70.627582,
    # Record the path the submission was actually loaded from, so this field
    # cannot drift from the load cell.
    'source': submission_path,
    'kaggle_validated': True
}

metrics_path = '/home/code/experiments/002_validated_baseline/metrics.json'
# open(..., 'w') does not create missing parent directories, so make sure
# the experiment folder exists first (same pattern as the submission folder).
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"Metrics saved. CV Score: {total_score:.6f}")

Metrics saved. CV Score: 70.627582
