# Verify Valid Baseline (candidate_024.csv)

The previous submission (candidate_029.csv) failed with 'Overlapping trees in group 040' despite passing local validation.

candidate_024.csv is LB-verified with score 70.626088313081.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

# Use 30 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 30

class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The outline is a fixed 15-vertex shape (tip, three tiers and a trunk)
    that is symmetric about the y axis. It is rotated about the origin by
    ``angle`` and then shifted by ``(center_x, center_y)``.

    Attributes set by the constructor:
        center_x, center_y, angle: the inputs converted to ``Decimal``.
        polygon: the placed shapely polygon (built from float coordinates).
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the placed polygon.

        Args:
            center_x: x offset; anything ``Decimal()`` accepts.
            center_y: y offset; anything ``Decimal()`` accepts.
            angle: rotation passed to ``affinity.rotate`` (degrees unless
                shapely is told otherwise).
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Widths of the trunk and of the three tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the tip, the tier boundaries, the base and the trunk foot.
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -Decimal('0.2')

        # Right-hand half of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_half = [
            (top_w / 2, tier_1_y),
            (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y),
            (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y),
            (trunk_w / 2, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up to the tip.
        left_half = [(-x, y) for x, y in reversed(right_half)]

        outline = [(Decimal(0), tip_y)] + right_half + left_half
        initial_polygon = Polygon([(float(x), float(y)) for x, y in outline])

        # Rotate about the origin first, then move to the requested centre.
        self.polygon = affinity.translate(
            affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0)),
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def load_trees_for_n(df, n):
    """Build the trees belonging to group ``n`` of a submission frame.

    Rows are selected by an ``id`` that starts with the zero-padded,
    three-digit group number followed by ``_``. The ``x``, ``y`` and ``deg``
    cells are converted to strings and have any leading ``s`` characters
    removed before being handed to ``ChristmasTree``.

    Args:
        df: frame with string ``id`` plus ``x``, ``y`` and ``deg`` columns.
        n: group number.

    Returns:
        List of ``ChristmasTree`` objects in row order (empty if no row matches).
    """
    group_rows = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(*(str(row[col]).lstrip('s') for col in ('x', 'y', 'deg')))
        for _, row in group_rows.iterrows()
    ]

def has_overlap(trees, area_tol=1e-12):
    """Report whether any two trees overlap.

    Candidate pairs are found through an ``STRtree`` built over the tree
    polygons. A pair counts as overlapping when the polygons intersect, do
    not merely touch, and (unless ``area_tol`` is ``None``) their
    intersection area exceeds ``area_tol``.

    NOTE(review): with the default tolerance, overlaps with an area of at
    most 1e-12 are accepted. The notebook header mentions a submission that
    passed local validation but was rejected for overlapping trees; this
    tolerance is a plausible cause. Pass ``area_tol=None`` for a strict check.

    Args:
        trees: sequence of objects exposing a shapely ``polygon`` attribute.
        area_tol: minimum intersection area that counts as an overlap, or
            ``None`` to treat every non-touching intersection as an overlap.

    Returns:
        True as soon as one overlapping pair is found, otherwise False.
        Zero or one tree can never overlap, so that case returns False.
    """
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)

    for i, poly in enumerate(polygons):
        # query() is used as returning positions into `polygons`
        # (shapely 2.x behaviour -- TODO confirm the installed version).
        for idx in tree_index.query(poly):
            # Each unordered pair is examined once; idx == i is the polygon
            # itself and idx < i was handled when the outer loop was at idx.
            if idx <= i:
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            if area_tol is None or poly.intersection(other).area > area_tol:
                return True
    return False

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box around all trees.

    The box is taken over the exterior vertices of every tree polygon.

    Args:
        trees: sequence of objects exposing ``polygon.exterior.coords``.

    Returns:
        ``max(x extent, y extent)`` over all vertices, or 0 when ``trees``
        is empty.
    """
    if not trees:
        return 0
    vertices = np.concatenate(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    # Column 0 holds x, column 1 holds y.
    extent = vertices.max(axis=0) - vertices.min(axis=0)
    return max(extent[0], extent[1])

def score_submission(df, max_n=200):
    """Score every group from 1 to ``max_n`` of a submission frame.

    For each group the trees are loaded, checked for overlap, and the
    bounding-box side ``S`` is measured; the group adds ``S**2 / n`` to the
    total. A group whose tree count differs from ``n`` only triggers a
    printed warning and is left out of the total and of ``side_lengths``.

    Args:
        df: submission frame as expected by ``load_trees_for_n``.
        max_n: largest group number to score.

    Returns:
        Tuple ``(total_score, overlaps, side_lengths)`` where ``overlaps``
        lists the group numbers flagged by ``has_overlap`` and
        ``side_lengths`` maps each scored group number to its side.
    """
    side_lengths = {}
    overlaps = []
    total_score = 0
    for n in range(1, max_n + 1):
        group = load_trees_for_n(df, n)
        if len(group) == n:
            if has_overlap(group):
                overlaps.append(n)
            side = get_bounding_box_side(group)
            side_lengths[n] = side
            total_score += side ** 2 / n
        else:
            print(f"Warning: n={n} has {len(group)} trees instead of {n}")
    return total_score, overlaps, side_lengths

# Emit a line so the cell output shows that the definitions above were executed.
print("Scoring functions defined.")

Scoring functions defined.


In [2]:
# Read the submission CSV that the following cells score, then show its
# shape and first rows as a quick sanity check.
df = pd.read_csv('/home/submission/submission.csv')
print(
    f"Submission shape: {df.shape}",
    f"First few rows:\n{df.head()}",
    sep="\n",
)

Submission shape: (20100, 4)
First few rows:
      id                     x                      y                  deg
0  001_0  s-48.196086194214246    s58.770984615214225                s45.0
1  002_0   s0.1540970696213643  s-0.03854074269478543  s203.62937773065684
2  002_1  s-0.1540970696213643   s-0.5614592573052146  s23.629377730656792
3  003_0    s1.123655816140301     s0.781101815992563    s111.125132292893
4  003_1     s1.23405569584216     s1.275999500663759     s66.370622269343


In [3]:
# Score the submission and compare it with the target score.
# The target is defined once so the printed value and the gap cannot drift apart.
TARGET_SCORE = 68.897509

print("Scoring submission...")
score, overlaps, side_lengths = score_submission(df)
gap = score - TARGET_SCORE  # positive means the submission is worse than the target
print(f"\nScore: {score:.6f}")
print(f"Overlapping configurations: {overlaps}")
print(f"\nTarget: {TARGET_SCORE}")
print(f"Gap to target: {gap:.6f} ({gap / TARGET_SCORE * 100:.2f}%)")

Scoring submission...



Score: 70.626088
Overlapping configurations: []

Target: 68.897509
Gap to target: 1.728579 (2.51%)


In [4]:
# Score breakdown by N range
# Per-group contribution S^2/n, computed once and only for the groups that
# score_submission actually scored (it leaves out groups with a wrong tree
# count, so indexing side_lengths over a fixed 1..200 range could raise KeyError).
contrib_by_n = {n: (side ** 2) / n for n, side in sorted(side_lengths.items())}

print("\nScore breakdown by N range:")
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    range_score = sum(
        contrib_by_n[n] for n in range(start, end + 1) if n in contrib_by_n
    )
    print(f"  N={start}-{end}: {range_score:.6f}")

print("\nTop 10 N values by score contribution (S^2/n):")
contributions = [(n, contrib, side_lengths[n]) for n, contrib in contrib_by_n.items()]
# Stable sort: groups with equal contribution stay in ascending-n order.
contributions.sort(key=lambda item: item[1], reverse=True)

for n, contrib, side in contributions[:10]:
    print(f"  N={n:3d}: contrib={contrib:.6f}, side={side:.6f}")


Score breakdown by N range:
  N=1-10: 4.329128
  N=11-50: 14.704788
  N=51-100: 17.612806
  N=101-150: 17.136399
  N=151-200: 16.842967

Top 10 N values by score contribution (S^2/n):
  N=  1: contrib=0.661250, side=0.813173
  N=  2: contrib=0.450779, side=0.949504
  N=  3: contrib=0.434745, side=1.142031
  N=  5: contrib=0.416850, side=1.443692
  N=  4: contrib=0.416545, side=1.290806
  N=  7: contrib=0.399897, side=1.673104
  N=  6: contrib=0.399610, side=1.548438
  N=  9: contrib=0.387415, side=1.867280
  N=  8: contrib=0.385407, side=1.755921
  N= 15: contrib=0.376978, side=2.377955


In [None]:
# Save metrics
import json

# Reference values, defined once so the JSON file and the printed summary
# always agree with each other.
TARGET_SCORE = 68.897509
LB_SCORE = 70.626088313081
gap = score - TARGET_SCORE  # positive means the submission is worse than the target

metrics = {
    'cv_score': score,
    'overlaps': overlaps,
    'source_file': '/home/nonroot/snapshots/santa-2025/21222392487/code/submission_candidates/candidate_024.csv',
    'target': TARGET_SCORE,
    'gap': gap,
    'lb_verified': True,
    'lb_score': LB_SCORE
}
# The experiment directory must already exist; open() does not create it.
with open('/home/code/experiments/002_valid_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\n" + "="*60)
print("VALID BASELINE SUMMARY")
print("="*60)
print(f"CV Score: {score:.6f}")
print(f"LB Score (verified): {LB_SCORE}")
print(f"Target: {TARGET_SCORE}")
print(f"Gap: {gap:.6f} ({gap / TARGET_SCORE * 100:.2f}%)")
print(f"Overlaps: {len(overlaps)}")
print(f"\nSubmission saved to: /home/submission/submission.csv")
print("="*60)