# Baseline Experiment - Verify Pre-optimized Submission

This notebook loads the best available pre-optimized submission from snapshots, checks it for overlapping trees, and computes its score.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

# Decimal arithmetic in this notebook uses 30 significant digits.
getcontext().prec = 30

# Load the submission CSV into `df`. The preview printed below shows that the
# x / y / deg values carry a leading 's' marker, which is stripped later when
# the trees are built.
submission_path = '/home/code/experiments/000_baseline/submission.csv'
df = pd.read_csv(submission_path)
print(f"Submission shape: {df.shape}")
print(df.head(10))

Submission shape: (20100, 4)
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264089255  
8  s336.370622269264003990  
9  s336.370622145636

In [2]:
# Define the Christmas tree polygon (correct 15-vertex version)
class ChristmasTree:
    """A 15-vertex Christmas-tree outline placed at a given position and rotation.

    Parameters
    ----------
    center_x, center_y : str or number accepted by ``Decimal``
        Translation applied to the outline after rotation.
    angle : str or number accepted by ``Decimal``
        Rotation about the origin, passed to ``shapely.affinity.rotate``
        (degrees under shapely's default settings).

    Attributes
    ----------
    center_x, center_y, angle : Decimal
        The constructor arguments converted to ``Decimal``.
    polygon : shapely.geometry.Polygon
        The rotated and translated outline.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Widths of the trunk and of the three foliage tiers.
        trunk_w = Decimal('0.15')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')

        # Heights of the key horizontal levels, from tip down to trunk bottom.
        tip = float(Decimal('0.8'))
        tier_1 = float(Decimal('0.5'))
        tier_2 = float(Decimal('0.25'))
        base = float(Decimal('0.0'))
        trunk_bottom = float(-Decimal('0.2'))

        half_trunk = float(trunk_w / 2)

        # Right half of the outline, walking from the tip down to the
        # bottom-right corner of the trunk.
        right_side = [
            (float(0), tip),
            (float(top_w / 2), tier_1),
            (float(top_w / 4), tier_1),
            (float(mid_w / 2), tier_2),
            (float(mid_w / 4), tier_2),
            (float(base_w / 2), base),
            (half_trunk, base),
            (half_trunk, trunk_bottom),
        ]
        # The left half mirrors the right half (tip excluded) and is walked
        # back up, so the full ring keeps the same vertex order.
        left_side = [(-px, py) for px, py in reversed(right_side[1:])]

        outline = Polygon(right_side + left_side)

        # Rotate about the origin first, then move to the requested centre.
        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

# Sanity check: build an unrotated tree at the origin and report its vertex
# count and area. The exterior ring repeats its first point at the end, hence
# the "- 1" when counting vertices.
test_tree = ChristmasTree('0', '0', '0')
print(f"Tree polygon has {len(test_tree.polygon.exterior.coords) - 1} vertices")
print(f"Tree area: {test_tree.polygon.area:.6f}")

Tree polygon has 15 vertices
Tree area: 0.245625


In [3]:
def load_trees_for_n(df, n):
    """Build the ChristmasTree objects belonging to the N-tree configuration.

    Rows are selected by their ``id`` prefix (``n`` zero-padded to three
    digits followed by ``_``). Leading ``s`` characters are stripped from the
    ``x``, ``y`` and ``deg`` values before they are handed to ``ChristmasTree``.

    Returns a list of trees in the order the rows appear in ``df``.
    """
    group_rows = df[df['id'].str.startswith(f"{n:03d}_")]

    def strip_marker(value):
        # Values are stored as e.g. "s0.1234"; drop the leading 's'.
        return str(value).lstrip('s')

    return [
        ChristmasTree(
            strip_marker(record['x']),
            strip_marker(record['y']),
            strip_marker(record['deg']),
        )
        for _, record in group_rows.iterrows()
    ]

def has_overlap(trees, tolerance=1e-12):
    """Detect pairs of trees whose polygons overlap by more than ``tolerance``.

    Parameters
    ----------
    trees : sequence of objects exposing a ``polygon`` attribute
    tolerance : float
        Minimum intersection area for a pair to count as overlapping.

    Returns
    -------
    (bool, list)
        Whether any overlap was found, and a list of
        ``(i, j, intersection_area)`` tuples with ``i < j``.
    """
    if len(trees) <= 1:
        return False, []

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)
    found = []

    for first, shape in enumerate(shapes):
        for second in spatial_index.query(shape):
            # Visit each unordered pair once and never compare a shape with itself.
            if second <= first:
                continue
            other = shapes[second]
            # Pairs that do not intersect, or that merely touch, are not overlaps.
            if not shape.intersects(other) or shape.touches(other):
                continue
            shared_area = shape.intersection(other).area
            if shared_area > tolerance:
                found.append((first, second, shared_area))

    return bool(found), found

def get_bounding_box_side(trees):
    """Return the larger extent (width or height) of the box enclosing all trees.

    The extent is measured over the exterior-ring coordinates of every tree's
    polygon. An empty ``trees`` collection yields 0.
    """
    if not trees:
        return 0

    # Stack every exterior vertex into one (num_points, 2) array.
    points = np.vstack([np.array(t.polygon.exterior.coords) for t in trees])
    xs = points[:, 0]
    ys = points[:, 1]

    width = xs.max() - xs.min()
    height = ys.max() - ys.min()
    return max(width, height)

# Smoke test on the single-tree configuration (N=1): load it, measure the
# bounding-box side, and print its score contribution, i.e. side squared
# divided by N (N is 1 here).
trees = load_trees_for_n(df, 1)
print(f"N=1: {len(trees)} trees")
side = get_bounding_box_side(trees)
print(f"Bounding box side: {side:.6f}")
print(f"Score contribution: {side**2 / 1:.6f}")

N=1: 1 trees
Bounding box side: 0.813173
Score contribution: 0.661250


In [4]:
def score_submission(df, max_n=200, check_overlaps=True):
    """Score every configuration from N=1 to ``max_n`` and sum the results.

    For each N the contribution is ``side ** 2 / N``, where ``side`` is the
    bounding-box side of that configuration. Configurations whose row count
    differs from N are reported with a warning and left out of the total.

    Returns
    -------
    (total, per_n, overlapping)
        ``total`` is the summed score, ``per_n`` maps N to
        ``{'side': ..., 'score': ...}``, and ``overlapping`` lists the N values
        for which ``has_overlap`` reported an overlap (always empty when
        ``check_overlaps`` is False).
    """
    running_total = 0
    per_n = {}
    flagged = []

    for n in range(1, max_n + 1):
        trees = load_trees_for_n(df, n)

        # Incomplete or over-full groups are skipped, not scored.
        if len(trees) != n:
            print(f"Warning: n={n} has {len(trees)} trees instead of {n}")
            continue

        if check_overlaps and has_overlap(trees)[0]:
            flagged.append(n)

        extent = get_bounding_box_side(trees)
        contribution = (extent ** 2) / n
        per_n[n] = {'side': extent, 'score': contribution}
        running_total += contribution

    return running_total, per_n, flagged

# Score the loaded submission and compare it with the target score.
# `score`, `scores_by_n` and `overlapping_ns` are reused by later cells.
TARGET_SCORE = 68.890873

print("Scoring submission...")
score, scores_by_n, overlapping_ns = score_submission(df)

# "\n" (a real newline) is used here; "\\n" would print a literal backslash-n.
print(f"\nTotal Score: {score:.6f}")
print(f"Target Score: {TARGET_SCORE:.6f}")
print(f"Gap to target: {score - TARGET_SCORE:.6f}")

# Show at most the first 20 overlapping N values so the output stays short.
if len(overlapping_ns) > 20:
    print(f"\nOverlapping N values: {overlapping_ns[:20]}...")
else:
    print(f"\nOverlapping N values: {overlapping_ns}")

Scoring submission...


\nTotal Score: 70.676102
Target Score: 68.890873
Gap to target: 1.785229
\nOverlapping N values: []


In [5]:
# Per-N side length and score for N = 1..20 (N values that were not scored
# are silently skipped).
print("Score breakdown (first 20 N values):")
print("-" * 50)
for n in range(1, 21):
    if n not in scores_by_n:
        continue
    s = scores_by_n[n]
    print(f"N={n:3d}: side={s['side']:.6f}, score={s['score']:.6f}")

Score breakdown (first 20 N values):
--------------------------------------------------
N=  1: side=0.813173, score=0.661250
N=  2: side=0.949504, score=0.450779
N=  3: side=1.142031, score=0.434745
N=  4: side=1.290806, score=0.416545
N=  5: side=1.443692, score=0.416850
N=  6: side=1.548438, score=0.399610
N=  7: side=1.673104, score=0.399897
N=  8: side=1.755921, score=0.385407
N=  9: side=1.867280, score=0.387415
N= 10: side=1.940696, score=0.376630
N= 11: side=2.033002, score=0.375736
N= 12: side=2.114873, score=0.372724
N= 13: side=2.200046, score=0.372323
N= 14: side=2.277711, score=0.370569
N= 15: side=2.384962, score=0.379203
N= 16: side=2.446640, score=0.374128
N= 17: side=2.508124, score=0.370040
N= 18: side=2.576409, score=0.368771
N= 19: side=2.646449, score=0.368615
N= 20: side=2.742469, score=0.376057


In [6]:
# Copy the submission to the submission folder and record the metrics.
# NOTE: the file is copied whether or not overlaps were found; only the
# printed messages differ between the two cases.
import shutil
import json

if not overlapping_ns:
    print("No overlaps found! Submission is valid.")
    copied_message = "Submission copied to /home/submission/submission.csv"
else:
    print(f"WARNING: {len(overlapping_ns)} N values have overlaps!")
    print("Submission may fail Kaggle validation.")
    # Still copy for now
    copied_message = "Submission copied anyway for testing."

shutil.copy(submission_path, '/home/submission/submission.csv')
print(copied_message)

# Save metrics
metrics = {'cv_score': score, 'overlapping_ns': overlapping_ns}
with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f)
# "\n" (a real newline) is used here; "\\n" would print a literal backslash-n.
print(f"\nMetrics saved: cv_score={score:.6f}")

No overlaps found! Submission is valid.
Submission copied to /home/submission/submission.csv
\nMetrics saved: cv_score=70.676102


In [7]:
# Final summary: one print call, one line per argument, showing the score,
# the target, the absolute and relative gap, and the number of N values
# with overlaps.
print(
    "=" * 60,
    "BASELINE EXPERIMENT SUMMARY",
    "=" * 60,
    f"Submission: santa-2025.csv (pre-optimized)",
    f"Score: {score:.6f}",
    f"Target: 68.890873",
    f"Gap: {score - 68.890873:.6f} ({(score - 68.890873) / 68.890873 * 100:.2f}%)",
    f"Overlaps: {len(overlapping_ns)} N values",
    "=" * 60,
    sep="\n",
)

BASELINE EXPERIMENT SUMMARY
Submission: santa-2025.csv (pre-optimized)
Score: 70.676102
Target: 68.890873
Gap: 1.785229 (2.59%)
Overlaps: 0 N values
