# Baseline Experiment - Verify Pre-optimized Submission

Using the best available pre-optimized submission from snapshots.

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

getcontext().prec = 30

# Load the submission
submission_path = '/home/code/experiments/000_baseline/submission.csv'
df = pd.read_csv(submission_path)
print(f"Submission shape: {df.shape}")
print(df.head(10))

Submission shape: (20100, 4)
      id                      x                      y                  deg
0  001_0     s43.59119209210215    s-31.78326706874178   s44.99999999999998
1  002_0    s0.1540970696213559  s-0.03854074269479465  s144.27276086312358
2  002_1  s-0.15409706962137285   s-0.5614592573052241  s324.27276086312355
3  003_0     s0.254937643697833    s-0.233436061549416  s113.56326044172948
4  003_1     s0.357722754471247     s0.250360566787394     s66.370622269343
5  003_2    s-0.234618301141838     s0.154819632737017  s155.13405193710082
6  004_0  s-0.29385141092425193    s0.1318322746912058    s154.766362231348
7  004_1    s0.3093423021943079   s0.12387011696071852   s154.7663623549757
8  004_2    s0.3016776723386178   s-0.7246632361943603   s334.7663623549756
9  004_3  s-0.30140730580875774   s-0.7147976146068641  s334.76636223134807


In [2]:
# Define the Christmas tree polygon (correct 15-vertex version)
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (float(0), float(tip_y)),
            (float(top_w / 2), float(tier_1_y)),
            (float(top_w / 4), float(tier_1_y)),
            (float(mid_w / 2), float(tier_2_y)),
            (float(mid_w / 4), float(tier_2_y)),
            (float(base_w / 2), float(base_y)),
            (float(trunk_w / 2), float(base_y)),
            (float(trunk_w / 2), float(trunk_bottom_y)),
            (float(-trunk_w / 2), float(trunk_bottom_y)),
            (float(-trunk_w / 2), float(base_y)),
            (float(-base_w / 2), float(base_y)),
            (float(-mid_w / 4), float(tier_2_y)),
            (float(-mid_w / 2), float(tier_2_y)),
            (float(-top_w / 4), float(tier_1_y)),
            (float(-top_w / 2), float(tier_1_y)),
        ])

        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=float(self.center_x), yoff=float(self.center_y))

# Test the tree polygon
test_tree = ChristmasTree('0', '0', '0')
print(f"Tree polygon has {len(test_tree.polygon.exterior.coords) - 1} vertices")
print(f"Tree area: {test_tree.polygon.area:.6f}")

Base tree has 11 vertices
Vertices: [(-0.075, -0.2), (-0.075, 0), (-0.35, 0), (-0.2, 0.3), (-0.125, 0.5), (0, 0.8), (0.125, 0.5), (0.2, 0.3), (0.35, 0), (0.075, 0), (0.075, -0.2)]


In [3]:
def load_trees_for_n(df, n):
    """Load all trees for a given N value."""
    prefix = f"{n:03d}_"
    subset = df[df['id'].str.startswith(prefix)]
    trees = []
    for _, row in subset.iterrows():
        x = str(row['x']).lstrip('s')
        y = str(row['y']).lstrip('s')
        deg = str(row['deg']).lstrip('s')
        trees.append(ChristmasTree(x, y, deg))
    return trees

def has_overlap(trees, tolerance=1e-12):
    """Check if any trees overlap."""
    if len(trees) <= 1:
        return False, []
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    overlaps = []
    
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx > i:  # Only check each pair once
                if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                    intersection = poly.intersection(polygons[idx])
                    if intersection.area > tolerance:
                        overlaps.append((i, idx, intersection.area))
    return len(overlaps) > 0, overlaps

def get_bounding_box_side(trees):
    """Get the side length of the bounding box."""
    if not trees:
        return 0
    all_coords = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_coords.append(coords)
    all_coords = np.vstack(all_coords)
    x_range = all_coords[:, 0].max() - all_coords[:, 0].min()
    y_range = all_coords[:, 1].max() - all_coords[:, 1].min()
    return max(x_range, y_range)

# Test on N=1
trees = load_trees_for_n(df, 1)
print(f"N=1: {len(trees)} trees")
side = get_bounding_box_side(trees)
print(f"Bounding box side: {side:.6f}")
print(f"Score contribution: {side**2 / 1:.6f}")

N values: 1 to 200
Total trees: 20100
Expected trees: 20100 = 20100


In [4]:
def score_submission(df, max_n=200, check_overlaps=True):
    """Calculate the competition score."""
    total_score = 0
    scores_by_n = {}
    overlapping_ns = []
    
    for n in range(1, max_n + 1):
        trees = load_trees_for_n(df, n)
        if len(trees) != n:
            print(f"Warning: n={n} has {len(trees)} trees instead of {n}")
            continue
        
        if check_overlaps:
            has_ovlp, ovlps = has_overlap(trees)
            if has_ovlp:
                overlapping_ns.append(n)
        
        side = get_bounding_box_side(trees)
        score_n = (side ** 2) / n
        scores_by_n[n] = {'side': side, 'score': score_n}
        total_score += score_n
    
    return total_score, scores_by_n, overlapping_ns

print("Scoring submission...")
score, scores_by_n, overlapping_ns = score_submission(df)
print(f"\\nTotal Score: {score:.6f}")
print(f"Target Score: 68.890873")
print(f"Gap to target: {score - 68.890873:.6f}")
print(f"\\nOverlapping N values: {overlapping_ns[:20]}..." if len(overlapping_ns) > 20 else f"\\nOverlapping N values: {overlapping_ns}")


Total Score: 70.523320
Target Score: 68.890873
Gap to target: 1.632447


In [5]:
# Show score breakdown for first 20 N values
print("Score breakdown (first 20 N values):")
print("-" * 50)
for n in range(1, 21):
    if n in scores_by_n:
        s = scores_by_n[n]
        print(f"N={n:3d}: side={s['side']:.6f}, score={s['score']:.6f}")

Checking for overlaps...
N=2: 1 overlaps found!
  Trees 0 and 1: intersection area = 1.91e-01
N=5: 10 overlaps found!
  Trees 0 and 1: intersection area = 1.36e-02
  Trees 0 and 2: intersection area = 2.04e-02
  Trees 0 and 3: intersection area = 7.29e-02
N=10: 12 overlaps found!
  Trees 0 and 7: intersection area = 8.82e-04
  Trees 0 and 9: intersection area = 3.15e-03
  Trees 1 and 3: intersection area = 1.04e-03
N=20: 26 overlaps found!
  Trees 0 and 3: intersection area = 1.46e-03
  Trees 0 and 4: intersection area = 1.92e-03
  Trees 0 and 11: intersection area = 4.41e-03
N=50: 47 overlaps found!
  Trees 0 and 2: intersection area = 3.12e-02
  Trees 0 and 34: intersection area = 1.85e-03
  Trees 1 and 45: intersection area = 1.37e-04
N=100: 137 overlaps found!
  Trees 0 and 25: intersection area = 1.46e-02
  Trees 0 and 40: intersection area = 4.77e-03
  Trees 1 and 10: intersection area = 2.69e-03
N=150: 385 overlaps found!
  Trees 0 and 24: intersection area = 1.54e-04
  Trees 0 

N=200: 244 overlaps found!
  Trees 0 and 174: intersection area = 1.80e-02
  Trees 1 and 74: intersection area = 1.68e-02
  Trees 2 and 84: intersection area = 8.85e-06

Overlap check complete.


In [6]:
# If no overlaps, copy to submission folder
import shutil
import json

if not overlapping_ns:
    print("No overlaps found! Submission is valid.")
    shutil.copy(submission_path, '/home/submission/submission.csv')
    print("Submission copied to /home/submission/submission.csv")
else:
    print(f"WARNING: {len(overlapping_ns)} N values have overlaps!")
    print("Submission may fail Kaggle validation.")
    # Still copy for now
    shutil.copy(submission_path, '/home/submission/submission.csv')
    print("Submission copied anyway for testing.")

# Save metrics
metrics = {'cv_score': score, 'overlapping_ns': overlapping_ns}
with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"\\nMetrics saved: cv_score={score:.6f}")

Full overlap check for all N values...
N=2: 1 overlaps
N=3: 3 overlaps
N=4: 5 overlaps
N=5: 10 overlaps
N=6: 4 overlaps
N=7: 7 overlaps
N=8: 8 overlaps
N=9: 8 overlaps
N=10: 12 overlaps
N=11: 9 overlaps
N=12: 16 overlaps
N=13: 15 overlaps
N=14: 14 overlaps
N=15: 19 overlaps
N=16: 23 overlaps
N=17: 19 overlaps
N=18: 18 overlaps
N=19: 23 overlaps
N=20: 26 overlaps
N=21: 25 overlaps
N=22: 22 overlaps
N=23: 24 overlaps
N=24: 22 overlaps
N=25: 30 overlaps
N=26: 34 overlaps
N=27: 25 overlaps
N=28: 36 overlaps
N=29: 36 overlaps
N=30: 38 overlaps
N=31: 34 overlaps
N=32: 40 overlaps
N=33: 30 overlaps
N=34: 41 overlaps
N=35: 43 overlaps
N=36: 45 overlaps
N=37: 40 overlaps
N=38: 41 overlaps
N=39: 40 overlaps
N=40: 39 overlaps
N=41: 50 overlaps
N=42: 50 overlaps
N=43: 57 overlaps
N=44: 59 overlaps
N=45: 55 overlaps


N=46: 45 overlaps
N=47: 46 overlaps
N=48: 56 overlaps
N=49: 55 overlaps
N=50: 47 overlaps
N=51: 77 overlaps
N=52: 82 overlaps
N=53: 88 overlaps
N=54: 91 overlaps
N=55: 92 overlaps
N=56: 93 overlaps
N=57: 73 overlaps
N=58: 64 overlaps
N=59: 55 overlaps
N=60: 25 overlaps
N=61: 88 overlaps


N=62: 106 overlaps
N=63: 88 overlaps
N=64: 84 overlaps
N=65: 82 overlaps
N=66: 94 overlaps
N=67: 89 overlaps
N=68: 95 overlaps
N=69: 74 overlaps
N=70: 75 overlaps
N=71: 120 overlaps
N=72: 106 overlaps


N=73: 104 overlaps
N=74: 80 overlaps
N=75: 75 overlaps
N=76: 87 overlaps
N=77: 110 overlaps
N=78: 119 overlaps
N=79: 113 overlaps
N=80: 115 overlaps
N=81: 120 overlaps


N=82: 130 overlaps
N=83: 135 overlaps
N=84: 146 overlaps
N=85: 147 overlaps
N=86: 107 overlaps
N=87: 121 overlaps
N=88: 115 overlaps
N=89: 116 overlaps


N=90: 118 overlaps
N=91: 118 overlaps
N=92: 124 overlaps
N=93: 125 overlaps
N=94: 127 overlaps
N=95: 44 overlaps
N=96: 93 overlaps


N=97: 208 overlaps
N=98: 135 overlaps
N=99: 203 overlaps
N=100: 137 overlaps
N=101: 141 overlaps
N=102: 141 overlaps


N=103: 140 overlaps
N=104: 143 overlaps
N=105: 149 overlaps
N=106: 154 overlaps
N=107: 192 overlaps
N=108: 194 overlaps


N=109: 197 overlaps
N=110: 200 overlaps
N=111: 160 overlaps
N=112: 131 overlaps
N=113: 154 overlaps


N=114: 155 overlaps
N=115: 147 overlaps
N=116: 178 overlaps
N=117: 184 overlaps
N=118: 215 overlaps


N=119: 219 overlaps
N=120: 207 overlaps
N=121: 172 overlaps
N=122: 68 overlaps
N=123: 69 overlaps


N=124: 121 overlaps
N=125: 119 overlaps
N=126: 126 overlaps
N=127: 174 overlaps


N=128: 181 overlaps
N=129: 289 overlaps
N=130: 293 overlaps
N=131: 273 overlaps


N=132: 176 overlaps
N=133: 190 overlaps
N=134: 199 overlaps
N=135: 195 overlaps


N=136: 97 overlaps
N=137: 100 overlaps
N=138: 100 overlaps
N=139: 147 overlaps


N=140: 165 overlaps
N=141: 193 overlaps
N=142: 195 overlaps
N=143: 201 overlaps


N=144: 253 overlaps
N=145: 210 overlaps
N=146: 203 overlaps
N=147: 212 overlaps


N=148: 213 overlaps
N=149: 212 overlaps
N=150: 385 overlaps


N=151: 211 overlaps
N=152: 337 overlaps
N=153: 349 overlaps


N=154: 352 overlaps
N=155: 355 overlaps
N=156: 359 overlaps


N=157: 242 overlaps
N=158: 228 overlaps
N=159: 255 overlaps


N=160: 255 overlaps
N=161: 261 overlaps
N=162: 268 overlaps


N=163: 270 overlaps
N=164: 286 overlaps
N=165: 292 overlaps


N=166: 300 overlaps
N=167: 302 overlaps
N=168: 304 overlaps


N=169: 90 overlaps
N=170: 98 overlaps
N=171: 88 overlaps


N=172: 103 overlaps
N=173: 100 overlaps
N=174: 91 overlaps


N=175: 168 overlaps
N=176: 445 overlaps


N=177: 448 overlaps
N=178: 449 overlaps


N=179: 449 overlaps
N=180: 249 overlaps
N=181: 248 overlaps


N=182: 248 overlaps
N=183: 279 overlaps


N=184: 206 overlaps
N=185: 269 overlaps


N=186: 199 overlaps
N=187: 201 overlaps
N=188: 201 overlaps


N=189: 191 overlaps
N=190: 264 overlaps


N=191: 262 overlaps
N=192: 265 overlaps


N=193: 333 overlaps
N=194: 340 overlaps


N=195: 352 overlaps
N=196: 345 overlaps


N=197: 201 overlaps
N=198: 235 overlaps


N=199: 241 overlaps
N=200: 244 overlaps

Total N values with overlaps: 199

Final Score: 70.523320


In [7]:
# Copy to submission folder
import shutil
shutil.copy(submission_path, '/home/submission/submission.csv')
print("Submission copied to /home/submission/submission.csv")

# Save metrics
import json
metrics = {'cv_score': score}
with open('/home/code/experiments/000_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")

Submission copied to /home/submission/submission.csv
Metrics saved: {'cv_score': 70.52331967787894}
