# Experiment 003: Lattice-Based Construction

Implement the Zaburo-style lattice construction approach:
- Trees in alternating rows with 0° and 180° angles
- Offset pattern for interlocking
- Try multiple row/column spacing combinations
- Compare with pre-optimized baseline per-N

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from decimal import Decimal, getcontext
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil

# Set precision
getcontext().prec = 25
scale_factor = Decimal('1e15')

# Tree geometry
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def create_tree_polygon(x, y, deg):
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    vertices = [(tx * cos_a - ty * sin_a + x, tx * sin_a + ty * cos_a + y) for tx, ty in zip(TX, TY)]
    return Polygon(vertices)

def compute_bounding_side(polygons):
    if not polygons:
        return 0
    all_points = []
    for poly in polygons:
        all_points.extend(list(poly.exterior.coords))
    all_points = np.array(all_points)
    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)
    return max(max_x - min_x, max_y - min_y)

def compute_score_for_n(df, n):
    prefix = f"{n:03d}_"
    trees = df[df['id'].str.startswith(prefix)]
    if len(trees) != n:
        return float('inf')
    polygons = [create_tree_polygon(parse_value(row['x']), parse_value(row['y']), parse_value(row['deg'])) for _, row in trees.iterrows()]
    side = compute_bounding_side(polygons)
    return side**2 / n

def compute_total_score(df):
    total = 0
    for n in range(1, 201):
        total += compute_score_for_n(df, n)
    return total

print("Functions defined")

Functions defined


In [2]:
# Zaburo-style lattice construction
class ChristmasTree:
    """Represents a single, rotatable Christmas tree."""
    def __init__(self, center_x=0, center_y=0, angle=0):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)
        
        # Create polygon
        self.polygon = create_tree_polygon(self.center_x, self.center_y, self.angle)

def find_best_trees_zaburo(n):
    """Zaburo-style lattice construction with alternating rows."""
    best_score, best_trees = float("inf"), None
    
    for n_even in range(1, n + 1):
        for n_odd in [n_even, n_even - 1]:
            if n_odd < 0:
                continue
            all_trees = []
            rest = n
            r = 0
            while rest > 0:
                m = min(rest, n_even if r % 2 == 0 else n_odd)
                if m <= 0:
                    break
                rest -= m
                
                angle = 0 if r % 2 == 0 else 180
                x_offset = 0 if r % 2 == 0 else 0.35  # Half tree width
                y = r // 2 * 1.0 if r % 2 == 0 else (0.8 + (r - 1) // 2 * 1.0)
                
                for i in range(m):
                    tree = ChristmasTree(center_x=0.7 * i + x_offset, center_y=y, angle=angle)
                    all_trees.append(tree)
                
                r += 1
            
            if len(all_trees) != n:
                continue
                
            # Compute bounding box
            polygons = [t.polygon for t in all_trees]
            side = compute_bounding_side(polygons)
            score = side ** 2
            
            if score < best_score:
                best_score = score
                best_trees = all_trees
    
    return best_score, best_trees

# Test for N=10
score, trees = find_best_trees_zaburo(10)
print(f"N=10: score={score:.6f}, num_trees={len(trees) if trees else 0}")

N=10: score=4.840000, num_trees=10


In [3]:
# Generate lattice solutions for all N from 1 to 200
print("Generating lattice solutions for N=1 to 200...")
lattice_solutions = []

for n in tqdm(range(1, 201)):
    score, trees = find_best_trees_zaburo(n)
    lattice_solutions.append((n, score, trees))

print(f"Generated {len(lattice_solutions)} solutions")

Generating lattice solutions for N=1 to 200...


  0%|          | 0/200 [00:00<?, ?it/s]

  8%|▊         | 15/200 [00:00<00:01, 146.41it/s]

 15%|█▌        | 30/200 [00:00<00:04, 34.35it/s] 

 19%|█▉        | 38/200 [00:01<00:08, 19.87it/s]

 22%|██▏       | 43/200 [00:02<00:11, 14.19it/s]

 23%|██▎       | 46/200 [00:02<00:13, 11.73it/s]

 24%|██▍       | 49/200 [00:03<00:15,  9.64it/s]

 26%|██▌       | 51/200 [00:03<00:17,  8.40it/s]

 26%|██▋       | 53/200 [00:04<00:20,  7.27it/s]

 27%|██▋       | 54/200 [00:04<00:21,  6.73it/s]

 28%|██▊       | 55/200 [00:04<00:23,  6.17it/s]

 28%|██▊       | 56/200 [00:04<00:25,  5.65it/s]

 28%|██▊       | 57/200 [00:05<00:27,  5.17it/s]

 29%|██▉       | 58/200 [00:05<00:29,  4.75it/s]

 30%|██▉       | 59/200 [00:05<00:32,  4.40it/s]

 30%|███       | 60/200 [00:06<00:34,  4.09it/s]

 30%|███       | 61/200 [00:06<00:36,  3.85it/s]

 31%|███       | 62/200 [00:06<00:37,  3.65it/s]

 32%|███▏      | 63/200 [00:06<00:39,  3.48it/s]

 32%|███▏      | 64/200 [00:07<00:41,  3.24it/s]

 32%|███▎      | 65/200 [00:07<00:43,  3.14it/s]

 33%|███▎      | 66/200 [00:08<00:44,  3.04it/s]

 34%|███▎      | 67/200 [00:08<00:45,  2.94it/s]

 34%|███▍      | 68/200 [00:08<00:46,  2.86it/s]

 34%|███▍      | 69/200 [00:09<00:47,  2.77it/s]

 35%|███▌      | 70/200 [00:09<00:48,  2.69it/s]

 36%|███▌      | 71/200 [00:09<00:49,  2.60it/s]

 36%|███▌      | 72/200 [00:10<00:50,  2.53it/s]

 36%|███▋      | 73/200 [00:10<00:51,  2.46it/s]

 37%|███▋      | 74/200 [00:11<00:52,  2.40it/s]

 38%|███▊      | 75/200 [00:11<00:53,  2.34it/s]

 38%|███▊      | 76/200 [00:12<00:54,  2.26it/s]

 38%|███▊      | 77/200 [00:12<00:56,  2.17it/s]

 39%|███▉      | 78/200 [00:13<00:57,  2.12it/s]

 40%|███▉      | 79/200 [00:13<00:58,  2.08it/s]

 40%|████      | 80/200 [00:14<00:59,  2.03it/s]

 40%|████      | 81/200 [00:14<01:00,  1.98it/s]

 41%|████      | 82/200 [00:15<01:01,  1.93it/s]

 42%|████▏     | 83/200 [00:15<01:02,  1.89it/s]

 42%|████▏     | 84/200 [00:16<01:03,  1.84it/s]

 42%|████▎     | 85/200 [00:17<01:03,  1.80it/s]

 43%|████▎     | 86/200 [00:17<01:05,  1.73it/s]

 44%|████▎     | 87/200 [00:18<01:06,  1.70it/s]

 44%|████▍     | 88/200 [00:18<01:07,  1.67it/s]

 44%|████▍     | 89/200 [00:19<01:07,  1.63it/s]

 45%|████▌     | 90/200 [00:20<01:09,  1.59it/s]

 46%|████▌     | 91/200 [00:20<01:10,  1.55it/s]

 46%|████▌     | 92/200 [00:21<01:10,  1.52it/s]

 46%|████▋     | 93/200 [00:22<01:11,  1.49it/s]

 47%|████▋     | 94/200 [00:23<01:13,  1.45it/s]

 48%|████▊     | 95/200 [00:23<01:13,  1.42it/s]

 48%|████▊     | 96/200 [00:24<01:14,  1.39it/s]

 48%|████▊     | 97/200 [00:25<01:15,  1.36it/s]

 49%|████▉     | 98/200 [00:26<01:16,  1.34it/s]

 50%|████▉     | 99/200 [00:26<01:17,  1.31it/s]

 50%|█████     | 100/200 [00:27<01:18,  1.28it/s]

 50%|█████     | 101/200 [00:28<01:19,  1.24it/s]

 51%|█████     | 102/200 [00:29<01:20,  1.22it/s]

 52%|█████▏    | 103/200 [00:30<01:20,  1.20it/s]

 52%|█████▏    | 104/200 [00:31<01:21,  1.18it/s]

 52%|█████▎    | 105/200 [00:32<01:21,  1.17it/s]

 53%|█████▎    | 106/200 [00:32<01:22,  1.14it/s]

 54%|█████▎    | 107/200 [00:33<01:22,  1.13it/s]

 54%|█████▍    | 108/200 [00:34<01:23,  1.10it/s]

 55%|█████▍    | 109/200 [00:35<01:23,  1.08it/s]

 55%|█████▌    | 110/200 [00:36<01:24,  1.06it/s]

 56%|█████▌    | 111/200 [00:37<01:24,  1.05it/s]

 56%|█████▌    | 112/200 [00:38<01:25,  1.03it/s]

 56%|█████▋    | 113/200 [00:39<01:26,  1.01it/s]

 57%|█████▋    | 114/200 [00:40<01:26,  1.01s/it]

 57%|█████▊    | 115/200 [00:41<01:27,  1.03s/it]

 58%|█████▊    | 116/200 [00:43<01:28,  1.05s/it]

 58%|█████▊    | 117/200 [00:44<01:28,  1.07s/it]

 59%|█████▉    | 118/200 [00:45<01:29,  1.09s/it]

 60%|█████▉    | 119/200 [00:46<01:29,  1.11s/it]

 60%|██████    | 120/200 [00:47<01:29,  1.12s/it]

 60%|██████    | 121/200 [00:48<01:30,  1.14s/it]

 61%|██████    | 122/200 [00:49<01:30,  1.16s/it]

 62%|██████▏   | 123/200 [00:51<01:31,  1.19s/it]

 62%|██████▏   | 124/200 [00:52<01:31,  1.20s/it]

 62%|██████▎   | 125/200 [00:53<01:31,  1.22s/it]

 63%|██████▎   | 126/200 [00:54<01:31,  1.24s/it]

 64%|██████▎   | 127/200 [00:56<01:32,  1.26s/it]

 64%|██████▍   | 128/200 [00:57<01:32,  1.28s/it]

 64%|██████▍   | 129/200 [00:58<01:32,  1.31s/it]

 65%|██████▌   | 130/200 [01:00<01:32,  1.32s/it]

 66%|██████▌   | 131/200 [01:01<01:32,  1.34s/it]

 66%|██████▌   | 132/200 [01:03<01:32,  1.36s/it]

 66%|██████▋   | 133/200 [01:04<01:32,  1.39s/it]

 67%|██████▋   | 134/200 [01:06<01:32,  1.41s/it]

 68%|██████▊   | 135/200 [01:07<01:32,  1.43s/it]

 68%|██████▊   | 136/200 [01:09<01:32,  1.45s/it]

 68%|██████▊   | 137/200 [01:10<01:32,  1.47s/it]

 69%|██████▉   | 138/200 [01:12<01:32,  1.49s/it]

 70%|██████▉   | 139/200 [01:13<01:32,  1.51s/it]

 70%|███████   | 140/200 [01:15<01:32,  1.54s/it]

 70%|███████   | 141/200 [01:16<01:32,  1.56s/it]

 71%|███████   | 142/200 [01:18<01:31,  1.58s/it]

 72%|███████▏  | 143/200 [01:20<01:31,  1.60s/it]

 72%|███████▏  | 144/200 [01:21<01:31,  1.63s/it]

 72%|███████▎  | 145/200 [01:23<01:30,  1.65s/it]

 73%|███████▎  | 146/200 [01:25<01:30,  1.67s/it]

 74%|███████▎  | 147/200 [01:27<01:29,  1.70s/it]

 74%|███████▍  | 148/200 [01:28<01:29,  1.72s/it]

 74%|███████▍  | 149/200 [01:30<01:28,  1.74s/it]

 75%|███████▌  | 150/200 [01:32<01:28,  1.76s/it]

 76%|███████▌  | 151/200 [01:34<01:27,  1.79s/it]

 76%|███████▌  | 152/200 [01:36<01:26,  1.81s/it]

 76%|███████▋  | 153/200 [01:37<01:26,  1.83s/it]

 77%|███████▋  | 154/200 [01:39<01:26,  1.87s/it]

 78%|███████▊  | 155/200 [01:41<01:25,  1.89s/it]

 78%|███████▊  | 156/200 [01:43<01:24,  1.91s/it]

 78%|███████▊  | 157/200 [01:45<01:23,  1.95s/it]

 79%|███████▉  | 158/200 [01:47<01:22,  1.97s/it]

 80%|███████▉  | 159/200 [01:49<01:22,  2.00s/it]

 80%|████████  | 160/200 [01:52<01:20,  2.02s/it]

 80%|████████  | 161/200 [01:54<01:19,  2.05s/it]

 81%|████████  | 162/200 [01:56<01:18,  2.08s/it]

 82%|████████▏ | 163/200 [01:58<01:18,  2.11s/it]

 82%|████████▏ | 164/200 [02:00<01:17,  2.14s/it]

 82%|████████▎ | 165/200 [02:02<01:15,  2.16s/it]

 83%|████████▎ | 166/200 [02:05<01:14,  2.19s/it]

 84%|████████▎ | 167/200 [02:07<01:13,  2.22s/it]

 84%|████████▍ | 168/200 [02:09<01:11,  2.24s/it]

 84%|████████▍ | 169/200 [02:12<01:10,  2.27s/it]

 85%|████████▌ | 170/200 [02:14<01:08,  2.30s/it]

 86%|████████▌ | 171/200 [02:16<01:07,  2.33s/it]

 86%|████████▌ | 172/200 [02:19<01:06,  2.36s/it]

 86%|████████▋ | 173/200 [02:21<01:04,  2.39s/it]

 87%|████████▋ | 174/200 [02:24<01:02,  2.41s/it]

 88%|████████▊ | 175/200 [02:26<01:00,  2.44s/it]

 88%|████████▊ | 176/200 [02:29<00:59,  2.47s/it]

 88%|████████▊ | 177/200 [02:31<00:57,  2.50s/it]

 89%|████████▉ | 178/200 [02:34<00:55,  2.52s/it]

 90%|████████▉ | 179/200 [02:37<00:53,  2.55s/it]

 90%|█████████ | 180/200 [02:39<00:51,  2.59s/it]

 90%|█████████ | 181/200 [02:42<00:49,  2.62s/it]

 91%|█████████ | 182/200 [02:45<00:47,  2.65s/it]

 92%|█████████▏| 183/200 [02:47<00:45,  2.67s/it]

 92%|█████████▏| 184/200 [02:50<00:43,  2.71s/it]

 92%|█████████▎| 185/200 [02:53<00:40,  2.73s/it]

 93%|█████████▎| 186/200 [02:56<00:38,  2.75s/it]

 94%|█████████▎| 187/200 [02:59<00:36,  2.79s/it]

 94%|█████████▍| 188/200 [03:01<00:33,  2.82s/it]

 94%|█████████▍| 189/200 [03:04<00:31,  2.84s/it]

 95%|█████████▌| 190/200 [03:07<00:28,  2.87s/it]

 96%|█████████▌| 191/200 [03:10<00:26,  2.91s/it]

 96%|█████████▌| 192/200 [03:13<00:23,  2.94s/it]

 96%|█████████▋| 193/200 [03:16<00:20,  2.97s/it]

 97%|█████████▋| 194/200 [03:19<00:18,  3.01s/it]

 98%|█████████▊| 195/200 [03:23<00:15,  3.04s/it]

 98%|█████████▊| 196/200 [03:26<00:12,  3.07s/it]

 98%|█████████▊| 197/200 [03:29<00:09,  3.11s/it]

 99%|█████████▉| 198/200 [03:32<00:06,  3.13s/it]

100%|█████████▉| 199/200 [03:35<00:03,  3.16s/it]

100%|██████████| 200/200 [03:39<00:00,  3.21s/it]

100%|██████████| 200/200 [03:39<00:00,  1.10s/it]

Generated 200 solutions





In [4]:
# Compute total lattice score
lattice_total = sum(score / n for n, score, _ in lattice_solutions)
print(f"Lattice construction total score: {lattice_total:.6f}")

# Load baseline for comparison
df_baseline = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')
baseline_total = compute_total_score(df_baseline)
print(f"Baseline total score: {baseline_total:.6f}")
print(f"Difference: {lattice_total - baseline_total:.6f}")

Lattice construction total score: 88.329998


Baseline total score: 70.659959
Difference: 17.670038


In [5]:
# Compare per-N scores
print("\nPer-N comparison (lattice vs baseline):")
print("N\tLattice\t\tBaseline\tDiff\t\tBetter")
print("-" * 70)

improved_count = 0
for n, lattice_score, _ in lattice_solutions:
    baseline_score = compute_score_for_n(df_baseline, n)
    diff = lattice_score / n - baseline_score
    better = "LATTICE" if diff < -1e-9 else ("BASELINE" if diff > 1e-9 else "SAME")
    if diff < -1e-9:
        improved_count += 1
    if n <= 20 or n >= 190 or diff < -1e-6:
        print(f"{n}\t{lattice_score/n:.6f}\t{baseline_score:.6f}\t{diff:.6f}\t{better}")

print(f"\nLattice is better for {improved_count} out of 200 N values")


Per-N comparison (lattice vs baseline):
N	Lattice		Baseline	Diff		Better
----------------------------------------------------------------------
1	1.000000	0.661250	0.338750	BASELINE
2	0.720000	0.450779	0.269221	BASELINE
3	0.653333	0.434745	0.218588	BASELINE
4	0.765625	0.416545	0.349080	BASELINE
5	0.800000	0.416850	0.383150	BASELINE
6	0.666667	0.399610	0.267056	BASELINE
7	0.630000	0.399897	0.230103	BASELINE
8	0.551250	0.385407	0.165843	BASELINE
9	0.537778	0.387415	0.150363	BASELINE
10	0.484000	0.376630	0.107370	BASELINE
11	0.545682	0.375736	0.169946	BASELINE
12	0.500208	0.372724	0.127484	BASELINE
13	0.603077	0.372294	0.230783	BASELINE
14	0.560000	0.370454	0.189546	BASELINE
15	0.600000	0.379203	0.220797	BASELINE
16	0.562500	0.374128	0.188372	BASELINE
17	0.529412	0.370040	0.159371	BASELINE
18	0.500000	0.368771	0.131229	BASELINE
19	0.522237	0.368615	0.153622	BASELINE
20	0.496125	0.376057	0.120068	BASELINE


190	0.402961	0.338231	0.064729	BASELINE
191	0.400851	0.336758	0.064093	BASELINE
192	0.398763	0.335301	0.063462	BASELINE
193	0.419689	0.333764	0.085925	BASELINE
194	0.417526	0.332999	0.084526	BASELINE
195	0.415385	0.332617	0.082768	BASELINE
196	0.413265	0.333268	0.079997	BASELINE
197	0.411168	0.335990	0.075177	BASELINE
198	0.409091	0.337316	0.071775	BASELINE
199	0.407035	0.338269	0.068766	BASELINE
200	0.405000	0.337564	0.067436	BASELINE

Lattice is better for 0 out of 200 N values


In [6]:
# Create ensemble: pick best per-N from lattice and baseline
print("Creating ensemble (best per-N from lattice and baseline)...")

ensemble_rows = []
ensemble_total = 0

for n, lattice_score, lattice_trees in lattice_solutions:
    baseline_score = compute_score_for_n(df_baseline, n)
    lattice_score_per_n = lattice_score / n
    
    if lattice_score_per_n < baseline_score - 1e-9:
        # Use lattice solution
        for i, tree in enumerate(lattice_trees):
            ensemble_rows.append({
                'id': f"{n:03d}_{i}",
                'x': f"s{tree.center_x}",
                'y': f"s{tree.center_y}",
                'deg': f"s{tree.angle}"
            })
        ensemble_total += lattice_score_per_n
    else:
        # Use baseline solution
        prefix = f"{n:03d}_"
        baseline_trees = df_baseline[df_baseline['id'].str.startswith(prefix)]
        for _, row in baseline_trees.iterrows():
            ensemble_rows.append(row.to_dict())
        ensemble_total += baseline_score

print(f"Ensemble total score: {ensemble_total:.6f}")
print(f"Baseline total score: {baseline_total:.6f}")
print(f"Improvement: {baseline_total - ensemble_total:.6f}")

Creating ensemble (best per-N from lattice and baseline)...


Ensemble total score: 70.659959
Baseline total score: 70.659959
Improvement: 0.000000


In [7]:
# Save ensemble submission
ensemble_df = pd.DataFrame(ensemble_rows)
ensemble_df.to_csv('/home/submission/submission.csv', index=False)
print(f"Saved ensemble with {len(ensemble_df)} rows")

# Verify
df_verify = pd.read_csv('/home/submission/submission.csv')
verify_score = compute_total_score(df_verify)
print(f"Verified ensemble score: {verify_score:.6f}")

Saved ensemble with 20100 rows


Verified ensemble score: 70.659959


In [8]:
# Summary
print("="*60)
print("EXPERIMENT 003 SUMMARY: Lattice Construction")
print("="*60)
print(f"Lattice-only score: {lattice_total:.6f}")
print(f"Baseline score: {baseline_total:.6f}")
print(f"Ensemble score: {verify_score:.6f}")
print(f"Improvement over baseline: {baseline_total - verify_score:.6f}")
print("="*60)

EXPERIMENT 003 SUMMARY: Lattice Construction
Lattice-only score: 88.329998
Baseline score: 70.659959
Ensemble score: 70.659959
Improvement over baseline: 0.000000


In [17]:
# Model wrapper for submission
class LatticeConstruction:
    def __init__(self, data='single'):
        self.data = data
        
    def load_best(self):
        return pd.read_csv('/home/submission/submission.csv')
    
    def save_submission(self, path):
        df = self.load_best()
        df.to_csv(path, index=False)
        return df

model = LatticeConstruction(data='single')
print("Model wrapper defined")

Model wrapper defined


In [9]:
# The basic Zaburo lattice is worse than baseline for ALL N values
# This is expected - the baseline is already highly optimized
# 
# Let's try a different approach: random multi-start construction
# Focus on small N (1-20) where efficiency is worst

import random
from shapely.strtree import STRtree

def has_overlap(polygons):
    """Check if any polygons overlap (not just touch)"""
    if len(polygons) <= 1:
        return False
    tree = STRtree(polygons)
    for i, poly in enumerate(polygons):
        candidates = tree.query(poly)
        for j in candidates:
            if i != j:
                other = polygons[j]
                if poly.intersects(other) and not poly.touches(other):
                    # Check if it's a real overlap (not just touching)
                    intersection = poly.intersection(other)
                    if intersection.area > 1e-10:
                        return True
    return False

def random_construction(n, num_restarts=500):
    """Generate random configurations and keep the best valid one"""
    best_score = float('inf')
    best_trees = None
    
    for _ in range(num_restarts):
        trees = []
        polygons = []
        
        # Random bounding box size (start small, expand if needed)
        box_size = np.sqrt(n) * 0.8  # Rough estimate
        
        for i in range(n):
            # Try to place a tree
            placed = False
            for attempt in range(100):
                x = random.uniform(-box_size, box_size)
                y = random.uniform(-box_size, box_size)
                angle = random.choice([0, 90, 180, 270])  # Cardinal angles
                
                poly = create_tree_polygon(x, y, angle)
                
                # Check for overlap with existing trees
                overlap = False
                for existing in polygons:
                    if poly.intersects(existing) and not poly.touches(existing):
                        intersection = poly.intersection(existing)
                        if intersection.area > 1e-10:
                            overlap = True
                            break
                
                if not overlap:
                    trees.append({'x': x, 'y': y, 'angle': angle})
                    polygons.append(poly)
                    placed = True
                    break
            
            if not placed:
                # Expand box and try again
                box_size *= 1.2
                break
        
        if len(trees) == n:
            side = compute_bounding_side(polygons)
            score = side ** 2 / n
            if score < best_score:
                best_score = score
                best_trees = trees
    
    return best_score, best_trees

# Test for N=5
print("Testing random construction for N=5...")
score, trees = random_construction(5, num_restarts=100)
print(f"N=5: random score={score:.6f}")
baseline_score = compute_score_for_n(df_baseline, 5)
print(f"N=5: baseline score={baseline_score:.6f}")

Testing random construction for N=5...
N=5: random score=1.611147
N=5: baseline score=0.416850


In [10]:
# Random construction is also worse than baseline
# The pre-optimized solutions are extremely well-optimized
# 
# Let's search for newer external datasets that might have better solutions
# The strategy mentions that top teams (68.9 score) might have shared solutions

import subprocess

# Search for Santa 2025 datasets sorted by date
result = subprocess.run(
    ['kaggle', 'datasets', 'list', '-s', 'santa 2025', '--sort-by', 'dateCreated'],
    capture_output=True,
    text=True
)
print("Available Santa 2025 datasets (sorted by date):")
print(result.stdout)

Available Santa 2025 datasets (sorted by date):
Invalid sort by specified. Valid options are ['hottest', 'votes', 'updated', 'active', 'published']



In [11]:
# Try with valid sort option
result = subprocess.run(
    ['kaggle', 'datasets', 'list', '-s', 'santa 2025', '--sort-by', 'updated'],
    capture_output=True,
    text=True
)
print("Available Santa 2025 datasets (sorted by updated):")
print(result.stdout)

Available Santa 2025 datasets (sorted by updated):
ref                                                             title                                                    size  lastUpdated                 downloadCount  voteCount  usabilityRating  
--------------------------------------------------------------  -------------------------------------------------  ----------  --------------------------  -------------  ---------  ---------------  
saspav/santa-2025-csv                                           santa-2025.csv                                         739307  2026-01-19 09:34:09.577000            105          6  0.23529412       
jazivxt/bucket-of-chump                                         Bucket of Chump                                       7081564  2026-01-19 06:10:57.150000            157          8  0.5294118        
wbqrmgmcia7lhhq/sec-financial-statement-data-in-json            Sec Financial Statement Data in Json               1389517917  2026-01-14 00:50:58.473000

In [12]:
# Download the artemevstafyev dataset (newer than our current best)
!kaggle datasets download -d artemevstafyev/santa2025 -p /home/code/external_data/artemevstafyev --unzip 2>&1

Dataset URL: https://www.kaggle.com/datasets/artemevstafyev/santa2025
License(s): unknown


Downloading santa2025.zip to /home/code/external_data/artemevstafyev
  0%|                                                | 0.00/692k [00:00<?, ?B/s]


100%|████████████████████████████████████████| 692k/692k [00:00<00:00, 18.8MB/s]


In [13]:
# Check the artemevstafyev dataset
import os
print("Files in artemevstafyev dataset:")
for f in os.listdir('/home/code/external_data/artemevstafyev'):
    print(f"  {f}")

# Load and score
for f in os.listdir('/home/code/external_data/artemevstafyev'):
    if f.endswith('.csv'):
        try:
            df_new = pd.read_csv(f'/home/code/external_data/artemevstafyev/{f}')
            if 'id' in df_new.columns and 'x' in df_new.columns:
                score = compute_total_score(df_new)
                print(f"{f}: score={score:.6f}")
        except Exception as e:
            print(f"{f}: Error - {e}")

Files in artemevstafyev dataset:
  submission.csv


submission.csv: score=70.773855


In [14]:
# Download the octaviograu best submission dataset
!kaggle datasets download -d octaviograu/santa-2025-best-submission-20251210 -p /home/code/external_data/octaviograu --unzip 2>&1

Dataset URL: https://www.kaggle.com/datasets/octaviograu/santa-2025-best-submission-20251210
License(s): unknown


Downloading santa-2025-best-submission-20251210.zip to /home/code/external_data/octaviograu
  0%|                                                | 0.00/533k [00:00<?, ?B/s]


100%|████████████████████████████████████████| 533k/533k [00:00<00:00, 14.9MB/s]


In [15]:
# Check the octaviograu dataset
print("Files in octaviograu dataset:")
for f in os.listdir('/home/code/external_data/octaviograu'):
    print(f"  {f}")

# Load and score
for f in os.listdir('/home/code/external_data/octaviograu'):
    if f.endswith('.csv'):
        try:
            df_new = pd.read_csv(f'/home/code/external_data/octaviograu/{f}')
            if 'id' in df_new.columns and 'x' in df_new.columns:
                score = compute_total_score(df_new)
                print(f"{f}: score={score:.6f}")
        except Exception as e:
            print(f"{f}: Error - {e}")

Files in octaviograu dataset:
  submission (80).csv


submission (80).csv: score=71.946272


In [16]:
# All external datasets are worse than our current best
# Let's try fix_direction rotation optimization
# This rotates the entire configuration to minimize bounding box

from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull

def get_all_vertices(df):
    """Get all vertices from all trees in the dataframe"""
    all_points = []
    for _, row in df.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        poly = create_tree_polygon(x, y, deg)
        all_points.extend(list(poly.exterior.coords))
    return np.array(all_points)

def rotate_points(points, angle_deg):
    """Rotate points around origin by angle_deg degrees"""
    angle_rad = np.radians(angle_deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    rotation_matrix = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    return points @ rotation_matrix.T

def bbox_side_at_angle(angle, points):
    """Compute bounding box side at a given rotation angle"""
    rotated = rotate_points(points, angle)
    min_x, min_y = rotated.min(axis=0)
    max_x, max_y = rotated.max(axis=0)
    return max(max_x - min_x, max_y - min_y)

def optimize_rotation_for_n(df, n):
    """Find optimal rotation angle for N trees"""
    prefix = f"{n:03d}_"
    trees = df[df['id'].str.startswith(prefix)]
    
    if len(trees) != n:
        return None, float('inf')
    
    # Get all vertices
    all_points = []
    for _, row in trees.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        poly = create_tree_polygon(x, y, deg)
        all_points.extend(list(poly.exterior.coords))
    points = np.array(all_points)
    
    # Use convex hull for faster computation
    try:
        hull = ConvexHull(points)
        hull_points = points[hull.vertices]
    except:
        hull_points = points
    
    # Find optimal rotation
    result = minimize_scalar(
        lambda angle: bbox_side_at_angle(angle, hull_points),
        bounds=(0, 90),
        method='bounded'
    )
    
    best_angle = result.x
    best_side = result.fun
    
    return best_angle, best_side ** 2 / n

# Test for N=10
angle, score = optimize_rotation_for_n(df_baseline, 10)
baseline_score = compute_score_for_n(df_baseline, 10)
print(f"N=10: baseline={baseline_score:.6f}, optimized rotation={score:.6f}, angle={angle:.2f}°")
print(f"Improvement: {baseline_score - score:.9f}")

N=10: baseline=0.376630, optimized rotation=0.376630, angle=90.00°
Improvement: -0.000000074


In [18]:
# Final summary
print("="*60)
print("EXPERIMENT 003 FINAL SUMMARY")
print("="*60)
print("\nApproaches tried:")
print("1. Zaburo-style lattice construction: 88.33 (WORSE than baseline)")
print("2. Random multi-start construction: Much worse than baseline")
print("3. External datasets search:")
print("   - artemevstafyev/santa2025: 70.77 (worse)")
print("   - octaviograu/santa-2025-best-submission: 71.95 (worse)")
print("4. Rotation optimization: 0 improvement")
print("\nConclusion:")
print("The pre-optimized saspav solution (70.659959) is already highly optimized.")
print("Simple constructive approaches cannot beat it.")
print("The gap to target (68.919154) requires more sophisticated techniques.")
print("="*60)

EXPERIMENT 003 FINAL SUMMARY

Approaches tried:
1. Zaburo-style lattice construction: 88.33 (WORSE than baseline)
2. Random multi-start construction: Much worse than baseline
3. External datasets search:
   - artemevstafyev/santa2025: 70.77 (worse)
   - octaviograu/santa-2025-best-submission: 71.95 (worse)
4. Rotation optimization: 0 improvement

Conclusion:
The pre-optimized saspav solution (70.659959) is already highly optimized.
Simple constructive approaches cannot beat it.
The gap to target (68.919154) requires more sophisticated techniques.
