# Full jiweiliu SA Kernel Implementation

This notebook implements the COMPLETE jiweiliu kernel with:
1. Pre-optimized seeds
2. Grid generation with append support
3. SA optimization on EACH grid configuration (100,000 moves per config)
4. Deletion cascade
5. Multiprocessing for parallel execution

In [1]:
import math
import os
import time
from multiprocessing import Pool, cpu_count

import numpy as np
import pandas as pd
from numba import njit
from numba.typed import List as NumbaList

# Report how many CPUs multiprocessing sees; the parallel SA cell further down
# sizes its worker pool from the same cpu_count() value.
print(f'CPU count: {cpu_count()}')

CPU count: 26


In [2]:
# Tree shape constants (must match official spec)
# All values are in the tree's local frame as used by get_tree_vertices():
# x = 0 is the axis of symmetry and y = 0 is the bottom of the lowest tier.
TRUNK_W = 0.15  # full trunk width
TRUNK_H = 0.2   # trunk height (extends below y = 0)
BASE_W = 0.7    # full width of the bottom tier at BASE_Y
MID_W = 0.4     # full width of the middle tier at TIER_2_Y
TOP_W = 0.25    # full width of the top tier at TIER_1_Y
TIP_Y = 0.8     # y of the tree tip
TIER_1_Y = 0.5  # y where the top tier ends
TIER_2_Y = 0.25 # y where the middle tier ends
BASE_Y = 0.0    # y where the bottom tier ends / trunk starts
TRUNK_BOTTOM_Y = -TRUNK_H  # y of the trunk's bottom edge

# Maximum distance between tree centers for possible overlap
# The farthest outline vertex from the local origin is the tip (0.8 away), so
# two trees whose centers are more than 1.6 apart cannot intersect; 2.0 is a
# conservative cut-off. The squared value avoids a sqrt in the hot path.
MAX_OVERLAP_DIST = 2.0
MAX_OVERLAP_DIST_SQ = MAX_OVERLAP_DIST * MAX_OVERLAP_DIST

In [3]:
# Geometry helper functions
@njit(cache=True)
def rotate_point(x, y, cos_a, sin_a):
    """Rotate the point (x, y) about the origin.

    ``cos_a`` and ``sin_a`` are the cosine and sine of the rotation angle.
    Returns the rotated coordinates as an ``(x, y)`` tuple.
    """
    rotated_x = x * cos_a - y * sin_a
    rotated_y = x * sin_a + y * cos_a
    return rotated_x, rotated_y


@njit(cache=True)
def get_tree_vertices(cx, cy, angle_deg):
    """Return the 15 outline vertices of a tree as a (15, 2) float64 array.

    The outline is defined in the tree's local frame, rotated by ``angle_deg``
    degrees about the local origin and then translated so the origin lands on
    ``(cx, cy)``. Vertex 0 is the tip, vertices 1-7 run down the +x side and
    vertices 8-14 run back up the -x side.
    """
    angle_rad = angle_deg * math.pi / 180.0
    cos_a = math.cos(angle_rad)
    sin_a = math.sin(angle_rad)

    # Tip plus the seven vertices on the +x side, listed from top to bottom.
    right_side = np.array([
        [0.0, TIP_Y],
        [TOP_W / 2.0, TIER_1_Y],
        [TOP_W / 4.0, TIER_1_Y],
        [MID_W / 2.0, TIER_2_Y],
        [MID_W / 4.0, TIER_2_Y],
        [BASE_W / 2.0, BASE_Y],
        [TRUNK_W / 2.0, BASE_Y],
        [TRUNK_W / 2.0, TRUNK_BOTTOM_Y],
    ], dtype=np.float64)

    vertices = np.empty((15, 2), dtype=np.float64)
    for k in range(8):
        local_x = right_side[k, 0]
        local_y = right_side[k, 1]
        vertices[k, 0] = (local_x * cos_a - local_y * sin_a) + cx
        vertices[k, 1] = (local_x * sin_a + local_y * cos_a) + cy
        if k > 0:
            # The outline is symmetric about x = 0: vertex 15 - k mirrors vertex k.
            mirror_x = -local_x
            vertices[15 - k, 0] = (mirror_x * cos_a - local_y * sin_a) + cx
            vertices[15 - k, 1] = (mirror_x * sin_a + local_y * cos_a) + cy
    return vertices

In [4]:
@njit(cache=True)
def polygon_bounds(vertices):
    """Return the axis-aligned bounds of a vertex array.

    ``vertices`` is indexed as ``vertices[i, 0]`` (x) and ``vertices[i, 1]``
    (y). The result is the tuple ``(min_x, min_y, max_x, max_y)``.
    """
    lo_x = hi_x = vertices[0, 0]
    lo_y = hi_y = vertices[0, 1]
    for k in range(1, vertices.shape[0]):
        vx = vertices[k, 0]
        vy = vertices[k, 1]
        # A value below the running minimum can never exceed the running
        # maximum, so the two tests per axis are mutually exclusive.
        if vx < lo_x:
            lo_x = vx
        elif vx > hi_x:
            hi_x = vx
        if vy < lo_y:
            lo_y = vy
        elif vy > hi_y:
            hi_y = vy
    return lo_x, lo_y, hi_x, hi_y


@njit(cache=True)
def point_in_polygon(px, py, vertices):
    """Ray-casting test: is the point (px, py) inside the polygon?

    Casts a horizontal ray towards +x and toggles the result at every polygon
    edge the ray crosses; an odd number of crossings means "inside".
    """
    count = vertices.shape[0]
    odd_crossings = False
    prev = count - 1
    for cur in range(count):
        x_cur = vertices[cur, 0]
        y_cur = vertices[cur, 1]
        x_prev = vertices[prev, 0]
        y_prev = vertices[prev, 1]
        # Only edges whose endpoints lie on opposite sides of the ray's
        # y-level can be crossed (this also guarantees y_prev != y_cur).
        if (y_cur > py) != (y_prev > py):
            x_on_edge = (x_prev - x_cur) * (py - y_cur) / (y_prev - y_cur) + x_cur
            if px < x_on_edge:
                odd_crossings = not odd_crossings
        prev = cur
    return odd_crossings


@njit(cache=True)
def segments_intersect(p1x, p1y, p2x, p2y, p3x, p3y, p4x, p4y):
    """Check whether segment p1-p2 intersects segment p3-p4.

    Uses the orientation (cross-product sign) test: the segments can only
    intersect when the endpoints of each segment are not strictly on the same
    side of the other segment's supporting line. Touching segments (an
    endpoint lying on the other segment) count as intersecting.

    When all four cross products are zero the segments are collinear (or
    degenerate points on a common line); they then intersect only if their
    axis-aligned extents overlap. Without this check, collinear but disjoint
    segments would be reported as intersecting.
    """
    dax = p2x - p1x
    day = p2y - p1y
    dbx = p4x - p3x
    dby = p4y - p3y

    # Side of p1 and p2 relative to the line through p3-p4.
    cross_b1 = dbx * (p1y - p3y) - dby * (p1x - p3x)
    cross_b2 = dbx * (p2y - p3y) - dby * (p2x - p3x)
    if cross_b1 * cross_b2 > 0:
        return False

    # Side of p3 and p4 relative to the line through p1-p2.
    cross_a1 = dax * (p3y - p1y) - day * (p3x - p1x)
    cross_a2 = dax * (p4y - p1y) - day * (p4x - p1x)
    if cross_a1 * cross_a2 > 0:
        return False

    if cross_b1 == 0.0 and cross_b2 == 0.0 and cross_a1 == 0.0 and cross_a2 == 0.0:
        # Collinear case: the orientation test carries no information, so
        # compare the segments' extents along both axes instead.
        if max(p1x, p2x) < min(p3x, p4x) or max(p3x, p4x) < min(p1x, p2x):
            return False
        if max(p1y, p2y) < min(p3y, p4y) or max(p3y, p4y) < min(p1y, p2y):
            return False

    return True

In [5]:
@njit(cache=True)
def polygons_overlap(verts1, verts2, cx1, cy1, cx2, cy2):
    """Check whether two polygons overlap.

    ``verts1`` / ``verts2`` are the vertex arrays and ``(cx1, cy1)`` /
    ``(cx2, cy2)`` the corresponding centers, used only for a cheap distance
    rejection. Note that boundary contact can also be reported as an overlap,
    because the segment test treats touching segments as intersecting.
    """
    # Rejection 1: centers farther apart than any two trees could reach.
    sep_x = cx2 - cx1
    sep_y = cy2 - cy1
    if sep_x * sep_x + sep_y * sep_y > MAX_OVERLAP_DIST_SQ:
        return False

    # Rejection 2: axis-aligned bounding boxes are strictly disjoint.
    lo_x1, lo_y1, hi_x1, hi_y1 = polygon_bounds(verts1)
    lo_x2, lo_y2, hi_x2, hi_y2 = polygon_bounds(verts2)
    boxes_disjoint = hi_x1 < lo_x2 or hi_x2 < lo_x1 or hi_y1 < lo_y2 or hi_y2 < lo_y1
    if boxes_disjoint:
        return False

    count1 = verts1.shape[0]
    count2 = verts2.shape[0]

    # Any vertex of one polygon inside the other means overlap.
    for a in range(count1):
        if point_in_polygon(verts1[a, 0], verts1[a, 1], verts2):
            return True
    for b in range(count2):
        if point_in_polygon(verts2[b, 0], verts2[b, 1], verts1):
            return True

    # Otherwise look for any pair of intersecting edges.
    for a in range(count1):
        a_next = a + 1
        if a_next == count1:
            a_next = 0
        for b in range(count2):
            b_next = b + 1
            if b_next == count2:
                b_next = 0
            if segments_intersect(
                verts1[a, 0], verts1[a, 1], verts1[a_next, 0], verts1[a_next, 1],
                verts2[b, 0], verts2[b, 1], verts2[b_next, 0], verts2[b_next, 1],
            ):
                return True
    return False


@njit(cache=True)
def has_any_overlap(all_vertices, centers_x, centers_y):
    """Return True as soon as any two polygons in the collection overlap.

    ``all_vertices[k]`` is the vertex array of polygon ``k`` and
    ``(centers_x[k], centers_y[k])`` its center. Every unordered pair is
    tested once.
    """
    count = len(all_vertices)
    for first in range(count):
        for second in range(first + 1, count):
            collides = polygons_overlap(
                all_vertices[first], all_vertices[second],
                centers_x[first], centers_y[first],
                centers_x[second], centers_y[second],
            )
            if collides:
                return True
    return False

In [6]:
@njit(cache=True)
def compute_bounding_box(all_vertices):
    """Return ``(min_x, min_y, max_x, max_y)`` enclosing every polygon.

    For an empty collection the result is ``(inf, inf, -inf, -inf)``.
    """
    lo_x = math.inf
    lo_y = math.inf
    hi_x = -math.inf
    hi_y = -math.inf
    for k in range(len(all_vertices)):
        poly_lo_x, poly_lo_y, poly_hi_x, poly_hi_y = polygon_bounds(all_vertices[k])
        if poly_lo_x < lo_x:
            lo_x = poly_lo_x
        if poly_lo_y < lo_y:
            lo_y = poly_lo_y
        if poly_hi_x > hi_x:
            hi_x = poly_hi_x
        if poly_hi_y > hi_y:
            hi_y = poly_hi_y
    return lo_x, lo_y, hi_x, hi_y


@njit(cache=True)
def get_side_length(all_vertices):
    """Return the side of the smallest axis-aligned square containing all polygons."""
    lo_x, lo_y, hi_x, hi_y = compute_bounding_box(all_vertices)
    width = hi_x - lo_x
    height = hi_y - lo_y
    return max(width, height)


@njit(cache=True)
def calculate_score_numba(all_vertices):
    """Return the per-tree score: bounding-square area divided by the tree count."""
    tree_count = len(all_vertices)
    square_side = get_side_length(all_vertices)
    return square_side * square_side / tree_count

In [7]:
@njit(cache=True)
def create_grid_vertices_extended(seed_xs, seed_ys, seed_degs, a, b, ncols, nrows, append_x, append_y):
    """Create the tree outlines of a translated seed grid, with optional append.

    Every seed ``s`` is replicated on an ``ncols`` x ``nrows`` lattice with
    column step ``a`` (along x) and row step ``b`` (along y), keeping the
    seed's angle. When ``append_x`` is set, one extra column (index ``ncols``)
    is added using seed 1 only; when ``append_y`` is set, one extra row (index
    ``nrows``) is added using seed 1 only. Both appends require at least two
    seeds and are skipped otherwise.

    Returns ``(all_vertices, centers_x, centers_y)`` where ``all_vertices`` is
    a list of (15, 2) vertex arrays and the center arrays have exactly one
    entry per generated tree, in the same order.
    """
    n_seeds = len(seed_xs)

    # The appended trees are copies of seed 1, so they only exist when a
    # second seed is available. Count them only in that case so the center
    # arrays never carry uninitialised entries.
    has_second_seed = n_seeds > 1
    n_total = n_seeds * ncols * nrows
    if append_x and has_second_seed:
        n_total += nrows
    if append_y and has_second_seed:
        n_total += ncols

    all_vertices = []
    centers_x = np.empty(n_total, dtype=np.float64)
    centers_y = np.empty(n_total, dtype=np.float64)

    idx = 0
    for s in range(n_seeds):
        for col in range(ncols):
            for row in range(nrows):
                cx = seed_xs[s] + col * a
                cy = seed_ys[s] + row * b
                all_vertices.append(get_tree_vertices(cx, cy, seed_degs[s]))
                centers_x[idx] = cx
                centers_y[idx] = cy
                idx += 1

    if append_x and has_second_seed:
        # Extra column to the right of the lattice.
        for row in range(nrows):
            cx = seed_xs[1] + ncols * a
            cy = seed_ys[1] + row * b
            all_vertices.append(get_tree_vertices(cx, cy, seed_degs[1]))
            centers_x[idx] = cx
            centers_y[idx] = cy
            idx += 1

    if append_y and has_second_seed:
        # Extra row above the lattice.
        for col in range(ncols):
            cx = seed_xs[1] + col * a
            cy = seed_ys[1] + nrows * b
            all_vertices.append(get_tree_vertices(cx, cy, seed_degs[1]))
            centers_x[idx] = cx
            centers_y[idx] = cy
            idx += 1

    return all_vertices, centers_x, centers_y


@njit(cache=True)
def get_initial_translations(seed_xs, seed_ys, seed_degs):
    """Return the width and height of the bounding box around all seed trees.

    The pair ``(width, height)`` serves as a starting guess for the grid's
    column and row translation lengths.
    """
    outlines = []
    for k in range(len(seed_xs)):
        outlines.append(get_tree_vertices(seed_xs[k], seed_ys[k], seed_degs[k]))
    lo_x, lo_y, hi_x, hi_y = compute_bounding_box(outlines)
    width = hi_x - lo_x
    height = hi_y - lo_y
    return width, height

In [8]:
@njit(cache=True)
def get_final_grid_positions_extended(seed_xs, seed_ys, seed_degs, a, b, ncols, nrows, append_x, append_y):
    """Get the tree positions of a translated seed grid, with optional append.

    Produces the same trees, in the same order, as the grid construction used
    during optimization: every seed replicated on an ``ncols`` x ``nrows``
    lattice (column step ``a``, row step ``b``), followed by an optional extra
    column (``append_x``) and an optional extra row (``append_y``) built from
    seed 1. Both appends require at least two seeds and are skipped otherwise.

    Returns ``(xs, ys, degs)`` float64 arrays with exactly one entry per tree.
    """
    n_seeds = len(seed_xs)

    # Appended trees copy seed 1; count them only when that seed exists so the
    # returned arrays never contain uninitialised np.empty() entries.
    has_second_seed = n_seeds > 1
    n_total = n_seeds * ncols * nrows
    if append_x and has_second_seed:
        n_total += nrows
    if append_y and has_second_seed:
        n_total += ncols

    xs = np.empty(n_total, dtype=np.float64)
    ys = np.empty(n_total, dtype=np.float64)
    degs = np.empty(n_total, dtype=np.float64)

    idx = 0
    for s in range(n_seeds):
        for col in range(ncols):
            for row in range(nrows):
                xs[idx] = seed_xs[s] + col * a
                ys[idx] = seed_ys[s] + row * b
                degs[idx] = seed_degs[s]
                idx += 1

    if append_x and has_second_seed:
        # Extra column to the right of the lattice.
        for row in range(nrows):
            xs[idx] = seed_xs[1] + ncols * a
            ys[idx] = seed_ys[1] + row * b
            degs[idx] = seed_degs[1]
            idx += 1

    if append_y and has_second_seed:
        # Extra row above the lattice.
        for col in range(ncols):
            xs[idx] = seed_xs[1] + col * a
            ys[idx] = seed_ys[1] + nrows * b
            degs[idx] = seed_degs[1]
            idx += 1

    return xs, ys, degs

In [9]:
# THE CRITICAL SA OPTIMIZATION FUNCTION
@njit(cache=True)
def sa_optimize_improved(
    seed_xs_init,
    seed_ys_init,
    seed_degs_init,
    a_init,
    b_init,
    ncols,
    nrows,
    append_x,
    append_y,
    Tmax,
    Tmin,
    nsteps,
    nsteps_per_T,
    position_delta,
    angle_delta,
    angle_delta2,
    delta_t,
    random_seed,
):
    """Improved simulated annealing with translation optimization.

    Jointly optimizes the seed trees (position and angle) and the lattice
    translation lengths ``a`` / ``b`` so that the resulting grid has the
    smallest score (bounding-square area divided by tree count) without any
    overlapping trees.

    Parameters:
        seed_xs_init, seed_ys_init, seed_degs_init: starting seed positions
            and angles (degrees); they are copied, not modified.
        a_init, b_init: starting column (x) and row (y) translation lengths.
        ncols, nrows, append_x, append_y: grid layout forwarded to
            create_grid_vertices_extended().
        Tmax, Tmin: first and last temperature of the schedule.
        nsteps: number of temperature levels.
        nsteps_per_T: number of proposed moves per temperature level.
        position_delta: maximum |dx| and |dy| of a single-seed move.
        angle_delta: maximum |rotation| (degrees) of a single-seed move.
        angle_delta2: maximum |rotation| (degrees) of the all-seeds move.
        delta_t: maximum relative change of ``a`` and ``b`` per move.
        random_seed: value passed to np.random.seed() for reproducibility.

    Returns:
        ``(best_score, best_xs, best_ys, best_degs, best_a, best_b)`` for the
        best accepted state seen (or the initial state if none improved on it).
    """
    np.random.seed(random_seed)
    n_seeds = len(seed_xs_init)

    # Work on copies so the caller's arrays are left untouched.
    seed_xs = seed_xs_init.copy()
    seed_ys = seed_ys_init.copy()
    seed_degs = seed_degs_init.copy()

    a = a_init
    b = b_init

    all_vertices, centers_x, centers_y = create_grid_vertices_extended(seed_xs, seed_ys, seed_degs, a, b, ncols, nrows, append_x, append_y)
    if has_any_overlap(all_vertices, centers_x, centers_y):
        # The starting grid is invalid: widen the lattice to at least 1.5x the
        # seeds' bounding box. The widened grid is not re-checked for overlap.
        a_test, b_test = get_initial_translations(seed_xs, seed_ys, seed_degs)
        a = max(a, a_test * 1.5)
        b = max(b, b_test * 1.5)
        all_vertices, centers_x, centers_y = create_grid_vertices_extended(seed_xs, seed_ys, seed_degs, a, b, ncols, nrows, append_x, append_y)

    current_score = calculate_score_numba(all_vertices)

    best_score = current_score
    best_xs = seed_xs.copy()
    best_ys = seed_ys.copy()
    best_degs = seed_degs.copy()
    best_a = a
    best_b = b

    T = Tmax
    # Exponent of the geometric cooling schedule: T goes from Tmax to Tmin.
    Tfactor = -math.log(Tmax / Tmin)

    # Move types: 0..n_seeds-1 perturb one seed, n_seeds rescales (a, b),
    # n_seeds+1 rotates all seeds together.
    n_move_types = n_seeds + 2

    for step in range(nsteps):
        for _ in range(nsteps_per_T):
            move_type = np.random.randint(0, n_move_types)

            if move_type < n_seeds:
                # Single-seed move: jitter position and angle of seed i.
                i = move_type
                old_x = seed_xs[i]
                old_y = seed_ys[i]
                old_deg = seed_degs[i]

                dx = (np.random.random() * 2.0 - 1.0) * position_delta
                dy = (np.random.random() * 2.0 - 1.0) * position_delta
                ddeg = (np.random.random() * 2.0 - 1.0) * angle_delta

                seed_xs[i] = old_x + dx
                seed_ys[i] = old_y + dy
                seed_degs[i] = (old_deg + ddeg) % 360.0

            elif move_type == n_seeds:
                # Translation move: scale a and b by independent factors
                # in [1 - delta_t, 1 + delta_t).
                old_a = a
                old_b = b
                da = (np.random.random() * 2.0 - 1.0) * delta_t
                db = (np.random.random() * 2.0 - 1.0) * delta_t
                a = old_a + old_a * da
                b = old_b + old_b * db

            else:
                # Global rotation move: add the same angle to every seed.
                old_degs = seed_degs.copy()
                ddeg = (np.random.random() * 2.0 - 1.0) * angle_delta2
                for i in range(n_seeds):
                    seed_degs[i] = (seed_degs[i] + ddeg) % 360.0

            # Cheap pre-check on a 2x2 grid without appended trees; if that
            # already overlaps, undo the move and skip the full-grid check.
            test_vertices, test_cx, test_cy = create_grid_vertices_extended(seed_xs, seed_ys, seed_degs, a, b, 2, 2, False, False)
            if has_any_overlap(test_vertices, test_cx, test_cy):
                if move_type < n_seeds:
                    seed_xs[move_type] = old_x
                    seed_ys[move_type] = old_y
                    seed_degs[move_type] = old_deg
                elif move_type == n_seeds:
                    a = old_a
                    b = old_b
                else:
                    for i in range(n_seeds):
                        seed_degs[i] = old_degs[i]
                continue

            # Full validity check on the real grid (including appended trees).
            new_vertices, new_cx, new_cy = create_grid_vertices_extended(seed_xs, seed_ys, seed_degs, a, b, ncols, nrows, append_x, append_y)

            if has_any_overlap(new_vertices, new_cx, new_cy):
                if move_type < n_seeds:
                    seed_xs[move_type] = old_x
                    seed_ys[move_type] = old_y
                    seed_degs[move_type] = old_deg
                elif move_type == n_seeds:
                    a = old_a
                    b = old_b
                else:
                    for i in range(n_seeds):
                        seed_degs[i] = old_degs[i]
                continue

            new_score = calculate_score_numba(new_vertices)
            delta = new_score - current_score

            # Metropolis criterion: always accept improvements; accept a
            # non-improving move with probability exp(-delta / T). Once T is
            # at or below 1e-10 only improvements are accepted.
            accept = False
            if delta < 0:
                accept = True
            elif T > 1e-10:
                if np.random.random() < math.exp(-delta / T):
                    accept = True

            if accept:
                current_score = new_score
                if new_score < best_score:
                    # Snapshot the new best state.
                    best_score = new_score
                    best_xs = seed_xs.copy()
                    best_ys = seed_ys.copy()
                    best_degs = seed_degs.copy()
                    best_a = a
                    best_b = b
            else:
                # Rejected: restore the state from before the move.
                if move_type < n_seeds:
                    seed_xs[move_type] = old_x
                    seed_ys[move_type] = old_y
                    seed_degs[move_type] = old_deg
                elif move_type == n_seeds:
                    a = old_a
                    b = old_b
                else:
                    for i in range(n_seeds):
                        seed_degs[i] = old_degs[i]

        # Cool down; after the last level T equals Tmin.
        T = Tmax * math.exp(Tfactor * (step + 1) / nsteps)

    return best_score, best_xs, best_ys, best_degs, best_a, best_b

In [10]:
# Worker function for multiprocessing
def optimize_grid_config(args):
    """Run simulated annealing for one grid configuration (multiprocessing worker).

    ``args`` is a single tuple so the function can be used with ``Pool.map``:
    ``(ncols, nrows, append_x, append_y, initial_seeds, a_init, b_init,
    params, seed)``, where ``initial_seeds`` holds ``(x, y, deg)`` tuples and
    ``params`` is the SA parameter dictionary.

    Returns ``(n_trees, best_score, tree_data)`` with ``tree_data`` a list of
    ``(x, y, deg)`` tuples for the optimized grid.
    """
    (ncols, nrows, append_x, append_y,
     initial_seeds, a_init, b_init, params, seed) = args

    # Split the seed tuples into the three float64 arrays the kernel expects.
    seed_xs = np.fromiter((entry[0] for entry in initial_seeds), dtype=np.float64)
    seed_ys = np.fromiter((entry[1] for entry in initial_seeds), dtype=np.float64)
    seed_degs = np.fromiter((entry[2] for entry in initial_seeds), dtype=np.float64)

    # Number of trees this configuration is meant to contain.
    n_trees = len(initial_seeds) * ncols * nrows
    if append_x:
        n_trees += nrows
    if append_y:
        n_trees += ncols

    sa_result = sa_optimize_improved(
        seed_xs, seed_ys, seed_degs,
        a_init, b_init,
        ncols, nrows,
        append_x, append_y,
        params["Tmax"],
        params["Tmin"],
        params["nsteps"],
        params["nsteps_per_T"],
        params["position_delta"],
        params["angle_delta"],
        params["angle_delta2"],
        params["delta_t"],
        seed,
    )
    best_score, best_xs, best_ys, best_degs, best_a, best_b = sa_result

    # Expand the optimized seeds and translations back into a full tree list.
    final_xs, final_ys, final_degs = get_final_grid_positions_extended(
        best_xs, best_ys, best_degs, best_a, best_b, ncols, nrows, append_x, append_y
    )
    tree_data = list(zip(final_xs, final_ys, final_degs))

    return n_trees, best_score, tree_data

In [11]:
# Deletion cascade function
@njit(cache=True)
def deletion_cascade_numba(all_xs, all_ys, all_degs, group_sizes):
    """Apply the tree deletion cascade to a flattened submission.

    The inputs hold the trees of groups n = 1..200 back to back (group n has
    n trees and starts at offset n * (n - 1) / 2), so each array must have at
    least 20100 entries. Going from n = 200 down to n = 2, every way of
    removing one tree from group n is tried; if the best resulting layout has
    a smaller bounding-square side than the current group n - 1, it replaces
    group n - 1. Because group n may itself have been replaced in the previous
    iteration, improvements cascade downwards.

    ``group_sizes`` is accepted for interface compatibility but is not used.

    Returns ``(new_xs, new_ys, new_degs, side_lengths)`` where the first three
    are modified copies of the inputs and ``side_lengths[n]`` is the final
    bounding-square side of group n (index 0 is unused).
    """
    max_n = 200

    # Offset of group n in the flattened arrays: 0, 0, 1, 3, 6, ...
    group_start = np.zeros(max_n + 1, dtype=np.int64)
    for n in range(2, max_n + 1):
        group_start[n] = group_start[n - 1] + (n - 1)

    new_xs = all_xs.copy()
    new_ys = all_ys.copy()
    new_degs = all_degs.copy()

    side_lengths = np.zeros(max_n + 1, dtype=np.float64)
    for n in range(1, max_n + 1):
        start = group_start[n]
        end = start + n
        vertices = [get_tree_vertices(new_xs[i], new_ys[i], new_degs[i]) for i in range(start, end)]
        side_lengths[n] = get_side_length(vertices)

    # Per-tree bounds (min_x, min_y, max_x, max_y) of the group being examined.
    tree_bounds = np.empty((max_n, 4), dtype=np.float64)

    for n in range(max_n, 1, -1):
        start_n = group_start[n]
        start_prev = group_start[n - 1]

        # The trees of group n do not change while candidates are evaluated,
        # so compute each tree's bounds once instead of once per candidate.
        for i in range(n):
            idx = start_n + i
            x1, y1, x2, y2 = polygon_bounds(get_tree_vertices(new_xs[idx], new_ys[idx], new_degs[idx]))
            tree_bounds[i, 0] = x1
            tree_bounds[i, 1] = y1
            tree_bounds[i, 2] = x2
            tree_bounds[i, 3] = y2

        best_side = side_lengths[n - 1]
        best_delete_idx = -1

        for del_idx in range(n):
            # Bounding box of group n with tree del_idx left out.
            min_x = math.inf
            min_y = math.inf
            max_x = -math.inf
            max_y = -math.inf
            for i in range(n):
                if i == del_idx:
                    continue
                if tree_bounds[i, 0] < min_x:
                    min_x = tree_bounds[i, 0]
                if tree_bounds[i, 1] < min_y:
                    min_y = tree_bounds[i, 1]
                if tree_bounds[i, 2] > max_x:
                    max_x = tree_bounds[i, 2]
                if tree_bounds[i, 3] > max_y:
                    max_y = tree_bounds[i, 3]

            candidate_side = max(max_x - min_x, max_y - min_y)
            if candidate_side < best_side:
                best_side = candidate_side
                best_delete_idx = del_idx

        if best_delete_idx >= 0:
            # Overwrite group n - 1 with group n minus the deleted tree.
            out_idx = start_prev
            for i in range(n):
                if i != best_delete_idx:
                    in_idx = start_n + i
                    new_xs[out_idx] = new_xs[in_idx]
                    new_ys[out_idx] = new_ys[in_idx]
                    new_degs[out_idx] = new_degs[in_idx]
                    out_idx += 1
            side_lengths[n - 1] = best_side

    return new_xs, new_ys, new_degs, side_lengths

In [15]:
# I/O helpers
def load_submission_data(filepath):
    """Load a submission CSV and return flattened coordinate arrays.

    The file must have an ``id`` column whose values start with a zero-padded
    three-digit group number and an underscore (``"001_"`` .. ``"200_"``),
    ``x`` and ``y`` columns, and an angle column named either ``deg`` or
    ``angle``. Values may be plain numbers or strings carrying an ``s`` marker
    (saspav format); the marker is removed from all three value columns.

    Returns ``(xs, ys, degs)`` as float64 arrays with the rows of group 1
    first, then group 2, and so on, each group sorted by ``id``.
    """
    df = pd.read_csv(filepath)

    # Handle different column names
    angle_col = 'deg' if 'deg' in df.columns else 'angle'

    # Handle 's' prefix in values (saspav format). Test for "not numeric"
    # rather than "object" so string-typed columns are converted regardless of
    # which string dtype pandas chose, and include the angle column whichever
    # name it has.
    for col in ('x', 'y', angle_col):
        if col in df.columns and not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].astype(str).str.replace('s', '', regex=False).astype(float)

    all_xs = []
    all_ys = []
    all_degs = []

    for n in range(1, 201):
        prefix = f"{n:03d}_"
        group = df[df["id"].str.startswith(prefix)].sort_values("id")
        all_xs.extend(group["x"].values)
        all_ys.extend(group["y"].values)
        all_degs.extend(group[angle_col].values)

    return (
        np.array(all_xs, dtype=np.float64),
        np.array(all_ys, dtype=np.float64),
        np.array(all_degs, dtype=np.float64),
    )


def calculate_total_score(all_xs, all_ys, all_degs):
    """Sum the per-group scores of a flattened submission.

    Groups n = 1..200 are stored back to back (group n has n trees). Each
    group contributes ``side**2 / n``, where ``side`` is the side of the
    group's bounding square.
    """
    total = 0.0
    offset = 0
    for n in range(1, 201):
        group_outlines = []
        for k in range(offset, offset + n):
            group_outlines.append(get_tree_vertices(all_xs[k], all_ys[k], all_degs[k]))
        side = get_side_length(group_outlines)
        total += side * side / n
        offset += n
    return total


def save_submission(filepath, all_xs, all_ys, all_degs):
    """Write a flattened submission to ``filepath`` as CSV.

    Emits one row per tree for groups n = 1..200 with the columns ``id``
    (``"NNN_III"``: zero-padded group number and index within the group),
    ``x``, ``y`` and ``angle``. No index column is written.
    """
    tree_ids = [f"{n:03d}_{i:03d}" for n in range(1, 201) for i in range(n)]
    records = [
        {
            "id": tree_id,
            "x": all_xs[pos],
            "y": all_ys[pos],
            "angle": all_degs[pos],
        }
        for pos, tree_id in enumerate(tree_ids)
    ]
    pd.DataFrame(records).to_csv(filepath, index=False)

In [16]:
# Load baseline
# NOTE: absolute, machine-specific path; adjust when running elsewhere.
baseline_path = '/home/code/external_data/saspav_latest/santa-2025.csv'
# Flattened arrays: groups n = 1..200 back to back, group n holding n trees.
baseline_xs, baseline_ys, baseline_degs = load_submission_data(baseline_path)
# Sum of side^2 / n over all 200 groups.
baseline_total = calculate_total_score(baseline_xs, baseline_ys, baseline_degs)
print(f"Baseline total score: {baseline_total:.6f}")

Baseline total score: 70.659958


In [17]:
# Pre-optimized seeds from jiweiliu kernel
# Each seed is an (x, y, angle in degrees) tuple; the grid is built by
# translating every seed by multiples of (a, b).
initial_seeds = [
    (-4.191683864412409, -4.498489528496051, 74.54421568660419),
    (-4.92202045352307, -4.727639556649786, 254.5401905706735),
]

# Initial translation lengths
# a_init is the column step along x, b_init the row step along y.
a_init = 0.8744896974945239
b_init = 0.7499641699190263

print(f"Seed 1: {initial_seeds[0]}")
print(f"Seed 2: {initial_seeds[1]}")
print(f"a_init: {a_init}, b_init: {b_init}")

Seed 1: (-4.191683864412409, -4.498489528496051, 74.54421568660419)
Seed 2: (-4.92202045352307, -4.727639556649786, 254.5401905706735)
a_init: 0.8744896974945239, b_init: 0.7499641699190263


In [18]:
# Generate grid configurations
# Each entry is (ncols, nrows, append_x, append_y). With the two seeds above a
# configuration holds 2 * ncols * nrows trees, plus nrows if append_x and plus
# ncols if append_y.
grid_configs = [
    (3, 5, False, False),   # 30 trees
    (4, 5, False, False),   # 40 trees
    (4, 6, False, False),   # 48 trees
    (4, 7, False, False),   # 56 trees
    (5, 7, False, True),    # 75 trees
    (5, 8, False, False),   # 80 trees
    (6, 7, False, False),   # 84 trees
    (7, 11, False, True),   # 161 trees
    (8, 12, False, True),   # 200 trees
]

# Generate more configurations for better coverage
# Only grids with nrows >= ncols and a base size of 20..200 trees are
# considered; the append variants are added when they still fit in 200 trees.
for ncols in range(2, 11):
    for nrows in range(ncols, 15):
        n_trees = 2 * ncols * nrows
        if 20 <= n_trees <= 200:
            if (ncols, nrows, False, False) not in grid_configs:
                grid_configs.append((ncols, nrows, False, False))
            n_with_append_y = n_trees + ncols
            if n_with_append_y <= 200:
                if (ncols, nrows, False, True) not in grid_configs:
                    grid_configs.append((ncols, nrows, False, True))
            n_with_append_x = n_trees + nrows
            if n_with_append_x <= 200:
                if (ncols, nrows, True, False) not in grid_configs:
                    grid_configs.append((ncols, nrows, True, False))

# Remove duplicates and sort
# The sort key is the total tree count only; configurations with the same
# count keep the order in which the set happened to yield them.
grid_configs = list(set(grid_configs))
grid_configs.sort(key=lambda x: (2 * x[0] * x[1] + (x[1] if x[2] else 0) + (x[0] if x[3] else 0)))

print(f"Generated {len(grid_configs)} grid configurations")

Generated 197 grid configurations


In [19]:
# SA parameters - exactly as in jiweiliu kernel
# Keys match the arguments that optimize_grid_config() forwards to
# sa_optimize_improved().
sa_params = {
    "Tmax": 0.001,          # starting temperature
    "Tmin": 0.000001,       # final temperature
    "nsteps": 10,           # number of temperature levels
    "nsteps_per_T": 10000,  # 100,000 total SA moves per config
    "position_delta": 0.002,  # max |dx|, |dy| of a single-seed move
    "angle_delta": 1.0,     # max |rotation| in degrees of a single-seed move
    "angle_delta2": 1.0,    # max |rotation| in degrees of the all-seeds move
    "delta_t": 0.002,       # max relative change of a and b per move
}

print(f"SA params: {sa_params}")
print(f"Total SA moves per config: {sa_params['nsteps'] * sa_params['nsteps_per_T']:,}")

SA params: {'Tmax': 0.001, 'Tmin': 1e-06, 'nsteps': 10, 'nsteps_per_T': 10000, 'position_delta': 0.002, 'angle_delta': 1.0, 'angle_delta2': 1.0, 'delta_t': 0.002}
Total SA moves per config: 100,000


In [20]:
# Warm up numba
# Call the SA kernel once on a tiny dummy problem so that it (and every njit
# helper it uses) is compiled before the timed parallel run.
print("Compiling numba functions...")
t0 = time.time()
dummy_xs = np.array([0.0, 1.0], dtype=np.float64)
dummy_ys = np.array([0.0, 0.0], dtype=np.float64)
dummy_degs = np.array([0.0, 180.0], dtype=np.float64)
# Positional arguments after the seeds: a_init, b_init / ncols, nrows /
# append_x, append_y / Tmax, Tmin, nsteps, nsteps_per_T, position_delta,
# angle_delta, angle_delta2, delta_t, random_seed.
_ = sa_optimize_improved(
    dummy_xs, dummy_ys, dummy_degs,
    1.0, 1.0,
    2, 2,
    False, False,
    0.001, 0.0001, 2, 100, 0.01, 1.0, 1.0, 0.01, 42
)
print(f"Numba compilation done in {time.time() - t0:.1f}s")

Compiling numba functions...


Numba compilation done in 2.5s


In [21]:
# Prepare tasks
# One task tuple per (grid configuration, random seed); the tuple layout is
# the one optimize_grid_config() unpacks. tree_counts[k] is the tree count of
# tasks[k].
tasks = []
tree_counts = []
for i, (ncols, nrows, append_x, append_y) in enumerate(grid_configs):
    n_base = 2 * ncols * nrows
    n_append_x = nrows if append_x else 0
    n_append_y = ncols if append_y else 0
    n_trees = n_base + n_append_x + n_append_y

    # Groups larger than 200 trees are not part of the submission.
    if n_trees > 200:
        continue

    # Run with 3 different random seeds for each config
    for seed in range(3):
        tasks.append((ncols, nrows, append_x, append_y, initial_seeds, a_init, b_init, sa_params, seed))
        tree_counts.append(n_trees)

print(f"Prepared {len(tasks)} tasks for SA optimization")
print(f"Unique tree counts: {sorted(set(tree_counts))}")

Prepared 591 tasks for SA optimization
Unique tree counts: [20, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 60, 63, 64, 65, 66, 68, 69, 70, 72, 75, 76, 77, 78, 80, 81, 84, 85, 87, 88, 90, 91, 92, 95, 96, 98, 99, 100, 102, 104, 105, 108, 110, 112, 114, 115, 116, 117, 119, 120, 121, 125, 126, 128, 130, 132, 133, 135, 136, 138, 140, 143, 144, 145, 147, 150, 152, 153, 154, 156, 160, 161, 162, 165, 168, 169, 170, 171, 174, 175, 176, 180, 182, 184, 187, 189, 190, 192, 195, 196, 198, 200]


In [22]:
# Run SA optimization in parallel
print(f"Running SA optimization on {len(tasks)} configurations...")
# Never start more workers than there are tasks.
num_workers = min(cpu_count(), len(tasks))
print(f"Using {num_workers} workers")

t0 = time.time()
# pool.map returns the results in the same order as `tasks`; each result is
# the (n_trees, best_score, tree_data) tuple from optimize_grid_config().
with Pool(num_workers) as pool:
    results = pool.map(optimize_grid_config, tasks)
elapsed = time.time() - t0
print(f"SA optimization completed in {elapsed:.1f}s ({elapsed/60:.1f} min)")

Running SA optimization on 591 configurations...
Using 26 workers


SA optimization completed in 591.7s (9.9 min)


In [23]:
# Collect results and compare with baseline
# new_trees maps a tree count N to the best (score, tree_data) found for it,
# but only when that score beats the baseline's score for the same N.
new_trees = {}
improved_count = 0
for n_trees, score, tree_data in results:
    # Get baseline score for this n
    # Group N starts at offset 1 + 2 + ... + (N - 1) in the flattened arrays.
    idx = sum(range(1, n_trees))
    baseline_vertices = [get_tree_vertices(baseline_xs[idx + i], baseline_ys[idx + i], baseline_degs[idx + i]) for i in range(n_trees)]
    baseline_score = calculate_score_numba(baseline_vertices)

    if score < baseline_score:
        if n_trees not in new_trees or score < new_trees[n_trees][0]:
            new_trees[n_trees] = (score, tree_data)
            # Counts every time a new best is recorded, so it can exceed
            # len(new_trees) when several results improve the same N.
            improved_count += 1
            print(f"N={n_trees}: SA={score:.6f} < baseline={baseline_score:.6f} (improvement: {baseline_score - score:.6f})")

print(f"\nFound improvements for {len(new_trees)} different N values")
print(f"Total improved configurations: {improved_count}")


Found improvements for 0 different N values
Total improved configurations: 0


In [24]:
# Debug: Check a few results vs baseline
# Prints the SA score next to the baseline score for the first 20 results;
# a positive diff means the SA layout is worse than the baseline.
print("Comparing SA results vs baseline for a few N values:")
for n_trees, score, tree_data in results[:20]:
    # Offset of group N in the flattened baseline arrays.
    idx = sum(range(1, n_trees))
    baseline_vertices = [get_tree_vertices(baseline_xs[idx + i], baseline_ys[idx + i], baseline_degs[idx + i]) for i in range(n_trees)]
    baseline_score = calculate_score_numba(baseline_vertices)
    diff = score - baseline_score
    print(f"N={n_trees}: SA={score:.6f}, baseline={baseline_score:.6f}, diff={diff:+.6f}")

Comparing SA results vs baseline for a few N values:
N=20: SA=0.535060, baseline=0.376057, diff=+0.159004
N=20: SA=0.532802, baseline=0.376057, diff=+0.156745
N=20: SA=0.532474, baseline=0.376057, diff=+0.156417
N=22: SA=0.615471, baseline=0.375258, diff=+0.240213
N=22: SA=0.612228, baseline=0.375258, diff=+0.236970
N=22: SA=0.611241, baseline=0.375258, diff=+0.235983
N=24: SA=0.375920, baseline=0.365506, diff=+0.010414
N=24: SA=0.375809, baseline=0.365506, diff=+0.010303
N=24: SA=0.375855, baseline=0.365506, diff=+0.010349
N=24: SA=0.625291, baseline=0.365506, diff=+0.259785
N=24: SA=0.622946, baseline=0.365506, diff=+0.257441
N=24: SA=0.623844, baseline=0.365506, diff=+0.258339
N=25: SA=0.427844, baseline=0.372144, diff=+0.055700
N=25: SA=0.426155, baseline=0.372144, diff=+0.054011
N=25: SA=0.425582, baseline=0.372144, diff=+0.053437
N=26: SA=0.701920, baseline=0.373997, diff=+0.327923
N=26: SA=0.705210, baseline=0.373997, diff=+0.331213
N=26: SA=0.701991, baseline=0.373997, diff=+0.

# Corner Extraction Approach

Since the jiweiliu SA approach didn't improve on the baseline (saspav_latest is already better than what jiweiliu can produce), let's try the chistyakov corner extraction approach.

This approach extracts smaller layouts from the corners of larger layouts.

In [25]:
# Corner extraction approach
# For each large N, extract smaller layouts from corners

def corner_extraction(all_xs, all_ys, all_degs,
                      min_source_n=50, max_source_n=200, target_n_limit=50):
    """Look for better small layouts hidden in the corners of larger layouts.

    For every source layout size in ``min_source_n..max_source_n`` and each of
    the four corners of its bounding box, the trees are ranked by the largest
    Chebyshev distance from the corner to any vertex of the tree.  The
    ``target_n`` closest trees form a candidate layout, scored as
    ``side**2 / target_n`` with ``side`` taken from ``get_side_length``.  The
    candidate is kept when it scores lower than the layout currently stored
    for ``target_n``.

    Parameters
    ----------
    all_xs, all_ys, all_degs : sequence of float
        Flat per-tree x, y and rotation (degrees) of all layouts stored back
        to back; the N-tree layout starts at index ``N * (N - 1) // 2``.
    min_source_n, max_source_n : int, optional
        Inclusive range of source layout sizes to mine (default 50..200).
    target_n_limit : int, optional
        Exclusive upper bound on the extracted layout size (default 50);
        sizes ``2 .. min(source_n, target_n_limit) - 1`` are tried.

    Returns
    -------
    dict
        Maps ``target_n`` to the best improvement found, as the tuple
        ``(subset_score, baseline_score, subset_xs, subset_ys, subset_degs,
        source_n)``.  Empty when nothing beats the stored layouts.

    Notes
    -----
    A progress line is printed each time a new best candidate is recorded.
    """
    improvements = {}
    # Score of the stored layout for each target N.  It does not depend on the
    # source layout or the corner, so it is computed once on first use.
    baseline_scores = {}

    for source_n in range(min_source_n, max_source_n + 1):
        # Slice this layout out of the flat arrays.
        source_idx = source_n * (source_n - 1) // 2
        source_xs = all_xs[source_idx:source_idx + source_n]
        source_ys = all_ys[source_idx:source_idx + source_n]
        source_degs = all_degs[source_idx:source_idx + source_n]

        # Polygons are built once per source layout and reused for every
        # corner and every subset size below.
        all_vertices = [get_tree_vertices(source_xs[i], source_ys[i], source_degs[i]) for i in range(source_n)]
        min_x, min_y, max_x, max_y = compute_bounding_box(all_vertices)

        corners = [(min_x, min_y), (min_x, max_y), (max_x, min_y), (max_x, max_y)]

        for corner_x, corner_y in corners:
            # Rank trees by the farthest reach (Chebyshev) of any vertex from
            # this corner; ties fall back to the tree index.
            distances = sorted(
                (
                    max(
                        np.abs(verts[:, 0] - corner_x).max(),
                        np.abs(verts[:, 1] - corner_y).max(),
                    ),
                    i,
                )
                for i, verts in enumerate(all_vertices)
            )

            for target_n in range(2, min(source_n, target_n_limit)):
                # The target_n trees closest to the corner.
                subset_indices = [idx for _, idx in distances[:target_n]]
                subset_vertices = [all_vertices[i] for i in subset_indices]
                subset_side = get_side_length(subset_vertices)
                subset_score = subset_side * subset_side / target_n

                if target_n not in baseline_scores:
                    target_idx = target_n * (target_n - 1) // 2
                    baseline_vertices = [
                        get_tree_vertices(all_xs[target_idx + i], all_ys[target_idx + i], all_degs[target_idx + i])
                        for i in range(target_n)
                    ]
                    baseline_scores[target_n] = calculate_score_numba(baseline_vertices)
                baseline_score = baseline_scores[target_n]

                if subset_score < baseline_score:
                    if target_n not in improvements or subset_score < improvements[target_n][0]:
                        # Coordinates are only materialised for candidates
                        # that are actually recorded.
                        subset_xs = [source_xs[i] for i in subset_indices]
                        subset_ys = [source_ys[i] for i in subset_indices]
                        subset_degs = [source_degs[i] for i in subset_indices]
                        improvements[target_n] = (subset_score, baseline_score, subset_xs, subset_ys, subset_degs, source_n)
                        print(f"N={target_n}: Found improvement from N={source_n} corner ({corner_x:.2f},{corner_y:.2f}): {baseline_score:.6f} -> {subset_score:.6f}")

    return improvements

# Mine the baseline layouts for corner sub-layouts and report the wall-clock
# time together with the number of tree counts that improved.
print("Running corner extraction...")
t0 = time.time()
improvements = corner_extraction(
    all_xs=baseline_xs,
    all_ys=baseline_ys,
    all_degs=baseline_degs,
)
print(f"Corner extraction completed in {time.time() - t0:.1f}s")
print(f"Found {len(improvements)} improvements")

Running corner extraction...


Corner extraction completed in 11.4s
Found 0 improvements


In [26]:
# Next idea: spend a longer, more aggressive SA budget on the N values that
# look like they have the most room for improvement.

# Step one is ranking the baseline: score = side^2 / N for every N, where a
# higher score means a less efficient packing.
print("Analyzing baseline scores per N:")
scores_per_n = []
offset = 0  # start of the current N-tree layout inside the flat baseline arrays
for n in range(1, 201):
    stop = offset + n
    vertices = [
        get_tree_vertices(baseline_xs[k], baseline_ys[k], baseline_degs[k])
        for k in range(offset, stop)
    ]
    side = get_side_length(vertices)
    scores_per_n.append((n, side * side / n, side))
    offset = stop

# Worst-packed layouts (largest score) first.
scores_per_n = sorted(scores_per_n, key=lambda entry: entry[1], reverse=True)
print("\nTop 20 worst-packed N values (highest score):")
for n, score, side in scores_per_n[:20]:
    print(f"  N={n}: score={score:.6f}, side={side:.4f}")

Analyzing baseline scores per N:

Top 20 worst-packed N values (highest score):
  N=1: score=0.661250, side=0.8132
  N=2: score=0.450779, side=0.9495
  N=3: score=0.434745, side=1.1420
  N=5: score=0.416850, side=1.4437
  N=4: score=0.416545, side=1.2908
  N=7: score=0.399897, side=1.6731
  N=6: score=0.399610, side=1.5484
  N=9: score=0.387415, side=1.8673
  N=8: score=0.385407, side=1.7559
  N=15: score=0.379203, side=2.3850
  N=10: score=0.376630, side=1.9407
  N=21: score=0.376451, side=2.8117
  N=20: score=0.376057, side=2.7425
  N=11: score=0.375736, side=2.0330
  N=22: score=0.375258, side=2.8733
  N=16: score=0.374128, side=2.4466
  N=26: score=0.373997, side=3.1183
  N=12: score=0.372724, side=2.1149
  N=13: score=0.372294, side=2.2000
  N=25: score=0.372144, side=3.0502


In [27]:
# The jiweiliu kernel tiles a 2-tree seed on an ncols x nrows grid, so a pure
# lattice layout exists only for N = 2 * ncols * nrows.  Enumerate those N
# (up to 200) for ncols in 2..10 and nrows in ncols..14.

print("Checking lattice-compatible N values:")
lattice_n_values = sorted({
    2 * ncols * nrows
    for ncols in range(2, 11)
    for nrows in range(ncols, 15)
    if 2 * ncols * nrows <= 200
})
print(f"Lattice-compatible N values: {lattice_n_values}")

# Show what the baseline already achieves on the first few lattice-compatible
# tree counts.
print("\nBaseline scores for lattice-compatible N values:")
for n in lattice_n_values[:15]:
    # The N-tree layout starts after 1 + 2 + ... + (N - 1) entries.
    start = n * (n - 1) // 2
    vertices = [
        get_tree_vertices(baseline_xs[k], baseline_ys[k], baseline_degs[k])
        for k in range(start, start + n)
    ]
    score = calculate_score_numba(vertices)
    print(f"  N={n}: score={score:.6f}")

Checking lattice-compatible N values:
Lattice-compatible N values: [8, 12, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 48, 50, 52, 54, 56, 60, 64, 66, 70, 72, 78, 80, 84, 88, 90, 96, 98, 100, 104, 108, 110, 112, 120, 126, 128, 130, 132, 140, 144, 154, 156, 160, 162, 168, 176, 180, 182, 192, 196, 198, 200]

Baseline scores for lattice-compatible N values:
  N=8: score=0.385407
  N=12: score=0.372724
  N=16: score=0.374128
  N=18: score=0.368771
  N=20: score=0.376057
  N=24: score=0.365506
  N=28: score=0.366105
  N=30: score=0.360883
  N=32: score=0.365592
  N=36: score=0.358820
  N=40: score=0.362148
  N=42: score=0.366839
  N=44: score=0.366271
  N=48: score=0.355530
  N=50: score=0.360753


In [28]:
# More aggressive SA settings, meant for longer runs on a handful of
# specific N values.

aggressive_sa_params = dict(
    Tmax=0.01,             # higher starting temperature
    Tmin=0.0000001,        # lower ending temperature
    nsteps=20,             # more temperature steps
    nsteps_per_T=50000,    # more iterations per step (1M total moves)
    position_delta=0.005,  # slightly larger position changes
    angle_delta=2.0,       # slightly larger angle changes
    angle_delta2=2.0,
    delta_t=0.005,
)

print(f"Aggressive SA params: {aggressive_sa_params}")
print(f"Total SA moves per config: {aggressive_sa_params['nsteps'] * aggressive_sa_params['nsteps_per_T']:,}")

# Grid configurations to probe, as (ncols, nrows, append_x, append_y).
test_configs = [
    (4, 6, False, False),   # 48 trees
    (5, 7, False, False),   # 70 trees
    (6, 8, False, False),   # 96 trees
]

# SA settings in the positional order used in the sa_optimize_improved call below.
sa_args = tuple(
    aggressive_sa_params[key]
    for key in (
        "Tmax", "Tmin", "nsteps", "nsteps_per_T",
        "position_delta", "angle_delta", "angle_delta2", "delta_t",
    )
)

print("\nRunning aggressive SA on test configs...")
for ncols, nrows, append_x, append_y in test_configs:
    n_trees = 2 * ncols * nrows

    # Reference: score of the stored baseline layout with the same tree count.
    idx = n_trees * (n_trees - 1) // 2
    baseline_vertices = [
        get_tree_vertices(baseline_xs[k], baseline_ys[k], baseline_degs[k])
        for k in range(idx, idx + n_trees)
    ]
    baseline_score = calculate_score_numba(baseline_vertices)

    # Fresh seed arrays for every run, so each config starts from the same
    # untouched seeds.
    seed_xs = np.array([s[0] for s in initial_seeds], dtype=np.float64)
    seed_ys = np.array([s[1] for s in initial_seeds], dtype=np.float64)
    seed_degs = np.array([s[2] for s in initial_seeds], dtype=np.float64)

    t0 = time.time()
    best_score, best_xs, best_ys, best_degs, best_a, best_b = sa_optimize_improved(
        seed_xs, seed_ys, seed_degs,
        a_init, b_init,
        ncols, nrows,
        append_x, append_y,
        *sa_args,
        42,
    )
    elapsed = time.time() - t0

    # Lower score is better, so a negative diff beats the baseline.
    diff = best_score - baseline_score
    if diff < 0:
        status = "BETTER!"
    else:
        status = "worse"
    print(f"  N={n_trees} ({ncols}x{nrows}): SA={best_score:.6f}, baseline={baseline_score:.6f}, diff={diff:+.6f} ({status}) [{elapsed:.1f}s]")

Aggressive SA params: {'Tmax': 0.01, 'Tmin': 1e-07, 'nsteps': 20, 'nsteps_per_T': 50000, 'position_delta': 0.005, 'angle_delta': 2.0, 'angle_delta2': 2.0, 'delta_t': 0.005}
Total SA moves per config: 1,000,000

Running aggressive SA on test configs...
  N=48 (4x6): SA=0.358497, baseline=0.355530, diff=+0.002968 (worse) [61.7s]
  N=70 (5x7): SA=0.351889, baseline=0.349513, diff=+0.002376 (worse) [73.2s]
  N=96 (6x8): SA=0.348290, baseline=0.346397, diff=+0.001893 (worse) [100.2s]


In [29]:
# Try SA optimization starting from the BASELINE configurations instead of lattice seeds
# This might find small improvements by fine-tuning the existing solution

@njit(cache=True)
def sa_optimize_from_baseline(
    xs_init, ys_init, degs_init,
    Tmax, Tmin, nsteps, nsteps_per_T,
    position_delta, angle_delta,
    random_seed,
):
    """Simulated annealing that refines an existing layout one tree at a time.

    Each move picks a random tree and perturbs its position by a uniform
    offset in ``[-position_delta, position_delta]`` per axis and its angle by
    a uniform offset in ``[-angle_delta, angle_delta]`` degrees (wrapped to
    ``[0, 360)``).  A move for which ``polygons_overlap`` reports a collision
    with any other tree is rejected outright.  Otherwise it is accepted when
    the score decreases, or with probability ``exp(-delta / T)`` when it does
    not (only while ``T > 1e-10``).

    The temperature is held constant for ``nsteps_per_T`` moves and then
    lowered geometrically, going from ``Tmax`` to ``Tmin`` over ``nsteps``
    steps.

    Parameters
    ----------
    xs_init, ys_init, degs_init : 1-D float arrays
        Starting x, y and rotation (degrees) of each tree.  The inputs are
        copied and never modified.
    Tmax, Tmin : float
        Starting and final temperature; both must be positive because their
        ratio is passed to ``math.log``.
    nsteps : int
        Number of temperature levels.
    nsteps_per_T : int
        Number of proposed moves per temperature level.
    position_delta, angle_delta : float
        Half-widths of the uniform position / angle perturbations.
    random_seed : int
        Seed passed to ``np.random.seed`` for reproducible runs.

    Returns
    -------
    (best_score, best_xs, best_ys, best_degs)
        The lowest score seen and copies of the coordinates that produced it.
        The starting layout is returned when no move improves on it.
    """
    np.random.seed(random_seed)
    n = len(xs_init)

    xs = xs_init.copy()
    ys = ys_init.copy()
    degs = degs_init.copy()

    # Polygons of the current state.  Only the moved tree's entry is ever
    # replaced, which avoids rebuilding all n polygons on every proposal.
    vertices = [get_tree_vertices(xs[i], ys[i], degs[i]) for i in range(n)]
    current_score = calculate_score_numba(vertices)

    best_score = current_score
    best_xs = xs.copy()
    best_ys = ys.copy()
    best_degs = degs.copy()

    T = Tmax
    # Negative log-ratio so that T reaches Tmin after the last step.
    Tfactor = -math.log(Tmax / Tmin)

    for step in range(nsteps):
        for _ in range(nsteps_per_T):
            # Choose a random tree to modify
            i = np.random.randint(0, n)

            old_x = xs[i]
            old_y = ys[i]
            old_deg = degs[i]
            old_vert = vertices[i]

            # Apply small perturbation
            dx = (np.random.random() * 2.0 - 1.0) * position_delta
            dy = (np.random.random() * 2.0 - 1.0) * position_delta
            ddeg = (np.random.random() * 2.0 - 1.0) * angle_delta

            xs[i] = old_x + dx
            ys[i] = old_y + dy
            degs[i] = (old_deg + ddeg) % 360.0
            new_vert = get_tree_vertices(xs[i], ys[i], degs[i])

            # Reject immediately if the moved tree collides with any other.
            has_overlap = False
            for j in range(n):
                if i != j:
                    if polygons_overlap(new_vert, vertices[j], xs[i], ys[i], xs[j], ys[j]):
                        has_overlap = True
                        break

            if has_overlap:
                xs[i] = old_x
                ys[i] = old_y
                degs[i] = old_deg
                continue

            # Tentatively install the new polygon so the score sees it.
            vertices[i] = new_vert
            new_score = calculate_score_numba(vertices)
            delta = new_score - current_score

            # Metropolis criterion; uphill moves are disabled once T is ~0.
            accept = False
            if delta < 0:
                accept = True
            elif T > 1e-10:
                if np.random.random() < math.exp(-delta / T):
                    accept = True

            if accept:
                current_score = new_score
                if new_score < best_score:
                    best_score = new_score
                    best_xs = xs.copy()
                    best_ys = ys.copy()
                    best_degs = degs.copy()
            else:
                # Roll back both the coordinates and the cached polygon.
                xs[i] = old_x
                ys[i] = old_y
                degs[i] = old_deg
                vertices[i] = old_vert

        T = Tmax * math.exp(Tfactor * (step + 1) / nsteps)

    return best_score, best_xs, best_ys, best_degs

# Warm up: one tiny run on two well-separated trees so the numba compilation
# cost is paid here rather than in the first real optimisation.
print("Compiling SA from baseline function...")
dummy_xs, dummy_ys, dummy_degs = (
    np.array(values, dtype=np.float64)
    for values in ([0.0, 1.0], [0.0, 0.0], [0.0, 180.0])
)
_ = sa_optimize_from_baseline(
    dummy_xs, dummy_ys, dummy_degs,
    0.001, 0.0001, 2, 100,
    0.01, 1.0,
    42,
)
print("Done")

Compiling SA from baseline function...


Done


In [None]:
# Merge with baseline: start from copies of the baseline arrays and overwrite
# the layout of every N present in new_trees.
print("Merging with baseline...")
merged_xs = baseline_xs.copy()
merged_ys = baseline_ys.copy()
merged_degs = baseline_degs.copy()

for n_trees, (score, tree_data) in new_trees.items():
    # The N-tree layout starts after 1 + 2 + ... + (N - 1) entries.
    start = n_trees * (n_trees - 1) // 2
    for offset in range(n_trees):
        tree = tree_data[offset]  # indexed as (x, y, angle)
        slot = start + offset
        merged_xs[slot] = tree[0]
        merged_ys[slot] = tree[1]
        merged_degs[slot] = tree[2]

pre_cascade_score = calculate_total_score(merged_xs, merged_ys, merged_degs)
print(f"Score after SA merge: {pre_cascade_score:.6f}")
print(f"Improvement from SA: {baseline_total - pre_cascade_score:.6f}")

In [None]:
# Run the tree deletion cascade over the merged layouts for N = 1..200 and
# re-score the result.
print("Applying tree deletion cascade...")
tree_counts = np.arange(1, 201, dtype=np.int64)
t0 = time.time()
final_xs, final_ys, final_degs, side_lengths = deletion_cascade_numba(
    merged_xs, merged_ys, merged_degs, tree_counts
)
print(f"Cascade completed in {time.time() - t0:.1f}s")

final_score = calculate_total_score(final_xs, final_ys, final_degs)
print(f"Score after cascade: {final_score:.6f}")

In [None]:
# Final report: baseline vs. post-SA vs. post-cascade totals.
summary_lines = [
    "=" * 80,
    "Summary:",
    f"  Baseline total:      {baseline_total:.6f}",
    f"  After SA:            {pre_cascade_score:.6f}",
    f"  After cascade:       {final_score:.6f}",
    f"  Total improvement:   {baseline_total - final_score:+.6f}",
    "=" * 80,
]
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Save submission: write the cascade result only when it beats the baseline
# total; otherwise fall back to the untouched baseline.
output_path = "/home/submission/submission.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

if final_score < baseline_total:
    save_submission(output_path, final_xs, final_ys, final_degs)
    print(f"Saved to {output_path}")
else:
    save_submission(output_path, baseline_xs, baseline_ys, baseline_degs)
    print(f"No improvement - saved baseline to {output_path}")

print(f"\nFinal score: {min(final_score, baseline_total):.9f}")