# Loop 13 Analysis

Analyzing the current state and identifying next steps.

In [1]:
import pandas as pd
import numpy as np
import math
import os
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

# Tree shape constants
TRUNK_W = 0.15
TRUNK_H = 0.2
BASE_W = 0.7
MID_W = 0.4
TOP_W = 0.25
TIP_Y = 0.8
TIER_1_Y = 0.5
TIER_2_Y = 0.25
BASE_Y = 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

def get_tree_poly(x, y, deg):
    coords = [
        (0.0, TIP_Y), (TOP_W / 2.0, TIER_1_Y), (TOP_W / 4.0, TIER_1_Y),
        (MID_W / 2.0, TIER_2_Y), (MID_W / 4.0, TIER_2_Y), (BASE_W / 2.0, BASE_Y),
        (TRUNK_W / 2.0, BASE_Y), (TRUNK_W / 2.0, TRUNK_BOTTOM_Y),
        (-TRUNK_W / 2.0, TRUNK_BOTTOM_Y), (-TRUNK_W / 2.0, BASE_Y),
        (-BASE_W / 2.0, BASE_Y), (-MID_W / 4.0, TIER_2_Y), (-MID_W / 2.0, TIER_2_Y),
        (-TOP_W / 4.0, TIER_1_Y), (-TOP_W / 2.0, TIER_1_Y),
    ]
    poly = Polygon(coords)
    return affinity.translate(affinity.rotate(poly, deg, origin=(0, 0)), x, y)

def rotate_point(x, y, cos_a, sin_a):
    return x * cos_a - y * sin_a, x * sin_a + y * cos_a

def get_tree_vertices(cx, cy, angle_deg):
    angle_rad = angle_deg * math.pi / 180.0
    cos_a = math.cos(angle_rad)
    sin_a = math.sin(angle_rad)
    pts = [
        [0.0, TIP_Y], [TOP_W / 2.0, TIER_1_Y], [TOP_W / 4.0, TIER_1_Y],
        [MID_W / 2.0, TIER_2_Y], [MID_W / 4.0, TIER_2_Y], [BASE_W / 2.0, BASE_Y],
        [TRUNK_W / 2.0, BASE_Y], [TRUNK_W / 2.0, TRUNK_BOTTOM_Y],
        [-TRUNK_W / 2.0, TRUNK_BOTTOM_Y], [-TRUNK_W / 2.0, BASE_Y],
        [-BASE_W / 2.0, BASE_Y], [-MID_W / 4.0, TIER_2_Y], [-MID_W / 2.0, TIER_2_Y],
        [-TOP_W / 4.0, TIER_1_Y], [-TOP_W / 2.0, TIER_1_Y],
    ]
    vertices = []
    for px, py in pts:
        rx, ry = rotate_point(px, py, cos_a, sin_a)
        vertices.append((rx + cx, ry + cy))
    return vertices

def calculate_score(filepath):
    df = pd.read_csv(filepath)
    for col in ['x', 'y', 'deg', 'angle']:
        if col in df.columns and df[col].dtype == object:
            df[col] = df[col].astype(str).str.replace('s', '').astype(float)
    angle_col = 'deg' if 'deg' in df.columns else 'angle'
    
    total = 0.0
    scores = {}
    for n in range(1, 201):
        prefix = f"{n:03d}_"
        group = df[df["id"].str.startswith(prefix)].sort_values("id")
        if len(group) != n:
            continue
        xs = group['x'].values
        ys = group['y'].values
        degs = group[angle_col].values
        
        min_x = min_y = 1e10
        max_x = max_y = -1e10
        for i in range(n):
            verts = get_tree_vertices(xs[i], ys[i], degs[i])
            for vx, vy in verts:
                if vx < min_x: min_x = vx
                if vx > max_x: max_x = vx
                if vy < min_y: min_y = vy
                if vy > max_y: max_y = vy
        
        side = max(max_x - min_x, max_y - min_y)
        score = side * side / n
        scores[n] = score
        total += score
    
    return total, scores

print("Functions defined.")

Functions defined.


In [2]:
# Current state analysis
print("=== CURRENT STATE ===")
print()

# Check current submission
current_score, current_scores = calculate_score('/home/submission/submission.csv')
print(f"Current submission score: {current_score:.6f}")

# Check baseline
baseline_score, baseline_scores = calculate_score('/home/code/external_data/saspav_latest/santa-2025.csv')
print(f"Baseline (saspav_latest) score: {baseline_score:.6f}")

# Check eazy output
eazy_score, eazy_scores = calculate_score('/home/code/experiments/017_just_luck_multiphase/eazy_output.csv')
print(f"Eazy output score: {eazy_score:.6f}")

# Target
target = 68.919154
print(f"\nTarget: {target}")
print(f"Gap from current: {current_score - target:.6f} ({(current_score - target)/target*100:.2f}%)")
print(f"Gap from baseline: {baseline_score - target:.6f} ({(baseline_score - target)/target*100:.2f}%)")

=== CURRENT STATE ===



Current submission score: 70.659493


Baseline (saspav_latest) score: 70.659958


Eazy output score: 70.659943

Target: 68.919154
Gap from current: 1.740339 (2.53%)
Gap from baseline: 1.740804 (2.53%)


In [3]:
# Compare current vs baseline per N
print("\n=== PER-N COMPARISON (current vs baseline) ===")
print()

improvements = []
for n in range(1, 201):
    if n in current_scores and n in baseline_scores:
        diff = baseline_scores[n] - current_scores[n]
        if abs(diff) > 1e-9:
            improvements.append((n, baseline_scores[n], current_scores[n], diff))

print(f"N values where current differs from baseline: {len(improvements)}")
if improvements:
    print("\nTop 10 improvements (current better than baseline):")
    for n, base, curr, diff in sorted(improvements, key=lambda x: -x[3])[:10]:
        print(f"  N={n}: baseline={base:.6f}, current={curr:.6f}, improvement={diff:.6f}")
    
    print("\nTop 10 regressions (current worse than baseline):")
    for n, base, curr, diff in sorted(improvements, key=lambda x: x[3])[:10]:
        print(f"  N={n}: baseline={base:.6f}, current={curr:.6f}, regression={-diff:.6f}")


=== PER-N COMPARISON (current vs baseline) ===

N values where current differs from baseline: 1

Top 10 improvements (current better than baseline):
  N=65: baseline=0.363793, current=0.363328, improvement=0.000466

Top 10 regressions (current worse than baseline):
  N=65: baseline=0.363793, current=0.363328, regression=-0.000466


In [4]:
# Analyze which N values contribute most to the gap
print("\n=== GAP ANALYSIS ===")
print()

# Calculate contribution to gap for each N
gap_contributions = []
for n in range(1, 201):
    if n in current_scores:
        contribution = current_scores[n]
        gap_contributions.append((n, contribution, contribution / current_score * 100))

# Sort by contribution
gap_contributions.sort(key=lambda x: -x[1])

print("Top 20 N values by score contribution:")
for n, score, pct in gap_contributions[:20]:
    print(f"  N={n}: score={score:.6f} ({pct:.2f}% of total)")

# Small N analysis
print("\n=== SMALL N ANALYSIS ===")
for n in range(1, 11):
    if n in current_scores:
        print(f"  N={n}: score={current_scores[n]:.6f}")


=== GAP ANALYSIS ===

Top 20 N values by score contribution:
  N=1: score=0.661250 (0.94% of total)
  N=2: score=0.450779 (0.64% of total)
  N=3: score=0.434745 (0.62% of total)
  N=5: score=0.416850 (0.59% of total)
  N=4: score=0.416545 (0.59% of total)
  N=7: score=0.399897 (0.57% of total)
  N=6: score=0.399610 (0.57% of total)
  N=9: score=0.387415 (0.55% of total)
  N=8: score=0.385407 (0.55% of total)
  N=15: score=0.379203 (0.54% of total)
  N=10: score=0.376630 (0.53% of total)
  N=21: score=0.376451 (0.53% of total)
  N=20: score=0.376057 (0.53% of total)
  N=11: score=0.375736 (0.53% of total)
  N=22: score=0.375258 (0.53% of total)
  N=16: score=0.374128 (0.53% of total)
  N=26: score=0.373997 (0.53% of total)
  N=12: score=0.372724 (0.53% of total)
  N=13: score=0.372294 (0.53% of total)
  N=25: score=0.372144 (0.53% of total)

=== SMALL N ANALYSIS ===
  N=1: score=0.661250
  N=2: score=0.450779
  N=3: score=0.434745
  N=4: score=0.416545
  N=5: score=0.416850
  N=6: scor

In [None]:
# Check what the corner extraction approach could achieve
print("\n=== CORNER EXTRACTION POTENTIAL ===")
print()

# Load baseline
df = pd.read_csv('/home/code/external_data/saspav_latest/santa-2025.csv')
for col in ['x', 'y', 'deg']:
    if df[col].dtype == object:
        df[col] = df[col].astype(str).str.replace('s', '').astype(float)

# For a sample large N, check if corner subsets could beat smaller N
def extract_corner_subsets(df, n_large):
    prefix = f"{n_large:03d}_"
    group = df[df["id"].str.startswith(prefix)].sort_values("id")
    if len(group) != n_large:
        return {}
    
    xs = group['x'].values
    ys = group['y'].values
    degs = group['deg'].values
    
    # Create tree list with polygons
    trees = []
    for i in range(n_large):
        poly = get_tree_poly(xs[i], ys[i], degs[i])
        trees.append({'x': xs[i], 'y': ys[i], 'deg': degs[i], 'poly': poly})
    
    # Get bounds
    all_polys = [t['poly'] for t in trees]
    bounds = unary_union(all_polys).bounds
    
    results = {}
    corners = [
        (bounds[0], bounds[1]),  # bottom-left
        (bounds[0], bounds[3]),  # top-left
        (bounds[2], bounds[1]),  # bottom-right
        (bounds[2], bounds[3]),  # top-right
    ]
    
    for corner_x, corner_y in corners:
        # Calculate max distance to corner for each tree
        tree_distances = []
        for tree in trees:
            poly_bounds = tree['poly'].bounds
            max_dist = max(
                abs(poly_bounds[0] - corner_x),
                abs(poly_bounds[2] - corner_x),
                abs(poly_bounds[1] - corner_y),
                abs(poly_bounds[3] - corner_y),
            )
            tree_distances.append((max_dist, tree))
        
        tree_distances.sort(key=lambda x: x[0])
        
        subset = []
        for dist, tree in tree_distances:
            subset.append(tree)
            small_n = len(subset)
            if small_n >= n_large:
                break
            
            # Calculate subset score
            subset_polys = [t['poly'] for t in subset]
            subset_bounds = unary_union(subset_polys).bounds
            width = subset_bounds[2] - subset_bounds[0]
            height = subset_bounds[3] - subset_bounds[1]
            side = max(width, height)
            subset_score = side * side / small_n
            
            if small_n not in results or subset_score < results[small_n][0]:
                results[small_n] = (subset_score, subset)
    
    return results

# Test on multiple large N values
print("Testing corner extraction on multiple large N values...")
all_improvements = []
for n_large in [50, 100, 111, 150, 200]:
    subsets = extract_corner_subsets(df, n_large)
    for small_n, (subset_score, _) in subsets.items():
        if small_n in baseline_scores and subset_score < baseline_scores[small_n]:
            all_improvements.append((small_n, n_large, baseline_scores[small_n], subset_score, baseline_scores[small_n] - subset_score))

print(f"\nFound {len(all_improvements)} potential improvements from corner extraction")
if all_improvements:
    print("\nTop 20 improvements:")
    for n, from_n, base, sub, imp in sorted(all_improvements, key=lambda x: -x[4])[:20]:
        print(f"  N={n} (from N={from_n}): baseline={base:.6f}, subset={sub:.6f}, improvement={imp:.6f}")