# Evolver Loop 1 Analysis

## Current Situation
- Best CV score: 135.819103 (exp_000)
- Target: 68.931058
- Gap: ~97% worse than target (nearly 2x)

## Key Insights from Research

### 1. Top Kernels Analysis
- **74.75 BackPacking**: Uses backward iteration - start from N=200, work backwards
- **Ensemble approach**: Combine solutions from many sources, keep best for each N
- **bbox3 binary**: External optimizer used by top kernels (not available locally)
- **Fractional translation**: Very fine-grained position adjustments (0.00001 steps)

### 2. Critical Missing Techniques
1. **Ensemble from multiple sources** - We only have one solution
2. **Backward propagation** - Not implemented
3. **Multi-phase optimization** - Only ran single pass
4. **Fractional translation** - Not implemented
5. **Better starting point** - Started from sample_submission

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
import os

getcontext().prec = 25
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Load and analyze current best submission
submission_path = '/home/code/submission_candidates/candidate_000.csv'
df = pd.read_csv(submission_path)

print(f'Submission shape: {df.shape}')
print(f'Columns: {df.columns.tolist()}')
print(f'\nFirst few rows:')
print(df.head(10))

Submission shape: (20100, 4)
Columns: ['id', 'x', 'y', 'deg']

First few rows:
      id                    x                    y                  deg
0  001_0  s-4.071254742298786   s3.843581395066648   s45.00025601644471
1  002_0  s-0.178872253724363  s-0.067729152765533   s65.84869686100669
2  002_1  s-0.077606144156982  s-0.461090335677369  s112.20936641561065
3  003_0  s-0.115412453767125  s-0.144739315483519   s48.69893245690267
4  003_1   s0.031427698356818  s-0.490844529079489  s124.12416947785509
5  003_2   s0.270378019016876   s0.009529902609125  s196.26138446313004
6  004_0  s-0.151034420726324  s-0.220426297738166   s55.02763469314592
7  004_1   s0.044736791840841  s-0.549158475238475   s136.4498149410888
8  004_2   s0.214499641799307  s-0.019469446003845  s203.26836997628192
9  004_3     s-0.541629465437  s-0.428249981176921  s156.38384093391062


In [3]:
# Define ChristmasTree class for analysis
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x).lstrip('s'))
        self.center_y = Decimal(str(center_y).lstrip('s'))
        self.angle = Decimal(str(angle).lstrip('s'))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

def calculate_side_length(trees):
    if not trees:
        return Decimal('0')
    all_polygons = [t.polygon for t in trees]
    bounds = unary_union(all_polygons).bounds
    minx = Decimal(bounds[0]) / scale_factor
    miny = Decimal(bounds[1]) / scale_factor
    maxx = Decimal(bounds[2]) / scale_factor
    maxy = Decimal(bounds[3]) / scale_factor
    width = maxx - minx
    height = maxy - miny
    return max(width, height)

def calculate_score(trees, n_trees):
    side = calculate_side_length(trees)
    return float(side * side / n_trees)

print('ChristmasTree class defined')

ChristmasTree class defined


In [4]:
# Analyze score breakdown by N
df['x_clean'] = df['x'].astype(str).str.lstrip('s')
df['y_clean'] = df['y'].astype(str).str.lstrip('s')
df['deg_clean'] = df['deg'].astype(str).str.lstrip('s')
df['N'] = df['id'].str.split('_').str[0].astype(int)

scores_by_n = []
for n in range(1, 201):
    group = df[df['N'] == n]
    if len(group) == n:
        trees = [ChristmasTree(row['x_clean'], row['y_clean'], row['deg_clean']) 
                 for _, row in group.iterrows()]
        score = calculate_score(trees, n)
        side = float(calculate_side_length(trees))
        scores_by_n.append({'N': n, 'score': score, 'side': side})

scores_df = pd.DataFrame(scores_by_n)
print(f'Total score: {scores_df["score"].sum():.6f}')
print(f'\nScore breakdown by N (first 20):')
print(scores_df.head(20).to_string(index=False))

Total score: 135.819103

Score breakdown by N (first 20):
 N    score     side
 1 0.661252 0.813174
 2 0.545753 1.044752
 3 0.583362 1.322908
 4 0.489558 1.399369
 5 0.608346 1.744056
 6 0.672138 2.008190
 7 0.784446 2.343315
 8 0.678465 2.329746
 9 0.679351 2.472683
10 0.619756 2.489489
11 0.578373 2.522320
12 0.741879 2.983713
13 0.776163 3.176494
14 0.695480 3.120372
15 0.679112 3.191657
16 0.712498 3.376384
17 0.703619 3.458543
18 0.700364 3.550572
19 0.694287 3.632003
20 0.675355 3.675202


In [5]:
# Identify worst performing N values (highest score contribution)
scores_df_sorted = scores_df.sort_values('score', ascending=False)
print('Top 20 worst performing N values (highest score contribution):')
print(scores_df_sorted.head(20).to_string(index=False))

print(f'\nTop 20 worst N contribute: {scores_df_sorted.head(20)["score"].sum():.6f} ({scores_df_sorted.head(20)["score"].sum()/scores_df["score"].sum()*100:.1f}% of total)')

Top 20 worst performing N values (highest score contribution):
  N    score      side
 26 0.787355  4.524515
  7 0.784446  2.343315
 13 0.776163  3.176494
 25 0.771353  4.391335
 12 0.741879  2.983713
 42 0.740641  5.577359
 43 0.732498  5.612257
 28 0.720505  4.491563
122 0.718957  9.365508
 62 0.716105  6.663222
 16 0.712498  3.376384
163 0.712254 10.774852
161 0.712242 10.708453
 21 0.711579  3.865638
182 0.708685 11.356969
199 0.708402 11.873163
194 0.708287 11.722099
193 0.708118 11.690455
162 0.707853 10.708511
176 0.706283 11.149248

Top 20 worst N contribute: 14.586103 (10.7% of total)


In [6]:
# Calculate theoretical minimum for N=1 (single tree at optimal angle)
# A single tree at 45 degrees should have minimum bounding box
import math

# Tree dimensions
tree_height = 0.8 + 0.2  # tip to trunk bottom = 1.0
tree_width = 0.7  # base width

# At 0 degrees: bbox = max(0.7, 1.0) = 1.0
# At 45 degrees: diagonal alignment reduces bbox
# Rotated dimensions: 
angle_45 = math.radians(45)
rotated_width = abs(tree_width * math.cos(angle_45)) + abs(tree_height * math.sin(angle_45))
rotated_height = abs(tree_width * math.sin(angle_45)) + abs(tree_height * math.cos(angle_45))
print(f'At 0 degrees: bbox side = {max(tree_width, tree_height):.4f}')
print(f'At 45 degrees (approx): bbox side = {max(rotated_width, rotated_height):.4f}')

# N=1 optimal score
optimal_n1_side = 0.8485  # Approximate from 45 degree rotation
optimal_n1_score = optimal_n1_side**2 / 1
print(f'\nOptimal N=1 score (approx): {optimal_n1_score:.6f}')
print(f'Current N=1 score: {scores_df[scores_df["N"]==1]["score"].values[0]:.6f}')

At 0 degrees: bbox side = 1.0000
At 45 degrees (approx): bbox side = 1.2021

Optimal N=1 score (approx): 0.719952
Current N=1 score: 0.661252


In [7]:
# Summary statistics
print('=== ANALYSIS SUMMARY ===')
print(f'Current total score: {scores_df["score"].sum():.6f}')
print(f'Target score: 68.931058')
print(f'Gap: {scores_df["score"].sum() - 68.931058:.6f} ({(scores_df["score"].sum()/68.931058 - 1)*100:.1f}% worse)')
print(f'\nAverage score per N: {scores_df["score"].mean():.6f}')
print(f'Median score per N: {scores_df["score"].median():.6f}')
print(f'\nTo reach target, need average score per N: {68.931058/200:.6f}')
print(f'Current average: {scores_df["score"].mean():.6f}')
print(f'Need to reduce by: {(1 - 68.931058/200/scores_df["score"].mean())*100:.1f}%')

=== ANALYSIS SUMMARY ===
Current total score: 135.819103
Target score: 68.931058
Gap: 66.888045 (97.0% worse)

Average score per N: 0.679096
Median score per N: 0.679231

To reach target, need average score per N: 0.344655
Current average: 0.679096
Need to reduce by: 49.2%
