# Loop 3 LB Feedback Analysis

**LB Score: 70.6600** (CV: 70.6600, gap: 0.0000)

The clean ensemble submission was successful! CV matches LB perfectly.

## Key Observations:
1. The clean ensemble (26 snapshots) scores 70.66
2. C++ optimizers (sa_v1_parallel, tree_packer_v18) cannot improve pre-optimized solutions
3. Bottom-left beam search produces worse results than baseline
4. Public datasets we can access don't have better solutions than 70.66

## Gap Analysis:
- Current: 70.66
- Target: 68.919154
- Gap: 1.74 points (about 2.5% of the current score)

## Next Steps:
1. Implement fractional translation in Python
2. Try backward propagation (reuse the configuration found for a larger N as the starting point for a smaller N)
3. Look for better public sources or generate new solutions

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import time

# Tree polygon
# Outline of a single tree in its own local frame, given as parallel lists of
# x and y vertex coordinates.  The vertices start at the tip (0, 0.8), run down
# the right-hand side, cross the bottom of the trunk at y = -0.2 and climb back
# up the left-hand side; the outline is mirror-symmetric about x = 0.
# The local origin (0, 0) sits at the middle of the trunk's top edge.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# Unrotated, untranslated template polygon built from the vertex lists above.
BASE_TREE = Polygon(zip(TX, TY))

def parse_val(v):
    """Convert one submission cell to a ``float``.

    A string that begins with the letter ``s`` has that first character
    dropped before conversion; every other value is handed to ``float``
    unchanged.
    """
    payload = v[1:] if isinstance(v, str) and v.startswith('s') else v
    return float(payload)

def create_tree(x, y, deg):
    """Build the polygon of a tree placed at ``(x, y)`` and turned by ``deg``.

    The template ``BASE_TREE`` is first rotated by ``deg`` about its local
    origin (0, 0) and the rotated shape is then shifted by ``(x, y)``.
    """
    rotated = affinity.rotate(BASE_TREE, deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box(trees):
    """Return ``(min_x, min_y, max_x, max_y)`` covering every tree's ``bounds``.

    An empty (falsy) ``trees`` yields ``(0, 0, 0, 0)``.
    """
    if not trees:
        return 0, 0, 0, 0

    lo_x = lo_y = float('inf')
    hi_x = hi_y = float('-inf')
    for shape in trees:
        left, bottom, right, top = shape.bounds
        # Widen the running box wherever this shape sticks out of it.
        if left < lo_x:
            lo_x = left
        if bottom < lo_y:
            lo_y = bottom
        if right > hi_x:
            hi_x = right
        if top > hi_y:
            hi_y = top
    return lo_x, lo_y, hi_x, hi_y

def get_side(trees):
    """Return the longer edge of the box from ``get_bounding_box(trees)``.

    This is the side of the smallest axis-aligned square that covers that
    box.  An empty (falsy) ``trees`` yields ``0``.
    """
    if trees:
        left, bottom, right, top = get_bounding_box(trees)
        width = right - left
        height = top - bottom
        return max(width, height)
    return 0

def check_overlap(tree1, tree2, tolerance=1e-9):
    """Tell whether two shapes share more than ``tolerance`` of area.

    Returns ``True`` only when ``tree1.intersects(tree2)`` holds and the area
    of their intersection is strictly greater than ``tolerance``; otherwise
    returns ``False``.
    """
    if not tree1.intersects(tree2):
        return False
    shared = tree1.intersection(tree2)
    return True if shared.area > tolerance else False

def has_any_overlap(trees, idx, tolerance=1e-9):
    """Tell whether ``trees[idx]`` overlaps any other entry of ``trees``.

    Every entry at a position different from ``idx`` is compared against
    ``trees[idx]`` with ``check_overlap`` using the given ``tolerance``; the
    scan stops at the first overlap found.
    """
    return any(
        check_overlap(trees[idx], other, tolerance)
        for position, other in enumerate(trees)
        if position != idx
    )

print('Functions loaded')

Functions loaded


In [2]:
# Read the submission that is currently on disk
df = pd.read_csv('/home/submission/submission.csv')
print(f'Loaded {len(df)} rows')

# Total score = sum over n of side(n)^2 / n, where the rows of puzzle n are the
# ones whose id starts with the zero-padded prefix "NNN_".
total_score = 0
for n in range(1, 201):
    prefix = f'{n:03d}_'
    rows = df[df['id'].str.startswith(prefix)]
    # Only puzzles with exactly n rows are scored; others are left out of the sum.
    if len(rows) == n:
        trees = [
            create_tree(parse_val(x), parse_val(y), parse_val(deg))
            for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
        ]
        side = get_side(trees)
        total_score += side**2 / n

print(f'Current total score: {total_score:.6f}')

Loaded 20100 rows


Current total score: 70.659437


In [3]:
# Implement fractional translation
def fractional_translation(trees, coords, max_iter=100, frac_steps=None, directions=None):
    """Greedily nudge single trees by tiny offsets to shrink the bounding square.

    Each pass visits every tree and, for every step size and every direction,
    tries moving that tree by ``(dx * step, dy * step)`` without changing its
    rotation.  A move is kept only when it makes the bounding-square side
    (``get_side``) smaller by more than 1e-12 *and* the moved tree does not
    overlap any other tree (``has_any_overlap``).  Passes repeat until one
    finishes without an accepted move or ``max_iter`` passes have run.

    Args:
        trees: list of Shapely polygons, one per entry of ``coords``.
        coords: list of (x, y, deg) tuples describing the same trees.
        max_iter: maximum number of passes over all trees.
        frac_steps: optional iterable of step lengths to try, in order.
            Defaults to the ladder 1e-3, 5e-4, 2e-4, 1e-4, 5e-5, 2e-5, 1e-5.
        directions: optional iterable of (dx, dy) multipliers applied to each
            step.  Defaults to the four axis-aligned and four diagonal moves.

    Returns:
        ``(best_coords, best_side)``: the updated list of (x, y, deg) tuples
        and the side length of the resulting bounding square.  Note that the
        second value is the side, not the ``side**2 / n`` score.  The input
        lists are copied and never modified.
    """
    if frac_steps is None:
        frac_steps = (0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001)
    if directions is None:
        directions = ((0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1))
    # Both sequences are walked many times, so one-shot iterables must be materialised.
    frac_steps = tuple(frac_steps)
    directions = tuple(directions)

    n = len(trees)
    best_coords = list(coords)
    best_trees = list(trees)
    best_side = get_side(best_trees)

    for _ in range(max_iter):
        improved = False
        for i in range(n):
            for step in frac_steps:
                for dx, dy in directions:
                    # Re-read every time: an accepted move changes the start point.
                    ox, oy, odeg = best_coords[i]
                    nx = ox + dx * step
                    ny = oy + dy * step

                    # Put the candidate in place so the helpers see the trial layout.
                    old_tree = best_trees[i]
                    best_trees[i] = create_tree(nx, ny, odeg)

                    # The bounding-box test is cheap and rejects most candidates,
                    # so run it first; the polygon-intersection test is only paid
                    # for moves that would actually shrink the square.
                    new_side = get_side(best_trees)
                    if new_side < best_side - 1e-12 and not has_any_overlap(best_trees, i):
                        best_side = new_side
                        best_coords[i] = (nx, ny, odeg)
                        improved = True
                    else:
                        best_trees[i] = old_tree

        if not improved:
            break

    return best_coords, best_side

print('Fractional translation function defined')

Fractional translation function defined


In [4]:
# Try fractional translation on a handful of small puzzle sizes
test_ns = [2, 3, 5, 10, 20]

for n in test_ns:
    prefix = f'{n:03d}_'
    rows = df[df['id'].str.startswith(prefix)]

    # Rebuild the stored layout as parallel lists of coordinates and polygons
    coords = [
        (parse_val(x), parse_val(y), parse_val(deg))
        for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
    ]
    trees = [create_tree(*placement) for placement in coords]

    original_side = get_side(trees)
    original_score = original_side**2 / n

    # Time a single optimisation run
    start = time.time()
    new_coords, new_side = fractional_translation(trees, coords, max_iter=50)
    elapsed = time.time() - start

    # Positive improvement means the per-puzzle score went down
    new_score = new_side**2 / n
    improvement = original_score - new_score

    print(f'N={n}: original={original_score:.6f}, new={new_score:.6f}, improvement={improvement:+.6f}, time={elapsed:.1f}s')

N=2: original=0.450779, new=0.450770, improvement=+0.000009, time=0.0s
N=3: original=0.434745, new=0.434730, improvement=+0.000015, time=0.0s
N=5: original=0.416850, new=0.416850, improvement=+0.000000, time=0.0s
N=10: original=0.376630, new=0.376630, improvement=+0.000000, time=0.1s


N=20: original=0.376057, new=0.376057, improvement=+0.000000, time=0.2s


In [5]:
# Analyze per-N scores to find where we can improve most

# Area of a single tree (0.245625).  n non-overlapping trees need a square of
# area at least n * tree_area, so side**2 / n can never drop below tree_area:
# it is the theoretical minimum of the per-puzzle score for every n.
tree_area = 0.245625

per_n_scores = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    rows = df[df['id'].str.startswith(prefix)]
    # Puzzles that do not have exactly n rows are left out of the table.
    if len(rows) != n:
        continue
    trees = []
    for _, row in rows.iterrows():
        x, y, deg = parse_val(row['x']), parse_val(row['y']), parse_val(row['deg'])
        trees.append(create_tree(x, y, deg))
    side = get_side(trees)
    score = side**2 / n

    # Efficiency = theoretical minimum score as a percentage of the actual score
    # (100 would mean the trees fill the bounding square completely).
    efficiency = tree_area / score * 100
    per_n_scores.append({'n': n, 'score': score, 'side': side, 'efficiency': efficiency})

per_n_df = pd.DataFrame(per_n_scores)
print('Per-N score analysis:')
print(per_n_df.describe())

Per-N score analysis:
                n       score        side  efficiency
count  200.000000  200.000000  200.000000  200.000000
mean   100.500000    0.353297    5.568841   69.825161
std     57.879185    0.028104    1.877249    3.957547
min      1.000000    0.329946    0.813173   37.145558
25%     50.750000    0.340034    4.300194   68.036976
50%    100.500000    0.347025    5.907787   70.780245
75%    150.250000    0.361017    7.124880   72.235474
max    200.000000    0.661250    8.216619   74.444037


In [6]:
# Pick the 20 puzzle sizes with the smallest efficiency and print them as a table
worst = per_n_df.nsmallest(n=20, columns='efficiency')
print('\nN values with lowest efficiency (most room for improvement):')
report_columns = ['n', 'score', 'side', 'efficiency']
print(worst[report_columns].to_string())


N values with lowest efficiency (most room for improvement):
     n     score      side  efficiency
0    1  0.661250  0.813173   37.145558
1    2  0.450779  0.949504   54.488985
2    3  0.434745  1.142031   56.498619
4    5  0.416850  1.443692   58.924131
3    4  0.416545  1.290806   58.967240
6    7  0.399897  1.673104   61.422135
5    6  0.399610  1.548438   61.466135
8    9  0.387415  1.867280   63.400999
7    8  0.385407  1.755921   63.731287
14  15  0.379203  2.384962   64.774038
9   10  0.376630  1.940696   65.216518
20  21  0.376451  2.811667   65.247553
19  20  0.376057  2.742469   65.315921
10  11  0.375736  2.033002   65.371685
21  22  0.375258  2.873270   65.454949
15  16  0.374128  2.446640   65.652665
25  26  0.373997  3.118320   65.675668
11  12  0.372724  2.114873   65.899970
12  13  0.372294  2.199960   65.976081
24  25  0.372144  3.050182   66.002611


In [7]:
# Distance between the current total and the target, overall and averaged
# over the 200 puzzle sizes
target, current = 68.919154, total_score
gap = current - target

print(
    f'Current: {current:.6f}',
    f'Target: {target:.6f}',
    f'Gap: {gap:.6f} ({gap/current*100:.2f}%)',
    f'\nNeed to reduce score by {gap:.6f} points',
    f'Average reduction per N: {gap/200:.6f}',
    sep='\n',
)

Current: 70.659437
Target: 68.919154
Gap: 1.740283 (2.46%)

Need to reduce score by 1.740283 points
Average reduction per N: 0.008701
