# Loop 19 LB Feedback Analysis

## Key Results
- Submitted exp_019 (021_cpp_sa_2M_iter): CV 70.6279 | LB 70.6279
- CV-LB gap: 0.0000 (perfect alignment - this is a deterministic optimization problem)
- Target: 68.919154 | Gap: 1.709 (2.42%)

## Critical Insight: N=88 Improvement
The C++ SA with 2M iterations found a SIGNIFICANT improvement for N=88:
- Side length: 5.5548 -> 5.5357 (0.34% reduction)
- Score contribution: 0.002398 improvement

This is 100x larger than the improvements seen for other N values! Why?

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import matplotlib.pyplot as plt

# Outline of a single tree as (x, y) vertices. The list starts at the tip,
# runs down the right-hand side, crosses the trunk base, and climbs back up
# the left-hand side; the shape is mirror-symmetric about x = 0.
TREE_TEMPLATE = [
    (0.0, 0.8),                        # tip
    (0.125, 0.5), (0.0625, 0.5),       # right side, top tier
    (0.2, 0.25), (0.1, 0.25),          # right side, middle tier
    (0.35, 0.0), (0.075, 0.0),         # right side, bottom tier
    (0.075, -0.2), (-0.075, -0.2),     # trunk base
    (-0.075, 0.0), (-0.35, 0.0),       # left side, bottom tier
    (-0.1, 0.25), (-0.2, 0.25),        # left side, middle tier
    (-0.0625, 0.5), (-0.125, 0.5),     # left side, top tier
]

def parse_s_value(val):
    """Convert a submission cell to ``float``, dropping a leading ``'s'``.

    A string that begins with ``'s'`` has that one character removed before
    conversion. Any other string, and any non-string value, is handed to
    ``float()`` unchanged, so unparsable input raises whatever ``float()``
    raises.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_marker else val)

def create_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and moved to ``(x, y)``.

    The template polygon is first rotated about the origin (``angle`` is in
    degrees, since ``use_radians=False``) and only then translated by
    ``x`` / ``y``, so the placement offset is not itself rotated.

    Returns the resulting shapely geometry.
    """
    turned = rotate(Polygon(TREE_TEMPLATE), angle, origin=(0, 0), use_radians=False)
    return translate(turned, x, y)

def get_n_side(df, n):
    """Return the bounding-square side for the configuration with ``n`` trees.

    Selects the rows of ``df`` whose ``'n'`` column equals ``n``, builds one
    polygon per row from its ``'x'``, ``'y'`` and ``'deg'`` values, and
    measures the axis-aligned extent covered by all of their bounding boxes.
    The result is the larger of the horizontal and vertical extents.

    Returns ``0`` when no row matches ``n``.
    """
    subset = df[df['n'] == n]
    xs, ys = [], []
    for _, rec in subset.iterrows():
        left, bottom, right, top = create_tree_polygon(
            rec['x'], rec['y'], rec['deg']
        ).bounds
        xs += [left, right]
        ys += [bottom, top]
    if not xs:
        return 0
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

# Read the current best submission and turn its columns into numbers.
df = pd.read_csv('/home/submission/submission.csv')

# Coordinates and angles go through parse_s_value, which strips a leading 's'
# from string cells before converting to float.
df['x'] = df['x'].map(parse_s_value)
df['y'] = df['y'].map(parse_s_value)
df['deg'] = df['deg'].map(parse_s_value)

# The tree count N is the integer in front of the first underscore of 'id'.
df['n'] = df['id'].map(lambda row_id: int(row_id.partition('_')[0]))

print(f"Loaded {df.shape[0]} rows")

In [None]:
# Per-N score breakdown for N = 1..200.
# Each N contributes side**2 / N, where side is the bounding-square side
# returned by get_n_side for that N.
scores = [
    {'n': n, 'side': side, 'score': (side ** 2) / n}
    for n, side in ((n, get_n_side(df, n)) for n in range(1, 201))
]

scores_df = pd.DataFrame(scores)
print(f"Total score: {scores_df['score'].sum():.6f}")
print("\nTop 10 N values by score contribution:")
print(scores_df[['n', 'side', 'score']].nlargest(10, 'score'))

In [None]:
# Packing-efficiency analysis against an area lower bound.
# N trees cover N * tree_area, so a square holding them has side at least
# sqrt(N * tree_area); that bound corresponds to a 100%-dense packing.
# The area is computed from the template polygon rather than hard-coded
# (for the 15-vertex template it evaluates to 0.245625).
tree_area = Polygon(TREE_TEMPLATE).area
print(f"Tree area: {tree_area:.6f}")

# Lower-bound side per N, and efficiency = (bound / actual side)^2 in percent,
# i.e. the share of the current bounding square that the trees actually cover.
scores_df['min_side'] = np.sqrt(scores_df['n'] * tree_area)
side_ratio = scores_df['min_side'] / scores_df['side']
scores_df['efficiency'] = side_ratio ** 2 * 100

eff = scores_df['efficiency']
print("\nEfficiency analysis (how close to theoretical minimum):")
print(f"Average efficiency: {eff.mean():.2f}%")
print(f"Min efficiency: {eff.min():.2f}% at N={scores_df.loc[eff.idxmin(), 'n']}")
print(f"Max efficiency: {eff.max():.2f}% at N={scores_df.loc[eff.idxmax(), 'n']}")

# The least efficient N values are the ones with the most slack left.
print("\nN values with LOWEST efficiency (most room for improvement):")
print(scores_df[['n', 'side', 'score', 'efficiency']].nsmallest(10, 'efficiency'))

In [None]:
# Closer look at N=88. Reads the 'efficiency' and 'min_side' columns, so the
# efficiency analysis must already have been added to scores_df.
n88 = scores_df.loc[scores_df['n'] == 88].iloc[0]
print("N=88 Analysis:")
print(f"  Current side: {n88['side']:.6f}")
print(f"  Current score: {n88['score']:.6f}")
print(f"  Efficiency: {n88['efficiency']:.2f}%")
print(f"  Theoretical min side: {n88['min_side']:.6f}")

# Hypothesis: 88 factors into two small integers, which may favour a
# rectangular grid layout.
print("\nN=88 factorization: 88 = 8 × 11")
print("This suggests a rectangular grid arrangement might work well.")

# Other N in 1..200 that can be written as a * b with 2 <= a <= b <= 14.
small_products = {a * b for a in range(2, 15) for b in range(a, 15)}
grid_friendly = [n for n in range(1, 201) if n in small_products]
print("\nGrid-friendly N values (products of small integers):")
print(grid_friendly[:20])

In [None]:
# Distance between the current total score and the target score.
target = 68.919154
current_total = scores_df['score'].sum()
gap = current_total - target
gap_pct = 100*gap/current_total

print(f"Current total: {current_total:.6f}")
print(f"Target: {target:.6f}")
print(f"Gap: {gap:.6f} ({gap_pct:.2f}%)")

# Spread evenly over the 200 N values, this is the reduction each would need.
print(f"\nAverage improvement needed per N: {gap/200:.6f}")

# Proportional split instead: every N sheds the same fraction of its score
# (gap / current_total), so large-score N values carry more of the gap.
shrink_fraction = gap / current_total
scores_df['gap_contribution'] = scores_df['score'] * shrink_fraction
print("\nN values with largest gap contribution:")
print(scores_df[['n', 'side', 'score', 'gap_contribution']].nlargest(10, 'gap_contribution'))

In [None]:
# Motivation: N=88 improved by 0.34% with 2M iterations. Ten more N values
# with a similar gain would add up to about 0.024, i.e. 1.4% of the gap.
#
# Traits that might make an N resemble N=88:
#   1. grid-friendly (product of small integers)
#   2. low efficiency (room for improvement)
#   3. high score contribution (high impact)
#
# The priority below combines traits 2 and 3 only: it is the efficiency
# shortfall (100 - efficiency) weighted by the N's score. Grid-friendliness
# is not part of the formula.
shortfall = 100 - scores_df['efficiency']
scores_df['priority'] = shortfall * scores_df['score']

print("N values with highest improvement priority (low efficiency × high score):")
print(scores_df[['n', 'side', 'score', 'efficiency', 'priority']].nlargest(15, 'priority'))

## Conclusions

1. **CV-LB alignment is perfect** - this is a deterministic optimization problem
2. **N=88 showed 100x more improvement** than other N values with 2M SA iterations
3. **Low efficiency N values** have the most room for improvement
4. **Gap to target is 1.709 points (2.42%)** - closing it will need fundamentally different approaches

## Next Steps
1. Run longer SA (5M+ iterations) on high-priority N values
2. Try asymmetric solutions (not grid-based)
3. Investigate why N=88 improved so much - can we replicate for other N?