# Loop 1 LB Feedback Analysis

**Baseline Score**: 70.7438 (CV = LB; there is no CV/LB gap because the optimization is deterministic)
**Target**: 68.922808
**Gap to target**: 1.82 points (2.6% reduction needed)

## Key Questions:
1. Which N values contribute most to the total score?
2. Where are the biggest opportunities for improvement?
3. What optimization techniques should we prioritize?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon

# Decimal context precision: 30 significant digits for the Decimal arithmetic
# done on tree coordinates before they are converted to float.
getcontext().prec = 30
# Multiplier applied to every coordinate when a tree polygon is built and
# divided back out when a configuration is scored; 1 leaves values unscaled.
scale_factor = 1

# Progress marker for this notebook cell.
print('Libraries loaded')

In [None]:
# Christmas Tree class
class ChristmasTree:
    """Tree-shaped polygon positioned by a centre point and a rotation.

    The outline has 15 vertices given in the tree's own frame: the tip at
    (0, 0.8), three tiers widening to a 0.7-wide base at y = 0, and a
    0.15 x 0.2 trunk below the base. Every coordinate is multiplied by the
    module-level ``scale_factor``. The outline is rotated about the origin by
    ``angle`` and then translated by ``(center_x, center_y)``.

    Attributes:
        center_x, center_y: Translation applied after rotation, as Decimal.
        angle: Rotation passed to ``shapely.affinity.rotate``, as Decimal.
        polygon: The resulting shapely ``Polygon`` (float coordinates).
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        # Going through str() lets numbers and numeric strings be passed alike.
        self.center_x, self.center_y, self.angle = (
            Decimal(str(value)) for value in (center_x, center_y, angle)
        )

        two, four = Decimal('2'), Decimal('4')
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
        tip_y, tier_1_y, tier_2_y, base_y = (
            Decimal('0.8'), Decimal('0.5'), Decimal('0.25'), Decimal('0.0'),
        )
        trunk_bottom_y = -trunk_h

        # Right-hand side of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand side is the mirror image, walked back up to the tip.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y), *right_side, *left_side]

        # Scale in Decimal first, then drop to float for shapely.
        vertices = [
            (float(x * scale_factor), float(y * scale_factor))
            for x, y in outline
        ]
        rotated = affinity.rotate(Polygon(vertices), float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

print('ChristmasTree class defined')

In [None]:
# Helper functions
def load_configuration_from_df(n, df):
    """Build the trees of the ``n``-tree configuration stored in ``df``.

    Rows are selected by an ``id`` that starts with the zero-padded prefix
    ``f'{n:03d}_'``. The first character of each ``x``, ``y`` and ``deg``
    cell (the 's' prefix) is dropped before the value is handed to
    ``ChristmasTree``. A row is skipped when any of the three values is
    empty after that.

    Args:
        n: Number of trees in the configuration to load.
        df: DataFrame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        A list of ``ChristmasTree`` objects, one per usable row.
    """
    id_prefix = f'{n:03d}_'
    selected = df[df['id'].str.startswith(id_prefix)]

    trees = []
    for _, record in selected.iterrows():
        # Strip the leading 's' from each cell.
        x, y, deg = (str(record[column])[1:] for column in ('x', 'y', 'deg'))
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees

def get_score_for_n(trees, n):
    """Return ``(score, side_length)`` for one configuration of trees.

    ``side_length`` is the longer side of the axis-aligned bounding box
    around every tree's exterior ring, after dividing coordinates by the
    module-level ``scale_factor``. ``score`` is ``side_length**2 / n``.

    Args:
        trees: Objects exposing a shapely polygon as ``.polygon``.
        n: Divisor for the score (the configuration's tree count).

    Returns:
        ``(0.0, 0.0)`` when ``trees`` is empty, otherwise the pair above.
    """
    if not trees:
        return 0.0, 0.0

    unscale = float(scale_factor)
    # One (k, 2) array of exterior vertices per tree, stacked row-wise.
    outlines = [np.asarray(tree.polygon.exterior.xy).T / unscale for tree in trees]
    points = np.concatenate(outlines)

    lower = points.min(axis=0)
    upper = points.max(axis=0)
    width, height = upper - lower
    side_length = max(width, height)
    return side_length**2 / n, side_length

print('Helper functions defined')

In [None]:
# Score the baseline submission one configuration (N = 1..200) at a time.
df = pd.read_csv('/home/code/preoptimized_submission.csv')

per_n_scores = []
for n in range(1, 201):
    trees = load_configuration_from_df(n, df)
    score, side = get_score_for_n(trees, n)
    # pct_of_total is a placeholder here: it needs the grand total, which is
    # only known once every N has been scored.
    per_n_scores.append(
        dict(N=n, score=score, side_length=side, side_squared=side**2, pct_of_total=0)
    )

scores_df = pd.DataFrame(per_n_scores)
total_score = scores_df['score'].sum()
scores_df['pct_of_total'] = scores_df['score'].div(total_score).mul(100)

print(
    f'Total Score: {total_score:.6f}',
    'Target: 68.922808',
    f'Gap: {total_score - 68.922808:.6f}',
    '\nTop 10 N values by score contribution:',
    sep='\n',
)
print(scores_df.nlargest(10, 'score').loc[:, ['N', 'score', 'side_length', 'pct_of_total']])

In [None]:
# Four-panel overview of how the score is distributed across N.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: per-N score, with the mean drawn as a reference line.
mean_score = scores_df['score'].mean()
ax1.bar(scores_df['N'], scores_df['score'], alpha=0.7)
ax1.set(xlabel='N', ylabel='Score (side²/N)', title='Score Contribution by N')
ax1.axhline(y=mean_score, color='r', linestyle='--', label=f'Mean: {mean_score:.4f}')
ax1.legend()

# Top-right: bounding-box side length per N.
ax2.plot(scores_df['N'], scores_df['side_length'], 'b-', alpha=0.7)
ax2.set(xlabel='N', ylabel='Side Length', title='Bounding Box Side Length by N')

# Bottom-left: running total of the score against the target line.
scores_df['cumulative_score'] = scores_df['score'].cumsum()
ax3.plot(scores_df['N'], scores_df['cumulative_score'], 'g-')
ax3.axhline(y=68.922808, color='r', linestyle='--', label='Target: 68.92')
ax3.set(xlabel='N', ylabel='Cumulative Score', title='Cumulative Score by N')
ax3.legend()

# Bottom-right: side length normalised by sqrt(N), i.e. the square root of
# the per-N score.
scores_df['efficiency'] = scores_df['side_length'] / np.sqrt(scores_df['N'])
ax4.plot(scores_df['N'], scores_df['efficiency'], 'm-', alpha=0.7)
ax4.set(xlabel='N', ylabel='Side / sqrt(N)', title='Packing Efficiency (lower is better)')

plt.tight_layout()
plt.savefig('/home/code/exploration/per_n_analysis.png', dpi=100)
plt.show()

print('\nAnalysis saved to /home/code/exploration/per_n_analysis.png')

In [None]:
# Rank configurations by score: the largest per-N scores are reported as the
# worst performers, the smallest as the best.
report_columns = ['N', 'score', 'side_length', 'pct_of_total']
worst_n = scores_df.nlargest(20, 'score')
best_n = scores_df.nsmallest(10, 'score')

for heading, ranked in (
    ('\n=== WORST PERFORMING N VALUES (highest score) ===', worst_n),
    ('\n=== BEST PERFORMING N VALUES (lowest score) ===', best_n),
):
    print(heading)
    print(ranked[report_columns].to_string())

In [None]:
# Reference value for N=1 and a rough estimate of the available headroom.
#
# Tree extents: width 0.7 (base), height 1.0 (trunk bottom at -0.2, tip at 0.8).
# A 45-degree rotation maps (x, y) to ((x - y) / sqrt(2), (x + y) / sqrt(2)).
# Over the tree's vertices, x - y runs from -0.8 (tip) to 0.35 (right base
# corner) and x + y from -0.35 (left base corner) to 0.8 (tip), so both
# bounding-box sides equal (0.8 + 0.35) / sqrt(2) ≈ 0.813.
# Note that sqrt(0.7² + 1.0²) / sqrt(2) is ≈ 0.863 and is NOT this quantity.
# 45 degrees is taken as the best single-tree rotation, as this analysis assumes.
side_at_45 = (0.8 + 0.35) / 2 ** 0.5
score_at_45 = side_at_45 ** 2  # N = 1, so score = side² / 1

# First (only) row for a given N; values are read by column name below.
n1_row = scores_df[scores_df['N'] == 1].iloc[0]
n2_row = scores_df[scores_df['N'] == 2].iloc[0]

print('=== THEORETICAL ANALYSIS ===')
print('\nSingle tree (N=1):')
print(f'  Current score: {n1_row["score"]:.6f}')
print(f'  Current side: {n1_row["side_length"]:.6f}')
print(f'  Optimal at 45°: side = (0.8 + 0.35) / sqrt(2) ≈ {side_at_45:.3f}')
print(f'  Optimal score: {side_at_45:.3f}² / 1 ≈ {score_at_45:.3f}')

# For N=2, optimal is two trees interlocking
print('\nN=2:')
print(f'  Current score: {n2_row["score"]:.6f}')
print(f'  Current side: {n2_row["side_length"]:.6f}')

# Distance to the target, in absolute points and as a share of the total.
print('\n=== IMPROVEMENT POTENTIAL ===')
print(f'Total score: {total_score:.6f}')
print('Target: 68.922808')
print(f'Gap: {total_score - 68.922808:.6f}')
print(f'Reduction needed: {(total_score - 68.922808) / total_score * 100:.2f}%')

# What a uniform 5% improvement on the 20 highest-scoring N values would save.
worst_20_total = worst_n['score'].sum()
print(f'\nWorst 20 N values contribute: {worst_20_total:.4f} ({worst_20_total/total_score*100:.1f}% of total)')
print(f'If improved by 5%: saves {worst_20_total * 0.05:.4f} points')

In [None]:
# Final cell: print the headline numbers followed by the prioritised plan.
rule = '=' * 60
gap_to_target = total_score - 68.922808
gap_pct = gap_to_target / total_score * 100
n1_share = scores_df[scores_df['N'] == 1]['pct_of_total'].values[0]
worst_share = worst_n['score'].sum() / total_score * 100

print('\n' + rule)
print('SUMMARY AND RECOMMENDATIONS')
print(rule)

print(f'''
1. CURRENT STATUS:
   - Baseline score: {total_score:.6f}
   - Target: 68.922808
   - Gap: {gap_to_target:.6f} ({gap_pct:.2f}% reduction needed)

2. KEY OBSERVATIONS:
   - Small N values (1-10) contribute disproportionately to total score
   - N=1 alone contributes {n1_share:.2f}% of total
   - Worst 20 N values contribute {worst_share:.1f}% of total

3. RECOMMENDED APPROACHES (Priority Order):
   a) Run bbox3 C++ optimizer with extended time (3-11 hours)
      - This is the primary technique used by top performers
      - Parameters: -n 10000+ -r 256+
   
   b) Ensemble multiple solutions
      - Combine best configs from multiple sources for each N
      - Sources: bucket-of-chump, SmartManoj, telegram-public-shared
   
   c) Backward propagation
      - Start from N=200, remove boundary trees to improve smaller N
   
   d) Fractional translation refinement
      - Fine-grained translation with steps [0.001, 0.0005, 0.0002, 0.0001]

4. EXPECTED IMPROVEMENT:
   - bbox3 extended run: ~1-2 points
   - Ensemble: ~0.5-1 points
   - Backward propagation: ~0.2-0.5 points
   - Total potential: 1.5-3.5 points (enough to beat target)
''')

print(rule)