# Loop 1 LB Feedback Analysis

**Baseline submitted:** CV 70.6761 | LB 70.6761 (gap: 0.0000)

This is excellent: CV and LB match exactly, which confirms that our local scoring reproduces the leaderboard metric.

## Key Questions:
1. What's the best available score in snapshots?
2. What N values have the most room for improvement?
3. What techniques can close the 1.79-point gap to the target (68.89)?

In [None]:
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path

# Load current baseline
# `df` is the frame that is scored further down via calculate_per_n_scores(df).
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
df = pd.read_csv(baseline_path)
# Print the frame's dimensions and leading rows as a quick check of what was read.
print(f"Baseline submission shape: {df.shape}")
print(df.head())

In [None]:
# Look for the strongest saved submission (its file name carries the score 70.627569)
best_snapshot_path = '/home/nonroot/snapshots/santa-2025/21329069570/code/code/solutions/submission_70.627569.csv'
if not os.path.exists(best_snapshot_path):
    # Later cells re-check the path before touching df_best, so only report it here.
    print("Best snapshot not found")
else:
    df_best = pd.read_csv(best_snapshot_path)
    print(f"Best snapshot submission shape: {df_best.shape}")
    print(f"\nThis is 0.049 points better than current baseline!")

In [None]:
# Define tree polygon and scoring functions
import math
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union

def get_tree_polygon():
    """Build the Christmas-tree outline as a 15-vertex shapely Polygon.

    The outline is symmetric about the y-axis.  Vertices run from the tip at
    (0, 0.8) down the right-hand side, across the bottom of the trunk at
    y = -0.2, and back up the left-hand side.
    """
    trunk_width, trunk_height = 0.15, 0.2
    base_width, mid_width, top_width = 0.7, 0.4, 0.25
    tip_y, tier_1_y, tier_2_y = 0.8, 0.5, 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_height

    # Right-hand half of the outline, top to bottom.  Each tier contributes its
    # outer corner followed by the inner notch where the next tier starts.
    right_side = [
        (top_width / 2, tier_1_y),
        (top_width / 4, tier_1_y),
        (mid_width / 2, tier_2_y),
        (mid_width / 4, tier_2_y),
        (base_width / 2, base_y),
        (trunk_width / 2, base_y),
        (trunk_width / 2, trunk_bottom_y),
    ]
    # The left-hand half is the mirror image, walked bottom to top.
    left_side = [(-x, y) for x, y in reversed(right_side)]

    return Polygon([(0, tip_y), *right_side, *left_side])

# Build the reference tree once; calculate_per_n_scores rotates and translates it per row.
BASE_TREE = get_tree_polygon()
# Report the polygon's area and its bounds as a sanity check on the vertex list.
print(f"Tree polygon area: {BASE_TREE.area:.6f}")
print(f"Tree bounding box: {BASE_TREE.bounds}")

In [None]:
def parse_value(s):
    """Convert a submission cell to float, dropping a leading 's' if present.

    Strings such as ``'s1.25'`` lose their one-character prefix before
    conversion; anything else is handed to ``float`` unchanged.
    """
    has_prefix = isinstance(s, str) and s[:1] == 's'
    raw = s[1:] if has_prefix else s
    return float(raw)

def calculate_per_n_scores(df):
    """Calculate the score contribution of every N-tree configuration.

    Each row of ``df`` describes one tree.  The part of ``id`` before the first
    ``_`` is the configuration size N, and ``x``/``y``/``deg`` hold the
    placement, optionally prefixed with ``'s'`` (see ``parse_value``).  For
    every N from 1 to 200, ``BASE_TREE`` is rotated about the origin by ``deg``
    and translated by ``(x, y)`` for each row, and the side of the smallest
    axis-aligned square covering all of that N's trees is measured.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.
            It is not modified.

    Returns:
        Dict mapping N (int) to ``{'side': side, 'score': side ** 2 / N}``,
        in ascending N.  An N is left out when it does not have exactly N
        rows.

    Warns:
        UserWarning: if any N in 1..200 was left out, because a total summed
            from the result then understates the real score.
    """
    import warnings  # local import keeps this cell self-contained

    trees = df.assign(
        n=df['id'].map(lambda tree_id: int(tree_id.split('_')[0])),
        x_val=df['x'].map(parse_value),
        y_val=df['y'].map(parse_value),
        deg_val=df['deg'].map(parse_value),
    )

    scores = {}
    # One groupby pass replaces 200 full-frame boolean filters; groups arrive
    # in ascending N, so the dict is filled in the same order as before.
    for n, group in trees.groupby('n'):
        n = int(n)  # keep plain-int keys regardless of the column dtype
        in_range = 1 <= n <= 200
        if not in_range or len(group) != n:
            continue

        # The bounds of each placed tree are enough to find the overall extent.
        boxes = [
            affinity.translate(
                affinity.rotate(BASE_TREE, deg, origin=(0, 0)),
                xoff=x,
                yoff=y,
            ).bounds
            for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
        ]
        min_xs, min_ys, max_xs, max_ys = zip(*boxes)
        side = max(max(max_xs) - min(min_xs), max(max_ys) - min(min_ys))
        scores[n] = {'side': side, 'score': (side ** 2) / n}

    skipped = [n for n in range(1, 201) if n not in scores]
    if skipped:
        warnings.warn(
            f"{len(skipped)} of 200 N values were skipped (missing or wrong row "
            f"count), e.g. {skipped[:10]}; summed totals will be understated",
            stacklevel=2,
        )

    return scores

# Score the baseline per N, then add the contributions into one total.
print("Calculating per-N scores for baseline...")
baseline_scores = calculate_per_n_scores(df)
total_score = sum(entry['score'] for entry in baseline_scores.values())
print(f"Total baseline score: {total_score:.6f}")

In [None]:
# Tabulate each N's share of the baseline total to see where the score comes from
scores_df = pd.DataFrame([
    {
        'N': size,
        'side': entry['side'],
        'score': entry['score'],
        'pct': entry['score'] / total_score * 100,
    }
    for size, entry in baseline_scores.items()
])

# The 20 largest contributors are the configurations with the most to gain.
top_contributors = scores_df.nlargest(20, 'score')[['N', 'side', 'score', 'pct']]
print("Top 20 N values by score contribution:")
print(top_contributors.to_string())

print(f"\nTotal score: {total_score:.6f}")
print(f"Target score: 68.890873")
print(f"Gap: {total_score - 68.890873:.6f}")

In [None]:
# Calculate theoretical minimum (if trees could be packed perfectly)
# The tree polygon has area 0.245625: tiers 0.0375 + 0.065625 + 0.1125 plus a
# 0.03 trunk.  The value is read from the polygon rather than hard-coded.
tree_area = BASE_TREE.area
print(f"Single tree area: {tree_area:.6f}")

# Theoretical minimum: if N trees could be packed into a square with no wasted space
# side = sqrt(N * tree_area), score_n = side^2 / N = tree_area
# so every N in 1..200 contributes the same constant to the bound.
theoretical_min = sum(tree_area for _ in range(1, 201))
print(f"\nTheoretical minimum score (perfect packing): {theoretical_min:.6f}")
print(f"Current score: {total_score:.6f}")
print(f"Packing efficiency: {theoretical_min/total_score*100:.2f}%")

In [None]:
# Per-N comparison against the best snapshot, when that file is present
if os.path.exists(best_snapshot_path):
    print("Calculating per-N scores for best snapshot...")
    best_scores = calculate_per_n_scores(df_best)
    best_total = sum(entry['score'] for entry in best_scores.values())
    print(f"Best snapshot total: {best_total:.6f}")

    # Keep every N scored in both submissions where the snapshot wins by more
    # than 0.0001.
    improvements = []
    for n in range(1, 201):
        if n not in baseline_scores or n not in best_scores:
            continue
        baseline_n = baseline_scores[n]['score']
        best_n = best_scores[n]['score']
        diff = baseline_n - best_n
        if diff > 0.0001:
            improvements.append(
                {'N': n, 'baseline': baseline_n, 'best': best_n, 'improvement': diff}
            )

    if improvements:
        imp_df = pd.DataFrame(improvements).sort_values('improvement', ascending=False)
        print(f"\nN values where best snapshot is better (top 20):")
        print(imp_df.head(20).to_string())
        total_gain = sum(row['improvement'] for row in improvements)
        print(f"\nTotal potential improvement: {total_gain:.6f}")

In [None]:
# Summary
# The two reference scores are named once so every figure below is derived
# from them instead of being retyped by hand.
TARGET_SCORE = 68.890873         # target score used throughout this analysis
BEST_SNAPSHOT_SCORE = 70.627569  # score carried in the best snapshot's file name
gap_to_target = total_score - TARGET_SCORE
snapshot_gap = BEST_SNAPSHOT_SCORE - TARGET_SCORE

print("="*60)
print("SUMMARY")
print("="*60)
print(f"Current baseline: {total_score:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap to target: {gap_to_target:.6f} ({gap_to_target/TARGET_SCORE*100:.2f}%)")
print(f"\nBest available snapshot: {BEST_SNAPSHOT_SCORE:.6f}")
print(f"Improvement from better baseline: {total_score - BEST_SNAPSHOT_SCORE:.6f}")
print(f"\nRemaining gap after better baseline: {snapshot_gap:.6f}")
print(f"\nKey insight: Even with the best snapshot, we still need {snapshot_gap:.2f} points of improvement.")
print("This requires fundamentally different approaches, not just running optimizers longer.")