# Loop 1 LB Feedback Analysis

**Baseline submitted:** CV 70.6761 | LB 70.6761 (gap: 0.0000)

The CV-LB gap is essentially zero, which is expected for this optimization problem (no train/test split - we're optimizing the same configurations).

## Key Insights from Evaluator:
1. **Ensemble opportunity**: Multiple pre-optimized CSVs exist but haven't been compared
2. **N=1 optimization**: Currently at 45 degrees (optimal) but at extreme coordinates
3. **bbox3 not yet used**: Available but no extended optimization attempted
4. **Lattice-based approach**: Not explored for large N

## Strategy Analysis
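The remaining score gap is 1.78 points (2.6%). This is not the CV-LB gap above (which is zero); it is presumably the distance to the target score. It is a significant gap that requires fundamentally different approaches.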

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.ops import unary_union
import os
import glob

# Tree geometry: vertices of a single tree outline.
# x and y are kept as parallel lists and then paired into (x, y) tuples.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1,
    0.35, 0.075, 0.075, -0.075, -0.075,
    -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25,
    0, 0, -0.2, -0.2, 0,
    0, 0.25, 0.25, 0.5, 0.5,
]
TREE_COORDS = [(tx, ty) for tx, ty in zip(TX, TY)]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated about the origin, then shifted by (x, y).

    Args:
        x: Offset applied along the x axis after the rotation.
        y: Offset applied along the y axis after the rotation.
        deg: Rotation angle handed to shapely's ``rotate`` (degrees by default).

    Returns:
        The transformed shapely ``Polygon``.
    """
    base_outline = Polygon(TREE_COORDS)
    # Rotate first (about the origin), translate second: the order matters.
    rotated = rotate(base_outline, deg, origin=(0, 0))
    return translate(rotated, x, y)

def parse_value(val):
    """Convert a cell value to ``float``, dropping a leading ``'s'`` from strings.

    Args:
        val: A number, or a string that may carry an ``'s'`` prefix.

    Returns:
        The numeric value as a ``float``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

def calculate_score_for_n(group):
    """Score one configuration: squared bounding-square side divided by ``n``.

    Args:
        group: DataFrame rows for a single ``n``; reads the columns ``n``,
            ``x_val``, ``y_val`` and ``deg_val``.

    Returns:
        Tuple ``(score, side)`` where ``side`` is the longer edge of the
        axis-aligned bounding box around all tree vertices and
        ``score = side**2 / n``.
    """
    tree_count = group['n'].iloc[0]

    # Gather the exterior vertices of every placed tree.
    vertex_sets = []
    for _, tree in group.iterrows():
        shape = create_tree_polygon(tree['x_val'], tree['y_val'], tree['deg_val'])
        vertex_sets.append(np.array(shape.exterior.coords))
    stacked = np.vstack(vertex_sets)

    lower = stacked.min(axis=0)
    upper = stacked.max(axis=0)
    width = upper[0] - lower[0]
    height = upper[1] - lower[1]
    side = max(width, height)
    return side**2 / tree_count, side

print('Functions loaded')

Functions loaded


In [2]:
# Collect every CSV under the pre-optimized snapshot directory (searched recursively)
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# One batch of matches per directory visited by os.walk, in walk order
csv_files = []
for root, dirs, files in os.walk(preopt_dir):
    csv_files.extend(os.path.join(root, name) for name in files if name.endswith('.csv'))

print(f'Found {len(csv_files)} CSV files:')
for f in csv_files:
    print(f'  {f}')


Found 30 CSV files:
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT4.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/New_Tree_144_196.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimi

In [3]:
# Score each CSV and find per-N scores
def score_csv(csv_path):
    """Score one submission-style CSV, per N and in total.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        ``(total, scores_by_n)`` where ``scores_by_n`` maps each ``n`` to
        ``{'score': ..., 'side': ...}`` and ``total`` is the sum of the
        per-N scores. Returns ``(None, None)`` when the file lacks any of
        the required columns, has no rows, or cannot be processed (in the
        last case the error is printed).
    """
    required_columns = ('id', 'x', 'y', 'deg')
    try:
        df = pd.read_csv(csv_path)
        # Skip files that are not submission-shaped. Every column used below is
        # checked so a half-matching file is skipped instead of raising KeyError.
        if any(col not in df.columns for col in required_columns):
            return None, None
        # A header-only file would otherwise yield total == 0 with no per-N
        # scores and be mistaken for the best file by callers taking a minimum.
        if df.empty:
            return None, None

        df['x_val'] = df['x'].apply(parse_value)
        df['y_val'] = df['y'].apply(parse_value)
        df['deg_val'] = df['deg'].apply(parse_value)
        # The part of the id before the first '_' is used as n; str() keeps this
        # working if pandas inferred a non-string dtype for the column.
        df['n'] = df['id'].apply(lambda x: int(str(x).split('_')[0]))

        scores_by_n = {}
        for n, group in df.groupby('n'):
            score, side = calculate_score_for_n(group)
            scores_by_n[n] = {'score': score, 'side': side}

        total = sum(s['score'] for s in scores_by_n.values())
        return total, scores_by_n
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the scan.
        print(f'Error processing {csv_path}: {e}')
        return None, None

# Score all CSVs; files that score_csv rejects (total is None) are left out
csv_scores = {}
for csv_path in csv_files:
    total, by_n = score_csv(csv_path)
    if total is not None:
        csv_scores[csv_path] = {'total': total, 'by_n': by_n}
        # Print the path relative to preopt_dir rather than the basename:
        # several files in different folders share a basename (e.g. submission.csv),
        # so the basename alone does not identify which file a score belongs to.
        print(f'{os.path.relpath(csv_path, preopt_dir)}: {total:.6f}')


ensemble.csv: 70.676102


submission.csv: 70.676501


santa-2025.csv: 70.676102


best_ensemble.csv: 70.676102


72.49.csv: 72.495739


71.97.csv: 71.972027


72.49.csv: 72.495739


71.97.csv: 71.972027


submission_JKoT4.csv: 72.489504


New_Tree_144_196.csv: 72.927920


submission_JKoT3.csv: 72.489488


santa2025_ver2_v61.csv: 72.951925


submission_JKoT2.csv: 72.489348


santa2025_ver2_v67.csv: 72.938567


santa2025_ver2_v76.csv: 72.826444


submission_70_936673758122.csv: 70.936674


santa2025_ver2_v65.csv: 72.935294


submission_70_926149550346.csv: 70.926150


santa2025_ver2_v66.csv: 72.938599


santa2025_ver2_v63.csv: 72.947427


santa2025_ver2_v69.csv: 72.850110


submission_JKoT1.csv: 72.489483


submission_opt1.csv: 70.990692


santa2025_ver2_v68.csv: 72.939233


santa-2025.csv: 70.676102


submission.csv: 70.676501


submission (77).csv: 72.135010


submission.csv: 72.935294


submission_sa.csv: 72.935294


submission_best.csv: 70.926150


In [4]:
# Find the best configuration for each N across all CSVs.
# The comparison is a strict '<', so on an exact tie the CSV scanned first keeps the slot.
best_by_n = {}
for csv_path, data in csv_scores.items():
    for n, scores in data['by_n'].items():
        if n not in best_by_n or scores['score'] < best_by_n[n]['score']:
            best_by_n[n] = {
                'score': scores['score'],
                'side': scores['side'],
                'source': csv_path
            }

# Calculate ensemble score
ensemble_score = sum(s['score'] for s in best_by_n.values())
print(f'\nEnsemble score (best per N): {ensemble_score:.6f}')
# min() raises ValueError on an empty iterable, so handle the case where no CSV was scored.
if csv_scores:
    best_single_total = min(entry['total'] for entry in csv_scores.values())
    print(f'Best single CSV: {best_single_total:.6f}')
else:
    print('Best single CSV: n/a (no CSVs were scored)')

# Show which CSVs contribute to the ensemble.
# Keyed by the path relative to preopt_dir: different files can share a basename,
# and counting by basename would merge them into a single entry.
source_counts = {}
for n, data in best_by_n.items():
    src = os.path.relpath(data['source'], preopt_dir)
    source_counts[src] = source_counts.get(src, 0) + 1

print('\nContributions by source:')
for src, count in sorted(source_counts.items(), key=lambda x: -x[1]):
    print(f'  {src}: {count} N values')


Ensemble score (best per N): 70.676102
Best single CSV: 70.676102

Contributions by source:
  ensemble.csv: 200 N values


In [5]:
# Compare the per-N ensemble against the santa-2025.csv baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
# Falls back to a hard-coded total when the baseline file was not scored
baseline_total = csv_scores.get(baseline_path, {}).get('total', 70.676102)

print(f'\nBaseline (santa-2025.csv): {baseline_total:.6f}')
print(f'Ensemble score: {ensemble_score:.6f}')
print(f'Improvement: {baseline_total - ensemble_score:.6f}')

# Collect the N values where the ensemble beats the baseline by more than 1e-10
if baseline_path in csv_scores:
    baseline_by_n = csv_scores[baseline_path]['by_n']
    improvements = []
    for n in range(1, 201):
        if n not in best_by_n or n not in baseline_by_n:
            continue
        diff = baseline_by_n[n]['score'] - best_by_n[n]['score']
        if diff > 1e-10:
            improvements.append((n, diff, best_by_n[n]['source']))

    if not improvements:
        print('\nNo N values where ensemble beats baseline - santa-2025.csv dominates all!')
    else:
        print(f'\nN values where ensemble beats baseline ({len(improvements)} total):')
        # Largest improvement first; only the top ten are listed
        top_gains = sorted(improvements, key=lambda gain: -gain[1])[:10]
        for n, diff, src in top_gains:
            print(f'  N={n}: improvement={diff:.6f} from {os.path.basename(src)}')


Baseline (santa-2025.csv): 70.676102
Ensemble score: 70.676102
Improvement: 0.000000

No N values where ensemble beats baseline - santa-2025.csv dominates all!


In [6]:
# Analyze the theoretical minimum for N=1
# For a single tree at 45 degrees, what's the minimum bounding box?

import numpy as np
from scipy.optimize import minimize_scalar

def tree_bbox_at_angle(deg):
    """Return the longer bounding-box side of one tree at the origin rotated by ``deg``."""
    outline = np.array(create_tree_polygon(0, 0, deg).exterior.coords)
    # Per-axis extent: (width, height) of the axis-aligned bounding box
    extent = outline.max(axis=0) - outline.min(axis=0)
    return max(extent[0], extent[1])

# Search 0-90 degrees for the rotation that minimizes the single-tree bounding side
result = minimize_scalar(tree_bbox_at_angle, bounds=(0, 90), method='bounded')
optimal_angle, optimal_side = result.x, result.fun
optimal_score = optimal_side**2

print(f'Optimal angle for N=1: {optimal_angle:.4f} degrees')
print(f'Optimal side length: {optimal_side:.6f}')
print(f'Optimal score contribution: {optimal_score:.6f}')

# Compare against the N=1 entry of the baseline file, when that file was scored
if baseline_path in csv_scores:
    current_n1 = csv_scores[baseline_path]['by_n'][1]
    print(f'\nCurrent N=1 score: {current_n1["score"]:.6f}')
    print(f'Current N=1 side: {current_n1["side"]:.6f}')
    print(f'Gap: {current_n1["score"] - optimal_score:.6f}')

# Spot-check a handful of fixed angles on either side of 45 degrees
print('\nScore at different angles for N=1:')
for angle in (0, 30, 45, 60, 90):
    side = tree_bbox_at_angle(angle)
    print(f'  {angle}°: side={side:.6f}, score={side**2:.6f}')

Optimal angle for N=1: 45.0000 degrees
Optimal side length: 0.813173
Optimal score contribution: 0.661250

Current N=1 score: 0.661250
Current N=1 side: 0.813173
Gap: -0.000000

Score at different angles for N=1:
  0°: side=1.000000, score=1.000000
  30°: side=0.903525, score=0.816358
  45°: side=0.813173, score=0.661250
  60°: side=0.903525, score=0.816358
  90°: side=1.000000, score=1.000000
