# Loop 1 LB Feedback Analysis

Baseline submitted: CV 70.6761 = LB 70.6761 (perfect match)

## Goal: Find approaches to close the 1.76-point gap to the target score (68.919154)

In [1]:
import pandas as pd
import numpy as np
import glob
import os
from collections import defaultdict

# Tree geometry: outline of a single tree as a 15-vertex polygon, stored as
# parallel x / y coordinate arrays (unrotated, positioned around the origin).
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_vertices(x, y, deg):
    """Return the tree outline rotated by ``deg`` degrees and moved to ``(x, y)``.

    Args:
        x: Horizontal offset added to every vertex after rotation.
        y: Vertical offset added to every vertex after rotation.
        deg: Rotation angle in degrees (converted to radians internally).

    Returns:
        A ``(xs, ys)`` tuple of arrays holding the transformed coordinates
        of the ``TX`` / ``TY`` vertices, in the same vertex order.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    # Standard 2-D rotation of (TX, TY) about the origin, then the translation.
    return TX * c - TY * s + x, TX * s + TY * c + y

def _parse_coord(value):
    """Convert one raw ``x`` / ``y`` / ``deg`` cell to ``float``.

    If the text form of the value starts with ``'s'`` (e.g. ``'s45.0'``),
    that one-character prefix is dropped before conversion; any other value
    is passed straight to ``float``.
    """
    text = str(value)
    return float(text[1:]) if text.startswith('s') else float(value)


def get_bounding_box(trees_df):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    Args:
        trees_df: DataFrame with ``x``, ``y`` and ``deg`` columns, one row per
            tree. Cells may be plain numbers or ``'s'``-prefixed strings.

    Returns:
        The larger of the x-extent and y-extent taken over every vertex of
        every placed tree.

    Raises:
        ValueError: If ``trees_df`` has no rows (there is nothing to bound),
            or if a cell cannot be converted to ``float``.
    """
    if len(trees_df) == 0:
        # Fail with an explicit message instead of the opaque
        # "max() arg is an empty sequence" the extrema below would produce.
        raise ValueError("get_bounding_box: trees_df contains no trees")

    min_x = min_y = float('inf')
    max_x = max_y = float('-inf')
    # Iterate the three columns directly: iterrows() builds a Series per row,
    # which is far slower and is not needed to read three scalar cells.
    for raw_x, raw_y, raw_deg in zip(trees_df['x'], trees_df['y'], trees_df['deg']):
        vx, vy = get_tree_vertices(
            _parse_coord(raw_x), _parse_coord(raw_y), _parse_coord(raw_deg)
        )
        # Track running extrema rather than storing every vertex in a list.
        min_x = min(min_x, np.min(vx))
        max_x = max(max_x, np.max(vx))
        min_y = min(min_y, np.min(vy))
        max_y = max(max_y, np.max(vy))

    return max(max_x - min_x, max_y - min_y)

def calculate_all_scores(df):
    """Score every complete N-tree group found in a submission frame.

    Rows are grouped by the zero-padded ``NNN_`` prefix of their ``id`` for
    N = 1..200. A group is scored only when it holds exactly N rows; other
    N values are left out of the result.

    Args:
        df: DataFrame with a string ``id`` column plus the ``x``, ``y`` and
            ``deg`` columns read by ``get_bounding_box``.

    Returns:
        Dict mapping N to ``{'side': s, 'contribution': s**2 / N}`` where
        ``s`` is the bounding-square side of that group.
    """
    ids = df['id']
    results = {}
    for count in range(1, 201):
        group = df[ids.str.startswith(f"{count:03d}_")]
        if len(group) != count:
            # Incomplete or missing group: leave it out of the result.
            continue
        edge = get_bounding_box(group)
        results[count] = {'side': edge, 'contribution': (edge ** 2) / count}
    return results

# Progress marker: printed only once the definitions above have executed.
print("Functions defined.")

Functions defined.


In [2]:
# Load all available CSV files and find best per-N
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# Recursively collect every *.csv under preopt_dir.
# os.walk yields entries in filesystem-dependent order, and the per-N
# comparison in the next cell keeps the first file seen on ties, so the list
# is sorted to make the result reproducible across machines and reruns.
csv_files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(preopt_dir)
    for name in names
    if name.endswith('.csv')
)

print(f"Found {len(csv_files)} CSV files")
# Show only a short preview to keep the cell output readable.
for f in csv_files[:10]:
    print(f"  {f}")

Found 30 CSV files
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/telegram_extracted/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT4.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/New_Tree_144_196.csv


In [3]:
# Calculate scores for each CSV and find best per-N
# Every record starts with all three keys so later cells can read 'side'
# without a KeyError even when no file covers a given N.
best_per_n = {n: {'score': float('inf'), 'source': None, 'side': None} for n in range(1, 201)}
skipped_files = []  # (path, reason) for every file that could not be scored

for csv_path in csv_files:
    try:
        df = pd.read_csv(csv_path)
        if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
            skipped_files.append((csv_path, 'missing one of the id/x/y/deg columns'))
            continue
        scores = calculate_all_scores(df)
    except Exception as e:
        # Best-effort scan: one unreadable or malformed file must not abort
        # the run, but it is recorded so it does not disappear silently.
        skipped_files.append((csv_path, f"{type(e).__name__}: {e}"))
        continue

    for n, data in scores.items():
        # Strict '<' keeps the first file seen when two files tie for an N.
        if data['contribution'] < best_per_n[n]['score']:
            best_per_n[n]['score'] = data['contribution']
            best_per_n[n]['source'] = os.path.basename(csv_path)
            best_per_n[n]['side'] = data['side']

print("Calculated best per-N from all sources")

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s):")
    for skipped_path, reason in skipped_files:
        print(f"  {skipped_path}: {reason}")

# An N with no source keeps score=inf, which would make any total infinite.
uncovered_n = [n for n, best in best_per_n.items() if best['source'] is None]
if uncovered_n:
    print(f"WARNING: no valid configuration found for N = {uncovered_n}")

Calculated best per-N from all sources


In [4]:
# Calculate total score from best per-N ensemble
# Add the best contribution for N = 1..200, then report it next to the
# submitted baseline score and the target score.
per_n_best = [best_per_n[n]['score'] for n in range(1, 201)]
total_ensemble = sum(per_n_best)
print(f"Best possible ensemble score: {total_ensemble:.6f}")
print("Current baseline score: 70.676102")
print("Target score: 68.919154")
print(f"Gap from ensemble to target: {total_ensemble - 68.919154:.6f}")

Best possible ensemble score: 70.676102
Current baseline score: 70.676102
Target score: 68.919154
Gap from ensemble to target: 1.756948


In [5]:
# Show which sources contribute to the best ensemble
# Count how many N values each source file won.
source_counts = defaultdict(int)
for best in best_per_n.values():
    source_counts[best['source']] += 1

print("\nSource contributions to best ensemble:")
# Largest contributor first; the stable sort keeps first-seen order on ties.
ranked_sources = sorted(source_counts.items(), key=lambda item: item[1], reverse=True)
for source, count in ranked_sources:
    print(f"  {source}: {count} N values")


Source contributions to best ensemble:
  ensemble.csv: 199 N values
  submission.csv: 1 N values


In [6]:
# Find N values where ensemble beats baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
baseline_df = pd.read_csv(baseline_path)
baseline_scores = calculate_all_scores(baseline_df)

# Keep every N whose best ensemble contribution is lower than the baseline's
# by more than 1e-9 (so floating-point noise does not count as a win).
improvements = []
for n in range(1, 201):
    before = baseline_scores[n]['contribution']
    after = best_per_n[n]['score']
    if not (after < before - 1e-9):
        continue
    improvements.append(dict(
        n=n,
        baseline=before,
        ensemble=after,
        improvement=before - after,
        source=best_per_n[n]['source'],
    ))

print(f"\nN values where ensemble beats baseline: {len(improvements)}")
if improvements:
    # Biggest gains first; show the top 20 and the summed gain.
    imp_df = pd.DataFrame(improvements).sort_values('improvement', ascending=False)
    print(imp_df.head(20).to_string(index=False))
    print(f"\nTotal improvement from ensemble: {imp_df['improvement'].sum():.6f}")


N values where ensemble beats baseline: 0


In [7]:
# Analyze score breakdown by N range for baseline
print("\nBaseline score breakdown by N range:")
n_ranges = [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]
for start, end in n_ranges:
    # Both bounds are inclusive.
    contributions = [baseline_scores[n]['contribution'] for n in range(start, end + 1)]
    range_score = sum(contributions)
    print(f"  N={start}-{end}: {range_score:.4f}")


Baseline score breakdown by N range:
  N=1-10: 4.3291
  N=11-50: 14.7130
  N=51-100: 17.6411
  N=101-150: 17.1441
  N=151-200: 16.8487


In [8]:
# Find N values with worst efficiency (most room for improvement)
# Efficiency = total tree area / bounding-square area for each N.
tree_area = 0.245625  # Single tree area
efficiencies = []
for n in range(1, 201):
    entry = baseline_scores[n]
    side = entry['side']
    area = side ** 2
    efficiencies.append({
        'n': n,
        'side': side,
        'efficiency': (n * tree_area) / area,
        'contribution': entry['contribution'],
    })

# Ascending sort puts the least efficient N values first.
eff_df = pd.DataFrame(efficiencies).sort_values('efficiency')
print("\nLowest efficiency N values (most room for improvement):")
print(eff_df.head(20).to_string(index=False))


Lowest efficiency N values (most room for improvement):
 n     side  efficiency  contribution
 1 0.813173    0.371456      0.661250
 2 0.949504    0.544890      0.450779
 3 1.142031    0.564986      0.434745
 5 1.443692    0.589241      0.416850
 4 1.290806    0.589672      0.416545
 7 1.673104    0.614221      0.399897
 6 1.548438    0.614661      0.399610
 9 1.867280    0.634010      0.387415
 8 1.755921    0.637313      0.385407
15 2.384962    0.647740      0.379203
10 1.940696    0.652165      0.376630
21 2.811667    0.652476      0.376451
20 2.742469    0.653159      0.376057
11 2.033002    0.653717      0.375736
22 2.873270    0.654549      0.375258
16 2.446640    0.656527      0.374128
26 3.118320    0.656757      0.373997
12 2.114873    0.659000      0.372724
13 2.200046    0.659709      0.372323
25 3.050182    0.660026      0.372144


In [9]:
# Check if N=1 is at optimal 45 degrees
is_single_tree = baseline_df['id'].str.startswith('001_')
n1_trees = baseline_df[is_single_tree]
print("\nN=1 configuration:")
print(n1_trees)

# Calculate bounding box size at a few sample angles for a single tree
# placed at the origin.
print("\nBounding box size at different angles for single tree:")
for angle in (0, 30, 45, 60, 90):
    vx, vy = get_tree_vertices(0, 0, angle)
    width = max(vx) - min(vx)
    height = max(vy) - min(vy)
    side = max(width, height)
    print(f"  Angle {angle}: side = {side:.6f}, score = {side**2:.6f}")


N=1 configuration:
      id                     x                    y    deg
0  001_0  s-48.196086194214246  s58.770984615214225  s45.0

Bounding box size at different angles for single tree:
  Angle 0: side = 1.000000, score = 1.000000
  Angle 30: side = 0.903525, score = 0.816358
  Angle 45: side = 0.813173, score = 0.661250
  Angle 60: side = 0.903525, score = 0.816358
  Angle 90: side = 1.000000, score = 1.000000


In [10]:
# Summary
# Recap of the headline numbers and the conclusions drawn in this notebook.
banner = "=" * 60
print("\n" + banner)
print("SUMMARY")
print(banner)
print("Current baseline: 70.676102")
print(f"Best ensemble from all CSVs: {total_ensemble:.6f}")
print("Target: 68.919154")
print(f"Gap to target: {70.676102 - 68.919154:.6f} ({(70.676102 - 68.919154)/68.919154*100:.2f}%)")
for line in (
    "\nKey insights:",
    "1. N=1 is already at 45 degrees (optimal)",
    "2. Ensemble from existing CSVs provides minimal improvement",
    "3. Need fundamentally different approaches for large N values",
    "4. Lattice/translation approach from egortrushin kernel is key",
):
    print(line)


SUMMARY
Current baseline: 70.676102
Best ensemble from all CSVs: 70.676102
Target: 68.919154
Gap to target: 1.756948 (2.55%)

Key insights:
1. N=1 is already at 45 degrees (optimal)
2. Ensemble from existing CSVs provides minimal improvement
3. Need fundamentally different approaches for large N values
4. Lattice/translation approach from egortrushin kernel is key
