# Evolver Loop 10 - LB Feedback Analysis

## Key Results
- exp_009 submitted: CV 70.3733 = LB 70.3733 (EXACT MATCH!)
- Gap to target: 1.49 points (68.879 target)
- CV-LB alignment is PERFECT

## Strategy: Ensemble ALL Available Sources

We have many untapped sources:
1. kaggle_datasets/71.97.csv
2. kaggle_datasets/72.49.csv  
3. kaggle_datasets/submission.csv
4. kaggle_datasets/chistyakov/*.csv
5. kaggle_datasets/santa25-public/*.csv (17 files!)

In [7]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
import json

getcontext().prec = 30
SCALE = Decimal('1e18')

# Define tree shape (from getting-started kernel)
def get_tree_polygon_scaled(center_x, center_y, angle):
    """Get tree polygon at position (x, y) with rotation angle, using 1e18 scaling."""
    trunk_w = Decimal('0.15')
    trunk_h = Decimal('0.2')
    base_w = Decimal('0.7')
    mid_w = Decimal('0.4')
    top_w = Decimal('0.25')
    tip_y = Decimal('0.8')
    tier_1_y = Decimal('0.5')
    tier_2_y = Decimal('0.25')
    base_y = Decimal('0.0')
    trunk_bottom_y = -trunk_h

    vertices = [
        (Decimal('0.0') * SCALE, tip_y * SCALE),
        (top_w / Decimal('2') * SCALE, tier_1_y * SCALE),
        (top_w / Decimal('4') * SCALE, tier_1_y * SCALE),
        (mid_w / Decimal('2') * SCALE, tier_2_y * SCALE),
        (mid_w / Decimal('4') * SCALE, tier_2_y * SCALE),
        (base_w / Decimal('2') * SCALE, base_y * SCALE),
        (trunk_w / Decimal('2') * SCALE, base_y * SCALE),
        (trunk_w / Decimal('2') * SCALE, trunk_bottom_y * SCALE),
        (-(trunk_w / Decimal('2')) * SCALE, trunk_bottom_y * SCALE),
        (-(trunk_w / Decimal('2')) * SCALE, base_y * SCALE),
        (-(base_w / Decimal('2')) * SCALE, base_y * SCALE),
        (-(mid_w / Decimal('4')) * SCALE, tier_2_y * SCALE),
        (-(mid_w / Decimal('2')) * SCALE, tier_2_y * SCALE),
        (-(top_w / Decimal('4')) * SCALE, tier_1_y * SCALE),
        (-(top_w / Decimal('2')) * SCALE, tier_1_y * SCALE),
    ]
    
    initial_polygon = Polygon([(float(x), float(y)) for x, y in vertices])
    rotated = affinity.rotate(initial_polygon, float(angle), origin=(0, 0))
    translated = affinity.translate(rotated, 
                                    xoff=float(Decimal(str(center_x)) * SCALE),
                                    yoff=float(Decimal(str(center_y)) * SCALE))
    return translated

def compute_bbox_score(trees):
    """Compute bounding box score for a list of trees."""
    all_coords = []
    for tree in trees:
        poly = get_tree_polygon_scaled(tree['x'], tree['y'], tree['deg'])
        all_coords.extend(list(poly.exterior.coords))
    if not all_coords:
        return float('inf')
    xs = [c[0] for c in all_coords]
    ys = [c[1] for c in all_coords]
    width = (max(xs) - min(xs)) / float(SCALE)
    height = (max(ys) - min(ys)) / float(SCALE)
    return max(width, height)

def validate_no_overlap_strict(trees):
    """Validate no overlaps using 1e18 scaling."""
    polygons = []
    for tree in trees:
        poly = get_tree_polygon_scaled(tree['x'], tree['y'], tree['deg'])
        polygons.append(poly)
    
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):
                return False
    return True

print('Functions loaded')

Functions loaded


In [8]:
def parse_submission(filepath):
    """Parse a submission CSV into dict of N -> list of trees."""
    df = pd.read_csv(filepath)
    solutions = {}
    for _, row in df.iterrows():
        id_parts = row['id'].split('_')
        n = int(id_parts[0])
        x = float(str(row['x']).replace('s', ''))
        y = float(str(row['y']).replace('s', ''))
        deg = float(str(row['deg']).replace('s', ''))
        if n not in solutions:
            solutions[n] = []
        solutions[n].append({'x': x, 'y': y, 'deg': deg})
    return solutions

def compute_total_score(solutions):
    """Compute total score: sum of s^2/n for each N."""
    total = 0
    for n in range(1, 201):
        if n in solutions:
            s = compute_bbox_score(solutions[n])  # side length
            total += (s ** 2) / n
    return total

def compute_per_n_scores(solutions):
    """Compute score (s^2/n) for each N."""
    scores = {}
    for n in range(1, 201):
        if n in solutions:
            s = compute_bbox_score(solutions[n])
            scores[n] = (s ** 2) / n
    return scores

print('Parsing functions loaded')

Parsing functions loaded


In [9]:
# Load current best (exp_009)
best_path = '/home/code/experiments/009_santa_ensemble/submission.csv'
best_solutions = parse_submission(best_path)
best_per_n = compute_per_n_scores(best_solutions)
best_total = sum(best_per_n.values())
print(f'Current best (exp_009): {best_total:.6f}')

Current best (exp_009): 70.373334


In [10]:
# Collect ALL available CSV sources
sources = []

# Main kaggle_datasets
for f in ['71.97.csv', '72.49.csv', 'submission.csv']:
    path = f'/home/code/kaggle_datasets/{f}'
    if os.path.exists(path):
        sources.append(path)

# Chistyakov
for f in os.listdir('/home/code/kaggle_datasets/chistyakov'):
    if f.endswith('.csv'):
        sources.append(f'/home/code/kaggle_datasets/chistyakov/{f}')

# Santa25-public
for f in os.listdir('/home/code/kaggle_datasets/santa25-public'):
    if f.endswith('.csv'):
        sources.append(f'/home/code/kaggle_datasets/santa25-public/{f}')

print(f'Found {len(sources)} additional sources:')
for s in sources:
    print(f'  {s}')

Found 21 additional sources:
  /home/code/kaggle_datasets/71.97.csv
  /home/code/kaggle_datasets/72.49.csv
  /home/code/kaggle_datasets/submission.csv
  /home/code/kaggle_datasets/chistyakov/submission_best.csv
  /home/code/kaggle_datasets/chistyakov/70.378875862989_20260126_045659.csv
  /home/code/kaggle_datasets/santa25-public/submission_JKoT4.csv
  /home/code/kaggle_datasets/santa25-public/New_Tree_144_196.csv
  /home/code/kaggle_datasets/santa25-public/submission_JKoT3.csv
  /home/code/kaggle_datasets/santa25-public/santa2025_ver2_v61.csv
  /home/code/kaggle_datasets/santa25-public/submission_JKoT2.csv
  /home/code/kaggle_datasets/santa25-public/santa2025_ver2_v67.csv
  /home/code/kaggle_datasets/santa25-public/santa2025_ver2_v76.csv
  /home/code/kaggle_datasets/santa25-public/submission_70_936673758122.csv
  /home/code/kaggle_datasets/santa25-public/santa2025_ver2_v65.csv
  /home/code/kaggle_datasets/santa25-public/submission_70_926149550346.csv
  /home/code/kaggle_datasets/santa2

In [11]:
# Evaluate each source
source_scores = {}
source_solutions = {}

for source_path in sources:
    try:
        solutions = parse_submission(source_path)
        total = compute_total_score(solutions)
        source_scores[source_path] = total
        source_solutions[source_path] = solutions
        print(f'{os.path.basename(source_path)}: {total:.6f}')
    except Exception as e:
        print(f'{os.path.basename(source_path)}: ERROR - {e}')

71.97.csv: 71.972027


72.49.csv: 72.495739


submission.csv: 70.647327


submission_best.csv: 70.926150


70.378875862989_20260126_045659.csv: 70.378876


submission_JKoT4.csv: 72.489504


New_Tree_144_196.csv: 72.927920


submission_JKoT3.csv: 72.489488


santa2025_ver2_v61.csv: 72.951925


submission_JKoT2.csv: 72.489348


santa2025_ver2_v67.csv: 72.938567


santa2025_ver2_v76.csv: 72.826444


submission_70_936673758122.csv: 70.936674


santa2025_ver2_v65.csv: 72.935294


submission_70_926149550346.csv: 70.926150


santa2025_ver2_v66.csv: 72.938599


santa2025_ver2_v63.csv: 72.947427


santa2025_ver2_v69.csv: 72.850110


submission_JKoT1.csv: 72.489483


submission_opt1.csv: 70.990692


santa2025_ver2_v68.csv: 72.939233


In [12]:
# Sort sources by score
sorted_sources = sorted(source_scores.items(), key=lambda x: x[1])
print('\nSources ranked by total score:')
for path, score in sorted_sources:
    print(f'  {score:.4f}: {os.path.basename(path)}')


Sources ranked by total score:
  70.3789: 70.378875862989_20260126_045659.csv
  70.6473: submission.csv
  70.9261: submission_best.csv
  70.9261: submission_70_926149550346.csv
  70.9367: submission_70_936673758122.csv
  70.9907: submission_opt1.csv
  71.9720: 71.97.csv
  72.4893: submission_JKoT2.csv
  72.4895: submission_JKoT1.csv
  72.4895: submission_JKoT3.csv
  72.4895: submission_JKoT4.csv
  72.4957: 72.49.csv
  72.8264: santa2025_ver2_v76.csv
  72.8501: santa2025_ver2_v69.csv
  72.9279: New_Tree_144_196.csv
  72.9353: santa2025_ver2_v65.csv
  72.9386: santa2025_ver2_v67.csv
  72.9386: santa2025_ver2_v66.csv
  72.9392: santa2025_ver2_v68.csv
  72.9474: santa2025_ver2_v63.csv
  72.9519: santa2025_ver2_v61.csv


In [13]:
# Now ensemble: for each N, find the BEST solution across ALL sources
# that passes strict validation

all_sources = {'exp_009': best_solutions}
for path, solutions in source_solutions.items():
    all_sources[os.path.basename(path)] = solutions

print(f'Total sources for ensemble: {len(all_sources)}')

# Track best per-N
ensemble_solutions = {}
ensemble_sources = {}
improvements = []

for n in range(1, 201):
    best_score = best_per_n.get(n, float('inf'))
    best_source = 'exp_009'
    best_trees = best_solutions.get(n, [])
    
    for source_name, solutions in all_sources.items():
        if source_name == 'exp_009':
            continue
        if n not in solutions:
            continue
        
        trees = solutions[n]
        score = compute_bbox_score(trees)
        
        if score < best_score - 1e-9:  # Found improvement
            # Validate no overlaps
            if validate_no_overlap_strict(trees):
                best_score = score
                best_source = source_name
                best_trees = trees
    
    ensemble_solutions[n] = best_trees
    ensemble_sources[n] = best_source
    
    if best_source != 'exp_009':
        improvement = best_per_n.get(n, 0) - best_score
        improvements.append((n, improvement, best_source))

print(f'\nFound {len(improvements)} improvements over exp_009')
if improvements:
    improvements.sort(key=lambda x: -x[1])
    print('\nTop 20 improvements:')
    for n, imp, src in improvements[:20]:
        print(f'  N={n}: +{imp:.6f} from {src}')

Total sources for ensemble: 22



Found 0 improvements over exp_009


In [14]:
# Compute ensemble total score
ensemble_per_n = compute_per_n_scores(ensemble_solutions)
ensemble_total = sum(ensemble_per_n.values())

print(f'\nExp_009 total: {best_total:.6f}')
print(f'Ensemble total: {ensemble_total:.6f}')
print(f'Improvement: {best_total - ensemble_total:.6f}')
print(f'\nTarget: 68.879467')
print(f'Gap: {ensemble_total - 68.879467:.6f}')


Exp_009 total: 70.373334
Ensemble total: 70.373334
Improvement: 0.000000

Target: 68.879467
Gap: 1.493867


In [15]:
# Count sources used
from collections import Counter
source_counts = Counter(ensemble_sources.values())
print('\nSources used in ensemble:')
for src, count in source_counts.most_common():
    print(f'  {src}: {count} N values')


Sources used in ensemble:
  exp_009: 200 N values


In [16]:
# Save ensemble if it's better
if ensemble_total < best_total - 0.0001:
    print('\n=== SAVING IMPROVED ENSEMBLE ===')
    
    # Create submission CSV
    rows = []
    for n in range(1, 201):
        trees = ensemble_solutions[n]
        for i, tree in enumerate(trees):
            rows.append({
                'id': f'{n:03d}_{i}',
                'x': f"s{tree['x']}",
                'y': f"s{tree['y']}",
                'deg': f"s{tree['deg']}"
            })
    
    submission_df = pd.DataFrame(rows)
    
    # Save to experiment folder
    exp_folder = '/home/code/experiments/010_full_ensemble'
    os.makedirs(exp_folder, exist_ok=True)
    submission_df.to_csv(f'{exp_folder}/submission.csv', index=False)
    
    # Save metrics
    metrics = {
        'cv_score': ensemble_total,
        'baseline_score': best_total,
        'improvement': best_total - ensemble_total,
        'n_improved': len(improvements),
        'sources_used': dict(source_counts),
        'target': 68.879467,
        'gap': ensemble_total - 68.879467
    }
    with open(f'{exp_folder}/metrics.json', 'w') as f:
        json.dump(metrics, f, indent=2)
    
    # Copy to submission folder
    submission_df.to_csv('/home/submission/submission.csv', index=False)
    
    print(f'Saved to {exp_folder}')
    print(f'Score: {ensemble_total:.6f}')
    print(f'Improvement: {best_total - ensemble_total:.6f}')
else:
    print('\nNo improvement found from additional sources')


No improvement found from additional sources


In [17]:
# Debug: Compare exp_009 vs chistyakov file per-N
chistyakov_path = '/home/code/kaggle_datasets/chistyakov/70.378875862989_20260126_045659.csv'
chistyakov_solutions = parse_submission(chistyakov_path)
chistyakov_per_n = compute_per_n_scores(chistyakov_solutions)

print("Comparing exp_009 vs chistyakov (70.378):")
print("N values where chistyakov is better:")
better_count = 0
for n in range(1, 201):
    exp_score = best_per_n.get(n, float('inf'))
    chi_score = chistyakov_per_n.get(n, float('inf'))
    if chi_score < exp_score - 1e-9:
        print(f"  N={n}: exp_009={exp_score:.6f}, chistyakov={chi_score:.6f}, diff={exp_score - chi_score:.6f}")
        better_count += 1

print(f"\nTotal N values where chistyakov is better: {better_count}")
print(f"\nExp_009 total: {best_total:.6f}")
print(f"Chistyakov total: {sum(chistyakov_per_n.values()):.6f}")

Comparing exp_009 vs chistyakov (70.378):
N values where chistyakov is better:
  N=7: exp_009=0.399897, chistyakov=0.399893, diff=0.000004
  N=9: exp_009=0.383130, chistyakov=0.383125, diff=0.000005
  N=15: exp_009=0.376638, chistyakov=0.376325, diff=0.000313
  N=21: exp_009=0.374790, chistyakov=0.372174, diff=0.002616
  N=22: exp_009=0.369853, chistyakov=0.369832, diff=0.000022
  N=26: exp_009=0.369820, chistyakov=0.369628, diff=0.000192
  N=31: exp_009=0.367577, chistyakov=0.364460, diff=0.003116
  N=34: exp_009=0.368051, chistyakov=0.368024, diff=0.000027
  N=37: exp_009=0.365859, chistyakov=0.365398, diff=0.000461
  N=38: exp_009=0.363152, chistyakov=0.363047, diff=0.000105
  N=39: exp_009=0.360350, chistyakov=0.360309, diff=0.000040
  N=41: exp_009=0.367831, chistyakov=0.366314, diff=0.001517
  N=43: exp_009=0.367017, chistyakov=0.366822, diff=0.000195
  N=46: exp_009=0.361589, chistyakov=0.361322, diff=0.000267
  N=47: exp_009=0.356618, chistyakov=0.356608, diff=0.000011
  N=64: 

In [18]:
# Check if chistyakov solutions pass validation for N=21 (biggest improvement)
n = 21
chi_trees = chistyakov_solutions[n]
print(f"N={n}: Testing chistyakov solution")
print(f"  Score: {chistyakov_per_n[n]:.6f} vs exp_009: {best_per_n[n]:.6f}")
print(f"  Improvement: {best_per_n[n] - chistyakov_per_n[n]:.6f}")

# Test validation
valid = validate_no_overlap_strict(chi_trees)
print(f"  Passes strict validation: {valid}")

# If not valid, find which trees overlap
if not valid:
    polygons = []
    for tree in chi_trees:
        poly = get_tree_polygon_scaled(tree['x'], tree['y'], tree['deg'])
        polygons.append(poly)
    
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):
                print(f"  Overlap: trees {i} and {j}")

N=21: Testing chistyakov solution
  Score: 0.372174 vs exp_009: 0.374790
  Improvement: 0.002616
  Passes strict validation: True


In [19]:
# Debug ensemble logic for N=21
n = 21
print(f"N={n} Debug:")
print(f"  exp_009 score: {best_per_n[n]:.9f}")
print(f"  chistyakov score: {chistyakov_per_n[n]:.9f}")
print(f"  Difference: {best_per_n[n] - chistyakov_per_n[n]:.9f}")
print(f"  Is chistyakov better? {chistyakov_per_n[n] < best_per_n[n] - 1e-9}")

# The issue might be that we're comparing wrong scores
# Let me recalculate directly
exp_trees = best_solutions[n]
chi_trees = chistyakov_solutions[n]

exp_s = compute_bbox_score(exp_trees)
chi_s = compute_bbox_score(chi_trees)

exp_score = (exp_s ** 2) / n
chi_score = (chi_s ** 2) / n

print(f"\nDirect calculation:")
print(f"  exp_009 side: {exp_s:.9f}, score: {exp_score:.9f}")
print(f"  chistyakov side: {chi_s:.9f}, score: {chi_score:.9f}")
print(f"  Is chistyakov better? {chi_score < exp_score - 1e-9}")

N=21 Debug:
  exp_009 score: 0.374789694
  chistyakov score: 0.372174052
  Difference: 0.002615642
  Is chistyakov better? True

Direct calculation:
  exp_009 side: 2.805456038, score: 0.374789694
  chistyakov side: 2.795649316, score: 0.372174052
  Is chistyakov better? True


In [20]:
# Re-run ensemble with detailed debugging
all_sources = {'exp_009': best_solutions}
for path, solutions in source_solutions.items():
    all_sources[os.path.basename(path)] = solutions

print(f'Total sources for ensemble: {len(all_sources)}')
print(f'Source names: {list(all_sources.keys())}')

# Track best per-N
ensemble_solutions = {}
ensemble_sources = {}
improvements = []

# Debug N=21 specifically
n = 21
print(f"\n=== Debugging N={n} ===")
best_score = best_per_n.get(n, float('inf'))
best_source = 'exp_009'
best_trees = best_solutions.get(n, [])
print(f"Starting with exp_009 score: {best_score:.9f}")

for source_name, solutions in all_sources.items():
    if source_name == 'exp_009':
        continue
    if n not in solutions:
        continue
    
    trees = solutions[n]
    s = compute_bbox_score(trees)
    score = (s ** 2) / n
    
    if score < best_score - 1e-9:
        print(f"  {source_name}: score={score:.9f} (better by {best_score - score:.9f})")
        # Validate
        valid = validate_no_overlap_strict(trees)
        print(f"    Validation: {valid}")
        if valid:
            best_score = score
            best_source = source_name
            best_trees = trees
            print(f"    -> ACCEPTED!")
        else:
            print(f"    -> REJECTED (overlap)")

print(f"\nFinal best for N={n}: {best_source} with score {best_score:.9f}")

Total sources for ensemble: 22
Source names: ['exp_009', '71.97.csv', '72.49.csv', 'submission.csv', 'submission_best.csv', '70.378875862989_20260126_045659.csv', 'submission_JKoT4.csv', 'New_Tree_144_196.csv', 'submission_JKoT3.csv', 'santa2025_ver2_v61.csv', 'submission_JKoT2.csv', 'santa2025_ver2_v67.csv', 'santa2025_ver2_v76.csv', 'submission_70_936673758122.csv', 'santa2025_ver2_v65.csv', 'submission_70_926149550346.csv', 'santa2025_ver2_v66.csv', 'santa2025_ver2_v63.csv', 'santa2025_ver2_v69.csv', 'submission_JKoT1.csv', 'submission_opt1.csv', 'santa2025_ver2_v68.csv']

=== Debugging N=21 ===
Starting with exp_009 score: 0.374789694
  70.378875862989_20260126_045659.csv: score=0.372174052 (better by 0.002615642)
    Validation: True
    -> ACCEPTED!

Final best for N=21: 70.378875862989_20260126_045659.csv with score 0.372174052


In [21]:
# FIXED ensemble: for each N, find the BEST solution across ALL sources
# that passes strict validation

all_sources = {'exp_009': best_solutions}
for path, solutions in source_solutions.items():
    all_sources[os.path.basename(path)] = solutions

print(f'Total sources for ensemble: {len(all_sources)}')

# Track best per-N
ensemble_solutions = {}
ensemble_sources = {}
improvements = []

for n in range(1, 201):
    # Start with exp_009
    exp_trees = best_solutions.get(n, [])
    exp_s = compute_bbox_score(exp_trees)
    best_score = (exp_s ** 2) / n
    best_source = 'exp_009'
    best_trees = exp_trees
    
    for source_name, solutions in all_sources.items():
        if source_name == 'exp_009':
            continue
        if n not in solutions:
            continue
        
        trees = solutions[n]
        s = compute_bbox_score(trees)
        score = (s ** 2) / n
        
        if score < best_score - 1e-9:  # Found improvement
            # Validate no overlaps
            if validate_no_overlap_strict(trees):
                best_score = score
                best_source = source_name
                best_trees = trees
    
    ensemble_solutions[n] = best_trees
    ensemble_sources[n] = best_source
    
    if best_source != 'exp_009':
        exp_score = (exp_s ** 2) / n
        improvement = exp_score - best_score
        improvements.append((n, improvement, best_source))

print(f'\\nFound {len(improvements)} improvements over exp_009')
if improvements:
    improvements.sort(key=lambda x: -x[1])
    print('\\nTop 20 improvements:')
    for n, imp, src in improvements[:20]:
        print(f'  N={n}: +{imp:.6f} from {src}')

Total sources for ensemble: 22


\nFound 42 improvements over exp_009
\nTop 20 improvements:
  N=31: +0.003116 from 70.378875862989_20260126_045659.csv
  N=21: +0.002616 from 70.378875862989_20260126_045659.csv
  N=41: +0.001517 from 70.378875862989_20260126_045659.csv
  N=37: +0.000461 from 70.378875862989_20260126_045659.csv
  N=15: +0.000313 from 70.378875862989_20260126_045659.csv
  N=46: +0.000267 from 70.378875862989_20260126_045659.csv
  N=43: +0.000195 from 70.378875862989_20260126_045659.csv
  N=26: +0.000192 from 70.378875862989_20260126_045659.csv
  N=38: +0.000105 from 70.378875862989_20260126_045659.csv
  N=39: +0.000040 from 70.378875862989_20260126_045659.csv
  N=34: +0.000027 from 70.378875862989_20260126_045659.csv
  N=22: +0.000022 from 70.378875862989_20260126_045659.csv
  N=47: +0.000011 from 70.378875862989_20260126_045659.csv
  N=139: +0.000009 from 70.378875862989_20260126_045659.csv
  N=138: +0.000009 from 70.378875862989_20260126_045659.csv
  N=165: +0.000008 from 70.378875862989_20260126_0456

In [22]:
# Compute ensemble total score
ensemble_total = 0
for n in range(1, 201):
    trees = ensemble_solutions[n]
    s = compute_bbox_score(trees)
    ensemble_total += (s ** 2) / n

print(f'Exp_009 total: {best_total:.6f}')
print(f'Ensemble total: {ensemble_total:.6f}')
print(f'Improvement: {best_total - ensemble_total:.6f}')
print(f'\\nTarget: 68.879467')
print(f'Gap: {ensemble_total - 68.879467:.6f}')

# Count sources used
from collections import Counter
source_counts = Counter(ensemble_sources.values())
print('\\nSources used in ensemble:')
for src, count in source_counts.most_common():
    print(f'  {src}: {count} N values')

Exp_009 total: 70.373334
Ensemble total: 70.364392
Improvement: 0.008942
\nTarget: 68.879467
Gap: 1.484925
\nSources used in ensemble:
  exp_009: 158 N values
  70.378875862989_20260126_045659.csv: 42 N values


In [None]:
# Save the improved ensemble
print('=== SAVING IMPROVED ENSEMBLE ===')

# Create submission CSV
rows = []
for n in range(1, 201):
    trees = ensemble_solutions[n]
    for i, tree in enumerate(trees):
        rows.append({
            'id': f'{n:03d}_{i}',
            'x': f"s{tree['x']}",
            'y': f"s{tree['y']}",
            'deg': f"s{tree['deg']}"
        })

submission_df = pd.DataFrame(rows)

# Save to experiment folder
exp_folder = '/home/code/experiments/010_full_ensemble'
os.makedirs(exp_folder, exist_ok=True)
submission_df.to_csv(f'{exp_folder}/submission.csv', index=False)

# Save metrics
metrics = {
    'cv_score': ensemble_total,
    'baseline_score': best_total,
    'improvement': best_total - ensemble_total,
    'n_improved': len(improvements),
    'sources_used': dict(source_counts),
    'target': 68.879467,
    'gap': ensemble_total - 68.879467
}
with open(f'{exp_folder}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

# Copy to submission folder
submission_df.to_csv('/home/submission/submission.csv', index=False)

print(f'Saved to {exp_folder}')
print(f'Score: {ensemble_total:.6f}')
print(f'Improvement: {best_total - ensemble_total:.6f}')
print(f'Gap to target: {ensemble_total - 68.879467:.6f}')