# Loop 5 Analysis: Fix Overlap Issue in Ensemble

The exp_004 submission failed with 'Overlapping trees in group 002'. Let's investigate:
1. What's in group 002 (N=2) in our submission?
2. Why does it have overlaps?
3. How to fix it?

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from decimal import Decimal, getcontext
getcontext().prec = 30

# Read the exp_004 ensemble submission that was rejected for overlapping trees
failed_submission_path = '/home/code/experiments/004_ensemble_valid/submission.csv'
submission = pd.read_csv(failed_submission_path)
print(f"Total rows: {len(submission)}")
print(submission.head(10))

Total rows: 20100
      id                         x                         y  \
0  001_0   s-48.196086194214246001    s58.770984615214225000   
1  002_0   s0.15409700000000001174  s-0.03854099999999999887   
2  002_1  s-0.15409700000000001174  s-0.56145900000000004137   
3  003_0        s0.254937643697833       s-0.233436061549416   
4  003_1        s0.357722754471247        s0.250360566787394   
5  003_2       s-0.234618301141838        s0.154819632737017   
6  004_0       s-0.324747789589372        s0.132109978088185   
7  004_1        s0.315354346242638        s0.132109978063475   
8  004_2        s0.324747789592379       s-0.732109978069476   
9  004_3       s-0.315354348134818       s-0.732109978094186   

                         deg  
0     s45.000000000000000000  
1  s203.62937800000000265754  
2   s23.62937799999999910483  
3        s113.56326044172948  
4           s66.370622269343  
5        s155.13405193710082  
6       s156.370622145636389  
7       s156.370622269264089 

In [2]:
# Pull out the rows whose id starts with '002_' (the N=2 group)
is_group_002 = submission['id'].str.startswith('002_')
n2_rows = submission[is_group_002]
print(f"N=2 rows: {len(n2_rows)}")
print(n2_rows)

N=2 rows: 2
      id                         x                         y  \
1  002_0   s0.15409700000000001174  s-0.03854099999999999887   
2  002_1  s-0.15409700000000001174  s-0.56145900000000004137   

                         deg  
1  s203.62937800000000265754  
2   s23.62937799999999910483  


In [3]:
# Define tree geometry
def make_tree_polygon(x, y, deg):
    """Build the 15-vertex tree outline rotated by ``deg`` degrees and moved to (x, y).

    Args:
        x: X offset added to every vertex after rotation.
        y: Y offset added to every vertex after rotation.
        deg: Rotation angle in degrees (converted with ``np.radians``).

    Returns:
        A ``Polygon`` built from the transformed vertices.
    """
    # Template dimensions: trunk, then the three tiers (base / middle / top)
    trunk_w, trunk_h = 0.15, 0.2
    base_w, mid_w, top_w = 0.7, 0.4, 0.25
    y_tip, y_top, y_mid, y_base = 0.8, 0.5, 0.25, 0.0
    y_trunk = -trunk_h

    # Right-hand half of the outline, walking from just below the tip down to
    # the bottom of the trunk; the left half is its mirror image in reverse.
    right_side = [
        (top_w / 2, y_top),
        (top_w / 4, y_top),
        (mid_w / 2, y_mid),
        (mid_w / 4, y_mid),
        (base_w / 2, y_base),
        (trunk_w / 2, y_base),
        (trunk_w / 2, y_trunk),
    ]
    left_side = [(-px, py) for px, py in reversed(right_side)]
    outline = [(0, y_tip)] + right_side + left_side

    local_x = np.array([pt[0] for pt in outline])
    local_y = np.array([pt[1] for pt in outline])

    # Standard 2D rotation followed by translation
    theta = np.radians(deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    world_x = cos_t * local_x - sin_t * local_y + x
    world_y = sin_t * local_x + cos_t * local_y + y

    return Polygon(zip(world_x, world_y))

# Parse the 's' prefix values
def parse_val(v):
    """Convert a submission cell to ``float``, dropping one leading 's' if present.

    The value is stringified first, so numeric inputs are accepted as well.
    """
    text = str(v)
    numeric_text = text[1:] if text.startswith('s') else text
    return float(numeric_text)

# Show the parsed float values for each N=2 tree
for _, row in n2_rows.iterrows():
    x, y, deg = (parse_val(row[col]) for col in ('x', 'y', 'deg'))
    print(f"{row['id']}: x={x:.15f}, y={y:.15f}, deg={deg:.15f}")

002_0: x=0.154097000000000, y=-0.038541000000000, deg=203.629378000000003
002_1: x=-0.154097000000000, y=-0.561459000000000, deg=23.629377999999999


In [4]:
# Build one polygon per N=2 tree, keeping the row id next to it
trees = [
    (
        row['id'],
        make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg'])),
    )
    for _, row in n2_rows.iterrows()
]

# Compare the first two trees
if len(trees) >= 2:
    poly1, poly2 = trees[0][1], trees[1][1]
    shared_region = poly1.intersection(poly2)
    is_touching = poly1.touches(poly2)

    print(f"Tree 0 area: {poly1.area}")
    print(f"Tree 1 area: {poly2.area}")
    print(f"Intersection: {shared_region}")
    print(f"Intersection area: {shared_region.area}")
    print(f"Do they overlap? {poly1.intersects(poly2) and not is_touching}")
    print(f"Do they touch? {is_touching}")

Tree 0 area: 0.2456249999999999
Tree 1 area: 0.24562499999999998
Intersection: MULTIPOLYGON (((0.1665564216720645 -0.4211717629353869, 0.1665580665562894 -0.4211724065857269, 0.1665575516360123 -0.421172631863351, 0.1665564216720645 -0.4211717629353869)), ((-0.1665580665562893 -0.1788275934142733, -0.1665575516360122 -0.1788273681366491, -0.1665564216720642 -0.1788282370646133, -0.1665580665562893 -0.1788275934142733)))
Intersection area: 7.019842216240558e-13
Do they overlap? True
Do they touch? False


In [5]:
# Check with higher precision using integer scaling
SCALE = 10**18

def make_tree_polygon_int(x, y, deg):
    """Build the tree outline with coordinates scaled by ``SCALE`` and truncated to int.

    Template vertices and the (x, y) offset are handled as ``Decimal``; the
    rotation angle is converted to radians and passed through ``math.cos`` /
    ``math.sin`` as a float, and the results are converted back to ``Decimal``
    via ``str``.

    NOTE(review): because the trig values come from float arithmetic, the
    result is presumably no more precise than the float path for rotated
    trees -- confirm before relying on it as an exact check.

    Args:
        x, y: Offsets; stringified and parsed as ``Decimal``.
        deg: Rotation angle in degrees.

    Returns:
        A ``Polygon`` whose vertices are ``(int, int)`` tuples.
    """
    import math

    trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
    base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
    y_tip, y_top, y_mid, y_base = Decimal('0.8'), Decimal('0.5'), Decimal('0.25'), Decimal('0')
    y_trunk = -trunk_h

    # Right-hand half of the outline; the left half mirrors it in reverse order
    right_side = [
        (top_w / 2, y_top),
        (top_w / 4, y_top),
        (mid_w / 2, y_mid),
        (mid_w / 4, y_mid),
        (base_w / 2, y_base),
        (trunk_w / 2, y_base),
        (trunk_w / 2, y_trunk),
    ]
    left_side = [(-px, py) for px, py in reversed(right_side)]
    outline = [(Decimal('0'), y_tip)] + right_side + left_side

    origin_x = Decimal(str(x))
    origin_y = Decimal(str(y))
    angle_deg = Decimal(str(deg))

    # Trig is evaluated in float, then lifted back into Decimal
    theta = float(angle_deg) * math.pi / 180
    cos_t = Decimal(str(math.cos(theta)))
    sin_t = Decimal(str(math.sin(theta)))

    def to_scaled(px, py):
        """Rotate, translate and scale one template vertex to integer coordinates."""
        world_x = cos_t * px - sin_t * py + origin_x
        world_y = sin_t * px + cos_t * py + origin_y
        return (int(world_x * SCALE), int(world_y * SCALE))

    return Polygon([to_scaled(px, py) for px, py in outline])

# Rebuild the N=2 polygons with the integer-scaled constructor
trees_int = [
    (
        row['id'],
        make_tree_polygon_int(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg'])),
    )
    for _, row in n2_rows.iterrows()
]

if len(trees_int) >= 2:
    poly1, poly2 = trees_int[0][1], trees_int[1][1]
    scaled_region = poly1.intersection(poly2)

    print(f"Integer-scaled overlap check:")
    print(f"Intersection area (scaled): {scaled_region.area}")
    print(f"Do they overlap? {poly1.intersects(poly2) and not poly1.touches(poly2)}")

Integer-scaled overlap check:
Intersection area (scaled): 7.019842220112116e+23
Do they overlap? True


In [6]:
# Load the baseline snapshot that is known to validate, and look at its N=2 rows
valid_baseline_path = '/home/nonroot/snapshots/santa-2025/21328309254/submission/submission.csv'
valid_baseline = pd.read_csv(valid_baseline_path)
baseline_is_group_002 = valid_baseline['id'].str.startswith('002_')
n2_valid = valid_baseline[baseline_is_group_002]
print("Valid baseline N=2:")
print(n2_valid)

Valid baseline N=2:
      id                     x                      y                  deg
1  002_0   s0.1540970696213643  s-0.03854074269478543  s203.62937773065684
2  002_1  s-0.1540970696213643   s-0.5614592573052146  s23.629377730656792


In [7]:
# Print the raw (unparsed) N=2 rows of the ensemble and of the valid baseline
for heading, frame in (("\nOur ensemble N=2:", n2_rows), ("\nValid baseline N=2:", n2_valid)):
    print(heading)
    for _, row in frame.iterrows():
        print(f"  {row['id']}: x={row['x']}, y={row['y']}, deg={row['deg']}")


Our ensemble N=2:
  002_0: x=s0.15409700000000001174, y=s-0.03854099999999999887, deg=s203.62937800000000265754
  002_1: x=s-0.15409700000000001174, y=s-0.56145900000000004137, deg=s23.62937799999999910483

Valid baseline N=2:
  002_0: x=s0.1540970696213643, y=s-0.03854074269478543, deg=s203.62937773065684
  002_1: x=s-0.1540970696213643, y=s-0.5614592573052146, deg=s23.629377730656792


In [8]:
# Check which snapshot contributed N=2 to our ensemble
# Load the ensemble map
import json
import os

ensemble_map_path = '/home/code/experiments/004_ensemble_valid/ensemble_map.json'
if not os.path.exists(ensemble_map_path):
    print("No ensemble_map.json found")
else:
    # The map, when present, is looked up by the string key '2'
    with open(ensemble_map_path) as f:
        ensemble_map = json.load(f)
    print(f"N=2 source: {ensemble_map.get('2', 'not found')}")

No ensemble_map.json found


In [9]:
# Let's check all snapshots for N=2 and find the best VALID one
import glob

snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
snapshots = sorted(glob.glob(f'{snapshot_dir}*/submission/submission.csv'))

print(f"Found {len(snapshots)} snapshots")

# Check N=2 in each snapshot.
# n2_scores collects (snapshot id, score, has_overlap); snapshots that cannot be
# evaluated are recorded in skipped_snapshots instead of being dropped silently.
n2_scores = []
skipped_snapshots = []
for snap_path in snapshots:
    # The snapshot id is the directory two levels above submission.csv
    snap_id = snap_path.split('/')[-3]
    try:
        df = pd.read_csv(snap_path)
        n2 = df[df['id'].str.startswith('002_')]
        if len(n2) != 2:
            skipped_snapshots.append((snap_id, f"{len(n2)} rows for N=2"))
            continue

        # Parse values
        trees = []
        for _, row in n2.iterrows():
            x = parse_val(row['x'])
            y = parse_val(row['y'])
            deg = parse_val(row['deg'])
            trees.append((x, y, deg))

        # Build each polygon once and reuse it for both the overlap test and the score
        polys = [make_tree_polygon(tx, ty, tdeg) for tx, ty, tdeg in trees]
        poly1, poly2 = polys

        has_overlap = poly1.intersects(poly2) and not poly1.touches(poly2)

        # Score = (side of the axis-aligned bounding square)^2 / N, with N = 2
        all_x = []
        all_y = []
        for poly in polys:
            coords = list(poly.exterior.coords)
            all_x.extend([c[0] for c in coords])
            all_y.extend([c[1] for c in coords])

        side = max(max(all_x) - min(all_x), max(all_y) - min(all_y))
        score = side * side / 2

        n2_scores.append((snap_id, score, has_overlap))
    except Exception as e:
        # Best-effort scan: keep going, but remember why this snapshot was dropped
        skipped_snapshots.append((snap_id, f"{type(e).__name__}: {e}"))
        continue

# Sort by score
n2_scores.sort(key=lambda x: x[1])

if skipped_snapshots:
    print(f"\nSkipped {len(skipped_snapshots)} snapshots:")
    for snap_id, reason in skipped_snapshots:
        print(f"  {snap_id}: {reason}")

print("\nTop 10 N=2 solutions:")
for snap_id, score, has_overlap in n2_scores[:10]:
    status = "OVERLAP" if has_overlap else "VALID"
    print(f"  {snap_id}: score={score:.6f} [{status}]")

Found 88 snapshots



Top 10 N=2 solutions:
  21145966992: score=0.437328 [OVERLAP]
  21337107511: score=0.437328 [OVERLAP]
  21322576451: score=0.450779 [OVERLAP]
  21328309666: score=0.450779 [OVERLAP]
  21336527339: score=0.450779 [OVERLAP]
  21322577324: score=0.450779 [OVERLAP]
  21329069570: score=0.450779 [OVERLAP]
  21191209482: score=0.450779 [VALID]
  21198893057: score=0.450779 [VALID]
  21198928571: score=0.450779 [VALID]


In [10]:
# The issue is clear: our ensemble N=2 has overlaps (intersection area 7e-13)
# We need to use the VALID baseline N=2 instead

# Let's rebuild the ensemble with proper overlap checking
# First, let's understand which N values have overlaps in our current submission

def check_all_overlaps(submission_df):
    """Scan groups N=1..200 and report the first problem found in each.

    Args:
        submission_df: Frame with 'id', 'x', 'y', 'deg' columns; ids are
            matched by the zero-padded prefix ``f"{n:03d}_"``.

    Returns:
        A list of ``(n, issue, value)`` tuples. ``issue`` is ``"WRONG_COUNT"``
        (value = number of rows found) or ``"OVERLAP_i_j"`` (value = the
        intersection area of the first offending pair). Groups without a
        problem contribute nothing.
    """
    def first_real_overlap(shapes):
        """Return (i, j, area) for the first pair overlapping by more than 1e-15, else None."""
        for i, shape_a in enumerate(shapes):
            for j in range(i + 1, len(shapes)):
                shape_b = shapes[j]
                # Skip pairs that are disjoint or merely share boundary points
                if not shape_a.intersects(shape_b) or shape_a.touches(shape_b):
                    continue
                area = shape_a.intersection(shape_b).area
                if area > 1e-15:  # Tiny tolerance
                    return i, j, area
        return None

    issues = []
    for n in range(1, 201):
        group = submission_df[submission_df['id'].str.startswith(f"{n:03d}_")]

        if len(group) != n:
            issues.append((n, "WRONG_COUNT", len(group)))
            continue

        shapes = [
            make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg']))
            for _, row in group.iterrows()
        ]

        hit = first_real_overlap(shapes)
        if hit is not None:
            i, j, area = hit
            issues.append((n, f"OVERLAP_{i}_{j}", area))

    return issues

print("Checking our ensemble for overlaps...")
overlaps = check_all_overlaps(submission)
print(f"Found {len(overlaps)} N values with issues:")
# Only the first 20 entries are listed
first_issues = overlaps[:20]
for n, issue, val in first_issues:
    print(f"  N={n}: {issue} (value={val})")

Checking our ensemble for overlaps...


Found 4 N values with issues:
  N=2: OVERLAP_0_1 (value=7.019842216240558e-13)
  N=29: OVERLAP_6_27 (value=3.2627401103001893e-14)
  N=103: OVERLAP_24_49 (value=4.164081845617297e-15)
  N=138: OVERLAP_86_137 (value=1.180975996041941e-13)


In [12]:
# Four groups overlap in the ensemble: N=2, N=29, N=103, N=138.
# Plan: take those groups from the VALID baseline (21328309254) instead.

# Load valid baseline
valid_baseline = pd.read_csv('/home/nonroot/snapshots/santa-2025/21328309254/submission/submission.csv')

# Confirm the baseline itself is overlap-free for exactly those groups
print("Checking valid baseline for the problematic N values...")
for n in [2, 29, 103, 138]:
    n_str = f"{n:03d}_"
    n_rows = valid_baseline[valid_baseline['id'].str.startswith(n_str)]

    polys = [
        make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg']))
        for _, row in n_rows.iterrows()
    ]

    has_overlap = False
    for i, poly_a in enumerate(polys):
        for poly_b in polys[i + 1:]:
            # Ignore pairs that are disjoint or only touch along the boundary
            if not poly_a.intersects(poly_b) or poly_a.touches(poly_b):
                continue
            intersection = poly_a.intersection(poly_b)
            if intersection.area > 1e-15:
                has_overlap = True
                print(f"  N={n}: OVERLAP (area={intersection.area})")
                break
        if has_overlap:
            break

    if not has_overlap:
        print(f"  N={n}: VALID")

Checking valid baseline for the problematic N values...
  N=2: VALID
  N=29: VALID
  N=103: VALID
  N=138: VALID


In [13]:
# Build the fixed ensemble: swap each problematic group for the baseline's rows
problematic_n = [2, 29, 103, 138]

fixed_submission = submission.copy()
for n in problematic_n:
    n_str = f"{n:03d}_"

    # Rows of the current frame that do NOT belong to this group
    keep_mask = ~fixed_submission['id'].str.startswith(n_str)
    # Replacement rows for this group, taken from the valid baseline
    valid_rows = valid_baseline[valid_baseline['id'].str.startswith(n_str)]

    fixed_submission = pd.concat([fixed_submission[keep_mask], valid_rows], ignore_index=True)

# Restore (group, tree index) order using temporary integer sort keys
id_parts = fixed_submission['id'].str.split('_')
fixed_submission['n'] = id_parts.str[0].astype(int)
fixed_submission['i'] = id_parts.str[1].astype(int)
fixed_submission = fixed_submission.sort_values(['n', 'i']).drop(columns=['n', 'i'])

print(f"Fixed submission rows: {len(fixed_submission)}")
print(f"Expected rows: 20100")

# Re-run the overlap scan on the result
print("\nVerifying fixed submission has no overlaps...")
overlaps = check_all_overlaps(fixed_submission)
print(f"Found {len(overlaps)} N values with issues")
for n, issue, val in overlaps:
    print(f"  N={n}: {issue} (value={val})")

Fixed submission rows: 20100
Expected rows: 20100

Verifying fixed submission has no overlaps...


Found 0 N values with issues


In [14]:
# Calculate score for fixed submission
def calculate_score(submission_df):
    """Sum the per-group scores ``side**2 / n`` over N=1..200.

    ``side`` is the larger of the x-extent and y-extent of all tree polygon
    vertices in the group. A group whose row count differs from ``n`` prints
    a warning and contributes nothing to the total.

    Args:
        submission_df: Frame with 'id', 'x', 'y', 'deg' columns.

    Returns:
        The accumulated score.
    """
    total_score = 0.0

    for n in range(1, 201):
        group = submission_df[submission_df['id'].str.startswith(f"{n:03d}_")]

        if len(group) != n:
            print(f"Warning: N={n} has {len(group)} rows instead of {n}")
            continue

        # Gather every polygon vertex of the group to get its bounding box
        xs, ys = [], []
        for _, row in group.iterrows():
            tree = make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg']))
            for pt in tree.exterior.coords:
                xs.append(pt[0])
                ys.append(pt[1])

        side = max(max(xs) - min(xs), max(ys) - min(ys))
        total_score += side * side / n

    return total_score

print("Calculating score for fixed submission...")
fixed_score = calculate_score(fixed_submission)
print(f"Fixed submission score: {fixed_score:.6f}")

# Score of the original (rejected) ensemble
original_score = calculate_score(submission)
print(f"Original ensemble score: {original_score:.6f}")

# Score of the valid baseline
baseline_score = calculate_score(valid_baseline)
print(f"Valid baseline score: {baseline_score:.6f}")

gain_over_baseline = baseline_score - fixed_score
gap_to_target = fixed_score - 68.888293
print(f"\nImprovement from baseline: {gain_over_baseline:.6f}")
print(f"Gap to target (68.888293): {gap_to_target:.6f}")

Calculating score for fixed submission...


Fixed submission score: 70.615744


Original ensemble score: 70.615744


Valid baseline score: 70.647327

Improvement from baseline: 0.031583
Gap to target (68.888293): 1.727451


In [15]:
# Save fixed submission
import os

exp_dir = '/home/code/experiments/005_ensemble_fixed'
os.makedirs(exp_dir, exist_ok=True)

# Save submission
fixed_submission.to_csv(f'{exp_dir}/submission.csv', index=False)
print(f"Saved to {exp_dir}/submission.csv")

# Also copy to /home/submission (create the directory so the write cannot
# fail on a fresh environment)
os.makedirs('/home/submission', exist_ok=True)
fixed_submission.to_csv('/home/submission/submission.csv', index=False)
print("Copied to /home/submission/submission.csv")

# Save metrics
import json
metrics = {
    'cv_score': fixed_score,
    'baseline_score': baseline_score,
    'improvement': baseline_score - fixed_score,
    'target': 68.888293,
    'gap': fixed_score - 68.888293,
    'fixed_n_values': problematic_n,
    # Derived from the list above so the two entries cannot drift apart
    'overlaps_fixed': len(problematic_n)
}
with open(f'{exp_dir}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)
print(f"Saved metrics to {exp_dir}/metrics.json")

# Verify the saved file by reading it back
saved = pd.read_csv(f'{exp_dir}/submission.csv')
print(f"\nVerification: {len(saved)} rows")
print(saved.head())

Saved to /home/code/experiments/005_ensemble_fixed/submission.csv
Copied to /home/submission/submission.csv
Saved metrics to /home/code/experiments/005_ensemble_fixed/metrics.json

Verification: 20100 rows
      id                        x                       y  \
0  001_0  s-48.196086194214246001  s58.770984615214225000   
1  002_0      s0.1540970696213643   s-0.03854074269478543   
2  002_1     s-0.1540970696213643    s-0.5614592573052146   
3  003_0       s0.254937643697833     s-0.233436061549416   
4  003_1       s0.357722754471247      s0.250360566787394   

                      deg  
0  s45.000000000000000000  
1     s203.62937773065684  
2     s23.629377730656792  
3     s113.56326044172948  
4        s66.370622269343  


In [16]:
# Notes on what the jonathanchan kernel does differently:
# 1. Uses 19+ different solution sources
# 2. Overrides N=1 with optimal value (x=0, y=0, deg=45)
# 3. Applies fractional translation after ensemble
# 4. Uses C++ SA optimization

# First question: can N=1 be improved?
# The kernel's N=1 solution is x=0, y=0, deg=45.

def _bounding_square_side(tree_poly):
    """Return the larger of the x-extent and y-extent of the polygon's exterior vertices."""
    pts = list(tree_poly.exterior.coords)
    xs = [pt[0] for pt in pts]
    ys = [pt[1] for pt in pts]
    return max(max(xs) - min(xs), max(ys) - min(ys))

# Current N=1 in our submission
n1_current = fixed_submission[fixed_submission['id'] == '001_0']
print("Current N=1:")
print(n1_current)

# Score of the current N=1 placement
x = parse_val(n1_current['x'].values[0])
y = parse_val(n1_current['y'].values[0])
deg = parse_val(n1_current['deg'].values[0])
poly = make_tree_polygon(x, y, deg)
side = _bounding_square_side(poly)
current_n1_score = side * side / 1
print(f"Current N=1 score: {current_n1_score:.10f}")

# Score of the reference N=1 placement (x=0, y=0, deg=45)
poly_opt = make_tree_polygon(0, 0, 45)
side_opt = _bounding_square_side(poly_opt)
optimal_n1_score = side_opt * side_opt / 1
print(f"Optimal N=1 score (deg=45): {optimal_n1_score:.10f}")

print(f"\nPotential improvement from N=1 fix: {current_n1_score - optimal_n1_score:.10f}")

Current N=1:
      id                        x                       y  \
0  001_0  s-48.196086194214246001  s58.770984615214225000   

                      deg  
0  s45.000000000000000000  
Current N=1 score: 0.6612500000
Optimal N=1 score (deg=45): 0.6612500000

Potential improvement from N=1 fix: -0.0000000000


In [17]:
# Implement fractional translation
# This is a key technique from the jonathanchan kernel

def fractional_translation(submission_df, n, max_iter=100):
    """Greedy local search that nudges single trees of group ``n`` to lower its score.

    In each sweep every tree is tried against every (step size, direction)
    combination; a move is kept when it produces no overlap (intersection
    area above 1e-15) and lowers the score by more than 1e-12. Sweeps repeat
    until one makes no progress or ``max_iter`` sweeps have run.

    Args:
        submission_df: Frame with 'id', 'x', 'y', 'deg' columns.
        n: Group size; rows are selected by the prefix ``f"{n:03d}_"``.
        max_iter: Upper bound on the number of sweeps.

    Returns:
        ``(rows, improvement)`` where ``rows`` is a copy of the group's rows
        with 'x' and 'y' rewritten as ``"s<float>"`` strings and
        ``improvement`` is the initial score minus the best score. If the
        group does not contain exactly ``n`` rows, the unmodified copy and 0
        are returned.
    """
    group_mask = submission_df['id'].str.startswith(f"{n:03d}_")
    n_rows = submission_df[group_mask].copy()

    if len(n_rows) != n:
        return n_rows, 0

    def layout_score(layout):
        """Score of a layout: (bounding-square side)^2 / n."""
        xs, ys = [], []
        for tx, ty, tdeg in layout:
            for pt in make_tree_polygon(tx, ty, tdeg).exterior.coords:
                xs.append(pt[0])
                ys.append(pt[1])
        side = max(max(xs) - min(xs), max(ys) - min(ys))
        return side * side / n

    def layout_has_overlap(layout):
        """True if any pair of trees overlaps by more than 1e-15 in area."""
        shapes = [make_tree_polygon(tx, ty, tdeg) for tx, ty, tdeg in layout]
        for idx_a, shape_a in enumerate(shapes):
            for shape_b in shapes[idx_a + 1:]:
                if not shape_a.intersects(shape_b) or shape_a.touches(shape_b):
                    continue
                if shape_a.intersection(shape_b).area > 1e-15:
                    return True
        return False

    # Current positions as mutable [x, y, deg] triples
    start_layout = [
        [parse_val(row['x']), parse_val(row['y']), parse_val(row['deg'])]
        for _, row in n_rows.iterrows()
    ]

    initial_score = layout_score(start_layout)
    best_score = initial_score
    best_layout = [tree.copy() for tree in start_layout]

    # Candidate displacements: every step size combined with 8 compass directions,
    # ordered step-major so moves are tried from coarse to fine
    step_sizes = [0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001]
    unit_moves = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
    offsets = [(ux * step, uy * step) for step in step_sizes for ux, uy in unit_moves]

    sweeps_done = 0
    made_progress = True
    while made_progress and sweeps_done < max_iter:
        made_progress = False
        sweeps_done += 1

        for tree_idx in range(n):
            for off_x, off_y in offsets:
                # Always start from the best layout found so far
                candidate = [tree.copy() for tree in best_layout]
                candidate[tree_idx][0] += off_x
                candidate[tree_idx][1] += off_y

                if layout_has_overlap(candidate):
                    continue

                candidate_score = layout_score(candidate)
                if candidate_score < best_score - 1e-12:
                    best_score = candidate_score
                    best_layout = candidate
                    made_progress = True

    improvement = initial_score - best_score

    # Write the best positions back, row by row, in the 's'-prefixed format
    for label, tree in zip(n_rows.index, best_layout):
        n_rows.loc[label, 'x'] = f"s{tree[0]}"
        n_rows.loc[label, 'y'] = f"s{tree[1]}"

    return n_rows, improvement

# Try the local search on a handful of small groups
print("Testing fractional translation on small N values...")
for n in [5, 10, 15, 20]:
    search_result = fractional_translation(fixed_submission, n, max_iter=50)
    improved_rows, improvement = search_result
    print(f"N={n}: improvement = {improvement:.10f}")

Testing fractional translation on small N values...
N=5: improvement = 0.0000000000


N=10: improvement = 0.0000000000


N=15: improvement = 0.0000000000


N=20: improvement = 0.0000000000


In [18]:
# Build comprehensive ensemble from ALL available solution sources
import glob

# Collect all CSV files from preoptimized directory
preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/'
all_csvs = []

# Main preoptimized files
all_csvs.extend(glob.glob(f'{preopt_dir}*.csv'))

# Subdirectories
for subdir in ['blended', 'bucket-of-chump', 'chistyakov', 'santa-2025-csv', 
               'santa-2025-try3', 'santa25-public', 'telegram', 'telegram/telegram_extracted']:
    all_csvs.extend(glob.glob(f'{preopt_dir}{subdir}/*.csv'))

# Also add all snapshot submissions
snapshot_csvs = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
all_csvs.extend(snapshot_csvs)

print(f"Found {len(all_csvs)} CSV files to scan")
# Use a real newline escape here; the doubled backslash printed a literal "\n"
print("\nSample files:")
for f in all_csvs[:10]:
    print(f"  {f}")

Found 118 CSV files to scan
\nSample files:
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/blended/submission (77).csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov/submission_best.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-try3/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-try3/submission_sa.csv


In [19]:
# Build comprehensive ensemble with proper overlap checking
from tqdm import tqdm

# Per-group record of the best candidate seen so far, keyed by N in 1..200.
# Every slot starts empty: infinite score, no data, no source, and flagged as
# overlapping.
best = {
    n: dict(score=float('inf'), data=None, src=None, has_overlap=True)
    for n in range(1, 201)
}

def score_group(n_rows, n):
    """Return ``side**2 / n`` for one group of trees.

    ``side`` is the larger of the x-extent and y-extent over all polygon
    vertices of the trees described by ``n_rows`` ('x', 'y', 'deg' columns).
    """
    xs, ys = [], []
    for _, row in n_rows.iterrows():
        tree = make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg']))
        for pt in tree.exterior.coords:
            xs.append(pt[0])
            ys.append(pt[1])
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    side = max(width, height)
    return side * side / n

def check_group_overlap(n_rows):
    """Return True if any two trees in the group overlap by more than 1e-15 in area.

    Pairs that do not intersect, or that only touch, are ignored.
    """
    shapes = [
        make_tree_polygon(parse_val(row['x']), parse_val(row['y']), parse_val(row['deg']))
        for _, row in n_rows.iterrows()
    ]

    for idx_a, shape_a in enumerate(shapes):
        for shape_b in shapes[idx_a + 1:]:
            if not shape_a.intersects(shape_b) or shape_a.touches(shape_b):
                continue
            if shape_a.intersection(shape_b).area > 1e-15:
                return True
    return False

# Scan all CSV files and keep, for each N, the best candidate.
# Ranking rule: a valid (overlap-free) group always beats an overlapping one;
# among groups of the same validity, the strictly lower score wins.
for fp in tqdm(all_csvs, desc="Scanning"):
    try:
        df = pd.read_csv(fp)
    except Exception:
        # Best-effort scan: unreadable files are skipped
        continue

    if not {'id', 'x', 'y', 'deg'}.issubset(df.columns):
        continue

    # Extract N from id; a file with malformed ids is skipped rather than
    # aborting the whole scan
    try:
        df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)
    except (ValueError, TypeError):
        continue

    for n, g in df.groupby('N'):
        if n < 1 or n > 200:
            continue
        if len(g) != n:
            continue

        # Calculate score
        score = score_group(g, n)

        incumbent = best[n]
        beats_score = score < incumbent['score']

        # A valid incumbent can only be displaced by a strictly better score,
        # so the (expensive) overlap check can be skipped in that case.
        if not incumbent['has_overlap'] and not beats_score:
            continue

        has_overlap = check_group_overlap(g)

        if has_overlap:
            # An overlapping candidate may only replace another overlapping one
            accept = incumbent['has_overlap'] and beats_score
        else:
            # A valid candidate replaces any overlapping incumbent regardless of
            # score; previously it was rejected whenever the overlapping
            # incumbent happened to have the lower score.
            accept = incumbent['has_overlap'] or beats_score

        if accept:
            best[n] = {
                'score': score,
                'data': g.drop(columns=['N']).copy(),
                'src': fp.split('/')[-1],
                'has_overlap': has_overlap
            }

# Use a real newline escape here; the doubled backslash printed a literal "\n"
print("\nScanning complete!")

Scanning:   0%|          | 0/118 [00:00<?, ?it/s]

Scanning:   1%|          | 1/118 [00:12<23:38, 12.12s/it]

Scanning:   2%|▏         | 2/118 [00:14<12:10,  6.30s/it]

Scanning:   3%|▎         | 3/118 [00:16<08:29,  4.43s/it]

Scanning:   3%|▎         | 4/118 [00:18<06:45,  3.56s/it]

Scanning:   4%|▍         | 5/118 [00:21<05:49,  3.09s/it]

Scanning:   5%|▌         | 6/118 [00:23<05:12,  2.79s/it]

Scanning:   6%|▌         | 7/118 [00:25<04:49,  2.61s/it]

Scanning:   7%|▋         | 8/118 [00:27<04:32,  2.48s/it]

Scanning:   8%|▊         | 9/118 [00:29<04:21,  2.40s/it]

Scanning:   8%|▊         | 10/118 [00:32<04:13,  2.35s/it]

Scanning:   9%|▉         | 11/118 [00:34<04:06,  2.30s/it]

Scanning:  10%|█         | 12/118 [00:36<04:00,  2.27s/it]

Scanning:  11%|█         | 13/118 [00:38<03:56,  2.25s/it]

Scanning:  12%|█▏        | 14/118 [00:40<03:53,  2.25s/it]

Scanning:  13%|█▎        | 15/118 [00:43<03:50,  2.24s/it]

Scanning:  14%|█▎        | 16/118 [00:45<03:46,  2.22s/it]

Scanning:  14%|█▍        | 17/118 [00:47<03:43,  2.22s/it]

Scanning:  15%|█▌        | 18/118 [00:49<03:42,  2.22s/it]

Scanning:  16%|█▌        | 19/118 [00:52<03:39,  2.22s/it]

Scanning:  17%|█▋        | 20/118 [00:54<03:36,  2.21s/it]

Scanning:  18%|█▊        | 21/118 [00:56<03:34,  2.22s/it]

Scanning:  19%|█▊        | 22/118 [00:58<03:32,  2.21s/it]

Scanning:  19%|█▉        | 23/118 [01:00<03:30,  2.21s/it]

Scanning:  20%|██        | 24/118 [01:03<03:28,  2.21s/it]

Scanning:  21%|██        | 25/118 [01:05<03:24,  2.20s/it]

Scanning:  22%|██▏       | 26/118 [01:07<03:22,  2.20s/it]

Scanning:  23%|██▎       | 27/118 [01:09<03:21,  2.22s/it]

Scanning:  24%|██▎       | 28/118 [01:11<03:19,  2.22s/it]

Scanning:  25%|██▍       | 29/118 [01:14<03:17,  2.22s/it]

Scanning:  25%|██▌       | 30/118 [01:16<03:15,  2.22s/it]

Scanning:  26%|██▋       | 31/118 [01:18<03:12,  2.22s/it]

Scanning:  27%|██▋       | 32/118 [01:30<07:27,  5.20s/it]

Scanning:  28%|██▊       | 33/118 [01:32<06:05,  4.30s/it]

Scanning:  29%|██▉       | 34/118 [01:38<06:40,  4.77s/it]

Scanning:  30%|██▉       | 35/118 [01:49<09:02,  6.53s/it]

Scanning:  31%|███       | 36/118 [01:51<07:10,  5.25s/it]

Scanning:  31%|███▏      | 37/118 [01:53<05:50,  4.33s/it]

Scanning:  32%|███▏      | 38/118 [01:56<04:55,  3.69s/it]

Scanning:  33%|███▎      | 39/118 [01:59<04:47,  3.63s/it]

Scanning:  34%|███▍      | 40/118 [02:01<04:11,  3.22s/it]

Scanning:  35%|███▍      | 41/118 [02:04<03:45,  2.93s/it]

Scanning:  36%|███▋      | 43/118 [02:06<02:35,  2.08s/it]

Scanning:  37%|███▋      | 44/118 [02:08<02:36,  2.12s/it]

Scanning:  38%|███▊      | 45/118 [02:10<02:36,  2.14s/it]

Scanning:  39%|███▉      | 46/118 [02:12<02:35,  2.17s/it]

Scanning:  40%|███▉      | 47/118 [02:15<02:34,  2.18s/it]

Scanning:  41%|████      | 48/118 [02:17<02:33,  2.19s/it]

Scanning:  42%|████▏     | 49/118 [02:19<02:30,  2.18s/it]

Scanning:  42%|████▏     | 50/118 [02:21<02:28,  2.18s/it]

Scanning:  43%|████▎     | 51/118 [02:24<02:27,  2.20s/it]

Scanning:  44%|████▍     | 52/118 [02:26<02:25,  2.20s/it]

Scanning:  45%|████▍     | 53/118 [02:28<02:22,  2.19s/it]

Scanning:  46%|████▌     | 54/118 [02:30<02:20,  2.20s/it]

Scanning:  47%|████▋     | 55/118 [02:32<02:19,  2.21s/it]

Scanning:  47%|████▋     | 56/118 [02:35<02:16,  2.21s/it]

Scanning:  48%|████▊     | 57/118 [02:37<02:14,  2.20s/it]

Scanning:  49%|████▉     | 58/118 [02:39<02:12,  2.21s/it]

Scanning:  50%|█████     | 59/118 [02:41<02:10,  2.20s/it]

Scanning:  51%|█████     | 60/118 [02:44<02:15,  2.34s/it]

Scanning:  52%|█████▏    | 61/118 [02:46<02:11,  2.30s/it]

Scanning:  53%|█████▎    | 62/118 [02:48<02:06,  2.27s/it]

Scanning:  53%|█████▎    | 63/118 [02:50<02:03,  2.25s/it]

Scanning:  54%|█████▍    | 64/118 [02:53<02:00,  2.24s/it]

Scanning:  55%|█████▌    | 65/118 [02:55<01:58,  2.23s/it]

Scanning:  56%|█████▌    | 66/118 [02:57<01:55,  2.23s/it]

Scanning:  57%|█████▋    | 67/118 [02:59<01:53,  2.22s/it]

Scanning:  58%|█████▊    | 68/118 [03:01<01:51,  2.22s/it]

Scanning:  58%|█████▊    | 69/118 [03:04<01:49,  2.23s/it]

Scanning:  59%|█████▉    | 70/118 [03:06<01:47,  2.23s/it]

Scanning:  60%|██████    | 71/118 [03:08<01:44,  2.23s/it]

Scanning:  61%|██████    | 72/118 [03:10<01:42,  2.22s/it]

Scanning:  62%|██████▏   | 73/118 [03:13<01:39,  2.21s/it]

Scanning:  63%|██████▎   | 74/118 [03:15<01:36,  2.20s/it]

Scanning:  64%|██████▎   | 75/118 [03:17<01:34,  2.20s/it]

Scanning:  64%|██████▍   | 76/118 [03:19<01:32,  2.20s/it]

Scanning:  65%|██████▌   | 77/118 [03:21<01:30,  2.21s/it]

Scanning:  66%|██████▌   | 78/118 [03:24<01:28,  2.22s/it]

Scanning:  67%|██████▋   | 79/118 [03:26<01:26,  2.21s/it]

Scanning:  68%|██████▊   | 80/118 [03:31<01:55,  3.05s/it]

Scanning:  69%|██████▊   | 81/118 [03:33<01:43,  2.80s/it]

Scanning:  69%|██████▉   | 82/118 [03:36<01:40,  2.79s/it]

Scanning:  70%|███████   | 83/118 [03:39<01:42,  2.94s/it]

Scanning:  71%|███████   | 84/118 [03:41<01:32,  2.72s/it]

Scanning:  72%|███████▏  | 85/118 [03:44<01:24,  2.57s/it]

Scanning:  73%|███████▎  | 86/118 [03:46<01:18,  2.47s/it]

Scanning:  74%|███████▎  | 87/118 [03:48<01:14,  2.40s/it]

Scanning:  75%|███████▍  | 88/118 [03:51<01:14,  2.50s/it]

Scanning:  75%|███████▌  | 89/118 [03:53<01:09,  2.41s/it]

Scanning:  76%|███████▋  | 90/118 [03:55<01:05,  2.34s/it]

Scanning:  77%|███████▋  | 91/118 [03:58<01:04,  2.41s/it]

Scanning:  78%|███████▊  | 92/118 [04:00<01:01,  2.36s/it]

Scanning:  79%|███████▉  | 93/118 [04:02<00:57,  2.31s/it]

Scanning:  80%|███████▉  | 94/118 [04:04<00:54,  2.28s/it]

Scanning:  81%|████████  | 95/118 [04:07<00:51,  2.26s/it]

Scanning:  81%|████████▏ | 96/118 [04:09<00:49,  2.25s/it]

Scanning:  82%|████████▏ | 97/118 [04:11<00:47,  2.24s/it]

Scanning:  83%|████████▎ | 98/118 [04:13<00:44,  2.24s/it]

Scanning:  84%|████████▍ | 99/118 [04:15<00:42,  2.23s/it]

Scanning:  85%|████████▍ | 100/118 [04:18<00:40,  2.23s/it]

Scanning:  86%|████████▌ | 101/118 [04:20<00:37,  2.23s/it]

Scanning:  86%|████████▋ | 102/118 [04:22<00:35,  2.23s/it]

Scanning:  87%|████████▋ | 103/118 [04:24<00:33,  2.22s/it]

Scanning:  88%|████████▊ | 104/118 [04:27<00:31,  2.22s/it]

Scanning:  89%|████████▉ | 105/118 [04:29<00:28,  2.21s/it]

Scanning:  90%|████████▉ | 106/118 [04:31<00:26,  2.21s/it]

Scanning:  91%|█████████ | 107/118 [04:33<00:24,  2.21s/it]

Scanning:  92%|█████████▏| 108/118 [04:35<00:22,  2.21s/it]

Scanning:  92%|█████████▏| 109/118 [04:38<00:19,  2.22s/it]

Scanning:  93%|█████████▎| 110/118 [04:40<00:17,  2.22s/it]

Scanning:  94%|█████████▍| 111/118 [04:42<00:15,  2.22s/it]

Scanning:  95%|█████████▍| 112/118 [04:44<00:13,  2.24s/it]

Scanning:  96%|█████████▌| 113/118 [04:46<00:11,  2.22s/it]

Scanning:  97%|█████████▋| 114/118 [04:49<00:08,  2.22s/it]

Scanning:  97%|█████████▋| 115/118 [04:51<00:06,  2.22s/it]

Scanning:  98%|█████████▊| 116/118 [04:53<00:04,  2.22s/it]

Scanning:  99%|█████████▉| 117/118 [04:55<00:02,  2.22s/it]

Scanning: 100%|██████████| 118/118 [04:58<00:00,  2.22s/it]

Scanning: 100%|██████████| 118/118 [04:58<00:00,  2.53s/it]

\nScanning complete!



