# Experiment 010: Kaggle-Exact Validation + Ensemble Repair

Implement Kaggle's EXACT validation code from eazy-optimizer kernel:
1. Use scale_factor = 1e18 for polygon creation
2. Use poly.intersects(other) and not poly.touches(other) for overlap check
3. Test on the baseline (expected to pass) and on the previously failed submissions
4. Create ultra-conservative ensemble with repair

In [1]:
import sys
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import json
import shutil
import time
import os

# Decimal arithmetic in this notebook runs at 25 significant digits.
# The setting lives on the current Decimal context, so it applies to every
# Decimal operation executed after this cell.
getcontext().prec = 25

# Multiplier applied to all coordinates before polygons are built.
# The notebook intro attributes the 1e18 value to the eazy-optimizer kernel.
# ChristmasTree and the scoring code read this module-level name directly.
scale_factor = Decimal("1e18")

print("Libraries loaded successfully!")
print(f"Scale factor: {scale_factor}")

Libraries loaded successfully!
Scale factor: 1E+18


In [2]:
# Kaggle's EXACT ChristmasTree class from eazy-optimizer kernel
class ChristmasTree:
    """A single tree placed at (center_x, center_y) and rotated by ``angle``.

    The outline is a fixed 15-vertex polygon (tip, three tiers, trunk) whose
    coordinates are multiplied by the module-level ``scale_factor`` before the
    Shapely polygon is created, so ``self.polygon`` lives in scaled units.

    Attributes:
        center_x, center_y: placement offsets as ``Decimal`` (unscaled units).
        angle: rotation as ``Decimal``; passed to ``affinity.rotate``, which
            interprets it in degrees by default.
        polygon: the rotated, translated Shapely ``Polygon`` in scaled units.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        """Build the scaled polygon for one tree.

        Args:
            center_x: x offset; anything whose ``str()`` parses as a Decimal.
            center_y: y offset; same accepted forms as ``center_x``.
            angle: rotation applied about the origin before translation.
        """
        # Going through str() lets callers pass floats or strings alike.
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Tree dimensions in unscaled units.
        trunk_w = Decimal("0.15")
        trunk_h = Decimal("0.2")
        base_w = Decimal("0.7")
        mid_w = Decimal("0.4")
        top_w = Decimal("0.25")
        tip_y = Decimal("0.8")
        tier_1_y = Decimal("0.5")
        tier_2_y = Decimal("0.25")
        base_y = Decimal("0.0")
        trunk_bottom_y = -trunk_h

        # Create polygon with scale_factor (CRITICAL!)
        # Vertices run from the tip down the right-hand side, across the trunk
        # bottom, and back up the left-hand side. Each coordinate is scaled in
        # Decimal first and only then converted to float.
        initial_polygon = Polygon([
            (float(Decimal("0.0") * scale_factor), float(tip_y * scale_factor)),
            (float(top_w / Decimal("2") * scale_factor), float(tier_1_y * scale_factor)),
            (float(top_w / Decimal("4") * scale_factor), float(tier_1_y * scale_factor)),
            (float(mid_w / Decimal("2") * scale_factor), float(tier_2_y * scale_factor)),
            (float(mid_w / Decimal("4") * scale_factor), float(tier_2_y * scale_factor)),
            (float(base_w / Decimal("2") * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal("2") * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal("2") * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal("2")) * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal("2")) * scale_factor), float(base_y * scale_factor)),
            (float(-(base_w / Decimal("2")) * scale_factor), float(base_y * scale_factor)),
            (float(-(mid_w / Decimal("4")) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(mid_w / Decimal("2")) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(top_w / Decimal("4")) * scale_factor), float(tier_1_y * scale_factor)),
            (float(-(top_w / Decimal("2")) * scale_factor), float(tier_1_y * scale_factor)),
        ])

        # Rotate about the local origin first, then move to the scaled center.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            rotated,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor)
        )

print("ChristmasTree class defined with scale_factor=1e18.")

ChristmasTree class defined with scale_factor=1e18.


In [3]:
def parse_value(s):
    """Return a submission field as a plain string.

    String inputs that begin with the letter 's' have that single leading
    character removed; every other input is returned as ``str(s)``.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return s[1:] if has_marker else str(s)

def load_trees_for_n(df, n):
    """Build a ``ChristmasTree`` for every row of group ``n``.

    Rows are selected by id prefix: ids of group ``n`` start with the
    zero-padded three-digit group number followed by an underscore.
    The ``x``, ``y`` and ``deg`` fields go through ``parse_value`` first.

    Returns:
        list of ChristmasTree, in the row order of ``df``.
    """
    group = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]

def has_overlap_kaggle(trees):
    """Detect overlapping trees with the ``intersects and not touches`` rule.

    An STRtree over all polygons supplies the candidate neighbours of each
    polygon; a pair counts as overlapping when the two polygons intersect
    but do not merely touch.

    Assumes ``STRtree.query`` returns integer indices into the input list
    (Shapely 2.x behaviour) - confirm if running on an older Shapely.

    Returns:
        (bool, list): whether any overlap exists, and the ``(i, idx)`` index
        pairs with ``i < idx`` that overlap.
    """
    if len(trees) <= 1:
        return False, []

    polygons = [t.polygon for t in trees]
    spatial_index = STRtree(polygons)

    overlapping_pairs = []
    for i, poly in enumerate(polygons):
        for idx in spatial_index.query(poly):
            # Test each unordered pair once, and never a polygon against itself.
            if idx > i:
                other = polygons[idx]
                if not poly.intersects(other):
                    continue
                if poly.touches(other):
                    continue
                overlapping_pairs.append((i, idx))

    return bool(overlapping_pairs), overlapping_pairs

print("Helper functions defined.")

Helper functions defined.


In [4]:
# Validate the baseline submission
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
baseline_df = pd.read_csv(baseline_path)
print(f"Baseline loaded: {baseline_df.shape}")

# Every group N = 1..200 is checked; only the first five failures are detailed
print("\nTesting baseline with Kaggle's EXACT validation:")
overlapping_ns = []
for n in range(1, 201):
    trees = load_trees_for_n(baseline_df, n)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if not has_overlap:
        continue
    overlapping_ns.append(n)
    if len(overlapping_ns) <= 5:
        print(f"  N={n}: OVERLAP detected! Pairs: {pairs[:3]}")

print(f"\nBaseline: {len(overlapping_ns)} N values with overlaps")
if not overlapping_ns:
    print("  Baseline passes Kaggle's EXACT validation!")
else:
    print(f"  Overlapping N values: {overlapping_ns[:20]}...")

Baseline loaded: (20100, 4)

Testing baseline with Kaggle's EXACT validation:
  N=4: OVERLAP detected! Pairs: [(1, 2)]
  N=5: OVERLAP detected! Pairs: [(2, 3)]
  N=12: OVERLAP detected! Pairs: [(7, 11)]
  N=16: OVERLAP detected! Pairs: [(6, 15)]
  N=17: OVERLAP detected! Pairs: [(2, 15)]



Baseline: 49 N values with overlaps
  Overlapping N values: [4, 5, 12, 16, 17, 21, 23, 31, 35, 40, 41, 42, 43, 45, 48, 49, 53, 59, 61, 64]...


In [None]:
# Exp_001 ensemble: Kaggle reported a failure in group 002, so the check
# below is expected to flag N=2
ensemble_path = '/home/code/experiments/001_ensemble/submission.csv'
if not os.path.exists(ensemble_path):
    print("Exp_001 ensemble not found.")
else:
    ensemble_df = pd.read_csv(ensemble_path)
    print(f"\nExp_001 ensemble loaded: {ensemble_df.shape}")

    # Report the group that failed on Kaggle on its own first
    trees = load_trees_for_n(ensemble_df, 2)
    has_overlap, pairs = has_overlap_kaggle(trees)
    print(f"N=2: has_overlap={has_overlap}, pairs={pairs}")

    # Then sweep every group
    print("\nTesting exp_001 ensemble with Kaggle's EXACT validation:")
    overlapping_ns = []
    for n in range(1, 201):
        trees = load_trees_for_n(ensemble_df, n)
        has_overlap, pairs = has_overlap_kaggle(trees)
        if has_overlap:
            overlapping_ns.append(n)

    print(f"Exp_001 ensemble: {len(overlapping_ns)} N values with overlaps")
    if len(overlapping_ns) > 0:
        print(f"  Overlapping N values: {overlapping_ns[:20]}")

In [None]:
# Exp_002 fixed ensemble: Kaggle reported a failure in group 003, so the
# check below is expected to flag N=3
exp002_path = '/home/code/experiments/002_fixed_ensemble/submission.csv'
if not os.path.exists(exp002_path):
    print("Exp_002 fixed ensemble not found.")
else:
    exp002_df = pd.read_csv(exp002_path)
    print(f"\nExp_002 fixed ensemble loaded: {exp002_df.shape}")

    # Report the group that failed on Kaggle on its own first
    trees = load_trees_for_n(exp002_df, 3)
    has_overlap, pairs = has_overlap_kaggle(trees)
    print(f"N=3: has_overlap={has_overlap}, pairs={pairs}")

    # Then sweep every group
    print("\nTesting exp_002 fixed ensemble with Kaggle's EXACT validation:")
    overlapping_ns = []
    for n in range(1, 201):
        trees = load_trees_for_n(exp002_df, n)
        has_overlap, pairs = has_overlap_kaggle(trees)
        if has_overlap:
            overlapping_ns.append(n)

    print(f"Exp_002 fixed ensemble: {len(overlapping_ns)} N values with overlaps")
    if len(overlapping_ns) > 0:
        print(f"  Overlapping N values: {overlapping_ns[:20]}")

In [None]:
# Exp_005 fixed submission: Kaggle reported a failure in group 126, so the
# check below is expected to flag N=126
exp005_path = '/home/code/experiments/005_fixed_submission/submission.csv'
if not os.path.exists(exp005_path):
    print("Exp_005 fixed submission not found.")
else:
    exp005_df = pd.read_csv(exp005_path)
    print(f"\nExp_005 fixed submission loaded: {exp005_df.shape}")

    # Report the group that failed on Kaggle on its own first
    trees = load_trees_for_n(exp005_df, 126)
    has_overlap, pairs = has_overlap_kaggle(trees)
    print(f"N=126: has_overlap={has_overlap}, pairs={pairs}")

    # Then sweep every group
    print("\nTesting exp_005 fixed submission with Kaggle's EXACT validation:")
    overlapping_ns = []
    for n in range(1, 201):
        trees = load_trees_for_n(exp005_df, n)
        has_overlap, pairs = has_overlap_kaggle(trees)
        if has_overlap:
            overlapping_ns.append(n)

    print(f"Exp_005 fixed submission: {len(overlapping_ns)} N values with overlaps")
    if len(overlapping_ns) > 0:
        print(f"  Overlapping N values: {overlapping_ns[:20]}")

In [None]:
# Recap of what the validation runs above were expected to show
summary_lines = [
    "=" * 70,
    "VALIDATION SUMMARY",
    "=" * 70,
    "",
    "Kaggle's EXACT validation (scale_factor=1e18, intersects && !touches):",
    "",
    "Expected results based on Kaggle failures:",
    "  - exp_001: Should detect overlap in group 002",
    "  - exp_002: Should detect overlap in group 003",
    "  - exp_005: Should detect overlap in group 126",
    "",
    "If our validation doesn't catch these, it's STILL WRONG.",
]
print("\n".join(summary_lines))

In [None]:
# Build the ULTRA-CONSERVATIVE ensemble.
# For each N, keep the lowest-scoring configuration that passes
# has_overlap_kaggle; if no source passes, fall back to the baseline rows.

print("Creating ULTRA-CONSERVATIVE ensemble...")
print()

# Candidate submissions, in priority order for score ties
candidate_paths = [
    '/home/code/experiments/001_ensemble/submission.csv',
    '/home/code/experiments/002_fixed_ensemble/submission.csv',
    '/home/code/experiments/005_fixed_submission/submission.csv',
]

# Baseline is both the first contender and the fallback
baseline_df = pd.read_csv(baseline_path)

# Read each candidate file once, up front, rather than once per N.
# Missing files are skipped.
candidate_dfs = [
    (path, pd.read_csv(path))
    for path in candidate_paths
    if os.path.exists(path)
]


def _group_rows(df, n):
    """Return the rows of ``df`` whose id starts with the 'NNN_' prefix of group ``n``."""
    return df[df['id'].str.startswith(f"{n:03d}_")]


def _group_score(trees, n):
    """Return side**2 / n, where side is the larger bounding-box extent in unscaled units."""
    all_coords = np.vstack([
        np.array(tree.polygon.exterior.coords) / float(scale_factor)
        for tree in trees
    ])
    side = max(all_coords[:, 0].max() - all_coords[:, 0].min(),
               all_coords[:, 1].max() - all_coords[:, 1].min())
    return side**2 / n


# For each N, find the best valid configuration
best_configs = {}
for n in range(1, 201):
    best_score = float('inf')
    best_config = None

    # Baseline is evaluated first, so on equal scores it is kept
    # (the comparison below is strict).
    for source, source_df in [('baseline', baseline_df)] + candidate_dfs:
        trees = load_trees_for_n(source_df, n)
        # A group must contain exactly n trees. This also protects the
        # scoring step from an empty group, which np.vstack cannot handle.
        if len(trees) != n:
            continue
        has_overlap, _pairs = has_overlap_kaggle(trees)
        if has_overlap:
            continue
        score = _group_score(trees, n)
        if score < best_score:
            best_score = score
            best_config = (source, _group_rows(source_df, n))

    if best_config is None:
        print(f"WARNING: N={n} has no valid configuration! Using baseline.")
        best_config = ('baseline', _group_rows(baseline_df, n))

    best_configs[n] = best_config

print(f"Found best valid configurations for all 200 N values.")

In [None]:
# Stitch the chosen per-N row blocks together, in N order
final_rows = [best_configs[n][1] for n in range(1, 201)]
final_df = pd.concat(final_rows, ignore_index=True)
print(f"Final submission shape: {final_df.shape}")

# Re-run the overlap check on the assembled frame
print("\nVerifying final submission with Kaggle's EXACT validation...")
overlapping_ns = []
for n in range(1, 201):
    trees = load_trees_for_n(final_df, n)
    has_overlap, pairs = has_overlap_kaggle(trees)
    if not has_overlap:
        continue
    overlapping_ns.append(n)
    print(f"  N={n}: OVERLAP detected! Pairs: {pairs[:3]}")

print(f"\nFinal submission: {len(overlapping_ns)} N values with overlaps")
if not overlapping_ns:
    print("  Final submission passes Kaggle's EXACT validation!")
else:
    print(f"  Overlapping N values: {overlapping_ns}")

In [None]:
# Score the final submission against the baseline
def calculate_score(df):
    """Sum side**2 / n over the groups n = 1..200 of a submission frame.

    For each group, all polygon exterior vertices are divided by
    ``scale_factor`` to return to unscaled units; ``side`` is the larger of
    the x and y extents of those vertices.
    """
    unscale = float(scale_factor)
    total_score = 0
    for n in range(1, 201):
        vertices = np.vstack([
            np.array(tree.polygon.exterior.coords) / unscale
            for tree in load_trees_for_n(df, n)
        ])
        x_extent = vertices[:, 0].max() - vertices[:, 0].min()
        y_extent = vertices[:, 1].max() - vertices[:, 1].min()
        side = max(x_extent, y_extent)
        total_score += side**2 / n
    return total_score

final_score = calculate_score(final_df)
baseline_score = calculate_score(baseline_df)

for label, value in (
    ("Final score", final_score),
    ("Baseline score", baseline_score),
    ("Improvement", baseline_score - final_score),
):
    print(f"{label}: {value:.6f}")

In [None]:
# Save the final submission
work_dir = '/home/code/experiments/010_kaggle_exact_validation'
submission_dir = '/home/submission'

# Neither directory is created anywhere else in the notebook, so create them
# here; without this, to_csv / shutil.copy fail on a fresh checkout.
os.makedirs(work_dir, exist_ok=True)
os.makedirs(submission_dir, exist_ok=True)

final_df.to_csv(f'{work_dir}/submission.csv', index=False)
shutil.copy(f'{work_dir}/submission.csv', f'{submission_dir}/submission.csv')

# Save metrics
# Scores are accumulated from numpy scalars; cast to builtin float so the
# dict is plain-Python in both the JSON file and the printed repr.
metrics = {
    'cv_score': float(final_score),
    'baseline_score': float(baseline_score),
    'improvement': float(baseline_score - final_score),
    'overlapping_ns': overlapping_ns,
    'notes': f'Ultra-conservative ensemble using Kaggle EXACT validation (scale_factor=1e18). {len(overlapping_ns)} N values with overlaps.'
}
with open(f'{work_dir}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nSubmission saved to {work_dir}/submission.csv")
print(f"Metrics: {metrics}")

In [None]:
# Closing report: scores, remaining overlaps, and distance to the target score
target_score = 68.884199
gap = final_score - target_score
rule = "=" * 70

print(rule)
print("EXPERIMENT 010: KAGGLE-EXACT VALIDATION + ENSEMBLE REPAIR SUMMARY")
print(rule)
print()
print(f"Final score: {final_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - final_score:.6f}")
print()
print(f"Overlapping N values: {len(overlapping_ns)}")
if not overlapping_ns:
    print("  None - submission should pass Kaggle validation!")
else:
    print(f"  {overlapping_ns}")
print()
print(f"Target: {target_score:.6f}")
print(f"Gap to target: {gap:.6f} ({gap / target_score * 100:.2f}%)")
print(rule)

In [5]:
# CRITICAL FINDING: this validation is STRICTER than Kaggle's.
# The baseline was accepted by Kaggle, yet the check above flags 49 groups,
# so Kaggle's validation must be more lenient than this one.

# Look at what the flagged baseline pairs actually share
print("Investigating baseline 'overlaps'...")
print()

for n in [4, 5, 12]:
    trees = load_trees_for_n(baseline_df, n)
    has_overlap, pairs = has_overlap_kaggle(trees)
    print(f"N={n}: {len(pairs)} overlapping pairs")

    for i, j in pairs[:3]:
        poly_i, poly_j = trees[i].polygon, trees[j].polygon

        # Geometry shared by the two polygons, and both predicates
        intersection = poly_i.intersection(poly_j)
        report = [
            f"  Pair ({i}, {j}):",
            f"    intersects: {poly_i.intersects(poly_j)}",
            f"    touches: {poly_i.touches(poly_j)}",
            f"    intersection type: {intersection.geom_type}",
            f"    intersection area: {intersection.area}",
        ]
        if hasattr(intersection, 'length'):
            report.append(f"    intersection length: {intersection.length}")
        print("\n".join(report))
    print()

Investigating baseline 'overlaps'...

N=4: 1 overlapping pairs
  Pair (1, 2):
    intersects: True
    touches: False
    intersection type: MultiPoint
    intersection area: 0.0
    intersection length: 0.0

N=5: 1 overlapping pairs
  Pair (2, 3):
    intersects: True
    touches: False
    intersection type: Polygon
    intersection area: 429056.0
    intersection length: 12161.30968530276

N=12: 1 overlapping pairs
  Pair (7, 11):
    intersects: True
    touches: False
    intersection type: Polygon
    intersection area: 504115236864.0
    intersection length: 35051724800.265656



In [7]:
# touches() comes back False for these pairs even when the computed
# intersection is only a MultiPoint; Kaggle may treat such contact as
# touching (no interior overlap).

# Print the DE-9IM relation of the first flagged pair of each group
print("Checking DE-9IM matrix for baseline 'overlaps'...")
print()

for n in [4, 5, 12]:
    trees = load_trees_for_n(baseline_df, n)
    has_overlap, pairs = has_overlap_kaggle(trees)

    for i, j in pairs[:1]:
        poly_i, poly_j = trees[i].polygon, trees[j].polygon

        # relate() returns the DE-9IM pattern as a 9-character string
        matrix = poly_i.relate(poly_j)
        intersection = poly_i.intersection(poly_j)

        print(
            f"N={n}, Pair ({i}, {j}):",
            f"  DE-9IM matrix: {matrix}",
            f"  intersection type: {intersection.geom_type}",
            f"  intersection area: {intersection.area}",
            f"  matrix[0] (interior-interior): {matrix[0]}",
            f"  '2' means 2D overlap (area), '1' means 1D (line), '0' means 0D (point)",
            "",
            sep="\n",
        )

Checking DE-9IM matrix for baseline 'overlaps'...

N=4, Pair (1, 2):
  DE-9IM matrix: 212101212
  intersection type: MultiPoint
  intersection area: 0.0
  matrix[0] (interior-interior): 2
  '2' means 2D overlap (area), '1' means 1D (line), '0' means 0D (point)

N=5, Pair (2, 3):
  DE-9IM matrix: 212101212
  intersection type: Polygon
  intersection area: 429056.0
  matrix[0] (interior-interior): 2
  '2' means 2D overlap (area), '1' means 1D (line), '0' means 0D (point)

N=12, Pair (7, 11):
  DE-9IM matrix: 212101212
  intersection type: Polygon
  intersection area: 504115236864.0
  matrix[0] (interior-interior): 2
  '2' means 2D overlap (area), '1' means 1D (line), '0' means 0D (point)



In [8]:
# The intersection area reported for N=5 is 429056 in SCALED coordinates.
# Areas scale with scale_factor**2, so in original coordinates that is
# 429056 / (1e18)^2 = 4.29e-31 - essentially zero.
# Likewise for N=12: 504115236864 / (1e18)^2 = 5.04e-25 - also essentially zero.

# Hypothesis: scale_factor=1e18 is causing numerical precision issues.
# To test it, recompute the overlaps with polygons built directly in the
# original (unscaled) coordinates.

print("Checking actual overlap in ORIGINAL coordinates...")
print()

# Create trees WITHOUT scale_factor
class ChristmasTreeOriginal:
    """Tree polygon built directly in unscaled float coordinates.

    Same 15-vertex outline as ``ChristmasTree``, but with plain floats and
    no ``scale_factor``: the outline is rotated about the origin by ``angle``
    and then translated to ``(center_x, center_y)``.
    """

    def __init__(self, center_x="0", center_y="0", angle="0"):
        self.center_x = float(center_x)
        self.center_y = float(center_y)
        self.angle = float(angle)

        # Tree dimensions
        trunk_w, trunk_h = 0.15, 0.2
        base_w, mid_w, top_w = 0.7, 0.4, 0.25
        tip_y, tier_1_y, tier_2_y, base_y = 0.8, 0.5, 0.25, 0.0
        trunk_bottom_y = -trunk_h

        # Right-hand outline, walking from just below the tip down to the
        # trunk bottom.
        right_side = [
            (top_w / 2, tier_1_y),
            (top_w / 4, tier_1_y),
            (mid_w / 2, tier_2_y),
            (mid_w / 4, tier_2_y),
            (base_w / 2, base_y),
            (trunk_w / 2, base_y),
            (trunk_w / 2, trunk_bottom_y),
        ]
        # The left-hand outline is the mirror image, walked bottom to top.
        # Negating a float is exact, so the vertices equal -w / k bit for bit.
        left_side = [(-x, y) for x, y in reversed(right_side)]

        outline = Polygon([(0.0, tip_y)] + right_side + left_side)
        turned = affinity.rotate(outline, self.angle, origin=(0, 0))
        self.polygon = affinity.translate(turned, xoff=self.center_x, yoff=self.center_y)

def load_trees_original(df, n):
    """Build a ``ChristmasTreeOriginal`` for every row of group ``n``.

    Rows are picked by the zero-padded 'NNN_' id prefix; ``x``, ``y`` and
    ``deg`` are normalised with ``parse_value`` before construction.
    """
    group = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTreeOriginal(parse_value(x), parse_value(y), parse_value(deg))
        for x, y, deg in zip(group['x'], group['y'], group['deg'])
    ]

for n in [4, 5, 12]:
    trees = load_trees_original(baseline_df, n)
    polygons = [t.polygon for t in trees]

    # Brute-force every unordered pair; report those that intersect
    # without merely touching.
    for i, poly_i in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            poly_j = polygons[j]
            if not poly_i.intersects(poly_j) or poly_i.touches(poly_j):
                continue
            intersection = poly_i.intersection(poly_j)
            print(
                f"N={n}, Pair ({i}, {j}):",
                f"  intersection type: {intersection.geom_type}",
                f"  intersection area: {intersection.area:.15e}",
                "",
                sep="\n",
            )

Checking actual overlap in ORIGINAL coordinates...

N=4, Pair (1, 2):
  intersection type: MultiPoint
  intersection area: 0.000000000000000e+00

N=5, Pair (2, 3):
  intersection type: Polygon
  intersection area: 3.420451581231731e-31

N=12, Pair (7, 11):
  intersection type: Polygon
  intersection area: 3.973926099021712e-25



In [9]:
# The "overlaps" are numerical precision artifacts (1e-31, 1e-25 area)
# These are NOT real overlaps - they're floating point errors

# Let's use a more robust overlap check that ignores tiny numerical errors
# We'll check if the intersection area is significant (> 1e-15)

def has_overlap_robust(trees, tolerance=1e-15):
    """Report pairs whose intersection area exceeds ``tolerance``.

    Every unordered pair of trees is tested. A pair is recorded only when the
    polygons intersect and the area of their intersection is strictly greater
    than ``tolerance``.

    Returns:
        (bool, list): whether any such pair exists, and a list of
        ``(i, j, area)`` tuples with ``i < j``.
    """
    if len(trees) <= 1:
        return False, []

    polygons = [t.polygon for t in trees]
    overlapping_pairs = []
    for i, poly_i in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            poly_j = polygons[j]
            if not poly_i.intersects(poly_j):
                continue
            shared_area = poly_i.intersection(poly_j).area
            # Strict comparison: an area exactly at the tolerance is ignored
            if shared_area > tolerance:
                overlapping_pairs.append((i, j, shared_area))

    return bool(overlapping_pairs), overlapping_pairs

# Run the area-thresholded check on the baseline, in original coordinates
print("Testing baseline with ROBUST overlap check (tolerance=1e-15)...")
print()

overlapping_ns = []
for n in range(1, 201):
    trees = load_trees_original(baseline_df, n)
    has_overlap, pairs = has_overlap_robust(trees, tolerance=1e-15)
    if not has_overlap:
        continue
    # Keep the offending pairs alongside N for the report below
    overlapping_ns.append((n, pairs))

print(f"Baseline: {len(overlapping_ns)} N values with significant overlaps")
if not overlapping_ns:
    print("  Baseline passes robust overlap check!")
else:
    for n, pairs in overlapping_ns[:10]:
        print(f"  N={n}: {pairs[:3]}")

Testing baseline with ROBUST overlap check (tolerance=1e-15)...



Baseline: 0 N values with significant overlaps
  Baseline passes robust overlap check!


In [10]:
# Run the robust (area-thresholded) overlap check on the submissions that
# Kaggle rejected. One data-driven loop replaces three pasted copies of the
# same block; the printed output is unchanged.
print("Testing failed ensemble submissions with ROBUST overlap check...")
print()

failed_submissions = [
    ("Exp_001", '/home/code/experiments/001_ensemble/submission.csv'),          # failed on group 002
    ("Exp_002", '/home/code/experiments/002_fixed_ensemble/submission.csv'),    # failed on group 003
    ("Exp_005", '/home/code/experiments/005_fixed_submission/submission.csv'),  # failed on group 126
]

for position, (label, path) in enumerate(failed_submissions):
    # Missing submissions are skipped silently
    if not os.path.exists(path):
        continue

    df = pd.read_csv(path)
    overlapping_ns = []
    for n in range(1, 201):
        trees = load_trees_original(df, n)
        has_overlap, pairs = has_overlap_robust(trees, tolerance=1e-15)
        if has_overlap:
            overlapping_ns.append((n, pairs))

    print(f"{label}: {len(overlapping_ns)} N values with significant overlaps")
    # Show the first offending pair's area for up to ten groups
    for n, pairs in overlapping_ns[:10]:
        print(f"  N={n}: area={pairs[0][2]:.6e}")

    # Blank separator after every report except the last one
    if position < len(failed_submissions) - 1:
        print()

Testing failed ensemble submissions with ROBUST overlap check...



Exp_001: 4 N values with significant overlaps
  N=2: area=7.019842e-13
  N=29: area=3.262740e-14
  N=103: area=4.164082e-15
  N=138: area=1.180620e-13



Exp_002: 0 N values with significant overlaps



Exp_005: 0 N values with significant overlaps
