# Experiment 020: Rebuild from Corners - PROPERLY FIXED

Fix the CRITICAL BUG from exp_017:
- exp_017 used tree CENTER for distance calculation
- Chistyakov kernel uses POLYGON BOUNDS

This is the CORRECT implementation using polygon bounds.

In [None]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import glob
import json
import time
import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Use 30 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 30
# Multiplier applied to tree coordinates by create_high_precision_tree
# before they are converted to float for the strict overlap check.
SCALE_FACTOR = Decimal('1e18')

print("Setup complete")

In [None]:
# Tree shape vertices, listed once as (x, y) pairs in polygon order and then
# split into the parallel coordinate arrays TX and TY used by the helpers.
_TREE_OUTLINE = (
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
)
TX, TY = (np.array(axis) for axis in zip(*_TREE_OUTLINE))

def create_tree_polygon(x, y, angle):
    """Return the tree outline rotated by ``angle`` and moved to ``(x, y)``.

    All three arguments are coerced with ``float()``, so numeric strings are
    accepted. The outline comes from the module-level ``TX``/``TY`` arrays;
    it is rotated about the origin first (``affinity.rotate`` interprets the
    angle in degrees by default) and translated afterwards.
    """
    offset_x, offset_y, rotation = (float(value) for value in (x, y, angle))
    outline = Polygon(list(zip(TX, TY)))
    rotated = affinity.rotate(outline, rotation, origin=(0, 0))
    return affinity.translate(rotated, offset_x, offset_y)

def create_high_precision_tree(x, y, angle):
    """Return the tree polygon with coordinates scaled by ``SCALE_FACTOR``.

    ``x``, ``y`` and ``angle`` are parsed through ``Decimal(str(...))``. The
    outline vertices and the translation offsets are multiplied by
    ``SCALE_FACTOR`` as Decimals and only then converted to float. The
    rotation is applied about the origin before the translation.
    """
    pos_x, pos_y, rotation = (Decimal(str(value)) for value in (x, y, angle))
    scale = SCALE_FACTOR

    # Outline vertices as decimal literals, in polygon order.
    outline = (
        ('0.0', '0.8'),
        ('0.125', '0.5'),
        ('0.0625', '0.5'),
        ('0.2', '0.25'),
        ('0.1', '0.25'),
        ('0.35', '0.0'),
        ('0.075', '0.0'),
        ('0.075', '-0.2'),
        ('-0.075', '-0.2'),
        ('-0.075', '0.0'),
        ('-0.35', '0.0'),
        ('-0.1', '0.25'),
        ('-0.2', '0.25'),
        ('-0.0625', '0.5'),
        ('-0.125', '0.5'),
    )
    scaled_vertices = [
        (float(Decimal(vx) * scale), float(Decimal(vy) * scale))
        for vx, vy in outline
    ]

    shape = Polygon(scaled_vertices)
    shape = affinity.rotate(shape, float(rotation), origin=(0, 0))
    return affinity.translate(
        shape,
        xoff=float(pos_x * scale),
        yoff=float(pos_y * scale),
    )

def validate_no_overlap_strict(trees_data):
    """Return True when no two trees overlap at the scaled-up precision.

    Args:
        trees_data: sequence of mappings with ``'x'``, ``'y'`` and ``'deg'``
            entries, one per tree.

    Returns:
        ``True`` for zero or one tree, or when every pair of polygons is
        either disjoint or merely touching. ``False`` when any pair
        intersects without just touching, or when building or comparing the
        polygons raises an ordinary exception (the layout is then treated as
        invalid rather than trusted).
    """
    from itertools import combinations

    if len(trees_data) <= 1:
        return True
    try:
        polygons = [
            create_high_precision_tree(t['x'], t['y'], t['deg'])
            for t in trees_data
        ]
        for first, second in combinations(polygons, 2):
            # Shared boundary points are allowed; shared interior is not.
            if first.intersects(second) and not first.touches(second):
                return False
        return True
    except Exception:
        # Deliberately conservative: a layout that cannot be checked is
        # rejected. Unlike a bare ``except``, this lets KeyboardInterrupt
        # and SystemExit propagate so a long run can still be stopped.
        return False

def get_bbox_side(trees):
    """Return the longer side of the axis-aligned box around all trees.

    ``trees`` is a sequence of mappings with ``'x'``, ``'y'`` and ``'deg'``
    entries. An empty sequence yields ``0``.
    """
    if len(trees) == 0:
        return 0
    merged = unary_union(
        [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    )
    min_x, min_y, max_x, max_y = merged.bounds
    width = max_x - min_x
    height = max_y - min_y
    return max(width, height)

def get_score(trees, n):
    """Return the squared bounding-box side of ``trees`` divided by ``n``."""
    return (get_bbox_side(trees) ** 2) / n

# Progress marker: printed once the geometry/scoring helpers above are defined.
print("Core functions defined")

In [None]:
# Load current best (exp_019) and score every N group in it.
print("Loading current best...")
baseline_path = '/home/code/experiments/019_full_kaggle_ensemble/submission.csv'
df = pd.read_csv(baseline_path)
# The tree count N is the part of the id before the first underscore.
df['N'] = df['id'].astype(str).str.split('_').str[0].astype(int)

best_trees = {}
best_scores = {}

for n, group in df.groupby('N'):
    # Drop the 's' marker from each value and keep the rest as a string.
    layout = [
        {
            'x': str(raw_x).replace('s', ''),
            'y': str(raw_y).replace('s', ''),
            'deg': str(raw_deg).replace('s', ''),
        }
        for raw_x, raw_y, raw_deg in zip(group['x'], group['y'], group['deg'])
    ]
    best_trees[n] = layout
    best_scores[n] = get_score(layout, n)

baseline_total = sum(best_scores.values())
print(f"Baseline score: {baseline_total:.6f}")

In [None]:
def get_layout_bounds(trees):
    """Return ``(minx, miny, maxx, maxy)`` of the union of all tree polygons."""
    return unary_union(
        [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    ).bounds

def rebuild_from_corners_FIXED(large_layout, target_n, current_best_score):
    """Try to carve a better ``target_n``-tree layout out of a larger one.

    For each corner of the large layout's bounding box, the trees are ranked
    by the Chebyshev distance from that corner to the farthest edge of the
    tree's own bounding box (polygon bounds, not the tree centre). The
    ``target_n`` closest trees form a candidate subset.

    Args:
        large_layout: sequence of tree mappings (``'x'``, ``'y'``, ``'deg'``).
        target_n: number of trees wanted in the subset.
        current_best_score: score a candidate has to beat by more than 1e-9.

    Returns:
        ``(subset, score)`` for the best candidate that beats
        ``current_best_score`` and passes ``validate_no_overlap_strict``, or
        ``None`` when no candidate qualifies, when ``target_n`` is not
        positive, or when the layout has no more than ``target_n`` trees.
    """
    # A non-positive target has no meaningful score (division by zero).
    if target_n <= 0 or len(large_layout) <= target_n:
        return None

    minx, miny, maxx, maxy = get_layout_bounds(large_layout)

    # 4 corners of the bounding box
    corners = (
        (minx, miny),  # bottom-left
        (minx, maxy),  # top-left
        (maxx, miny),  # bottom-right
        (maxx, maxy),  # top-right
    )

    # Each tree's bounds do not depend on the corner, so build the polygons
    # once instead of once per corner.
    tree_bounds = [
        create_tree_polygon(t['x'], t['y'], t['deg']).bounds
        for t in large_layout
    ]

    best_subset = None
    best_score = current_best_score

    for corner_x, corner_y in corners:

        def corner_distance(index, cx=corner_x, cy=corner_y):
            """Largest axis offset from the corner to the tree's bounds."""
            b_minx, b_miny, b_maxx, b_maxy = tree_bounds[index]
            return max(
                abs(b_minx - cx),
                abs(b_maxx - cx),
                abs(b_miny - cy),
                abs(b_maxy - cy),
            )

        # sorted() is stable, so equally distant trees keep layout order.
        ranked = sorted(range(len(large_layout)), key=corner_distance)
        subset = [large_layout[i] for i in ranked[:target_n]]

        score = get_score(subset, target_n)

        # Only pay for the strict overlap check when the score is better.
        if score < best_score - 1e-9 and validate_no_overlap_strict(subset):
            best_score = score
            best_subset = subset

    if best_subset is None:
        return None
    return best_subset, best_score

# Progress marker: printed once rebuild_from_corners_FIXED is defined.
print("FIXED rebuild_from_corners function defined")

In [None]:
# Demonstrate the difference between CENTER and BOUNDS distance
print("\n" + "=" * 60)
print("DEMONSTRATING THE BUG FIX")
print("=" * 60)

# Take a tree at (0, 0) with 45 degree rotation
test_tree = {'x': '0', 'y': '0', 'deg': '45'}
poly = create_tree_polygon(test_tree['x'], test_tree['y'], test_tree['deg'])
b = poly.bounds

# The degree sign is written as an escape so the message cannot be garbled
# again if the file is re-encoded (it previously printed as mojibake).
print("\nTree at center (0, 0) with 45\u00b0 rotation:")
print("  Tree CENTER: (0, 0)")
print(f"  Polygon BOUNDS: minx={b[0]:.4f}, miny={b[1]:.4f}, maxx={b[2]:.4f}, maxy={b[3]:.4f}")

# Distance from corner (0, 0): Chebyshev distance to the tree centre versus
# to the farthest edge of the polygon's bounding box.
corner = (0, 0)
center_dist = max(abs(0 - corner[0]), abs(0 - corner[1]))
bounds_dist = max(abs(b[0] - corner[0]), abs(b[2] - corner[0]), abs(b[1] - corner[1]), abs(b[3] - corner[1]))

print("\nDistance from corner (0, 0):")
print(f"  Using CENTER (WRONG): {center_dist:.4f}")
print(f"  Using BOUNDS (CORRECT): {bounds_dist:.4f}")
print(f"  Difference: {bounds_dist - center_dist:.4f}")

In [None]:
# Apply FIXED rebuild from corners to ALL large layouts
print("\n" + "=" * 60)
print("APPLYING FIXED REBUILD FROM CORNERS")
print("=" * 60)

# Large layouts to use as sources (from our current best)
large_n_values = list(range(50, 201, 10))  # 50, 60, 70, ..., 200

# One (target_n, improvement, source_n) entry per accepted replacement.
# The same target_n can appear several times when later sources improve it
# again; each entry holds the gain over the best score at that moment.
improvements = []
start_time = time.time()

for large_n in large_n_values:
    large_layout = best_trees[large_n]
    print(f"\nUsing N={large_n} layout as source...")

    # Try to extract subsets for smaller N values
    for target_n in range(2, large_n):
        result = rebuild_from_corners_FIXED(large_layout, target_n, best_scores[target_n])
        if result is None:
            continue

        subset, score = result
        improvement = best_scores[target_n] - score
        improvements.append((target_n, improvement, large_n))
        best_trees[target_n] = subset
        best_scores[target_n] = score
        print(f"  N={target_n}: +{improvement:.6f} (from N={large_n} corner)")

print(f"\nTotal time: {time.time() - start_time:.1f}s")
# Count distinct N values: len(improvements) would count an N once for every
# source layout that improved it.
improved_n_values = {improved_n for improved_n, _, _ in improvements}
print(f"N values improved: {len(improved_n_values)}")
if improvements:
    # Gains are incremental, so summing every entry gives the total gain.
    total_improvement = sum(imp for _, imp, _ in improvements)
    print(f"Total improvement: {total_improvement:.6f}")

In [None]:
# Total the per-N scores and print the comparison with the baseline and target.
final_total = sum(best_scores.values())
summary_lines = [
    "\n" + "=" * 60,
    "RESULTS",
    "=" * 60,
    f"Baseline score: {baseline_total:.6f}",
    f"Final score: {final_total:.6f}",
    f"Improvement: {baseline_total - final_total:.6f}",
    f"Target: 68.876781",
    f"Gap to target: {final_total - 68.876781:.6f}",
]
print("\n".join(summary_lines))

In [None]:
# Re-check every layout for N = 1..200 with the strict overlap test.
print("\n" + "=" * 60)
print("FINAL VALIDATION")
print("=" * 60)

# N values whose current best layout fails the strict check.
final_overlaps = [
    n for n in range(1, 201)
    if not validate_no_overlap_strict(best_trees[n])
]

print(
    f"WARNING: {len(final_overlaps)} N values have overlaps!"
    if final_overlaps
    else "All N values pass strict validation!"
)

In [None]:
# Write the best layout for every N = 1..200 to the submission files.
print("\n" + "=" * 60)
print("CREATE SUBMISSION")
print("=" * 60)


def _as_submission_value(value):
    """Return ``value`` as text with any 's' removed and one 's' prepended."""
    return f"s{str(value).replace('s', '')}"


# One row per tree; the id is the zero-padded N followed by the tree index.
rows = [
    {
        'id': f"{n:03d}_{i}",
        'x': _as_submission_value(t['x']),
        'y': _as_submission_value(t['y']),
        'deg': _as_submission_value(t['deg']),
    }
    for n in range(1, 201)
    for i, t in enumerate(best_trees[n])
]

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")

for output_path in (
    '/home/code/experiments/020_rebuild_corners_proper/submission.csv',
    '/home/submission/submission.csv',
):
    submission_df.to_csv(output_path, index=False)
print("Submission saved!")

In [None]:
# Save metrics
metrics = {
    'cv_score': final_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - final_total,
    # Distinct N values that were improved. len(improvements) alone counts an
    # N once per source layout that improved it, so it can exceed this.
    'n_improved': len({improved_n for improved_n, _, _ in improvements}),
    # Raw number of accepted replacements, kept for reference.
    'n_improvement_events': len(improvements),
    'final_overlaps': len(final_overlaps),
    'target': 68.876781,
    'gap': final_total - 68.876781,
    'bug_fix': 'Used POLYGON BOUNDS instead of tree CENTER for distance calculation'
}

with open('/home/code/experiments/020_rebuild_corners_proper/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nMetrics saved!")
print(json.dumps(metrics, indent=2))