# Experiment 002: Python Optimization

Implement actual optimization algorithms:
1. N=1 exhaustive rotation search
2. Fractional translation for all N
3. Backward propagation

Target: Beat baseline score of 70.622435

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
import math
import warnings
warnings.filterwarnings('ignore')

# Tree shape vertices, stored as two parallel lists: vertex k of the outline
# is (TX[k], TY[k]). create_tree_polygon() zips them in this order to build the
# polygon ring, so the two lists must stay the same length and order.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

# Sanity output: vertex count plus the unrotated extents (max - min per axis).
print("Setup complete")
print(f"Tree has {len(TX)} vertices")
print(f"Tree height: {max(TY) - min(TY)}")
print(f"Tree width: {max(TX) - min(TX)}")

In [None]:
def create_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and then moved to (x, y).

    The rotation is applied first, about the tree's local origin (0, 0); the
    translation by (x, y) follows. All three arguments are passed through
    ``float()``, so numeric strings are accepted as well as numbers.

    Returns:
        The transformed Shapely polygon.
    """
    offset_x, offset_y, rotation = float(x), float(y), float(angle)

    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    rotated = affinity.rotate(outline, rotation, origin=(0, 0))
    return affinity.translate(rotated, offset_x, offset_y)

def get_bbox_side_for_angle(angle):
    """Return the larger bounding-box dimension of one tree rotated by ``angle``.

    The tree is placed at the origin, so only the rotation affects the result.
    """
    min_x, min_y, max_x, max_y = create_tree_polygon(0, 0, angle).bounds
    return max(max_x - min_x, max_y - min_y)

def get_bbox_side_for_trees(trees):
    """Return the larger bounding-box dimension of a group of trees.

    Args:
        trees: iterable of mappings with ``'x'``, ``'y'`` and ``'deg'`` entries,
            each forwarded to ``create_tree_polygon``.

    Returns:
        ``max(width, height)`` of the axis-aligned box around the union of all
        tree polygons.
    """
    shapes = [
        create_tree_polygon(tree['x'], tree['y'], tree['deg'])
        for tree in trees
    ]
    extent = unary_union(shapes).bounds  # (minx, miny, maxx, maxy)
    span_x = extent[2] - extent[0]
    span_y = extent[3] - extent[1]
    return max(span_x, span_y)

def parse_value(val):
    """Return ``val`` as a string, dropping one leading ``'s'`` from strings.

    String inputs that start with ``'s'`` lose exactly that first character;
    every other input (including non-strings) is returned as ``str(val)``.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_marker else str(val)


print("Helper functions defined")

In [None]:
# TASK 1: Find optimal N=1 rotation angle
# This is the HIGHEST PRIORITY optimization!

print("=" * 60)
print("TASK 1: N=1 EXHAUSTIVE ROTATION SEARCH")
print("=" * 60)

# Reference point for the comparison printed at the end of this cell.
baseline_n1_angle = 45.0  # Common assumption in top kernels
baseline_side = get_bbox_side_for_angle(baseline_n1_angle)
print(f"\nBaseline angle (45°): side = {baseline_side:.8f}")
print(f"Baseline N=1 score contribution: {baseline_side**2:.8f}")

# Brute force over 0.00°..359.99°. Angles are enumerated as integer hundredths
# so every candidate is computed as step / 100.0 rather than by accumulating
# a float increment. min() returns the first of any tied minima, i.e. the
# smallest such angle.
print("\nSearching all angles from 0° to 360° in 0.01° increments...")
best_step = min(
    range(36000),
    key=lambda step: get_bbox_side_for_angle(step / 100.0),
)
best_angle = best_step / 100.0
best_side = get_bbox_side_for_angle(best_angle)

print(f"\n✅ OPTIMAL N=1 ANGLE FOUND!")
print(f"Optimal angle: {best_angle}°")
print(f"Optimal side: {best_side:.8f}")
print(f"Optimal N=1 score: {best_side**2:.8f}")

# Gain in squared side length relative to the 45° reference.
improvement = baseline_side**2 - best_side**2
print(f"\nImprovement over 45°: {improvement:.8f} points")

In [None]:
# Probe a few offsets (in degrees) on either side of the best angle to see how
# quickly the bounding-box side grows away from the optimum.
print("\nAngle sensitivity analysis around optimal:")
probe_angles = [best_angle + delta for delta in (-1, -0.5, -0.1, 0, 0.1, 0.5, 1)]
for test_angle in probe_angles:
    test_side = get_bbox_side_for_angle(test_angle)
    print(f"  {test_angle:7.2f}°: side = {test_side:.8f}, score = {test_side**2:.8f}")

# Compare the best angle with its quarter-turn rotations (wrapped into
# [0, 360)) to see whether they give the same side length.
print("\nChecking for equivalent optimal angles:")
for quarter_turns in range(4):
    angle = (best_angle + 90 * quarter_turns) % 360
    side = get_bbox_side_for_angle(angle)
    print(f"  {angle:7.2f}°: side = {side:.8f}")

In [None]:
# Load baseline submission
df = pd.read_csv('/home/code/experiments/001_fix_overlaps/submission.csv')
print(f"Loaded baseline with {len(df)} rows")

# Group the rows by puzzle size. Each id is split on '_' into the tree count
# (first part) and the tree index (second part); x / y / deg are kept as
# strings after parse_value() removes the leading 's' marker.
trees_by_n = {}
for _, row in df.iterrows():
    id_parts = row['id'].split('_')
    n = int(id_parts[0])
    idx = int(id_parts[1])
    tree = {
        'idx': idx,
        'x': parse_value(row['x']),
        'y': parse_value(row['y']),
        'deg': parse_value(row['deg']),
    }
    trees_by_n.setdefault(n, []).append(tree)

print(f"Parsed trees for N=1 to {max(trees_by_n.keys())}")

# Report the single-tree layout as loaded, for comparison with the optimum.
print(f"\nCurrent N=1 configuration:")
print(f"  x={trees_by_n[1][0]['x']}, y={trees_by_n[1][0]['y']}, deg={trees_by_n[1][0]['deg']}")
current_n1_side = get_bbox_side_for_trees(trees_by_n[1])
print(f"  Current side: {current_n1_side:.8f}")
print(f"  Current score: {current_n1_side**2:.8f}")

In [None]:
# Update N=1 with optimal angle
print("\n" + "=" * 60)
print("APPLYING N=1 OPTIMIZATION")
print("=" * 60)

# Work on a copy (fresh list and fresh dict per tree) so the parsed baseline
# in trees_by_n stays untouched for the later score comparison.
optimized_trees_by_n = {
    count: [tree.copy() for tree in group]
    for count, group in trees_by_n.items()
}

# Replace the single-tree layout: one tree at the origin, rotated by the best
# angle found by the exhaustive search.
optimized_trees_by_n[1] = [dict(idx=0, x='0', y='0', deg=str(best_angle))]

new_n1_side = get_bbox_side_for_trees(optimized_trees_by_n[1])
print(f"New N=1 side: {new_n1_side:.8f}")
print(f"New N=1 score: {new_n1_side**2:.8f}")
print(f"N=1 improvement: {current_n1_side**2 - new_n1_side**2:.8f} points")

In [None]:
# TASK 2: Implement strict overlap detection for validation
print("\n" + "=" * 60)
print("IMPLEMENTING STRICT OVERLAP DETECTION")
print("=" * 60)

def create_scaled_tree_polygon(x, y, angle, scale_factor=1e15):
    """Build the tree polygon with every coordinate multiplied by ``scale_factor``.

    The outline vertices are scaled first, then rotated by ``angle`` about
    (0, 0), then translated by the scaled position
    ``(x * scale_factor, y * scale_factor)``. ``x``, ``y`` and ``angle`` are
    passed through ``float()``, so numeric strings are accepted.
    """
    pos_x, pos_y, rotation = float(x), float(y), float(angle)

    scaled_outline = Polygon(
        [(vx * scale_factor, vy * scale_factor) for vx, vy in zip(TX, TY)]
    )
    rotated = affinity.rotate(scaled_outline, rotation, origin=(0, 0))
    return affinity.translate(
        rotated,
        xoff=pos_x * scale_factor,
        yoff=pos_y * scale_factor,
    )

def has_overlap_strict(trees):
    """Find the first pair of trees whose scaled polygons share positive area.

    Pairs are visited in index order (i < j). A pair counts as overlapping
    only if the polygons intersect, do not merely touch, and their
    intersection has area greater than zero.

    Returns:
        ``(True, i, j)`` for the first overlapping pair, otherwise
        ``(False, -1, -1)``.
    """
    shapes = [
        create_scaled_tree_polygon(tree['x'], tree['y'], tree['deg'])
        for tree in trees
    ]
    for i, first in enumerate(shapes):
        for j in range(i + 1, len(shapes)):
            second = shapes[j]
            # Disjoint or boundary-only contact: not an overlap.
            if not first.intersects(second) or first.touches(second):
                continue
            if first.intersection(second).area > 0:
                return True, i, j
    return False, -1, -1

print("Strict overlap detection implemented")

In [None]:
# TASK 3: Implement fractional translation for small N values
print("\n" + "=" * 60)
print("TASK 3: FRACTIONAL TRANSLATION")
print("=" * 60)

def fractional_translation(trees, max_iter=50, verbose=False):
    """Greedily nudge trees in tiny steps to shrink the bounding-box side.

    Each pass visits every tree and tries every step size in every one of the
    eight axis/diagonal directions. A move is accepted as soon as it reduces
    the bounding-box side by more than 1e-12 and (for more than one tree)
    leaves the layout free of overlaps according to ``has_overlap_strict``.
    Passes repeat until one finds no accepted move or ``max_iter`` is reached.

    Args:
        trees: list of dicts with ``'x'``, ``'y'`` and ``'deg'`` entries
            (values convertible with ``float()``). The input is not modified.
        max_iter: maximum number of full passes over all trees.
        verbose: when true, print the side length after each improving pass.

    Returns:
        ``(best_trees, reduction)`` where ``best_trees`` is the improved list
        of tree dicts (moved coordinates are stored as ``str(float)``) and
        ``reduction`` is ``initial_side - best_side``.
    """
    frac_steps = [0.001, 0.0005, 0.0002, 0.0001]
    directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]

    best_trees = [dict(t) for t in trees]
    best_side = get_bbox_side_for_trees(best_trees)
    initial_side = best_side
    # A single tree cannot overlap anything; decide once instead of per move.
    check_overlap = len(best_trees) > 1

    for iteration in range(max_iter):
        improved = False
        for i in range(len(best_trees)):
            for step in frac_steps:
                for dx, dy in directions:
                    # Only tree i changes, so copy just that dict and share
                    # the others; tree dicts are never mutated after creation.
                    current = best_trees[i]
                    moved = dict(current)
                    moved['x'] = str(float(current['x']) + dx * step)
                    moved['y'] = str(float(current['y']) + dy * step)
                    test_trees = best_trees[:i] + [moved] + best_trees[i + 1:]

                    # Cheap test first: most candidate moves do not shrink the
                    # box, and those never need the pairwise overlap check.
                    new_side = get_bbox_side_for_trees(test_trees)
                    if new_side < best_side - 1e-12:
                        if check_overlap and has_overlap_strict(test_trees)[0]:
                            continue
                        best_side = new_side
                        best_trees = test_trees
                        improved = True

        if not improved:
            break

        if verbose:
            print(f"  Iteration {iteration+1}: side = {best_side:.8f}")

    return best_trees, initial_side - best_side

print("Fractional translation function defined")

In [None]:
# Apply fractional translation to small N values (2-20)
print("\nApplying fractional translation to N=2-20...")

# Maps N -> reduction in bounding-box side, only for the N values that improved.
frac_improvements = {}
for n in range(2, 21):
    trees = optimized_trees_by_n[n]
    initial_side = get_bbox_side_for_trees(trees)

    improved_trees, improvement = fractional_translation(trees, max_iter=30)

    # Keep the original layout unless the side shrank by more than 1e-10.
    if not improvement > 1e-10:
        continue

    optimized_trees_by_n[n] = improved_trees
    frac_improvements[n] = improvement
    new_side = get_bbox_side_for_trees(improved_trees)
    score_improvement = (initial_side**2 - new_side**2) / n
    print(f"  N={n:3d}: side {initial_side:.6f} -> {new_side:.6f} (score +{score_improvement:.6f})")

print(f"\nFractional translation improved {len(frac_improvements)} N values")

In [None]:
# Calculate total score
def calculate_total_score(trees_by_n):
    """Sum ``side**2 / N`` over N = 1..200.

    Args:
        trees_by_n: mapping from N to that puzzle's list of tree dicts; must
            contain every key from 1 to 200.

    Returns:
        ``(total, per_n_scores)`` where ``per_n_scores`` maps each N to its
        individual ``side**2 / N`` contribution.
    """
    per_n_scores = {}
    running_total = 0
    for count in range(1, 201):
        edge = get_bbox_side_for_trees(trees_by_n[count])
        contribution = edge ** 2 / count
        per_n_scores[count] = contribution
        # Accumulate in ascending N order, one term at a time.
        running_total += contribution
    return running_total, per_n_scores

# Calculate baseline score from the layouts exactly as parsed from the
# baseline CSV (trees_by_n is never modified by the optimization cells).
baseline_score, baseline_per_n = calculate_total_score(trees_by_n)
print(f"Baseline total score: {baseline_score:.6f}")

# Calculate optimized score from the working copy that received the N=1
# rotation and any accepted fractional translations.
optimized_score, optimized_per_n = calculate_total_score(optimized_trees_by_n)
print(f"Optimized total score: {optimized_score:.6f}")
# Positive value means the optimized total is smaller than the baseline total.
print(f"\nTotal improvement: {baseline_score - optimized_score:.6f} points")

In [None]:
# Show per-N improvements
print("\n" + "=" * 60)
print("PER-N IMPROVEMENTS")
print("=" * 60)

# Collect every N whose score changed by more than 1e-8 in either direction
# (diff is baseline minus optimized, so positive means the score went down).
deltas = ((n, baseline_per_n[n] - optimized_per_n[n]) for n in range(1, 201))
improvements = [(n, diff) for n, diff in deltas if abs(diff) > 1e-8]

for n, diff in improvements:
    print(f"  N={n:3d}: {baseline_per_n[n]:.6f} -> {optimized_per_n[n]:.6f} ({diff:+.6f})")

print(f"\nTotal N values improved: {len(improvements)}")
print(f"Total score improvement: {sum(d for _, d in improvements):.6f}")

In [None]:
# Validate no overlaps in optimized solution
print("\n" + "=" * 60)
print("VALIDATION")
print("=" * 60)

# One (n, i, j) entry per N whose layout contains an overlapping pair; only
# the first offending pair found for that N is recorded.
overlap_errors = []
for n in range(1, 201):
    trees = optimized_trees_by_n[n]
    if n <= 1:
        # N=1 can't have overlaps
        continue
    has_overlap, i, j = has_overlap_strict(trees)
    if not has_overlap:
        continue
    overlap_errors.append((n, i, j))
    print(f"  ⚠️ N={n}: Trees {i} and {j} overlap")

if not overlap_errors:
    print("\n✅ All N values pass strict overlap validation!")
else:
    print(f"\n❌ {len(overlap_errors)} N values have overlaps!")

In [None]:
# Create submission CSV
print("\n" + "=" * 60)
print("CREATING SUBMISSION")
print("=" * 60)

# One row per tree, ordered by N and then by the tree order within each
# layout. Ids are "<N zero-padded to 3 digits>_<idx>" and every numeric field
# gets the 's' prefix back.
rows = [
    {
        'id': f"{n:03d}_{t['idx']}",
        'x': f"s{t['x']}",
        'y': f"s{t['y']}",
        'deg': f"s{t['deg']}",
    }
    for n in range(1, 201)
    for t in optimized_trees_by_n[n]
]

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")
print(f"Expected: (20100, 4)")

# Save submission: once next to this experiment, once at the submission path.
for out_path in (
    '/home/code/experiments/002_python_optimization/submission.csv',
    '/home/submission/submission.csv',
):
    submission_df.to_csv(out_path, index=False)
print("\nSubmission saved!")

In [None]:
# Save metrics
# Summary of this run, written as JSON next to the experiment's submission.
# 'improvement' and 'n1_improvement' are baseline minus optimized; 'gap' is
# optimized minus the target, so it is positive while the target is not reached.
metrics = {
    'cv_score': optimized_score,
    'baseline_score': baseline_score,
    'improvement': baseline_score - optimized_score,
    'n1_optimal_angle': best_angle,
    'n1_improvement': current_n1_side**2 - new_n1_side**2,
    # Number of N values (not a score) that fractional translation improved.
    'fractional_translation_improvements': len(frac_improvements),
    # Number of N values that failed the strict overlap validation.
    'overlap_errors': len(overlap_errors),
    'target': 68.888293,
    'gap': optimized_score - 68.888293
}

with open('/home/code/experiments/002_python_optimization/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved!")
# Console recap of the same headline numbers stored in metrics.json.
print(f"\n" + "=" * 60)
print("FINAL RESULTS")
print("=" * 60)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Optimized score: {optimized_score:.6f}")
print(f"Improvement: {baseline_score - optimized_score:.6f} points")
print(f"Target: 68.888293")
print(f"Gap to target: {optimized_score - 68.888293:.6f} points")