# Backward Propagation Optimization

Implement backward propagation: for each N from 200 down to 2, try removing trees from N to create better N-1 configurations.

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
import os
import json
from shapely.geometry import Polygon
from decimal import Decimal, getcontext
import time

os.chdir('/home/code/experiments/002_backward_propagation')
getcontext().prec = 30

# Tree geometry
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

def get_tree_vertices(x, y, angle_deg):
    """Get tree polygon vertices at position (x,y) with rotation angle_deg."""
    rad = np.radians(angle_deg)
    cos_a, sin_a = np.cos(rad), np.sin(rad)
    rx = TX * cos_a - TY * sin_a + x
    ry = TX * sin_a + TY * cos_a + y
    return list(zip(rx, ry))

def calculate_bbox(trees):
    """Calculate bounding box side length for a set of trees."""
    all_xs = []
    all_ys = []
    for x, y, angle in trees:
        verts = get_tree_vertices(x, y, angle)
        for vx, vy in verts:
            all_xs.append(vx)
            all_ys.append(vy)
    width = max(all_xs) - min(all_xs)
    height = max(all_ys) - min(all_ys)
    return max(width, height)

def calculate_score_for_n(trees):
    """Calculate score contribution for N trees."""
    side = calculate_bbox(trees)
    n = len(trees)
    return (side ** 2) / n

print("Functions defined")

Functions defined


In [2]:
# Load the valid baseline
baseline_df = pd.read_csv('/home/code/experiments/001_valid_baseline/submission.csv')

def parse_submission(df):
    """Parse submission CSV into dict of n -> list of (x, y, angle) tuples."""
    configs = defaultdict(list)
    for _, row in df.iterrows():
        parts = row['id'].split('_')
        n = int(parts[0])
        x = float(str(row['x']).replace('s', ''))
        y = float(str(row['y']).replace('s', ''))
        deg = float(str(row['deg']).replace('s', ''))
        configs[n].append((x, y, deg))
    return dict(configs)

baseline_configs = parse_submission(baseline_df)
print(f"Loaded baseline with {len(baseline_configs)} N values")

# Calculate baseline scores per N
baseline_scores = {}
for n in range(1, 201):
    baseline_scores[n] = calculate_score_for_n(baseline_configs[n])

baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

Loaded baseline with 200 N values
Baseline total score: 70.615102


In [3]:
# Overlap validation using Shapely
def validate_no_overlap_fast(trees):
    """Fast overlap validation using Shapely."""
    if len(trees) <= 1:
        return True
    
    polygons = []
    for x, y, angle in trees:
        verts = get_tree_vertices(x, y, angle)
        polygons.append(Polygon(verts))
    
    # Check all pairs
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]):
                # Check if it's just touching (allowed) or overlapping (not allowed)
                if not polygons[i].touches(polygons[j]):
                    inter = polygons[i].intersection(polygons[j])
                    if inter.area > 1e-12:  # Small tolerance
                        return False
    return True

# Test on N=2
print(f"N=2 baseline valid: {validate_no_overlap_fast(baseline_configs[2])}")
print(f"N=10 baseline valid: {validate_no_overlap_fast(baseline_configs[10])}")

N=2 baseline valid: True
N=10 baseline valid: True


In [4]:
def get_bbox_touching_indices(trees):
    """Find indices of trees that touch the bounding box boundary."""
    # Calculate overall bounding box
    all_xs = []
    all_ys = []
    for x, y, angle in trees:
        verts = get_tree_vertices(x, y, angle)
        for vx, vy in verts:
            all_xs.append(vx)
            all_ys.append(vy)
    
    min_x, max_x = min(all_xs), max(all_xs)
    min_y, max_y = min(all_ys), max(all_ys)
    side = max(max_x - min_x, max_y - min_y)
    
    # Determine which dimension is the constraint
    if max_x - min_x >= max_y - min_y:
        # Width is the constraint
        boundary_x = [min_x, max_x]
        boundary_y = None
    else:
        # Height is the constraint
        boundary_x = None
        boundary_y = [min_y, max_y]
    
    # Find trees touching the boundary
    boundary_indices = []
    tol = 1e-9
    
    for idx, (x, y, angle) in enumerate(trees):
        verts = get_tree_vertices(x, y, angle)
        xs = [v[0] for v in verts]
        ys = [v[1] for v in verts]
        
        touches_boundary = False
        if boundary_x is not None:
            if abs(min(xs) - min_x) < tol or abs(max(xs) - max_x) < tol:
                touches_boundary = True
        if boundary_y is not None:
            if abs(min(ys) - min_y) < tol or abs(max(ys) - max_y) < tol:
                touches_boundary = True
        
        # Also check if tree touches any boundary
        if abs(min(xs) - min_x) < tol or abs(max(xs) - max_x) < tol:
            touches_boundary = True
        if abs(min(ys) - min_y) < tol or abs(max(ys) - max_y) < tol:
            touches_boundary = True
            
        if touches_boundary:
            boundary_indices.append(idx)
    
    return boundary_indices

# Test
print(f"N=10 boundary trees: {get_bbox_touching_indices(baseline_configs[10])}")
print(f"N=50 boundary trees: {len(get_bbox_touching_indices(baseline_configs[50]))} trees")

N=10 boundary trees: [0, 1, 3, 5, 6, 7]
N=50 boundary trees: 14 trees


In [5]:
def backward_propagation(configs, max_n=200, min_n=2, verbose=True):
    """Propagate improvements from larger N to smaller N."""
    improved_configs = {n: list(configs[n]) for n in range(1, 201)}
    improvements = []
    
    for n in range(max_n, min_n - 1, -1):
        trees = improved_configs[n]
        
        # Get current best score for n-1
        current_score_n_minus_1 = calculate_score_for_n(improved_configs[n-1])
        
        # Try removing each tree
        best_score = current_score_n_minus_1
        best_candidate = None
        best_removed_idx = None
        
        for idx in range(len(trees)):
            # Create candidate by removing tree at idx
            candidate = trees[:idx] + trees[idx+1:]
            
            # Validate no overlaps
            if not validate_no_overlap_fast(candidate):
                continue
            
            # Calculate score
            score = calculate_score_for_n(candidate)
            
            if score < best_score - 1e-9:  # Meaningful improvement
                best_score = score
                best_candidate = candidate
                best_removed_idx = idx
        
        if best_candidate is not None:
            improvement = current_score_n_minus_1 - best_score
            improved_configs[n-1] = best_candidate
            improvements.append((n-1, improvement, best_removed_idx))
            if verbose and improvement > 0.0001:
                print(f"N={n-1}: {current_score_n_minus_1:.6f} -> {best_score:.6f} (improved by {improvement:.6f})")
    
    return improved_configs, improvements

print("Backward propagation function defined")

Backward propagation function defined


In [6]:
# Run backward propagation
print("Running backward propagation...")
start_time = time.time()

improved_configs, improvements = backward_propagation(baseline_configs, max_n=200, min_n=2, verbose=True)

elapsed = time.time() - start_time
print(f"\nCompleted in {elapsed:.1f} seconds")
print(f"Total improvements found: {len(improvements)}")

Running backward propagation...



Completed in 1077.0 seconds
Total improvements found: 1


In [7]:
# Calculate new total score
new_scores = {}
for n in range(1, 201):
    new_scores[n] = calculate_score_for_n(improved_configs[n])

new_total = sum(new_scores.values())
print(f"\nBaseline total: {baseline_total:.6f}")
print(f"New total: {new_total:.6f}")
print(f"Improvement: {baseline_total - new_total:.6f}")

# Show per-N improvements
print("\nPer-N improvements:")
for n in range(1, 201):
    diff = baseline_scores[n] - new_scores[n]
    if abs(diff) > 0.0001:
        print(f"  N={n}: {baseline_scores[n]:.6f} -> {new_scores[n]:.6f} ({diff:+.6f})")


Baseline total: 70.615102
New total: 70.615101
Improvement: 0.000000

Per-N improvements:


In [8]:
# Validate all configurations
print("Validating all configurations...")
invalid_n = []
for n in range(1, 201):
    if not validate_no_overlap_fast(improved_configs[n]):
        invalid_n.append(n)
        print(f"WARNING: N={n} has overlaps!")

if not invalid_n:
    print("All configurations valid!")
else:
    print(f"Invalid configurations: {invalid_n}")

Validating all configurations...


All configurations valid!


In [9]:
# Save submission
def format_submission(configs):
    rows = []
    for n in range(1, 201):
        for i, (x, y, deg) in enumerate(configs[n]):
            rows.append({
                'id': f'{n:03d}_{i}',
                'x': f's{x:.20f}',
                'y': f's{y:.20f}',
                'deg': f's{deg:.20f}'
            })
    return pd.DataFrame(rows)

submission_df = format_submission(improved_configs)
submission_df.to_csv('submission.csv', index=False)
print(f"Saved submission with {len(submission_df)} rows")
print(submission_df.head(10))

Saved submission with 20100 rows
      id                          x                         y  \
0  001_0  s-48.19608619421424577922  s58.77098461521422478882   
1  002_0    s0.15409706962136429653  s-0.03854074269478543341   
2  002_1   s-0.15409706962136429653  s-0.56145925730521462071   
3  003_0    s1.12365581614030096702   s0.78110181599256300888   
4  003_1    s1.23405569584216001644   s1.27599950066375900093   
5  003_2    s0.64171464022907498403   s1.18045856661338111060   
6  004_0   s-0.32474778959087557961   s0.13210997809118560364   
7  004_1    s0.31535434624113417579   s0.13210997806647570285   
8  004_2    s0.32474778959087557961  s-0.73210997806647526431   
9  004_3   s-0.31535434813632168272  s-0.73210997809118572022   

                         deg  
0   s45.00000000000000000000  
1  s203.62937773065684154972  
2   s23.62937773065679181173  
3  s111.12513229289299943048  
4   s66.37062226934300213088  
5  s155.13405193710082130565  
6  s156.37062214563638917753  
7  

In [10]:
# Save metrics
metrics = {
    'cv_score': new_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - new_total,
    'num_improvements': len(improvements),
    'notes': 'Backward propagation from N=200 to N=2, removing trees to improve smaller N configurations'
}

with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal CV Score: {new_total:.6f}")
print(f"Improvement over baseline: {baseline_total - new_total:.6f}")


Final CV Score: 70.615101
Improvement over baseline: 0.000000


In [11]:
# Copy to submission folder if improved
import shutil
if new_total < baseline_total:
    shutil.copy('submission.csv', '/home/submission/submission.csv')
    print("Copied improved submission to /home/submission/")
else:
    print("No improvement - keeping baseline submission")

Copied improved submission to /home/submission/
