# Experiment 003: Deletion Cascade Algorithm

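Generate novel small-N configurations by removing one tree at a time from larger configs: each N-tree layout yields candidate (N-1)-tree layouts, and the best candidate replaces the current (N-1) layout when it scores lower.
This is the HIGHEST PRIORITY approach to escape the local optimum.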

In [None]:
import os
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.strtree import STRtree
from decimal import Decimal, getcontext

# Set high precision for Decimal operations
# NOTE(review): no Decimal arithmetic is visible in this notebook -- coordinates
# are parsed with float() in load_all_configs -- so this setting appears to have
# no effect on the code shown here; confirm before relying on it.
getcontext().prec = 30

# Tree geometry
# TX[i] / TY[i] are the x / y coordinates of the i-th of the 15 outline vertices
# of a single tree in its unrotated, untranslated pose. get_tree_polygon() zips
# the two lists into a polygon. The outline spans x in [-0.35, 0.35] and
# y in [-0.2, 0.8].
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, deg):
    """Build the tree outline turned by ``deg`` and placed at ``(x, y)``.

    The outline described by ``TX``/``TY`` is rotated about the origin
    ``(0, 0)`` first and shifted by ``(x, y)`` afterwards, so ``(x, y)`` is
    where the outline's own origin ends up.

    Args:
        x: Horizontal offset applied after the rotation.
        y: Vertical offset applied after the rotation.
        deg: Rotation angle handed to ``affinity.rotate`` (degrees is that
            function's default unit).

    Returns:
        The rotated and translated Shapely polygon.
    """
    outline = Polygon(zip(TX, TY))
    return affinity.translate(
        affinity.rotate(outline, deg, origin=(0, 0)),
        x,
        y,
    )

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square covering all trees.

    Args:
        trees: Sequence of ``(x, y, deg)`` placements.

    Returns:
        The larger of the overall width and height of the combined polygon
        bounds, or ``float('inf')`` when ``trees`` is empty.
    """
    if not trees:
        return float('inf')

    # Each entry is (minx, miny, maxx, maxy) for one placed tree.
    boxes = [get_tree_polygon(x, y, deg).bounds for x, y, deg in trees]

    left = min(box[0] for box in boxes)
    bottom = min(box[1] for box in boxes)
    right = max(box[2] for box in boxes)
    top = max(box[3] for box in boxes)

    return max(right - left, top - bottom)

def has_overlap(trees):
    """Report whether any two trees share interior area.

    Pairs that merely touch along a boundary are not counted as overlapping.

    Args:
        trees: Sequence of ``(x, y, deg)`` placements.

    Returns:
        ``True`` as soon as one overlapping pair is found, otherwise ``False``
        (including for zero or one tree).
    """
    if len(trees) <= 1:
        return False

    shapes = [get_tree_polygon(x, y, deg) for x, y, deg in trees]
    spatial_index = STRtree(shapes)

    def collides(a, b):
        # Intersecting without merely touching means the interiors overlap.
        first, second = shapes[a], shapes[b]
        return first.intersects(second) and not first.touches(second)

    # NOTE(review): assumes STRtree.query yields integer positions into
    # `shapes` (Shapely 2.x behaviour) -- confirm the installed version.
    return any(
        idx != other and collides(idx, other)
        for idx, shape in enumerate(shapes)
        for other in spatial_index.query(shape)
    )

def recenter_trees(trees):
    """Shift all trees so their combined bounding box is centred on (0, 0).

    Only a common translation is applied; rotations are left untouched, so
    the size of the bounding box itself is not changed by this step.

    Args:
        trees: Sequence of ``(x, y, deg)`` placements.

    Returns:
        A new list of shifted ``(x, y, deg)`` tuples, or ``trees`` itself
        when it is empty.
    """
    if not trees:
        return trees

    # Each entry is (minx, miny, maxx, maxy) for one placed tree.
    boxes = [get_tree_polygon(x, y, deg).bounds for x, y, deg in trees]

    left = min(box[0] for box in boxes)
    right = max(box[2] for box in boxes)
    bottom = min(box[1] for box in boxes)
    top = max(box[3] for box in boxes)

    shift_x = (left + right) / 2
    shift_y = (bottom + top) / 2

    return [(x - shift_x, y - shift_y, deg) for x, y, deg in trees]

# Cell-completion marker: printed once the helper definitions above have run.
print("Functions defined successfully!")

In [None]:
# Load the baseline submission.
# Every column is read as text (dtype=str) so pandas does not reformat the
# values on load. NOTE(review): load_all_configs later converts them with
# float(), so the working precision is still that of a Python float.
baseline_path = '/home/code/experiments/001_baseline/santa-2025.csv'

# dtype=str keeps each cell exactly as written in the CSV.
df_baseline = pd.read_csv(baseline_path, dtype=str)
print(f"Loaded baseline with {len(df_baseline)} rows")
print(df_baseline.head())

# Parse into configurations
def load_all_configs(df):
    """Split a submission dataframe into per-N lists of tree placements.

    Rows belong to configuration ``n`` when their ``id`` starts with the
    zero-padded prefix ``'NNN_'``. Every ``'s'`` character is removed from the
    ``x``, ``y`` and ``deg`` cells before they are parsed as floats.

    Args:
        df: Dataframe with string columns ``id``, ``x``, ``y`` and ``deg``.

    Returns:
        Dict mapping each ``n`` in 1..200 to a list of ``(x, y, deg)`` float
        tuples in dataframe row order (empty when no row matches).
    """
    def to_float(cell):
        # Drop the 's' marker carried by submission values, then parse.
        return float(str(cell).replace('s', ''))

    configs = {}
    for n in range(1, 201):
        group = df[df['id'].str.startswith(f'{n:03d}_')]
        configs[n] = [
            (to_float(x), to_float(y), to_float(deg))
            for x, y, deg in zip(group['x'], group['y'], group['deg'])
        ]
    return configs

# Parse the raw dataframe into {n: [(x, y, deg), ...]} and print the tree
# counts for the smallest and largest N as a quick sanity check.
baseline_configs = load_all_configs(df_baseline)
print(f"\nLoaded {len(baseline_configs)} configurations")
print(f"N=1: {len(baseline_configs[1])} trees")
print(f"N=200: {len(baseline_configs[200])} trees")

In [None]:
# Calculate baseline score
def calculate_total_score(configs):
    """Sum ``side**2 / n`` over the configurations for n = 1..200.

    Args:
        configs: Mapping from ``n`` to a list of ``(x, y, deg)`` placements;
            must contain every key in 1..200.

    Returns:
        ``(total, per_n)`` where ``per_n`` is a list of ``(n, side, score)``
        tuples in increasing ``n`` and ``total`` is the sum of the scores.
    """
    sides = [(n, get_bounding_box_side(configs[n])) for n in range(1, 201)]
    per_n = [(n, side, side**2 / n) for n, side in sides]

    # Accumulate left to right on purpose: the built-in sum() may round
    # float totals differently from a plain running addition.
    total = 0
    for _, _, score in per_n:
        total += score

    return total, per_n

# Score the untouched baseline; later cells compare against baseline_score.
baseline_score, baseline_per_n = calculate_total_score(baseline_configs)
print(f"Baseline Score: {baseline_score:.6f}")

# Show first 10 N values (baseline_per_n holds (n, side, score) tuples).
print("\nBaseline per-N scores (first 10):")
for n, side, score in baseline_per_n[:10]:
    print(f"  N={n:3d}: side={side:.6f}, score={score:.6f}")

In [None]:
# Implement deletion cascade
def deletion_cascade(baseline_configs):
    """Generate novel configs by removing trees from larger configs.

    Walks N from 200 down to 2. At each step every single-tree removal from
    the current best N-tree layout is recentred and scored as an (N-1)-tree
    candidate; the best candidate replaces the stored (N-1) layout when its
    score ``side**2 / (N-1)`` is strictly lower. Because the walk goes
    downward, an improved (N-1) layout is the source for (N-2) on the next
    step, which is what makes this a cascade.

    No overlap check is run here: a candidate is a rigidly translated subset
    of its source layout, so it cannot overlap unless the source already did.

    Args:
        baseline_configs: Mapping ``n -> list of (x, y, deg)`` for every n in
            1..200. The mapping and its lists are not modified.

    Returns:
        ``(best_configs, improvements)`` where ``best_configs`` maps each n to
        its best known layout and ``improvements`` is a list of
        ``(n, score_gain, new_side)`` tuples, one per improved n.
    """
    # Start with copies of baseline configs
    best_configs = {n: list(baseline_configs[n]) for n in range(1, 201)}
    improvements = []
    total_improvement = 0

    print("Running deletion cascade from N=200 down to N=2...")

    for n in range(200, 1, -1):
        trees = best_configs[n]

        # Current best score for n-1
        current_side = get_bounding_box_side(best_configs[n-1])
        current_score = current_side**2 / (n-1)

        best_removal_score = current_score
        best_removal_config = best_configs[n-1]
        best_removal_side = current_side

        if len(trees) == n:
            # Try removing each tree
            for i in range(len(trees)):
                # Recenter so the stored layout sits around the origin.
                new_trees = recenter_trees(trees[:i] + trees[i+1:])

                new_side = get_bounding_box_side(new_trees)
                new_score = new_side**2 / (n-1)

                if new_score < best_removal_score:
                    best_removal_score = new_score
                    best_removal_config = new_trees
                    best_removal_side = new_side
        else:
            # A source with the wrong tree count can only produce layouts
            # with the wrong tree count for n-1, so it must not be used.
            print(f"  N={n:3d}: expected {n} trees, found {len(trees)}; skipping as a source")

        # Update if we found a better configuration
        if best_removal_score < current_score:
            improvement = current_score - best_removal_score
            best_configs[n-1] = best_removal_config
            improvements.append((n-1, improvement, best_removal_side))
            total_improvement += improvement
            print(f"  N={n-1:3d}: Improved by {improvement:.6f} (new side={best_removal_side:.6f})")

        if n % 50 == 0:
            print(f"  Progress: N={n} done")

    print(f"\nTotal improvements found: {len(improvements)}")
    print(f"Total score improvement: {total_improvement:.6f}")

    return best_configs, improvements

# Run deletion cascade.
# cascade_configs maps n -> best layout found; improvements lists
# (n, score_gain, new_side) for every n whose layout was replaced.
cascade_configs, improvements = deletion_cascade(baseline_configs)

In [None]:
# Re-score every N after the cascade and compare against the baseline total.
cascade_score, cascade_per_n = calculate_total_score(cascade_configs)
print(f"\nCascade Score: {cascade_score:.6f}")
print(f"Baseline Score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - cascade_score:.6f}")

# List the largest per-N gains, biggest first.
if not improvements:
    print("\nNo improvements found from deletion cascade.")
else:
    # reverse=True keeps the original order of equal gains, just like
    # sorting ascending on the negated gain.
    ranked = sorted(improvements, key=lambda entry: entry[1], reverse=True)
    print(f"\nTop 10 improvements:")
    for n, imp, side in ranked[:10]:
        print(f"  N={n:3d}: improved by {imp:.6f}, new side={side:.6f}")

In [None]:
# Ensemble: for every N keep whichever of baseline / cascade scores lower
# (the baseline wins ties).
final_configs = {}
ensemble_improvements = 0

for n in range(1, 201):
    baseline_side = get_bounding_box_side(baseline_configs[n])
    cascade_side = get_bounding_box_side(cascade_configs[n])

    baseline_score_n = baseline_side**2 / n
    cascade_score_n = cascade_side**2 / n

    cascade_wins = cascade_score_n < baseline_score_n
    final_configs[n] = cascade_configs[n] if cascade_wins else baseline_configs[n]
    if cascade_wins:
        ensemble_improvements += 1

print(f"Ensemble: {ensemble_improvements} N values improved by cascade")

# Score the merged result against the baseline total.
final_score, final_per_n = calculate_total_score(final_configs)
print(f"\nFinal Ensemble Score: {final_score:.6f}")
print(f"Baseline Score: {baseline_score:.6f}")
print(f"Total Improvement: {baseline_score - final_score:.6f}")

In [None]:
# Check every final layout for overlapping trees and report the offenders.
print("Validating for overlaps...")
overlap_count = 0
for n in range(1, 201):
    trees = final_configs[n]
    if not has_overlap(trees):
        continue
    print(f"  N={n}: OVERLAP DETECTED!")
    overlap_count += 1

# Summary line: failure count if any layout overlapped, success otherwise.
if overlap_count:
    print(f"✗ {overlap_count} configurations have overlaps")
else:
    print("✓ No overlaps detected!")

In [None]:
# Build one submission row per tree: id is 'NNN_i', and each value is written
# with an 's' prefix and 18 fixed decimals.
submission_rows = [
    {
        'id': f'{n:03d}_{i}',
        'x': f's{x:.18f}',
        'y': f's{y:.18f}',
        'deg': f's{deg:.18f}'
    }
    for n in range(1, 201)
    for i, (x, y, deg) in enumerate(final_configs[n])
]

submission_df = pd.DataFrame(submission_rows)
print(f"Created submission with {len(submission_df)} rows")
print(submission_df.head())

# Write the same file to the submission folder and to this experiment's folder.
for out_path in (
    '/home/submission/submission.csv',
    '/home/code/experiments/003_deletion_cascade/submission.csv',
):
    submission_df.to_csv(out_path, index=False)
print(f"\nSaved submission files")

print(f"\n=== FINAL SCORE: {final_score:.6f} ===")
print(f"=== IMPROVEMENT: {baseline_score - final_score:.6f} ===")