# Experiment 010: Tree Removal Technique (Chistyakov Approach)

This is a CONSTRUCTIVE approach that extracts smaller N configurations from larger ones:
1. Start from a larger N configuration (e.g., N=200)
2. Remove trees one by one, always dropping the tree farthest from a chosen bounding-box corner (each of the four corners is tried)
3. Extract smaller N configurations that may be better than direct optimization

This can reach configurations in basins of attraction that local optimization alone would not find.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from tqdm import tqdm
import shutil

# Tree geometry
# Outline of a single tree in its own local frame: (TX[i], TY[i]) is the i-th
# polygon vertex before create_tree_polygon rotates and translates it.
# Both lists hold 15 entries and are consumed pairwise via zip(TX, TY).
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Convert a field to float, dropping a leading 's' marker when present.

    A string that begins with 's' has that first character stripped before
    conversion; any other input is passed to float() unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by `deg` degrees and shifted to (x, y).

    Each (TX, TY) vertex goes through a 2-D rotation about the local origin
    and is then translated by (x, y); the result is wrapped in a Polygon.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)

    outline = []
    for px, py in zip(TX, TY):
        outline.append((px * c - py * s + x, px * s + py * c + y))
    return Polygon(outline)

def get_tree_center(row):
    """Return the row's 'x' and 'y' fields, parsed to floats, as an (x, y) tuple."""
    cx = parse_value(row['x'])
    cy = parse_value(row['y'])
    return cx, cy

def compute_bounding_side(trees_df):
    """Return the longer side of the axis-aligned box enclosing every tree.

    All exterior vertices of all tree polygons are pooled; the result is the
    larger of the x-extent and the y-extent of that point cloud.
    """
    vertices = np.array([
        point
        for _, tree in trees_df.iterrows()
        for point in create_tree_polygon(
            parse_value(tree['x']),
            parse_value(tree['y']),
            parse_value(tree['deg']),
        ).exterior.coords
    ])
    extent = vertices.max(axis=0) - vertices.min(axis=0)
    return max(extent)

def compute_score_for_trees(trees_df, n):
    """Return side**2 / n for the layout, or infinity on a row-count mismatch.

    `side` is the bounding side from compute_bounding_side. A frame whose
    length differs from `n` is scored as float('inf').
    """
    if len(trees_df) == n:
        side = compute_bounding_side(trees_df)
        return side**2 / n
    return float('inf')

def check_overlaps(trees_df):
    """Return True when any two trees share an area larger than 1e-12.

    Every unordered pair of tree polygons is examined; pairs that intersect
    with an intersection area of at most 1e-12 are not counted as overlapping.
    """
    shapes = [
        create_tree_polygon(
            parse_value(tree['x']),
            parse_value(tree['y']),
            parse_value(tree['deg']),
        )
        for _, tree in trees_df.iterrows()
    ]

    for idx, first in enumerate(shapes):
        for second in shapes[idx + 1:]:
            # Cheap predicate first; the intersection geometry is only built
            # for pairs that actually intersect.
            if not first.intersects(second):
                continue
            if first.intersection(second).area > 1e-12:
                return True
    return False

# Progress marker printed once the definitions above have been executed.
print("Functions defined")

In [None]:
# Read the baseline submission that the rest of the notebook compares against
df_baseline = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')
print(f"Loaded {len(df_baseline)} rows")

# Score every N in 1..200; the rows for a given N are those whose id starts
# with the zero-padded N followed by an underscore
baseline_scores = {}
for n in range(1, 201):
    prefix = f"{n:03d}_"
    in_group = df_baseline['id'].str.startswith(prefix)
    trees = df_baseline[in_group]
    baseline_scores[n] = compute_score_for_trees(trees, n)

# Overall baseline is the plain sum of the per-N scores
baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

In [None]:
# Tree Removal Technique
# For each large N, try removing trees from corners to create smaller N configurations

def get_bounding_box_corners(trees_df):
    """Return the four corners of the axis-aligned box around all trees.

    The order is bottom-left, bottom-right, top-left, top-right, each as an
    (x, y) tuple taken from the min/max of every polygon's exterior vertices.
    """
    vertices = np.array([
        point
        for _, tree in trees_df.iterrows()
        for point in create_tree_polygon(
            parse_value(tree['x']),
            parse_value(tree['y']),
            parse_value(tree['deg']),
        ).exterior.coords
    ])

    lo_x, lo_y = vertices.min(axis=0)
    hi_x, hi_y = vertices.max(axis=0)

    bottom_left = (lo_x, lo_y)
    bottom_right = (hi_x, lo_y)
    top_left = (lo_x, hi_y)
    top_right = (hi_x, hi_y)
    return [bottom_left, bottom_right, top_left, top_right]

def distance_to_corner(row, corner):
    """Return the Euclidean distance between the row's (x, y) and `corner`.

    `corner` is indexed as corner[0] (x) and corner[1] (y).
    """
    cx, cy = get_tree_center(row)
    dx = cx - corner[0]
    dy = cy - corner[1]
    return np.sqrt(dx**2 + dy**2)

def extract_n_trees_from_config(source_df, target_n, corner_idx=0):
    """Keep the `target_n` trees nearest to one bounding-box corner.

    The corner is picked from get_bounding_box_corners(source_df) using
    corner_idx modulo 4. Trees are ranked by distance to that corner and the
    farthest ones are dropped. Returns None when the source holds fewer than
    `target_n` trees; otherwise a new frame without the helper 'dist' column.
    The input frame is not modified.
    """
    if target_n > len(source_df):
        return None

    anchor = get_bounding_box_corners(source_df)[corner_idx % 4]

    # Work on a copy so the temporary ranking column never leaks to the caller
    ranked = source_df.copy()
    ranked['dist'] = ranked.apply(lambda tree: distance_to_corner(tree, anchor), axis=1)
    ranked = ranked.sort_values('dist')

    nearest = ranked.head(target_n)
    return nearest.drop(columns=['dist'])

# Progress marker printed once the tree-removal helpers above have been executed.
print("Tree removal functions defined")

In [None]:
# Smoke-test the extraction on the largest configuration
print("Testing tree removal technique...")

# Pull out the rows belonging to N=200
n200_prefix = "200_"
n200_mask = df_baseline['id'].str.startswith(n200_prefix)
n200_trees = df_baseline[n200_mask].copy()
print(f"N=200 has {len(n200_trees)} trees")
print(f"N=200 baseline score: {baseline_scores[200]:.9f}")

# Derive an N=199 layout from N=200 once per corner; corners whose
# extraction is missing or overlapping produce no output line
for corner_idx in range(4):
    extracted = extract_n_trees_from_config(n200_trees, 199, corner_idx)
    if extracted is None or check_overlaps(extracted):
        continue
    score = compute_score_for_trees(extracted, 199)
    improvement = baseline_scores[199] - score
    print(f"  Corner {corner_idx}: N=199 score={score:.9f}, improvement={improvement:.9f}")

In [None]:
# Full tree removal: for each N from 199 down to 2, try extracting from larger configs
print("\nRunning full tree removal technique...")

# Largest number of trees stripped from a single source configuration.
# The previous range bound stopped one short (19 removals) of this stated limit.
MAX_REMOVALS = 20

# Store best configurations found
best_configs = {}  # n -> (score, trees_df, source_n)

# Initialize with baseline
for n in range(1, 201):
    prefix = f"{n:03d}_"
    trees = df_baseline[df_baseline['id'].str.startswith(prefix)].copy()
    best_configs[n] = (baseline_scores[n], trees, n)

# For each source N (from 200 down to 3), try extracting smaller configs.
# Sources are visited largest-first, so a source may itself be a layout that
# was extracted from an even larger N earlier in this loop.
for source_n in tqdm(range(200, 2, -1), desc="Processing source N"):
    # Get source configuration (use best found so far)
    _, source_trees, _ = best_configs[source_n]

    # Targets run from source_n - 1 down to source_n - MAX_REMOVALS, never below N=2
    lowest_target = max(2, source_n - MAX_REMOVALS)
    for target_n in range(source_n - 1, lowest_target - 1, -1):
        for corner_idx in range(4):
            extracted = extract_n_trees_from_config(source_trees, target_n, corner_idx)
            if extracted is None:
                continue

            # Score first: it is far cheaper than the pairwise overlap test,
            # and most candidates are rejected here.
            score = compute_score_for_trees(extracted, target_n)

            # Accept only a strict improvement (beyond 1e-12) that is also
            # overlap-free. The overlap test is kept because the baseline
            # layouts have not been validated at this point in the notebook.
            if score < best_configs[target_n][0] - 1e-12 and not check_overlaps(extracted):
                best_configs[target_n] = (score, extracted, source_n)

print("\nTree removal complete")

In [None]:
# Collect every N whose best score beats the baseline by more than 1e-12
improvements = []
for n in range(1, 201):
    score, trees, source_n = best_configs[n]
    improvement = baseline_scores[n] - score
    if not (improvement > 1e-12):
        continue
    improvements.append(dict(
        n=n,
        baseline_score=baseline_scores[n],
        new_score=score,
        improvement=improvement,
        source_n=source_n,
    ))

print(f"\nN values improved: {len(improvements)}/200")

if not improvements:
    print("No improvements found")
else:
    df_improvements = pd.DataFrame(improvements)
    report_columns = ['n', 'baseline_score', 'new_score', 'improvement', 'source_n']
    top_ten = df_improvements.nlargest(10, 'improvement')
    print("\nTop 10 improvements:")
    print(top_ten[report_columns])

    total_improvement = df_improvements['improvement'].sum()
    print(f"\nTotal score improvement: {total_improvement:.9f}")

In [None]:
# Assemble one submission frame from the best layout kept for every N
print("\nBuilding ensemble submission...")

ensemble_rows = []
for n in range(1, 201):
    score, trees, source_n = best_configs[n]

    # Ids restart at 0 inside each group so extracted layouts get their own
    # "<NNN>_<i>" labels instead of the ids inherited from the source N
    fresh_ids = [f"{n:03d}_{i}" for i in range(len(trees))]
    new_trees = trees.copy()
    new_trees['id'] = fresh_ids
    ensemble_rows.append(new_trees)

df_ensemble = pd.concat(ensemble_rows, ignore_index=True)
print(f"Ensemble has {len(df_ensemble)} rows")

# Total score is the sum of the stored per-N best scores, in N order
per_n_scores = [best_configs[n][0] for n in range(1, 201)]
ensemble_total = sum(per_n_scores)
gain = baseline_total - ensemble_total
print(f"\nBaseline total: {baseline_total:.6f}")
print(f"Ensemble total: {ensemble_total:.6f}")
print(f"Improvement: {gain:.9f}")

In [None]:
# Re-check every group of the assembled frame for overlapping trees
print("\nValidating ensemble...")
overlap_count = 0
for n in range(1, 201):
    prefix = f"{n:03d}_"
    in_group = df_ensemble['id'].str.startswith(prefix)
    trees = df_ensemble[in_group]
    if not check_overlaps(trees):
        continue
    overlap_count += 1
    # Only the first five offending groups are listed individually
    if overlap_count <= 5:
        print(f"  N={n}: OVERLAP")

print(f"\nTotal overlaps: {overlap_count}/200")

In [None]:
# Write a submission only when validation passed: the ensemble if it beats
# the baseline by more than 1e-9, otherwise an untouched copy of the baseline
if overlap_count != 0:
    print("Overlaps detected - not saving")
elif ensemble_total < baseline_total - 1e-9:
    df_ensemble.to_csv('/home/submission/submission.csv', index=False)
    print(f"Saved ensemble to /home/submission/submission.csv")
    print(f"Score: {ensemble_total:.6f}")
else:
    # No improvement - fall back to the baseline file
    shutil.copy('/home/code/external_data/saspav/santa-2025.csv', '/home/submission/submission.csv')
    print("No improvement - saved baseline")

In [None]:
# Final report for the experiment
banner = "=" * 60
gain = baseline_total - ensemble_total
print(banner)
print("EXPERIMENT 010 SUMMARY: Tree Removal Technique")
print(banner)
print(f"Baseline score: {baseline_total:.6f}")
print(f"Ensemble score: {ensemble_total:.6f}")
print(f"Improvement: {gain:.9f}")
print(f"N values improved: {len(improvements)}/200")
print(f"Overlaps: {overlap_count}/200")
print(banner)