# Experiment 010: Tree Removal Technique (Chistyakov Approach)

This is a CONSTRUCTIVE approach that extracts smaller N configurations from larger ones:
1. Start from a larger N configuration (e.g., N=200)
2. Remove trees one by one, dropping those farthest from a chosen bounding-box corner (the trees closest to that corner are kept)
3. Extract smaller N configurations that may be better than direct optimization

The aim is to reach configurations in different basins of attraction than those found by local optimization.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from tqdm import tqdm
import shutil

# Tree geometry: outline vertices of a single tree in its own local frame,
# i.e. before any rotation or translation is applied. TX[i] and TY[i] are the
# x and y coordinates of the i-th vertex; create_tree_polygon() zips the two
# lists together, so they must stay the same length and in the same order.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Convert a submission cell to float, tolerating a leading 's' marker.

    Strings that start with 's' have that single character stripped before
    conversion; every other input is handed to ``float`` unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and shifted to (x, y).

    Each (TX, TY) vertex is rotated about the local origin and then
    translated by (x, y); the resulting vertex list is wrapped in a Polygon.
    """
    theta = np.radians(deg)
    cos_a, sin_a = np.cos(theta), np.sin(theta)
    outline = []
    for tx, ty in zip(TX, TY):
        world_x = tx * cos_a - ty * sin_a + x
        world_y = tx * sin_a + ty * cos_a + y
        outline.append((world_x, world_y))
    return Polygon(outline)

def get_tree_center(row):
    """Return the (x, y) placement of one tree row as a pair of floats."""
    center_x = parse_value(row['x'])
    center_y = parse_value(row['y'])
    return center_x, center_y

def compute_bounding_side(trees_df):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Every row is turned into its polygon and the exterior vertices of all
    polygons are pooled; the result is the larger of the x and y extents.
    """
    vertices = np.array([
        point
        for _, tree in trees_df.iterrows()
        for point in create_tree_polygon(
            parse_value(tree['x']),
            parse_value(tree['y']),
            parse_value(tree['deg']),
        ).exterior.coords
    ])
    upper = vertices.max(axis=0)
    lower = vertices.min(axis=0)
    extents = upper - lower
    return max(extents)

def compute_score_for_trees(trees_df, n):
    """Compute the score ``side**2 / n`` for a configuration of n trees.

    Args:
        trees_df: Rows of the configuration; must contain exactly ``n`` rows.
        n: Expected number of trees in the configuration.

    Returns:
        ``side**2 / n`` where ``side`` is the value returned by
        ``compute_bounding_side``, or ``float('inf')`` when the configuration
        cannot be scored (``n`` is not positive, or the row count differs
        from ``n``).
    """
    # A non-positive n has no bounding box to measure and would divide by
    # zero, so treat it like any other unscorable configuration.
    if n <= 0 or len(trees_df) != n:
        return float('inf')
    side = compute_bounding_side(trees_df)
    return side**2 / n

def check_overlaps(trees_df, area_tol=1e-12):
    """Report whether any two trees in ``trees_df`` overlap.

    Args:
        trees_df: DataFrame with one row per tree and ``x``, ``y``, ``deg``
            columns in any form accepted by ``parse_value``.
        area_tol: Intersection areas at or below this value are treated as
            mere contact, not as an overlap.

    Returns:
        True as soon as one pair with intersection area above ``area_tol``
        is found; False otherwise (including for zero or one tree).
    """
    polygons = [
        create_tree_polygon(parse_value(row['x']), parse_value(row['y']), parse_value(row['deg']))
        for _, row in trees_df.iterrows()
    ]
    # Cache (minx, miny, maxx, maxy) once per polygon. Pairs whose boxes are
    # strictly disjoint cannot share any point, so they are rejected with
    # four float comparisons instead of a geometry-engine call.
    boxes = [poly.bounds for poly in polygons]

    for i in range(len(polygons)):
        min_xi, min_yi, max_xi, max_yi = boxes[i]
        for j in range(i + 1, len(polygons)):
            min_xj, min_yj, max_xj, max_yj = boxes[j]
            if max_xi < min_xj or max_xj < min_xi or max_yi < min_yj or max_yj < min_yi:
                continue
            if polygons[i].intersects(polygons[j]):
                # Touching edges/vertices intersect with (near-)zero area;
                # only a positive shared area counts as an overlap.
                if polygons[i].intersection(polygons[j]).area > area_tol:
                    return True
    return False

# Marker output: confirms that the helper definitions in this cell were executed.
print("Functions defined")

Functions defined


In [2]:
# Read the baseline submission (one row per tree, ids of the form "NNN_i")
df_baseline = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')
print(f"Loaded {len(df_baseline)} rows")

# Score every group N = 1..200 of the baseline, keyed by N
baseline_scores = {}
for n in range(1, 201):
    in_group = df_baseline['id'].str.startswith(f"{n:03d}_")
    baseline_scores[n] = compute_score_for_trees(df_baseline[in_group], n)

# Total score is the sum of the per-N scores
baseline_total = sum(baseline_scores.values())
print(f"Baseline total score: {baseline_total:.6f}")

Loaded 20100 rows


Baseline total score: 70.659959


In [3]:
# Tree Removal Technique
# For each large N, try removing trees from corners to create smaller N configurations

def get_bounding_box_corners(trees_df):
    """Return the four corners of the axis-aligned box around all trees.

    The corners are listed as bottom-left, bottom-right, top-left, top-right,
    each as an (x, y) pair taken from the pooled polygon vertices.
    """
    outline_points = []
    for _, tree in trees_df.iterrows():
        outline = create_tree_polygon(
            parse_value(tree['x']),
            parse_value(tree['y']),
            parse_value(tree['deg']),
        )
        outline_points.extend(list(outline.exterior.coords))
    outline_points = np.array(outline_points)

    left, bottom = outline_points.min(axis=0)
    right, top = outline_points.max(axis=0)

    bottom_left = (left, bottom)
    bottom_right = (right, bottom)
    top_left = (left, top)
    top_right = (right, top)
    return [bottom_left, bottom_right, top_left, top_right]

def distance_to_corner(row, corner):
    """Euclidean distance between a tree's placement point and ``corner``.

    ``corner`` is indexed as (x, y); the tree position comes from
    ``get_tree_center``.
    """
    center_x, center_y = get_tree_center(row)
    dx = center_x - corner[0]
    dy = center_y - corner[1]
    return np.sqrt(dx**2 + dy**2)

def extract_n_trees_from_config(source_df, target_n, corner_idx=0):
    """Keep the ``target_n`` trees closest to one bounding-box corner.

    Args:
        source_df: Configuration to extract from, one row per tree.
        target_n: Number of trees to keep.
        corner_idx: Which corner to anchor on; taken modulo 4 and indexed
            into the list returned by ``get_bounding_box_corners``.

    Returns:
        A new DataFrame with the ``target_n`` rows nearest to the corner,
        ordered by increasing distance (ties keep their original row order),
        with the same columns as ``source_df``. Returns None when the request
        cannot be satisfied: ``target_n`` is negative or exceeds the number
        of available trees.
    """
    # A negative count would otherwise reach a "first n rows" slice, where it
    # means "all but the last |n| rows" and silently yields the wrong size.
    if target_n < 0 or len(source_df) < target_n:
        return None
    if target_n == 0:
        # Nothing to keep; skip the geometry work (which needs >= 1 tree).
        return source_df.iloc[0:0].copy()

    corners = get_bounding_box_corners(source_df)
    corner = corners[corner_idx % 4]

    # Distances are kept in a separate array instead of a temporary column so
    # that an existing column of the same name is never overwritten/dropped.
    distances = np.array([distance_to_corner(row, corner) for _, row in source_df.iterrows()])

    # Stable sort makes the choice among equidistant trees deterministic.
    keep = np.argsort(distances, kind='stable')[:target_n]
    return source_df.iloc[keep].copy()

# Marker output: confirms that the tree-removal helpers in this cell were executed.
print("Tree removal functions defined")

Tree removal functions defined


In [4]:
# Smoke test: drop a single tree from the N=200 layout, once per corner
print("Testing tree removal technique...")

# Rows that belong to the N=200 configuration
n200_prefix = "200_"
is_n200 = df_baseline['id'].str.startswith(n200_prefix)
n200_trees = df_baseline[is_n200].copy()
print(f"N=200 has {len(n200_trees)} trees")
print(f"N=200 baseline score: {baseline_scores[200]:.9f}")

# Candidate N=199 layouts; failed or overlapping extractions print nothing
for corner_idx in range(4):
    extracted = extract_n_trees_from_config(n200_trees, 199, corner_idx)
    if extracted is None or check_overlaps(extracted):
        continue
    score = compute_score_for_trees(extracted, 199)
    improvement = baseline_scores[199] - score
    print(f"  Corner {corner_idx}: N=199 score={score:.9f}, improvement={improvement:.9f}")

Testing tree removal technique...
N=200 has 200 trees
N=200 baseline score: 0.337564179
  Corner 0: N=199 score=0.339260482, improvement=-0.000991035


  Corner 1: N=199 score=0.339260482, improvement=-0.000991035
  Corner 2: N=199 score=0.339260482, improvement=-0.000991035


  Corner 3: N=199 score=0.339260482, improvement=-0.000991035


In [5]:
# Full tree removal: for each N from 199 down to 2, try extracting from larger configs
print("\nRunning full tree removal technique...")

# Maximum number of trees dropped from a single source configuration.
MAX_REMOVALS = 20

# Store best configurations found
best_configs = {}  # n -> (score, trees_df, source_n)

# Initialize with baseline so every N has a valid fallback entry
for n in range(1, 201):
    prefix = f"{n:03d}_"
    trees = df_baseline[df_baseline['id'].str.startswith(prefix)].copy()
    best_configs[n] = (baseline_scores[n], trees, n)

# For each source N (from 200 down to 3), try extracting smaller configs.
# Sources are visited in descending order and only ever update smaller N, so
# the entry read for source_n is already final when it is used.
for source_n in tqdm(range(200, 2, -1), desc="Processing source N"):
    _, source_trees, _ = best_configs[source_n]

    # Targets run from source_n - 1 down to source_n - MAX_REMOVALS inclusive,
    # never below N=2. The range stop is exclusive, hence the extra "- 1";
    # without it only MAX_REMOVALS - 1 removals would be tried.
    lowest_target = max(2, source_n - MAX_REMOVALS)
    for target_n in range(source_n - 1, lowest_target - 1, -1):
        for corner_idx in range(4):
            extracted = extract_n_trees_from_config(source_trees, target_n, corner_idx)
            if extracted is None:
                continue

            # Overlapping layouts are invalid and are discarded
            if check_overlaps(extracted):
                continue

            score = compute_score_for_trees(extracted, target_n)

            # Replace the stored layout only on a strict improvement; the
            # small margin ignores floating-point noise.
            if score < best_configs[target_n][0] - 1e-12:
                best_configs[target_n] = (score, extracted, source_n)

print("\nTree removal complete")


Running full tree removal technique...


Processing source N:   0%|          | 0/198 [00:00<?, ?it/s]

Processing source N:   1%|          | 1/198 [00:10<33:22, 10.17s/it]

Processing source N:   1%|          | 2/198 [00:20<33:01, 10.11s/it]

Processing source N:   2%|▏         | 3/198 [00:30<32:34, 10.02s/it]

Processing source N:   2%|▏         | 4/198 [00:39<32:09,  9.95s/it]

Processing source N:   3%|▎         | 5/198 [00:49<31:41,  9.85s/it]

Processing source N:   3%|▎         | 6/198 [00:59<31:22,  9.80s/it]

Processing source N:   4%|▎         | 7/198 [01:08<30:57,  9.73s/it]

Processing source N:   4%|▍         | 8/198 [01:18<30:34,  9.65s/it]

Processing source N:   5%|▍         | 9/198 [01:27<30:11,  9.58s/it]

Processing source N:   5%|▌         | 10/198 [01:37<29:46,  9.50s/it]

Processing source N:   6%|▌         | 11/198 [01:46<29:23,  9.43s/it]

Processing source N:   6%|▌         | 12/198 [01:55<28:58,  9.35s/it]

Processing source N:   7%|▋         | 13/198 [02:04<28:33,  9.26s/it]

Processing source N:   7%|▋         | 14/198 [02:13<28:07,  9.17s/it]

Processing source N:   8%|▊         | 15/198 [02:22<27:44,  9.09s/it]

Processing source N:   8%|▊         | 16/198 [02:31<27:21,  9.02s/it]

Processing source N:   9%|▊         | 17/198 [02:40<27:00,  8.95s/it]

Processing source N:   9%|▉         | 18/198 [02:48<26:36,  8.87s/it]

Processing source N:  10%|▉         | 19/198 [02:57<26:14,  8.80s/it]

Processing source N:  10%|█         | 20/198 [03:06<25:49,  8.71s/it]

Processing source N:  11%|█         | 21/198 [03:14<25:28,  8.63s/it]

Processing source N:  11%|█         | 22/198 [03:22<25:03,  8.54s/it]

Processing source N:  12%|█▏        | 23/198 [03:31<24:40,  8.46s/it]

Processing source N:  12%|█▏        | 24/198 [03:39<24:16,  8.37s/it]

Processing source N:  13%|█▎        | 25/198 [03:47<23:53,  8.28s/it]

Processing source N:  13%|█▎        | 26/198 [03:55<23:29,  8.20s/it]

Processing source N:  14%|█▎        | 27/198 [04:03<23:08,  8.12s/it]

Processing source N:  14%|█▍        | 28/198 [04:11<22:46,  8.04s/it]

Processing source N:  15%|█▍        | 29/198 [04:18<22:25,  7.96s/it]

Processing source N:  15%|█▌        | 30/198 [04:26<22:05,  7.89s/it]

Processing source N:  16%|█▌        | 31/198 [04:34<21:45,  7.82s/it]

Processing source N:  16%|█▌        | 32/198 [04:41<21:24,  7.74s/it]

Processing source N:  17%|█▋        | 33/198 [04:49<21:03,  7.66s/it]

Processing source N:  17%|█▋        | 34/198 [04:56<20:41,  7.57s/it]

Processing source N:  18%|█▊        | 35/198 [05:03<20:20,  7.49s/it]

Processing source N:  18%|█▊        | 36/198 [05:11<19:59,  7.41s/it]

Processing source N:  19%|█▊        | 37/198 [05:18<19:40,  7.33s/it]

Processing source N:  19%|█▉        | 38/198 [05:25<19:21,  7.26s/it]

Processing source N:  20%|█▉        | 39/198 [05:32<19:04,  7.20s/it]

Processing source N:  20%|██        | 40/198 [05:39<18:46,  7.13s/it]

Processing source N:  21%|██        | 41/198 [05:46<18:28,  7.06s/it]

Processing source N:  21%|██        | 42/198 [05:53<18:09,  6.98s/it]

Processing source N:  22%|██▏       | 43/198 [05:59<17:54,  6.93s/it]

Processing source N:  22%|██▏       | 44/198 [06:06<17:35,  6.85s/it]

Processing source N:  23%|██▎       | 45/198 [06:13<17:15,  6.77s/it]

Processing source N:  23%|██▎       | 46/198 [06:19<16:57,  6.69s/it]

Processing source N:  24%|██▎       | 47/198 [06:26<16:39,  6.62s/it]

Processing source N:  24%|██▍       | 48/198 [06:32<16:22,  6.55s/it]

Processing source N:  25%|██▍       | 49/198 [06:38<16:08,  6.50s/it]

Processing source N:  25%|██▌       | 50/198 [06:45<15:51,  6.43s/it]

Processing source N:  26%|██▌       | 51/198 [06:51<15:34,  6.36s/it]

Processing source N:  26%|██▋       | 52/198 [06:57<15:18,  6.29s/it]

Processing source N:  27%|██▋       | 53/198 [07:04<15:45,  6.52s/it]

Processing source N:  27%|██▋       | 54/198 [07:11<16:06,  6.71s/it]

Processing source N:  28%|██▊       | 55/198 [07:17<15:26,  6.48s/it]

Processing source N:  28%|██▊       | 56/198 [07:23<14:54,  6.30s/it]

Processing source N:  29%|██▉       | 57/198 [07:29<14:26,  6.15s/it]

Processing source N:  29%|██▉       | 58/198 [07:35<14:02,  6.02s/it]

Processing source N:  30%|██▉       | 59/198 [07:40<13:40,  5.90s/it]

Processing source N:  30%|███       | 60/198 [07:46<13:21,  5.81s/it]

Processing source N:  31%|███       | 61/198 [07:51<13:04,  5.73s/it]

Processing source N:  31%|███▏      | 62/198 [07:57<12:49,  5.66s/it]

Processing source N:  32%|███▏      | 63/198 [08:02<12:40,  5.64s/it]

Processing source N:  32%|███▏      | 64/198 [08:08<12:22,  5.54s/it]

Processing source N:  33%|███▎      | 65/198 [08:13<12:06,  5.46s/it]

Processing source N:  33%|███▎      | 66/198 [08:18<11:51,  5.39s/it]

Processing source N:  34%|███▍      | 67/198 [08:23<11:37,  5.33s/it]

Processing source N:  34%|███▍      | 68/198 [08:28<11:23,  5.26s/it]

Processing source N:  35%|███▍      | 69/198 [08:34<11:09,  5.19s/it]

Processing source N:  35%|███▌      | 70/198 [08:39<10:56,  5.13s/it]

Processing source N:  36%|███▌      | 71/198 [08:43<10:42,  5.06s/it]

Processing source N:  36%|███▋      | 72/198 [08:48<10:29,  5.00s/it]

Processing source N:  37%|███▋      | 73/198 [08:53<10:16,  4.93s/it]

Processing source N:  37%|███▋      | 74/198 [08:58<10:03,  4.87s/it]

Processing source N:  38%|███▊      | 75/198 [09:02<09:50,  4.80s/it]

Processing source N:  38%|███▊      | 76/198 [09:07<09:37,  4.73s/it]

Processing source N:  39%|███▉      | 77/198 [09:11<09:24,  4.66s/it]

Processing source N:  39%|███▉      | 78/198 [09:16<09:12,  4.60s/it]

Processing source N:  40%|███▉      | 79/198 [09:20<09:00,  4.54s/it]

Processing source N:  40%|████      | 80/198 [09:25<08:49,  4.49s/it]

Processing source N:  41%|████      | 81/198 [09:29<08:38,  4.43s/it]

Processing source N:  41%|████▏     | 82/198 [09:33<08:27,  4.37s/it]

Processing source N:  42%|████▏     | 83/198 [09:37<08:16,  4.31s/it]

Processing source N:  42%|████▏     | 84/198 [09:42<08:05,  4.26s/it]

Processing source N:  43%|████▎     | 85/198 [09:46<07:54,  4.20s/it]

Processing source N:  43%|████▎     | 86/198 [09:50<07:44,  4.14s/it]

Processing source N:  44%|████▍     | 87/198 [09:54<07:33,  4.09s/it]

Processing source N:  44%|████▍     | 88/198 [09:57<07:23,  4.03s/it]

Processing source N:  45%|████▍     | 89/198 [10:01<07:13,  3.98s/it]

Processing source N:  45%|████▌     | 90/198 [10:05<07:04,  3.93s/it]

Processing source N:  46%|████▌     | 91/198 [10:09<06:53,  3.87s/it]

Processing source N:  46%|████▋     | 92/198 [10:13<06:44,  3.82s/it]

Processing source N:  47%|████▋     | 93/198 [10:16<06:35,  3.76s/it]

Processing source N:  47%|████▋     | 94/198 [10:20<06:26,  3.71s/it]

Processing source N:  48%|████▊     | 95/198 [10:23<06:16,  3.66s/it]

Processing source N:  48%|████▊     | 96/198 [10:27<06:08,  3.61s/it]

Processing source N:  49%|████▉     | 97/198 [10:30<05:59,  3.56s/it]

Processing source N:  49%|████▉     | 98/198 [10:34<05:49,  3.50s/it]

Processing source N:  50%|█████     | 99/198 [10:37<05:41,  3.45s/it]

Processing source N:  51%|█████     | 100/198 [10:40<05:33,  3.40s/it]

Processing source N:  51%|█████     | 101/198 [10:43<05:24,  3.34s/it]

Processing source N:  52%|█████▏    | 102/198 [10:47<05:16,  3.30s/it]

Processing source N:  52%|█████▏    | 103/198 [10:50<05:08,  3.24s/it]

Processing source N:  53%|█████▎    | 104/198 [10:53<04:59,  3.19s/it]

Processing source N:  53%|█████▎    | 105/198 [10:56<04:51,  3.14s/it]

Processing source N:  54%|█████▎    | 106/198 [10:59<04:43,  3.08s/it]

Processing source N:  54%|█████▍    | 107/198 [11:02<04:35,  3.03s/it]

Processing source N:  55%|█████▍    | 108/198 [11:05<04:28,  2.98s/it]

Processing source N:  55%|█████▌    | 109/198 [11:07<04:21,  2.94s/it]

Processing source N:  56%|█████▌    | 110/198 [11:10<04:14,  2.89s/it]

Processing source N:  56%|█████▌    | 111/198 [11:13<04:07,  2.84s/it]

Processing source N:  57%|█████▋    | 112/198 [11:16<04:00,  2.80s/it]

Processing source N:  57%|█████▋    | 113/198 [11:18<03:53,  2.75s/it]

Processing source N:  58%|█████▊    | 114/198 [11:21<03:46,  2.70s/it]

Processing source N:  58%|█████▊    | 115/198 [11:23<03:39,  2.65s/it]

Processing source N:  59%|█████▊    | 116/198 [11:26<03:33,  2.60s/it]

Processing source N:  59%|█████▉    | 117/198 [11:28<03:26,  2.55s/it]

Processing source N:  60%|█████▉    | 118/198 [11:31<03:20,  2.51s/it]

Processing source N:  60%|██████    | 119/198 [11:33<03:14,  2.47s/it]

Processing source N:  61%|██████    | 120/198 [11:35<03:08,  2.42s/it]

Processing source N:  61%|██████    | 121/198 [11:38<03:02,  2.38s/it]

Processing source N:  62%|██████▏   | 122/198 [11:40<02:57,  2.33s/it]

Processing source N:  62%|██████▏   | 123/198 [11:42<02:51,  2.29s/it]

Processing source N:  63%|██████▎   | 124/198 [11:44<02:46,  2.25s/it]

Processing source N:  63%|██████▎   | 125/198 [11:46<02:40,  2.20s/it]

Processing source N:  64%|██████▎   | 126/198 [11:48<02:35,  2.17s/it]

Processing source N:  64%|██████▍   | 127/198 [11:50<02:30,  2.12s/it]

Processing source N:  65%|██████▍   | 128/198 [11:52<02:25,  2.08s/it]

Processing source N:  65%|██████▌   | 129/198 [11:54<02:20,  2.04s/it]

Processing source N:  66%|██████▌   | 130/198 [11:56<02:15,  2.00s/it]

Processing source N:  66%|██████▌   | 131/198 [11:58<02:11,  1.96s/it]

Processing source N:  67%|██████▋   | 132/198 [12:00<02:06,  1.92s/it]

Processing source N:  67%|██████▋   | 133/198 [12:02<02:02,  1.88s/it]

Processing source N:  68%|██████▊   | 134/198 [12:03<01:57,  1.84s/it]

Processing source N:  68%|██████▊   | 135/198 [12:05<01:53,  1.80s/it]

Processing source N:  69%|██████▊   | 136/198 [12:07<01:49,  1.76s/it]

Processing source N:  69%|██████▉   | 137/198 [12:08<01:45,  1.72s/it]

Processing source N:  70%|██████▉   | 138/198 [12:10<01:40,  1.68s/it]

Processing source N:  70%|███████   | 139/198 [12:12<01:36,  1.64s/it]

Processing source N:  71%|███████   | 140/198 [12:13<01:33,  1.61s/it]

Processing source N:  71%|███████   | 141/198 [12:15<01:29,  1.57s/it]

Processing source N:  72%|███████▏  | 142/198 [12:16<01:25,  1.53s/it]

Processing source N:  72%|███████▏  | 143/198 [12:18<01:22,  1.51s/it]

Processing source N:  73%|███████▎  | 144/198 [12:19<01:19,  1.47s/it]

Processing source N:  73%|███████▎  | 145/198 [12:20<01:15,  1.43s/it]

Processing source N:  74%|███████▎  | 146/198 [12:22<01:12,  1.39s/it]

Processing source N:  74%|███████▍  | 147/198 [12:23<01:09,  1.36s/it]

Processing source N:  75%|███████▍  | 148/198 [12:24<01:06,  1.33s/it]

Processing source N:  75%|███████▌  | 149/198 [12:25<01:03,  1.29s/it]

Processing source N:  76%|███████▌  | 150/198 [12:26<01:00,  1.26s/it]

Processing source N:  76%|███████▋  | 151/198 [12:28<00:57,  1.23s/it]

Processing source N:  77%|███████▋  | 152/198 [12:29<00:54,  1.19s/it]

Processing source N:  77%|███████▋  | 153/198 [12:30<00:52,  1.16s/it]

Processing source N:  78%|███████▊  | 154/198 [12:31<00:49,  1.13s/it]

Processing source N:  78%|███████▊  | 155/198 [12:32<00:46,  1.09s/it]

Processing source N:  79%|███████▉  | 156/198 [12:33<00:44,  1.07s/it]

Processing source N:  79%|███████▉  | 157/198 [12:34<00:42,  1.03s/it]

Processing source N:  80%|███████▉  | 158/198 [12:35<00:40,  1.01s/it]

Processing source N:  80%|████████  | 159/198 [12:36<00:37,  1.03it/s]

Processing source N:  81%|████████  | 160/198 [12:37<00:35,  1.06it/s]

Processing source N:  81%|████████▏ | 161/198 [12:37<00:33,  1.10it/s]

Processing source N:  82%|████████▏ | 162/198 [12:38<00:31,  1.13it/s]

Processing source N:  82%|████████▏ | 163/198 [12:39<00:29,  1.17it/s]

Processing source N:  83%|████████▎ | 164/198 [12:40<00:27,  1.22it/s]

Processing source N:  83%|████████▎ | 165/198 [12:40<00:26,  1.26it/s]

Processing source N:  84%|████████▍ | 166/198 [12:41<00:24,  1.31it/s]

Processing source N:  84%|████████▍ | 167/198 [12:42<00:22,  1.36it/s]

Processing source N:  85%|████████▍ | 168/198 [12:42<00:21,  1.41it/s]

Processing source N:  85%|████████▌ | 169/198 [12:43<00:19,  1.46it/s]

Processing source N:  86%|████████▌ | 170/198 [12:44<00:18,  1.53it/s]

Processing source N:  86%|████████▋ | 171/198 [12:44<00:16,  1.59it/s]

Processing source N:  87%|████████▋ | 172/198 [12:45<00:15,  1.66it/s]

Processing source N:  87%|████████▋ | 173/198 [12:45<00:14,  1.73it/s]

Processing source N:  88%|████████▊ | 174/198 [12:46<00:13,  1.81it/s]

Processing source N:  88%|████████▊ | 175/198 [12:46<00:12,  1.89it/s]

Processing source N:  89%|████████▉ | 176/198 [12:47<00:11,  1.98it/s]

Processing source N:  89%|████████▉ | 177/198 [12:47<00:10,  2.08it/s]

Processing source N:  90%|████████▉ | 178/198 [12:48<00:09,  2.20it/s]

Processing source N:  90%|█████████ | 179/198 [12:48<00:08,  2.33it/s]

Processing source N:  91%|█████████ | 180/198 [12:48<00:07,  2.46it/s]

Processing source N:  91%|█████████▏| 181/198 [12:49<00:06,  2.62it/s]

Processing source N:  92%|█████████▏| 182/198 [12:49<00:05,  2.83it/s]

Processing source N:  92%|█████████▏| 183/198 [12:49<00:04,  3.08it/s]

Processing source N:  93%|█████████▎| 184/198 [12:49<00:04,  3.38it/s]

Processing source N:  93%|█████████▎| 185/198 [12:50<00:03,  3.73it/s]

Processing source N:  94%|█████████▍| 186/198 [12:50<00:02,  4.14it/s]

Processing source N:  94%|█████████▍| 187/198 [12:50<00:02,  4.64it/s]

Processing source N:  95%|█████████▍| 188/198 [12:50<00:01,  5.24it/s]

Processing source N:  95%|█████████▌| 189/198 [12:50<00:01,  5.96it/s]

Processing source N:  96%|█████████▋| 191/198 [12:50<00:00,  7.61it/s]

Processing source N:  97%|█████████▋| 193/198 [12:50<00:00,  9.80it/s]

Processing source N: 100%|██████████| 198/198 [12:51<00:00, 18.51it/s]

Processing source N: 100%|██████████| 198/198 [12:51<00:00,  3.89s/it]


Tree removal complete





In [6]:
# Collect every N whose best configuration beats the baseline
improvements = []
for n in range(1, 201):
    new_score, _, origin_n = best_configs[n]
    gain = baseline_scores[n] - new_score
    # Gains within floating-point noise are not counted
    if not (gain > 1e-12):
        continue
    improvements.append({
        'n': n,
        'baseline_score': baseline_scores[n],
        'new_score': new_score,
        'improvement': gain,
        'source_n': origin_n
    })

print(f"\nN values improved: {len(improvements)}/200")

if not improvements:
    print("No improvements found")
else:
    df_improvements = pd.DataFrame(improvements)
    report_columns = ['n', 'baseline_score', 'new_score', 'improvement', 'source_n']
    print("\nTop 10 improvements:")
    print(df_improvements.nlargest(10, 'improvement')[report_columns])

    total_improvement = df_improvements['improvement'].sum()
    print(f"\nTotal score improvement: {total_improvement:.9f}")


N values improved: 0/200
No improvements found


In [7]:
# Assemble one submission frame from the best configuration of every N
print("\nBuilding ensemble submission...")


def _with_group_ids(n, group):
    """Return a copy of ``group`` whose ids are rewritten to "NNN_i" for group n."""
    renumbered = group.copy()
    renumbered['id'] = [f"{n:03d}_{idx}" for idx in range(len(renumbered))]
    return renumbered


ensemble_rows = [_with_group_ids(n, best_configs[n][1]) for n in range(1, 201)]

df_ensemble = pd.concat(ensemble_rows, ignore_index=True)
print(f"Ensemble has {len(df_ensemble)} rows")

# Total score is the sum of the stored per-N best scores
ensemble_total = sum(best_configs[n][0] for n in range(1, 201))
print(f"\nBaseline total: {baseline_total:.6f}")
print(f"Ensemble total: {ensemble_total:.6f}")
print(f"Improvement: {baseline_total - ensemble_total:.9f}")


Building ensemble submission...
Ensemble has 20100 rows

Baseline total: 70.659959
Ensemble total: 70.659959
Improvement: 0.000000000


In [8]:
# Re-check every group of the assembled submission for overlapping trees
print("\nValidating ensemble...")
overlap_count = 0
for n in range(1, 201):
    in_group = df_ensemble['id'].str.startswith(f"{n:03d}_")
    if not check_overlaps(df_ensemble[in_group]):
        continue
    overlap_count += 1
    # Only the first few offending groups are listed to keep the output short
    if overlap_count <= 5:
        print(f"  N={n}: OVERLAP")

print(f"\nTotal overlaps: {overlap_count}/200")


Validating ensemble...



Total overlaps: 0/200


In [9]:
# Write the submission: the ensemble when it is valid and strictly better,
# the untouched baseline when it is valid but not better, nothing otherwise.
if overlap_count != 0:
    print("Overlaps detected - not saving")
elif ensemble_total < baseline_total - 1e-9:
    df_ensemble.to_csv('/home/submission/submission.csv', index=False)
    print(f"Saved ensemble to /home/submission/submission.csv")
    print(f"Score: {ensemble_total:.6f}")
else:
    # No improvement - fall back to a byte-for-byte copy of the baseline file
    shutil.copy('/home/code/external_data/saspav/santa-2025.csv', '/home/submission/submission.csv')
    print("No improvement - saved baseline")

No improvement - saved baseline


In [10]:
# Final report of the experiment, framed by two horizontal rules
rule = "=" * 60
summary_lines = [
    rule,
    "EXPERIMENT 010 SUMMARY: Tree Removal Technique",
    rule,
    f"Baseline score: {baseline_total:.6f}",
    f"Ensemble score: {ensemble_total:.6f}",
    f"Improvement: {baseline_total - ensemble_total:.9f}",
    f"N values improved: {len(improvements)}/200",
    f"Overlaps: {overlap_count}/200",
    rule,
]
for line in summary_lines:
    print(line)

EXPERIMENT 010 SUMMARY: Tree Removal Technique
Baseline score: 70.659959
Ensemble score: 70.659959
Improvement: 0.000000000
N values improved: 0/200
Overlaps: 0/200
