# Experiment 004: Backward Propagation

Implement Chistyakov's backward propagation technique:
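- Start from the N=200 configuration and work down to N=3 as the starting point.
- For each starting N, try removing each tree that touches the bounding box boundary, keep the removal that gives the smallest bounding box, and repeat on the shrunken configuration for a few more steps.
- Whenever the resulting smaller configuration (N-1, N-2, ...) is better than the current best for that tree count, save it!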

In [1]:
import json
import os
import time
from decimal import Decimal, getcontext

import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon, box
from shapely.ops import unary_union

# Decimal arithmetic in this notebook (tree coordinates, side lengths, scores)
# is carried out with 25 significant digits.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before the shapely polygons are
# built, and measured side lengths are divided by it to get back to input units.
scale_factor = Decimal('1e18')

print("Imports done")

Imports done


In [2]:
class ChristmasTree:
    """A Christmas-tree outline placed at a given centre with a given rotation.

    ``center_x``, ``center_y`` and ``angle`` are stored as ``Decimal`` values.
    ``polygon`` is the shapely outline of the placed tree, expressed in
    coordinates that have been multiplied by ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the tree polygon from a position and an angle.

        Args:
            center_x: x offset of the tree; anything ``Decimal`` accepts.
            center_y: y offset of the tree; anything ``Decimal`` accepts.
            angle: rotation handed to ``shapely.affinity.rotate`` as a float.
        """
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Unscaled template dimensions.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        two = Decimal('2')
        four = Decimal('4')

        # Right-hand half of the outline, walking from the tip down to the
        # bottom-right corner of the trunk.
        right_half = [
            (Decimal('0.0'), tip_y),
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand half is the mirror image, walked back up towards the
        # tip; the tip itself is not repeated.
        left_half = [(-x, y) for x, y in reversed(right_half[1:])]

        scaled_outline = [
            (x * scale_factor, y * scale_factor)
            for x, y in right_half + left_half
        ]
        template = Polygon(scaled_outline)

        # Rotate about the template origin first, then move into place.
        turned = affinity.rotate(template, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

    def clone(self):
        """Return a new, independent tree with the same position and angle."""
        return ChristmasTree(
            str(self.center_x),
            str(self.center_y),
            str(self.angle),
        )

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
def get_tree_list_side_length(tree_list):
    """Return the side length of the square bounding box around all trees.

    The bounding box is taken as the min/max over each tree polygon's own
    ``bounds``, the same way ``get_bbox_touching_tree_indices`` derives it.
    This avoids building a geometric union of every polygon just to read its
    extent.

    Args:
        tree_list: iterable of objects exposing a shapely ``polygon``
            attribute (coordinates multiplied by ``scale_factor``).

    Returns:
        Decimal: the longer side of the axis-aligned bounding box, divided by
        ``scale_factor``. ``Decimal('0')`` when ``tree_list`` is empty.
    """
    if not tree_list:
        # Nothing to enclose; min()/max() below would raise on an empty list.
        return Decimal('0')

    all_bounds = [t.polygon.bounds for t in tree_list]
    minx = min(b[0] for b in all_bounds)
    miny = min(b[1] for b in all_bounds)
    maxx = max(b[2] for b in all_bounds)
    maxy = max(b[3] for b in all_bounds)

    # Decimal(float) converts the float exactly; the division then applies the
    # active decimal context precision.
    return Decimal(max(maxx - minx, maxy - miny)) / scale_factor

def get_total_score(dict_of_side_length):
    """Sum ``side ** 2 / N`` over every group.

    Args:
        dict_of_side_length: mapping from a group key (anything ``Decimal``
            can parse as the tree count N) to that group's side length.

    Returns:
        Decimal: the accumulated score, ``Decimal('0')`` for an empty mapping.
    """
    per_group_terms = (
        side ** 2 / Decimal(group_key)
        for group_key, side in dict_of_side_length.items()
    )
    return sum(per_group_terms, Decimal('0'))

def get_bbox_touching_tree_indices(tree_list):
    """List the positions of trees whose outline meets the overall bounding box.

    Args:
        tree_list: sequence of objects exposing a shapely ``polygon``.

    Returns:
        list[int]: indices into ``tree_list`` of every tree whose polygon
        boundary intersects the boundary of the axis-aligned box spanning all
        polygons. An empty input gives an empty list.
    """
    if not tree_list:
        return []

    outlines = [t.polygon for t in tree_list]

    # Transpose the per-polygon (minx, miny, maxx, maxy) tuples into columns.
    lows_x, lows_y, highs_x, highs_y = zip(*(outline.bounds for outline in outlines))
    frame = box(min(lows_x), min(lows_y), max(highs_x), max(highs_y))

    touching = []
    for position, outline in enumerate(outlines):
        if outline.boundary.intersects(frame.boundary):
            touching.append(position)
    return touching

print("Helper functions defined")

Helper functions defined


In [4]:
def parse_csv(csv_path):
    """Load a submission CSV and build the per-group tree lists.

    The file is expected to have the columns ``id``, ``x``, ``y`` and ``deg``.
    ``id`` is split at its first underscore into a group id and an item id;
    ``x``, ``y`` and ``deg`` have surrounding ``'s'`` characters stripped before
    being handed to ``ChristmasTree``.

    Args:
        csv_path: path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        tuple: ``(dict_of_tree_list, dict_of_side_length)``, both keyed by the
        group id string. The first maps to the list of ``ChristmasTree``
        objects in file order, the second to the group's bounding side length.
    """
    print(f'Loading: {csv_path}')
    # Read everything as text: the numeric columns must reach Decimal exactly
    # as written, and the .str accessor below requires string columns.
    result = pd.read_csv(csv_path, dtype=str)
    for column in ('x', 'y', 'deg'):
        result[column] = result[column].str.strip('s')
    # Split only at the first underscore so the result always has exactly the
    # two columns being assigned.
    result[['group_id', 'item_id']] = result['id'].str.split('_', n=1, expand=True)

    dict_of_tree_list = {}
    dict_of_side_length = {}
    for group_id, group_data in result.groupby('group_id'):
        tree_list = [
            ChristmasTree(center_x=x, center_y=y, angle=deg)
            for x, y, deg in zip(group_data['x'], group_data['y'], group_data['deg'])
        ]
        dict_of_tree_list[group_id] = tree_list
        dict_of_side_length[group_id] = get_tree_list_side_length(tree_list)

    return dict_of_tree_list, dict_of_side_length

# Read the baseline submission; the following cells try to improve on it.
dict_of_tree_list, dict_of_side_length = parse_csv(
    '/home/code/experiments/002_valid_baseline/submission.csv'
)

# Keep the baseline total so the final report can show the difference.
current_score = get_total_score(dict_of_side_length)
print('Baseline score: {:.6f}'.format(float(current_score)))

Loading: /home/code/experiments/002_valid_baseline/submission.csv


Baseline score: 70.615102


In [5]:
# Run backward propagation
#
# For every starting group N (largest first), repeatedly drop the
# boundary-touching tree whose removal gives the smallest bounding box. Each
# time the shrunken configuration beats the stored best for its tree count, it
# replaces that entry in dict_of_tree_list / dict_of_side_length, so smaller
# groups processed later start from the improved configuration.
print("\nRunning backward propagation...")
print("="*60)

MAX_START_N = 200       # largest group used as a starting configuration
MIN_START_N = 3         # smallest group used as a starting configuration
MAX_REMOVAL_DEPTH = 5   # stop a cascade once more than this many trees are gone

improvements_found = []
start_time = time.time()

for group_id_main in range(MAX_START_N, MIN_START_N - 1, -1):
    group_id_main_str = f'{int(group_id_main):03d}'

    # Trees are never modified below, only dropped from the list, so a shallow
    # copy of the list is enough to leave the stored configuration untouched.
    candidate_tree_list = list(dict_of_tree_list[group_id_main_str])

    while len(candidate_tree_list) > 1:
        group_id_prev = f'{len(candidate_tree_list) - 1:03d}'
        best_side_length = dict_of_side_length[group_id_prev]
        best_side_length_temp = None
        best_tree_idx_to_delete = None

        # Try to delete each tree that touches the boundary
        for tree_idx_to_delete in get_bbox_touching_tree_indices(candidate_tree_list):
            # Slicing builds the shortened list without rebuilding any polygon.
            candidate_tree_list_short = (
                candidate_tree_list[:tree_idx_to_delete]
                + candidate_tree_list[tree_idx_to_delete + 1:]
            )
            candidate_side_length = get_tree_list_side_length(candidate_tree_list_short)

            # Strict '<' keeps the earliest index when several removals tie.
            if best_side_length_temp is None or candidate_side_length < best_side_length_temp:
                best_side_length_temp = candidate_side_length
                best_tree_idx_to_delete = tree_idx_to_delete

        if best_tree_idx_to_delete is None:
            # No removable tree was found; the list would stop shrinking and
            # this while loop would never terminate, so give up on this N.
            break

        del candidate_tree_list[best_tree_idx_to_delete]

        # Save the best
        if best_side_length_temp < best_side_length:
            old_score = float(best_side_length ** 2 / Decimal(group_id_prev))
            new_score = float(best_side_length_temp ** 2 / Decimal(group_id_prev))
            score_improvement = old_score - new_score

            print(f'✅ N={group_id_prev}: IMPROVED! side {float(best_side_length):.6f} -> {float(best_side_length_temp):.6f} (score: {score_improvement:.6f})')
            improvements_found.append((int(group_id_prev), score_improvement))

            # Store independent copies so the saved configuration does not
            # share tree objects with the group it was derived from.
            dict_of_tree_list[group_id_prev] = [tree.clone() for tree in candidate_tree_list]
            # Already measured above for exactly this set of trees.
            dict_of_side_length[group_id_prev] = best_side_length_temp

        # Stop if we've gone too far from original N
        if group_id_main - len(candidate_tree_list) > MAX_REMOVAL_DEPTH:
            break

elapsed = time.time() - start_time
print("="*60)
print(f"Backward propagation completed in {elapsed:.1f}s")
print(f"Found {len(improvements_found)} improvements")


Running backward propagation...


✅ N=135: IMPROVED! side 6.841466 -> 6.841462 (score: 0.000000)


Backward propagation completed in 82.7s
Found 1 improvements


In [None]:
# Recompute the total after backward propagation and compare with the baseline.
new_score = get_total_score(dict_of_side_length)
print('\nBaseline score: {:.6f}'.format(float(current_score)))
print('New score: {:.6f}'.format(float(new_score)))
print('Total improvement: {:.6f}'.format(float(current_score - new_score)))

if len(improvements_found) > 0:
    # One line per improved group, ordered by tree count.
    print('\nImprovements by N:')
    for n, imp in sorted(improvements_found):
        print('  N={}: {:.6f}'.format(n, imp))
    total_imp = sum(entry[1] for entry in improvements_found)
    print('\nSum of improvements: {:.6f}'.format(total_imp))

In [None]:
# Save results
#
# Flatten every group's trees into submission rows. Items are renumbered from 0
# inside each group, and the numeric fields get the 's' prefix back.
print("\nSaving submission...")

tree_data = [
    {
        'id': f'{group_name}_{item_id}',
        'x': f's{tree.center_x}',
        'y': f's{tree.center_y}',
        'deg': f's{tree.angle}',
    }
    for group_name, tree_list in dict_of_tree_list.items()
    for item_id, tree in enumerate(tree_list)
]

tree_df = pd.DataFrame(tree_data)
# 'id' is a string column, so this is a lexicographic sort.
tree_df = tree_df.sort_values('id').reset_index(drop=True)

# Write the same file to the experiment folder and to the submission folder,
# creating either directory first if it does not exist yet.
for submission_path in (
    '/home/code/experiments/004_backward_propagation/submission.csv',
    '/home/submission/submission.csv',
):
    os.makedirs(os.path.dirname(submission_path), exist_ok=True)
    tree_df.to_csv(submission_path, index=False)

print(f'Saved submission with {len(tree_df)} rows')
print(f'Final score: {float(new_score):.6f}')

In [None]:
# Collect the headline numbers of this experiment and write them out as JSON.
metrics = dict(
    cv_score=float(new_score),
    baseline_score=float(current_score),
    improvement=float(current_score - new_score),
    improvements_found=len(improvements_found),
    # JSON object keys must be strings, so the tree count is stringified.
    improvements_by_n=dict((str(n), imp) for n, imp in improvements_found),
)

with open('/home/code/experiments/004_backward_propagation/metrics.json', 'w') as f:
    f.write(json.dumps(metrics, indent=2))

print("\nMetrics saved")
print("CV Score: {:.6f}".format(float(new_score)))
print("Improvement: {:.6f}".format(float(current_score - new_score)))