# Extended Optimization Experiment

This notebook:
1. Starts bbox3 optimization with extended parameters
2. Implements fix_direction post-processing
3. Builds ensemble from multiple CSV sources
4. Validates and scores the result

In [1]:
import numpy as np
import pandas as pd
import os
import subprocess
import time
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import csv

getcontext().prec = 30
scale_factor = 1

print("Libraries loaded successfully")

Libraries loaded successfully


In [2]:
# ChristmasTree class
class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (float(Decimal('0.0')), float(tip_y)),
            (float(top_w / Decimal('2')), float(tier_1_y)),
            (float(top_w / Decimal('4')), float(tier_1_y)),
            (float(mid_w / Decimal('2')), float(tier_2_y)),
            (float(mid_w / Decimal('4')), float(tier_2_y)),
            (float(base_w / Decimal('2')), float(base_y)),
            (float(trunk_w / Decimal('2')), float(base_y)),
            (float(trunk_w / Decimal('2')), float(trunk_bottom_y)),
            (float(-(trunk_w / Decimal('2'))), float(trunk_bottom_y)),
            (float(-(trunk_w / Decimal('2'))), float(base_y)),
            (float(-(base_w / Decimal('2'))), float(base_y)),
            (float(-(mid_w / Decimal('4'))), float(tier_2_y)),
            (float(-(mid_w / Decimal('2'))), float(tier_2_y)),
            (float(-(top_w / Decimal('4'))), float(tier_1_y)),
            (float(-(top_w / Decimal('2'))), float(tier_1_y)),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated, xoff=float(self.center_x), yoff=float(self.center_y))

    def clone(self):
        return ChristmasTree(str(self.center_x), str(self.center_y), str(self.angle))

print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
# Helper functions
def parse_value(val):
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

def load_trees_for_n(df, n):
    prefix = f"{n:03d}_"
    rows = df[df['id'].str.startswith(prefix)]
    trees = []
    for _, row in rows.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        trees.append(ChristmasTree(x, y, deg))
    return trees

def get_bounding_box_side(trees):
    if not trees:
        return 0
    all_points = []
    for tree in trees:
        coords = np.asarray(tree.polygon.exterior.xy).T
        all_points.append(coords)
    all_points = np.concatenate(all_points)
    min_coords = all_points.min(axis=0)
    max_coords = all_points.max(axis=0)
    side = max(max_coords - min_coords)
    return side

def has_overlap(trees):
    if len(trees) <= 1:
        return False
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx != i and poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                intersection = poly.intersection(polygons[idx])
                if intersection.area > 1e-10:
                    return True
    return False

def calculate_total_score(df):
    total = 0
    for n in range(1, 201):
        trees = load_trees_for_n(df, n)
        side = get_bounding_box_side(trees)
        score_n = (side ** 2) / n
        total += score_n
    return total

print("Helper functions defined")

Helper functions defined


In [4]:
# fix_direction functions from saspav kernel
def calculate_bbox_side_at_angle(angle_deg, points):
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix_T = np.array([[c, s], [-s, c]])
    rotated_points = points.dot(rot_matrix_T)
    min_xy = np.min(rotated_points, axis=0)
    max_xy = np.max(rotated_points, axis=0)
    return max(max_xy[0] - min_xy[0], max_xy[1] - min_xy[1])

def optimize_rotation(trees):
    """Find optimal rotation angle to minimize bounding box"""
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)
    
    try:
        hull_points = points_np[ConvexHull(points_np).vertices]
    except:
        return Decimal(get_bounding_box_side(trees)), 0.0
    
    initial_side = calculate_bbox_side_at_angle(0, hull_points)
    
    res = minimize_scalar(lambda a: calculate_bbox_side_at_angle(a, hull_points),
                          bounds=(0.001, 89.999), method='bounded')
    found_angle_deg = res.x
    found_side = res.fun
    
    improvement = initial_side - found_side
    EPSILON = 1e-8
    
    if improvement > EPSILON:
        return Decimal(found_side), found_angle_deg
    else:
        return Decimal(initial_side), 0.0

def apply_rotation(trees, angle_deg):
    """Apply rotation to all trees around their collective center"""
    if not trees or abs(angle_deg) < 1e-9:
        return [t.clone() for t in trees]
    
    bounds = [t.polygon.bounds for t in trees]
    min_x = min(b[0] for b in bounds)
    min_y = min(b[1] for b in bounds)
    max_x = max(b[2] for b in bounds)
    max_y = max(b[3] for b in bounds)
    rotation_center = np.array([(min_x + max_x) / 2.0, (min_y + max_y) / 2.0])
    
    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot_matrix = np.array([[c, -s], [s, c]])
    
    points = np.array([[float(t.center_x), float(t.center_y)] for t in trees])
    shifted = points - rotation_center
    rotated = shifted.dot(rot_matrix.T) + rotation_center
    
    rotated_trees = []
    for i in range(len(trees)):
        new_tree = ChristmasTree(
            str(rotated[i, 0]), 
            str(rotated[i, 1]),
            str(float(trees[i].angle) + angle_deg)
        )
        rotated_trees.append(new_tree)
    return rotated_trees

print("fix_direction functions defined")

fix_direction functions defined


In [5]:
# Load all available CSV files for ensemble
csv_files = []
base_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'

# Main files
for f in ['santa-2025.csv', 'ensemble.csv', 'best_ensemble.csv', 'submission.csv']:
    path = os.path.join(base_path, f)
    if os.path.exists(path):
        csv_files.append(path)

# bucket-of-chump
boc_path = os.path.join(base_path, 'bucket-of-chump/submission.csv')
if os.path.exists(boc_path):
    csv_files.append(boc_path)

# telegram files
for f in ['72.49.csv', '71.97.csv']:
    path = os.path.join(base_path, 'telegram', f)
    if os.path.exists(path):
        csv_files.append(path)

# santa25-public files
public_path = os.path.join(base_path, 'santa25-public')
if os.path.exists(public_path):
    for f in os.listdir(public_path):
        if f.endswith('.csv'):
            csv_files.append(os.path.join(public_path, f))

print(f"Found {len(csv_files)} CSV files for ensemble:")
for f in csv_files[:10]:
    print(f"  {f}")
if len(csv_files) > 10:
    print(f"  ... and {len(csv_files) - 10} more")

Found 23 CSV files for ensemble:
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/72.49.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram/71.97.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT4.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/New_Tree_144_196.csv
  /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public/submission_JKoT3.csv
  ... and 13 more


In [6]:
# Build ensemble: for each N, keep the configuration with smallest bounding box
def build_ensemble(csv_files):
    """Build ensemble by keeping best configuration for each N"""
    best_configs = {}  # n -> (side, df_rows)
    
    for csv_path in csv_files:
        try:
            df = pd.read_csv(csv_path)
            if 'id' not in df.columns:
                continue
                
            for n in range(1, 201):
                trees = load_trees_for_n(df, n)
                if not trees:
                    continue
                    
                # Check for overlaps
                if has_overlap(trees):
                    continue
                    
                side = get_bounding_box_side(trees)
                
                if n not in best_configs or side < best_configs[n][0]:
                    # Get the rows for this N
                    prefix = f"{n:03d}_"
                    rows = df[df['id'].str.startswith(prefix)].copy()
                    best_configs[n] = (side, rows)
                    
        except Exception as e:
            print(f"Error processing {csv_path}: {e}")
            continue
    
    return best_configs

print("Building ensemble from all CSV files...")
best_configs = build_ensemble(csv_files)
print(f"Found best configurations for {len(best_configs)} N values")

Building ensemble from all CSV files...


Found best configurations for 200 N values


In [7]:
# Create ensemble submission
def create_ensemble_submission(best_configs, output_path):
    """Create submission from best configurations"""
    all_rows = []
    
    for n in range(1, 201):
        if n in best_configs:
            rows = best_configs[n][1]
            all_rows.append(rows)
        else:
            print(f"Warning: No configuration for N={n}")
    
    if all_rows:
        ensemble_df = pd.concat(all_rows, ignore_index=True)
        ensemble_df.to_csv(output_path, index=False)
        return ensemble_df
    return None

ensemble_path = '/home/code/experiments/002_extended_optimization/ensemble.csv'
ensemble_df = create_ensemble_submission(best_configs, ensemble_path)
print(f"Created ensemble with {len(ensemble_df)} rows")

Created ensemble with 20100 rows


In [None]:
# Calculate ensemble score
ensemble_score = calculate_total_score(ensemble_df)
print(f"Ensemble score: {ensemble_score:.6f}")

# Compare to baseline
baseline_df = pd.read_csv('/home/code/current.csv')
baseline_score = calculate_total_score(baseline_df)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - ensemble_score:.6f}")

In [None]:
# Apply fix_direction to ensemble
def apply_fix_direction_to_submission(df):
    """Apply fix_direction optimization to each N configuration"""
    improved_rows = []
    total_improvement = 0
    
    for n in range(1, 201):
        trees = load_trees_for_n(df, n)
        if not trees:
            continue
            
        original_side = get_bounding_box_side(trees)
        
        # Try to optimize rotation
        best_side, best_angle = optimize_rotation(trees)
        
        if best_angle != 0.0 and float(best_side) < original_side:
            # Apply rotation
            rotated_trees = apply_rotation(trees, best_angle)
            new_side = get_bounding_box_side(rotated_trees)
            
            if new_side < original_side and not has_overlap(rotated_trees):
                improvement = original_side - new_side
                total_improvement += (original_side**2 - new_side**2) / n
                
                # Create rows for rotated trees
                for i, tree in enumerate(rotated_trees):
                    improved_rows.append({
                        'id': f"{n:03d}_{i}",
                        'x': f"s{float(tree.center_x)}",
                        'y': f"s{float(tree.center_y)}",
                        'deg': f"s{float(tree.angle)}"
                    })
            else:
                # Keep original
                for i, tree in enumerate(trees):
                    improved_rows.append({
                        'id': f"{n:03d}_{i}",
                        'x': f"s{float(tree.center_x)}",
                        'y': f"s{float(tree.center_y)}",
                        'deg': f"s{float(tree.angle)}"
                    })
        else:
            # Keep original
            for i, tree in enumerate(trees):
                improved_rows.append({
                    'id': f"{n:03d}_{i}",
                    'x': f"s{float(tree.center_x)}",
                    'y': f"s{float(tree.center_y)}",
                    'deg': f"s{float(tree.angle)}"
                })
        
        if n % 50 == 0:
            print(f"  Processed N=1 to {n}...")
    
    return pd.DataFrame(improved_rows), total_improvement

print("Applying fix_direction to ensemble...")
fixed_df, improvement = apply_fix_direction_to_submission(ensemble_df)
print(f"Total score improvement from fix_direction: {improvement:.6f}")

In [None]:
# Calculate final score
final_score = calculate_total_score(fixed_df)
print(f"\nFinal score after fix_direction: {final_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
print(f"Total improvement: {baseline_score - final_score:.6f}")

In [None]:
# Validate no overlaps
print("\nValidating no overlaps...")
overlap_count = 0
for n in range(1, 201):
    trees = load_trees_for_n(fixed_df, n)
    if has_overlap(trees):
        print(f"  Overlap in N={n}!")
        overlap_count += 1

if overlap_count == 0:
    print("✓ No overlaps detected!")
else:
    print(f"⚠ Found {overlap_count} configurations with overlaps!")

In [None]:
# Save final submission
import shutil

# Use the best result
if final_score < baseline_score:
    print(f"\nUsing optimized submission (score: {final_score:.6f})")
    fixed_df.to_csv('/home/submission/submission.csv', index=False)
    best_score = final_score
else:
    print(f"\nOptimization didn't improve - using baseline (score: {baseline_score:.6f})")
    shutil.copy('/home/code/current.csv', '/home/submission/submission.csv')
    best_score = baseline_score

print(f"\n=== FINAL SCORE: {best_score:.6f} ===")
print(f"Target: 68.919154")
print(f"Gap: {best_score - 68.919154:.6f}")

In [None]:
# Save metrics
import json

metrics = {
    'baseline_score': float(baseline_score),
    'ensemble_score': float(ensemble_score),
    'final_score': float(final_score),
    'best_score': float(best_score),
    'target_score': 68.919154,
    'gap': float(best_score - 68.919154),
    'csv_files_used': len(csv_files),
    'overlaps': overlap_count
}

with open('/home/code/experiments/002_extended_optimization/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved")
print(json.dumps(metrics, indent=2))