# Experiment 001: Baseline with Pre-Optimized Solutions

This notebook:
1. Loads pre-optimized solutions from public datasets
2. Calculates scores for each
3. Applies the fix_direction optimization (one global rotation per configuration that minimizes the bounding square)
4. Validates that no trees overlap
5. Creates submission file

In [1]:
import json
import math
import warnings

import numpy as np
import pandas as pd
from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull
from shapely.affinity import rotate, translate
from shapely.geometry import Polygon
from shapely.strtree import STRtree

warnings.filterwarnings('ignore')

In [2]:
# Tree geometry - 17 vertices (trunk plus three stacked tiers)
def get_tree_vertices(w=1.0, h=1.0):
    """Return the tree outline as a list of (x, y) tuples, counter-clockwise.

    The outline starts at the trunk's bottom-left corner, runs up the right
    side to the tip and back down the left side. With the default scales the
    values match the literal list returned by get_tree_vertices_exact().

    Args:
        w: Horizontal scale factor applied to every width (default 1.0).
        h: Vertical scale factor applied to every height (default 1.0).

    Returns:
        A list of 17 (x, y) tuples.
    """
    # Full trunk width; the trunk spans +/- trunk_w / 2 around x = 0.
    trunk_w = 0.15 * w

    # Half-widths of the three tiers (each tier is symmetric about x = 0).
    base_w = 0.7 * w / 2
    mid_w = 0.4 * w / 2
    top_w = 0.25 * w / 2

    # Heights of the horizontal break lines, bottom to top.
    trunk_bottom = -0.5 * h
    trunk_top = -0.3 * h
    base_top = 0.1 * h
    mid_top = 0.4 * h
    top_top = 0.6 * h
    tip = 0.8 * h

    vertices = [
        (-trunk_w/2, trunk_bottom),  # trunk bottom left
        (trunk_w/2, trunk_bottom),   # trunk bottom right
        (trunk_w/2, trunk_top),      # trunk top right
        (base_w, trunk_top),         # base right
        (mid_w, base_top),           # mid right bottom
        (base_w, base_top),          # base right top
        (top_w, mid_top),            # top right bottom
        (mid_w, mid_top),            # mid right top
        (top_w/2, top_top),          # top right
        (0, tip),                    # tip
        (-top_w/2, top_top),         # top left
        (-mid_w, mid_top),           # mid left top
        (-top_w, mid_top),           # top left bottom
        (-base_w, base_top),         # base left top
        (-mid_w, base_top),          # mid left bottom
        (-base_w, trunk_top),        # base left
        (-trunk_w/2, trunk_top),     # trunk top left
    ]
    return vertices

# Actually use the exact vertices from the kernel
def get_tree_vertices_exact():
    """Return the hard-coded 17-vertex tree outline used to build BASE_TREE.

    The outline is mirror-symmetric about x = 0, so only the right-hand side
    is spelled out; the left-hand side is its reflection walked in reverse.
    The list starts at the trunk's bottom-left corner and ends at the trunk's
    top-left corner.
    """
    # Right-hand side, from the trunk's bottom-right corner up to just below the tip.
    right_side = [
        (0.075, -0.5),
        (0.075, -0.3),
        (0.35, -0.3),
        (0.2, 0.1),
        (0.35, 0.1),
        (0.125, 0.4),
        (0.2, 0.4),
        (0.0625, 0.6),
    ]
    # Reflection of the right side, ordered from just below the tip down to
    # the trunk's bottom-left corner.
    left_side = [(-px, py) for px, py in reversed(right_side)]

    # The trunk's bottom-left corner (last mirrored point) opens the outline;
    # the remaining mirrored points follow the tip.
    return [left_side[-1], *right_side, (0.0, 0.8), *left_side[:-1]]

# Module-level template polygon; create_tree_polygon() rotates and translates it
# to place each tree.
BASE_TREE = Polygon(get_tree_vertices_exact())
# Print the template's area and axis-aligned bounds as a quick sanity check.
print(f"Base tree area: {BASE_TREE.area:.6f}")
print(f"Base tree bounds: {BASE_TREE.bounds}")

Base tree area: 0.457500
Base tree bounds: (-0.35, -0.5, 0.35, 0.8)


In [3]:
def parse_value(s):
    """Convert a submission field to float, dropping a leading 's' if present.

    Strings that start with 's' (e.g. 's1.25') have that first character
    removed before conversion; anything else is passed to float() unchanged.
    """
    has_prefix = isinstance(s, str) and s[:1] == 's'
    raw = s[1:] if has_prefix else s
    return float(raw)

def load_submission(filepath):
    """Read a submission CSV and add parsed helper columns.

    Added columns, in this order:
        n        -- integer before the '_' in 'id'
        tree_idx -- integer after the '_' in 'id'
        x_val, y_val, deg_val -- 'x', 'y', 'deg' converted by parse_value()

    Returns the DataFrame produced by pd.read_csv with those columns appended.
    """
    df = pd.read_csv(filepath)

    # Split each id once, then take the two integer components from the pieces.
    id_parts = df['id'].apply(lambda identifier: identifier.split('_'))
    df['n'] = id_parts.apply(lambda pieces: int(pieces[0]))
    df['tree_idx'] = id_parts.apply(lambda pieces: int(pieces[1]))

    # Numeric versions of the (possibly 's'-prefixed) coordinate/angle fields.
    for source, target in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
        df[target] = df[source].apply(parse_value)

    return df

# Load both pre-optimized solutions
# NOTE(review): these are absolute paths on the authoring machine; adjust them
# when running the notebook elsewhere.
df_saspav = load_submission('/home/code/santa-2025-csv/santa-2025.csv')
df_bucket = load_submission('/home/code/bucket-of-chump/submission.csv')

# Report how many rows each file contains.
print(f"saspav dataset: {len(df_saspav)} rows")
print(f"bucket-of-chump dataset: {len(df_bucket)} rows")
# The expected total is computed as 1 + 2 + ... + 200; the trailing "= 20100"
# is a literal in the message, not a second computation.
print(f"\nExpected rows: {sum(range(1, 201))} = 20100")

saspav dataset: 20100 rows
bucket-of-chump dataset: 20100 rows

Expected rows: 20100 = 20100


In [None]:
def create_tree_polygon(x, y, deg):
    """Return BASE_TREE rotated by deg degrees about (0, 0), then shifted by (x, y)."""
    spun = rotate(BASE_TREE, deg, origin=(0, 0))
    return translate(spun, xoff=x, yoff=y)

def get_bounding_box_side(trees_df):
    """Return the larger of the overall width and height spanned by the trees.

    Each row of trees_df supplies 'x_val', 'y_val' and 'deg_val'; the tree
    polygon is built with create_tree_polygon() and only its axis-aligned
    bounds (minx, miny, maxx, maxy) are used.
    """
    boxes = [
        create_tree_polygon(row['x_val'], row['y_val'], row['deg_val']).bounds
        for _, row in trees_df.iterrows()
    ]

    # Pool every box's low and high coordinate per axis.
    xs = [value for box in boxes for value in (box[0], box[2])]
    ys = [value for box in boxes for value in (box[1], box[3])]

    span_x = max(xs) - min(xs)
    span_y = max(ys) - min(ys)
    return max(span_x, span_y)

def calculate_total_score(df):
    """Sum side**2 / n over the configurations n = 1..200.

    A configuration whose row count differs from n is reported with a warning
    and left out of both the total and the returned side-length mapping.

    Returns:
        (total_score, side_lengths) where side_lengths maps n to the bounding
        side returned by get_bounding_box_side().
    """
    side_lengths = {}
    total_score = 0

    for n in range(1, 201):
        group = df[df['n'] == n]
        count = len(group)
        if count == n:
            side = get_bounding_box_side(group)
            side_lengths[n] = side
            total_score += side ** 2 / n
        else:
            print(f"Warning: n={n} has {count} trees instead of {n}")

    return total_score, side_lengths

# Calculate scores for both datasets
# Each call returns the total score and a dict of per-n bounding side lengths.
print("Calculating score for saspav dataset...")
score_saspav, sides_saspav = calculate_total_score(df_saspav)
print(f"saspav score: {score_saspav:.6f}")

print("\nCalculating score for bucket-of-chump dataset...")
score_bucket, sides_bucket = calculate_total_score(df_bucket)
print(f"bucket-of-chump score: {score_bucket:.6f}")

In [None]:
# Check for overlaps in the better solution
def check_overlaps(trees_df):
    """Return the overlapping tree pairs in one configuration.

    A pair counts as overlapping when the polygons intersect, do not merely
    touch, and their intersection area exceeds 1e-10.

    Returns:
        A list of (i, j, area) tuples with j > i, where i and j are positions
        in the row order of trees_df. The list is empty when nothing overlaps.
    """
    polygons = [
        create_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
        for _, row in trees_df.iterrows()
    ]

    # Spatial index so each polygon is only compared with nearby candidates.
    # The query result is used as integer positions into `polygons`.
    tree_index = STRtree(polygons)

    overlaps = []
    for i, poly in enumerate(polygons):
        for j in tree_index.query(poly):
            if j <= i:
                # Each unordered pair is examined once (and never a polygon with itself).
                continue
            other = polygons[j]
            if not poly.intersects(other) or poly.touches(other):
                continue
            # Ignore intersections whose area is at or below the tolerance.
            shared_area = poly.intersection(other).area
            if shared_area > 1e-10:
                overlaps.append((i, j, shared_area))

    return overlaps

# Use the better solution
# Lower score wins; on a tie the saspav dataset is kept.
if score_saspav <= score_bucket:
    best_df = df_saspav
    best_score = score_saspav
    best_name = 'saspav'
else:
    best_df = df_bucket
    best_score = score_bucket
    best_name = 'bucket-of-chump'

print(f"Best solution: {best_name} with score {best_score:.6f}")

# Check overlaps for a few N values
# This is a spot check on five configurations only, not a full validation.
print("\nChecking for overlaps in sample configurations...")
for n in [10, 50, 100, 150, 200]:
    trees_n = best_df[best_df['n'] == n]
    overlaps = check_overlaps(trees_n)
    if overlaps:
        print(f"  n={n}: {len(overlaps)} overlaps found!")
    else:
        print(f"  n={n}: No overlaps")

In [None]:
# Implement fix_direction optimization
def get_all_points(trees_df):
    """Collect the exterior-ring coordinates of every tree into one array.

    Each row of trees_df supplies 'x_val', 'y_val' and 'deg_val' for
    create_tree_polygon(); the coordinates of all exterior rings are
    concatenated in row order and returned as a NumPy array.
    """
    collected = []
    for _, row in trees_df.iterrows():
        outline = create_tree_polygon(row['x_val'], row['y_val'], row['deg_val']).exterior
        collected.extend(outline.coords)
    return np.array(collected)

def rotate_points(points, angle):
    """Rotate row-vector points about the origin by `angle` degrees.

    The rotation matrix is [[cos, -sin], [sin, cos]]; points are stored one
    per row, so they are multiplied by its transpose.
    """
    phi = np.radians(angle)
    c = np.cos(phi)
    s = np.sin(phi)
    rotation = np.array([
        [c, -s],
        [s, c],
    ])
    return points @ rotation.T

def get_bbox_side_from_points(points):
    """Return the larger of the x-extent and y-extent of a two-column point array."""
    lows = points.min(axis=0)
    highs = points.max(axis=0)
    width, height = highs - lows
    # Same selection as max(width, height): height wins only when strictly larger.
    return height if height > width else width

def optimize_rotation(trees_df):
    """Find a global rotation that does not enlarge the bounding square.

    All tree vertices are rotated about the origin and the bounding side is
    measured for each trial angle. The unrotated layout (angle 0) is kept as
    the baseline, so the returned side is never larger than the side measured
    at angle 0.

    Args:
        trees_df: Rows of one configuration with 'x_val', 'y_val', 'deg_val'.

    Returns:
        (angle_in_degrees, side). The angle is 0.0 when the optimizer found
        nothing strictly better than leaving the layout as it is.
    """
    points = get_all_points(trees_df)

    def objective(angle):
        rotated = rotate_points(points, angle)
        return get_bbox_side_from_points(rotated)

    # Baseline: the bounded optimizer never evaluates the interval end points
    # and only finds a local minimum, so on its own it can hand back a side
    # that is larger than the unrotated one.
    best_angle, best_side = 0.0, objective(0.0)

    # Search in range [0, 90] due to symmetry
    result = minimize_scalar(objective, bounds=(0, 90), method='bounded')

    # Accept the optimizer's answer only when it is a strict improvement.
    if result.fun < best_side:
        best_angle, best_side = result.x, result.fun

    return best_angle, best_side

# Test fix_direction on a few configurations
# For each sampled n, compare the current bounding side with the side after the
# best global rotation found, and report the relative gain in percent.
print("Testing fix_direction optimization...")
for n in [50, 100, 150, 200]:
    trees_n = best_df[best_df['n'] == n]
    original_side = get_bounding_box_side(trees_n)
    optimal_angle, optimal_side = optimize_rotation(trees_n)
    improvement = (original_side - optimal_side) / original_side * 100
    print(f"  n={n}: original={original_side:.6f}, optimal={optimal_side:.6f} at {optimal_angle:.2f}°, improvement={improvement:.4f}%")

In [None]:
# Apply fix_direction to all configurations and recalculate score
def apply_fix_direction_to_all(df):
    """Apply the fix_direction rotation to every configuration n = 1..200.

    For each n, optimize_rotation() supplies one angle for the whole
    configuration. Every tree position is rotated about the origin by that
    angle, and the same angle is added to the tree's own rotation.

    Args:
        df: Submission frame with 'id', 'n', 'tree_idx', 'x_val', 'y_val'
            and 'deg_val' columns.

    Returns:
        (new_df, total_score). new_df has the columns 'id', 'x', 'y', 'deg'
        ('s'-prefixed strings), 'n', 'tree_idx', 'x_val', 'y_val', 'deg_val'.
        total_score sums optimal_side**2 / n using the side reported by
        optimize_rotation().
    """
    new_rows = []
    total_score = 0

    for n in range(1, 201):
        trees_n = df[df['n'] == n]

        # Get optimal rotation
        optimal_angle, optimal_side = optimize_rotation(trees_n)

        # The angle is shared by every tree in this configuration, so the
        # trigonometry is evaluated once per configuration, not once per row.
        theta = np.radians(optimal_angle)
        cos_t, sin_t = np.cos(theta), np.sin(theta)

        # Apply rotation to all trees in this configuration
        for _, row in trees_n.iterrows():
            # Rotate position around origin
            x, y = row['x_val'], row['y_val']
            new_x = x * cos_t - y * sin_t
            new_y = x * sin_t + y * cos_t

            # Add rotation to tree angle
            new_deg = row['deg_val'] + optimal_angle

            new_rows.append({
                'id': row['id'],
                'x': f"s{new_x}",
                'y': f"s{new_y}",
                'deg': f"s{new_deg}",
                'n': n,
                'tree_idx': row['tree_idx'],
                'x_val': new_x,
                'y_val': new_y,
                'deg_val': new_deg
            })

        score_n = optimal_side ** 2 / n
        total_score += score_n

        if n % 50 == 0:
            print(f"  Processed n={n}, running score: {total_score:.6f}")

    return pd.DataFrame(new_rows), total_score

# Run the rotation pass on the selected dataset and compare totals.
# optimized_score is the sum reported by the optimizer; a later cell recomputes
# the score from the rotated coordinates as a cross-check.
print("Applying fix_direction to all configurations...")
optimized_df, optimized_score = apply_fix_direction_to_all(best_df)
print(f"\nOriginal score: {best_score:.6f}")
print(f"Optimized score: {optimized_score:.6f}")
print(f"Improvement: {best_score - optimized_score:.6f}")

In [None]:
# Validate the optimized solution
print("Validating optimized solution...")
print("Checking for overlaps in all configurations...")

# Run the overlap check on every configuration n = 1..200.
# NOTE: all_valid is only reported below; nothing in this cell raises or stops
# execution when overlaps are found.
all_valid = True
for n in range(1, 201):
    trees_n = optimized_df[optimized_df['n'] == n]
    overlaps = check_overlaps(trees_n)
    if overlaps:
        print(f"  n={n}: {len(overlaps)} overlaps found!")
        all_valid = False

if all_valid:
    print("All configurations are valid (no overlaps)!")
else:
    print("Some configurations have overlaps - need to fix!")

# Recalculate score to verify
# This recomputes the score from the rotated polygons themselves, independently
# of the side lengths reported by the optimizer.
verify_score, verify_sides = calculate_total_score(optimized_df)
print(f"\nVerified score: {verify_score:.6f}")

In [None]:
# Create submission file
def _submission_sort_key(identifier):
    """Split an id such as '011_10' into the integer pair (11, 10)."""
    pieces = str(identifier).split('_')
    return int(pieces[0]), int(pieces[1])


def create_submission(df, filepath):
    """Write the 'id', 'x', 'y', 'deg' columns of df to a CSV file.

    Rows are ordered numerically by the two integers encoded in 'id', so
    '011_2' comes before '011_10'. A plain string sort would order them the
    other way round. If any id does not have that form, the rows fall back
    to the plain string sort on 'id'.

    Args:
        df: Frame containing at least the 'id', 'x', 'y' and 'deg' columns.
        filepath: Destination path passed to DataFrame.to_csv.

    Returns:
        The DataFrame that was written (the four columns, sorted).
    """
    ids = df['id'].tolist()
    try:
        # Positional order of the rows after sorting by (n, tree_idx).
        order = sorted(range(len(ids)), key=lambda pos: _submission_sort_key(ids[pos]))
        df_sorted = df.iloc[order]
    except (ValueError, IndexError):
        # Ids that are not '<int>_<int>': keep the previous string ordering.
        df_sorted = df.sort_values('id')

    # Create submission dataframe with correct format
    submission = df_sorted[['id', 'x', 'y', 'deg']].copy()

    submission.to_csv(filepath, index=False)
    print(f"Submission saved to {filepath}")
    print(f"Total rows: {len(submission)}")
    return submission

# Save submission
# NOTE(review): both destinations are absolute paths and their directories are
# assumed to exist already; nothing here creates them.
submission = create_submission(optimized_df, '/home/submission/submission.csv')

# Also save to experiment folder
submission.to_csv('/home/code/experiments/001_baseline/submission.csv', index=False)

# Show the first rows of what was written.
print("\nFirst 10 rows:")
print(submission.head(10))

In [None]:
# Summary
# Reference score this experiment is compared against; kept in one place so
# the printed report and the saved metrics cannot drift apart.
TARGET_SCORE = 68.922808

print("="*60)
print("EXPERIMENT 001: BASELINE SUMMARY")
print("="*60)
print(f"Best pre-optimized solution: {best_name}")
print(f"Original score: {best_score:.6f}")
print(f"After fix_direction: {optimized_score:.6f}")
print(f"Improvement: {best_score - optimized_score:.6f}")
print(f"Target score: {TARGET_SCORE}")
print(f"Gap to target: {optimized_score - TARGET_SCORE:.6f}")
print("="*60)

# Save metrics
# `json` is imported in the notebook's import cell.
metrics = {
    'original_score': best_score,
    'optimized_score': optimized_score,
    'improvement': best_score - optimized_score,
    'source': best_name,
    'target': TARGET_SCORE
}
with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)
print("\nMetrics saved to metrics.json")