# Baseline Experiment: Evaluate Pre-optimized Submissions

This notebook:
1. Loads pre-optimized submissions from snapshots
2. Calculates scores for each
3. Selects the best one
4. Applies fix_direction optimization
5. Validates for overlaps
6. Creates submission

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull
import os
import glob
from tqdm import tqdm

# Outline of a single tree: 15 (x, y) vertices, starting at the tip and
# running down the right side, around the trunk, and back up the left side.
TREE_VERTICES = np.array([
    # tip
    [0.0, 0.8],
    # right side, top tier down to the base
    [0.125, 0.5], [0.0625, 0.5],
    [0.2, 0.25], [0.1, 0.25],
    [0.35, 0.0],
    # trunk, right edge then left edge
    [0.075, 0.0], [0.075, -0.2],
    [-0.075, -0.2], [-0.075, 0.0],
    # left side, base up to the top tier
    [-0.35, 0.0],
    [-0.1, 0.25], [-0.2, 0.25],
    [-0.0625, 0.5], [-0.125, 0.5],
])

print("Tree vertices shape:", TREE_VERTICES.shape)

In [None]:
class ChristmasTree:
    """One tree placed at ``(x, y)`` and turned by ``deg``.

    The polygon is built lazily from ``TREE_VERTICES`` (rotated about the
    origin, then translated) and cached. Changing ``x``, ``y`` or ``deg``
    does not refresh the cache; call ``invalidate()`` afterwards.
    """

    def __init__(self, x=0, y=0, deg=0):
        self.x, self.y, self.deg = x, y, deg
        # Filled in on first access to ``polygon``
        self._polygon = None

    @property
    def polygon(self):
        """Return the placed polygon, building and caching it on first use."""
        if self._polygon is not None:
            return self._polygon
        turned = rotate(Polygon(TREE_VERTICES), self.deg, origin=(0, 0))
        self._polygon = translate(turned, self.x, self.y)
        return self._polygon

    def invalidate(self):
        """Discard the cached polygon so the next access rebuilds it."""
        self._polygon = None

def load_submission(filepath):
    """Read a submission CSV and return it with numeric x / y / deg columns.

    The three value columns are expected to carry an 's' prefix. Every 's'
    character is removed from their text form before converting to float.
    """
    table = pd.read_csv(filepath)
    for name in ('x', 'y', 'deg'):
        as_text = table[name].astype(str)
        stripped = as_text.str.replace('s', '', regex=False)
        table[name] = stripped.astype(float)
    return table

def get_trees_for_n(df, n):
    """Build the ChristmasTree objects belonging to configuration ``n``.

    Rows are selected by their ``id`` starting with the zero-padded,
    three-digit form of ``n`` followed by an underscore (e.g. ``007_``).
    """
    rows = df[df['id'].str.startswith(f"{n:03d}_")]
    return [
        ChristmasTree(x, y, deg)
        for x, y, deg in zip(rows['x'], rows['y'], rows['deg'])
    ]

def get_bounding_box_side(trees):
    """Return the side of the smallest axis-aligned square enclosing all trees.

    This is the larger of the x-extent and y-extent over every exterior
    vertex of every tree polygon. An empty list gives 0.
    """
    if not trees:
        return 0
    vertices = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )
    extent = vertices.max(axis=0) - vertices.min(axis=0)
    return max(extent)

def calculate_total_score(df):
    """Return the submission score: the sum over n = 1..200 of side**2 / n.

    ``side`` is the bounding-square side of configuration ``n``.
    """
    score = 0
    for count in range(1, 201):
        side = get_bounding_box_side(get_trees_for_n(df, count))
        score += side**2 / count
    return score

# Progress marker: confirms the helper definitions above were executed
print("Functions defined.")

In [None]:
# Root folder holding the pre-optimized submissions
preopt_dir = "/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized"

# Walk the whole tree and keep every file whose name ends in .csv
csv_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(preopt_dir)
    for name in names
    if name.endswith('.csv')
]

print(f"Found {len(csv_files)} CSV files")
# Show only the first ten paths to keep the output short
for f in csv_files[:10]:
    print(f"  {f}")

In [None]:
# Score every candidate file and rank them, lowest score first
results = []

for filepath in tqdm(csv_files):
    try:
        df = load_submission(filepath)
        # A complete submission has 1 + 2 + ... + 200 = 20100 rows
        if len(df) != 20100:
            print(f"Skipping {filepath}: wrong row count ({len(df)})")
            continue
        score = calculate_total_score(df)
        results.append({'filepath': filepath, 'score': score})
        print(f"{os.path.basename(filepath)}: {score:.6f}")
    except Exception as e:
        # Best effort: one unreadable or malformed file must not stop the scan
        print(f"Error with {filepath}: {e}")

# With no scored file the DataFrame would have no 'score' column and the sort
# below would fail with an opaque KeyError, so stop here with a clear message.
if not results:
    raise RuntimeError(
        f"No valid submission could be scored among {len(csv_files)} candidate CSV files"
    )

results_df = pd.DataFrame(results)
results_df = results_df.sort_values('score')
print("\nTop 5 submissions:")
print(results_df.head())

In [None]:
# The ranking is sorted ascending, so the first row is the lowest score
best_row = results_df.iloc[0]
best_filepath = best_row['filepath']
best_score = best_row['score']
print(f"Best submission: {best_filepath}")
print(f"Best score: {best_score:.6f}")

# Re-read the winning file to get its full table of placements
best_df = load_submission(best_filepath)

In [None]:
def has_overlap(trees):
    """Report whether any two trees share more than a negligible area.

    A pair counts as overlapping when the polygons intersect, do not merely
    touch, and their intersection area exceeds 1e-10. Fewer than two trees
    never overlap.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    index = STRtree(shapes)

    for a, shape in enumerate(shapes):
        # NOTE(review): assumes STRtree.query returns integer positions into
        # ``shapes`` (Shapely 2.x behaviour) - confirm the installed version.
        for b in index.query(shape):
            if not a < b:
                continue  # visit each unordered pair only once
            other = shapes[b]
            if not shape.intersects(other) or shape.touches(other):
                continue
            # Ignore slivers that are only numerical noise
            if shape.intersection(other).area > 1e-10:
                return True
    return False

# Check every configuration of the best submission for overlapping trees
print("Validating for overlaps...")
overlap_count = 0
for n in tqdm(range(1, 201)):
    trees = get_trees_for_n(best_df, n)
    if not has_overlap(trees):
        continue
    overlap_count += 1
    print(f"  Overlap found in n={n}")

print(f"\nTotal configurations with overlaps: {overlap_count}")

In [None]:
def optimize_rotation_for_config(trees):
    """Search for the global rotation angle that minimizes the bounding square.

    Args:
        trees: Sequence of objects exposing ``polygon.exterior.coords``.

    Returns:
        ``(angle_deg, side)``: the angle in degrees found by a bounded scalar
        search over (0.001, 89.999), and the bounding-square side at that
        angle. With zero or one tree no search is run and
        ``(0, get_bounding_box_side(trees))`` is returned.

    Notes:
        A candidate angle ``a`` maps each point to
        ``(x cos a + y sin a, -x sin a + y cos a)``, i.e. a clockwise
        rotation by ``a``. The result is never compared with the unrotated
        layout, so the returned side can be larger than the current one;
        callers should compare before applying the angle.
    """
    if len(trees) <= 1:
        return 0, get_bounding_box_side(trees)

    # Collect every exterior vertex of every tree
    all_points = np.vstack(
        [np.array(tree.polygon.exterior.coords) for tree in trees]
    )

    # Only convex-hull vertices can lie on the bounding box, so searching on
    # the hull is equivalent and cheaper.
    try:
        hull = ConvexHull(all_points)
        hull_points = all_points[hull.vertices]
    except Exception:
        # Hull construction fails on degenerate input (e.g. collinear points);
        # fall back to all points. Catching Exception instead of using a bare
        # except lets KeyboardInterrupt / SystemExit propagate.
        hull_points = all_points

    def bbox_at_angle(angle_deg):
        """Return the bounding-square side after rotating by ``angle_deg``."""
        angle_rad = np.radians(angle_deg)
        c, s = np.cos(angle_rad), np.sin(angle_rad)
        rot_matrix = np.array([[c, s], [-s, c]])
        rotated = hull_points.dot(rot_matrix.T)
        min_xy = rotated.min(axis=0)
        max_xy = rotated.max(axis=0)
        return max(max_xy - min_xy)

    # Search for optimal angle
    res = minimize_scalar(bbox_at_angle, bounds=(0.001, 89.999), method='bounded')
    return res.x, res.fun

# Spot-check the rotation search on a handful of configuration sizes
print("Testing fix_direction on a few configurations:")
for n in (10, 50, 100, 150, 200):
    trees = get_trees_for_n(best_df, n)
    original_side = get_bounding_box_side(trees)
    opt_angle, opt_side = optimize_rotation_for_config(trees)
    improvement = original_side - opt_side
    fields = [
        f"original={original_side:.6f}",
        f"optimized={opt_side:.6f}",
        f"improvement={improvement:.6f}",
        f"angle={opt_angle:.2f}",
    ]
    print(f"  n={n}: " + ", ".join(fields))

In [None]:
def apply_rotation_to_config(df, n, angle_deg):
    """Rigidly rotate configuration ``n`` about the origin, modifying ``df``.

    Each position is mapped to ``(x cos a + y sin a, -x sin a + y cos a)``,
    a clockwise rotation by ``angle_deg``. This is the same mapping that
    ``bbox_at_angle`` inside ``optimize_rotation_for_config`` evaluates.

    Args:
        df: Submission table with ``id``, ``x``, ``y`` and ``deg`` columns.
        n: Configuration number; rows are selected by the ``f"{n:03d}_"``
            id prefix.
        angle_deg: Clockwise rotation angle in degrees.

    Returns:
        The same ``df`` object, updated in place for the selected rows.
    """
    prefix = f"{n:03d}_"
    mask = df['id'].str.startswith(prefix)

    # Work on a copy so new_y is computed from the old x values
    subset = df[mask].copy()

    angle_rad = np.radians(angle_deg)
    c, s = np.cos(angle_rad), np.sin(angle_rad)

    new_x = c * subset['x'] + s * subset['y']
    new_y = -s * subset['x'] + c * subset['y']
    # Shapely's rotate() treats positive angles as counter-clockwise, so a
    # clockwise turn of the whole layout must be subtracted from each tree's
    # own angle. Adding it would turn the trees the opposite way from their
    # positions and break the rigid rotation.
    new_deg = (subset['deg'] - angle_deg) % 360

    df.loc[mask, 'x'] = new_x.values
    df.loc[mask, 'y'] = new_y.values
    df.loc[mask, 'deg'] = new_deg.values

    return df

# Run the rotation search on every configuration and keep only real gains
print("Applying fix_direction to all configurations...")
optimized_df = best_df.copy()

improvements = []
for n in tqdm(range(1, 201)):
    trees = get_trees_for_n(optimized_df, n)
    original_side = get_bounding_box_side(trees)
    opt_angle, opt_side = optimize_rotation_for_config(trees)

    # Skip rotations that do not shrink the box by more than the tolerance
    if not opt_side < original_side - 1e-8:
        continue

    optimized_df = apply_rotation_to_config(optimized_df, n, opt_angle)
    improvements.append(dict(
        n=n,
        original=original_side,
        optimized=opt_side,
        improvement=original_side - opt_side,
    ))

print(f"\nApplied improvements to {len(improvements)} configurations")
if improvements:
    imp_df = pd.DataFrame(improvements)
    gains = imp_df['improvement']
    print(f"Total improvement: {gains.sum():.6f}")
    print(f"Average improvement: {gains.mean():.6f}")

In [None]:
# Re-score the rotated submission and compare it with the starting point
new_score = calculate_total_score(optimized_df)
score_gain = best_score - new_score
print(f"Original score: {best_score:.6f}")
print(f"New score: {new_score:.6f}")
print(f"Improvement: {score_gain:.6f}")

In [None]:
# Re-run the overlap check on the rotated submission
print("Validating optimized submission for overlaps...")
overlap_count = 0
for n in tqdm(range(1, 201)):
    trees = get_trees_for_n(optimized_df, n)
    if not has_overlap(trees):
        continue
    overlap_count += 1
    print(f"  Overlap found in n={n}")

print(f"\nTotal configurations with overlaps: {overlap_count}")

In [None]:
# Create submission file
def create_submission(df, output_path):
    """Write ``df`` as a submission CSV and return the written table.

    The x / y / deg columns are converted to text with an 's' prefix.
    The input frame is left unchanged and the index is not written.
    """
    prefixed = {
        name: 's' + df[name].astype(str)
        for name in ('x', 'y', 'deg')
    }
    sub_df = df.assign(**prefixed)
    sub_df.to_csv(output_path, index=False)
    print(f"Saved submission to {output_path}")
    return sub_df

# Write the rotated submission to its final location
submission_path = '/home/submission/submission.csv'
create_submission(optimized_df, submission_path)

# Read the file back and report how many rows were actually written
verify_df = pd.read_csv(submission_path)
row_total = len(verify_df)
print(f"Submission row count: {row_total} (expected: 20100)")

In [None]:
# Final report: chosen file, score before/after, overlaps, output location
banner = "=" * 50
summary_lines = [
    banner,
    "BASELINE EXPERIMENT SUMMARY",
    banner,
    f"Best pre-optimized submission: {os.path.basename(best_filepath)}",
    f"Original score: {best_score:.6f}",
    f"After fix_direction: {new_score:.6f}",
    f"Improvement: {best_score - new_score:.6f}",
    f"Overlaps: {overlap_count}",
    f"Submission saved to: {submission_path}",
    banner,
]
for line in summary_lines:
    print(line)