# Baseline Evaluation and Rotation Tightening

This notebook:
1. Evaluates the baseline submission score
2. Implements rotation tightening (fix_direction) optimization
3. Validates that the resulting configurations contain no overlapping trees

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import math
import os

# Tree geometry: outline of a single tree in its own local frame.
# TX[i], TY[i] are the x / y coordinates of the i-th outline vertex.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# One row per vertex, columns are (x, y).
TREE_VERTICES = np.column_stack((TX, TY))

print("Tree vertices shape:", TREE_VERTICES.shape)
print("Tree height:", max(TY) - min(TY))
print("Tree width:", max(TX) - min(TX))

In [None]:
def parse_value(s):
    """Convert a submission cell to a float.

    Submission values are written with a leading 's' (e.g. 's0.123'); that
    marker is dropped when present. Anything else is handed to ``float``
    unchanged, so plain numbers and unprefixed numeric strings also work.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def format_value(v):
    """Render a value in submission form: its default text prefixed with 's'.

    Example: 0.123 -> 's0.123'.
    """
    return 's' + format(v)

def get_tree_polygon(x, y, deg):
    """Build the shapely Polygon of one tree placed in the plane.

    Each vertex of TREE_VERTICES is rotated counter-clockwise by ``deg``
    degrees about the tree's local origin and then shifted by ``(x, y)``.
    """
    theta = math.radians(deg)
    c, s = math.cos(theta), math.sin(theta)

    # Standard 2-D rotation followed by the translation to (x, y).
    placed = [
        (px * c - py * s + x, px * s + py * c + y)
        for px, py in TREE_VERTICES
    ]
    return Polygon(placed)

def load_submission(path):
    """Read a submission CSV and add numeric companions for x, y and deg.

    The raw 'x', 'y' and 'deg' columns are kept as read; their parsed float
    values are stored in 'x_val', 'y_val' and 'deg_val' respectively.
    """
    df = pd.read_csv(path)
    for raw_column in ('x', 'y', 'deg'):
        df[f'{raw_column}_val'] = df[raw_column].apply(parse_value)
    return df

def get_trees_for_n(df, n):
    """Return a copy of the rows that belong to the n-tree configuration.

    Rows are selected by their 'id' starting with ``n`` zero-padded to three
    digits followed by an underscore (e.g. n=7 -> '007_'). The original index
    labels are preserved, and the result is independent of ``df``.
    """
    id_prefix = f'{n:03d}_'
    belongs_to_n = df['id'].str.startswith(id_prefix)
    return df.loc[belongs_to_n].copy()

# Progress marker: printed once the utility definitions above have been executed.
print("Utility functions defined")

In [None]:
def get_bounding_box_side(trees_df):
    """Return the side of the smallest axis-aligned square covering all trees.

    Every row of ``trees_df`` (columns 'x_val', 'y_val', 'deg_val') is turned
    into its polygon; the result is the larger of the overall width and
    height of those polygons' bounds.
    """
    lo_x = lo_y = float('inf')
    hi_x = hi_y = float('-inf')

    for _, tree in trees_df.iterrows():
        shape = get_tree_polygon(tree['x_val'], tree['y_val'], tree['deg_val'])
        west, south, east, north = shape.bounds  # (minx, miny, maxx, maxy)
        # Grow the running extent only when this tree reaches further out.
        if west < lo_x:
            lo_x = west
        if south < lo_y:
            lo_y = south
        if east > hi_x:
            hi_x = east
        if north > hi_y:
            hi_y = north

    return max(hi_x - lo_x, hi_y - lo_y)

def calculate_score(df):
    """Score a full submission.

    For every n from 1 to 200 the bounding-square side of the n-tree
    configuration is measured and ``side**2 / n`` is added to the total.
    A configuration with no rows is reported with a warning and skipped.

    Returns:
        (total_score, sides) where ``sides`` maps n -> bounding-square side
        for every configuration that was found.
    """
    sides = {}
    running_total = 0

    for n in range(1, 201):
        group = get_trees_for_n(df, n)
        if not len(group):
            print(f"Warning: No trees found for n={n}")
            continue

        sides[n] = get_bounding_box_side(group)
        running_total += (sides[n] ** 2) / n

    return running_total, sides

# Progress marker: printed once the scoring definitions above have been executed.
print("Scoring functions defined")

In [None]:
# Load and evaluate baseline
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
baseline_path = '/home/code/experiments/001_baseline/baseline.csv'
# `df` carries the parsed x_val / y_val / deg_val columns added by
# load_submission; the later cells read and copy this frame.
df = load_submission(baseline_path)
print(f"Loaded {len(df)} rows")
print(f"Sample rows:")
print(df.head(10))

In [None]:
# Score the baseline and list the configurations that cost the most.
print("Calculating baseline score...")
score, sides = calculate_score(df)
print(f"\nBaseline Score: {score:.6f}")
print(f"\nTop 10 largest contributions (side^2/n):")
# Each entry is (n, side, side^2/n), ordered by contribution, largest first.
contributions = sorted(
    ((n, side, (side**2) / n) for n, side in sides.items()),
    key=lambda entry: entry[2],
    reverse=True,
)
for n, side, contrib in contributions[:10]:
    print(f"  n={n}: side={side:.4f}, contribution={contrib:.4f}")

In [None]:
def check_overlaps(trees_df):
    """Detect pairs of trees whose polygons genuinely overlap.

    Polygons that merely touch are not counted; a pair is reported only when
    the area of their intersection exceeds 1e-10.

    Returns:
        (has_overlap, pairs) where ``pairs`` lists ``(i, j)`` positions
        (i < j, in row order of ``trees_df``) of overlapping trees.
    """
    shapes = [
        get_tree_polygon(tree['x_val'], tree['y_val'], tree['deg_val'])
        for _, tree in trees_df.iterrows()
    ]

    # Fewer than two trees cannot overlap.
    if len(shapes) <= 1:
        return False, []

    spatial_index = STRtree(shapes)
    colliding = []

    for i, shape in enumerate(shapes):
        for j in spatial_index.query(shape):
            # Visit each unordered pair once (and never a tree with itself).
            if j <= i:
                continue
            other = shapes[j]
            if not other.intersects(shape) or other.touches(shape):
                continue
            # Ignore numerically negligible slivers.
            if other.intersection(shape).area > 1e-10:
                colliding.append((i, j))

    return len(colliding) > 0, colliding

# Check a few configurations for overlaps
# Spot check on the unmodified baseline (`df`) for a handful of n values;
# every configuration of the rotated result is validated in a later cell.
print("Checking for overlaps in sample configurations...")
for n in [10, 50, 100, 150, 200]:
    trees = get_trees_for_n(df, n)
    has_overlap, overlap_pairs = check_overlaps(trees)
    if has_overlap:
        print(f"  n={n}: OVERLAPS FOUND - {len(overlap_pairs)} pairs")
    else:
        print(f"  n={n}: OK")

In [None]:
def get_all_points(trees_df):
    """Collect the outline vertices of every tree into one array.

    Each row of ``trees_df`` is converted to its polygon and the exterior
    ring is appended without its last coordinate (which repeats the first).
    """
    vertices = [
        vertex
        for _, tree in trees_df.iterrows()
        for vertex in list(
            get_tree_polygon(
                tree['x_val'], tree['y_val'], tree['deg_val']
            ).exterior.coords
        )[:-1]  # drop the closing point of the ring
    ]
    return np.array(vertices)

def rotate_points(points, angle_deg, center=None):
    """Rotate 2-D points counter-clockwise about a pivot.

    Args:
        points: array-like whose first two columns are x and y.
        angle_deg: rotation angle in degrees.
        center: pivot as an (x, y) pair; defaults to the mean of ``points``.

    Returns:
        A new float array with the rotated coordinates. The input is not
        modified.
    """
    # Work in floating point regardless of the input dtype: with integer
    # points and an integer centre the result buffer would otherwise be
    # integer too, silently truncating the rotated coordinates.
    pts = np.asarray(points, dtype=float)
    if center is None:
        pivot = pts.mean(axis=0)
    else:
        pivot = np.asarray(center, dtype=float)

    angle_rad = math.radians(angle_deg)
    cos_a, sin_a = math.cos(angle_rad), math.sin(angle_rad)

    centered = pts - pivot
    rotated = np.zeros_like(centered)
    rotated[:, 0] = centered[:, 0] * cos_a - centered[:, 1] * sin_a
    rotated[:, 1] = centered[:, 0] * sin_a + centered[:, 1] * cos_a

    return rotated + pivot

def get_bbox_side_for_points(points):
    """Return the side of the axis-aligned bounding square of ``points``.

    ``points`` is a 2-D array whose first two columns are x and y; the
    result is the larger of the x extent and the y extent.
    """
    span = points.max(axis=0) - points.min(axis=0)
    return max(span[0], span[1])

def optimize_rotation_for_n(trees_df, coarse_steps=90):
    """Find a rigid rotation of a configuration that shrinks its bounding square.

    All tree vertices are rotated together about their mean and the side of
    the axis-aligned bounding square is minimised over the angle.

    The side length as a function of the angle generally has several local
    minima, while ``minimize_scalar(method='bounded')`` is a local search.
    Three candidates are therefore compared:

    1. the bounded search over the whole range (the original strategy),
    2. the best sample of an evenly spaced coarse scan, and
    3. a bounded search restricted to the neighbourhood of that sample.

    Args:
        trees_df: rows of one configuration (x_val, y_val, deg_val columns).
        coarse_steps: number of equal slices the 0-90 degree range is cut
            into for the coarse scan; values below 2 disable the scan.

    Returns:
        (best_angle, best_side): rotation in degrees within [0, 90] and the
        bounding-square side it yields. The angle is 0 when no candidate
        beats the unrotated configuration.
    """
    all_points = get_all_points(trees_df)
    center = all_points.mean(axis=0)

    def objective(angle):
        rotated = rotate_points(all_points, angle, center)
        return get_bbox_side_for_points(rotated)

    # Baseline: leave the configuration as it is.
    best_angle = 0
    best_side = objective(0)

    # Only [0, 90] needs searching: a 90 degree turn swaps the width and the
    # height of the bounding box, so the square side repeats with that period.
    candidates = []
    full_range = minimize_scalar(objective, bounds=(0, 90), method='bounded')
    candidates.append((full_range.x, full_range.fun))

    coarse_steps = max(int(coarse_steps), 1)
    step = 90.0 / coarse_steps
    # Interior sample angles; 0 is the baseline and 90 is equivalent to it.
    grid = [i * step for i in range(1, coarse_steps)]
    if grid:
        grid_sides = [objective(angle) for angle in grid]
        k = min(range(len(grid)), key=grid_sides.__getitem__)
        candidates.append((grid[k], grid_sides[k]))

        # Polish the most promising sample inside its own bracket.
        local = minimize_scalar(
            objective,
            bounds=(grid[k] - step, grid[k] + step),
            method='bounded',
        )
        candidates.append((local.x, local.fun))

    for angle, side in candidates:
        if side < best_side:
            best_angle, best_side = angle, side

    return best_angle, best_side

# Progress marker: printed once the rotation helpers above have been executed.
print("Rotation optimization functions defined")

In [None]:
def apply_rotation_to_trees(trees_df, angle_deg):
    """Return a copy of ``trees_df`` with the whole configuration rotated.

    Every tree position is rotated counter-clockwise by ``angle_deg`` about
    the mean of all tree vertices, and the same angle is added to each
    tree's own rotation (wrapped into [0, 360)). Angles smaller than 0.001
    degrees in magnitude are treated as no rotation.
    """
    result = trees_df.copy()
    if abs(angle_deg) < 0.001:
        return result

    # Pivot: mean of every outline vertex in the configuration.
    center = get_all_points(trees_df).mean(axis=0)

    theta = math.radians(angle_deg)
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    for label in result.index:
        # Offset of this tree's position from the pivot.
        off_x = result.loc[label, 'x_val'] - center[0]
        off_y = result.loc[label, 'y_val'] - center[1]

        result.loc[label, 'x_val'] = off_x * cos_t - off_y * sin_t + center[0]
        result.loc[label, 'y_val'] = off_x * sin_t + off_y * cos_t + center[1]
        # The tree itself must turn with the configuration.
        result.loc[label, 'deg_val'] = (result.loc[label, 'deg_val'] + angle_deg) % 360

    return result

# Test rotation optimization on a few configurations
# Dry run on the baseline (`df`): nothing is modified here, the cell only
# reports the side before and after the best rotation found for each n.
print("Testing rotation optimization...")
for n in [5, 10, 20]:
    trees = get_trees_for_n(df, n)
    original_side = get_bounding_box_side(trees)
    best_angle, optimized_side = optimize_rotation_for_n(trees)
    # Difference in side length (not in score).
    improvement = original_side - optimized_side
    print(f"  n={n}: original={original_side:.6f}, optimized={optimized_side:.6f}, angle={best_angle:.2f}°, improvement={improvement:.6f}")

In [None]:
# Apply rotation optimization to all configurations
# Works on a copy so the baseline frame `df` stays available for comparison.
print("Applying rotation optimization to all configurations...")

improved_df = df.copy()
total_improvement = 0
# One (n, original_side, new_side, angle, score_gain) tuple per improved n.
improvements = []
value_columns = ['x_val', 'y_val', 'deg_val']

for n in range(1, 201):
    trees = get_trees_for_n(improved_df, n)
    if trees.empty:
        # calculate_score tolerates a missing n; the rotation helpers need at
        # least one tree, so skip instead of crashing.
        print(f"  n={n}: no trees found, skipping")
        continue

    original_side = get_bounding_box_side(trees)
    best_angle, predicted_side = optimize_rotation_for_n(trees)

    if predicted_side < original_side - 1e-8:
        # Apply the rotation, then measure what was actually achieved:
        # apply_rotation_to_trees leaves very small angles unapplied, so the
        # predicted side is not guaranteed to be the resulting one.
        rotated_trees = apply_rotation_to_trees(trees, best_angle)
        optimized_side = get_bounding_box_side(rotated_trees)
        if not optimized_side < original_side - 1e-8:
            continue

        # Write the whole configuration back in one assignment.
        improved_df.loc[trees.index, value_columns] = rotated_trees[value_columns].to_numpy()

        # Gain expressed in score units (side^2 / n).
        improvement = (original_side**2 - optimized_side**2) / n
        total_improvement += improvement
        improvements.append((n, original_side, optimized_side, best_angle, improvement))

print(f"\nTotal score improvement from rotation: {total_improvement:.6f}")
print(f"\nTop 10 improvements:")
improvements.sort(key=lambda x: -x[4])
for n, orig, opt, angle, imp in improvements[:10]:
    print(f"  n={n}: {orig:.6f} -> {opt:.6f} (angle={angle:.2f}°, improvement={imp:.6f})")

In [None]:
# Calculate new score
# Re-scores the rotated frame with the same routine used for the baseline so
# the two totals are directly comparable (lower is better here: a positive
# "Improvement" means the new total is smaller).
print("Calculating improved score...")
new_score, new_sides = calculate_score(improved_df)
print(f"\nOriginal Score: {score:.6f}")
print(f"Improved Score: {new_score:.6f}")
print(f"Improvement: {score - new_score:.6f}")

In [None]:
# Center all configurations to minimize coordinate values
def center_configuration(trees_df):
    """Return a copy of ``trees_df`` shifted so its bounding box is centred on the origin.

    The midpoint of the axis-aligned bounding box of all tree vertices is
    subtracted from every tree position; rotations are left untouched.
    """
    vertices = get_all_points(trees_df)
    xs, ys = vertices[:, 0], vertices[:, 1]

    # Midpoint of the bounding box along each axis.
    mid_x = (xs.min() + xs.max()) / 2
    mid_y = (ys.min() + ys.max()) / 2

    shifted = trees_df.copy()
    shifted['x_val'] = shifted['x_val'] - mid_x
    shifted['y_val'] = shifted['y_val'] - mid_y
    return shifted

# Center all configurations
# Pure translation of each configuration; only the positions change.
print("Centering all configurations...")
for n in range(1, 201):
    trees = get_trees_for_n(improved_df, n)
    if trees.empty:
        # center_configuration needs at least one tree to compute a bounding
        # box; a missing n is skipped, as calculate_score does.
        continue
    centered = center_configuration(trees)
    # Write both coordinates back for the whole configuration at once.
    improved_df.loc[trees.index, ['x_val', 'y_val']] = centered[['x_val', 'y_val']].to_numpy()

print("Done centering")

In [None]:
# Validate no overlaps were introduced
# Runs check_overlaps on every configuration of the rotated and centred frame.
# NOTE(review): the outcome is only printed; `has_any_overlap` is not used in
# this cell to stop or alter anything.
print("Validating no overlaps...")
has_any_overlap = False
for n in range(1, 201):
    trees = get_trees_for_n(improved_df, n)
    has_overlap, overlap_pairs = check_overlaps(trees)
    if has_overlap:
        print(f"  n={n}: OVERLAPS FOUND - {len(overlap_pairs)} pairs")
        has_any_overlap = True

if not has_any_overlap:
    print("All configurations valid - no overlaps!")

In [None]:
# Save improved submission
def save_submission(df, path):
    """Write ``df`` to ``path`` as a submission CSV.

    The file has the columns id, x, y, deg; the three numeric columns are
    taken from x_val / y_val / deg_val and rendered with ``format_value``.
    The index is not written.
    """
    columns = {'id': df['id']}
    for name in ('x', 'y', 'deg'):
        columns[name] = df[f'{name}_val'].apply(format_value)

    pd.DataFrame(columns).to_csv(path, index=False)
    print(f"Saved to {path}")

# Save to submission folder
# NOTE(review): both files are written unconditionally, i.e. regardless of
# what the overlap validation reported - confirm this is intended.
# NOTE: absolute, machine-specific paths.
submission_path = '/home/submission/submission.csv'
save_submission(improved_df, submission_path)

# Also save to experiment folder
experiment_path = '/home/code/experiments/001_baseline/submission.csv'
save_submission(improved_df, experiment_path)

# `new_score` was computed before the configurations were centred.
print(f"\nFinal Score: {new_score:.6f}")

In [None]:
# Verify the saved submission
# Round-trip check: reload the file that was just written and re-score it, so
# the reported number reflects the values as they are stored on disk.
verify_df = load_submission(submission_path)
verify_score, _ = calculate_score(verify_df)
print(f"Verified Score: {verify_score:.6f}")

# Check first few rows
# Read without parsing to show the raw 's'-prefixed strings in the file.
print("\nFirst 10 rows of submission:")
print(pd.read_csv(submission_path).head(10))