# Experiment 001: Baseline with Pre-optimized Solutions

This notebook:
1. Loads pre-optimized submissions from Kaggle datasets
2. Scores and validates them
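3. Applies rotation optimization (`fix_direction`) to the best-scoring valid submission
4. Selects the better of the original and the rotation-optimized submission as the final output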

In [None]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from scipy.spatial import ConvexHull
from scipy.optimize import minimize_scalar
import warnings
warnings.filterwarnings('ignore')

# Use 30 significant digits for Decimal arithmetic in the current context.
getcontext().prec = 30
# Multiplier applied to tree coordinates when polygons are built
# (ChristmasTree) and divided back out when scoring (get_score);
# 1 leaves coordinates unscaled.
scale_factor = 1

print('Libraries loaded successfully')

In [None]:
# Define the ChristmasTree class
class ChristmasTree:
    """A fixed-size Christmas tree outline placed at a centre and rotation.

    Attributes:
        center_x, center_y: Placement of the tree's local origin, as Decimal.
        angle: Rotation passed to ``shapely.affinity.rotate``, as Decimal.
        polygon: The rotated and translated shapely polygon (coordinates are
            multiplied by the module-level ``scale_factor``).
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Store the placement as Decimals and build the placed polygon.

        Each argument is converted with ``Decimal(str(value))``, so strings,
        floats and Decimals are all accepted.
        """
        self.center_x, self.center_y, self.angle = (
            Decimal(str(value)) for value in (center_x, center_y, angle)
        )

        two = Decimal('2')
        four = Decimal('4')

        # Outline dimensions in the tree's local frame.
        trunk_width = Decimal('0.15')
        trunk_height = Decimal('0.2')
        base_width = Decimal('0.7')
        mid_width = Decimal('0.4')
        top_width = Decimal('0.25')
        tip_y = Decimal('0.8')
        upper_tier_y = Decimal('0.5')
        lower_tier_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_height

        # Right-hand side of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_side = [
            (top_width / two, upper_tier_y),
            (top_width / four, upper_tier_y),
            (mid_width / two, lower_tier_y),
            (mid_width / four, lower_tier_y),
            (base_width / two, base_y),
            (trunk_width / two, base_y),
            (trunk_width / two, trunk_bottom_y),
        ]
        # The outline is symmetric about the y axis: the left-hand side is the
        # right-hand side walked back up with x negated.
        left_side = [(-x, y) for x, y in reversed(right_side)]
        outline = [(Decimal('0.0'), tip_y), *right_side, *left_side]

        local_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )
        turned = affinity.rotate(local_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x * scale_factor),
            yoff=float(self.center_y * scale_factor),
        )

    def clone(self):
        """Return a new tree built from this tree's centre and angle."""
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )

print('ChristmasTree class defined')

In [None]:
# Helper functions for scoring and validation
def load_configuration_from_df(n, df):
    """Build the trees of group ``n`` from a submission DataFrame.

    Rows are selected by an ``id`` starting with the zero-padded prefix
    ``f"{n:03d}_"``. The ``x``, ``y`` and ``deg`` cells are stringified and a
    leading ``'s'`` is removed when present; rows where any of the three
    ends up empty are skipped.

    Returns:
        A list of ChristmasTree objects (empty when no row matches).
    """
    def _without_marker(cell):
        # Cells may carry a leading 's'; drop it when present.
        text = str(cell)
        return text[1:] if text.startswith('s') else text

    prefix = f"{n:03d}_"
    matching_rows = df[df["id"].str.startswith(prefix)]

    trees = []
    for _, row in matching_rows.iterrows():
        x, y, deg = (_without_marker(row[column]) for column in ("x", "y", "deg"))
        if not (x and y and deg):
            continue
        trees.append(ChristmasTree(x, y, deg))
    return trees


def get_score(trees, n):
    """Return ``side**2 / n`` for the bounding square of a configuration.

    ``side`` is the longer edge of the axis-aligned bounding box of every
    polygon exterior vertex, with coordinates divided by ``scale_factor``.
    An empty configuration scores 0.0.
    """
    if not trees:
        return 0.0

    divisor = float(scale_factor)
    vertex_blocks = [
        np.asarray(tree.polygon.exterior.xy).T / divisor for tree in trees
    ]
    vertices = np.concatenate(vertex_blocks)

    width, height = vertices.max(axis=0) - vertices.min(axis=0)
    side_length = max(width, height)
    return side_length**2 / n


def has_overlap(trees):
    """Return True when any two tree polygons intersect beyond touching.

    Candidate pairs come from an STRtree query on each polygon; a pair counts
    as overlapping when the polygons intersect and do not merely touch.
    Configurations with fewer than two trees never overlap.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    return any(
        shape.intersects(shapes[candidate]) and not shape.touches(shapes[candidate])
        for position, shape in enumerate(shapes)
        for candidate in spatial_index.query(shape)
        if candidate != position  # a polygon always matches itself in the index
    )


def score_and_validate_submission(file_path, max_n=200):
    """Read a submission CSV, total its score, and check every group for overlaps.

    Args:
        file_path: Path of the submission CSV to read with ``pd.read_csv``.
        max_n: Highest group number to evaluate; groups 1..max_n are scored.
            Groups with no rows in the file are skipped.

    Returns:
        A dict that always contains:
          - "status": "SUCCESS", "FAILED (Overlaps)", or "FAILED" when the
            file could not be read.
          - "total_score": sum of the per-group scores, or ``inf`` when the
            file could not be read.
          - "failed_overlap_n": list of group numbers that contain
            overlapping trees (empty when the file could not be read).
        When the read fails the dict additionally carries "error" with the
        exception message.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        # Return the same keys as the success path so callers can index
        # "failed_overlap_n" without special-casing unreadable files.
        return {
            "status": "FAILED",
            "error": str(e),
            "total_score": float('inf'),
            "failed_overlap_n": [],
        }

    total_score = 0.0
    failed_overlap_n = []

    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df)
        if trees:
            current_score = get_score(trees, n)
            total_score += current_score
            if has_overlap(trees):
                failed_overlap_n.append(n)

    status = "SUCCESS" if not failed_overlap_n else "FAILED (Overlaps)"
    return {
        "status": status,
        "total_score": total_score,
        "failed_overlap_n": failed_overlap_n
    }

print('Helper functions defined')

In [None]:
# Score all available pre-optimized submissions
# Candidate submissions, keyed by a short display name.
submissions = {
    'bucket-of-chump': '/home/code/datasets/bucket-of-chump/submission.csv',
    'santa-2025-csv': '/home/code/datasets/santa-2025-csv/santa-2025.csv',
    'smartmanoj': '/home/code/datasets/smartmanoj_submission.csv',
    'sample_submission': '/home/data/sample_submission.csv'
}

results = {}
for name, path in submissions.items():
    print(f'\nScoring {name}...')
    result = score_and_validate_submission(path)
    results[name] = result
    print(f'  Status: {result["status"]}')
    print(f'  Total Score: {result["total_score"]:.6f}')
    # A result for an unreadable file may not carry an overlap list, so look
    # it up tolerantly instead of indexing the key directly.
    failed_n = result.get('failed_overlap_n') or []
    if failed_n:
        # Show at most the first ten groups; mark truncation with '...'.
        suffix = '...' if len(failed_n) > 10 else ''
        print(f'  Overlaps in N: {failed_n[:10]}{suffix}')

# Find best submission: lowest total score among those that validated; any
# submission that did not validate is ranked as infinitely bad.
best_name = min(results.keys(), key=lambda k: results[k]['total_score'] if results[k]['status'] == 'SUCCESS' else float('inf'))
print(f'\n=== Best submission: {best_name} with score {results[best_name]["total_score"]:.6f} ===')
if results[best_name]['status'] != 'SUCCESS':
    # Every candidate ranked as inf, so min() just returned the first one.
    print('WARNING: no submission passed validation; the selection above is a fallback, not a valid best.')

In [None]:
# Rotation optimization functions
def calculate_bbox_side_at_angle(angle_deg, points):
    """Return the longer bounding-box edge of ``points`` rotated by ``angle_deg``.

    ``points`` is an (N, 2) array of x/y coordinates. The points are rotated
    about the origin by ``angle_deg`` degrees and the larger of the width and
    height of their axis-aligned bounding box is returned.
    """
    theta = np.radians(angle_deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)

    # Transposed rotation matrix, so row vectors can be multiplied on the left.
    transposed_rotation = np.array([[cos_t, sin_t], [-sin_t, cos_t]])
    turned = np.dot(points, transposed_rotation)

    width, height = np.max(turned, axis=0) - np.min(turned, axis=0)
    return max(width, height)


def optimize_rotation(trees):
    """Search for a whole-configuration rotation that shrinks the bounding square.

    The convex hull of all polygon exterior vertices is rotated through
    angles in (0.001, 89.999) degrees with a bounded scalar minimiser.

    Returns:
        ``(side, angle_deg)``: the bounding-square side (as Decimal, divided
        by ``scale_factor``) and the rotation that achieves it. When the
        search does not beat the unrotated side by more than 1e-8, the
        unrotated side and an angle of 0.0 are returned.
    """
    vertices = np.array(
        [point for tree in trees for point in tree.polygon.exterior.coords]
    )
    # Only hull vertices can lie on the bounding box, so search on those.
    hull_points = vertices[ConvexHull(vertices).vertices]

    unrotated_side = calculate_bbox_side_at_angle(0, hull_points)
    search = minimize_scalar(
        calculate_bbox_side_at_angle,
        args=(hull_points,),
        bounds=(0.001, 89.999),
        method='bounded',
    )
    candidate_angle = search.x
    candidate_side = search.fun

    min_gain = 1e-8
    if unrotated_side - candidate_side > min_gain:
        return Decimal(candidate_side) / scale_factor, candidate_angle
    return Decimal(unrotated_side) / scale_factor, 0.0


def apply_rotation(trees, angle_deg):
    """Return new trees with the whole configuration rotated by ``angle_deg``.

    Tree centres are rotated about the centre of the configuration's
    axis-aligned bounding box and every tree's own angle is increased by
    ``angle_deg``. For an empty list or an angle smaller than 1e-9 in
    magnitude, plain clones of the input trees are returned.
    """
    if not trees or abs(angle_deg) < 1e-9:
        return [tree.clone() for tree in trees]

    # Pivot: centre of the bounding box enclosing every polygon.
    lefts, bottoms, rights, tops = zip(*(tree.polygon.bounds for tree in trees))
    pivot = np.array([
        (min(lefts) + max(rights)) / 2.0,
        (min(bottoms) + max(tops)) / 2.0,
    ])

    theta = np.radians(angle_deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])

    centres = np.array(
        [[float(tree.center_x), float(tree.center_y)] for tree in trees]
    )
    moved_centres = (centres - pivot).dot(rotation.T) + pivot

    return [
        ChristmasTree(
            str(new_x),
            str(new_y),
            str(float(tree.angle) + angle_deg),
        )
        for tree, (new_x, new_y) in zip(trees, moved_centres)
    ]

print('Rotation optimization functions defined')

In [None]:
# Apply rotation optimization to the best submission
def _bbox_side_length(trees):
    """Return the longer axis-aligned bounding-box edge of all tree polygons, as Decimal."""
    xys = np.concatenate([np.asarray(t.polygon.exterior.xy).T for t in trees])
    min_x, min_y = xys.min(axis=0)
    max_x, max_y = xys.max(axis=0)
    return Decimal(max(max_x - min_x, max_y - min_y))


def fix_direction(input_path, output_path, max_n=200):
    """Rotate each configuration to shrink its bounding square and save the result.

    For every group 1..max_n present in the input CSV, a whole-configuration
    rotation is searched with ``optimize_rotation``. The rotated layout
    replaces the original only when it has no overlaps AND its re-measured
    bounding-box side is strictly smaller than the current one, so the
    reported score always matches the trees that are written out.

    Args:
        input_path: Submission CSV to read.
        output_path: Where the (possibly rotated) submission CSV is written.
            Missing parent directories are created.
        max_n: Highest group number to process (default 200).

    Returns:
        The final total score (sum of side**2 / n over all groups) as a float.
    """
    import os  # local import: keeps this block self-contained

    df = pd.read_csv(input_path)

    # Parse all groups and measure their current bounding-box side.
    # NOTE: sides are measured in polygon coordinates (not divided by
    # scale_factor), as in the original initial-score computation; the two
    # agree because scale_factor is 1 in this file.
    dict_of_tree_list = {}
    dict_of_side_length = {}

    for n in range(1, max_n + 1):
        trees = load_configuration_from_df(n, df)
        if trees:
            group_id = f'{n:03d}'
            dict_of_tree_list[group_id] = trees
            dict_of_side_length[group_id] = _bbox_side_length(trees)

    # Calculate initial score
    initial_score = sum(v**2 / Decimal(k) for k, v in dict_of_side_length.items())
    print(f'Initial score: {initial_score:.6f}')

    # Optimize each group. Iterate over a snapshot because entries are
    # replaced while looping.
    improved_count = 0
    for group_id, trees in list(dict_of_tree_list.items()):
        if len(trees) < 3:  # Skip small groups
            continue

        _, best_angle = optimize_rotation(trees)
        if not best_angle > 0.001:
            continue

        rotated_trees = apply_rotation(trees, best_angle)
        # Verify no overlaps
        if has_overlap(rotated_trees):
            continue

        # Re-measure the rebuilt trees instead of trusting the optimiser's
        # estimate, and never accept a layout that is not actually smaller.
        rotated_side = _bbox_side_length(rotated_trees)
        if rotated_side < dict_of_side_length[group_id]:
            dict_of_tree_list[group_id] = rotated_trees
            dict_of_side_length[group_id] = rotated_side
            improved_count += 1

    # Calculate final score
    final_score = sum(v**2 / Decimal(k) for k, v in dict_of_side_length.items())
    print(f'Final score: {final_score:.6f}')
    print(f'Improved {improved_count} configurations')

    # Save to output; values carry the 's' prefix that
    # load_configuration_from_df strips when reading.
    rows = []
    for group_id in sorted(dict_of_tree_list.keys()):
        for i, tree in enumerate(dict_of_tree_list[group_id]):
            rows.append({
                'id': f'{group_id}_{i}',
                'x': f's{float(tree.center_x):.15g}',
                'y': f's{float(tree.center_y):.15g}',
                'deg': f's{float(tree.angle):.15g}'
            })

    # Explicit columns keep the header even when there are no rows.
    result_df = pd.DataFrame(rows, columns=['id', 'x', 'y', 'deg'])
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    result_df.to_csv(output_path, index=False)
    print(f'Saved to {output_path}')

    return float(final_score)

# Apply to best submission
best_path = submissions[best_name]
output_path = '/home/code/experiments/001_baseline/optimized_submission.csv'
final_score = fix_direction(best_path, output_path)

In [None]:
# Validate the optimized submission
# Re-score the file written to output_path and check every group for overlaps.
print('\nValidating optimized submission...')
result = score_and_validate_submission(output_path)
print(f'Status: {result["status"]}')
print(f'Total Score: {result["total_score"]:.6f}')
# List the group numbers that contain overlapping trees, if any.
if result['failed_overlap_n']:
    print(f'Overlaps in N: {result["failed_overlap_n"]}')

# Compare with original
# A positive improvement means the rotated file scores lower than the
# submission it was derived from.
print(f'\nOriginal best ({best_name}): {results[best_name]["total_score"]:.6f}')
print(f'After rotation optimization: {result["total_score"]:.6f}')
print(f'Improvement: {results[best_name]["total_score"] - result["total_score"]:.6f}')

In [None]:
# Copy best submission to /home/submission/submission.csv
import shutil

# Keep the rotated file only when it validated cleanly and scores strictly
# lower than the submission it was derived from; otherwise fall back to that
# original submission.
final_submission_path, final_score = (
    (output_path, result['total_score'])
    if result['status'] == 'SUCCESS' and result['total_score'] < results[best_name]['total_score']
    else (best_path, results[best_name]['total_score'])
)

shutil.copy(final_submission_path, '/home/submission/submission.csv')
print(f'\nFinal submission copied to /home/submission/submission.csv')
print(f'Final score: {final_score:.6f}')
print(f'Target to beat: 68.922808')

In [None]:
# Summary
# Print a banner-framed recap of the run: which submission was picked, its
# score before and after rotation, the score of the file that was copied as
# the final submission, and how far that is from the hard-coded target.
print('\n' + '='*60)
print('EXPERIMENT 001 SUMMARY')
print('='*60)
print(f'Best pre-optimized submission: {best_name}')
print(f'Original score: {results[best_name]["total_score"]:.6f}')
print(f'After rotation optimization: {result["total_score"]:.6f}')
print(f'Final score: {final_score:.6f}')
print(f'Target: 68.922808')
# A positive gap means the final score is still above the target.
print(f'Gap to target: {final_score - 68.922808:.6f}')
print('='*60)