# Baseline: Sample Submission Score + Fix Direction Optimization

This notebook:
1. Loads sample_submission.csv and calculates baseline score
2. Applies fix_direction rotation optimization to minimize bounding box
3. Implements simple local search
4. Validates for overlaps

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
from scipy.optimize import minimize_scalar
from scipy.spatial import ConvexHull
import time

# Decimal arithmetic in this notebook is carried out with 25 significant digits.
getcontext().prec = 25
# Tree coordinates are multiplied by this factor before shapely polygons are
# built (see ChristmasTree), and side lengths are divided by it again when
# they are reported (see get_tree_list_side_length).
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree polygon vertices (relative to center at origin, 0° rotation)
# TX[i], TY[i] is the i-th vertex; the 15 values match, in order, the unscaled
# outline that ChristmasTree.__init__ builds from its Decimal dimensions.
# NOTE(review): neither list is referenced by the cells shown in this notebook.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

class ChristmasTree:
    """A fixed-size tree outline placed at a centre point with a rotation.

    Attributes:
        center_x, center_y: Decimal position of the tree's local origin.
        angle: Decimal rotation passed to ``shapely.affinity.rotate``.
        polygon: shapely Polygon of the rotated, translated outline. Its
            coordinates are expressed in units multiplied by ``scale_factor``.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        """Build the tree polygon.

        Args:
            center_x: x position; anything whose ``str()`` parses as a Decimal.
            center_y: y position; same convention as ``center_x``.
            angle: rotation about the tree's local origin; same convention.
        """
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        two = Decimal('2')
        four = Decimal('4')

        # Widths and heights of the outline, in unscaled units.
        trunk_w, trunk_h = Decimal('0.15'), Decimal('0.2')
        base_w, mid_w, top_w = Decimal('0.7'), Decimal('0.4'), Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # Right-hand side of the outline, walking from just below the tip
        # down to the bottom-right corner of the trunk.
        right_half = [
            (top_w / two, tier_1_y),
            (top_w / four, tier_1_y),
            (mid_w / two, tier_2_y),
            (mid_w / four, tier_2_y),
            (base_w / two, base_y),
            (trunk_w / two, base_y),
            (trunk_w / two, trunk_bottom_y),
        ]
        # The left-hand side is the mirror image, walked back up to the tip.
        left_half = [(-x, y) for x, y in reversed(right_half)]

        outline = [(Decimal('0.0'), tip_y)] + right_half + left_half
        initial_polygon = Polygon(
            [(x * scale_factor, y * scale_factor) for x, y in outline]
        )

        # Rotate about the local origin first, then move to the scaled centre.
        turned = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        shift_x = float(self.center_x * scale_factor)
        shift_y = float(self.center_y * scale_factor)
        self.polygon = affinity.translate(turned, xoff=shift_x, yoff=shift_y)

    def clone(self):
        """Return a new ChristmasTree with the same centre and angle."""
        return ChristmasTree(
            center_x=str(self.center_x),
            center_y=str(self.center_y),
            angle=str(self.angle),
        )

print('ChristmasTree class defined')

ChristmasTree class defined


In [3]:
def parse_csv(csv_path):
    """Read a submission CSV and build the trees of every group.

    The file is expected to provide ``id``, ``x``, ``y`` and ``deg`` columns.
    Leading ``s`` characters are stripped from the three value columns, and
    ``id`` is split on ``_`` into a group id and an item id.

    Args:
        csv_path: location handed to ``pandas.read_csv``.

    Returns:
        A pair ``(dict_of_tree_list, dict_of_side_length)``, both keyed by
        group id: the list of ChristmasTree objects of the group, and the
        group's bounding-square side length.
    """
    df = pd.read_csv(csv_path)

    # Strip 's' prefix from every value column.
    for column in ('x', 'y', 'deg'):
        df[column] = df[column].astype(str).str.strip().str.lstrip('s')
    df[['group_id', 'item_id']] = df['id'].str.split('_', n=2, expand=True)

    dict_of_tree_list = {}
    dict_of_side_length = {}

    for group_id, group_data in df.groupby('group_id'):
        placements = zip(group_data['x'], group_data['y'], group_data['deg'])
        group_trees = [
            ChristmasTree(center_x=x, center_y=y, angle=deg)
            for x, y, deg in placements
        ]
        dict_of_tree_list[group_id] = group_trees
        dict_of_side_length[group_id] = get_tree_list_side_length(group_trees)

    return dict_of_tree_list, dict_of_side_length

def get_tree_list_side_length(tree_list):
    """Return the side of the axis-aligned bounding square of a tree list.

    Args:
        tree_list: objects exposing a shapely ``polygon`` attribute.

    Returns:
        Decimal: the larger of the bounding-box width and height of the union
        of all polygons, divided by ``scale_factor``.
    """
    merged = unary_union([tree.polygon for tree in tree_list])
    box = merged.bounds  # (min_x, min_y, max_x, max_y)
    width = box[2] - box[0]
    height = box[3] - box[1]
    return Decimal(max(width, height)) / scale_factor

def get_total_score(dict_of_side_length):
    """Sum ``side ** 2 / n`` over every group.

    Args:
        dict_of_side_length: mapping from group id to side length. Each key is
            converted with ``int()`` and used as the divisor ``n``.

    Returns:
        Decimal: the accumulated score, ``Decimal('0')`` for an empty mapping.
    """
    contributions = (
        side ** 2 / Decimal(str(int(group_id)))
        for group_id, side in dict_of_side_length.items()
    )
    return sum(contributions, Decimal('0'))

print('Helper functions defined')

Helper functions defined


In [4]:
# Load sample submission and calculate baseline score
print('Loading sample submission...')
start_time = time.time()  # wall-clock reference for the timing line below

# Both dictionaries are keyed by group id and are reused by later cells.
dict_of_tree_list, dict_of_side_length = parse_csv('/home/data/sample_submission.csv')

# Score of the unmodified sample submission; later cells compare against it.
baseline_score = get_total_score(dict_of_side_length)
print(f'Baseline score: {float(baseline_score):.10f}')
print(f'Time to load: {time.time() - start_time:.2f}s')

Loading sample submission...


Baseline score: 173.6522986029
Time to load: 4.64s


In [5]:
# Fix direction (rotation optimization) functions
def calculate_bbox_side_at_angle(angle_deg, points):
    """Return the bounding-square side of ``points`` rotated by ``angle_deg``.

    Args:
        angle_deg: rotation angle in degrees.
        points: numpy array with one (x, y) row per point.

    Returns:
        The larger of the width and height of the axis-aligned bounding box
        of the rotated points.
    """
    theta = np.radians(angle_deg)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Row vectors are multiplied by the transposed rotation matrix.
    turned = points.dot(np.array([[cos_t, sin_t], [-sin_t, cos_t]]))
    extents = turned.max(axis=0) - turned.min(axis=0)
    return max(extents[0], extents[1])

def optimize_rotation(trees, angle_max=89.999, epsilon=1e-7):
    """Search for a rigid rotation of the layout that shrinks its bounding square.

    The exterior vertices of every tree polygon are collected, reduced to
    their convex hull when possible, and a bounded scalar minimisation is run
    over the rotation angle.

    Args:
        trees: objects exposing a shapely ``polygon`` attribute.
        angle_max: upper bound, in degrees, of the searched interval; the
            lower bound is fixed at 0.001.
        epsilon: minimum decrease of the side length required to accept the
            rotation found by the optimiser.
            NOTE(review): the decrease is measured on the polygon coordinates
            as stored, i.e. in units multiplied by ``scale_factor`` — confirm
            that this is the intended unit for the threshold.

    Returns:
        A pair ``(best_side, best_angle_deg)``. ``best_side`` is a Decimal
        side length divided by ``scale_factor``; ``best_angle_deg`` is a float
        and is 0.0 when no rotation beats the current orientation by more
        than ``epsilon``. With fewer than three points the current side
        length and 0.0 are returned without any search.
    """
    all_points = []
    for tree in trees:
        all_points.extend(list(tree.polygon.exterior.coords))
    points_np = np.array(all_points)

    if len(points_np) < 3:
        return get_tree_list_side_length(trees), 0.0

    # Only hull vertices can lie on the bounding box, so the search can run
    # on the hull alone. If the hull cannot be built, fall back to the full
    # point set. Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit still propagate.
    try:
        hull_points = points_np[ConvexHull(points_np).vertices]
    except Exception:
        hull_points = points_np

    initial_side = calculate_bbox_side_at_angle(0, hull_points)

    res = minimize_scalar(
        lambda a: calculate_bbox_side_at_angle(a, hull_points),
        bounds=(0.001, float(angle_max)),
        method='bounded',
    )

    found_angle_deg = float(res.x)
    found_side = float(res.fun)

    # Keep the current orientation unless the optimiser found a real gain.
    improvement = initial_side - found_side
    if improvement > float(epsilon):
        best_angle_deg = found_angle_deg
        best_side = Decimal(str(found_side)) / scale_factor
    else:
        best_angle_deg = 0.0
        best_side = Decimal(str(initial_side)) / scale_factor

    return best_side, best_angle_deg

def apply_rotation(trees, angle_deg):
    """Rotate a whole layout rigidly and return the rotated trees.

    The pivot is the centre of the axis-aligned bounding box of all tree
    polygons. Every tree centre is rotated about that pivot and
    ``angle_deg`` is added to every tree's own angle.

    Args:
        trees: list of ChristmasTree objects.
        angle_deg: rotation angle in degrees.

    Returns:
        A new list of trees. When ``trees`` is empty or the angle is smaller
        than 1e-12 in magnitude, plain clones of the input are returned.
    """
    nothing_to_do = (not trees) or abs(angle_deg) < 1e-12
    if nothing_to_do:
        return [tree.clone() for tree in trees]

    # Bounding box of the layout; polygon bounds are in scaled units.
    boxes = [tree.polygon.bounds for tree in trees]
    left = min(box[0] for box in boxes)
    bottom = min(box[1] for box in boxes)
    right = max(box[2] for box in boxes)
    top = max(box[3] for box in boxes)
    # Convert the pivot back to unscaled units to match the tree centres.
    pivot = np.array([(left + right) / 2.0, (bottom + top) / 2.0]) / float(scale_factor)

    theta = np.radians(angle_deg)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])

    centers = np.array([[float(tree.center_x), float(tree.center_y)] for tree in trees])
    moved = (centers - pivot).dot(rotation.T) + pivot

    angle_delta = Decimal(str(angle_deg))
    return [
        ChristmasTree(
            Decimal(new_x),
            Decimal(new_y),
            Decimal(tree.angle + angle_delta),
        )
        for tree, (new_x, new_y) in zip(trees, moved)
    ]

print('Rotation optimization functions defined')

Rotation optimization functions defined


In [6]:
# Apply fix_direction to all configurations
print('Applying fix_direction optimization...')
start_time = time.time()

improved_tree_list = {}
improved_side_length = {}

for group_id in sorted(dict_of_tree_list):
    trees = dict_of_tree_list[group_id]
    original_side = dict_of_side_length[group_id]

    # Ask the optimiser for the best rigid rotation of this group.
    new_side, best_angle = optimize_rotation(trees)

    if best_angle > 0.001:
        rotated_trees = apply_rotation(trees, best_angle)
        # Re-measure the rotated layout; accept it only if it really shrank.
        actual_new_side = get_tree_list_side_length(rotated_trees)
        if actual_new_side < original_side:
            improved_tree_list[group_id] = rotated_trees
            improved_side_length[group_id] = actual_new_side
            continue

    # No useful rotation: keep a copy of the original layout.
    improved_tree_list[group_id] = [t.clone() for t in trees]
    improved_side_length[group_id] = original_side

new_score = get_total_score(improved_side_length)
print(f'Score after fix_direction: {float(new_score):.10f}')
print(f'Improvement: {float(baseline_score - new_score):.10f}')
print(f'Time: {time.time() - start_time:.2f}s')

Applying fix_direction optimization...


Score after fix_direction: 151.1743147690
Improvement: 22.4779838339
Time: 4.56s


In [7]:
# Overlap detection
def check_overlaps(trees):
    """Tell whether any two trees overlap; merely touching does not count.

    Args:
        trees: objects exposing a shapely ``polygon`` attribute.

    Returns:
        True as soon as one pair of polygons intersects without just
        touching, False when no such pair exists.
    """
    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    for idx, shape in enumerate(shapes):
        for other_idx in spatial_index.query(shape):
            # Visit each unordered pair once and never compare a shape to itself.
            if not idx < other_idx:
                continue
            other = shapes[other_idx]
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

# Validate all configurations
# Runs check_overlaps on every group produced by the fix_direction cell and
# reports each group in which at least one overlapping pair is found.
print('Validating for overlaps...')
overlap_count = 0  # number of groups (not pairs) containing an overlap
for group_id, trees in improved_tree_list.items():
    if check_overlaps(trees):
        overlap_count += 1
        print(f'  Overlap found in group {group_id}')

print(f'Total configurations with overlaps: {overlap_count}')

Validating for overlaps...


Total configurations with overlaps: 0


In [8]:
# Write submission
def write_submission(dict_of_tree_list, out_file):
    """Write every tree of every group to a submission CSV.

    Groups are emitted in sorted key order. Each row carries the id
    ``<group>_<index>`` and the tree's x, y and angle, each prefixed with
    ``s``.

    Args:
        dict_of_tree_list: mapping from group id to a list of trees exposing
            ``center_x``, ``center_y`` and ``angle``.
        out_file: destination handed to ``DataFrame.to_csv``.
    """
    records = [
        {
            'id': f'{group_name}_{item_id}',
            'x': f's{tree.center_x}',
            'y': f's{tree.center_y}',
            'deg': f's{tree.angle}',
        }
        for group_name in sorted(dict_of_tree_list.keys())
        for item_id, tree in enumerate(dict_of_tree_list[group_name])
    ]
    frame = pd.DataFrame(records)
    frame.to_csv(out_file, index=False)
    print(f'Saved submission to {out_file}')

# Persist the layouts kept by the fix_direction cell.
write_submission(improved_tree_list, '/home/submission/submission.csv')

# Final score
# new_score is the Decimal total computed after fix_direction; it is converted
# to float here for formatting and for the summary cell.
final_score = float(new_score)
print(f'\nFinal Score: {final_score:.10f}')
# The target value is a hard-coded literal; it is repeated in the summary cell.
print(f'Target: 68.931058')
print(f'Improvement from baseline: {float(baseline_score) - final_score:.10f}')

Saved submission to /home/submission/submission.csv

Final Score: 151.1743147690
Target: 68.931058
Improvement from baseline: 22.4779838339


In [9]:
# Summary
# Recaps the values computed by earlier cells: baseline_score (sample
# submission) and final_score (after fix_direction). Nothing is recomputed.
print('='*50)
print('BASELINE EXPERIMENT SUMMARY')
print('='*50)
print(f'Baseline score (sample_submission): {float(baseline_score):.10f}')
print(f'After fix_direction: {final_score:.10f}')
print(f'Total improvement: {float(baseline_score) - final_score:.10f}')
# The target 68.931058 is a hard-coded literal used in both lines below.
print(f'Target score: 68.931058')
print(f'Gap to target: {final_score - 68.931058:.10f}')

BASELINE EXPERIMENT SUMMARY
Baseline score (sample_submission): 173.6522986029
After fix_direction: 151.1743147690
Total improvement: 22.4779838339
Target score: 68.931058
Gap to target: 82.2432567690
