# Loop 1 Analysis: Fix Overlapping Trees

The baseline submission failed with 'Overlapping trees in group 004'. We need to:
1. Validate all groups for overlaps
2. Fix overlapping groups using valid configurations
3. Create a valid submission

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree
import json

getcontext().prec = 30
scale_factor = 1

class ChristmasTree:
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))
        
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (float(Decimal('0.0') * scale_factor), float(tip_y * scale_factor)),
            (float(top_w / Decimal('2') * scale_factor), float(tier_1_y * scale_factor)),
            (float(top_w / Decimal('4') * scale_factor), float(tier_1_y * scale_factor)),
            (float(mid_w / Decimal('2') * scale_factor), float(tier_2_y * scale_factor)),
            (float(mid_w / Decimal('4') * scale_factor), float(tier_2_y * scale_factor)),
            (float(base_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(base_y * scale_factor)),
            (float(trunk_w / Decimal('2') * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(trunk_bottom_y * scale_factor)),
            (float(-(trunk_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(base_w / Decimal('2')) * scale_factor), float(base_y * scale_factor)),
            (float(-(mid_w / Decimal('4')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(mid_w / Decimal('2')) * scale_factor), float(tier_2_y * scale_factor)),
            (float(-(top_w / Decimal('4')) * scale_factor), float(tier_1_y * scale_factor)),
            (float(-(top_w / Decimal('2')) * scale_factor), float(tier_1_y * scale_factor)),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

print('ChristmasTree class defined')

ChristmasTree class defined


In [2]:
def parse_value(val):
    """Parse submission value (may have 's' prefix)"""
    if isinstance(val, str) and val.startswith('s'):
        return val[1:]
    return str(val)

def load_trees_for_n(df, n):
    """Load all trees for configuration n"""
    prefix = f"{n:03d}_"
    rows = df[df['id'].str.startswith(prefix)]
    trees = []
    for _, row in rows.iterrows():
        x = parse_value(row['x'])
        y = parse_value(row['y'])
        deg = parse_value(row['deg'])
        trees.append(ChristmasTree(x, y, deg))
    return trees

def has_overlap(trees, tolerance=1e-10):
    """Check if any trees overlap (with tolerance for floating point)"""
    if len(trees) <= 1:
        return False, []
    polygons = [t.polygon for t in trees]
    tree_index = STRtree(polygons)
    overlaps = []
    
    for i, poly in enumerate(polygons):
        indices = tree_index.query(poly)
        for idx in indices:
            if idx > i:  # Only check each pair once
                if poly.intersects(polygons[idx]) and not poly.touches(polygons[idx]):
                    intersection = poly.intersection(polygons[idx])
                    if intersection.area > tolerance:
                        overlaps.append((i, idx, intersection.area))
    return len(overlaps) > 0, overlaps

def get_bounding_box_side(trees):
    """Get the side length of the bounding square"""
    all_points = []
    for tree in trees:
        coords = np.array(tree.polygon.exterior.coords)
        all_points.append(coords)
    all_points = np.vstack(all_points)
    
    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)
    
    return max(max_x - min_x, max_y - min_y)

print('Helper functions defined')

Helper functions defined


In [3]:
# Load the baseline submission
baseline_path = '/home/code/experiments/001_baseline/submission.csv'
df = pd.read_csv(baseline_path)
print(f'Loaded submission with {len(df)} rows')

# Check all groups for overlaps
print('\nChecking all groups for overlaps...')
overlapping_groups = []
for n in range(1, 201):
    trees = load_trees_for_n(df, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        overlapping_groups.append(n)
        print(f'  N={n}: OVERLAP! {len(overlaps)} pairs')
        for i, j, area in overlaps[:3]:  # Show first 3
            print(f'    Trees {i} and {j}: area={area:.2e}')

print(f'\nTotal overlapping groups: {len(overlapping_groups)}')
print(f'Groups: {overlapping_groups}')

Loaded submission with 20100 rows

Checking all groups for overlaps...



Total overlapping groups: 0
Groups: []


In [4]:
# Load sample submission as donor for valid configurations
sample_path = '/home/data/sample_submission.csv'
df_sample = pd.read_csv(sample_path)
print(f'Loaded sample submission with {len(df_sample)} rows')

# Check if sample submission has overlaps in the problematic groups
print('\nChecking sample submission for overlaps in problematic groups...')
for n in overlapping_groups:
    trees = load_trees_for_n(df_sample, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        print(f'  N={n}: OVERLAP in sample too!')
    else:
        print(f'  N={n}: Sample is valid')

Loaded sample submission with 20100 rows

Checking sample submission for overlaps in problematic groups...


In [5]:
# Function to replace a group in the submission
def replace_group(df_target, df_donor, n):
    """Replace group n in target with group n from donor"""
    prefix = f"{n:03d}_"
    # Remove old group
    df_new = df_target[~df_target['id'].str.startswith(prefix)].copy()
    # Add new group from donor
    donor_rows = df_donor[df_donor['id'].str.startswith(prefix)].copy()
    df_new = pd.concat([df_new, donor_rows], ignore_index=True)
    # Sort by id
    df_new['sort_key'] = df_new['id'].apply(lambda x: (int(x.split('_')[0]), int(x.split('_')[1])))
    df_new = df_new.sort_values('sort_key').drop('sort_key', axis=1).reset_index(drop=True)
    return df_new

# Fix overlapping groups
df_fixed = df.copy()
for n in overlapping_groups:
    print(f'Replacing group {n}...')
    df_fixed = replace_group(df_fixed, df_sample, n)

print(f'\nFixed submission has {len(df_fixed)} rows')


Fixed submission has 20100 rows


In [6]:
# Verify no overlaps in fixed submission
print('Verifying fixed submission...')
still_overlapping = []
for n in range(1, 201):
    trees = load_trees_for_n(df_fixed, n)
    has_ovlp, overlaps = has_overlap(trees)
    if has_ovlp:
        still_overlapping.append(n)
        print(f'  N={n}: Still has overlap!')

if not still_overlapping:
    print('\u2705 No overlaps in fixed submission!')
else:
    print(f'\u274c Still have overlaps in: {still_overlapping}')

Verifying fixed submission...


âœ… No overlaps in fixed submission!


In [7]:
# Calculate score for fixed submission
def calculate_score(df):
    total_score = 0
    scores_by_n = []
    
    for n in range(1, 201):
        trees = load_trees_for_n(df, n)
        if len(trees) != n:
            print(f'Warning: N={n} has {len(trees)} trees instead of {n}')
            continue
        
        side = get_bounding_box_side(trees)
        contribution = (side ** 2) / n
        total_score += contribution
        scores_by_n.append({
            'n': n,
            'side': side,
            'contribution': contribution
        })
    
    return total_score, scores_by_n

print('Calculating score for fixed submission...')
total_score, scores_by_n = calculate_score(df_fixed)
print(f'\nTotal Score: {total_score:.6f}')
print(f'Target Score: 68.919154')
print(f'Gap: {total_score - 68.919154:.6f} ({(total_score - 68.919154) / 68.919154 * 100:.2f}%)')

Calculating score for fixed submission...



Total Score: 70.676102
Target Score: 68.919154
Gap: 1.756948 (2.55%)


In [None]:
# Save fixed submission
import os
os.makedirs('/home/code/experiments/002_fixed_baseline', exist_ok=True)
df_fixed.to_csv('/home/code/experiments/002_fixed_baseline/submission.csv', index=False)
df_fixed.to_csv('/home/submission/submission.csv', index=False)
print('Saved fixed submission to /home/submission/submission.csv')

# Save metrics
metrics = {'cv_score': total_score}
with open('/home/code/experiments/002_fixed_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f)
print(f'Metrics: {metrics}')