# Loop 1 Analysis - Fix Overlapping Trees Issue

The submission failed with 'Overlapping trees in group 004'. We need to:
1. Validate ALL configurations for overlaps (not just a sample)
2. Find a valid submission from snapshots or sample_submission
3. Understand why our validation missed this

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely import STRtree
import math

# Tree outline in the tree's local frame: 15 vertices, paired index-by-index
# across TX (x coordinates) and TY (y coordinates), starting at (0, 0.8).
# get_tree_polygon zips the two lists, then rotates and translates each vertex.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def get_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` degrees and shifted to (x, y).

    Each (TX, TY) template vertex is rotated about the local origin with a
    standard 2-D rotation and then offset by the requested position. The
    result is an unscaled, float-coordinate shapely ``Polygon``.
    """
    theta = math.radians(deg)
    c, s = math.cos(theta), math.sin(theta)

    return Polygon([
        (px * c - py * s + x, px * s + py * c + y)
        for px, py in zip(TX, TY)
    ])

def parse_value(val):
    """Convert a submission cell to ``float``, dropping one leading 's' if present.

    Non-string inputs, and strings without the prefix, are handed to ``float``
    unchanged.
    """
    prefixed = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if prefixed else val)

def check_overlaps_detailed(polygons, min_area=None):
    """Find every pair of polygons that overlaps and report the shared area.

    A pair counts as overlapping when it intersects without merely touching,
    which is the same predicate check_overlaps_metric uses. Earlier versions
    also required the intersection area to exceed 1e-10; that silently hid
    tiny overlaps which the metric-style check still rejects, so the area
    filter is now opt-in.

    NOTE(review): this still works on whatever coordinates the caller built
    (unscaled floats in this notebook), so near-tangent contacts may not be
    classified identically to check_overlaps_metric, which scales by 1e18.

    Args:
        polygons: Sequence of shapely-like geometries providing
            ``intersects``, ``touches`` and ``intersection(...).area``.
        min_area: Optional threshold. ``None`` (default) reports every
            non-touching intersection; a number keeps only pairs whose
            intersection area is strictly greater than it (pass ``1e-10``
            for the previous behaviour).

    Returns:
        ``(has_overlap, overlaps)`` where ``overlaps`` is a list of
        ``(i, j, area)`` tuples with ``i < j``.
    """
    overlaps = []
    count = len(polygons)

    for i in range(count):
        first = polygons[i]
        for j in range(i + 1, count):
            second = polygons[j]
            # Boundary-only contact is allowed; anything beyond it is an overlap.
            if not first.intersects(second) or first.touches(second):
                continue
            area = first.intersection(second).area
            if min_area is None or area > min_area:
                overlaps.append((i, j, area))

    return len(overlaps) > 0, overlaps

# Marker line so the cell output confirms the helper definitions above executed.
print("Functions defined")

Functions defined


In [2]:
# Load the rejected submission, decode its columns, and inspect group 004
df_failed = pd.read_csv('/home/code/experiments/001_baseline/snapshot_submission.csv')
for source, parsed in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df_failed[parsed] = df_failed[source].apply(parse_value)
# The numeric prefix of the id ("004_1" -> 4) is the group's tree count
df_failed['n'] = df_failed['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

# Show the four placements the submission reported as overlapping
config_4 = df_failed[df_failed['n'] == 4]
print("Group 004 configuration:")
print(config_4[['id', 'x_val', 'y_val', 'deg_val']])

# Rebuild the four trees and run the pairwise overlap check on them
polygons_4 = [
    get_tree_polygon(x, y, deg)
    for x, y, deg in zip(config_4['x_val'], config_4['y_val'], config_4['deg_val'])
]
has_overlap, overlap_details = check_overlaps_detailed(polygons_4)
print(f"\nHas overlaps: {has_overlap}")
print(f"Overlap details: {overlap_details}")

Group 004 configuration:
      id     x_val    y_val     deg_val
6  004_0 -0.324748  0.13211  156.370622
7  004_1  0.315354  0.13211  156.370622
8  004_2  0.324748 -0.73211  336.370622
9  004_3 -0.315354 -0.73211  336.370622

Has overlaps: False
Overlap details: []


In [3]:
# Run the pairwise overlap check on every group n = 1..200 of the failed submission
print("Checking all 200 configurations for overlaps...")
overlapping_configs = []

for n in range(1, 201):
    config = df_failed[df_failed['n'] == n]
    if len(config) != n:
        # Group n must contain exactly n trees; report and move on otherwise
        print(f"ERROR: n={n} has {len(config)} trees")
        continue

    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    has_overlap, overlap_details = check_overlaps_detailed(polygons)
    if not has_overlap:
        continue

    overlapping_configs.append((n, overlap_details))
    print(f"n={n}: OVERLAP! {overlap_details}")

print(f"\nTotal overlapping configurations: {len(overlapping_configs)}")
print(f"Overlapping groups: {[group_n for group_n, _ in overlapping_configs]}")

Checking all 200 configurations for overlaps...



Total overlapping configurations: 0
Overlapping groups: []


In [4]:
# Baseline check: run the same overlap scan on sample_submission.csv
df_sample = pd.read_csv('/home/data/sample_submission.csv')
for source, parsed in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df_sample[parsed] = df_sample[source].apply(parse_value)
df_sample['n'] = df_sample['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

print("Checking sample_submission.csv for overlaps...")
sample_overlaps = []

for n in range(1, 201):
    config = df_sample[df_sample['n'] == n]
    if len(config) != n:
        print(f"ERROR: n={n} has {len(config)} trees")
        continue

    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    has_overlap, overlap_details = check_overlaps_detailed(polygons)
    if not has_overlap:
        continue

    sample_overlaps.append((n, overlap_details))
    # Keep the log short: details are printed for small groups only
    if n <= 20:
        print(f"n={n}: OVERLAP! {overlap_details}")

print(f"\nTotal overlapping configurations in sample: {len(sample_overlaps)}")

Checking sample_submission.csv for overlaps...



Total overlapping configurations in sample: 0


In [5]:
# Calculate score for sample_submission
def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    Each polygon is read through its ``bounds`` tuple
    ``(min_x, min_y, max_x, max_y)``. An empty input yields ``0``.
    """
    if not polygons:
        return 0

    boxes = [poly.bounds for poly in polygons]
    pos_inf, neg_inf = float('inf'), float('-inf')

    # Seeding with +/-inf keeps the comparison order of a running min/max.
    left = min([pos_inf] + [box[0] for box in boxes])
    bottom = min([pos_inf] + [box[1] for box in boxes])
    right = max([neg_inf] + [box[2] for box in boxes])
    top = max([neg_inf] + [box[3] for box in boxes])

    return max(right - left, top - bottom)

# Per-group contribution is side^2 / n; the submission score is their sum
scores_sample = []
for n in range(1, 201):
    config = df_sample[df_sample['n'] == n]
    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    side = get_bounding_box_side(polygons)
    score_contrib = (side ** 2) / n
    scores_sample.append(dict(n=n, side=side, score_contrib=score_contrib))

scores_sample_df = pd.DataFrame(scores_sample)
sample_total = scores_sample_df['score_contrib'].sum()

# Report the total next to the target score and the remaining gap
print(f"Sample submission score: {sample_total:.6f}")
print(f"Target: 68.894234")
print(f"Gap: {sample_total - 68.894234:.6f}")

Sample submission score: 173.652299
Target: 68.894234
Gap: 104.758065


In [6]:
# Overlap-check and score the top-level preoptimized CSVs found in the snapshot
import os

preopt_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized'
print(f"Checking preoptimized submissions in {preopt_dir}")

for fname in ['submission.csv', 'ensemble.csv', 'best_ensemble.csv', 'santa-2025.csv']:
    fpath = os.path.join(preopt_dir, fname)
    if not os.path.exists(fpath):
        continue

    df = pd.read_csv(fpath)
    for source, parsed in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
        df[parsed] = df[source].apply(parse_value)
    df['n'] = df['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

    # Pass 1: stop at the first group that reports an overlap
    has_any_overlap = False
    for n in range(1, 201):
        config = df[df['n'] == n]
        if len(config) != n:
            continue
        polygons = [
            get_tree_polygon(x, y, deg)
            for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
        ]
        if check_overlaps_detailed(polygons)[0]:
            has_any_overlap = True
            break

    # Pass 2: accumulate side^2 / n over every complete group
    scores = []
    for n in range(1, 201):
        config = df[df['n'] == n]
        if len(config) != n:
            continue
        polygons = [
            get_tree_polygon(x, y, deg)
            for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
        ]
        side = get_bounding_box_side(polygons)
        scores.append((side ** 2) / n)

    total = sum(scores)
    print(f"{fname}: score={total:.6f}, has_overlaps={has_any_overlap}")

Checking preoptimized submissions in /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized


submission.csv: score=70.676501, has_overlaps=False


ensemble.csv: score=70.676102, has_overlaps=False


best_ensemble.csv: score=70.676102, has_overlaps=False


santa-2025.csv: score=70.676102, has_overlaps=False


In [7]:
# Working hypothesis: the mismatch is a precision issue, so look for
# candidate submissions in the snapshot folders before re-validating.

# bucket-of-chump: folder taken from a top kernel
bucket_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump'
# santa25-public: may contain submissions that were already validated
public_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public'

# Print up to ten directory entries for each folder that exists
for heading, folder in (
    (f"Checking {bucket_dir}", bucket_dir),
    (f"\nChecking {public_dir}", public_dir),
):
    print(heading)
    if not os.path.exists(folder):
        continue
    for entry in os.listdir(folder)[:10]:
        print(f"  {entry}")

Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump
  bbox3
  submission.csv
  submission visualization.pdf

Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public
  submission_JKoT4.csv
  New_Tree_144_196.csv
  submission_JKoT3.csv
  santa2025_ver2_v61.csv
  submission_JKoT2.csv
  santa2025_ver2_v67.csv
  santa2025_ver2_v76.csv
  submission_70_936673758122.csv
  santa2025_ver2_v65.csv
  submission_70_926149550346.csv


In [9]:
# Score the bucket-of-chump submission and compare its group 004 with the failed one
bucket_sub = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/bucket-of-chump/submission.csv'
df_bucket = pd.read_csv(bucket_sub)
for source, parsed in (('x', 'x_val'), ('y', 'y_val'), ('deg', 'deg_val')):
    df_bucket[parsed] = df_bucket[source].apply(parse_value)
df_bucket['n'] = df_bucket['id'].apply(lambda tree_id: int(tree_id.split('_')[0]))

# Sum side^2 / n over every group that has the expected number of trees
scores_bucket = []
for n in range(1, 201):
    config = df_bucket[df_bucket['n'] == n]
    if len(config) == n:
        polygons = [
            get_tree_polygon(x, y, deg)
            for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
        ]
        side = get_bounding_box_side(polygons)
        scores_bucket.append((side ** 2) / n)

bucket_total = sum(scores_bucket)
print(f"bucket-of-chump submission score: {bucket_total:.6f}")
print(f"Target: 68.894234")
print(f"Gap: {bucket_total - 68.894234:.6f}")

# Print this file's group 004 placements
config_4_bucket = df_bucket[df_bucket['n'] == 4]
print("\nGroup 004 in bucket-of-chump:")
print(config_4_bucket[['id', 'x_val', 'y_val', 'deg_val']])

bucket-of-chump submission score: 70.676501
Target: 68.894234
Gap: 1.782267

Group 004 in bucket-of-chump:
      id     x_val    y_val     deg_val
6  004_0 -0.324748  0.13211  156.370622
7  004_1  0.315354  0.13211  156.370622
8  004_2  0.324748 -0.73211  336.370622
9  004_3 -0.315354 -0.73211  336.370622


In [10]:
# Use the EXACT same validation as the official metric
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Precision (significant digits) of the Decimal context used by the metric code below.
getcontext().prec = 25
# Every coordinate is multiplied by this factor before shapely geometry is built
# (see ChristmasTreeMetric); calculate_score_metric divides by its square again.
scale_factor = Decimal('1e18')

class ChristmasTreeMetric:
    """One placed tree, built from Decimal inputs with coordinates scaled by ``scale_factor``.

    Labelled by the notebook author as an exact copy of the official metric's
    tree class, so the code is intentionally left untouched. The finished
    shapely geometry is stored on ``self.polygon``.

    Args:
        center_x: X position of the tree's local origin; converted with ``Decimal``.
        center_y: Y position of the tree's local origin; converted with ``Decimal``.
        angle: Rotation about the local origin; converted with ``Decimal`` and
            passed to ``affinity.rotate`` as a float (degrees under shapely's
            default ``use_radians=False``).
    """
    def __init__(self, center_x='0', center_y='0', angle='0'):
        self.center_x = Decimal(center_x)
        self.center_y = Decimal(center_y)
        self.angle = Decimal(angle)

        # Tree dimensions in unscaled units: widths of the trunk and of the three
        # tiers, plus the y level at which each tier starts.
        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        # 15-vertex outline: tip first, down the positive-x side, across the
        # trunk bottom, then back up the negative-x side. Each coordinate is
        # multiplied by scale_factor before the polygon is created.
        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        # Rotate about the local origin first, then move to the scaled centre.
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

def check_overlaps_metric(df_group):
    """Report the first overlapping pair of trees in one submission group.

    Every row's ``x``, ``y`` and ``deg`` cells are turned into text (with a
    leading 's' prefix removed) and used to build a ``ChristmasTreeMetric``.
    Candidate neighbours come from an ``STRtree`` query; a pair overlaps when
    it intersects without merely touching.

    Returns:
        ``(True, (i, index))`` for the first overlapping pair found,
        otherwise ``(False, None)``.
    """
    def _plain(cell):
        # Text form of the cell without the 's' prefix used in submissions
        text = str(cell)
        return text.lstrip('s') if text.startswith('s') else text

    all_polygons = [
        ChristmasTreeMetric(_plain(row['x']), _plain(row['y']), _plain(row['deg'])).polygon
        for _, row in df_group.iterrows()
    ]
    r_tree = STRtree(all_polygons)

    for i, poly in enumerate(all_polygons):
        for index in r_tree.query(poly):
            # The query also returns the polygon itself; skip that hit
            if index != i:
                other = all_polygons[index]
                if poly.intersects(other) and not poly.touches(other):
                    return True, (i, index)
    return False, None

# Marker line so the cell output confirms the metric-style helpers above executed.
print("Metric validation functions defined")

Metric validation functions defined


In [11]:
# Reload the failed submission (replacing the earlier df_failed) and validate it
# with check_overlaps_metric, first for group 004 and then for every group
df_failed = pd.read_csv('/home/code/experiments/001_baseline/snapshot_submission.csv').assign(
    tree_count_group=lambda frame: frame['id'].str.split('_').str[0]
)

# Group 004 is the one the submission error named
group_004 = df_failed.loc[df_failed['tree_count_group'] == '004']
print("Checking group 004 with metric validation...")
has_overlap, overlap_pair = check_overlaps_metric(group_004)
print(f"Has overlap: {has_overlap}, pair: {overlap_pair}")

# Same check across all groups, collecting each offending pair
print("\nChecking all groups with metric validation...")
overlapping_groups = []
for group, df_group in df_failed.groupby('tree_count_group'):
    has_overlap, overlap_pair = check_overlaps_metric(df_group)
    if not has_overlap:
        continue
    overlapping_groups.append((group, overlap_pair))
    print(f"Group {group}: OVERLAP at {overlap_pair}")

print(f"\nTotal overlapping groups: {len(overlapping_groups)}")

Checking group 004 with metric validation...
Has overlap: True, pair: (1, 2)

Checking all groups with metric validation...
Group 004: OVERLAP at (1, 2)
Group 018: OVERLAP at (4, 12)
Group 025: OVERLAP at (16, 22)
Group 031: OVERLAP at (1, 7)


Group 042: OVERLAP at (3, 17)


Group 048: OVERLAP at (13, 29)


Group 066: OVERLAP at (10, 48)
Group 070: OVERLAP at (40, 55)


Group 126: OVERLAP at (42, 86)



Total overlapping groups: 9


In [12]:
# Validate sample_submission.csv group by group with check_overlaps_metric
df_sample = pd.read_csv('/home/data/sample_submission.csv').assign(
    tree_count_group=lambda frame: frame['id'].str.split('_').str[0]
)

print("Checking sample_submission.csv with metric validation...")
sample_overlaps = []
for group, df_group in df_sample.groupby('tree_count_group'):
    has_overlap, overlap_pair = check_overlaps_metric(df_group)
    if not has_overlap:
        continue
    sample_overlaps.append((group, overlap_pair))
    print(f"Group {group}: OVERLAP at {overlap_pair}")

print(f"\nTotal overlapping groups in sample: {len(sample_overlaps)}")

Checking sample_submission.csv with metric validation...



Total overlapping groups in sample: 0


In [13]:
# Calculate score for sample_submission using metric method
def calculate_score_metric(df):
    """Score a submission as the sum over groups of ``side**2 / n``.

    Rows are grouped by the part of ``id`` before the first underscore. For
    each group a ``ChristmasTreeMetric`` is built per row, the bounds of the
    union of all polygons give the enclosing box, and the longer box side
    (still scaled by ``scale_factor``) is squared, unscaled, and divided by
    the number of trees in the group.

    The input frame is not modified: the grouping key is derived on the fly
    rather than being written back as a ``tree_count_group`` column, so the
    caller's DataFrame keeps exactly the columns it was passed with.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns;
            the coordinate cells may carry a leading 's' prefix.

    Returns:
        The total score as a ``float``.
    """
    def _plain(cell):
        # Text form of the cell without the 's' prefix used in submissions
        text = str(cell)
        return text.lstrip('s') if text.startswith('s') else text

    # Group on a positional key array instead of adding a column to `df`.
    group_keys = df['id'].str.split('_').str[0].to_numpy()

    total_score = Decimal('0.0')
    for _, df_group in df.groupby(group_keys):
        num_trees = len(df_group)

        all_polygons = [
            ChristmasTreeMetric(_plain(row['x']), _plain(row['y']), _plain(row['deg'])).polygon
            for _, row in df_group.iterrows()
        ]
        bounds = unary_union(all_polygons).bounds
        side_length_scaled = max(bounds[2] - bounds[0], bounds[3] - bounds[1])

        # Square the scaled side, undo the scaling, then normalise by tree count.
        group_score = (Decimal(side_length_scaled) ** 2) / (scale_factor**2) / Decimal(num_trees)
        total_score += group_score

    return float(total_score)

# Score the sample submission with calculate_score_metric and print it next to
# the 68.894234 target together with the remaining gap.
sample_score = calculate_score_metric(df_sample)
print(f"Sample submission score (metric method): {sample_score:.6f}")
print(f"Target: 68.894234")
print(f"Gap: {sample_score - 68.894234:.6f}")

Sample submission score (metric method): 173.652299
Target: 68.894234
Gap: 104.758065


In [14]:
# Search for valid submissions in snapshots
import os

def check_submission_validity(filepath):
    """Validate a submission CSV and score it when it has no overlaps.

    Returns:
        ``(score, "Valid")`` when no group overlaps,
        ``(None, "Overlap in group <group>")`` for the first overlapping group,
        or ``(None, <error text>)`` if reading or processing the file fails.
    """
    try:
        df = pd.read_csv(filepath).assign(
            tree_count_group=lambda frame: frame['id'].str.split('_').str[0]
        )

        # Find the first group (in groupby order) that fails the overlap check
        first_bad = next(
            (group
             for group, df_group in df.groupby('tree_count_group')
             if check_overlaps_metric(df_group)[0]),
            None,
        )
        if first_bad is not None:
            return None, f"Overlap in group {first_bad}"

        return calculate_score_metric(df), "Valid"
    except Exception as e:
        # Best effort: an unreadable or malformed file is reported, not raised
        return None, str(e)

# Validate up to five CSV submissions from each snapshot directory
snapshot_dirs = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/blended',
]

valid_submissions = []
for snap_dir in snapshot_dirs:
    if not os.path.exists(snap_dir):
        continue
    print(f"\nChecking {snap_dir}...")
    # Filter to CSVs BEFORE applying the five-file cap. Slicing the raw
    # listing first let zips and sub-directories use up the budget, so fewer
    # than five submissions could be examined even when more were present.
    # NOTE: os.listdir order is arbitrary, so which five files are picked is
    # filesystem-dependent.
    csv_names = [name for name in os.listdir(snap_dir) if name.endswith('.csv')]
    for fname in csv_names[:5]:
        fpath = os.path.join(snap_dir, fname)
        score, status = check_submission_validity(fpath)
        if score is not None:
            valid_submissions.append((fpath, score))
            print(f"  {fname}: score={score:.6f} - VALID")
        else:
            print(f"  {fname}: {status}")

print(f"\n\nFound {len(valid_submissions)} valid submissions")


Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa25-public...


  submission_JKoT4.csv: score=72.489504 - VALID


  New_Tree_144_196.csv: score=72.927920 - VALID


  submission_JKoT3.csv: score=72.489488 - VALID


  santa2025_ver2_v61.csv: score=72.951925 - VALID


  submission_JKoT2.csv: score=72.489348 - VALID

Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov...


  submission_best.csv: score=70.926150 - VALID

Checking /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/blended...


  submission (77).csv: score=72.135010 - VALID


Found 7 valid submissions


In [15]:
# List everything in the chistyakov folder, then validate each CSV in it
chistyakov_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov'
print(f"All files in {chistyakov_dir}:")
for entry in os.listdir(chistyakov_dir):
    print(f"  {entry}")

print("\nChecking all CSV files in chistyakov...")
for fname in (name for name in os.listdir(chistyakov_dir) if name.endswith('.csv')):
    fpath = os.path.join(chistyakov_dir, fname)
    score, status = check_submission_validity(fpath)
    # A score of None means the file failed validation; status says why
    print(f"  {fname}: score={score:.6f} - VALID" if score is not None else f"  {fname}: {status}")

All files in /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/chistyakov:
  submission_best.csv
  santa2025-packed-version-of-current-best-public.zip

Checking all CSV files in chistyakov...


  submission_best.csv: score=70.926150 - VALID


In [16]:
# Look for further candidate submissions in the telegram and santa-2025-csv folders
telegram_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram'
csv_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv'

# Print up to ten directory entries for each folder that exists
for heading, folder in (
    (f"Files in {telegram_dir}:", telegram_dir),
    (f"\nFiles in {csv_dir}:", csv_dir),
):
    if not os.path.exists(folder):
        continue
    print(heading)
    for fname in os.listdir(folder)[:10]:
        print(f"  {fname}")

Files in /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram:
  telegram_extracted
  telegram-public-shared-solution-for-santa-2025.zip
  72.49.csv
  71.97.csv

Files in /home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv:
  bbox3
  santa-2025.csv


In [17]:
# Validate the two telegram CSVs, then the santa-2025-csv submission
telegram_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/telegram'
for fname in ['71.97.csv', '72.49.csv']:
    fpath = os.path.join(telegram_dir, fname)
    if not os.path.exists(fpath):
        continue
    score, status = check_submission_validity(fpath)
    # A score of None means the file failed validation; status says why
    print(f"{fname}: score={score:.6f} - VALID" if score is not None else f"{fname}: {status}")

csv_dir = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025-csv'
fpath = os.path.join(csv_dir, 'santa-2025.csv')
if os.path.exists(fpath):
    score, status = check_submission_validity(fpath)
    print(f"santa-2025.csv: score={score:.6f} - VALID" if score is not None else f"santa-2025.csv: {status}")

71.97.csv: score=71.972027 - VALID


72.49.csv: score=72.495739 - VALID


santa-2025.csv: score=70.676102 - VALID
