# Validate Overlaps with Shapely

Verify overlap detection and understand why the C++ optimizer isn't finding improvements.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.ops import unary_union
import json

# Tree geometry: the unrotated outline as (x, y) vertices, listed from the tip
# (0, 0.8) down the positive-x side to the base at y = -0.2 and back up the
# negative-x side. The outline is mirror-symmetric about x = 0.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5), (0.0625, 0.5),
    (0.2, 0.25), (0.1, 0.25),
    (0.35, 0), (0.075, 0),
    (0.075, -0.2), (-0.075, -0.2),
    (-0.075, 0), (-0.35, 0),
    (-0.1, 0.25), (-0.2, 0.25),
    (-0.0625, 0.5), (-0.125, 0.5),
]
# Split into separate coordinate arrays, which is the form the rotation code uses.
TX = np.array([vx for vx, _vy in _TREE_OUTLINE])
TY = np.array([vy for _vx, vy in _TREE_OUTLINE])

def get_tree_polygon(x, y, deg):
    """Build the Shapely polygon of a single placed tree.

    The template outline (TX, TY) is rotated by ``deg`` degrees about the
    origin and then shifted by ``(x, y)``.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        deg: Rotation angle in degrees.

    Returns:
        A ``shapely.geometry.Polygon`` with one vertex per template vertex.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)
    # 2-D rotation of every template vertex, followed by the translation.
    px = TX * c - TY * s + x
    py = TX * s + TY * c + y
    vertices = list(zip(px, py))
    return Polygon(vertices)

def check_overlaps(df, n, min_area=1e-10):
    """Find pairs of overlapping trees in the N-tree configuration.

    Rows belonging to configuration ``n`` are selected by their ``id`` prefix
    (``n`` zero-padded to three digits followed by ``_``). The ``x``, ``y`` and
    ``deg`` values may be plain numbers or strings carrying a leading ``'s'``
    (e.g. ``'s0.25'``); the prefix is stripped before conversion.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``.
        n: Number of trees in the configuration to check.
        min_area: Intersection areas at or below this value are ignored, so
            that numerical noise along shared edges is not reported.
            Defaults to ``1e-10``.

    Returns:
        A tuple ``(overlaps, polygons)`` where ``overlaps`` is a list of
        ``(i, j, area)`` tuples with ``i < j`` indexing into ``polygons``, and
        ``polygons`` is the list of tree polygons in row order. Both lists are
        empty when no row matches the prefix.
    """
    prefix = f"{n:03d}_"
    trees_data = df[df['id'].str.startswith(prefix)]

    def _to_float(value):
        # Single place for the optional 's' prefix handling.
        text = str(value)
        return float(text[1:]) if text.startswith('s') else float(value)

    polygons = [
        get_tree_polygon(_to_float(x), _to_float(y), _to_float(deg))
        for x, y, deg in zip(trees_data['x'], trees_data['y'], trees_data['deg'])
    ]

    # Check all pairs for overlaps
    overlaps = []
    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            second = polygons[j]
            # Disjoint polygons and polygons that only share boundary points
            # are not overlaps; skip them before computing the intersection.
            if not first.intersects(second) or first.touches(second):
                continue
            area = first.intersection(second).area
            if area > min_area:
                overlaps.append((i, j, area))

    return overlaps, polygons

# Visible confirmation that this cell ran and the definitions above exist in the kernel.
print("Functions defined")

Functions defined


In [2]:
# Read the two submissions under comparison: the pre-optimized baseline and
# the optimizer output being validated (the "invalid optimized file").
baseline_df, optimized_df = map(
    pd.read_csv,
    (
        '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
        '/home/code/experiments/004_cpp_optimizer/optimized20.csv',
    ),
)

# Row counts as a quick check that both files were read completely.
print(
    f"Baseline: {len(baseline_df)} rows",
    f"Optimized: {len(optimized_df)} rows",
    sep="\n",
)

Baseline: 20100 rows
Optimized: 20100 rows


In [3]:
# Baseline check for N=2..10: report every overlapping pair found, if any.
print("Checking baseline for overlaps...")
for n in range(2, 11):
    overlaps, _ = check_overlaps(baseline_df, n)
    if not overlaps:
        print(f"N={n}: No overlaps")
        continue
    # The full (i, j, area) list is printed alongside the count.
    print(f"N={n}: {len(overlaps)} overlaps - {overlaps}")

Checking baseline for overlaps...
N=2: No overlaps
N=3: No overlaps
N=4: No overlaps
N=5: No overlaps
N=6: No overlaps
N=7: No overlaps
N=8: No overlaps
N=9: No overlaps
N=10: No overlaps


In [4]:
# Same check on the optimized file for N=2..10, listing each overlapping pair
# with its intersection area on its own line.
print("\nChecking optimized file for overlaps...")
for n in range(2, 11):
    overlaps, _ = check_overlaps(optimized_df, n)
    if not overlaps:
        print(f"N={n}: No overlaps")
        continue
    print(f"N={n}: {len(overlaps)} overlaps")
    for i, j, area in overlaps:
        print(f"  Trees {i} and {j}: intersection area = {area:.6f}")


Checking optimized file for overlaps...
N=2: 1 overlaps
  Trees 0 and 1: intersection area = 0.180381
N=3: 3 overlaps
  Trees 0 and 1: intersection area = 0.128905
  Trees 0 and 2: intersection area = 0.145846
  Trees 1 and 2: intersection area = 0.114819
N=4: 6 overlaps
  Trees 0 and 1: intersection area = 0.189887
  Trees 0 and 2: intersection area = 0.141898
  Trees 0 and 3: intersection area = 0.159460
  Trees 1 and 2: intersection area = 0.167439
  Trees 1 and 3: intersection area = 0.176622
  Trees 2 and 3: intersection area = 0.189329
N=5: 10 overlaps
  Trees 0 and 1: intersection area = 0.056030
  Trees 0 and 2: intersection area = 0.112288
  Trees 0 and 3: intersection area = 0.120540
  Trees 0 and 4: intersection area = 0.089999
  Trees 1 and 2: intersection area = 0.079910
  Trees 1 and 3: intersection area = 0.060374
  Trees 1 and 4: intersection area = 0.003383
  Trees 2 and 3: intersection area = 0.132809
  Trees 2 and 4: intersection area = 0.115430
  Trees 3 and 4: int

In [5]:
# Calculate bounding box and score for each N
def calculate_score(df, n):
    """Score the N-tree configuration: squared bounding-square side over N.

    Rows belonging to configuration ``n`` are selected by their ``id`` prefix
    (``n`` zero-padded to three digits followed by ``_``). Every tree outline
    (TX, TY) is rotated by the row's ``deg`` and shifted by its ``(x, y)``;
    the score is ``side ** 2 / n`` where ``side`` is the larger of the width
    and height of the axis-aligned box around all transformed vertices.

    The ``x``, ``y`` and ``deg`` values may be plain numbers or strings
    carrying a leading ``'s'`` (e.g. ``'s0.25'``).

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``.
        n: Number of trees in the configuration to score.

    Returns:
        The score as a float.

    Raises:
        ValueError: If ``df`` contains no rows for configuration ``n``, or a
            coordinate cannot be converted to a float.
    """
    prefix = f"{n:03d}_"
    trees_data = df[df['id'].str.startswith(prefix)]
    if trees_data.empty:
        # Fail with a clear message instead of "max() arg is an empty sequence".
        raise ValueError(f"No rows found for N={n} (id prefix {prefix!r})")

    def _to_float(value):
        # Single place for the optional 's' prefix handling.
        text = str(value)
        return float(text[1:]) if text.startswith('s') else float(value)

    xs = trees_data['x'].map(_to_float).to_numpy(dtype=float)
    ys = trees_data['y'].map(_to_float).to_numpy(dtype=float)
    degs = trees_data['deg'].map(_to_float).to_numpy(dtype=float)

    # Column vectors so each tree's angle/offset broadcasts against the
    # template vertices, giving one (trees, vertices) array per coordinate.
    angle_rad = np.radians(degs)
    cos_a = np.cos(angle_rad)[:, None]
    sin_a = np.sin(angle_rad)[:, None]
    rotated_x = TX * cos_a - TY * sin_a + xs[:, None]
    rotated_y = TX * sin_a + TY * cos_a + ys[:, None]

    side = max(rotated_x.max() - rotated_x.min(), rotated_y.max() - rotated_y.min())
    return (side ** 2) / n

# Side-by-side scores for N=2..10; an optimized score only counts if its
# layout is free of overlaps, so the validity status is printed with it.
print("\nScore comparison for N=2-10:")
for n in range(2, 11):
    overlaps, _ = check_overlaps(optimized_df, n)
    if overlaps:
        status = "INVALID (overlaps)"
    else:
        status = "VALID"
    baseline_score = calculate_score(baseline_df, n)
    optimized_score = calculate_score(optimized_df, n)
    print(f"N={n}: Baseline={baseline_score:.6f}, Optimized={optimized_score:.6f}, Status={status}")


Score comparison for N=2-10:
N=2: Baseline=0.450779, Optimized=0.338427, Status=INVALID (overlaps)
N=3: Baseline=0.434745, Optimized=0.262632, Status=INVALID (overlaps)
N=4: Baseline=0.416545, Optimized=0.199376, Status=INVALID (overlaps)
N=5: Baseline=0.416850, Optimized=0.206839, Status=INVALID (overlaps)
N=6: Baseline=0.399610, Optimized=0.200469, Status=INVALID (overlaps)


N=7: Baseline=0.399897, Optimized=0.297988, Status=INVALID (overlaps)
N=8: Baseline=0.385407, Optimized=0.385407, Status=VALID
N=9: Baseline=0.387415, Optimized=0.387415, Status=VALID
N=10: Baseline=0.376630, Optimized=0.366574, Status=INVALID (overlaps)


In [6]:
# Since the optimized file has overlaps, build a valid submission from the
# baseline and take the optimized layout only for N values that are both
# overlap-free and strictly better than the baseline.

print("\nCreating valid submission by reverting overlapping N values to baseline...")

# Baseline rows to drop and the optimized rows that replace them. Both are
# collected during the scan and concatenated once afterwards, instead of
# re-filtering and re-concatenating the whole frame for every accepted N.
replaced_mask = pd.Series(False, index=baseline_df.index)
optimized_parts = []

# Check all N values for overlaps in optimized file
improved_n = []
for n in range(1, 201):
    overlaps, _ = check_overlaps(optimized_df, n)
    if overlaps:
        # Invalid layout: the baseline rows for this N stay in place.
        continue

    baseline_score = calculate_score(baseline_df, n)
    optimized_score = calculate_score(optimized_df, n)
    # The 1e-9 margin keeps floating-point noise from counting as an improvement.
    if optimized_score < baseline_score - 1e-9:
        prefix = f"{n:03d}_"
        replaced_mask = replaced_mask | baseline_df['id'].str.startswith(prefix)
        optimized_rows = optimized_df[optimized_df['id'].str.startswith(prefix)]
        optimized_parts.append(optimized_rows)
        improved_n.append((n, baseline_score, optimized_score))
        # Nine decimals, matching the 1e-9 threshold, so that an accepted
        # improvement can never be displayed as zero.
        print(f"N={n}: Using optimized (improvement: {baseline_score - optimized_score:.9f})")

# Kept baseline rows first, then the accepted optimized groups in increasing N.
valid_submission = pd.concat(
    [baseline_df[~replaced_mask], *optimized_parts],
    ignore_index=True,
)

print(f"\nTotal N values improved: {len(improved_n)}")


Creating valid submission by reverting overlapping N values to baseline...


N=128: Using optimized (improvement: 0.000000)



Total N values improved: 1


In [7]:
# Total score = sum of the per-N scores over N = 1..200, computed for the
# baseline and for the assembled valid submission.
total_baseline = sum(map(lambda n: calculate_score(baseline_df, n), range(1, 201)))
total_valid = sum(map(lambda n: calculate_score(valid_submission, n), range(1, 201)))

print(f"\nBaseline total score: {total_baseline:.6f}")
print(f"Valid submission total score: {total_valid:.6f}")
# Positive means the valid submission scores lower than the baseline.
print(f"Improvement: {total_baseline - total_valid:.6f}")


Baseline total score: 70.676102
Valid submission total score: 70.676102
Improvement: 0.000000


In [None]:
# Sort and save valid submission
# Score the final total is compared against; defined once so the printed
# target and the computed gap cannot drift apart.
TARGET_SCORE = 68.919154

# Order rows numerically by the two integer parts of ids shaped "NNN_k".
# .assign() puts the temporary key on a copy, so `valid_submission` never
# carries a stray `sort_key` column, even if this cell fails part-way.
valid_submission = (
    valid_submission
    .assign(
        sort_key=valid_submission['id'].apply(
            lambda x: (int(x.split('_')[0]), int(x.split('_')[1]))
        )
    )
    .sort_values('sort_key')
    .drop('sort_key', axis=1)
)
valid_submission.to_csv('/home/submission/submission.csv', index=False)
print("Saved valid submission to /home/submission/submission.csv")

# Save metrics
metrics = {
    'cv_score': total_valid,
    'baseline_score': total_baseline,
    'improvement': total_baseline - total_valid,
    'improved_n_values': len(improved_n)
}

# Written relative to the kernel's current working directory.
with open('metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal CV Score: {total_valid:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap: {total_valid - TARGET_SCORE:.6f}")